/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>

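/*
 * TREX_PATCH gates TRex-specific flow director behavior in this file
 * (catch-all rules keyed on ip_id, TOS bit matching and quieter error
 * reporting).
 */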
#define TREX_PATCH

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs_exp.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <rte_ether.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include "mlx5.h"
#include "mlx5_rxtx.h"

struct fdir_flow_desc {
	uint16_t dst_port;
	uint16_t src_port;
	uint32_t src_ip[4];
	uint32_t dst_ip[4];
	uint8_t tos;
	uint8_t ip_id;
	uint8_t proto;
	uint8_t mac[6];
	uint16_t vlan_tag;
	enum hash_rxq_type type;
};

struct mlx5_fdir_filter {
	LIST_ENTRY(mlx5_fdir_filter) next;
	uint16_t queue; /* RX queue to assign matching packets to. */
	enum rte_eth_fdir_behavior behavior;
	struct fdir_flow_desc desc;
	struct ibv_exp_flow *flow;
};

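/* List head type for the configured flow director filters. */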
LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);

/**
 * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
 *
 * @param[in] fdir_filter
 *   DPDK filter structure to convert.
 * @param[out] desc
 *   Resulting mlx5 filter descriptor.
 * @param mode
 *   Flow director mode.
 */
static void
fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
			 struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
{
	/* Initialize descriptor. */
	memset(desc, 0, sizeof(*desc));

	/* Set VLAN ID. */
	desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;

#ifndef TREX_PATCH
	/* Set MAC address. */
	if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		rte_memcpy(desc->mac,
			   fdir_filter->input.flow.mac_vlan_flow.mac_addr.
				addr_bytes,
			   sizeof(desc->mac));
		desc->type = HASH_RXQ_ETH;
		return;
	}
#else
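	/*
	 * TREX_PATCH: ip_id == 2 is used as a marker for a catch-all rule
	 * that matches every packet regardless of EtherType (see the
	 * matching logic in priv_fdir_flow_add()).
	 */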
	if (fdir_filter->input.flow.ip4_flow.ip_id == 2) {
		desc->type = HASH_RXQ_ETH;
		desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id;
		return;
	}
#endif

	/* Set mode */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		desc->type = HASH_RXQ_UDPV4;
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		desc->type = HASH_RXQ_TCPV4;
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		desc->type = HASH_RXQ_IPV4;
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		desc->type = HASH_RXQ_UDPV6;
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		desc->type = HASH_RXQ_TCPV6;
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		desc->type = HASH_RXQ_IPV6;
		break;
	default:
		break;
	}

	/* Set flow values */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
		desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
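		/* Fall through. */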
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
		desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
		/* TREX_PATCH: the TTL field carries the TOS value. */
		desc->tos = fdir_filter->input.flow.ip4_flow.ttl;
		desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id;
		desc->proto = fdir_filter->input.flow.ip4_flow.proto;
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
		desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
		/* Fall through. */
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		rte_memcpy(desc->src_ip,
			   fdir_filter->input.flow.ipv6_flow.src_ip,
			   sizeof(desc->src_ip));
		rte_memcpy(desc->dst_ip,
			   fdir_filter->input.flow.ipv6_flow.dst_ip,
			   sizeof(desc->dst_ip));
		/* TREX_PATCH: the hop limits field carries the TOS value. */
		desc->tos = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits;
		desc->ip_id = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label;
		desc->proto = fdir_filter->input.flow.ipv6_flow.proto;
		break;
	default:
		break;
	}
}

/**
 * Check if two flow descriptors overlap according to configured mask.
 *
 * @param priv
 *   Private structure that provides flow director mask.
 * @param desc1
 *   First flow descriptor to compare.
 * @param desc2
 *   Second flow descriptor to compare.
 *
 * @return
 *   Nonzero if descriptors overlap.
 */
static int
priv_fdir_overlap(const struct priv *priv,
		  const struct fdir_flow_desc *desc1,
		  const struct fdir_flow_desc *desc2)
{
	const struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;
	unsigned int i;

	if (desc1->type != desc2->type)
		return 0;
	/* Ignore non masked bits. */
	for (i = 0; i != RTE_DIM(desc1->mac); ++i)
		if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
		    (desc2->mac[i] & mask->mac_addr_byte_mask))
			return 0;
	if (((desc1->src_port & mask->src_port_mask) !=
	     (desc2->src_port & mask->src_port_mask)) ||
	    ((desc1->dst_port & mask->dst_port_mask) !=
	     (desc2->dst_port & mask->dst_port_mask)))
		return 0;
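	/* TREX_PATCH: tos, ip_id and proto must match exactly (no mask). */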
	if ((desc1->tos != desc2->tos) ||
	    (desc1->ip_id != desc2->ip_id) ||
	    (desc1->proto != desc2->proto))
		return 0;

	switch (desc1->type) {
	case HASH_RXQ_IPV4:
	case HASH_RXQ_UDPV4:
	case HASH_RXQ_TCPV4:
		if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
		     (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
		    ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
		     (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
			return 0;
		break;
	case HASH_RXQ_IPV6:
	case HASH_RXQ_UDPV6:
	case HASH_RXQ_TCPV6:
		for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
			if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
			     (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
			    ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
			     (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
				return 0;
		break;
	default:
		break;
	}
	return 1;
}

/**
 * Create flow director steering rule for a specific filter.
 *
 * @param priv
 *   Private structure.
 * @param mlx5_fdir_filter
 *   Filter to create a steering rule for.
 * @param fdir_queue
 *   Flow director queue for matching packets.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_flow_add(struct priv *priv,
		   struct mlx5_fdir_filter *mlx5_fdir_filter,
		   struct fdir_queue *fdir_queue)
{
	struct ibv_exp_flow *flow;
	struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;
	FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
	struct ibv_exp_flow_attr *attr = &data->attr;
	uintptr_t spec_offset = (uintptr_t)&data->spec;
	struct ibv_exp_flow_spec_eth *spec_eth;
	struct ibv_exp_flow_spec_ipv4_ext *spec_ipv4;
	struct ibv_exp_flow_spec_ipv6_ext *spec_ipv6;
	struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
	struct mlx5_fdir_filter *iter_fdir_filter;
	unsigned int i;

	/* Abort if an existing flow overlaps this one to avoid packet
	 * duplication, even if it targets another queue. */
	LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
		if ((iter_fdir_filter != mlx5_fdir_filter) &&
		    (iter_fdir_filter->flow != NULL) &&
		    (priv_fdir_overlap(priv,
				       &mlx5_fdir_filter->desc,
				       &iter_fdir_filter->desc))) {
			ERROR("overlapping rules, please check your rules");
			return EEXIST;
		}

	/*
	 * No padding must be inserted by the compiler between attr and spec.
	 * This layout is expected by libibverbs.
	 */
	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
	priv_flow_attr(priv, attr, sizeof(data), desc->type);

	/* Set Ethernet spec */
	spec_eth = (struct ibv_exp_flow_spec_eth *)spec_offset;

	/* The first specification must be Ethernet. */
	assert(spec_eth->type == IBV_EXP_FLOW_SPEC_ETH);
	assert(spec_eth->size == sizeof(*spec_eth));

	/* VLAN ID */
	spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
	spec_eth->mask.vlan_tag = mask->vlan_tci_mask;

	/* Update priority */
	attr->priority = 2;

#ifndef TREX_PATCH

	if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		/* MAC Address */
		for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
			spec_eth->val.dst_mac[i] =
				desc->mac[i] & mask->mac_addr_byte_mask;
			spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
		}
		goto create_flow;
	}
#else
	/*
	 * TREX_PATCH: an empty EtherType mask means "match everything";
	 * this rule matches all packets regardless of their EtherType.
	 */
	if (desc->ip_id == 2) {
		spec_eth->val.ether_type = 0x0000;
		spec_eth->mask.ether_type = 0x0000;
		goto create_flow;
	}
#endif

	switch (desc->type) {
	case HASH_RXQ_IPV4:
	case HASH_RXQ_UDPV4:
	case HASH_RXQ_TCPV4:
		spec_offset += spec_eth->size;

		/* Set IP spec */
		spec_ipv4 = (struct ibv_exp_flow_spec_ipv4_ext *)spec_offset;

		/* The second specification must be IP. */
		assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4_EXT);
		assert(spec_ipv4->size == sizeof(*spec_ipv4));

		spec_ipv4->val.src_ip =
			desc->src_ip[0] & mask->ipv4_mask.src_ip;
		spec_ipv4->val.dst_ip =
			desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
		spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
		spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;

		/* Update priority */
		attr->priority = 1;

		spec_ipv4->val.proto = desc->proto & mask->ipv4_mask.proto;
		spec_ipv4->mask.proto = mask->ipv4_mask.proto;

#ifdef TREX_PATCH
		/* TREX_PATCH: ip_id == 1 requests a match on the TOS LSB. */
		if (desc->ip_id == 1) {
			spec_ipv4->mask.tos = 0x1;
			spec_ipv4->val.tos = 0x1;
		} else {
			spec_ipv4->mask.tos = 0x0;
			spec_ipv4->val.tos = 0x0;
		}
#endif

		if (desc->type == HASH_RXQ_IPV4)
			goto create_flow;

		spec_offset += spec_ipv4->size;
		break;
	case HASH_RXQ_IPV6:
	case HASH_RXQ_UDPV6:
	case HASH_RXQ_TCPV6:
		spec_offset += spec_eth->size;

		/* Set IP spec */
		spec_ipv6 = (struct ibv_exp_flow_spec_ipv6_ext *)spec_offset;

		/* The second specification must be IP. */
		assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6_EXT);
		assert(spec_ipv6->size == sizeof(*spec_ipv6));

		for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
			((uint32_t *)spec_ipv6->val.src_ip)[i] =
				desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
			((uint32_t *)spec_ipv6->val.dst_ip)[i] =
				desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
		}
		rte_memcpy(spec_ipv6->mask.src_ip,
			   mask->ipv6_mask.src_ip,
			   sizeof(spec_ipv6->mask.src_ip));
		rte_memcpy(spec_ipv6->mask.dst_ip,
			   mask->ipv6_mask.dst_ip,
			   sizeof(spec_ipv6->mask.dst_ip));

		spec_ipv6->val.next_hdr = desc->proto & mask->ipv6_mask.proto;
		spec_ipv6->mask.next_hdr = mask->ipv6_mask.proto;

#ifdef TREX_PATCH
		/*
		 * TREX_PATCH: ip_id == 1 requests a match on the traffic
		 * class LSB.
		 */
		if (desc->ip_id == 1) {
			spec_ipv6->mask.traffic_class = 0x1;
			spec_ipv6->val.traffic_class = 0x1;
		} else {
			spec_ipv6->mask.traffic_class = 0;
			spec_ipv6->val.traffic_class = 0;
		}
#endif

		/* Update priority */
		attr->priority = 1;

		if (desc->type == HASH_RXQ_IPV6)
			goto create_flow;

		spec_offset += spec_ipv6->size;
		break;
	default:
		ERROR("invalid flow attribute type");
		return EINVAL;
	}

	/* Set TCP/UDP flow specification. */
	spec_tcp_udp = (struct ibv_exp_flow_spec_tcp_udp *)spec_offset;

	/* The third specification must be TCP/UDP. */
	assert(spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_TCP ||
	       spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_UDP);
	assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));

	spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
	spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
	spec_tcp_udp->mask.src_port = mask->src_port_mask;
	spec_tcp_udp->mask.dst_port = mask->dst_port_mask;

	/* Update priority */
	attr->priority = 0;

create_flow:

	errno = 0;
	flow = ibv_exp_create_flow(fdir_queue->qp, attr);
	if (flow == NULL) {
		/* It's not clear whether errno is always set in this case. */
		ERROR("%p: flow director configuration failed, errno=%d: %s",
		      (void *)priv, errno,
		      (errno ? strerror(errno) : "Unknown error"));
		if (errno)
			return errno;
		return EINVAL;
	}

	DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
	mlx5_fdir_filter->flow = flow;
	return 0;
}

/**
 * Destroy a flow director queue.
 *
 * @param priv
 *   Private structure.
 * @param fdir_queue
 *   Flow director queue to be destroyed.
 */
void
priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
{
	struct mlx5_fdir_filter *fdir_filter;

	/* Disable filter flows still applying to this queue. */
	LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
		unsigned int idx = fdir_filter->queue;
		struct rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);

		assert(idx < priv->rxqs_n);
		if (fdir_queue == rxq_ctrl->fdir_queue &&
		    fdir_filter->flow != NULL) {
			claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
			fdir_filter->flow = NULL;
		}
	}
	assert(fdir_queue->qp);
	claim_zero(ibv_destroy_qp(fdir_queue->qp));
	assert(fdir_queue->ind_table);
	claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
	if (fdir_queue->wq)
		claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
	if (fdir_queue->cq)
		claim_zero(ibv_destroy_cq(fdir_queue->cq));
#ifndef NDEBUG
	memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
#endif
	rte_free(fdir_queue);
}

/**
 * Create a flow director queue.
 *
 * @param priv
 *   Private structure.
 * @param wq
 *   Work queue to route matched packets to, NULL if one needs to
 *   be created.
 * @param socket
 *   NUMA socket to allocate the queue on.
 *
 * @return
 *   Related flow director queue on success, NULL otherwise.
 */
static struct fdir_queue *
priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
		       unsigned int socket)
{
	struct fdir_queue *fdir_queue;

	fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
				       0, socket);
	if (!fdir_queue) {
		ERROR("cannot allocate flow director queue");
		return NULL;
	}
	assert(priv->pd);
	assert(priv->ctx);
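	/*
	 * If no WQ is supplied, create a placeholder CQ/WQ pair that never
	 * receives buffers, so packets steered to this queue are effectively
	 * dropped (used for RTE_ETH_FDIR_REJECT filters).
	 */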
	if (!wq) {
		fdir_queue->cq = ibv_exp_create_cq(
			priv->ctx, 1, NULL, NULL, 0,
			&(struct ibv_exp_cq_init_attr){
				.comp_mask = 0,
			});
		if (!fdir_queue->cq) {
			ERROR("cannot create flow director CQ");
			goto error;
		}
		fdir_queue->wq = ibv_exp_create_wq(
			priv->ctx,
			&(struct ibv_exp_wq_init_attr){
				.wq_type = IBV_EXP_WQT_RQ,
				.max_recv_wr = 1,
				.max_recv_sge = 1,
				.pd = priv->pd,
				.cq = fdir_queue->cq,
			});
		if (!fdir_queue->wq) {
			ERROR("cannot create flow director WQ");
			goto error;
		}
		wq = fdir_queue->wq;
	}
	fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_exp_rwq_ind_table_init_attr){
			.pd = priv->pd,
			.log_ind_tbl_size = 0,
			.ind_tbl = &wq,
			.comp_mask = 0,
		});
	if (!fdir_queue->ind_table) {
		ERROR("cannot create flow director indirection table");
		goto error;
	}
	fdir_queue->qp = ibv_exp_create_qp(
		priv->ctx,
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.pd = priv->pd,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
				.rx_hash_function =
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				.rwq_ind_tbl = fdir_queue->ind_table,
			},
			.port_num = priv->port,
		});
	if (!fdir_queue->qp) {
		ERROR("cannot create flow director hash RX QP");
		goto error;
	}
	return fdir_queue;
error:
	assert(fdir_queue);
	assert(!fdir_queue->qp);
	if (fdir_queue->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table
			   (fdir_queue->ind_table));
	if (fdir_queue->wq)
		claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
	if (fdir_queue->cq)
		claim_zero(ibv_destroy_cq(fdir_queue->cq));
	rte_free(fdir_queue);
	return NULL;
}

/**
 * Get flow director queue for a specific RX queue, create it in case
 * it does not exist.
 *
 * @param priv
 *   Private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   Related flow director queue on success, NULL otherwise.
 */
static struct fdir_queue *
priv_get_fdir_queue(struct priv *priv, uint16_t idx)
{
	struct rxq_ctrl *rxq_ctrl =
		container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
	struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;

	assert(rxq_ctrl->wq);
	if (fdir_queue == NULL) {
		fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
						    rxq_ctrl->socket);
		rxq_ctrl->fdir_queue = fdir_queue;
	}
	return fdir_queue;
}

/**
 * Get the flow director drop queue; create it if it does not exist.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   Flow director drop queue on success, NULL otherwise.
 */
static struct fdir_queue *
priv_get_fdir_drop_queue(struct priv *priv)
{
	struct fdir_queue *fdir_queue = priv->fdir_drop_queue;

	if (fdir_queue == NULL) {
		unsigned int socket = SOCKET_ID_ANY;

		/* Select a known NUMA socket if possible. */
		if (priv->rxqs_n && (*priv->rxqs)[0])
			socket = container_of((*priv->rxqs)[0],
					      struct rxq_ctrl, rxq)->socket;
		fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
		priv->fdir_drop_queue = fdir_queue;
	}
	return fdir_queue;
}

/**
 * Enable flow director filter and create steering rules.
 *
 * @param priv
 *   Private structure.
 * @param mlx5_fdir_filter
 *   Filter to create steering rule for.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_enable(struct priv *priv,
			struct mlx5_fdir_filter *mlx5_fdir_filter)
{
	struct fdir_queue *fdir_queue;

	/* Check if flow already exists. */
	if (mlx5_fdir_filter->flow != NULL)
		return 0;

	/* Get fdir_queue for specific queue. */
	if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
		fdir_queue = priv_get_fdir_drop_queue(priv);
	else
		fdir_queue = priv_get_fdir_queue(priv,
						 mlx5_fdir_filter->queue);

	if (fdir_queue == NULL) {
		ERROR("failed to create flow director rxq for queue %d",
		      mlx5_fdir_filter->queue);
		return EINVAL;
	}

	/* Create flow */
	return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
}

/**
 * Initialize flow director filters list.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
fdir_init_filters_list(struct priv *priv)
{
	/* Filter list initialization should be done only once. */
	if (priv->fdir_filter_list)
		return 0;

	/* Create filters list. */
	priv->fdir_filter_list =
		rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);

	if (priv->fdir_filter_list == NULL) {
		int err = ENOMEM;

		ERROR("cannot allocate flow director filter list: %s",
		      strerror(err));
		return err;
	}

	LIST_INIT(priv->fdir_filter_list);

	return 0;
}

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	struct mlx5_fdir_filter *mlx5_fdir_filter;

	while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
		struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;

		DEBUG("%p: flushing flow director filter %p",
		      (void *)priv, (void *)mlx5_fdir_filter);
		LIST_REMOVE(mlx5_fdir_filter, next);
		if (flow != NULL)
			claim_zero(ibv_exp_destroy_flow(flow));
		rte_free(mlx5_fdir_filter);
	}
}

/**
 * Remove all flow director filters and delete list.
 *
 * @param priv
 *   Private structure.
 */
void
priv_fdir_delete_filters_list(struct priv *priv)
{
	priv_fdir_filter_flush(priv);
	rte_free(priv->fdir_filter_list);
	priv->fdir_filter_list = NULL;
}

/**
 * Disable flow director, remove all steering rules.
 *
 * @param priv
 *   Private structure.
 */
void
priv_fdir_disable(struct priv *priv)
{
	unsigned int i;
	struct mlx5_fdir_filter *mlx5_fdir_filter;

	/* Run on every flow director filter and destroy flow handle. */
	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
		struct ibv_exp_flow *flow;

		/* Only valid elements should be in the list */
		assert(mlx5_fdir_filter != NULL);
		flow = mlx5_fdir_filter->flow;

		/* Destroy flow handle */
		if (flow != NULL) {
			claim_zero(ibv_exp_destroy_flow(flow));
			mlx5_fdir_filter->flow = NULL;
		}
	}

	/* Destroy flow director context in each RX queue. */
	for (i = 0; (i != priv->rxqs_n); i++) {
		struct rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);

		if (!rxq_ctrl->fdir_queue)
			continue;
		priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
		rxq_ctrl->fdir_queue = NULL;
	}
	if (priv->fdir_drop_queue) {
		priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
		priv->fdir_drop_queue = NULL;
	}
}

/**
 * Enable flow director, create steering rules.
 *
 * @param priv
 *   Private structure.
 */
void
priv_fdir_enable(struct priv *priv)
{
	struct mlx5_fdir_filter *mlx5_fdir_filter;

	/* Run on every fdir filter and create flow handle */
	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
		/* Only valid elements should be in the list */
		assert(mlx5_fdir_filter != NULL);

		priv_fdir_filter_enable(priv, mlx5_fdir_filter);
	}
}

/**
 * Find specific filter in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to find.
 *
 * @return
 *   Filter element if found, otherwise NULL.
 */
static struct mlx5_fdir_filter *
priv_find_filter_in_list(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter)
{
	struct fdir_flow_desc desc;
	struct mlx5_fdir_filter *mlx5_fdir_filter;
	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;

	/* Get flow director filter to look for. */
	fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);

	/* Look for the requested element. */
	LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
		/* Only valid elements should be in the list. */
		assert(mlx5_fdir_filter != NULL);

		/* Return matching filter. */
		if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
			return mlx5_fdir_filter;
	}

	/* Filter not found */
	return NULL;
}

/**
 * Add new flow director filter and store it in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir_filter *mlx5_fdir_filter;
	enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
	int err = 0;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}

	/* Duplicate filters are currently unsupported. */
	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
	if (mlx5_fdir_filter != NULL) {
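		/* TREX_PATCH: reject duplicates silently (no error log). */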
#ifndef TREX_PATCH
		ERROR("filter already exists");
#endif
		return EEXIST;
	}

	/* Create new flow director filter. */
	mlx5_fdir_filter =
		rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
	if (mlx5_fdir_filter == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate flow director filter: %s",
		      strerror(err));
		return err;
	}

	/* Set action parameters. */
	mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
	mlx5_fdir_filter->behavior = fdir_filter->action.behavior;

	/* Convert to mlx5 filter descriptor. */
	fdir_filter_to_flow_desc(fdir_filter,
				 &mlx5_fdir_filter->desc, fdir_mode);

	/* Insert new filter into list. */
	LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);

	DEBUG("%p: flow director filter %p added",
	      (void *)priv, (void *)mlx5_fdir_filter);

	/* Enable filter immediately if device is started. */
	if (priv->started)
		err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);

	return err;
}

/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir_filter *mlx5_fdir_filter;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}

	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
	if (mlx5_fdir_filter != NULL) {
		struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
		int err = 0;

		/* Update queue number. */
		mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;

		/* Destroy flow handle. */
		if (flow != NULL) {
			claim_zero(ibv_exp_destroy_flow(flow));
			mlx5_fdir_filter->flow = NULL;
		}
		DEBUG("%p: flow director filter %p updated",
		      (void *)priv, (void *)mlx5_fdir_filter);

		/* Enable filter if device is started. */
		if (priv->started)
			err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);

		return err;
	}

	/* Filter not found, create it. */
	DEBUG("%p: filter not found for update, creating new filter",
	      (void *)priv);
	return priv_fdir_filter_add(priv, fdir_filter);
}

/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir_filter *mlx5_fdir_filter;

	mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
	if (mlx5_fdir_filter != NULL) {
		struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;

		/* Remove element from list. */
		LIST_REMOVE(mlx5_fdir_filter, next);

		/* Destroy flow handle. */
		if (flow != NULL) {
			claim_zero(ibv_exp_destroy_flow(flow));
			mlx5_fdir_filter->flow = NULL;
		}

		DEBUG("%p: flow director filter %p deleted",
		      (void *)priv, (void *)mlx5_fdir_filter);

		/* Delete filter. */
		rte_free(mlx5_fdir_filter);

		return 0;
	}

#ifndef TREX_PATCH
	ERROR("%p: flow director delete failed, cannot find filter",
	      (void *)priv);
#endif
	return ENOENT;
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;

	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));

	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;

	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;

	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}

	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}

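/*
 * rte_flow callbacks handed back to applications through the
 * RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET path in mlx5_dev_filter_ctrl()
 * below.
 */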
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.query = NULL,
};

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}

	return -ret;
}