rte_eth_bond_pmd.c revision 8b25d1ad
1/*-
2 *   BSD LICENSE
3 *
4 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5 *   All rights reserved.
6 *
7 *   Redistribution and use in source and binary forms, with or without
8 *   modification, are permitted provided that the following conditions
9 *   are met:
10 *
11 *     * Redistributions of source code must retain the above copyright
12 *       notice, this list of conditions and the following disclaimer.
13 *     * Redistributions in binary form must reproduce the above copyright
14 *       notice, this list of conditions and the following disclaimer in
15 *       the documentation and/or other materials provided with the
16 *       distribution.
17 *     * Neither the name of Intel Corporation nor the names of its
18 *       contributors may be used to endorse or promote products derived
19 *       from this software without specific prior written permission.
20 *
21 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33#include <stdlib.h>
34#include <netinet/in.h>
35
36#include <rte_mbuf.h>
37#include <rte_malloc.h>
38#include <rte_ethdev.h>
39#include <rte_tcp.h>
40#include <rte_udp.h>
41#include <rte_ip.h>
42#include <rte_ip_frag.h>
43#include <rte_devargs.h>
44#include <rte_kvargs.h>
45#include <rte_dev.h>
46#include <rte_alarm.h>
47#include <rte_cycles.h>
48
49#include "rte_eth_bond.h"
50#include "rte_eth_bond_private.h"
51#include "rte_eth_bond_8023ad_private.h"
52
53#define REORDER_PERIOD_MS 10
54
55#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
56
57/* Table for statistics in mode 5 TLB */
58static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
59
60static inline size_t
61get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
62{
63	size_t vlan_offset = 0;
64
65	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
66		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
67
68		vlan_offset = sizeof(struct vlan_hdr);
69		*proto = vlan_hdr->eth_proto;
70
71		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
72			vlan_hdr = vlan_hdr + 1;
73			*proto = vlan_hdr->eth_proto;
74			vlan_offset += sizeof(struct vlan_hdr);
75		}
76	}
77	return vlan_offset;
78}
79
80static uint16_t
81bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
82{
83	struct bond_dev_private *internals;
84
85	uint16_t num_rx_slave = 0;
86	uint16_t num_rx_total = 0;
87
88	int i;
89
90	/* Cast to structure, containing bonded device's port id and queue id */
91	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92
93	internals = bd_rx_q->dev_private;
94
95
96	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
97		/* Offset of pointer to *bufs increases as packets are received
98		 * from other slaves */
99		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
100				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
101		if (num_rx_slave) {
102			num_rx_total += num_rx_slave;
103			nb_pkts -= num_rx_slave;
104		}
105	}
106
107	return num_rx_total;
108}
109
110static uint16_t
111bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
112		uint16_t nb_pkts)
113{
114	struct bond_dev_private *internals;
115
116	/* Cast to structure, containing bonded device's port id and queue id */
117	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
118
119	internals = bd_rx_q->dev_private;
120
121	return rte_eth_rx_burst(internals->current_primary_port,
122			bd_rx_q->queue_id, bufs, nb_pkts);
123}
124
125static uint16_t
126bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
127		uint16_t nb_pkts)
128{
129	/* Cast to structure, containing bonded device's port id and queue id */
130	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
131	struct bond_dev_private *internals = bd_rx_q->dev_private;
132	struct ether_addr bond_mac;
133
134	struct ether_hdr *hdr;
135
136	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
137	uint16_t num_rx_total = 0;	/* Total number of received packets */
138	uint8_t slaves[RTE_MAX_ETHPORTS];
139	uint8_t slave_count;
140
141	uint8_t collecting;  /* current slave collecting status */
142	const uint8_t promisc = internals->promiscuous_en;
143	uint8_t i, j, k;
144
145	rte_eth_macaddr_get(internals->port_id, &bond_mac);
146	/* Copy slave list to protect against slave up/down changes during tx
147	 * bursting */
148	slave_count = internals->active_slave_count;
149	memcpy(slaves, internals->active_slaves,
150			sizeof(internals->active_slaves[0]) * slave_count);
151
152	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
153		j = num_rx_total;
154		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
155
156		/* Read packets from this slave */
157		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
158				&bufs[num_rx_total], nb_pkts - num_rx_total);
159
160		for (k = j; k < 2 && k < num_rx_total; k++)
161			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
162
163		/* Handle slow protocol packets. */
164		while (j < num_rx_total) {
165			if (j + 3 < num_rx_total)
166				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
167
168			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
169			/* Remove packet from array if it is slow packet or slave is not
170			 * in collecting state or bondign interface is not in promiscus
171			 * mode and packet address does not match. */
172			if (unlikely(hdr->ether_type == ether_type_slow_be ||
173				!collecting || (!promisc &&
174					!is_multicast_ether_addr(&hdr->d_addr) &&
175					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
176
177				if (hdr->ether_type == ether_type_slow_be) {
178					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
179						bufs[j]);
180				} else
181					rte_pktmbuf_free(bufs[j]);
182
183				/* Packet is managed by mode 4 or dropped, shift the array */
184				num_rx_total--;
185				if (j < num_rx_total) {
186					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
187						(num_rx_total - j));
188				}
189			} else
190				j++;
191		}
192	}
193
194	return num_rx_total;
195}
196
197#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
198uint32_t burstnumberRX;
199uint32_t burstnumberTX;
200
201#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
202
203static void
204arp_op_name(uint16_t arp_op, char *buf)
205{
206	switch (arp_op) {
207	case ARP_OP_REQUEST:
208		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
209		return;
210	case ARP_OP_REPLY:
211		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
212		return;
213	case ARP_OP_REVREQUEST:
214		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
215				"Reverse ARP Request");
216		return;
217	case ARP_OP_REVREPLY:
218		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
219				"Reverse ARP Reply");
220		return;
221	case ARP_OP_INVREQUEST:
222		snprintf(buf, sizeof("Peer Identify Request"), "%s",
223				"Peer Identify Request");
224		return;
225	case ARP_OP_INVREPLY:
226		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
227				"Peer Identify Reply");
228		return;
229	default:
230		break;
231	}
232	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
233	return;
234}
235#endif
236#define MaxIPv4String	16
237static void
238ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
239{
240	uint32_t ipv4_addr;
241
242	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
243	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
244		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
245		ipv4_addr & 0xFF);
246}
247
248#define MAX_CLIENTS_NUMBER	128
249uint8_t active_clients;
250struct client_stats_t {
251	uint8_t port;
252	uint32_t ipv4_addr;
253	uint32_t ipv4_rx_packets;
254	uint32_t ipv4_tx_packets;
255};
256struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
257
258static void
259update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
260{
261	int i = 0;
262
263	for (; i < MAX_CLIENTS_NUMBER; i++)	{
264		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
265			/* Just update RX packets number for this client */
266			if (TXorRXindicator == &burstnumberRX)
267				client_stats[i].ipv4_rx_packets++;
268			else
269				client_stats[i].ipv4_tx_packets++;
270			return;
271		}
272	}
273	/* We have a new client. Insert him to the table, and increment stats */
274	if (TXorRXindicator == &burstnumberRX)
275		client_stats[active_clients].ipv4_rx_packets++;
276	else
277		client_stats[active_clients].ipv4_tx_packets++;
278	client_stats[active_clients].ipv4_addr = addr;
279	client_stats[active_clients].port = port;
280	active_clients++;
281
282}
283
284#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
285#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
286		RTE_LOG(DEBUG, PMD, \
287		"%s " \
288		"port:%d " \
289		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
290		"SrcIP:%s " \
291		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
292		"DstIP:%s " \
293		"%s " \
294		"%d\n", \
295		info, \
296		port, \
297		eth_h->s_addr.addr_bytes[0], \
298		eth_h->s_addr.addr_bytes[1], \
299		eth_h->s_addr.addr_bytes[2], \
300		eth_h->s_addr.addr_bytes[3], \
301		eth_h->s_addr.addr_bytes[4], \
302		eth_h->s_addr.addr_bytes[5], \
303		src_ip, \
304		eth_h->d_addr.addr_bytes[0], \
305		eth_h->d_addr.addr_bytes[1], \
306		eth_h->d_addr.addr_bytes[2], \
307		eth_h->d_addr.addr_bytes[3], \
308		eth_h->d_addr.addr_bytes[4], \
309		eth_h->d_addr.addr_bytes[5], \
310		dst_ip, \
311		arp_op, \
312		++burstnumber)
313#endif
314
315static void
316mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
317		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
318{
319	struct ipv4_hdr *ipv4_h;
320#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
321	struct arp_hdr *arp_h;
322	char dst_ip[16];
323	char ArpOp[24];
324	char buf[16];
325#endif
326	char src_ip[16];
327
328	uint16_t ether_type = eth_h->ether_type;
329	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
330
331#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
332	snprintf(buf, 16, "%s", info);
333#endif
334
335	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
336		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
337		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
338#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
339		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
340		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
341#endif
342		update_client_stats(ipv4_h->src_addr, port, burstnumber);
343	}
344#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
345	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
346		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
347		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
348		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
349		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
350		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
351	}
352#endif
353}
354#endif
355
356static uint16_t
357bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
358{
359	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
360	struct bond_dev_private *internals = bd_tx_q->dev_private;
361	struct ether_hdr *eth_h;
362	uint16_t ether_type, offset;
363	uint16_t nb_recv_pkts;
364	int i;
365
366	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
367
368	for (i = 0; i < nb_recv_pkts; i++) {
369		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
370		ether_type = eth_h->ether_type;
371		offset = get_vlan_offset(eth_h, &ether_type);
372
373		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
374#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
375			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
376#endif
377			bond_mode_alb_arp_recv(eth_h, offset, internals);
378		}
379#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
380		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
381			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
382#endif
383	}
384
385	return nb_recv_pkts;
386}
387
388static uint16_t
389bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
390		uint16_t nb_pkts)
391{
392	struct bond_dev_private *internals;
393	struct bond_tx_queue *bd_tx_q;
394
395	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
396	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
397
398	uint8_t num_of_slaves;
399	uint8_t slaves[RTE_MAX_ETHPORTS];
400
401	uint16_t num_tx_total = 0, num_tx_slave;
402
403	static int slave_idx = 0;
404	int i, cslave_idx = 0, tx_fail_total = 0;
405
406	bd_tx_q = (struct bond_tx_queue *)queue;
407	internals = bd_tx_q->dev_private;
408
409	/* Copy slave list to protect against slave up/down changes during tx
410	 * bursting */
411	num_of_slaves = internals->active_slave_count;
412	memcpy(slaves, internals->active_slaves,
413			sizeof(internals->active_slaves[0]) * num_of_slaves);
414
415	if (num_of_slaves < 1)
416		return num_tx_total;
417
418	/* Populate slaves mbuf with which packets are to be sent on it  */
419	for (i = 0; i < nb_pkts; i++) {
420		cslave_idx = (slave_idx + i) % num_of_slaves;
421		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
422	}
423
424	/* increment current slave index so the next call to tx burst starts on the
425	 * next slave */
426	slave_idx = ++cslave_idx;
427
428	/* Send packet burst on each slave device */
429	for (i = 0; i < num_of_slaves; i++) {
430		if (slave_nb_pkts[i] > 0) {
431			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
432					slave_bufs[i], slave_nb_pkts[i]);
433
434			/* if tx burst fails move packets to end of bufs */
435			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
436				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
437
438				tx_fail_total += tx_fail_slave;
439
440				memcpy(&bufs[nb_pkts - tx_fail_total],
441						&slave_bufs[i][num_tx_slave],
442						tx_fail_slave * sizeof(bufs[0]));
443			}
444			num_tx_total += num_tx_slave;
445		}
446	}
447
448	return num_tx_total;
449}
450
451static uint16_t
452bond_ethdev_tx_burst_active_backup(void *queue,
453		struct rte_mbuf **bufs, uint16_t nb_pkts)
454{
455	struct bond_dev_private *internals;
456	struct bond_tx_queue *bd_tx_q;
457
458	bd_tx_q = (struct bond_tx_queue *)queue;
459	internals = bd_tx_q->dev_private;
460
461	if (internals->active_slave_count < 1)
462		return 0;
463
464	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
465			bufs, nb_pkts);
466}
467
468static inline uint16_t
469ether_hash(struct ether_hdr *eth_hdr)
470{
471	unaligned_uint16_t *word_src_addr =
472		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
473	unaligned_uint16_t *word_dst_addr =
474		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
475
476	return (word_src_addr[0] ^ word_dst_addr[0]) ^
477			(word_src_addr[1] ^ word_dst_addr[1]) ^
478			(word_src_addr[2] ^ word_dst_addr[2]);
479}
480
481static inline uint32_t
482ipv4_hash(struct ipv4_hdr *ipv4_hdr)
483{
484	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
485}
486
487static inline uint32_t
488ipv6_hash(struct ipv6_hdr *ipv6_hdr)
489{
490	unaligned_uint32_t *word_src_addr =
491		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
492	unaligned_uint32_t *word_dst_addr =
493		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
494
495	return (word_src_addr[0] ^ word_dst_addr[0]) ^
496			(word_src_addr[1] ^ word_dst_addr[1]) ^
497			(word_src_addr[2] ^ word_dst_addr[2]) ^
498			(word_src_addr[3] ^ word_dst_addr[3]);
499}
500
501uint16_t
502xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
503{
504	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
505
506	uint32_t hash = ether_hash(eth_hdr);
507
508	return (hash ^= hash >> 8) % slave_count;
509}
510
511uint16_t
512xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
513{
514	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
515	uint16_t proto = eth_hdr->ether_type;
516	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
517	uint32_t hash, l3hash = 0;
518
519	hash = ether_hash(eth_hdr);
520
521	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
522		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
523				((char *)(eth_hdr + 1) + vlan_offset);
524		l3hash = ipv4_hash(ipv4_hdr);
525
526	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
527		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
528				((char *)(eth_hdr + 1) + vlan_offset);
529		l3hash = ipv6_hash(ipv6_hdr);
530	}
531
532	hash = hash ^ l3hash;
533	hash ^= hash >> 16;
534	hash ^= hash >> 8;
535
536	return hash % slave_count;
537}
538
539uint16_t
540xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
541{
542	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
543	uint16_t proto = eth_hdr->ether_type;
544	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
545
546	struct udp_hdr *udp_hdr = NULL;
547	struct tcp_hdr *tcp_hdr = NULL;
548	uint32_t hash, l3hash = 0, l4hash = 0;
549
550	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
551		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
552				((char *)(eth_hdr + 1) + vlan_offset);
553		size_t ip_hdr_offset;
554
555		l3hash = ipv4_hash(ipv4_hdr);
556
557		/* there is no L4 header in fragmented packet */
558		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
559			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
560					IPV4_IHL_MULTIPLIER;
561
562			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
563				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
564						ip_hdr_offset);
565				l4hash = HASH_L4_PORTS(tcp_hdr);
566			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
567				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
568						ip_hdr_offset);
569				l4hash = HASH_L4_PORTS(udp_hdr);
570			}
571		}
572	} else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
573		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
574				((char *)(eth_hdr + 1) + vlan_offset);
575		l3hash = ipv6_hash(ipv6_hdr);
576
577		if (ipv6_hdr->proto == IPPROTO_TCP) {
578			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
579			l4hash = HASH_L4_PORTS(tcp_hdr);
580		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
581			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
582			l4hash = HASH_L4_PORTS(udp_hdr);
583		}
584	}
585
586	hash = l3hash ^ l4hash;
587	hash ^= hash >> 16;
588	hash ^= hash >> 8;
589
590	return hash % slave_count;
591}
592
593struct bwg_slave {
594	uint64_t bwg_left_int;
595	uint64_t bwg_left_remainder;
596	uint8_t slave;
597};
598
599void
600bond_tlb_activate_slave(struct bond_dev_private *internals) {
601	int i;
602
603	for (i = 0; i < internals->active_slave_count; i++) {
604		tlb_last_obytets[internals->active_slaves[i]] = 0;
605	}
606}
607
608static int
609bandwidth_cmp(const void *a, const void *b)
610{
611	const struct bwg_slave *bwg_a = a;
612	const struct bwg_slave *bwg_b = b;
613	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
614	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
615			(int64_t)bwg_a->bwg_left_remainder;
616	if (diff > 0)
617		return 1;
618	else if (diff < 0)
619		return -1;
620	else if (diff2 > 0)
621		return 1;
622	else if (diff2 < 0)
623		return -1;
624	else
625		return 0;
626}
627
628static void
629bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
630		struct bwg_slave *bwg_slave)
631{
632	struct rte_eth_link link_status;
633
634	rte_eth_link_get(port_id, &link_status);
635	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
636	if (link_bwg == 0)
637		return;
638	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
639	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
640	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
641}
642
643static void
644bond_ethdev_update_tlb_slave_cb(void *arg)
645{
646	struct bond_dev_private *internals = arg;
647	struct rte_eth_stats slave_stats;
648	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
649	uint8_t slave_count;
650	uint64_t tx_bytes;
651
652	uint8_t update_stats = 0;
653	uint8_t i, slave_id;
654
655	internals->slave_update_idx++;
656
657
658	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
659		update_stats = 1;
660
661	for (i = 0; i < internals->active_slave_count; i++) {
662		slave_id = internals->active_slaves[i];
663		rte_eth_stats_get(slave_id, &slave_stats);
664		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
665		bandwidth_left(slave_id, tx_bytes,
666				internals->slave_update_idx, &bwg_array[i]);
667		bwg_array[i].slave = slave_id;
668
669		if (update_stats) {
670			tlb_last_obytets[slave_id] = slave_stats.obytes;
671		}
672	}
673
674	if (update_stats == 1)
675		internals->slave_update_idx = 0;
676
677	slave_count = i;
678	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
679	for (i = 0; i < slave_count; i++)
680		internals->tlb_slaves_order[i] = bwg_array[i].slave;
681
682	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
683			(struct bond_dev_private *)internals);
684}
685
686static uint16_t
687bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
688{
689	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
690	struct bond_dev_private *internals = bd_tx_q->dev_private;
691
692	struct rte_eth_dev *primary_port =
693			&rte_eth_devices[internals->primary_port];
694	uint16_t num_tx_total = 0;
695	uint8_t i, j;
696
697	uint8_t num_of_slaves = internals->active_slave_count;
698	uint8_t slaves[RTE_MAX_ETHPORTS];
699
700	struct ether_hdr *ether_hdr;
701	struct ether_addr primary_slave_addr;
702	struct ether_addr active_slave_addr;
703
704	if (num_of_slaves < 1)
705		return num_tx_total;
706
707	memcpy(slaves, internals->tlb_slaves_order,
708				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
709
710
711	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
712
713	if (nb_pkts > 3) {
714		for (i = 0; i < 3; i++)
715			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
716	}
717
718	for (i = 0; i < num_of_slaves; i++) {
719		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
720		for (j = num_tx_total; j < nb_pkts; j++) {
721			if (j + 3 < nb_pkts)
722				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
723
724			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
725			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
726				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
727#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
728					mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
729#endif
730		}
731
732		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
733				bufs + num_tx_total, nb_pkts - num_tx_total);
734
735		if (num_tx_total == nb_pkts)
736			break;
737	}
738
739	return num_tx_total;
740}
741
742void
743bond_tlb_disable(struct bond_dev_private *internals)
744{
745	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
746}
747
748void
749bond_tlb_enable(struct bond_dev_private *internals)
750{
751	bond_ethdev_update_tlb_slave_cb(internals);
752}
753
754static uint16_t
755bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
756{
757	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
758	struct bond_dev_private *internals = bd_tx_q->dev_private;
759
760	struct ether_hdr *eth_h;
761	uint16_t ether_type, offset;
762
763	struct client_data *client_info;
764
765	/*
766	 * We create transmit buffers for every slave and one additional to send
767	 * through tlb. In worst case every packet will be send on one port.
768	 */
769	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
770	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
771
772	/*
773	 * We create separate transmit buffers for update packets as they wont be
774	 * counted in num_tx_total.
775	 */
776	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
777	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
778
779	struct rte_mbuf *upd_pkt;
780	size_t pkt_size;
781
782	uint16_t num_send, num_not_send = 0;
783	uint16_t num_tx_total = 0;
784	uint8_t slave_idx;
785
786	int i, j;
787
788	/* Search tx buffer for ARP packets and forward them to alb */
789	for (i = 0; i < nb_pkts; i++) {
790		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
791		ether_type = eth_h->ether_type;
792		offset = get_vlan_offset(eth_h, &ether_type);
793
794		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
795			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
796
797			/* Change src mac in eth header */
798			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
799
800			/* Add packet to slave tx buffer */
801			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
802			slave_bufs_pkts[slave_idx]++;
803		} else {
804			/* If packet is not ARP, send it with TLB policy */
805			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
806					bufs[i];
807			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
808		}
809	}
810
811	/* Update connected client ARP tables */
812	if (internals->mode6.ntt) {
813		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
814			client_info = &internals->mode6.client_table[i];
815
816			if (client_info->in_use) {
817				/* Allocate new packet to send ARP update on current slave */
818				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
819				if (upd_pkt == NULL) {
820					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
821					continue;
822				}
823				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
824						+ client_info->vlan_count * sizeof(struct vlan_hdr);
825				upd_pkt->data_len = pkt_size;
826				upd_pkt->pkt_len = pkt_size;
827
828				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
829						internals);
830
831				/* Add packet to update tx buffer */
832				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
833				update_bufs_pkts[slave_idx]++;
834			}
835		}
836		internals->mode6.ntt = 0;
837	}
838
839	/* Send ARP packets on proper slaves */
840	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
841		if (slave_bufs_pkts[i] > 0) {
842			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
843					slave_bufs[i], slave_bufs_pkts[i]);
844			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
845				bufs[nb_pkts - 1 - num_not_send - j] =
846						slave_bufs[i][nb_pkts - 1 - j];
847			}
848
849			num_tx_total += num_send;
850			num_not_send += slave_bufs_pkts[i] - num_send;
851
852#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
853	/* Print TX stats including update packets */
854			for (j = 0; j < slave_bufs_pkts[i]; j++) {
855				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
856				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
857			}
858#endif
859		}
860	}
861
862	/* Send update packets on proper slaves */
863	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
864		if (update_bufs_pkts[i] > 0) {
865			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
866					update_bufs_pkts[i]);
867			for (j = num_send; j < update_bufs_pkts[i]; j++) {
868				rte_pktmbuf_free(update_bufs[i][j]);
869			}
870#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
871			for (j = 0; j < update_bufs_pkts[i]; j++) {
872				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
873				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
874			}
875#endif
876		}
877	}
878
879	/* Send non-ARP packets using tlb policy */
880	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
881		num_send = bond_ethdev_tx_burst_tlb(queue,
882				slave_bufs[RTE_MAX_ETHPORTS],
883				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
884
885		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
886			bufs[nb_pkts - 1 - num_not_send - j] =
887					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
888		}
889
890		num_tx_total += num_send;
891		num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
892	}
893
894	return num_tx_total;
895}
896
897static uint16_t
898bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
899		uint16_t nb_pkts)
900{
901	struct bond_dev_private *internals;
902	struct bond_tx_queue *bd_tx_q;
903
904	uint8_t num_of_slaves;
905	uint8_t slaves[RTE_MAX_ETHPORTS];
906
907	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
908
909	int i, op_slave_id;
910
911	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
912	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
913
914	bd_tx_q = (struct bond_tx_queue *)queue;
915	internals = bd_tx_q->dev_private;
916
917	/* Copy slave list to protect against slave up/down changes during tx
918	 * bursting */
919	num_of_slaves = internals->active_slave_count;
920	memcpy(slaves, internals->active_slaves,
921			sizeof(internals->active_slaves[0]) * num_of_slaves);
922
923	if (num_of_slaves < 1)
924		return num_tx_total;
925
926	/* Populate slaves mbuf with the packets which are to be sent on it  */
927	for (i = 0; i < nb_pkts; i++) {
928		/* Select output slave using hash based on xmit policy */
929		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
930
931		/* Populate slave mbuf arrays with mbufs for that slave */
932		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
933	}
934
935	/* Send packet burst on each slave device */
936	for (i = 0; i < num_of_slaves; i++) {
937		if (slave_nb_pkts[i] > 0) {
938			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
939					slave_bufs[i], slave_nb_pkts[i]);
940
941			/* if tx burst fails move packets to end of bufs */
942			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
943				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
944
945				tx_fail_total += slave_tx_fail_count;
946				memcpy(&bufs[nb_pkts - tx_fail_total],
947						&slave_bufs[i][num_tx_slave],
948						slave_tx_fail_count * sizeof(bufs[0]));
949			}
950
951			num_tx_total += num_tx_slave;
952		}
953	}
954
955	return num_tx_total;
956}
957
958static uint16_t
959bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
960		uint16_t nb_pkts)
961{
962	struct bond_dev_private *internals;
963	struct bond_tx_queue *bd_tx_q;
964
965	uint8_t num_of_slaves;
966	uint8_t slaves[RTE_MAX_ETHPORTS];
967	 /* positions in slaves, not ID */
968	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
969	uint8_t distributing_count;
970
971	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
972	uint16_t i, j, op_slave_idx;
973	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
974
975	/* Allocate additional packets in case 8023AD mode. */
976	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
977	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
978
979	/* Total amount of packets in slave_bufs */
980	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
981	/* Slow packets placed in each slave */
982	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
983
984	bd_tx_q = (struct bond_tx_queue *)queue;
985	internals = bd_tx_q->dev_private;
986
987	/* Copy slave list to protect against slave up/down changes during tx
988	 * bursting */
989	num_of_slaves = internals->active_slave_count;
990	if (num_of_slaves < 1)
991		return num_tx_total;
992
993	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
994
995	distributing_count = 0;
996	for (i = 0; i < num_of_slaves; i++) {
997		struct port *port = &mode_8023ad_ports[slaves[i]];
998
999		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1000				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
1001		slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1002
1003		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1004			slave_bufs[i][j] = slow_pkts[j];
1005
1006		if (ACTOR_STATE(port, DISTRIBUTING))
1007			distributing_offsets[distributing_count++] = i;
1008	}
1009
1010	if (likely(distributing_count > 0)) {
1011		/* Populate slaves mbuf with the packets which are to be sent on it */
1012		for (i = 0; i < nb_pkts; i++) {
1013			/* Select output slave using hash based on xmit policy */
1014			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1015
1016			/* Populate slave mbuf arrays with mbufs for that slave. Use only
1017			 * slaves that are currently distributing. */
1018			uint8_t slave_offset = distributing_offsets[op_slave_idx];
1019			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1020			slave_nb_pkts[slave_offset]++;
1021		}
1022	}
1023
1024	/* Send packet burst on each slave device */
1025	for (i = 0; i < num_of_slaves; i++) {
1026		if (slave_nb_pkts[i] == 0)
1027			continue;
1028
1029		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1030				slave_bufs[i], slave_nb_pkts[i]);
1031
1032		/* If tx burst fails drop slow packets */
1033		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1034			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1035
1036		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1037		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1038
1039		/* If tx burst fails move packets to end of bufs */
1040		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1041			uint16_t j = nb_pkts - num_tx_fail_total;
1042			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1043				bufs[j] = slave_bufs[i][num_tx_slave];
1044		}
1045	}
1046
1047	return num_tx_total;
1048}
1049
1050static uint16_t
1051bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1052		uint16_t nb_pkts)
1053{
1054	struct bond_dev_private *internals;
1055	struct bond_tx_queue *bd_tx_q;
1056
1057	uint8_t tx_failed_flag = 0, num_of_slaves;
1058	uint8_t slaves[RTE_MAX_ETHPORTS];
1059
1060	uint16_t max_nb_of_tx_pkts = 0;
1061
1062	int slave_tx_total[RTE_MAX_ETHPORTS];
1063	int i, most_successful_tx_slave = -1;
1064
1065	bd_tx_q = (struct bond_tx_queue *)queue;
1066	internals = bd_tx_q->dev_private;
1067
1068	/* Copy slave list to protect against slave up/down changes during tx
1069	 * bursting */
1070	num_of_slaves = internals->active_slave_count;
1071	memcpy(slaves, internals->active_slaves,
1072			sizeof(internals->active_slaves[0]) * num_of_slaves);
1073
1074	if (num_of_slaves < 1)
1075		return 0;
1076
1077	/* Increment reference count on mbufs */
1078	for (i = 0; i < nb_pkts; i++)
1079		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1080
1081	/* Transmit burst on each active slave */
1082	for (i = 0; i < num_of_slaves; i++) {
1083		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1084					bufs, nb_pkts);
1085
1086		if (unlikely(slave_tx_total[i] < nb_pkts))
1087			tx_failed_flag = 1;
1088
1089		/* record the value and slave index for the slave which transmits the
1090		 * maximum number of packets */
1091		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1092			max_nb_of_tx_pkts = slave_tx_total[i];
1093			most_successful_tx_slave = i;
1094		}
1095	}
1096
1097	/* if slaves fail to transmit packets from burst, the calling application
1098	 * is not expected to know about multiple references to packets so we must
1099	 * handle failures of all packets except those of the most successful slave
1100	 */
1101	if (unlikely(tx_failed_flag))
1102		for (i = 0; i < num_of_slaves; i++)
1103			if (i != most_successful_tx_slave)
1104				while (slave_tx_total[i] < nb_pkts)
1105					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1106
1107	return max_nb_of_tx_pkts;
1108}
1109
1110void
1111link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1112		struct rte_eth_link *slave_dev_link)
1113{
1114	struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1115	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1116
1117	if (slave_dev_link->link_status &&
1118		bonded_eth_dev->data->dev_started) {
1119		bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1120		bonded_dev_link->link_speed = slave_dev_link->link_speed;
1121
1122		internals->link_props_set = 1;
1123	}
1124}
1125
1126void
1127link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1128{
1129	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1130
1131	memset(&(bonded_eth_dev->data->dev_link), 0,
1132			sizeof(bonded_eth_dev->data->dev_link));
1133
1134	internals->link_props_set = 0;
1135}
1136
1137int
1138link_properties_valid(struct rte_eth_link *bonded_dev_link,
1139		struct rte_eth_link *slave_dev_link)
1140{
1141	if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1142		bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
1143		return -1;
1144
1145	return 0;
1146}
1147
1148int
1149mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1150{
1151	struct ether_addr *mac_addr;
1152
1153	if (eth_dev == NULL) {
1154		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1155		return -1;
1156	}
1157
1158	if (dst_mac_addr == NULL) {
1159		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1160		return -1;
1161	}
1162
1163	mac_addr = eth_dev->data->mac_addrs;
1164
1165	ether_addr_copy(mac_addr, dst_mac_addr);
1166	return 0;
1167}
1168
1169int
1170mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1171{
1172	struct ether_addr *mac_addr;
1173
1174	if (eth_dev == NULL) {
1175		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1176		return -1;
1177	}
1178
1179	if (new_mac_addr == NULL) {
1180		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1181		return -1;
1182	}
1183
1184	mac_addr = eth_dev->data->mac_addrs;
1185
1186	/* If new MAC is different to current MAC then update */
1187	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1188		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1189
1190	return 0;
1191}
1192
1193int
1194mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1195{
1196	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1197	int i;
1198
1199	/* Update slave devices MAC addresses */
1200	if (internals->slave_count < 1)
1201		return -1;
1202
1203	switch (internals->mode) {
1204	case BONDING_MODE_ROUND_ROBIN:
1205	case BONDING_MODE_BALANCE:
1206	case BONDING_MODE_BROADCAST:
1207		for (i = 0; i < internals->slave_count; i++) {
1208			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1209					bonded_eth_dev->data->mac_addrs)) {
1210				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1211						internals->slaves[i].port_id);
1212				return -1;
1213			}
1214		}
1215		break;
1216	case BONDING_MODE_8023AD:
1217		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1218		break;
1219	case BONDING_MODE_ACTIVE_BACKUP:
1220	case BONDING_MODE_TLB:
1221	case BONDING_MODE_ALB:
1222	default:
1223		for (i = 0; i < internals->slave_count; i++) {
1224			if (internals->slaves[i].port_id ==
1225					internals->current_primary_port) {
1226				if (mac_address_set(&rte_eth_devices[internals->primary_port],
1227						bonded_eth_dev->data->mac_addrs)) {
1228					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1229							internals->current_primary_port);
1230					return -1;
1231				}
1232			} else {
1233				if (mac_address_set(
1234						&rte_eth_devices[internals->slaves[i].port_id],
1235						&internals->slaves[i].persisted_mac_addr)) {
1236					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1237							internals->slaves[i].port_id);
1238					return -1;
1239				}
1240			}
1241		}
1242	}
1243
1244	return 0;
1245}
1246
1247int
1248bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1249{
1250	struct bond_dev_private *internals;
1251
1252	internals = eth_dev->data->dev_private;
1253
1254	switch (mode) {
1255	case BONDING_MODE_ROUND_ROBIN:
1256		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1257		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1258		break;
1259	case BONDING_MODE_ACTIVE_BACKUP:
1260		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1261		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1262		break;
1263	case BONDING_MODE_BALANCE:
1264		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1265		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1266		break;
1267	case BONDING_MODE_BROADCAST:
1268		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1269		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1270		break;
1271	case BONDING_MODE_8023AD:
1272		if (bond_mode_8023ad_enable(eth_dev) != 0)
1273			return -1;
1274
1275		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1276		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1277		RTE_LOG(WARNING, PMD,
1278				"Using mode 4, it is necessary to do TX burst and RX burst "
1279				"at least every 100ms.\n");
1280		break;
1281	case BONDING_MODE_TLB:
1282		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1283		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1284		break;
1285	case BONDING_MODE_ALB:
1286		if (bond_mode_alb_enable(eth_dev) != 0)
1287			return -1;
1288
1289		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1290		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1291		break;
1292	default:
1293		return -1;
1294	}
1295
1296	internals->mode = mode;
1297
1298	return 0;
1299}
1300
1301int
1302slave_configure(struct rte_eth_dev *bonded_eth_dev,
1303		struct rte_eth_dev *slave_eth_dev)
1304{
1305	struct bond_rx_queue *bd_rx_q;
1306	struct bond_tx_queue *bd_tx_q;
1307
1308	uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
1309	uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
1310	int errval;
1311	uint16_t q_id;
1312
1313	/* Stop slave */
1314	rte_eth_dev_stop(slave_eth_dev->data->port_id);
1315
1316	/* Enable interrupts on slave device if supported */
1317	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1318		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1319
1320	/* If RSS is enabled for bonding, try to enable it for slaves  */
1321	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1322		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1323				!= 0) {
1324			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1325					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1326			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1327					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1328		} else {
1329			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1330		}
1331
1332		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1333				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1334		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1335				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1336	}
1337
1338	/* Configure device */
1339	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1340			bonded_eth_dev->data->nb_rx_queues,
1341			bonded_eth_dev->data->nb_tx_queues,
1342			&(slave_eth_dev->data->dev_conf));
1343	if (errval != 0) {
1344		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1345				slave_eth_dev->data->port_id, errval);
1346		return errval;
1347	}
1348
1349	/* Setup Rx Queues */
1350	/* Use existing queues, if any */
1351	for (q_id = old_nb_rx_queues;
1352	     q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1353		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1354
1355		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1356				bd_rx_q->nb_rx_desc,
1357				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1358				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1359		if (errval != 0) {
1360			RTE_BOND_LOG(ERR,
1361					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1362					slave_eth_dev->data->port_id, q_id, errval);
1363			return errval;
1364		}
1365	}
1366
1367	/* Setup Tx Queues */
1368	/* Use existing queues, if any */
1369	for (q_id = old_nb_tx_queues;
1370	     q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1371		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1372
1373		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1374				bd_tx_q->nb_tx_desc,
1375				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1376				&bd_tx_q->tx_conf);
1377		if (errval != 0) {
1378			RTE_BOND_LOG(ERR,
1379					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1380					slave_eth_dev->data->port_id, q_id, errval);
1381			return errval;
1382		}
1383	}
1384
1385	/* Start device */
1386	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1387	if (errval != 0) {
1388		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1389				slave_eth_dev->data->port_id, errval);
1390		return -1;
1391	}
1392
1393	/* If RSS is enabled for bonding, synchronize RETA */
1394	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1395		int i;
1396		struct bond_dev_private *internals;
1397
1398		internals = bonded_eth_dev->data->dev_private;
1399
1400		for (i = 0; i < internals->slave_count; i++) {
1401			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1402				errval = rte_eth_dev_rss_reta_update(
1403						slave_eth_dev->data->port_id,
1404						&internals->reta_conf[0],
1405						internals->slaves[i].reta_size);
1406				if (errval != 0) {
1407					RTE_LOG(WARNING, PMD,
1408							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1409							" RSS Configuration for bonding may be inconsistent.\n",
1410							slave_eth_dev->data->port_id, errval);
1411				}
1412				break;
1413			}
1414		}
1415	}
1416
1417	/* If lsc interrupt is set, check initial slave's link status */
1418	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1419		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1420			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1421
1422	return 0;
1423}
1424
1425void
1426slave_remove(struct bond_dev_private *internals,
1427		struct rte_eth_dev *slave_eth_dev)
1428{
1429	uint8_t i;
1430
1431	for (i = 0; i < internals->slave_count; i++)
1432		if (internals->slaves[i].port_id ==
1433				slave_eth_dev->data->port_id)
1434			break;
1435
1436	if (i < (internals->slave_count - 1))
1437		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1438				sizeof(internals->slaves[0]) *
1439				(internals->slave_count - i - 1));
1440
1441	internals->slave_count--;
1442}
1443
1444static void
1445bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1446
1447void
1448slave_add(struct bond_dev_private *internals,
1449		struct rte_eth_dev *slave_eth_dev)
1450{
1451	struct bond_slave_details *slave_details =
1452			&internals->slaves[internals->slave_count];
1453
1454	slave_details->port_id = slave_eth_dev->data->port_id;
1455	slave_details->last_link_status = 0;
1456
1457	/* Mark slave devices that don't support interrupts so we can
1458	 * compensate when we start the bond
1459	 */
1460	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1461		slave_details->link_status_poll_enabled = 1;
1462	}
1463
1464	slave_details->link_status_wait_to_complete = 0;
1465	/* clean tlb_last_obytes when adding port for bonding device */
1466	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1467			sizeof(struct ether_addr));
1468}
1469
1470void
1471bond_ethdev_primary_set(struct bond_dev_private *internals,
1472		uint8_t slave_port_id)
1473{
1474	int i;
1475
1476	if (internals->active_slave_count < 1)
1477		internals->current_primary_port = slave_port_id;
1478	else
1479		/* Search bonded device slave ports for new proposed primary port */
1480		for (i = 0; i < internals->active_slave_count; i++) {
1481			if (internals->active_slaves[i] == slave_port_id)
1482				internals->current_primary_port = slave_port_id;
1483		}
1484}
1485
1486static void
1487bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1488
1489static int
1490bond_ethdev_start(struct rte_eth_dev *eth_dev)
1491{
1492	struct bond_dev_private *internals;
1493	int i;
1494
1495	/* slave eth dev will be started by bonded device */
1496	if (check_for_bonded_ethdev(eth_dev)) {
1497		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1498				eth_dev->data->port_id);
1499		return -1;
1500	}
1501
1502	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1503	eth_dev->data->dev_started = 1;
1504
1505	internals = eth_dev->data->dev_private;
1506
1507	if (internals->slave_count == 0) {
1508		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1509		return -1;
1510	}
1511
1512	if (internals->user_defined_mac == 0) {
1513		struct ether_addr *new_mac_addr = NULL;
1514
1515		for (i = 0; i < internals->slave_count; i++)
1516			if (internals->slaves[i].port_id == internals->primary_port)
1517				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1518
1519		if (new_mac_addr == NULL)
1520			return -1;
1521
1522		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1523			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1524					eth_dev->data->port_id);
1525			return -1;
1526		}
1527	}
1528
1529	/* Update all slave devices MACs*/
1530	if (mac_address_slaves_update(eth_dev) != 0)
1531		return -1;
1532
1533	/* If bonded device is configure in promiscuous mode then re-apply config */
1534	if (internals->promiscuous_en)
1535		bond_ethdev_promiscuous_enable(eth_dev);
1536
1537	/* Reconfigure each slave device if starting bonded device */
1538	for (i = 0; i < internals->slave_count; i++) {
1539		if (slave_configure(eth_dev,
1540				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1541			RTE_BOND_LOG(ERR,
1542					"bonded port (%d) failed to reconfigure slave device (%d)",
1543					eth_dev->data->port_id, internals->slaves[i].port_id);
1544			return -1;
1545		}
1546		/* We will need to poll for link status if any slave doesn't
1547		 * support interrupts
1548		 */
1549		if (internals->slaves[i].link_status_poll_enabled)
1550			internals->link_status_polling_enabled = 1;
1551	}
1552	/* start polling if needed */
1553	if (internals->link_status_polling_enabled) {
1554		rte_eal_alarm_set(
1555			internals->link_status_polling_interval_ms * 1000,
1556			bond_ethdev_slave_link_status_change_monitor,
1557			(void *)&rte_eth_devices[internals->port_id]);
1558	}
1559
1560	if (internals->user_defined_primary_port)
1561		bond_ethdev_primary_set(internals, internals->primary_port);
1562
1563	if (internals->mode == BONDING_MODE_8023AD)
1564		bond_mode_8023ad_start(eth_dev);
1565
1566	if (internals->mode == BONDING_MODE_TLB ||
1567			internals->mode == BONDING_MODE_ALB)
1568		bond_tlb_enable(internals);
1569
1570	return 0;
1571}
1572
1573static void
1574bond_ethdev_free_queues(struct rte_eth_dev *dev)
1575{
1576	uint8_t i;
1577
1578	if (dev->data->rx_queues != NULL) {
1579		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1580			rte_free(dev->data->rx_queues[i]);
1581			dev->data->rx_queues[i] = NULL;
1582		}
1583		dev->data->nb_rx_queues = 0;
1584	}
1585
1586	if (dev->data->tx_queues != NULL) {
1587		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1588			rte_free(dev->data->tx_queues[i]);
1589			dev->data->tx_queues[i] = NULL;
1590		}
1591		dev->data->nb_tx_queues = 0;
1592	}
1593}
1594
1595void
1596bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1597{
1598	struct bond_dev_private *internals = eth_dev->data->dev_private;
1599	uint8_t i;
1600
1601	if (internals->mode == BONDING_MODE_8023AD) {
1602		struct port *port;
1603		void *pkt = NULL;
1604
1605		bond_mode_8023ad_stop(eth_dev);
1606
1607		/* Discard all messages to/from mode 4 state machines */
1608		for (i = 0; i < internals->active_slave_count; i++) {
1609			port = &mode_8023ad_ports[internals->active_slaves[i]];
1610
1611			RTE_ASSERT(port->rx_ring != NULL);
1612			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1613				rte_pktmbuf_free(pkt);
1614
1615			RTE_ASSERT(port->tx_ring != NULL);
1616			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1617				rte_pktmbuf_free(pkt);
1618		}
1619	}
1620
1621	if (internals->mode == BONDING_MODE_TLB ||
1622			internals->mode == BONDING_MODE_ALB) {
1623		bond_tlb_disable(internals);
1624		for (i = 0; i < internals->active_slave_count; i++)
1625			tlb_last_obytets[internals->active_slaves[i]] = 0;
1626	}
1627
1628	internals->active_slave_count = 0;
1629	internals->link_status_polling_enabled = 0;
1630	for (i = 0; i < internals->slave_count; i++)
1631		internals->slaves[i].last_link_status = 0;
1632
1633	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1634	eth_dev->data->dev_started = 0;
1635}
1636
1637void
1638bond_ethdev_close(struct rte_eth_dev *dev)
1639{
1640	bond_ethdev_free_queues(dev);
1641}
1642
1643/* forward declaration */
1644static int bond_ethdev_configure(struct rte_eth_dev *dev);
1645
1646static void
1647bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1648{
1649	struct bond_dev_private *internals = dev->data->dev_private;
1650
1651	dev_info->max_mac_addrs = 1;
1652
1653	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
1654				  internals->candidate_max_rx_pktlen : 2048;
1655
1656	dev_info->max_rx_queues = (uint16_t)128;
1657	dev_info->max_tx_queues = (uint16_t)512;
1658
1659	dev_info->min_rx_bufsize = 0;
1660	dev_info->pci_dev = NULL;
1661
1662	dev_info->rx_offload_capa = internals->rx_offload_capa;
1663	dev_info->tx_offload_capa = internals->tx_offload_capa;
1664	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1665
1666	dev_info->reta_size = internals->reta_size;
1667}
1668
1669static int
1670bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1671		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1672		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1673{
1674	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1675			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1676					0, dev->data->numa_node);
1677	if (bd_rx_q == NULL)
1678		return -1;
1679
1680	bd_rx_q->queue_id = rx_queue_id;
1681	bd_rx_q->dev_private = dev->data->dev_private;
1682
1683	bd_rx_q->nb_rx_desc = nb_rx_desc;
1684
1685	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1686	bd_rx_q->mb_pool = mb_pool;
1687
1688	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1689
1690	return 0;
1691}
1692
1693static int
1694bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1695		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1696		const struct rte_eth_txconf *tx_conf)
1697{
1698	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1699			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1700					0, dev->data->numa_node);
1701
1702	if (bd_tx_q == NULL)
1703		return -1;
1704
1705	bd_tx_q->queue_id = tx_queue_id;
1706	bd_tx_q->dev_private = dev->data->dev_private;
1707
1708	bd_tx_q->nb_tx_desc = nb_tx_desc;
1709	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1710
1711	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1712
1713	return 0;
1714}
1715
1716static void
1717bond_ethdev_rx_queue_release(void *queue)
1718{
1719	if (queue == NULL)
1720		return;
1721
1722	rte_free(queue);
1723}
1724
1725static void
1726bond_ethdev_tx_queue_release(void *queue)
1727{
1728	if (queue == NULL)
1729		return;
1730
1731	rte_free(queue);
1732}
1733
1734static void
1735bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1736{
1737	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1738	struct bond_dev_private *internals;
1739
1740	/* Default value for polling slave found is true as we don't want to
1741	 * disable the polling thread if we cannot get the lock */
1742	int i, polling_slave_found = 1;
1743
1744	if (cb_arg == NULL)
1745		return;
1746
1747	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1748	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1749
1750	if (!bonded_ethdev->data->dev_started ||
1751		!internals->link_status_polling_enabled)
1752		return;
1753
1754	/* If device is currently being configured then don't check slaves link
1755	 * status, wait until next period */
1756	if (rte_spinlock_trylock(&internals->lock)) {
1757		if (internals->slave_count > 0)
1758			polling_slave_found = 0;
1759
1760		for (i = 0; i < internals->slave_count; i++) {
1761			if (!internals->slaves[i].link_status_poll_enabled)
1762				continue;
1763
1764			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1765			polling_slave_found = 1;
1766
1767			/* Update slave link status */
1768			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1769					internals->slaves[i].link_status_wait_to_complete);
1770
1771			/* if link status has changed since last checked then call lsc
1772			 * event callback */
1773			if (slave_ethdev->data->dev_link.link_status !=
1774					internals->slaves[i].last_link_status) {
1775				internals->slaves[i].last_link_status =
1776						slave_ethdev->data->dev_link.link_status;
1777
1778				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1779						RTE_ETH_EVENT_INTR_LSC,
1780						&bonded_ethdev->data->port_id);
1781			}
1782		}
1783		rte_spinlock_unlock(&internals->lock);
1784	}
1785
1786	if (polling_slave_found)
1787		/* Set alarm to continue monitoring link status of slave ethdevs */
1788		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1789				bond_ethdev_slave_link_status_change_monitor, cb_arg);
1790}
1791
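/* The bonded device reports link up as soon as any active slave reports link
 * up; with no active slaves, or while the device is stopped, it reports link
 * down. */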
1792static int
1793bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1794		int wait_to_complete)
1795{
1796	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1797
1798	if (!bonded_eth_dev->data->dev_started ||
1799		internals->active_slave_count == 0) {
1800		bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1801		return 0;
1802	} else {
1803		struct rte_eth_dev *slave_eth_dev;
1804		int i, link_up = 0;
1805
1806		for (i = 0; i < internals->active_slave_count; i++) {
1807			slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1808
1809			(*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1810					wait_to_complete);
1811			if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
1812				link_up = 1;
1813				break;
1814			}
1815		}
1816
1817		bonded_eth_dev->data->dev_link.link_status = link_up;
1818	}
1819
1820	return 0;
1821}
1822
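/* Sum the statistics of every slave into the bonded device's counters,
 * including per-queue counters up to RTE_ETHDEV_QUEUE_STAT_CNTRS. */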
1823static void
1824bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1825{
1826	struct bond_dev_private *internals = dev->data->dev_private;
1827	struct rte_eth_stats slave_stats;
1828	int i, j;
1829
1830	for (i = 0; i < internals->slave_count; i++) {
1831		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1832
1833		stats->ipackets += slave_stats.ipackets;
1834		stats->opackets += slave_stats.opackets;
1835		stats->ibytes += slave_stats.ibytes;
1836		stats->obytes += slave_stats.obytes;
1837		stats->imissed += slave_stats.imissed;
1838		stats->ierrors += slave_stats.ierrors;
1839		stats->oerrors += slave_stats.oerrors;
1840		stats->rx_nombuf += slave_stats.rx_nombuf;
1841
1842		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1843			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1844			stats->q_opackets[j] += slave_stats.q_opackets[j];
1845			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1846			stats->q_obytes[j] += slave_stats.q_obytes[j];
1847			stats->q_errors[j] += slave_stats.q_errors[j];
1848		}
1849
1850	}
1851}
1852
1853static void
1854bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1855{
1856	struct bond_dev_private *internals = dev->data->dev_private;
1857	int i;
1858
1859	for (i = 0; i < internals->slave_count; i++)
1860		rte_eth_stats_reset(internals->slaves[i].port_id);
1861}
1862
1863static void
1864bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1865{
1866	struct bond_dev_private *internals = eth_dev->data->dev_private;
1867	int i;
1868
1869	internals->promiscuous_en = 1;
1870
1871	switch (internals->mode) {
1872	/* Promiscuous mode is propagated to all slaves */
1873	case BONDING_MODE_ROUND_ROBIN:
1874	case BONDING_MODE_BALANCE:
1875	case BONDING_MODE_BROADCAST:
1876		for (i = 0; i < internals->slave_count; i++)
1877			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1878		break;
1879	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
1880	case BONDING_MODE_8023AD:
1881		break;
1882	/* Promiscuous mode is propagated only to primary slave */
1883	case BONDING_MODE_ACTIVE_BACKUP:
1884	case BONDING_MODE_TLB:
1885	case BONDING_MODE_ALB:
1886	default:
1887		rte_eth_promiscuous_enable(internals->current_primary_port);
1888	}
1889}
1890
1891static void
1892bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1893{
1894	struct bond_dev_private *internals = dev->data->dev_private;
1895	int i;
1896
1897	internals->promiscuous_en = 0;
1898
1899	switch (internals->mode) {
1900	/* Promiscuous mode is propagated to all slaves */
1901	case BONDING_MODE_ROUND_ROBIN:
1902	case BONDING_MODE_BALANCE:
1903	case BONDING_MODE_BROADCAST:
1904		for (i = 0; i < internals->slave_count; i++)
1905			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1906		break;
1907	/* In mode 4 promiscuous mode is managed when a slave is added/removed */
1908	case BONDING_MODE_8023AD:
1909		break;
1910	/* Promiscuous mode is propagated only to primary slave */
1911	case BONDING_MODE_ACTIVE_BACKUP:
1912	case BONDING_MODE_TLB:
1913	case BONDING_MODE_ALB:
1914	default:
1915		rte_eth_promiscuous_disable(internals->current_primary_port);
1916	}
1917}
1918
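/* Alarm callback used to propagate a link status change event to the
 * application once the configured link up/down delay has expired. */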
1919static void
1920bond_ethdev_delayed_lsc_propagation(void *arg)
1921{
1922	if (arg == NULL)
1923		return;
1924
1925	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1926			RTE_ETH_EVENT_INTR_LSC);
1927}
1928
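/* Handle a link status change interrupt from a slave port: activate or
 * deactivate the slave, elect a new primary port if required, update the
 * bonded device's link status and, unless a propagation delay is configured,
 * forward the LSC event to the application immediately. */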
1929void
1930bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1931		void *param)
1932{
1933	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1934	struct bond_dev_private *internals;
1935	struct rte_eth_link link;
1936
1937	int i, valid_slave = 0;
1938	uint8_t active_pos;
1939	uint8_t lsc_flag = 0;
1940
1941	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1942		return;
1943
1944	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1945	slave_eth_dev = &rte_eth_devices[port_id];
1946
1947	if (check_for_bonded_ethdev(bonded_eth_dev))
1948		return;
1949
1950	internals = bonded_eth_dev->data->dev_private;
1951
1952	/* If the device isn't started don't handle interrupts */
1953	if (!bonded_eth_dev->data->dev_started)
1954		return;
1955
1956	/* verify that port_id is a valid slave of bonded port */
1957	for (i = 0; i < internals->slave_count; i++) {
1958		if (internals->slaves[i].port_id == port_id) {
1959			valid_slave = 1;
1960			break;
1961		}
1962	}
1963
1964	if (!valid_slave)
1965		return;
1966
1967	/* Search for port in active port list */
1968	active_pos = find_slave_by_id(internals->active_slaves,
1969			internals->active_slave_count, port_id);
1970
1971	rte_eth_link_get_nowait(port_id, &link);
1972	if (link.link_status) {
1973		if (active_pos < internals->active_slave_count)
1974			return;
1975
1976		/* if no active slave ports then set this port to be primary port */
1977		if (internals->active_slave_count < 1) {
1978			/* If first active slave, then change link status */
1979			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
1980			internals->current_primary_port = port_id;
1981			lsc_flag = 1;
1982
1983			mac_address_slaves_update(bonded_eth_dev);
1984
1985			/* Inherit eth dev link properties from first active slave */
1986			link_properties_set(bonded_eth_dev,
1987					&(slave_eth_dev->data->dev_link));
1988		}
1989
1990		activate_slave(bonded_eth_dev, port_id);
1991
1992		/* If user has defined the primary port then default to using it */
1993		if (internals->user_defined_primary_port &&
1994				internals->primary_port == port_id)
1995			bond_ethdev_primary_set(internals, port_id);
1996	} else {
1997		if (active_pos == internals->active_slave_count)
1998			return;
1999
2000		/* Remove from active slave list */
2001		deactivate_slave(bonded_eth_dev, port_id);
2002
2003		/* No active slaves, change link status to down and reset other
2004		 * link properties */
2005		if (internals->active_slave_count < 1) {
2006			lsc_flag = 1;
2007			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2008
2009			link_properties_reset(bonded_eth_dev);
2010		}
2011
2012		/* Update the primary id: take the first active slave from the list,
2013		 * or fall back to the configured primary port if none are available */
2014		if (port_id == internals->current_primary_port) {
2015			if (internals->active_slave_count > 0)
2016				bond_ethdev_primary_set(internals,
2017						internals->active_slaves[0]);
2018			else
2019				internals->current_primary_port = internals->primary_port;
2020		}
2021	}
2022
2023	if (lsc_flag) {
2024		/* Cancel any possible outstanding interrupts if delays are enabled */
2025		if (internals->link_up_delay_ms > 0 ||
2026			internals->link_down_delay_ms > 0)
2027			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2028					bonded_eth_dev);
2029
2030		if (bonded_eth_dev->data->dev_link.link_status) {
2031			if (internals->link_up_delay_ms > 0)
2032				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2033						bond_ethdev_delayed_lsc_propagation,
2034						(void *)bonded_eth_dev);
2035			else
2036				_rte_eth_dev_callback_process(bonded_eth_dev,
2037						RTE_ETH_EVENT_INTR_LSC);
2038
2039		} else {
2040			if (internals->link_down_delay_ms > 0)
2041				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2042						bond_ethdev_delayed_lsc_propagation,
2043						(void *)bonded_eth_dev);
2044			else
2045				_rte_eth_dev_callback_process(bonded_eth_dev,
2046						RTE_ETH_EVENT_INTR_LSC);
2047		}
2048	}
2049}
2050
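/* Store the new redirection table (replicated to cover the full reta_conf
 * array) and propagate it to every slave using that slave's own RETA size.
 * The requested size must match the bonded device's RETA size. */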
2051static int
2052bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2053		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2054{
2055	unsigned i, j;
2056	int result = 0;
2057	int slave_reta_size;
2058	unsigned reta_count;
2059	struct bond_dev_private *internals = dev->data->dev_private;
2060
2061	if (reta_size != internals->reta_size)
2062		return -EINVAL;
2063
2064	 /* Copy RETA table */
2065	reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2066
2067	for (i = 0; i < reta_count; i++) {
2068		internals->reta_conf[i].mask = reta_conf[i].mask;
2069		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2070			if ((reta_conf[i].mask >> j) & 0x01)
2071				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2072	}
2073
2074	/* Fill rest of array */
2075	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2076		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2077				sizeof(internals->reta_conf[0]) * reta_count);
2078
2079	/* Propagate RETA over slaves */
2080	for (i = 0; i < internals->slave_count; i++) {
2081		slave_reta_size = internals->slaves[i].reta_size;
2082		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2083				&internals->reta_conf[0], slave_reta_size);
2084		if (result < 0)
2085			return result;
2086	}
2087
2088	return 0;
2089}
2090
2091static int
2092bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2093		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2094{
2095	int i, j;
2096	struct bond_dev_private *internals = dev->data->dev_private;
2097
2098	if (reta_size != internals->reta_size)
2099		return -EINVAL;
2100
2101	 /* Copy RETA table */
2102	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2103		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2104			if ((reta_conf[i].mask >> j) & 0x01)
2105				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2106
2107	return 0;
2108}
2109
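/* Restrict the requested hash functions to those the bonded device
 * advertises, keep a copy of the (optional) RSS key, and push the resulting
 * configuration to every slave. */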
2110static int
2111bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2112		struct rte_eth_rss_conf *rss_conf)
2113{
2114	int i, result = 0;
2115	struct bond_dev_private *internals = dev->data->dev_private;
2116	struct rte_eth_rss_conf bond_rss_conf;
2117
2118	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2119
2120	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2121
2122	if (bond_rss_conf.rss_hf != 0)
2123		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2124
2125	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2126			sizeof(internals->rss_key)) {
2127		if (bond_rss_conf.rss_key_len == 0)
2128			bond_rss_conf.rss_key_len = 40;
2129		internals->rss_key_len = bond_rss_conf.rss_key_len;
2130		memcpy(internals->rss_key, bond_rss_conf.rss_key,
2131				internals->rss_key_len);
2132	}
2133
2134	for (i = 0; i < internals->slave_count; i++) {
2135		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2136				&bond_rss_conf);
2137		if (result < 0)
2138			return result;
2139	}
2140
2141	return 0;
2142}
2143
2144static int
2145bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2146		struct rte_eth_rss_conf *rss_conf)
2147{
2148	struct bond_dev_private *internals = dev->data->dev_private;
2149
2150	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2151	rss_conf->rss_key_len = internals->rss_key_len;
2152	if (rss_conf->rss_key)
2153		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2154
2155	return 0;
2156}
2157
2158const struct eth_dev_ops default_dev_ops = {
2159	.dev_start            = bond_ethdev_start,
2160	.dev_stop             = bond_ethdev_stop,
2161	.dev_close            = bond_ethdev_close,
2162	.dev_configure        = bond_ethdev_configure,
2163	.dev_infos_get        = bond_ethdev_info,
2164	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
2165	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
2166	.rx_queue_release     = bond_ethdev_rx_queue_release,
2167	.tx_queue_release     = bond_ethdev_tx_queue_release,
2168	.link_update          = bond_ethdev_link_update,
2169	.stats_get            = bond_ethdev_stats_get,
2170	.stats_reset          = bond_ethdev_stats_reset,
2171	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
2172	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
2173	.reta_update          = bond_ethdev_rss_reta_update,
2174	.reta_query           = bond_ethdev_rss_reta_query,
2175	.rss_hash_update      = bond_ethdev_rss_hash_update,
2176	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2177};
2178
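/* Virtual device initialisation entry point. The bonding mode is a mandatory
 * kvarg and the socket id is optional (defaulting to the caller's socket);
 * all remaining arguments are kept in the kvlist and applied later from
 * bond_ethdev_configure(). The exact key strings are defined by the
 * PMD_BOND_*_KVARG macros in rte_eth_bond_private.h; assuming the usual
 * "mode"/"slave" names, a typical invocation looks roughly like:
 *
 *   --vdev 'eth_bond0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0'
 *
 * (illustrative only -- consult the bonding PMD documentation for the keys
 * and values supported by your DPDK version). */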
2179static int
2180bond_init(const char *name, const char *params)
2181{
2182	struct bond_dev_private *internals;
2183	struct rte_kvargs *kvlist;
2184	uint8_t bonding_mode, socket_id;
2185	int  arg_count, port_id;
2186
2187	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2188
2189	kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2190	if (kvlist == NULL)
2191		return -1;
2192
2193	/* Parse link bonding mode */
2194	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2195		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2196				&bond_ethdev_parse_slave_mode_kvarg,
2197				&bonding_mode) != 0) {
2198			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2199					name);
2200			goto parse_error;
2201		}
2202	} else {
2203		RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2204				"device %s\n", name);
2205		goto parse_error;
2206	}
2207
2208	/* Parse socket id to create bonding device on */
2209	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2210	if (arg_count == 1) {
2211		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2212				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
2213				!= 0) {
2214			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2215					"bonded device %s\n", name);
2216			goto parse_error;
2217		}
2218	} else if (arg_count > 1) {
2219		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2220				"bonded device %s\n", name);
2221		goto parse_error;
2222	} else {
2223		socket_id = rte_socket_id();
2224	}
2225
2226	/* Create link bonding eth device */
2227	port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2228	if (port_id < 0) {
2229		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2230				"socket %u.\n", name, bonding_mode, socket_id);
2231		goto parse_error;
2232	}
2233	internals = rte_eth_devices[port_id].data->dev_private;
2234	internals->kvlist = kvlist;
2235
2236	RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2237			"socket %u.\n", name, port_id, bonding_mode, socket_id);
2238	return 0;
2239
2240parse_error:
2241	rte_kvargs_free(kvlist);
2242
2243	return -1;
2244}
2245
2246static int
2247bond_uninit(const char *name)
2248{
2249	int  ret;
2250
2251	if (name == NULL)
2252		return -EINVAL;
2253
2254	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2255
2256	/* free link bonding eth device */
2257	ret = rte_eth_bond_free(name);
2258	if (ret < 0)
2259		RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2260
2261	return ret;
2262}
2263
2264/* This function resolves the slave port ids after all the other pdevs and
2265 * vdevs have been allocated */
2266static int
2267bond_ethdev_configure(struct rte_eth_dev *dev)
2268{
2269	char *name = dev->data->name;
2270	struct bond_dev_private *internals = dev->data->dev_private;
2271	struct rte_kvargs *kvlist = internals->kvlist;
2272	int arg_count;
2273	uint8_t port_id = dev - rte_eth_devices;
2274
2275	static const uint8_t default_rss_key[40] = {
2276		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2277		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2278		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2279		0xBE, 0xAC, 0x01, 0xFA
2280	};
2281
2282	unsigned i, j;
2283
2284	/* If RSS is enabled, fill table and key with default values */
2285	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2286		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2287		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2288		memcpy(internals->rss_key, default_rss_key, 40);
2289
2290		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2291			internals->reta_conf[i].mask = ~0LL;
2292			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2293				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2294		}
2295	}
2296
2297	/* set the max_rx_pktlen */
2298	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2299
2300	/*
2301	 * if no kvlist, it means that this bonded device has been created
2302	 * through the bonding api.
2303	 */
2304	if (!kvlist)
2305		return 0;
2306
2307	/* Parse MAC address for bonded device */
2308	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2309	if (arg_count == 1) {
2310		struct ether_addr bond_mac;
2311
2312		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2313				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2314			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2315					name);
2316			return -1;
2317		}
2318
2319		/* Set MAC address */
2320		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2321			RTE_LOG(ERR, EAL,
2322					"Failed to set mac address on bonded device %s\n",
2323					name);
2324			return -1;
2325		}
2326	} else if (arg_count > 1) {
2327		RTE_LOG(ERR, EAL,
2328				"MAC address can be specified only once for bonded device %s\n",
2329				name);
2330		return -1;
2331	}
2332
2333	/* Parse/set balance mode transmit policy */
2334	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2335	if (arg_count == 1) {
2336		uint8_t xmit_policy;
2337
2338		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2339				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2340						0) {
2341			RTE_LOG(INFO, EAL,
2342					"Invalid xmit policy specified for bonded device %s\n",
2343					name);
2344			return -1;
2345		}
2346
2347		/* Set balance mode transmit policy */
2348		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2349			RTE_LOG(ERR, EAL,
2350					"Failed to set balance xmit policy on bonded device %s\n",
2351					name);
2352			return -1;
2353		}
2354	} else if (arg_count > 1) {
2355		RTE_LOG(ERR, EAL,
2356				"Transmit policy can be specified only once for bonded device"
2357				" %s\n", name);
2358		return -1;
2359	}
2360
2361	/* Parse/add slave ports to bonded device */
2362	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2363		struct bond_ethdev_slave_ports slave_ports;
2364		unsigned i;
2365
2366		memset(&slave_ports, 0, sizeof(slave_ports));
2367
2368		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2369				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2370			RTE_LOG(ERR, EAL,
2371					"Failed to parse slave ports for bonded device %s\n",
2372					name);
2373			return -1;
2374		}
2375
2376		for (i = 0; i < slave_ports.slave_count; i++) {
2377			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2378				RTE_LOG(ERR, EAL,
2379						"Failed to add port %d as slave to bonded device %s\n",
2380						slave_ports.slaves[i], name);
2381			}
2382		}
2383
2384	} else {
2385		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2386		return -1;
2387	}
2388
2389	/* Parse/set primary slave port id*/
2390	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2391	if (arg_count == 1) {
2392		uint8_t primary_slave_port_id;
2393
2394		if (rte_kvargs_process(kvlist,
2395				PMD_BOND_PRIMARY_SLAVE_KVARG,
2396				&bond_ethdev_parse_primary_slave_port_id_kvarg,
2397				&primary_slave_port_id) < 0) {
2398			RTE_LOG(INFO, EAL,
2399					"Invalid primary slave port id specified for bonded device"
2400					" %s\n", name);
2401			return -1;
2402		}
2403
2404		/* Set primary slave port id */
2405		if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2406				!= 0) {
2407			RTE_LOG(ERR, EAL,
2408					"Failed to set primary slave port %d on bonded device %s\n",
2409					primary_slave_port_id, name);
2410			return -1;
2411		}
2412	} else if (arg_count > 1) {
2413		RTE_LOG(INFO, EAL,
2414				"Primary slave can be specified only once for bonded device"
2415				" %s\n", name);
2416		return -1;
2417	}
2418
2419	/* Parse link status monitor polling interval */
2420	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2421	if (arg_count == 1) {
2422		uint32_t lsc_poll_interval_ms;
2423
2424		if (rte_kvargs_process(kvlist,
2425				PMD_BOND_LSC_POLL_PERIOD_KVARG,
2426				&bond_ethdev_parse_time_ms_kvarg,
2427				&lsc_poll_interval_ms) < 0) {
2428			RTE_LOG(INFO, EAL,
2429					"Invalid lsc polling interval value specified for bonded"
2430					" device %s\n", name);
2431			return -1;
2432		}
2433
2434		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2435				!= 0) {
2436			RTE_LOG(ERR, EAL,
2437					"Failed to set lsc monitor polling interval (%u ms) on"
2438					" bonded device %s\n", lsc_poll_interval_ms, name);
2439			return -1;
2440		}
2441	} else if (arg_count > 1) {
2442		RTE_LOG(INFO, EAL,
2443				"LSC polling interval can be specified only once for bonded"
2444				" device %s\n", name);
2445		return -1;
2446	}
2447
2448	/* Parse link up interrupt propagation delay */
2449	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2450	if (arg_count == 1) {
2451		uint32_t link_up_delay_ms;
2452
2453		if (rte_kvargs_process(kvlist,
2454				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2455				&bond_ethdev_parse_time_ms_kvarg,
2456				&link_up_delay_ms) < 0) {
2457			RTE_LOG(INFO, EAL,
2458					"Invalid link up propagation delay value specified for"
2459					" bonded device %s\n", name);
2460			return -1;
2461		}
2462
2463		/* Set link up propagation delay */
2464		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2465				!= 0) {
2466			RTE_LOG(ERR, EAL,
2467					"Failed to set link up propagation delay (%u ms) on bonded"
2468					" device %s\n", link_up_delay_ms, name);
2469			return -1;
2470		}
2471	} else if (arg_count > 1) {
2472		RTE_LOG(INFO, EAL,
2473				"Link up propagation delay can be specified only once for"
2474				" bonded device %s\n", name);
2475		return -1;
2476	}
2477
2478	/* Parse link down interrupt propagation delay */
2479	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2480	if (arg_count == 1) {
2481		uint32_t link_down_delay_ms;
2482
2483		if (rte_kvargs_process(kvlist,
2484				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2485				&bond_ethdev_parse_time_ms_kvarg,
2486				&link_down_delay_ms) < 0) {
2487			RTE_LOG(INFO, EAL,
2488					"Invalid link down propagation delay value specified for"
2489					" bonded device %s\n", name);
2490			return -1;
2491		}
2492
2493		/* Set link down propagation delay */
2494		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2495				!= 0) {
2496			RTE_LOG(ERR, EAL,
2497					"Failed to set link down propagation delay (%u ms) on"
2498					" bonded device %s\n", link_down_delay_ms, name);
2499			return -1;
2500		}
2501	} else if (arg_count > 1) {
2502		RTE_LOG(INFO, EAL,
2503				"Link down propagation delay can be specified only once for"
2504				" bonded device %s\n", name);
2505		return -1;
2506	}
2507
2508	return 0;
2509}
2510
2511static struct rte_driver bond_drv = {
2512	.name = "eth_bond",
2513	.type = PMD_VDEV,
2514	.init = bond_init,
2515	.uninit = bond_uninit,
2516};
2517
2518PMD_REGISTER_DRIVER(bond_drv);
2519