main.c revision 809f0800
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>

#define MAX_QUEUES 1024
/*
 * 1024 queues are required to meet the needs of a large number of vmdq_pools.
 * The mbuf pool must hold (RX/TX queue count * RX/TX ring descriptor count)
 * mbufs per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
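/*
 * Sizing example: with RTE_TEST_RX_DESC_DEFAULT = 128 and
 * RTE_TEST_TX_DESC_DEFAULT = 512 (defined below), this works out to
 * 1024 * RTE_MAX(128, 512) = 524288 mbufs per port.
 */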
#define MBUF_CACHE_SIZE 64

#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of pools (if the user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;

/* empty VMDQ configuration structure; filled in programmatically */
static const struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};

static unsigned lcore_ids[RTE_MAX_LCORE];
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports; /**< The number of ports specified in command line */

/* array used for printing out statistics */
volatile unsigned long rxPackets[MAX_QUEUES] = {0};

const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
const uint16_t num_vlans = RTE_DIM(vlan_tags);
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
/* pool mac addr template; pool mac addrs look like: 52 54 00 12 port# pool# */
static struct ether_addr pool_addr_template = {
	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
};
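/*
 * For example, VMDQ pool 2 on port 0 is assigned the MAC address
 * 52:54:00:12:00:02 by port_init() below.
 */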

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
/*
 * Builds up the correct configuration for vmdq based on the vlan tags array
 * given above, and determines the queue number and pool map number according
 * to the valid pool number.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
{
	struct rte_eth_vmdq_rx_conf conf;
	unsigned i;

	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
	conf.nb_pool_maps = num_pools;
	conf.enable_default_pool = 0;
	conf.default_pool = 0; /* set explicit value, even if not used */

	for (i = 0; i < conf.nb_pool_maps; i++) {
		conf.pool_map[i].vlan_id = vlan_tags[i];
		conf.pool_map[i].pools = (1UL << (i % num_pools));
	}

	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
	return 0;
}
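/*
 * For example, with the num_pools argument equal to 8 the loop above produces
 * a one-to-one mapping: vlan_tags[0] = 0 goes to pool 0 (pools bitmask 0x01),
 * vlan_tags[1] = 1 to pool 1 (0x02), ..., vlan_tags[7] = 7 to pool 7 (0x80).
 * Because nb_pool_maps equals num_pools, the (i % num_pools) term never wraps
 * and each mapped VLAN lands in its own pool.
 */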

/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as a parameter
 */
static inline int
port_init(uint8_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_conf port_conf;
	uint16_t rxRings, txRings;
	const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
	const uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
	int retval;
	uint16_t q;
	uint16_t queues_per_pool;
	uint32_t max_nb_pools;

	/*
	 * The max pool number from dev_info will be used to validate the pool
	 * number specified on the command line.
	 */
	rte_eth_dev_info_get(port, &dev_info);
	max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
	/*
	 * We allow processing of only part of the VMDQ pools, as specified
	 * by num_pools on the command line.
	 */
	if (num_pools > max_nb_pools) {
		printf("num_pools %u > max_nb_pools %u\n",
			num_pools, max_nb_pools);
		return -1;
	}
	retval = get_eth_conf(&port_conf, max_nb_pools);
	if (retval < 0)
		return retval;

	/*
	 * NIC queues are divided into pf queues and vmdq queues.
	 * The assumption here is that all ports have the same configuration!
	 */
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_pools * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base  = dev_info.vmdq_pool_base;

	printf("pf queue num: %u, configured vmdq pool num: %u,"
		" each vmdq pool has %u queues\n",
		num_pf_queues, num_pools, queues_per_pool);
	printf("vmdq queue base: %d pool base %d\n",
		vmdq_queue_base, vmdq_pool_base);
	if (port >= rte_eth_dev_count())
		return -1;

	/*
	 * Though in this example we only receive packets from the first queue
	 * of each pool and send packets through the first rte_lcore_count()
	 * tx queues of the vmdq queues, all queues including pf queues are
	 * set up. This is because the VMDQ queues don't always start from
	 * zero, and the PMD layer doesn't support selectively initialising
	 * only part of the rx/tx queues.
	 */
	rxRings = (uint16_t)dev_info.max_rx_queues;
	txRings = (uint16_t)dev_info.max_tx_queues;
	retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
	if (retval != 0)
		return retval;

	rte_eth_dev_info_get(port, &dev_info);
	rxconf = &dev_info.default_rxconf;
	rxconf->rx_drop_en = 1;
	for (q = 0; q < rxRings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
					rte_eth_dev_socket_id(port),
					rxconf,
					mbuf_pool);
		if (retval < 0) {
			printf("initialise rx queue %d failed\n", q);
			return retval;
		}
	}

	for (q = 0; q < txRings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, txRingSize,
					rte_eth_dev_socket_id(port),
					NULL);
		if (retval < 0) {
			printf("initialise tx queue %d failed\n", q);
			return retval;
		}
	}

	retval = rte_eth_dev_start(port);
	if (retval < 0) {
		printf("port %d start failed\n", port);
		return retval;
	}

	rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned)port,
			vmdq_ports_eth_addr[port].addr_bytes[0],
			vmdq_ports_eth_addr[port].addr_bytes[1],
			vmdq_ports_eth_addr[port].addr_bytes[2],
			vmdq_ports_eth_addr[port].addr_bytes[3],
			vmdq_ports_eth_addr[port].addr_bytes[4],
			vmdq_ports_eth_addr[port].addr_bytes[5]);

	/*
	 * Set a mac for each pool.
	 * There is no default mac for the pools in i40e.
	 * Remove this after i40e fixes this issue.
	 */
	for (q = 0; q < num_pools; q++) {
		struct ether_addr mac;
		mac = pool_addr_template;
		mac.addr_bytes[4] = port;
		mac.addr_bytes[5] = q;
		printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
			port, q,
			mac.addr_bytes[0], mac.addr_bytes[1],
			mac.addr_bytes[2], mac.addr_bytes[3],
			mac.addr_bytes[4], mac.addr_bytes[5]);
		retval = rte_eth_dev_mac_addr_add(port, &mac,
				q + vmdq_pool_base);
		if (retval) {
			printf("mac addr add failed at pool %d\n", q);
			return retval;
		}
	}

	return 0;
}

/* Check the num_pools parameter and set it if OK */
static int
vmdq_parse_num_pools(const char *q_arg)
{
	char *end = NULL;
	int n;

	/* parse number string */
	n = strtol(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (n > num_vlans) {
		printf("num_pools %d > num_vlans %d\n", n, num_vlans);
		return -1;
	}

	num_pools = n;

	return 0;
}


static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}
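/*
 * The portmask is a hexadecimal bitmap of port IDs, so for example
 * -p 0x3 enables ports 0 and 1, and -p 0xf enables ports 0-3.
 */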

/* Display usage */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	"  --nb-pools NP: number of pools\n",
	       prgname);
}
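/*
 * Example invocation (the binary name and the EAL core/memory options are
 * environment specific and shown here only as an illustration):
 *
 *   ./build/vmdq_app -c 0xf -n 4 -- -p 0x3 --nb-pools 8
 *
 * This runs on four lcores, enables ports 0 and 1, and requests 8 VMDQ
 * pools per port.
 */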

/* Parse the arguments given on the command line of the application */
static int
vmdq_parse_args(int argc, char **argv)
{
	int opt;
	int option_index;
	unsigned i;
	const char *prgname = argv[0];
	static struct option long_option[] = {
		{"nb-pools", required_argument, NULL, 0},
		{NULL, 0, 0, 0}
	};

	/* Parse command line */
	while ((opt = getopt_long(argc, argv, "p:", long_option,
		&option_index)) != EOF) {
		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				vmdq_usage(prgname);
				return -1;
			}
			break;
		case 0:
			if (vmdq_parse_num_pools(optarg) == -1) {
				printf("invalid number of pools\n");
				vmdq_usage(prgname);
				return -1;
			}
			break;

		default:
			vmdq_usage(prgname);
			return -1;
		}
	}

	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (enabled_port_mask & (1 << i))
			ports[num_ports++] = (uint8_t)i;
	}

	if (num_ports < 2 || num_ports % 2) {
		printf("Current enabled port number is %u, "
			"but it should be even and at least 2\n", num_ports);
		return -1;
	}

	return 0;
}

static void
update_mac_address(struct rte_mbuf *m, unsigned dst_port)
{
	struct ether_hdr *eth;
	void *tmp;

	eth = rte_pktmbuf_mtod(m, struct ether_hdr *);

	/*
	 * dst addr: 02:00:00:00:00:xx, where xx is the destination port id.
	 * On a little-endian CPU the single 64-bit store below writes the
	 * 6-byte destination address plus the first 2 bytes of the source
	 * address, which are then overwritten by ether_addr_copy().
	 */
	tmp = &eth->d_addr.addr_bytes[0];
	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);

	/* src addr */
	ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
}

/* When we receive a HUP signal, print out our stats */
static void
sighup_handler(int signum)
{
	unsigned q;
	for (q = 0; q < num_queues; q++) {
		if (q % (num_queues/num_pools) == 0)
			printf("\nPool %u: ", q/(num_queues/num_pools));
		printf("%lu ", rxPackets[q]);
	}
	printf("\nFinished handling signal %d\n", signum);
}
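/*
 * The per-pool statistics above can be dumped from another shell while the
 * application is running, for example with something like
 * "kill -HUP <pid of the vmdq example>".
 */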

/*
 * Main processing loop, run on each lcore: reads from this core's share of
 * the VMDQ rx queues on each port and forwards the packets to the paired
 * port (0 <-> 1, 2 <-> 3, etc.).
 */
static int
lcore_main(__attribute__((__unused__)) void *dummy)
{
	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
	const uint16_t num_cores = (uint16_t)rte_lcore_count();
	uint16_t core_id = 0;
	uint16_t startQueue, endQueue;
	uint16_t q, i, p;
	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);

	/* find this lcore's index in the lcore_ids[] array */
	for (i = 0; i < num_cores; i++)
		if (lcore_ids[i] == lcore_id) {
			core_id = i;
			break;
		}

	if (remainder != 0) {
		if (core_id < remainder) {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores + 1));
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores) + 1);
		} else {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores) +
					remainder);
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores));
		}
	} else {
		startQueue = (uint16_t)(core_id *
				(num_vmdq_queues / num_cores));
		endQueue = (uint16_t)(startQueue +
				(num_vmdq_queues / num_cores));
	}
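	/*
	 * Example of the split above: with num_vmdq_queues = 10 and
	 * num_cores = 4 the remainder is 2, so core 0 reads queues 0-2,
	 * core 1 reads queues 3-5, core 2 reads queues 6-7 and core 3 reads
	 * queues 8-9 (all before the vmdq_queue_base offset is applied below).
	 */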

	/* vmdq queue idx doesn't always start from zero. */
	startQueue += vmdq_queue_base;
	endQueue   += vmdq_queue_base;
	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
		(unsigned)lcore_id, startQueue, endQueue - 1);

	if (startQueue == endQueue) {
		printf("lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	for (;;) {
		struct rte_mbuf *buf[MAX_PKT_BURST];
		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

		for (p = 0; p < num_ports; p++) {
			const uint8_t sport = ports[p];
			/* 0 <-> 1, 2 <-> 3 etc */
			const uint8_t dport = ports[p ^ 1];
			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
				continue;

			for (q = startQueue; q < endQueue; q++) {
				const uint16_t rxCount = rte_eth_rx_burst(sport,
					q, buf, buf_size);

				if (unlikely(rxCount == 0))
					continue;

				rxPackets[q] += rxCount;

				for (i = 0; i < rxCount; i++)
					update_mac_address(buf[i], dport);

				const uint16_t txCount = rte_eth_tx_burst(dport,
					vmdq_queue_base + core_id,
					buf,
					rxCount);

				/* free any mbufs that could not be transmitted */
				if (txCount != rxCount) {
					for (i = txCount; i < rxCount; i++)
						rte_pktmbuf_free(buf[i]);
				}
			}
		}
	}
}

/*
 * Update the global var num_ports and the array ports according to the
 * number of ports in the system, and return the number of valid ports.
 */
static unsigned check_ports_num(unsigned nb_ports)
{
	unsigned valid_num_ports = num_ports;
	unsigned portid;

	if (num_ports > nb_ports) {
		printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
			num_ports, nb_ports);
		num_ports = nb_ports;
	}

	for (portid = 0; portid < num_ports; portid++) {
		if (ports[portid] >= nb_ports) {
			printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
				ports[portid], (nb_ports - 1));
			ports[portid] = INVALID_PORT_ID;
			valid_num_ports--;
		}
	}
	return valid_num_ports;
}

/* Main function, does initialisation and calls the per-lcore functions */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned lcore_id, core_id = 0;
	int ret;
	unsigned nb_ports, valid_num_ports;
	uint8_t portid;

	signal(SIGHUP, sighup_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	argc -= ret;
	argv += ret;

	/* parse app arguments */
	ret = vmdq_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		if (rte_lcore_is_enabled(lcore_id))
			lcore_ids[core_id++] = lcore_id;

	if (rte_lcore_count() > RTE_MAX_LCORE)
		rte_exit(EXIT_FAILURE, "Not enough cores\n");

	nb_ports = rte_eth_dev_count();

	/*
	 * Update the global var num_ports and the global array ports,
	 * and get the value of valid_num_ports according to the number of
	 * system ports.
	 */
	valid_num_ports = check_ports_num(nb_ports);

	if (valid_num_ports < 2 || valid_num_ports % 2) {
		printf("Current valid ports number is %u\n", valid_num_ports);
		rte_exit(EXIT_FAILURE, "Error: number of valid ports must be even and at least 2\n");
	}

	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}
		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
	}

	/* call lcore_main() on every lcore */
	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}