1/*-
2 *   BSD LICENSE
3 *
4 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5 *   Copyright(c) 2012-2014 6WIND S.A.
6 *   All rights reserved.
7 *
8 *   Redistribution and use in source and binary forms, with or without
9 *   modification, are permitted provided that the following conditions
10 *   are met:
11 *
12 *     * Redistributions of source code must retain the above copyright
13 *       notice, this list of conditions and the following disclaimer.
14 *     * Redistributions in binary form must reproduce the above copyright
15 *       notice, this list of conditions and the following disclaimer in
16 *       the documentation and/or other materials provided with the
17 *       distribution.
18 *     * Neither the name of Intel Corporation nor the names of its
19 *       contributors may be used to endorse or promote products derived
20 *       from this software without specific prior written permission.
21 *
22 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <stdint.h>
38#include <string.h>
39#include <stdarg.h>
40#include <unistd.h>
41#include <pthread.h>
42#include <syslog.h>
43#include <getopt.h>
44#include <sys/file.h>
45#include <fcntl.h>
46#include <stddef.h>
47#include <errno.h>
48#include <limits.h>
49#include <errno.h>
50#include <sys/mman.h>
51#include <sys/queue.h>
52#include <sys/stat.h>
53#if defined(RTE_ARCH_X86)
54#include <sys/io.h>
55#endif
56
57#include <rte_common.h>
58#include <rte_debug.h>
59#include <rte_memory.h>
60#include <rte_memzone.h>
61#include <rte_launch.h>
62#include <rte_eal.h>
63#include <rte_eal_memconfig.h>
64#include <rte_per_lcore.h>
65#include <rte_lcore.h>
66#include <rte_log.h>
67#include <rte_random.h>
68#include <rte_cycles.h>
69#include <rte_string_fns.h>
70#include <rte_cpuflags.h>
71#include <rte_interrupts.h>
72#include <rte_bus.h>
73#include <rte_pci.h>
74#include <rte_dev.h>
75#include <rte_devargs.h>
76#include <rte_common.h>
77#include <rte_version.h>
78#include <rte_atomic.h>
79#include <malloc_heap.h>
80
81#include "eal_private.h"
82#include "eal_thread.h"
83#include "eal_internal_cfg.h"
84#include "eal_filesystem.h"
85#include "eal_hugepages.h"
86#include "eal_options.h"
87#include "eal_vfio.h"
88
89#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
90
91#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
92
93/* Allow the application to print its usage message too if set */
94static rte_usage_hook_t	rte_application_usage_hook = NULL;
95
96/* early configuration structure, when memory config is not mmapped */
97static struct rte_mem_config early_mem_config;
98
99/* define fd variable here, because file needs to be kept open for the
100 * duration of the program, as we hold a write lock on it in the primary proc */
101static int mem_cfg_fd = -1;
102
103static struct flock wr_lock = {
104		.l_type = F_WRLCK,
105		.l_whence = SEEK_SET,
106		.l_start = offsetof(struct rte_mem_config, memseg),
107		.l_len = sizeof(early_mem_config.memseg),
108};
109
110/* Address of global and public configuration */
111static struct rte_config rte_config = {
112		.mem_config = &early_mem_config,
113};
114
115/* internal configuration (per-core) */
116struct lcore_config lcore_config[RTE_MAX_LCORE];
117
118/* internal configuration */
119struct internal_config internal_config;
120
121/* used by rte_rdtsc() */
122int rte_cycles_vmware_tsc_map;
123
124/* Return a pointer to the configuration structure */
125struct rte_config *
126rte_eal_get_configuration(void)
127{
128	return &rte_config;
129}
130
131/* parse a sysfs (or other) file containing one integer value */
132int
133eal_parse_sysfs_value(const char *filename, unsigned long *val)
134{
135	FILE *f;
136	char buf[BUFSIZ];
137	char *end = NULL;
138
139	if ((f = fopen(filename, "r")) == NULL) {
140		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
141			__func__, filename);
142		return -1;
143	}
144
145	if (fgets(buf, sizeof(buf), f) == NULL) {
146		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
147			__func__, filename);
148		fclose(f);
149		return -1;
150	}
151	*val = strtoul(buf, &end, 0);
152	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
153		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
154				__func__, filename);
155		fclose(f);
156		return -1;
157	}
158	fclose(f);
159	return 0;
160}
161
162
163/* create memory configuration in shared/mmap memory. Take out
164 * a write lock on the memsegs, so we can auto-detect primary/secondary.
165 * This means we never close the file while running (auto-close on exit).
166 * We also don't lock the whole file, so that in future we can use read-locks
167 * on other parts, e.g. memzones, to detect if there are running secondary
168 * processes. */
169static void
170rte_eal_config_create(void)
171{
172	void *rte_mem_cfg_addr;
173	int retval;
174
175	const char *pathname = eal_runtime_config_path();
176
177	if (internal_config.no_shconf)
178		return;
179
180	/* map the config before hugepage address so that we don't waste a page */
181	if (internal_config.base_virtaddr != 0)
182		rte_mem_cfg_addr = (void *)
183			RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
184			sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
185	else
186		rte_mem_cfg_addr = NULL;
187
188	if (mem_cfg_fd < 0){
189		mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
190		if (mem_cfg_fd < 0)
191			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
192	}
193
194	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
195	if (retval < 0){
196		close(mem_cfg_fd);
197		rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
198	}
199
200	retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
201	if (retval < 0){
202		close(mem_cfg_fd);
203		rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
204				"process running?\n", pathname);
205	}
206
207	rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
208				PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
209
210	if (rte_mem_cfg_addr == MAP_FAILED){
211		rte_panic("Cannot mmap memory for rte_config\n");
212	}
213	memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
214	rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
215
216	/* store address of the config in the config itself so that secondary
217	 * processes could later map the config into this exact location */
218	rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
219
220}
221
222/* attach to an existing shared memory config */
223static void
224rte_eal_config_attach(void)
225{
226	struct rte_mem_config *mem_config;
227
228	const char *pathname = eal_runtime_config_path();
229
230	if (internal_config.no_shconf)
231		return;
232
233	if (mem_cfg_fd < 0){
234		mem_cfg_fd = open(pathname, O_RDWR);
235		if (mem_cfg_fd < 0)
236			rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
237	}
238
239	/* map it as read-only first */
240	mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
241			PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
242	if (mem_config == MAP_FAILED)
243		rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
244			  errno, strerror(errno));
245
246	rte_config.mem_config = mem_config;
247}
248
249/* reattach the shared config at exact memory location primary process has it */
250static void
251rte_eal_config_reattach(void)
252{
253	struct rte_mem_config *mem_config;
254	void *rte_mem_cfg_addr;
255
256	if (internal_config.no_shconf)
257		return;
258
259	/* save the address primary process has mapped shared config to */
260	rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
261
262	/* unmap original config */
263	munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
264
265	/* remap the config at proper address */
266	mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
267			sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
268			mem_cfg_fd, 0);
269	if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
270		if (mem_config != MAP_FAILED)
271			/* errno is stale, don't use */
272			rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]"
273				  " - please use '--base-virtaddr' option\n",
274				  rte_mem_cfg_addr, mem_config);
275		else
276			rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
277				  errno, strerror(errno));
278	}
279	close(mem_cfg_fd);
280
281	rte_config.mem_config = mem_config;
282}
283
284/* Detect if we are a primary or a secondary process */
285enum rte_proc_type_t
286eal_proc_type_detect(void)
287{
288	enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
289	const char *pathname = eal_runtime_config_path();
290
291	/* if we can open the file but not get a write-lock we are a secondary
292	 * process. NOTE: if we get a file handle back, we keep that open
293	 * and don't close it to prevent a race condition between multiple opens */
294	if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
295			(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
296		ptype = RTE_PROC_SECONDARY;
297
298	RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
299			ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
300
301	return ptype;
302}
303
304/* Sets up rte_config structure with the pointer to shared memory config.*/
305static void
306rte_config_init(void)
307{
308	rte_config.process_type = internal_config.process_type;
309
310	switch (rte_config.process_type){
311	case RTE_PROC_PRIMARY:
312		rte_eal_config_create();
313		break;
314	case RTE_PROC_SECONDARY:
315		rte_eal_config_attach();
316		rte_eal_mcfg_wait_complete(rte_config.mem_config);
317		rte_eal_config_reattach();
318		break;
319	case RTE_PROC_AUTO:
320	case RTE_PROC_INVALID:
321		rte_panic("Invalid process type\n");
322	}
323}
324
325/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
326static void
327eal_hugedirs_unlock(void)
328{
329	int i;
330
331	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
332	{
333		/* skip uninitialized */
334		if (internal_config.hugepage_info[i].lock_descriptor < 0)
335			continue;
336		/* unlock hugepage file */
337		flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
338		close(internal_config.hugepage_info[i].lock_descriptor);
339		/* reset the field */
340		internal_config.hugepage_info[i].lock_descriptor = -1;
341	}
342}
343
344/* display usage */
345static void
346eal_usage(const char *prgname)
347{
348	printf("\nUsage: %s ", prgname);
349	eal_common_usage();
350	printf("EAL Linux options:\n"
351	       "  --"OPT_SOCKET_MEM"        Memory to allocate on sockets (comma separated values)\n"
352	       "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
353	       "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
354	       "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
355	       "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
356	       "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
357	       "  --"OPT_XEN_DOM0"          Support running on Xen dom0 without hugetlbfs\n"
358	       "\n");
359	/* Allow the application to print its usage message too if hook is set */
360	if ( rte_application_usage_hook ) {
361		printf("===== Application Usage =====\n\n");
362		rte_application_usage_hook(prgname);
363	}
364}
365
366/* Set a per-application usage message */
367rte_usage_hook_t
368rte_set_application_usage_hook( rte_usage_hook_t usage_func )
369{
370	rte_usage_hook_t	old_func;
371
372	/* Will be NULL on the first call to denote the last usage routine. */
373	old_func					= rte_application_usage_hook;
374	rte_application_usage_hook	= usage_func;
375
376	return old_func;
377}
378
379static int
380eal_parse_socket_mem(char *socket_mem)
381{
382	char * arg[RTE_MAX_NUMA_NODES];
383	char *end;
384	int arg_num, i, len;
385	uint64_t total_mem = 0;
386
387	len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
388	if (len == SOCKET_MEM_STRLEN) {
389		RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
390		return -1;
391	}
392
393	/* all other error cases will be caught later */
394	if (!isdigit(socket_mem[len-1]))
395		return -1;
396
397	/* split the optarg into separate socket values */
398	arg_num = rte_strsplit(socket_mem, len,
399			arg, RTE_MAX_NUMA_NODES, ',');
400
401	/* if split failed, or 0 arguments */
402	if (arg_num <= 0)
403		return -1;
404
405	internal_config.force_sockets = 1;
406
407	/* parse each defined socket option */
408	errno = 0;
409	for (i = 0; i < arg_num; i++) {
410		end = NULL;
411		internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
412
413		/* check for invalid input */
414		if ((errno != 0)  ||
415				(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
416			return -1;
417		internal_config.socket_mem[i] *= 1024ULL;
418		internal_config.socket_mem[i] *= 1024ULL;
419		total_mem += internal_config.socket_mem[i];
420	}
421
422	/* check if we have a positive amount of total memory */
423	if (total_mem == 0)
424		return -1;
425
426	return 0;
427}
428
429static int
430eal_parse_base_virtaddr(const char *arg)
431{
432	char *end;
433	uint64_t addr;
434
435	errno = 0;
436	addr = strtoull(arg, &end, 16);
437
438	/* check for errors */
439	if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
440		return -1;
441
442	/* make sure we don't exceed 32-bit boundary on 32-bit target */
443#ifndef RTE_ARCH_64
444	if (addr >= UINTPTR_MAX)
445		return -1;
446#endif
447
448	/* align the addr on 16M boundary, 16MB is the minimum huge page
449	 * size on IBM Power architecture. If the addr is aligned to 16MB,
450	 * it can align to 2MB for x86. So this alignment can also be used
451	 * on x86 */
452	internal_config.base_virtaddr =
453		RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M);
454
455	return 0;
456}
457
458static int
459eal_parse_vfio_intr(const char *mode)
460{
461	unsigned i;
462	static struct {
463		const char *name;
464		enum rte_intr_mode value;
465	} map[] = {
466		{ "legacy", RTE_INTR_MODE_LEGACY },
467		{ "msi", RTE_INTR_MODE_MSI },
468		{ "msix", RTE_INTR_MODE_MSIX },
469	};
470
471	for (i = 0; i < RTE_DIM(map); i++) {
472		if (!strcmp(mode, map[i].name)) {
473			internal_config.vfio_intr_mode = map[i].value;
474			return 0;
475		}
476	}
477	return -1;
478}
479
480/* Parse the arguments for --log-level only */
481static void
482eal_log_level_parse(int argc, char **argv)
483{
484	int opt;
485	char **argvopt;
486	int option_index;
487	const int old_optind = optind;
488	const int old_optopt = optopt;
489	char * const old_optarg = optarg;
490
491	argvopt = argv;
492	optind = 1;
493
494	eal_reset_internal_config(&internal_config);
495
496	while ((opt = getopt_long(argc, argvopt, eal_short_options,
497				  eal_long_options, &option_index)) != EOF) {
498
499		int ret;
500
501		/* getopt is not happy, stop right now */
502		if (opt == '?')
503			break;
504
505		ret = (opt == OPT_LOG_LEVEL_NUM) ?
506			eal_parse_common_option(opt, optarg, &internal_config) : 0;
507
508		/* common parser is not happy */
509		if (ret < 0)
510			break;
511	}
512
513	/* restore getopt lib */
514	optind = old_optind;
515	optopt = old_optopt;
516	optarg = old_optarg;
517}
518
519/* Parse the argument given in the command line of the application */
520static int
521eal_parse_args(int argc, char **argv)
522{
523	int opt, ret;
524	char **argvopt;
525	int option_index;
526	char *prgname = argv[0];
527	const int old_optind = optind;
528	const int old_optopt = optopt;
529	char * const old_optarg = optarg;
530
531	argvopt = argv;
532	optind = 1;
533
534	while ((opt = getopt_long(argc, argvopt, eal_short_options,
535				  eal_long_options, &option_index)) != EOF) {
536
537		/* getopt is not happy, stop right now */
538		if (opt == '?') {
539			eal_usage(prgname);
540			ret = -1;
541			goto out;
542		}
543
544		ret = eal_parse_common_option(opt, optarg, &internal_config);
545		/* common parser is not happy */
546		if (ret < 0) {
547			eal_usage(prgname);
548			ret = -1;
549			goto out;
550		}
551		/* common parser handled this option */
552		if (ret == 0)
553			continue;
554
555		switch (opt) {
556		case 'h':
557			eal_usage(prgname);
558			exit(EXIT_SUCCESS);
559
560		/* long options */
561		case OPT_XEN_DOM0_NUM:
562#ifdef RTE_LIBRTE_XEN_DOM0
563			internal_config.xen_dom0_support = 1;
564#else
565			RTE_LOG(ERR, EAL, "Can't support DPDK app "
566				"running on Dom0, please configure"
567				" RTE_LIBRTE_XEN_DOM0=y\n");
568			ret = -1;
569			goto out;
570#endif
571			break;
572
573		case OPT_HUGE_DIR_NUM:
574			internal_config.hugepage_dir = optarg;
575			break;
576
577		case OPT_FILE_PREFIX_NUM:
578			internal_config.hugefile_prefix = optarg;
579			break;
580
581		case OPT_SOCKET_MEM_NUM:
582			if (eal_parse_socket_mem(optarg) < 0) {
583				RTE_LOG(ERR, EAL, "invalid parameters for --"
584						OPT_SOCKET_MEM "\n");
585				eal_usage(prgname);
586				ret = -1;
587				goto out;
588			}
589			break;
590
591		case OPT_BASE_VIRTADDR_NUM:
592			if (eal_parse_base_virtaddr(optarg) < 0) {
593				RTE_LOG(ERR, EAL, "invalid parameter for --"
594						OPT_BASE_VIRTADDR "\n");
595				eal_usage(prgname);
596				ret = -1;
597				goto out;
598			}
599			break;
600
601		case OPT_VFIO_INTR_NUM:
602			if (eal_parse_vfio_intr(optarg) < 0) {
603				RTE_LOG(ERR, EAL, "invalid parameters for --"
604						OPT_VFIO_INTR "\n");
605				eal_usage(prgname);
606				ret = -1;
607				goto out;
608			}
609			break;
610
611		case OPT_CREATE_UIO_DEV_NUM:
612			internal_config.create_uio_dev = 1;
613			break;
614
615		default:
616			if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
617				RTE_LOG(ERR, EAL, "Option %c is not supported "
618					"on Linux\n", opt);
619			} else if (opt >= OPT_LONG_MIN_NUM &&
620				   opt < OPT_LONG_MAX_NUM) {
621				RTE_LOG(ERR, EAL, "Option %s is not supported "
622					"on Linux\n",
623					eal_long_options[option_index].name);
624			} else {
625				RTE_LOG(ERR, EAL, "Option %d is not supported "
626					"on Linux\n", opt);
627			}
628			eal_usage(prgname);
629			ret = -1;
630			goto out;
631		}
632	}
633
634	if (eal_adjust_config(&internal_config) != 0) {
635		ret = -1;
636		goto out;
637	}
638
639	/* sanity checks */
640	if (eal_check_common_options(&internal_config) != 0) {
641		eal_usage(prgname);
642		ret = -1;
643		goto out;
644	}
645
646	/* --xen-dom0 doesn't make sense with --socket-mem */
647	if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
648		RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified "
649			"together with --"OPT_XEN_DOM0"\n");
650		eal_usage(prgname);
651		ret = -1;
652		goto out;
653	}
654
655	if (optind >= 0)
656		argv[optind-1] = prgname;
657	ret = optind-1;
658
659out:
660	/* restore getopt lib */
661	optind = old_optind;
662	optopt = old_optopt;
663	optarg = old_optarg;
664
665	return ret;
666}
667
668static void
669eal_check_mem_on_local_socket(void)
670{
671	const struct rte_memseg *ms;
672	int i, socket_id;
673
674	socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
675
676	ms = rte_eal_get_physmem_layout();
677
678	for (i = 0; i < RTE_MAX_MEMSEG; i++)
679		if (ms[i].socket_id == socket_id &&
680				ms[i].len > 0)
681			return;
682
683	RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
684			"memory on local socket!\n");
685}
686
/*
 * No-op lcore function: ignores its argument and returns 0. rte_eal_init()
 * launches it on the slave lcores to learn that they are all ready.
 */
static int
sync_func(void *arg)
{
	(void)arg;

	return 0;
}
692
693inline static void
694rte_eal_mcfg_complete(void)
695{
696	/* ALL shared mem_config related INIT DONE */
697	if (rte_config.process_type == RTE_PROC_PRIMARY)
698		rte_config.mem_config->magic = RTE_MAGIC;
699}
700
/*
 * Request I/O privilege level 3 through iopl().
 *
 * The call is only compiled in when RTE_ARCH_X86 is defined; there the
 * function returns 0 if iopl() succeeds and -1 if it fails. On every other
 * architecture no call is made and 0 is returned.
 */
int
rte_eal_iopl_init(void)
{
#ifdef RTE_ARCH_X86
	return (iopl(3) == 0) ? 0 : -1;
#else
	return 0;
#endif
}
715
#ifdef VFIO_PRESENT
/*
 * Enable VFIO for PCI (unless PCI is disabled) and, in a primary process
 * with VFIO enabled, set up the multi-process sync channel.
 *
 * Returns 0 on success or when there is nothing to do, -1 when
 * vfio_mp_sync_setup() fails.
 */
static int rte_eal_vfio_setup(void)
{
	int vfio_enabled = 0;

	if (!internal_config.no_pci) {
		pci_vfio_enable();
		vfio_enabled = pci_vfio_is_enabled();
	}

	if (!vfio_enabled)
		return 0;

	/* If we are the primary process, create a thread to communicate with
	 * secondary processes. The thread uses a socket to wait for requests
	 * from secondary processes to send open file descriptors, because VFIO
	 * does not allow multiple open descriptors on a group or VFIO
	 * container.
	 */
	if (internal_config.process_type != RTE_PROC_PRIMARY)
		return 0;

	if (vfio_mp_sync_setup() < 0)
		return -1;

	return 0;
}
#endif
742
743/* Launch threads, called at application init(). */
744int
745rte_eal_init(int argc, char **argv)
746{
747	int i, fctret, ret;
748	pthread_t thread_id;
749	static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
750	const char *logid;
751	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
752	char thread_name[RTE_MAX_THREAD_NAME_LEN];
753
754	/* checks if the machine is adequate */
755	rte_cpu_check_supported();
756
757	if (!rte_atomic32_test_and_set(&run_once))
758		return -1;
759
760	logid = strrchr(argv[0], '/');
761	logid = strdup(logid ? logid + 1: argv[0]);
762
763	thread_id = pthread_self();
764
765	eal_log_level_parse(argc, argv);
766
767	/* set log level as early as possible */
768	rte_set_log_level(internal_config.log_level);
769
770	if (rte_eal_cpu_init() < 0)
771		rte_panic("Cannot detect lcores\n");
772
773	fctret = eal_parse_args(argc, argv);
774	if (fctret < 0)
775		exit(1);
776
777	if (internal_config.no_hugetlbfs == 0 &&
778			internal_config.process_type != RTE_PROC_SECONDARY &&
779			internal_config.xen_dom0_support == 0 &&
780			eal_hugepage_info_init() < 0)
781		rte_panic("Cannot get hugepage information\n");
782
783	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
784		if (internal_config.no_hugetlbfs)
785			internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
786	}
787
788	if (internal_config.vmware_tsc_map == 1) {
789#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
790		rte_cycles_vmware_tsc_map = 1;
791		RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
792				"you must have monitor_control.pseudo_perfctr = TRUE\n");
793#else
794		RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
795				"RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
796#endif
797	}
798
799	rte_srand(rte_rdtsc());
800
801	rte_config_init();
802
803	if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
804		rte_panic("Cannot init logs\n");
805
806	if (rte_eal_pci_init() < 0)
807		rte_panic("Cannot init PCI\n");
808
809#ifdef VFIO_PRESENT
810	if (rte_eal_vfio_setup() < 0)
811		rte_panic("Cannot init VFIO\n");
812#endif
813
814	if (rte_eal_memory_init() < 0)
815		rte_panic("Cannot init memory\n");
816
817	/* the directories are locked during eal_hugepage_info_init */
818	eal_hugedirs_unlock();
819
820	if (rte_eal_memzone_init() < 0)
821		rte_panic("Cannot init memzone\n");
822
823	if (rte_eal_tailqs_init() < 0)
824		rte_panic("Cannot init tail queues for objects\n");
825
826	if (rte_eal_alarm_init() < 0)
827		rte_panic("Cannot init interrupt-handling thread\n");
828
829	if (rte_eal_timer_init() < 0)
830		rte_panic("Cannot init HPET or TSC timers\n");
831
832	eal_check_mem_on_local_socket();
833
834	if (eal_plugins_init() < 0)
835		rte_panic("Cannot init plugins\n");
836
837	eal_thread_init_master(rte_config.master_lcore);
838
839	ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
840
841	RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
842		rte_config.master_lcore, (int)thread_id, cpuset,
843		ret == 0 ? "" : "...");
844
845	if (rte_eal_intr_init() < 0)
846		rte_panic("Cannot init interrupt-handling thread\n");
847
848	if (rte_bus_scan())
849		rte_panic("Cannot scan the buses for devices\n");
850
851	RTE_LCORE_FOREACH_SLAVE(i) {
852
853		/*
854		 * create communication pipes between master thread
855		 * and children
856		 */
857		if (pipe(lcore_config[i].pipe_master2slave) < 0)
858			rte_panic("Cannot create pipe\n");
859		if (pipe(lcore_config[i].pipe_slave2master) < 0)
860			rte_panic("Cannot create pipe\n");
861
862		lcore_config[i].state = WAIT;
863
864		/* create a thread for each lcore */
865		ret = pthread_create(&lcore_config[i].thread_id, NULL,
866				     eal_thread_loop, NULL);
867		if (ret != 0)
868			rte_panic("Cannot create thread\n");
869
870		/* Set thread_name for aid in debugging. */
871		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
872			"lcore-slave-%d", i);
873		ret = rte_thread_setname(lcore_config[i].thread_id,
874						thread_name);
875		if (ret != 0)
876			RTE_LOG(DEBUG, EAL,
877				"Cannot set name for lcore thread\n");
878	}
879
880	/*
881	 * Launch a dummy function on all slave lcores, so that master lcore
882	 * knows they are all ready when this function returns.
883	 */
884	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
885	rte_eal_mp_wait_lcore();
886
887	/* Probe all the buses and devices/drivers on them */
888	if (rte_bus_probe())
889		rte_panic("Cannot probe devices\n");
890
891	/* Probe & Initialize PCI devices */
892	if (rte_eal_pci_probe())
893		rte_panic("Cannot probe PCI\n");
894
895	if (rte_eal_dev_init() < 0)
896		rte_panic("Cannot init pmd devices\n");
897
898	rte_eal_mcfg_complete();
899
900	return fctret;
901}
902
903/* get core role */
904enum rte_lcore_role_t
905rte_eal_lcore_role(unsigned lcore_id)
906{
907	return rte_config.lcore_role[lcore_id];
908}
909
910enum rte_proc_type_t
911rte_eal_process_type(void)
912{
913	return rte_config.process_type;
914}
915
916int rte_eal_has_hugepages(void)
917{
918	return ! internal_config.no_hugetlbfs;
919}
920
921int
922rte_eal_check_module(const char *module_name)
923{
924	char sysfs_mod_name[PATH_MAX];
925	struct stat st;
926	int n;
927
928	if (NULL == module_name)
929		return -1;
930
931	/* Check if there is sysfs mounted */
932	if (stat("/sys/module", &st) != 0) {
933		RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n",
934			errno, strerror(errno));
935		return -1;
936	}
937
938	/* A module might be built-in, therefore try sysfs */
939	n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name);
940	if (n < 0 || n > PATH_MAX) {
941		RTE_LOG(DEBUG, EAL, "Could not format module path\n");
942		return -1;
943	}
944
945	if (stat(sysfs_mod_name, &st) != 0) {
946		RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n",
947		        sysfs_mod_name, errno, strerror(errno));
948		return 0;
949	}
950
951	/* Module has been found */
952	return 1;
953}
954