eal_pci.c revision 9ca4a157
1/*-
2 *   BSD LICENSE
3 *
4 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 *   All rights reserved.
6 *
7 *   Redistribution and use in source and binary forms, with or without
8 *   modification, are permitted provided that the following conditions
9 *   are met:
10 *
11 *     * Redistributions of source code must retain the above copyright
12 *       notice, this list of conditions and the following disclaimer.
13 *     * Redistributions in binary form must reproduce the above copyright
14 *       notice, this list of conditions and the following disclaimer in
15 *       the documentation and/or other materials provided with the
16 *       distribution.
17 *     * Neither the name of Intel Corporation nor the names of its
18 *       contributors may be used to endorse or promote products derived
19 *       from this software without specific prior written permission.
20 *
21 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <ctype.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <stdarg.h>
39#include <unistd.h>
40#include <inttypes.h>
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <fcntl.h>
44#include <stdarg.h>
45#include <errno.h>
46#include <dirent.h>
47#include <limits.h>
48#include <sys/queue.h>
49#include <sys/mman.h>
50#include <sys/ioctl.h>
51#include <sys/pciio.h>
52#include <dev/pci/pcireg.h>
53
54#if defined(RTE_ARCH_X86)
55#include <sys/types.h>
56#include <machine/cpufunc.h>
57#endif
58
59#include <rte_interrupts.h>
60#include <rte_log.h>
61#include <rte_pci.h>
62#include <rte_common.h>
63#include <rte_launch.h>
64#include <rte_memory.h>
65#include <rte_memzone.h>
66#include <rte_eal.h>
67#include <rte_eal_memconfig.h>
68#include <rte_per_lcore.h>
69#include <rte_lcore.h>
70#include <rte_malloc.h>
71#include <rte_string_fns.h>
72#include <rte_debug.h>
73#include <rte_devargs.h>
74
75#include "eal_filesystem.h"
76#include "eal_private.h"
77
/**
 * @file
 * PCI probing under FreeBSD
 *
 * This code performs a PCI probe by querying the kernel through ioctls
 * on /dev/pci. Every device found is recorded as being handled by the
 * UIO pass-through kernel driver (RTE_KDRV_NIC_UIO), whose device nodes
 * (/dev/uio@pci:<bus>:<dev>:<func>) are opened and mapped to give
 * applications access to the PCI BARs.
 */
89
90/* Map pci device */
91int
92rte_eal_pci_map_device(struct rte_pci_device *dev)
93{
94	int ret = -1;
95
96	/* try mapping the NIC resources */
97	switch (dev->kdrv) {
98	case RTE_KDRV_NIC_UIO:
99		/* map resources for devices that use uio */
100		ret = pci_uio_map_resource(dev);
101		break;
102	default:
103		RTE_LOG(DEBUG, EAL,
104			"  Not managed by a supported kernel driver, skipped\n");
105		ret = 1;
106		break;
107	}
108
109	return ret;
110}
111
112/* Unmap pci device */
113void
114rte_eal_pci_unmap_device(struct rte_pci_device *dev)
115{
116	/* try unmapping the NIC resources */
117	switch (dev->kdrv) {
118	case RTE_KDRV_NIC_UIO:
119		/* unmap resources for devices that use uio */
120		pci_uio_unmap_resource(dev);
121		break;
122	default:
123		RTE_LOG(DEBUG, EAL,
124			"  Not managed by a supported kernel driver, skipped\n");
125		break;
126	}
127}
128
129void
130pci_uio_free_resource(struct rte_pci_device *dev,
131		struct mapped_pci_resource *uio_res)
132{
133	rte_free(uio_res);
134
135	if (dev->intr_handle.fd) {
136		close(dev->intr_handle.fd);
137		dev->intr_handle.fd = -1;
138		dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
139	}
140}
141
142int
143pci_uio_alloc_resource(struct rte_pci_device *dev,
144		struct mapped_pci_resource **uio_res)
145{
146	char devname[PATH_MAX]; /* contains the /dev/uioX */
147	struct rte_pci_addr *loc;
148
149	loc = &dev->addr;
150
151	snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u",
152			dev->addr.bus, dev->addr.devid, dev->addr.function);
153
154	if (access(devname, O_RDWR) < 0) {
155		RTE_LOG(WARNING, EAL, "  "PCI_PRI_FMT" not managed by UIO driver, "
156				"skipping\n", loc->domain, loc->bus, loc->devid, loc->function);
157		return 1;
158	}
159
160	/* save fd if in primary process */
161	dev->intr_handle.fd = open(devname, O_RDWR);
162	if (dev->intr_handle.fd < 0) {
163		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
164			devname, strerror(errno));
165		goto error;
166	}
167	dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
168
169	/* allocate the mapping details for secondary processes*/
170	*uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
171	if (*uio_res == NULL) {
172		RTE_LOG(ERR, EAL,
173			"%s(): cannot store uio mmap details\n", __func__);
174		goto error;
175	}
176
177	snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname);
178	memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
179
180	return 0;
181
182error:
183	pci_uio_free_resource(dev, *uio_res);
184	return -1;
185}
186
187int
188pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
189		struct mapped_pci_resource *uio_res, int map_idx)
190{
191	int fd;
192	char *devname;
193	void *mapaddr;
194	uint64_t offset;
195	uint64_t pagesz;
196	struct pci_map *maps;
197
198	maps = uio_res->maps;
199	devname = uio_res->path;
200	pagesz = sysconf(_SC_PAGESIZE);
201
202	/* allocate memory to keep path */
203	maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
204	if (maps[map_idx].path == NULL) {
205		RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n",
206				strerror(errno));
207		return -1;
208	}
209
210	/*
211	 * open resource file, to mmap it
212	 */
213	fd = open(devname, O_RDWR);
214	if (fd < 0) {
215		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
216				devname, strerror(errno));
217		goto error;
218	}
219
220	/* if matching map is found, then use it */
221	offset = res_idx * pagesz;
222	mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
223			(size_t)dev->mem_resource[res_idx].len, 0);
224	close(fd);
225	if (mapaddr == MAP_FAILED)
226		goto error;
227
228	maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
229	maps[map_idx].size = dev->mem_resource[res_idx].len;
230	maps[map_idx].addr = mapaddr;
231	maps[map_idx].offset = offset;
232	strcpy(maps[map_idx].path, devname);
233	dev->mem_resource[res_idx].addr = mapaddr;
234
235	return 0;
236
237error:
238	rte_free(maps[map_idx].path);
239	return -1;
240}
241
242static int
243pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
244{
245	struct rte_pci_device *dev;
246	struct pci_bar_io bar;
247	unsigned i, max;
248
249	dev = malloc(sizeof(*dev));
250	if (dev == NULL) {
251		return -1;
252	}
253
254	memset(dev, 0, sizeof(*dev));
255	dev->addr.domain = conf->pc_sel.pc_domain;
256	dev->addr.bus = conf->pc_sel.pc_bus;
257	dev->addr.devid = conf->pc_sel.pc_dev;
258	dev->addr.function = conf->pc_sel.pc_func;
259
260	/* get vendor id */
261	dev->id.vendor_id = conf->pc_vendor;
262
263	/* get device id */
264	dev->id.device_id = conf->pc_device;
265
266	/* get subsystem_vendor id */
267	dev->id.subsystem_vendor_id = conf->pc_subvendor;
268
269	/* get subsystem_device id */
270	dev->id.subsystem_device_id = conf->pc_subdevice;
271
272	/* get class id */
273	dev->id.class_id = (conf->pc_class << 16) |
274			   (conf->pc_subclass << 8) |
275			   (conf->pc_progif);
276
277	/* TODO: get max_vfs */
278	dev->max_vfs = 0;
279
280	/* FreeBSD has no NUMA support (yet) */
281	dev->device.numa_node = 0;
282
283	/* FreeBSD has only one pass through driver */
284	dev->kdrv = RTE_KDRV_NIC_UIO;
285
286	/* parse resources */
287	switch (conf->pc_hdr & PCIM_HDRTYPE) {
288	case PCIM_HDRTYPE_NORMAL:
289		max = PCIR_MAX_BAR_0;
290		break;
291	case PCIM_HDRTYPE_BRIDGE:
292		max = PCIR_MAX_BAR_1;
293		break;
294	case PCIM_HDRTYPE_CARDBUS:
295		max = PCIR_MAX_BAR_2;
296		break;
297	default:
298		goto skipdev;
299	}
300
301	for (i = 0; i <= max; i++) {
302		bar.pbi_sel = conf->pc_sel;
303		bar.pbi_reg = PCIR_BAR(i);
304		if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0)
305			continue;
306
307		dev->mem_resource[i].len = bar.pbi_length;
308		if (PCI_BAR_IO(bar.pbi_base)) {
309			dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf));
310			continue;
311		}
312		dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf);
313	}
314
315	/* device is valid, add in list (sorted) */
316	if (TAILQ_EMPTY(&pci_device_list)) {
317		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
318	}
319	else {
320		struct rte_pci_device *dev2 = NULL;
321		int ret;
322
323		TAILQ_FOREACH(dev2, &pci_device_list, next) {
324			ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
325			if (ret > 0)
326				continue;
327			else if (ret < 0) {
328				TAILQ_INSERT_BEFORE(dev2, dev, next);
329				return 0;
330			} else { /* already registered */
331				dev2->kdrv = dev->kdrv;
332				dev2->max_vfs = dev->max_vfs;
333				memmove(dev2->mem_resource,
334					dev->mem_resource,
335					sizeof(dev->mem_resource));
336				free(dev);
337				return 0;
338			}
339		}
340		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
341	}
342
343	return 0;
344
345skipdev:
346	free(dev);
347	return 0;
348}
349
350/*
351 * Scan the content of the PCI bus, and add the devices in the devices
352 * list. Call pci_scan_one() for each pci entry found.
353 */
354int
355rte_eal_pci_scan(void)
356{
357	int fd;
358	unsigned dev_count = 0;
359	struct pci_conf matches[16];
360	struct pci_conf_io conf_io = {
361			.pat_buf_len = 0,
362			.num_patterns = 0,
363			.patterns = NULL,
364			.match_buf_len = sizeof(matches),
365			.matches = &matches[0],
366	};
367
368	fd = open("/dev/pci", O_RDONLY);
369	if (fd < 0) {
370		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
371		goto error;
372	}
373
374	do {
375		unsigned i;
376		if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
377			RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
378					__func__, strerror(errno));
379			goto error;
380		}
381
382		for (i = 0; i < conf_io.num_matches; i++)
383			if (pci_scan_one(fd, &matches[i]) < 0)
384				goto error;
385
386		dev_count += conf_io.num_matches;
387	} while(conf_io.status == PCI_GETCONF_MORE_DEVS);
388
389	close(fd);
390
391	RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count);
392	return 0;
393
394error:
395	if (fd >= 0)
396		close(fd);
397	return -1;
398}
399
400int
401pci_update_device(const struct rte_pci_addr *addr)
402{
403	int fd;
404	struct pci_conf matches[2];
405	struct pci_match_conf match = {
406		.pc_sel = {
407			.pc_domain = addr->domain,
408			.pc_bus = addr->bus,
409			.pc_dev = addr->devid,
410			.pc_func = addr->function,
411		},
412	};
413	struct pci_conf_io conf_io = {
414		.pat_buf_len = 0,
415		.num_patterns = 1,
416		.patterns = &match,
417		.match_buf_len = sizeof(matches),
418		.matches = &matches[0],
419	};
420
421	fd = open("/dev/pci", O_RDONLY);
422	if (fd < 0) {
423		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
424		goto error;
425	}
426
427	if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) {
428		RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n",
429				__func__, strerror(errno));
430		goto error;
431	}
432
433	if (conf_io.num_matches != 1)
434		goto error;
435
436	if (pci_scan_one(fd, &matches[0]) < 0)
437		goto error;
438
439	close(fd);
440
441	return 0;
442
443error:
444	if (fd >= 0)
445		close(fd);
446	return -1;
447}
448
449/* Read PCI config space. */
450int rte_eal_pci_read_config(const struct rte_pci_device *dev,
451			    void *buf, size_t len, off_t offset)
452{
453	int fd = -1;
454	struct pci_io pi = {
455		.pi_sel = {
456			.pc_domain = dev->addr.domain,
457			.pc_bus = dev->addr.bus,
458			.pc_dev = dev->addr.devid,
459			.pc_func = dev->addr.function,
460		},
461		.pi_reg = offset,
462		.pi_width = len,
463	};
464
465	if (len == 3 || len > sizeof(pi.pi_data)) {
466		RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
467		goto error;
468	}
469
470	fd = open("/dev/pci", O_RDWR);
471	if (fd < 0) {
472		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
473		goto error;
474	}
475
476	if (ioctl(fd, PCIOCREAD, &pi) < 0)
477		goto error;
478	close(fd);
479
480	memcpy(buf, &pi.pi_data, len);
481	return 0;
482
483 error:
484	if (fd >= 0)
485		close(fd);
486	return -1;
487}
488
489/* Write PCI config space. */
490int rte_eal_pci_write_config(const struct rte_pci_device *dev,
491			     const void *buf, size_t len, off_t offset)
492{
493	int fd = -1;
494
495	struct pci_io pi = {
496		.pi_sel = {
497			.pc_domain = dev->addr.domain,
498			.pc_bus = dev->addr.bus,
499			.pc_dev = dev->addr.devid,
500			.pc_func = dev->addr.function,
501		},
502		.pi_reg = offset,
503		.pi_data = *(const uint32_t *)buf,
504		.pi_width = len,
505	};
506
507	if (len == 3 || len > sizeof(pi.pi_data)) {
508		RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__);
509		goto error;
510	}
511
512	memcpy(&pi.pi_data, buf, len);
513
514	fd = open("/dev/pci", O_RDWR);
515	if (fd < 0) {
516		RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__);
517		goto error;
518	}
519
520	if (ioctl(fd, PCIOCWRITE, &pi) < 0)
521		goto error;
522
523	close(fd);
524	return 0;
525
526 error:
527	if (fd >= 0)
528		close(fd);
529	return -1;
530}
531
532int
533rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
534		       struct rte_pci_ioport *p)
535{
536	int ret;
537
538	switch (dev->kdrv) {
539#if defined(RTE_ARCH_X86)
540	case RTE_KDRV_NIC_UIO:
541		if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) {
542			p->base = (uintptr_t)dev->mem_resource[bar].addr;
543			ret = 0;
544		} else
545			ret = -1;
546		break;
547#endif
548	default:
549		ret = -1;
550		break;
551	}
552
553	if (!ret)
554		p->dev = dev;
555
556	return ret;
557}
558
/*
 * Read len bytes into data from the I/O ports starting at p->base + offset.
 *
 * On x86 the transfer uses the widest access that still fits in the
 * remaining length: 4 bytes (inl), then 2 bytes (inw), then 1 byte (inb).
 * On other architectures this function does nothing.
 */
static void
pci_uio_ioport_read(struct rte_pci_ioport *p,
		    void *data, size_t len, off_t offset)
{
#if defined(RTE_ARCH_X86)
	uint8_t *dst = data;
	unsigned short port = p->base + offset;
	int chunk;

	while (len > 0) {
		if (len >= 4) {
			chunk = 4;
			*(uint32_t *)dst = inl(port);
		} else if (len >= 2) {
			chunk = 2;
			*(uint16_t *)dst = inw(port);
		} else {
			chunk = 1;
			*dst = inb(port);
		}

		/* advance past the bytes just transferred */
		dst += chunk;
		port += chunk;
		len -= chunk;
	}
#else
	RTE_SET_USED(p);
	RTE_SET_USED(data);
	RTE_SET_USED(len);
	RTE_SET_USED(offset);
#endif
}
587
588void
589rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
590			void *data, size_t len, off_t offset)
591{
592	switch (p->dev->kdrv) {
593	case RTE_KDRV_NIC_UIO:
594		pci_uio_ioport_read(p, data, len, offset);
595		break;
596	default:
597		break;
598	}
599}
600
/*
 * Write len bytes from data to the I/O ports starting at p->base + offset.
 *
 * On x86 the transfer uses the widest access that still fits in the
 * remaining length: 4 bytes (outl), then 2 bytes (outw), then 1 byte
 * (outb). On other architectures this function does nothing.
 */
static void
pci_uio_ioport_write(struct rte_pci_ioport *p,
		     const void *data, size_t len, off_t offset)
{
#if defined(RTE_ARCH_X86)
	const uint8_t *src = data;
	unsigned short port = p->base + offset;
	int chunk;

	while (len > 0) {
		if (len >= 4) {
			chunk = 4;
			outl(*(const uint32_t *)src, port);
		} else if (len >= 2) {
			chunk = 2;
			outw(*(const uint16_t *)src, port);
		} else {
			chunk = 1;
			outb(*src, port);
		}

		/* advance past the bytes just transferred */
		src += chunk;
		port += chunk;
		len -= chunk;
	}
#else
	RTE_SET_USED(p);
	RTE_SET_USED(data);
	RTE_SET_USED(len);
	RTE_SET_USED(offset);
#endif
}
629
630void
631rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
632			 const void *data, size_t len, off_t offset)
633{
634	switch (p->dev->kdrv) {
635	case RTE_KDRV_NIC_UIO:
636		pci_uio_ioport_write(p, data, len, offset);
637		break;
638	default:
639		break;
640	}
641}
642
643int
644rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
645{
646	int ret;
647
648	switch (p->dev->kdrv) {
649#if defined(RTE_ARCH_X86)
650	case RTE_KDRV_NIC_UIO:
651		ret = 0;
652		break;
653#endif
654	default:
655		ret = -1;
656		break;
657	}
658
659	return ret;
660}
661
662/* Init the PCI EAL subsystem */
663int
664rte_eal_pci_init(void)
665{
666	/* for debug purposes, PCI can be disabled */
667	if (internal_config.no_pci)
668		return 0;
669
670	if (rte_eal_pci_scan() < 0) {
671		RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
672		return -1;
673	}
674	return 0;
675}
676