pci.c revision 0eae2bb1
1/*
2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * pci.c: Linux user space PCI bus management.
17 *
18 * Copyright (c) 2008 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vppinfra/linux/sysfs.h>
41
42#include <vlib/vlib.h>
43#include <vlib/pci/pci.h>
44#include <vlib/unix/unix.h>
45#include <vlib/linux/vfio.h>
46
47#include <sys/types.h>
48#include <sys/stat.h>
49#include <fcntl.h>
50#include <dirent.h>
51#include <sys/ioctl.h>
52#include <net/if.h>
53#include <linux/ethtool.h>
54#include <linux/sockios.h>
55#include <linux/vfio.h>
56#include <sys/eventfd.h>
57
/* sysfs directory under which each PCI device has an entry named after its
   address; device paths in this file are built as "<this>/<addr>/...". */
static const char *sysfs_pci_dev_path = "/sys/bus/pci/devices";
/* sysfs directory under which each PCI driver has an entry; used in this
   file to reach "<this>/<driver>/new_id" and "<this>/<driver>/bind". */
static const char *sysfs_pci_drv_path = "/sys/bus/pci/drivers";
/* vfio module parameter file; vlib_pci_bind_to_uio () writes "Y" to it
   before selecting vfio-pci for a device without an IOMMU group. */
static char *sysfs_mod_vfio_noiommu =
  "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode";
62
/* Logging helpers. Both prefix the message with the PCI address of `dev'
   (looked up through dev->handle) and log to pci_main.log_default; they
   differ only in the log level (DEBUG vs. ERR). `f' must be a string
   literal because it is concatenated with the "%U: " prefix. */
#define pci_log_debug(vm, dev, f, ...) \
  vlib_log(VLIB_LOG_LEVEL_DEBUG, pci_main.log_default, "%U: " f, \
           format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__)
#define pci_log_err(vm, dev, f, ...) \
  vlib_log(VLIB_LOG_LEVEL_ERR, pci_main.log_default, "%U: " f, \
           format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__)
69
/* Book-keeping for one mapped BAR; stored in linux_pci_device_t.regions
   and indexed by BAR number. */
typedef struct
{
  /* File descriptor backing the mapping; entries are created with -1 and
     the mapping code only stores a descriptor here for UIO devices. */
  int fd;
  /* Address returned by mmap () for this region. */
  void *addr;
  /* Size of the mapping in bytes. */
  size_t size;
} linux_pci_region_t;
76
/* State of a single interrupt source (the INTx line or one MSI-X vector). */
typedef struct
{
  /* eventfd created for the interrupt on VFIO devices; -1 is used as the
     "not registered" value by the registration code. */
  int fd;
  /* Index returned by clib_file_add () for the file watching the interrupt. */
  u32 clib_file_index;
  /* Callback invoked when the interrupt fires; which member is valid
     depends on whether this entry is the INTx or an MSI-X interrupt. */
  union
  {
    pci_intx_handler_function_t *intx_handler;
    pci_msix_handler_function_t *msix_handler;
  };
} linux_pci_irq_t;
87
/* Kernel interface through which a device is accessed. */
typedef enum
{
  /* Not (yet) opened through either interface. */
  LINUX_PCI_DEVICE_TYPE_UNKNOWN,
  /* Accessed through /dev/uioN and sysfs resource files. */
  LINUX_PCI_DEVICE_TYPE_UIO,
  /* Accessed through a VFIO device file descriptor. */
  LINUX_PCI_DEVICE_TYPE_VFIO,
} linux_pci_device_type_t;
94
/* Per-device state; elements live in linux_pci_main.linux_pci_devices and
   are looked up by handle with linux_pci_get_device (). */
typedef struct
{
  /* UIO or VFIO; selects the code path in most functions of this file. */
  linux_pci_device_type_t type;
  /* Handle of this device (used as the pool index by the lookup helper). */
  vlib_pci_dev_handle_t handle;
  /* PCI address of the device. */
  vlib_pci_addr_t addr;
  /* Value reported by vlib_pci_get_numa_node (). */
  u32 numa_node;

  /* Resource file descriptors. */
  linux_pci_region_t *regions;

  /* File descriptor for config space read/write. */
  int config_fd;
  /* Offset added to every config space access on config_fd. */
  u64 config_offset;

  /* Device File descriptor */
  int fd;

  /* read/write file descriptor for io bar */
  int io_fd;
  u64 io_offset;

  /* Minor device for uio device. */
  u32 uio_minor;

  /* Interrupt handlers */
  linux_pci_irq_t intx_irq;
  linux_pci_irq_t *msix_irqs;

  /* private data */
  uword private_data;

  /* Set by add_device_vfio () when the VFIO group is not in no-IOMMU mode. */
  u8 supports_va_dma;

} linux_pci_device_t;
129
/* Pool of PCI devices. */
typedef struct
{
  /* vlib main passed to driver init functions by the add_device_* code. */
  vlib_main_t *vlib_main;
  /* Pool of per-device state, indexed by vlib_pci_dev_handle_t. */
  linux_pci_device_t *linux_pci_devices;

} linux_pci_main_t;

/* Forward declaration; the object itself is defined further down in
   this file. */
extern linux_pci_main_t linux_pci_main;
139
140static linux_pci_device_t *
141linux_pci_get_device (vlib_pci_dev_handle_t h)
142{
143  linux_pci_main_t *lpm = &linux_pci_main;
144  return pool_elt_at_index (lpm->linux_pci_devices, h);
145}
146
147uword
148vlib_pci_get_private_data (vlib_main_t * vm, vlib_pci_dev_handle_t h)
149{
150  linux_pci_device_t *d = linux_pci_get_device (h);
151  return d->private_data;
152}
153
154void
155vlib_pci_set_private_data (vlib_main_t * vm, vlib_pci_dev_handle_t h,
156			   uword private_data)
157{
158  linux_pci_device_t *d = linux_pci_get_device (h);
159  d->private_data = private_data;
160}
161
162vlib_pci_addr_t *
163vlib_pci_get_addr (vlib_main_t * vm, vlib_pci_dev_handle_t h)
164{
165  linux_pci_device_t *d = linux_pci_get_device (h);
166  return &d->addr;
167}
168
169u32
170vlib_pci_get_numa_node (vlib_main_t * vm, vlib_pci_dev_handle_t h)
171{
172  linux_pci_device_t *d = linux_pci_get_device (h);
173  return d->numa_node;
174}
175
176u32
177vlib_pci_get_num_msix_interrupts (vlib_main_t * vm, vlib_pci_dev_handle_t h)
178{
179  linux_pci_device_t *d = linux_pci_get_device (h);
180
181  if (d->type == LINUX_PCI_DEVICE_TYPE_VFIO)
182    {
183      struct vfio_irq_info ii = { 0 };
184
185      ii.argsz = sizeof (struct vfio_irq_info);
186      ii.index = VFIO_PCI_MSIX_IRQ_INDEX;
187      if (ioctl (d->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0)
188	return 0;
189      return ii.count;
190    }
191  return 0;
192}
193
/* Call to allocate/initialize the pci subsystem.
   This is not an init function so that users can explicitly enable
   pci only when it's needed. */
clib_error_t *pci_bus_init (vlib_main_t * vm);

/* Global state of the Linux PCI layer (device pool and vlib main);
   this is the definition matching the earlier `extern' declaration. */
linux_pci_main_t linux_pci_main;
200
201vlib_pci_device_info_t *
202vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr,
203			  clib_error_t ** error)
204{
205  linux_vfio_main_t *lvm = &vfio_main;
206  clib_error_t *err;
207  vlib_pci_device_info_t *di;
208  u8 *f = 0;
209  u32 tmp;
210  int fd;
211
212  di = clib_mem_alloc (sizeof (vlib_pci_device_info_t));
213  clib_memset (di, 0, sizeof (vlib_pci_device_info_t));
214  di->addr.as_u32 = addr->as_u32;
215
216  u8 *dev_dir_name = format (0, "%s/%U", sysfs_pci_dev_path,
217			     format_vlib_pci_addr, addr);
218
219  f = format (0, "%v/config%c", dev_dir_name, 0);
220  fd = open ((char *) f, O_RDWR);
221
222  /* Try read-only access if write fails. */
223  if (fd < 0)
224    fd = open ((char *) f, O_RDONLY);
225
226  if (fd < 0)
227    {
228      err = clib_error_return_unix (0, "open `%s'", f);
229      goto error;
230    }
231
232  /* You can only read more that 64 bytes of config space as root; so we try to
233     read the full space but fall back to just the first 64 bytes. */
234  if (read (fd, &di->config_data, sizeof (di->config_data)) <
235      sizeof (di->config0))
236    {
237      err = clib_error_return_unix (0, "read `%s'", f);
238      close (fd);
239      goto error;
240    }
241
242  {
243    static pci_config_header_t all_ones;
244    if (all_ones.vendor_id == 0)
245      clib_memset (&all_ones, ~0, sizeof (all_ones));
246
247    if (!memcmp (&di->config0.header, &all_ones, sizeof (all_ones)))
248      {
249	err = clib_error_return (0, "invalid PCI config for `%s'", f);
250	close (fd);
251	goto error;
252      }
253  }
254
255  if (di->config0.header.header_type == 0)
256    pci_config_type0_little_to_host (&di->config0);
257  else
258    pci_config_type1_little_to_host (&di->config1);
259
260  di->numa_node = -1;
261  vec_reset_length (f);
262  f = format (f, "%v/numa_node%c", dev_dir_name, 0);
263  err = clib_sysfs_read ((char *) f, "%u", &di->numa_node);
264  if (err)
265    {
266      di->numa_node = -1;
267      clib_error_free (err);
268    }
269
270  vec_reset_length (f);
271  f = format (f, "%v/class%c", dev_dir_name, 0);
272  err = clib_sysfs_read ((char *) f, "0x%x", &tmp);
273  if (err)
274    goto error;
275  di->device_class = tmp >> 8;
276
277  vec_reset_length (f);
278  f = format (f, "%v/vendor%c", dev_dir_name, 0);
279  err = clib_sysfs_read ((char *) f, "0x%x", &tmp);
280  if (err)
281    goto error;
282  di->vendor_id = tmp;
283
284  vec_reset_length (f);
285  f = format (f, "%v/device%c", dev_dir_name, 0);
286  err = clib_sysfs_read ((char *) f, "0x%x", &tmp);
287  if (err)
288    goto error;
289  di->device_id = tmp;
290
291  vec_reset_length (f);
292  f = format (f, "%v/driver%c", dev_dir_name, 0);
293  di->driver_name = clib_sysfs_link_to_name ((char *) f);
294  if (!di->driver_name)
295    di->driver_name = format (0, "<NONE>%c", 0);
296
297  di->iommu_group = -1;
298  if (lvm->container_fd != -1)
299    {
300      u8 *tmpstr;
301      vec_reset_length (f);
302      f = format (f, "%v/iommu_group%c", dev_dir_name, 0);
303      tmpstr = clib_sysfs_link_to_name ((char *) f);
304      if (tmpstr)
305	{
306	  di->iommu_group = atoi ((char *) tmpstr);
307	  vec_free (tmpstr);
308	}
309      vec_reset_length (f);
310      f = format (f, "%v/iommu_group/name%c", dev_dir_name, 0);
311      err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
312      if (err == 0)
313	{
314	  if (strncmp ((char *) tmpstr, "vfio-noiommu", 12) == 0)
315	    di->flags |= VLIB_PCI_DEVICE_INFO_F_NOIOMMU;
316	  vec_free (tmpstr);
317	}
318      else
319	clib_error_free (err);
320    }
321
322  close (fd);
323
324  vec_reset_length (f);
325  f = format (f, "%v/vpd%c", dev_dir_name, 0);
326  fd = open ((char *) f, O_RDONLY);
327  if (fd >= 0)
328    {
329      while (1)
330	{
331	  u8 tag[3];
332	  u8 *data = 0;
333	  uword len;
334
335	  if (read (fd, &tag, 3) != 3)
336	    break;
337
338	  if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91)
339	    break;
340
341	  len = (tag[2] << 8) | tag[1];
342	  vec_validate (data, len);
343
344	  if (read (fd, data, len) != len)
345	    {
346	      vec_free (data);
347	      break;
348	    }
349	  if (tag[0] == 0x82)
350	    di->product_name = data;
351	  else if (tag[0] == 0x90)
352	    di->vpd_r = data;
353	  else if (tag[0] == 0x91)
354	    di->vpd_w = data;
355
356	  data = 0;
357	}
358      close (fd);
359    }
360
361  goto done;
362
363error:
364  vlib_pci_free_device_info (di);
365  di = 0;
366
367done:
368  vec_free (f);
369  vec_free (dev_dir_name);
370  if (error)
371    *error = err;
372  else
373    clib_error_free (err);
374  return di;
375}
376
/* Return non-zero when `path' can be stat ()ed and names a directory,
   0 otherwise (including when stat () fails). */
static int
directory_exists (char *path)
{
  struct stat st = { 0 };

  return stat (path, &st) != -1 ? S_ISDIR (st.st_mode) : 0;
}
386
387clib_error_t *
388vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
389		      char *uio_drv_name)
390{
391  clib_error_t *error = 0;
392  u8 *s = 0, *driver_name = 0;
393  DIR *dir = 0;
394  struct dirent *e;
395  vlib_pci_device_info_t *di;
396  int fd, clear_driver_override = 0;
397  u8 *dev_dir_name = format (0, "%s/%U", sysfs_pci_dev_path,
398			     format_vlib_pci_addr, addr);
399
400  di = vlib_pci_get_device_info (vm, addr, &error);
401
402  if (error)
403    return error;
404
405  if (strncmp ("auto", uio_drv_name, 5) == 0)
406    {
407      int vfio_pci_loaded = 0;
408
409      if (directory_exists ("/sys/module/vfio_pci"))
410	vfio_pci_loaded = 1;
411
412      if (di->iommu_group != -1)
413	{
414	  /* device is bound to IOMMU group */
415	  if (!vfio_pci_loaded)
416	    {
417	      error = clib_error_return (0, "Skipping PCI device %U: device "
418					 "is bound to IOMMU group and "
419					 "vfio-pci driver is not loaded",
420					 format_vlib_pci_addr, addr);
421	      goto done;
422	    }
423	  else
424	    uio_drv_name = "vfio-pci";
425	}
426      else
427	{
428	  /* device is not bound to IOMMU group so we have multiple options */
429	  if (vfio_pci_loaded &&
430	      (error = clib_sysfs_write (sysfs_mod_vfio_noiommu, "Y")) == 0)
431	    uio_drv_name = "vfio-pci";
432	  else if (directory_exists ("/sys/module/uio_pci_generic"))
433	    uio_drv_name = "uio_pci_generic";
434	  else if (directory_exists ("/sys/module/igb_uio"))
435	    uio_drv_name = "igb_uio";
436	  else
437	    {
438	      clib_error_free (error);
439	      error = clib_error_return (0, "Skipping PCI device %U: missing "
440					 "kernel VFIO or UIO driver",
441					 format_vlib_pci_addr, addr);
442	      goto done;
443	    }
444	  clib_error_free (error);
445	}
446    }
447
448  s = format (s, "%v/driver%c", dev_dir_name, 0);
449  driver_name = clib_sysfs_link_to_name ((char *) s);
450  vec_reset_length (s);
451
452  if (driver_name &&
453      ((strcmp ("vfio-pci", (char *) driver_name) == 0) ||
454       (strcmp ("uio_pci_generic", (char *) driver_name) == 0) ||
455       (strcmp ("igb_uio", (char *) driver_name) == 0)))
456    goto done;
457
458  /* walk trough all linux interfaces and if interface belonging to
459     this device is founf check if interface is admin up  */
460  dir = opendir ("/sys/class/net");
461  s = format (s, "%U%c", format_vlib_pci_addr, addr, 0);
462
463  if (!dir)
464    {
465      error = clib_error_return (0, "Skipping PCI device %U: failed to "
466				 "read /sys/class/net",
467				 format_vlib_pci_addr, addr);
468      goto done;
469    }
470
471  fd = socket (PF_INET, SOCK_DGRAM, 0);
472  if (fd < 0)
473    {
474      error = clib_error_return_unix (0, "socket");
475      goto done;
476    }
477
478  while ((e = readdir (dir)))
479    {
480      struct ifreq ifr;
481      struct ethtool_drvinfo drvinfo;
482
483      if (e->d_name[0] == '.')	/* skip . and .. */
484	continue;
485
486      clib_memset (&ifr, 0, sizeof ifr);
487      clib_memset (&drvinfo, 0, sizeof drvinfo);
488      ifr.ifr_data = (char *) &drvinfo;
489      clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
490
491      drvinfo.cmd = ETHTOOL_GDRVINFO;
492      if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
493	{
494	  /* Some interfaces (eg "lo") don't support this ioctl */
495	  if ((errno != ENOTSUP) && (errno != ENODEV))
496	    clib_unix_warning ("ioctl fetch intf %s bus info error",
497			       e->d_name);
498	  continue;
499	}
500
501      if (strcmp ((char *) s, drvinfo.bus_info))
502	continue;
503
504      clib_memset (&ifr, 0, sizeof (ifr));
505      clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
506
507      if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
508	{
509	  error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
510					  e->d_name);
511	  close (fd);
512	  goto done;
513	}
514
515      if (ifr.ifr_flags & IFF_UP)
516	{
517	  error = clib_error_return (0, "Skipping PCI device %U as host "
518				     "interface %s is up",
519				     format_vlib_pci_addr, addr, e->d_name);
520	  close (fd);
521	  goto done;
522	}
523    }
524
525  close (fd);
526  vec_reset_length (s);
527
528  s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
529  clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, addr);
530  vec_reset_length (s);
531
532  s = format (s, "%v/driver_override%c", dev_dir_name, 0);
533  if (access ((char *) s, F_OK) == 0)
534    {
535      clib_sysfs_write ((char *) s, "%s", uio_drv_name);
536      clear_driver_override = 1;
537    }
538  else
539    {
540      vec_reset_length (s);
541      s = format (s, "%s/%s/new_id%c", sysfs_pci_drv_path, uio_drv_name, 0);
542      clib_sysfs_write ((char *) s, "0x%04x 0x%04x", di->vendor_id,
543			di->device_id);
544    }
545  vec_reset_length (s);
546
547  s = format (s, "%s/%s/bind%c", sysfs_pci_drv_path, uio_drv_name, 0);
548  clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, addr);
549  vec_reset_length (s);
550
551  if (clear_driver_override)
552    {
553      s = format (s, "%v/driver_override%c", dev_dir_name, 0);
554      clib_sysfs_write ((char *) s, "%c", 0);
555      vec_reset_length (s);
556    }
557
558done:
559  closedir (dir);
560  vec_free (s);
561  vec_free (dev_dir_name);
562  vec_free (driver_name);
563  return error;
564}
565
566
567static clib_error_t *
568scan_uio_dir (void *arg, u8 * path_name, u8 * file_name)
569{
570  linux_pci_device_t *l = arg;
571  unformat_input_t input;
572
573  unformat_init_string (&input, (char *) file_name, vec_len (file_name));
574
575  if (!unformat (&input, "uio%d", &l->uio_minor))
576    abort ();
577
578  unformat_free (&input);
579  return 0;
580}
581
582static clib_error_t *
583vfio_set_irqs (vlib_main_t * vm, linux_pci_device_t * p, u32 index, u32 start,
584	       u32 count, u32 flags, int *efds)
585{
586  int data_len = efds ? count * sizeof (int) : 0;
587  u8 buf[sizeof (struct vfio_irq_set) + data_len];
588  struct vfio_irq_info ii = { 0 };
589  struct vfio_irq_set *irq_set = (struct vfio_irq_set *) buf;
590
591
592  ii.argsz = sizeof (struct vfio_irq_info);
593  ii.index = index;
594
595  if (ioctl (p->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0)
596    return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_IRQ_INFO) "
597				   "'%U'", format_vlib_pci_addr, &p->addr);
598
599  pci_log_debug (vm, p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)",
600		 __func__, ii.index, ii.count,
601		 ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "",
602		 ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "",
603		 ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "",
604		 ii.flags & VFIO_IRQ_INFO_NORESIZE ? "noresize " : "",
605		 ii.flags);
606
607  if (ii.count < start + count)
608    return clib_error_return_unix (0, "vfio_set_irq: unexistng interrupt on "
609				   "'%U'", format_vlib_pci_addr, &p->addr);
610
611
612  if (efds)
613    {
614      flags |= VFIO_IRQ_SET_DATA_EVENTFD;
615      clib_memcpy_fast (&irq_set->data, efds, data_len);
616    }
617  else
618    flags |= VFIO_IRQ_SET_DATA_NONE;
619
620  ASSERT ((flags & (VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_DATA_EVENTFD)) !=
621	  (VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_DATA_EVENTFD));
622
623  irq_set->argsz = sizeof (struct vfio_irq_set) + data_len;
624  irq_set->index = index;
625  irq_set->start = start;
626  irq_set->count = count;
627  irq_set->flags = flags;
628
629  if (ioctl (p->fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0)
630    return clib_error_return_unix (0, "%U:ioctl(VFIO_DEVICE_SET_IRQS) "
631				   "[index = %u, start = %u, count = %u, "
632				   "flags = 0x%x]",
633				   format_vlib_pci_addr, &p->addr,
634				   index, start, count, flags);
635  return 0;
636}
637
638static clib_error_t *
639linux_pci_uio_read_ready (clib_file_t * uf)
640{
641  vlib_main_t *vm = vlib_get_main ();
642  int __attribute__ ((unused)) rv;
643  vlib_pci_dev_handle_t h = uf->private_data;
644  linux_pci_device_t *p = linux_pci_get_device (h);
645  linux_pci_irq_t *irq = &p->intx_irq;
646
647  u32 icount;
648  rv = read (uf->file_descriptor, &icount, 4);
649
650  if (irq->intx_handler)
651    irq->intx_handler (vm, h);
652
653  vlib_pci_intr_enable (vm, h);
654
655  return /* no error */ 0;
656}
657
658static clib_error_t *
659linux_pci_vfio_unmask_intx (vlib_main_t * vm, linux_pci_device_t * d)
660{
661  return vfio_set_irqs (vm, d, VFIO_PCI_INTX_IRQ_INDEX, 0, 1,
662			VFIO_IRQ_SET_ACTION_UNMASK, 0);
663}
664
665static clib_error_t *
666linux_pci_uio_error_ready (clib_file_t * uf)
667{
668  u32 error_index = (u32) uf->private_data;
669
670  return clib_error_return (0, "pci device %d: error", error_index);
671}
672
673static clib_error_t *
674linux_pci_vfio_msix_read_ready (clib_file_t * uf)
675{
676  vlib_main_t *vm = vlib_get_main ();
677  int __attribute__ ((unused)) rv;
678  vlib_pci_dev_handle_t h = uf->private_data >> 16;
679  u16 line = uf->private_data & 0xffff;
680  linux_pci_device_t *p = linux_pci_get_device (h);
681  linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, line);
682
683  u64 icount;
684  rv = read (uf->file_descriptor, &icount, sizeof (icount));
685
686  if (irq->msix_handler)
687    irq->msix_handler (vm, h, line);
688
689  return /* no error */ 0;
690}
691
692static clib_error_t *
693linux_pci_vfio_intx_read_ready (clib_file_t * uf)
694{
695  vlib_main_t *vm = vlib_get_main ();
696  int __attribute__ ((unused)) rv;
697  vlib_pci_dev_handle_t h = uf->private_data;
698  linux_pci_device_t *p = linux_pci_get_device (h);
699  linux_pci_irq_t *irq = &p->intx_irq;
700
701  u64 icount;
702  rv = read (uf->file_descriptor, &icount, sizeof (icount));
703
704  if (irq->intx_handler)
705    irq->intx_handler (vm, h);
706
707  linux_pci_vfio_unmask_intx (vm, p);
708
709  return /* no error */ 0;
710}
711
712static clib_error_t *
713linux_pci_vfio_error_ready (clib_file_t * uf)
714{
715  u32 error_index = (u32) uf->private_data;
716
717  return clib_error_return (0, "pci device %d: error", error_index);
718}
719
720static clib_error_t *
721add_device_uio (vlib_main_t * vm, linux_pci_device_t * p,
722		vlib_pci_device_info_t * di, pci_device_registration_t * r)
723{
724  linux_pci_main_t *lpm = &linux_pci_main;
725  clib_error_t *err = 0;
726  u8 *s = 0;
727
728  p->fd = -1;
729  p->type = LINUX_PCI_DEVICE_TYPE_UIO;
730
731  s = format (s, "%s/%U/config%c", sysfs_pci_dev_path,
732	      format_vlib_pci_addr, &di->addr, 0);
733
734  p->config_fd = open ((char *) s, O_RDWR);
735  p->config_offset = 0;
736  vec_reset_length (s);
737
738  if (p->config_fd == -1)
739    {
740      err = clib_error_return_unix (0, "open '%s'", s);
741      goto error;
742    }
743
744  s = format (0, "%s/%U/uio", sysfs_pci_dev_path,
745	      format_vlib_pci_addr, &di->addr);
746  foreach_directory_file ((char *) s, scan_uio_dir, p,	/* scan_dirs */
747			  1);
748  vec_reset_length (s);
749
750  s = format (s, "/dev/uio%d%c", p->uio_minor, 0);
751  p->fd = open ((char *) s, O_RDWR);
752  if (p->fd < 0)
753    {
754      err = clib_error_return_unix (0, "open '%s'", s);
755      goto error;
756    }
757
758  if (r && r->interrupt_handler)
759    vlib_pci_register_intx_handler (vm, p->handle, r->interrupt_handler);
760
761  if (r && r->init_function)
762    err = r->init_function (lpm->vlib_main, p->handle);
763
764error:
765  vec_free (s);
766  if (err)
767    {
768      if (p->config_fd != -1)
769	close (p->config_fd);
770      if (p->fd != -1)
771	close (p->fd);
772    }
773  return err;
774}
775
776clib_error_t *
777vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
778				pci_intx_handler_function_t * intx_handler)
779{
780  linux_pci_device_t *p = linux_pci_get_device (h);
781  clib_file_t t = { 0 };
782  linux_pci_irq_t *irq = &p->intx_irq;
783  ASSERT (irq->fd == -1);
784
785  if (p->type == LINUX_PCI_DEVICE_TYPE_VFIO)
786    {
787      struct vfio_irq_info ii = { 0 };
788      ii.argsz = sizeof (struct vfio_irq_info);
789      ii.index = VFIO_PCI_INTX_IRQ_INDEX;
790      if (ioctl (p->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0)
791	return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_IRQ_INFO) '"
792				       "%U'", format_vlib_pci_addr, &p->addr);
793      pci_log_debug (vm, p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)",
794		     __func__, ii.index, ii.count,
795		     ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "",
796		     ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "",
797		     ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "",
798		     ii.flags & VFIO_IRQ_INFO_NORESIZE ? "noresize " : "",
799		     ii.flags);
800      if (ii.count != 1)
801	return clib_error_return (0, "INTx interrupt does not exist on device"
802				  "'%U'", format_vlib_pci_addr, &p->addr);
803
804      irq->fd = eventfd (0, EFD_NONBLOCK);
805      if (irq->fd == -1)
806	return clib_error_return_unix (0, "eventfd");
807
808      t.file_descriptor = irq->fd;
809      t.read_function = linux_pci_vfio_intx_read_ready;
810    }
811  else if (p->type == LINUX_PCI_DEVICE_TYPE_UIO)
812    {
813      t.file_descriptor = p->fd;
814      t.read_function = linux_pci_uio_read_ready;
815    }
816  else
817    return 0;
818
819  t.error_function = linux_pci_uio_error_ready;
820  t.private_data = p->handle;
821  t.description = format (0, "PCI %U INTx", format_vlib_pci_addr, &p->addr);
822  irq->clib_file_index = clib_file_add (&file_main, &t);
823  irq->intx_handler = intx_handler;
824  return 0;
825}
826
827clib_error_t *
828vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
829				u32 start, u32 count,
830				pci_msix_handler_function_t * msix_handler)
831{
832  clib_error_t *err = 0;
833  linux_pci_device_t *p = linux_pci_get_device (h);
834  u32 i;
835
836  if (p->type != LINUX_PCI_DEVICE_TYPE_VFIO)
837    return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
838			      "support");
839
840  /* *INDENT-OFF* */
841  vec_validate_init_empty (p->msix_irqs, start + count - 1, (linux_pci_irq_t)
842			   { .fd = -1});
843  /* *INDENT-ON* */
844
845  for (i = start; i < start + count; i++)
846    {
847      clib_file_t t = { 0 };
848      linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i);
849      ASSERT (irq->fd == -1);
850
851      irq->fd = eventfd (0, EFD_NONBLOCK);
852      if (irq->fd == -1)
853	{
854	  err = clib_error_return_unix (0, "eventfd");
855	  goto error;
856	}
857
858      t.read_function = linux_pci_vfio_msix_read_ready;
859      t.file_descriptor = irq->fd;
860      t.error_function = linux_pci_vfio_error_ready;
861      t.private_data = p->handle << 16 | i;
862      t.description = format (0, "PCI %U MSI-X #%u", format_vlib_pci_addr,
863			      &p->addr, i);
864      irq->clib_file_index = clib_file_add (&file_main, &t);
865      irq->msix_handler = msix_handler;
866    }
867
868  return 0;
869
870error:
871  while (i-- > start)
872    {
873      linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i);
874      if (irq->fd != -1)
875	{
876	  clib_file_del_by_index (&file_main, irq->clib_file_index);
877	  close (irq->fd);
878	  irq->fd = -1;
879	}
880    }
881  return err;
882}
883
884clib_error_t *
885vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h,
886			  u16 start, u16 count)
887{
888  linux_pci_device_t *p = linux_pci_get_device (h);
889  int fds[count];
890  int i;
891
892  if (p->type != LINUX_PCI_DEVICE_TYPE_VFIO)
893    return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
894			      "support");
895
896  for (i = start; i < start + count; i++)
897    {
898      linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i);
899      fds[i] = irq->fd;
900    }
901
902  return vfio_set_irqs (vm, p, VFIO_PCI_MSIX_IRQ_INDEX, start, count,
903			VFIO_IRQ_SET_ACTION_TRIGGER, fds);
904}
905
906clib_error_t *
907vlib_pci_disable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h,
908			   u16 start, u16 count)
909{
910  linux_pci_device_t *p = linux_pci_get_device (h);
911  int i, fds[count];
912
913  if (p->type != LINUX_PCI_DEVICE_TYPE_VFIO)
914    return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
915			      "support");
916
917  for (i = start; i < start + count; i++)
918    fds[i] = -1;
919
920  return vfio_set_irqs (vm, p, VFIO_PCI_MSIX_IRQ_INDEX, start, count,
921			VFIO_IRQ_SET_ACTION_TRIGGER, fds);
922}
923
924static clib_error_t *
925add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p,
926		 vlib_pci_device_info_t * di, pci_device_registration_t * r)
927{
928  linux_pci_main_t *lpm = &linux_pci_main;
929  struct vfio_device_info device_info = { 0 };
930  struct vfio_region_info reg = { 0 };
931  clib_error_t *err = 0;
932  u8 *s = 0;
933  int is_noiommu;
934
935  p->type = LINUX_PCI_DEVICE_TYPE_VFIO;
936
937  if ((err = linux_vfio_group_get_device_fd (&p->addr, &p->fd, &is_noiommu)))
938    return err;
939
940  if (is_noiommu == 0)
941    p->supports_va_dma = 1;
942
943  device_info.argsz = sizeof (device_info);
944  if (ioctl (p->fd, VFIO_DEVICE_GET_INFO, &device_info) < 0)
945    {
946      err = clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_INFO) '%U'",
947				    format_vlib_pci_addr, &di->addr);
948      goto error;
949    }
950
951  reg.argsz = sizeof (struct vfio_region_info);
952  reg.index = VFIO_PCI_CONFIG_REGION_INDEX;
953  if (ioctl (p->fd, VFIO_DEVICE_GET_REGION_INFO, &reg) < 0)
954    {
955      err = clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_INFO) '%U'",
956				    format_vlib_pci_addr, &di->addr);
957      goto error;
958    }
959
960  pci_log_debug (vm, p, "%s region_info index:%u size:0x%lx offset:0x%lx "
961		 "flags: %s%s%s(0x%x)", __func__,
962		 reg.index, reg.size, reg.offset,
963		 reg.flags & VFIO_REGION_INFO_FLAG_READ ? "rd " : "",
964		 reg.flags & VFIO_REGION_INFO_FLAG_WRITE ? "wr " : "",
965		 reg.flags & VFIO_REGION_INFO_FLAG_MMAP ? "mmap " : "",
966		 reg.flags);
967
968  p->config_offset = reg.offset;
969  p->config_fd = p->fd;
970
971  /* reset if device supports it */
972  if (device_info.flags & VFIO_DEVICE_FLAGS_RESET)
973    if (ioctl (p->fd, VFIO_DEVICE_RESET) < 0)
974      {
975	err = clib_error_return_unix (0, "ioctl(VFIO_DEVICE_RESET) '%U'",
976				      format_vlib_pci_addr, &di->addr);
977	goto error;
978      }
979
980  if (r && r->interrupt_handler)
981    {
982      vlib_pci_register_intx_handler (vm, p->handle, r->interrupt_handler);
983      linux_pci_vfio_unmask_intx (vm, p);
984    }
985
986  if (p->supports_va_dma)
987    {
988      vlib_buffer_pool_t *bp;
989      /* *INDENT-OFF* */
990      vec_foreach (bp, vm->buffer_main->buffer_pools)
991	{
992	  u32 i;
993	  vlib_physmem_map_t *pm;
994	  pm = vlib_physmem_get_map (vm, bp->physmem_map_index);
995	  for (i = 0; i < pm->n_pages; i++)
996	    vfio_map_physmem_page (vm, pm->base + (i << pm->log2_page_size));
997	}
998      /* *INDENT-ON* */
999    }
1000
1001  if (r && r->init_function)
1002    err = r->init_function (lpm->vlib_main, p->handle);
1003
1004error:
1005  vec_free (s);
1006  if (err)
1007    {
1008      if (p->fd != -1)
1009	close (p->fd);
1010      if (p->config_fd != -1 && p->config_fd != p->fd)
1011	close (p->config_fd);
1012      p->config_fd = p->fd = -1;
1013    }
1014  return err;
1015}
1016
1017/* Configuration space read/write. */
1018clib_error_t *
1019vlib_pci_read_write_config (vlib_main_t * vm, vlib_pci_dev_handle_t h,
1020			    vlib_read_or_write_t read_or_write,
1021			    uword address, void *data, u32 n_bytes)
1022{
1023  linux_pci_device_t *p = linux_pci_get_device (h);
1024  int n;
1025
1026  if (read_or_write == VLIB_READ)
1027    n = pread (p->config_fd, data, n_bytes, p->config_offset + address);
1028  else
1029    n = pwrite (p->config_fd, data, n_bytes, p->config_offset + address);
1030
1031  if (n != n_bytes)
1032    return clib_error_return_unix (0, "%s",
1033				   read_or_write == VLIB_READ
1034				   ? "read" : "write");
1035
1036  return 0;
1037}
1038
1039static clib_error_t *
1040vlib_pci_region (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 bar, int *fd,
1041		 u64 * size, u64 * offset)
1042{
1043  linux_pci_device_t *p = linux_pci_get_device (h);
1044  clib_error_t *error = 0;
1045  int _fd = -1;
1046  u64 _size = 0, _offset = 0;
1047
1048  ASSERT (bar <= 5);
1049
1050  error = 0;
1051
1052  if (p->type == LINUX_PCI_DEVICE_TYPE_UIO)
1053    {
1054      u8 *file_name;
1055      struct stat stat_buf;
1056      file_name = format (0, "%s/%U/resource%d%c", sysfs_pci_dev_path,
1057			  format_vlib_pci_addr, &p->addr, bar, 0);
1058
1059      _fd = open ((char *) file_name, O_RDWR);
1060      if (_fd < 0)
1061	{
1062	  error = clib_error_return_unix (0, "open `%s'", file_name);
1063	  vec_free (file_name);
1064	  return error;
1065	}
1066
1067      if (fstat (_fd, &stat_buf) < 0)
1068	{
1069	  error = clib_error_return_unix (0, "fstat `%s'", file_name);
1070	  vec_free (file_name);
1071	  close (_fd);
1072	  return error;
1073	}
1074
1075      vec_free (file_name);
1076      _size = stat_buf.st_size;
1077      _offset = 0;
1078    }
1079  else if (p->type == LINUX_PCI_DEVICE_TYPE_VFIO)
1080    {
1081      struct vfio_region_info reg = { 0 };
1082      reg.argsz = sizeof (struct vfio_region_info);
1083      reg.index = bar;
1084      if (ioctl (p->fd, VFIO_DEVICE_GET_REGION_INFO, &reg) < 0)
1085	return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_INFO) "
1086				       "'%U'", format_vlib_pci_addr,
1087				       &p->addr);
1088      _fd = p->fd;
1089      _size = reg.size;
1090      _offset = reg.offset;
1091      pci_log_debug (vm, p, "%s region_info index:%u size:0x%lx offset:0x%lx "
1092		     "flags: %s%s%s(0x%x)", __func__,
1093		     reg.index, reg.size, reg.offset,
1094		     reg.flags & VFIO_REGION_INFO_FLAG_READ ? "rd " : "",
1095		     reg.flags & VFIO_REGION_INFO_FLAG_WRITE ? "wr " : "",
1096		     reg.flags & VFIO_REGION_INFO_FLAG_MMAP ? "mmap " : "",
1097		     reg.flags);
1098    }
1099  else
1100    ASSERT (0);
1101
1102  *fd = _fd;
1103  *size = _size;
1104  *offset = _offset;
1105
1106  return error;
1107}
1108
1109static clib_error_t *
1110vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h,
1111			 u32 bar, u8 * addr, void **result)
1112{
1113  linux_pci_device_t *p = linux_pci_get_device (h);
1114  int fd = -1;
1115  clib_error_t *error;
1116  int flags = MAP_SHARED;
1117  u64 size = 0, offset = 0;
1118
1119  pci_log_debug (vm, p, "map region %u to va %p", bar, addr);
1120
1121  if ((error = vlib_pci_region (vm, h, bar, &fd, &size, &offset)))
1122    return error;
1123
1124  if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && addr != 0)
1125    flags |= MAP_FIXED;
1126
1127  *result = mmap (addr, size, PROT_READ | PROT_WRITE, flags, fd, offset);
1128  if (*result == (void *) -1)
1129    {
1130      error = clib_error_return_unix (0, "mmap `BAR%u'", bar);
1131      if (p->type == LINUX_PCI_DEVICE_TYPE_UIO)
1132	close (fd);
1133      return error;
1134    }
1135
1136  /* *INDENT-OFF* */
1137  vec_validate_init_empty (p->regions, bar,
1138			   (linux_pci_region_t) { .fd = -1});
1139  /* *INDENT-ON* */
1140  if (p->type == LINUX_PCI_DEVICE_TYPE_UIO)
1141    p->regions[bar].fd = fd;
1142  p->regions[bar].addr = *result;
1143  p->regions[bar].size = size;
1144  return 0;
1145}
1146
1147clib_error_t *
1148vlib_pci_map_region (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 resource,
1149		     void **result)
1150{
1151  return (vlib_pci_map_region_int (vm, h, resource, 0 /* addr */ , result));
1152}
1153
1154clib_error_t *
1155vlib_pci_map_region_fixed (vlib_main_t * vm, vlib_pci_dev_handle_t h,
1156			   u32 resource, u8 * addr, void **result)
1157{
1158  return (vlib_pci_map_region_int (vm, h, resource, addr, result));
1159}
1160
1161clib_error_t *
1162vlib_pci_io_region (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 resource)
1163{
1164  linux_pci_device_t *p = linux_pci_get_device (h);
1165  clib_error_t *error = 0;
1166  int fd = -1;
1167  u64 size = 0, offset = 0;
1168
1169  if ((error = vlib_pci_region (vm, h, resource, &fd, &size, &offset)))
1170    return error;
1171
1172  p->io_fd = fd;
1173  p->io_offset = offset;
1174  return error;
1175}
1176
1177clib_error_t *
1178vlib_pci_read_write_io (vlib_main_t * vm, vlib_pci_dev_handle_t h,
1179			vlib_read_or_write_t read_or_write,
1180			uword offset, void *data, u32 length)
1181{
1182  linux_pci_device_t *p = linux_pci_get_device (h);
1183  int n = 0;
1184
1185  if (read_or_write == VLIB_READ)
1186    n = pread (p->io_fd, data, length, p->io_offset + offset);
1187  else
1188    n = pwrite (p->io_fd, data, length, p->io_offset + offset);
1189
1190  if (n != length)
1191    return clib_error_return_unix (0, "%s",
1192				   read_or_write == VLIB_READ
1193				   ? "read" : "write");
1194  return 0;
1195}
1196
1197clib_error_t *
1198vlib_pci_map_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h, void *ptr)
1199{
1200  linux_pci_device_t *p = linux_pci_get_device (h);
1201
1202  if (!p->supports_va_dma)
1203    return 0;
1204
1205  return vfio_map_physmem_page (vm, ptr);
1206}
1207
1208int
1209vlib_pci_supports_virtual_addr_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h)
1210{
1211  linux_pci_device_t *p = linux_pci_get_device (h);
1212
1213  return p->supports_va_dma != 0;
1214}
1215
1216clib_error_t *
1217vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr,
1218		      pci_device_id_t ids[], vlib_pci_dev_handle_t * handle)
1219{
1220  linux_pci_main_t *lpm = &linux_pci_main;
1221  vlib_pci_device_info_t *di;
1222  linux_pci_device_t *p;
1223  clib_error_t *err = 0;
1224  pci_device_id_t *i;
1225
1226  di = vlib_pci_get_device_info (vm, addr, &err);
1227
1228  if (err)
1229    return err;
1230  for (i = ids; i->vendor_id != 0; i++)
1231    if (i->vendor_id == di->vendor_id && i->device_id == di->device_id)
1232      break;
1233
1234  if (i->vendor_id == 0)
1235    return clib_error_return (0, "Wrong vendor or device id");
1236
1237  pool_get (lpm->linux_pci_devices, p);
1238  p->handle = p - lpm->linux_pci_devices;
1239  p->addr.as_u32 = di->addr.as_u32;
1240  p->intx_irq.fd = -1;
1241  p->intx_irq.clib_file_index = -1;
1242  p->numa_node = di->numa_node;
1243  /*
1244   * pci io bar read/write fd
1245   */
1246  p->io_fd = -1;
1247
1248  pci_log_debug (vm, p, "open vid:0x%04x did:0x%04x driver:%s iommu_group:%d",
1249		 di->vendor_id, di->device_id, di->driver_name,
1250		 di->iommu_group);
1251
1252  if (clib_strncmp ("vfio-pci", (char *) di->driver_name, 8) == 0)
1253    err = add_device_vfio (vm, p, di, 0);
1254  else if (clib_strncmp ("uio_pci_generic", (char *) di->driver_name, 8) == 0)
1255    err = add_device_uio (vm, p, di, 0);
1256  else
1257    err = clib_error_create ("device not bound to 'vfio-pci' or "
1258			     "'uio_pci_generic' kernel module");
1259  if (err)
1260    goto error;
1261
1262  *handle = p->handle;
1263
1264error:
1265  vlib_pci_free_device_info (di);
1266  if (err)
1267    {
1268      pci_log_err (vm, p, "%U", format_clib_error, err);
1269      clib_memset (p, 0, sizeof (linux_pci_device_t));
1270      pool_put (lpm->linux_pci_devices, p);
1271    }
1272
1273  return err;
1274}
1275
1276void
1277vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h)
1278{
1279  linux_pci_main_t *lpm = &linux_pci_main;
1280  linux_pci_device_t *p = linux_pci_get_device (h);
1281  linux_pci_irq_t *irq;
1282  linux_pci_region_t *res;
1283  clib_error_t *err = 0;
1284
1285  if (p->type == LINUX_PCI_DEVICE_TYPE_UIO)
1286    {
1287      irq = &p->intx_irq;
1288      if (irq->clib_file_index != -1)
1289	clib_file_del_by_index (&file_main, irq->clib_file_index);
1290      close (p->config_fd);
1291      if (p->io_fd != -1)
1292	close (p->io_fd);
1293    }
1294  else if (p->type == LINUX_PCI_DEVICE_TYPE_VFIO)
1295    {
1296      irq = &p->intx_irq;
1297      /* close INTx irqs */
1298      if (irq->fd != -1)
1299	{
1300	  err = vfio_set_irqs (vm, p, VFIO_PCI_INTX_IRQ_INDEX, 0, 0,
1301			       VFIO_IRQ_SET_ACTION_TRIGGER, 0);
1302	  clib_error_free (err);
1303	  if (irq->clib_file_index != -1)
1304	    clib_file_del_by_index (&file_main, irq->clib_file_index);
1305	  close (irq->fd);
1306	}
1307
1308      /* close MSI-X irqs */
1309      if (vec_len (p->msix_irqs))
1310	{
1311	  err = vfio_set_irqs (vm, p, VFIO_PCI_MSIX_IRQ_INDEX, 0, 0,
1312			       VFIO_IRQ_SET_ACTION_TRIGGER, 0);
1313	  clib_error_free (err);
1314          /* *INDENT-OFF* */
1315	  vec_foreach (irq, p->msix_irqs)
1316	    {
1317	      if (irq->fd == -1)
1318		continue;
1319	      clib_file_del_by_index (&file_main, irq->clib_file_index);
1320	      close (irq->fd);
1321	    }
1322          /* *INDENT-ON* */
1323	  vec_free (p->msix_irqs);
1324	}
1325    }
1326
1327  /* *INDENT-OFF* */
1328  vec_foreach (res, p->regions)
1329    {
1330      if (res->size == 0)
1331	continue;
1332      munmap (res->addr, res->size);
1333      if (res->fd != -1)
1334        close (res->fd);
1335    }
1336  /* *INDENT-ON* */
1337  vec_free (p->regions);
1338
1339  close (p->fd);
1340  clib_memset (p, 0, sizeof (linux_pci_device_t));
1341  pool_put (lpm->linux_pci_devices, p);
1342}
1343
1344void
1345init_device_from_registered (vlib_main_t * vm, vlib_pci_device_info_t * di)
1346{
1347  vlib_pci_main_t *pm = &pci_main;
1348  linux_pci_main_t *lpm = &linux_pci_main;
1349  pci_device_registration_t *r;
1350  pci_device_id_t *i;
1351  clib_error_t *err = 0;
1352  linux_pci_device_t *p;
1353
1354  pool_get (lpm->linux_pci_devices, p);
1355  p->handle = p - lpm->linux_pci_devices;
1356  p->intx_irq.fd = -1;
1357
1358  r = pm->pci_device_registrations;
1359
1360  while (r)
1361    {
1362      for (i = r->supported_devices; i->vendor_id != 0; i++)
1363	if (i->vendor_id == di->vendor_id && i->device_id == di->device_id)
1364	  {
1365	    if (di->iommu_group != -1)
1366	      err = add_device_vfio (vm, p, di, r);
1367	    else
1368	      err = add_device_uio (vm, p, di, r);
1369
1370	    if (err)
1371	      clib_error_report (err);
1372	    else
1373	      return;
1374	  }
1375      r = r->next_registration;
1376    }
1377
1378  /* No driver, close the PCI config-space FD */
1379  clib_memset (p, 0, sizeof (linux_pci_device_t));
1380  pool_put (lpm->linux_pci_devices, p);
1381}
1382
1383static clib_error_t *
1384scan_pci_addr (void *arg, u8 * dev_dir_name, u8 * ignored)
1385{
1386  vlib_pci_addr_t addr, **addrv = arg;
1387  unformat_input_t input;
1388  clib_error_t *err = 0;
1389
1390  unformat_init_string (&input, (char *) dev_dir_name,
1391			vec_len (dev_dir_name));
1392
1393  if (!unformat (&input, "/sys/bus/pci/devices/%U",
1394		 unformat_vlib_pci_addr, &addr))
1395    err = clib_error_return (0, "unformat error `%v`", dev_dir_name);
1396
1397  unformat_free (&input);
1398
1399  if (err)
1400    return err;
1401
1402  vec_add1 (*addrv, addr);
1403  return 0;
1404}
1405
1406static int
1407pci_addr_cmp (void *v1, void *v2)
1408{
1409  vlib_pci_addr_t *a1 = v1;
1410  vlib_pci_addr_t *a2 = v2;
1411
1412  if (a1->domain > a2->domain)
1413    return 1;
1414  if (a1->domain < a2->domain)
1415    return -1;
1416  if (a1->bus > a2->bus)
1417    return 1;
1418  if (a1->bus < a2->bus)
1419    return -1;
1420  if (a1->slot > a2->slot)
1421    return 1;
1422  if (a1->slot < a2->slot)
1423    return -1;
1424  if (a1->function > a2->function)
1425    return 1;
1426  if (a1->function < a2->function)
1427    return -1;
1428  return 0;
1429}
1430
1431vlib_pci_addr_t *
1432vlib_pci_get_all_dev_addrs ()
1433{
1434  vlib_pci_addr_t *addrs = 0;
1435  clib_error_t *err;
1436  err = foreach_directory_file ((char *) sysfs_pci_dev_path, scan_pci_addr,
1437				&addrs, /* scan_dirs */ 0);
1438  if (err)
1439    {
1440      vec_free (addrs);
1441      return 0;
1442    }
1443
1444  vec_sort_with_function (addrs, pci_addr_cmp);
1445
1446  return addrs;
1447}
1448
1449clib_error_t *
1450linux_pci_init (vlib_main_t * vm)
1451{
1452  vlib_pci_main_t *pm = &pci_main;
1453  vlib_pci_addr_t *addr = 0, *addrs;
1454
1455  pm->vlib_main = vm;
1456
1457  ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32));
1458
1459  addrs = vlib_pci_get_all_dev_addrs ();
1460  /* *INDENT-OFF* */
1461  vec_foreach (addr, addrs)
1462    {
1463      vlib_pci_device_info_t *d;
1464      if ((d = vlib_pci_get_device_info (vm, addr, 0)))
1465	{
1466	  init_device_from_registered (vm, d);
1467	  vlib_pci_free_device_info (d);
1468	}
1469    }
1470  /* *INDENT-ON* */
1471
1472  return 0;
1473}
1474
1475/* *INDENT-OFF* */
1476VLIB_INIT_FUNCTION (linux_pci_init) =
1477{
1478  .runs_after = VLIB_INITS("unix_input_init"),
1479};
1480/* *INDENT-ON* */
1481
1482/*
1483 * fd.io coding-style-patch-verification: ON
1484 *
1485 * Local Variables:
1486 * eval: (c-set-style "gnu")
1487 * End:
1488 */
1489