turbotap.c revision adff8bfb
1/*
2 *------------------------------------------------------------------
3 * turbotap.c - fast dynamic tap interface hookup
4 *
5 * Copyright (c) 2016 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
9 *
10 *     http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
18 */
19
20#define _GNU_SOURCE
21
22#include <fcntl.h>              /* for open */
23#include <sys/ioctl.h>
24#include <sys/socket.h>
25#include <sys/stat.h>
26#include <sys/types.h>
27#include <sys/uio.h>            /* for iovec */
28#include <netinet/in.h>
29
30#include <linux/if_arp.h>
31#include <linux/if_tun.h>
32
33#include <vlib/vlib.h>
34#include <vlib/unix/unix.h>
35
36#include <vnet/ip/ip.h>
37#include <vnet/plugin/plugin.h>
38#include <vnet/ethernet/ethernet.h>
39
40#if DPDK == 1
41#include <vnet/devices/dpdk/dpdk.h>
42#endif
43
44#include "turbotap.h"
45
/* Plugin-wide state; the functions in this file reach it via &turbotap_main. */
46turbotap_main_t turbotap_main;
47
48static void
49turbotap_nopunt_frame (vlib_main_t * vm,
50                   vlib_node_runtime_t * node,
51                   vlib_frame_t * frame)
52{
53  u32 * buffers = vlib_frame_args (frame);
54  uword n_packets = frame->n_vectors;
55  vlib_buffer_free (vm, buffers, n_packets);
56  vlib_frame_free (vm, node, frame);
57}
58
59/* Gets called when file descriptor is ready from epoll. */
60static clib_error_t * turbotap_read_ready (unix_file_t * uf)
61{
62  vlib_main_t * vm = vlib_get_main();
63  turbotap_main_t * tr = &turbotap_main;
64  uword * p;
65
66  /* Schedule the rx node */
67  vlib_node_set_interrupt_pending (vm, turbotap_rx_node.index);
68
69  p = hash_get (tr->turbotap_interface_index_by_unix_fd, uf->file_descriptor);
70
71  /* Mark the specific tap interface ready-to-read */
72  if (p)
73    tr->pending_read_bitmap = clib_bitmap_set (tr->pending_read_bitmap,
74                                               p[0], 1);
75  else
76    clib_warning ("fd %d not in hash table", uf->file_descriptor);
77
78  return 0;
79}
80
81static clib_error_t *
82turbotap_config (vlib_main_t * vm, unformat_input_t * input)
83{
84  turbotap_main_t *tr = &turbotap_main;
85
86  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
87    {
88      if (unformat (input, "mtu %d", &tr->mtu_bytes))
89        ;
90      else if (unformat (input, "disable"))
91        tr->is_disabled = 1;
92      else
93          return clib_error_return (0, "unknown input `%U'",
94                                    format_unformat_error, input);
95    }
96
97  if (tr->is_disabled)
98    return 0;
99
100  if (geteuid())
101    {
102      clib_warning ("turbotap disabled: must be superuser");
103      tr->is_disabled = 1;
104      return 0;
105    }
106
107  return 0;
108}
109
110VLIB_CONFIG_FUNCTION (turbotap_config, "turbotap");
111
112static u32 turbotap_flag_change (vnet_main_t * vnm,
113                               vnet_hw_interface_t * hw,
114                               u32 flags)
115{
116  turbotap_main_t *tr = &turbotap_main;
117  turbotap_interface_t *ti;
118
119   ti = vec_elt_at_index (tr->turbotap_interfaces, hw->dev_instance);
120
121  if (flags & ETHERNET_INTERFACE_FLAG_MTU)
122    {
123      const uword buffer_size = vlib_buffer_free_list_buffer_size ( vlib_get_main(),
124                                    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
125      ti->mtu_bytes = hw->max_packet_bytes;
126      ti->mtu_buffers = (hw->max_packet_bytes + (buffer_size - 1)) / buffer_size;
127    }
128   else
129    {
130      struct ifreq ifr;
131      u32 want_promisc;
132
133      memcpy (&ifr, &ti->ifr, sizeof (ifr));
134
135      /* get flags, modify to bring up interface... */
136      if (ioctl (ti->provision_fd, SIOCGIFFLAGS, &ifr) < 0)
137        {
138          clib_unix_warning ("Couldn't get interface flags for %s", hw->name);
139          return 0;
140        }
141
142      want_promisc = (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
143
144      if (want_promisc == ti->is_promisc)
145        return 0;
146
147      if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
148        ifr.ifr_flags |= IFF_PROMISC;
149      else
150        ifr.ifr_flags &= ~(IFF_PROMISC);
151
152      /* get flags, modify to bring up interface... */
153      if (ioctl (ti->provision_fd, SIOCSIFFLAGS, &ifr) < 0)
154        {
155          clib_unix_warning ("Couldn't set interface flags for %s", hw->name);
156          return 0;
157        }
158
159      ti->is_promisc = want_promisc;
160    }
161
162  return 0;
163}
164
165/* get tap interface from inactive interfaces or create new */
166static turbotap_interface_t *turbotap_get_new_tapif()
167{
168  turbotap_main_t * tr = &turbotap_main;
169  turbotap_interface_t *ti = NULL;
170
171  int inactive_cnt = vec_len(tr->turbotap_inactive_interfaces);
172  // if there are any inactive ifaces
173  if (inactive_cnt > 0) {
174    // take last
175    u32 ti_idx = tr->turbotap_inactive_interfaces[inactive_cnt - 1];
176    if (vec_len(tr->turbotap_interfaces) > ti_idx) {
177      ti = vec_elt_at_index (tr->turbotap_interfaces, ti_idx);
178      clib_warning("reusing tap interface");
179    }
180    // "remove" from inactive list
181    _vec_len(tr->turbotap_inactive_interfaces) -= 1;
182  }
183
184  // ti was not retrieved from inactive ifaces - create new
185  if (!ti)
186    {
187      vec_add2 (tr->turbotap_interfaces, ti, 1);
188      u32 i;
189
190      for (i = 0; i < MAX_RECV; i++)
191        {
192          ti->rx_msg[i].msg_hdr.msg_name = NULL;
193          ti->rx_msg[i].msg_hdr.msg_namelen = 0;
194          ti->rx_msg[i].msg_hdr.msg_control = NULL;
195          ti->rx_msg[i].msg_hdr.msg_controllen = 0;
196        }
197    }
198  return ti;
199}
200
201int vnet_turbotap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg,
202                      u32 * sw_if_indexp)
203{
204  turbotap_main_t * tr = &turbotap_main;
205  turbotap_interface_t * ti = NULL;
206  struct ifreq ifr;
207  int flags;
208  int dev_net_tun_fd;
209  int dev_tap_fd = -1;
210  int turbotap_fd = -1;
211  int sock_fd = -1;
212  clib_error_t * error;
213  u8 hwaddr [6];
214  int rv = 0;
215
216  if (tr->is_disabled)
217    {
218      return VNET_API_ERROR_FEATURE_DISABLED;
219    }
220
221  flags = IFF_TAP | IFF_NO_PI;
222
223  if ((turbotap_fd = open ("/dev/net/turbotap", O_RDWR)) < 0)
224      return VNET_API_ERROR_SYSCALL_ERROR_1;
225
226  if ((dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
227    return VNET_API_ERROR_SYSCALL_ERROR_1;
228
229  memset (&ifr, 0, sizeof (ifr));
230  strncpy(ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1);
231  ifr.ifr_flags = flags;
232  if (ioctl (dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0)
233    {
234      rv = VNET_API_ERROR_SYSCALL_ERROR_2;
235      goto error;
236    }
237
238  /* Open a provisioning socket */
239  if ((dev_tap_fd = socket(PF_PACKET, SOCK_RAW,
240                           htons(ETH_P_ALL))) < 0 )
241    {
242      rv = VNET_API_ERROR_SYSCALL_ERROR_3;
243      goto error;
244    }
245
246  /* Find the interface index. */
247  {
248    struct ifreq ifr;
249    struct sockaddr_ll sll;
250
251    memset (&ifr, 0, sizeof(ifr));
252    strncpy (ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1);
253    if (ioctl (dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 )
254      {
255        rv = VNET_API_ERROR_SYSCALL_ERROR_4;
256        goto error;
257      }
258
259    /* Bind the provisioning socket to the interface. */
260    memset(&sll, 0, sizeof(sll));
261    sll.sll_family   = AF_PACKET;
262    sll.sll_ifindex  = ifr.ifr_ifindex;
263    sll.sll_protocol = htons(ETH_P_ALL);
264
265    if (bind(dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0)
266      {
267        rv = VNET_API_ERROR_SYSCALL_ERROR_5;
268        goto error;
269      }
270  }
271
272  /* non-blocking I/O on /dev/tapX */
273  {
274    int one = 1;
275    if (ioctl (dev_net_tun_fd, FIONBIO, &one) < 0)
276      {
277        rv = VNET_API_ERROR_SYSCALL_ERROR_6;
278        goto error;
279      }
280  }
281  ifr.ifr_mtu = tr->mtu_bytes;
282  if (ioctl (dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
283    {
284      rv = VNET_API_ERROR_SYSCALL_ERROR_7;
285      goto error;
286    }
287
288  /* get flags, modify to bring up interface... */
289  if (ioctl (dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
290    {
291      rv = VNET_API_ERROR_SYSCALL_ERROR_8;
292      goto error;
293    }
294
295  ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
296
297  if (ioctl (dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
298    {
299      rv = VNET_API_ERROR_SYSCALL_ERROR_9;
300      goto error;
301    }
302
303  ti = turbotap_get_new_tapif();
304  ti->per_interface_next_index = ~0;
305
306  if (hwaddr_arg != 0)
307    clib_memcpy(hwaddr, hwaddr_arg, 6);
308  else
309    {
310      f64 now = vlib_time_now(vm);
311      u32 rnd;
312      rnd = (u32) (now * 1e6);
313      rnd = random_u32 (&rnd);
314
315      memcpy (hwaddr+2, &rnd, sizeof(rnd));
316      hwaddr[0] = 2;
317      hwaddr[1] = 0xfe;
318    }
319
320  if ((sock_fd = ioctl (turbotap_fd, TUNGETSOCKFD, (void *)&dev_net_tun_fd) ) < 0)
321    {
322      rv = VNET_API_ERROR_SYSCALL_ERROR_2;
323      goto error;
324    }
325
326  error = ethernet_register_interface
327        (tr->vnet_main,
328         turbotap_dev_class.index,
329         ti - tr->turbotap_interfaces /* device instance */,
330         hwaddr /* ethernet address */,
331         &ti->hw_if_index,
332         turbotap_flag_change);
333
334  if (error)
335    {
336      clib_error_report (error);
337      rv = VNET_API_ERROR_INVALID_REGISTRATION;
338      goto error;
339    }
340
341  {
342    const uword buffer_size = vlib_buffer_free_list_buffer_size ( vlib_get_main(),
343                                     VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
344    unix_file_t template = {0};
345    template.read_function = turbotap_read_ready;
346    template.file_descriptor = dev_net_tun_fd;
347    ti->unix_file_index = unix_file_add (&unix_main, &template);
348    ti->unix_fd = dev_net_tun_fd;
349    ti->provision_fd = dev_tap_fd;
350    ti->turbotap_fd = turbotap_fd;
351    ti->sock_fd = sock_fd;
352    ti->rx_ready = MAX_RECV;
353    ti->mtu_bytes = tr->mtu_bytes;
354    ti->mtu_buffers = (tr->mtu_bytes + (buffer_size - 1)) / buffer_size;
355    clib_memcpy (&ti->ifr, &ifr, sizeof (ifr));
356  }
357
358  {
359    vnet_hw_interface_t * hw;
360    hw = vnet_get_hw_interface (tr->vnet_main, ti->hw_if_index);
361    hw->min_supported_packet_bytes = TAP_MTU_MIN;
362    hw->max_supported_packet_bytes = TAP_MTU_MAX;
363    hw->max_packet_bytes = ti->mtu_bytes;
364    hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = hw->max_supported_packet_bytes - sizeof(ethernet_header_t);
365    ti->sw_if_index = hw->sw_if_index;
366    if (sw_if_indexp)
367      *sw_if_indexp = hw->sw_if_index;
368  }
369
370  ti->active = 1;
371
372  hash_set (tr->turbotap_interface_index_by_sw_if_index, ti->sw_if_index,
373            ti - tr->turbotap_interfaces);
374
375  hash_set (tr->turbotap_interface_index_by_unix_fd, ti->unix_fd,
376            ti - tr->turbotap_interfaces);
377
378  return rv;
379
380 error:
381  close (dev_net_tun_fd);
382  close (dev_tap_fd);
383  close (turbotap_fd);
384  close (sock_fd);
385
386  return rv;
387}
388
389static int turbotap_tap_disconnect (turbotap_interface_t *ti)
390{
391  int rv = 0;
392  turbotap_main_t * tr = &turbotap_main;
393  vnet_main_t * vnm = tr->vnet_main;
394  u32 sw_if_index = ti->sw_if_index;
395
396  // bring interface down
397  vnet_sw_interface_set_flags (vnm, sw_if_index, 0);
398
399  if (ti->unix_file_index != ~0) {
400    unix_file_del (&unix_main, unix_main.file_pool + ti->unix_file_index);
401    ti->unix_file_index = ~0;
402  }
403
404  hash_unset (tr->turbotap_interface_index_by_unix_fd, ti->unix_fd);
405  hash_unset (tr->turbotap_interface_index_by_sw_if_index, ti->sw_if_index);
406  close(ti->unix_fd);
407  close(ti->provision_fd);
408  close(ti->turbotap_fd);
409  close(ti->sock_fd);
410  ti->unix_fd = -1;
411  ti->provision_fd = -1;
412  ti->turbotap_fd = -1;
413  ti->sock_fd = -1;
414
415  return rv;
416}
417
418int vnet_turbotap_delete(vlib_main_t *vm, u32 sw_if_index)
419{
420  int rv = 0;
421  turbotap_main_t * tr = &turbotap_main;
422  turbotap_interface_t *ti;
423  uword *p = NULL;
424
425  p = hash_get (tr->turbotap_interface_index_by_sw_if_index,
426                sw_if_index);
427  if (p == 0) {
428    clib_warning ("sw_if_index %d unknown", sw_if_index);
429    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
430  }
431  ti = vec_elt_at_index (tr->turbotap_interfaces, p[0]);
432
433  // inactive
434  ti->active = 0;
435  turbotap_tap_disconnect(ti);
436  // add to inactive list
437  vec_add1(tr->turbotap_inactive_interfaces, ti - tr->turbotap_interfaces);
438
439  // reset renumbered iface
440  if (p[0] < vec_len (tr->show_dev_instance_by_real_dev_instance))
441    tr->show_dev_instance_by_real_dev_instance[p[0]] = ~0;
442
443  ethernet_delete_interface (tr->vnet_main, ti->hw_if_index);
444  return rv;
445}
446
447clib_error_t *
448vlib_plugin_register(vlib_main_t *m, vnet_plugin_handoff_t *h, int f)
449{
450  clib_error_t * error = 0;
451  return error;
452}
453
454clib_error_t *turbotap_init(vlib_main_t *vm)
455{
456  clib_error_t * error = 0;
457  turbotap_main_t * tr = &turbotap_main;
458
459  tr->vlib_main = vm;
460  tr->vnet_main = vnet_get_main();
461  tr->unix_main = &unix_main;
462  tr->mtu_bytes = TAP_MTU_DEFAULT;
463  tr->turbotap_interface_index_by_sw_if_index = hash_create (0, sizeof(uword));
464  tr->turbotap_interface_index_by_unix_fd = hash_create (0, sizeof (uword));
465  tr->rx_buffers = 0;
466  tr->unused_buffer_list = 0;
467  vec_alloc(tr->rx_buffers, VLIB_FRAME_SIZE);
468  vec_reset_length(tr->rx_buffers);
469  vec_alloc(tr->unused_buffer_list, VLIB_FRAME_SIZE);
470  vec_reset_length(tr->unused_buffer_list);
471  vm->os_punt_frame = turbotap_nopunt_frame;
472  return error;
473}
474VLIB_INIT_FUNCTION(turbotap_init);
475