/*
 *------------------------------------------------------------------
 * tuntap.c - kernel stack (reverse) punt/inject path
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
/**
 * @file
 * @brief  TunTap Kernel stack (reverse) punt/inject path.
 *
 * This driver runs in one of two distinct modes:
 * - "punt/inject" mode, where we send packets not otherwise processed
 *   by the forwarding path to the Linux kernel stack, and
 *
 * - "normal interface" mode, where we treat the Linux kernel stack
 *   as a peer.
 *
 * By default, we select punt/inject mode.
 */

#include <errno.h>		/* for errno, EAGAIN */
#include <fcntl.h>		/* for open */
#include <string.h>		/* for strncpy */
#include <unistd.h>		/* for close, geteuid */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>		/* for iovec */
#include <netinet/in.h>

#include <linux/if_arp.h>
#include <linux/if_tun.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>

#include <vnet/ip/ip.h>
#include <vnet/fib/fib_table.h>

#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>

54cb9cadadSEd Warnickestatic vnet_device_class_t tuntap_dev_class;
55cb9cadadSEd Warnickestatic vnet_hw_interface_class_t tuntap_interface_class;
56cb9cadadSEd Warnicke
57cb9cadadSEd Warnickestatic void tuntap_punt_frame (vlib_main_t * vm,
581b0c9837Ssharath reddy			       vlib_node_runtime_t * node,
591b0c9837Ssharath reddy			       vlib_frame_t * frame);
60cb9cadadSEd Warnickestatic void tuntap_nopunt_frame (vlib_main_t * vm,
611b0c9837Ssharath reddy				 vlib_node_runtime_t * node,
621b0c9837Ssharath reddy				 vlib_frame_t * frame);
63cb9cadadSEd Warnicke
641b0c9837Ssharath reddytypedef struct
651b0c9837Ssharath reddy{
66cb9cadadSEd Warnicke  u32 sw_if_index;
67cb9cadadSEd Warnicke  u8 is_v6;
68cb9cadadSEd Warnicke  u8 addr[16];
69cb9cadadSEd Warnicke} subif_address_t;
70cb9cadadSEd Warnicke
7107203afeSKeith Burns (alagalah)/**
724cd25766SSteven * @brief TUNTAP per thread struct
7307203afeSKeith Burns (alagalah) */
744cd25766SSteventypedef struct
7607203afeSKeith Burns (alagalah)  /** Vector of VLIB rx buffers to use.  We allocate them in blocks
77cb9cadadSEd Warnicke     of VLIB_FRAME_SIZE (256). */
781b0c9837Ssharath reddy  u32 *rx_buffers;
79cb9cadadSEd Warnicke
804cd25766SSteven  /** Vector of iovecs for readv/writev calls. */
811b0c9837Ssharath reddy  struct iovec *iovecs;
824cd25766SSteven} tuntap_per_thread_t;
854cd25766SSteven * @brief TUNTAP node main state
864cd25766SSteven */
871b0c9837Ssharath reddytypedef struct
881b0c9837Ssharath reddy{
894cd25766SSteven  /** per thread variables */
901b0c9837Ssharath reddy  tuntap_per_thread_t *threads;
9207203afeSKeith Burns (alagalah)  /** File descriptors for /dev/net/tun and provisioning socket. */
93cb9cadadSEd Warnicke  int dev_net_tun_fd, dev_tap_fd;
94cb9cadadSEd Warnicke
9507203afeSKeith Burns (alagalah)  /** Create a "tap" [ethernet] encaps device */
96cb9cadadSEd Warnicke  int is_ether;
97cb9cadadSEd Warnicke
9807203afeSKeith Burns (alagalah)  /** 1 if a "normal" routed intfc, 0 if a punt/inject interface */
99cb9cadadSEd Warnicke
100cb9cadadSEd Warnicke  int have_normal_interface;
101cb9cadadSEd Warnicke
10207203afeSKeith Burns (alagalah)  /** tap device destination MAC address. Required, or Linux drops pkts */
103cb9cadadSEd Warnicke  u8 ether_dst_mac[6];
104cb9cadadSEd Warnicke
10507203afeSKeith Burns (alagalah)  /** Interface MTU in bytes and # of default sized buffers. */
106cb9cadadSEd Warnicke  u32 mtu_bytes, mtu_buffers;
107cb9cadadSEd Warnicke
10807203afeSKeith Burns (alagalah)  /** Linux interface name for tun device. */
1091b0c9837Ssharath reddy  char *tun_name;
110cb9cadadSEd Warnicke
11107203afeSKeith Burns (alagalah)  /** Pool of subinterface addresses */
112cb9cadadSEd Warnicke  subif_address_t *subifs;
113cb9cadadSEd Warnicke
11407203afeSKeith Burns (alagalah)  /** Hash for subif addresses */
115cb9cadadSEd Warnicke  mhash_t subif_mhash;
116cb9cadadSEd Warnicke
11707203afeSKeith Burns (alagalah)  /** Unix file index */
11856dd5438SDamjan Marion  u32 clib_file_index;
119cb9cadadSEd Warnicke
12007203afeSKeith Burns (alagalah)  /** For the "normal" interface, if configured */
121cb9cadadSEd Warnicke  u32 hw_if_index, sw_if_index;
122cb9cadadSEd Warnicke
123cb9cadadSEd Warnicke} tuntap_main_t;
124cb9cadadSEd Warnicke
125cb9cadadSEd Warnickestatic tuntap_main_t tuntap_main = {
126cb9cadadSEd Warnicke  .tun_name = "vnet",
127cb9cadadSEd Warnicke
12807203afeSKeith Burns (alagalah)  /** Suitable defaults for an Ethernet-like tun/tap device */
129cb9cadadSEd Warnicke  .mtu_bytes = 4096 + 256,
130cb9cadadSEd Warnicke};
131cb9cadadSEd Warnicke
13207203afeSKeith Burns (alagalah)/**
13307203afeSKeith Burns (alagalah) * @brief tuntap_tx
13407203afeSKeith Burns (alagalah) * @node tuntap-tx
13507203afeSKeith Burns (alagalah) *
13607203afeSKeith Burns (alagalah) * Output node, writes the buffers comprising the incoming frame
137cb9cadadSEd Warnicke * to the tun/tap device, aka hands them to the Linux kernel stack.
13807203afeSKeith Burns (alagalah) *
13907203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
14007203afeSKeith Burns (alagalah) * @param *node - vlib_node_runtime_t
14107203afeSKeith Burns (alagalah) * @param *frame - vlib_frame_t
14207203afeSKeith Burns (alagalah) *
14307203afeSKeith Burns (alagalah) * @return rc - uword
14407203afeSKeith Burns (alagalah) *
145cb9cadadSEd Warnicke */
146cb9cadadSEd Warnickestatic uword
1471b0c9837Ssharath reddytuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
148cb9cadadSEd Warnicke{
149a3d5986aSDamjan Marion  u32 *buffers = vlib_frame_vector_args (frame);
150cb9cadadSEd Warnicke  uword n_packets = frame->n_vectors;
1511b0c9837Ssharath reddy  tuntap_main_t *tm = &tuntap_main;
1527394b5b0SJohn Lo  vnet_main_t *vnm = vnet_get_main ();
1537394b5b0SJohn Lo  vnet_interface_main_t *im = &vnm->interface_main;
1547394b5b0SJohn Lo  u32 n_bytes = 0;
155cb9cadadSEd Warnicke  int i;
156067cd622SDamjan Marion  u16 thread_index = vm->thread_index;
157cb9cadadSEd Warnicke
158cb9cadadSEd Warnicke  for (i = 0; i < n_packets; i++)
159cb9cadadSEd Warnicke    {
1601b0c9837Ssharath reddy      struct iovec *iov;
1611b0c9837Ssharath reddy      vlib_buffer_t *b;
162cb9cadadSEd Warnicke      uword l;
163cb9cadadSEd Warnicke
164cb9cadadSEd Warnicke      b = vlib_get_buffer (vm, buffers[i]);
165cb9cadadSEd Warnicke
166cb9cadadSEd Warnicke      if (tm->is_ether && (!tm->have_normal_interface))
1671b0c9837Ssharath reddy	{
1681b0c9837Ssharath reddy	  vlib_buffer_reset (b);
169178cf493SDave Barach	  clib_memcpy_fast (vlib_buffer_get_current (b), tm->ether_dst_mac,
170178cf493SDave Barach			    6);
1711b0c9837Ssharath reddy	}
172cb9cadadSEd Warnicke
173cb9cadadSEd Warnicke      /* Re-set iovecs if present. */
1744cd25766SSteven      if (tm->threads[thread_index].iovecs)
1754cd25766SSteven	_vec_len (tm->threads[thread_index].iovecs) = 0;
176cb9cadadSEd Warnicke
17707203afeSKeith Burns (alagalah)      /** VLIB buffer chain -> Unix iovec(s). */
1784cd25766SSteven      vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
179cb9cadadSEd Warnicke      iov->iov_base = b->data + b->current_data;
180cb9cadadSEd Warnicke      iov->iov_len = l = b->current_length;
181cb9cadadSEd Warnicke
182cb9cadadSEd Warnicke      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
183cb9cadadSEd Warnicke	{
1841b0c9837Ssharath reddy	  do
1851b0c9837Ssharath reddy	    {
1861b0c9837Ssharath reddy	      b = vlib_get_buffer (vm, b->next_buffer);
187cb9cadadSEd Warnicke
1881b0c9837Ssharath reddy	      vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
189cb9cadadSEd Warnicke
1901b0c9837Ssharath reddy	      iov->iov_base = b->data + b->current_data;
1911b0c9837Ssharath reddy	      iov->iov_len = b->current_length;
1921b0c9837Ssharath reddy	      l += b->current_length;
1931b0c9837Ssharath reddy	    }
1941b0c9837Ssharath reddy	  while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
195cb9cadadSEd Warnicke	}
196cb9cadadSEd Warnicke
1974cd25766SSteven      if (writev (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
1984cd25766SSteven		  vec_len (tm->threads[thread_index].iovecs)) < l)
199cb9cadadSEd Warnicke	clib_unix_warning ("writev");
2007394b5b0SJohn Lo
2017394b5b0SJohn Lo      n_bytes += l;
202cb9cadadSEd Warnicke    }
20307203afeSKeith Burns (alagalah)
2047394b5b0SJohn Lo  /* Update tuntap interface output stats. */
2057394b5b0SJohn Lo  vlib_increment_combined_counter (im->combined_sw_if_counters
2067394b5b0SJohn Lo				   + VNET_INTERFACE_COUNTER_TX,
207586afd76SDamjan Marion				   vm->thread_index,
2087394b5b0SJohn Lo				   tm->sw_if_index, n_packets, n_bytes);
2097394b5b0SJohn Lo
2107394b5b0SJohn Lo
21107203afeSKeith Burns (alagalah)  /** The normal interface path flattens the buffer chain */
212cb9cadadSEd Warnicke  if (tm->have_normal_interface)
213cb9cadadSEd Warnicke    vlib_buffer_free_no_next (vm, buffers, n_packets);
214cb9cadadSEd Warnicke  else
215cb9cadadSEd Warnicke    vlib_buffer_free (vm, buffers, n_packets);
21607203afeSKeith Burns (alagalah)
217cb9cadadSEd Warnicke  return n_packets;
218cb9cadadSEd Warnicke}
219cb9cadadSEd Warnicke
2201b0c9837Ssharath reddy/* *INDENT-OFF* */
221cb9cadadSEd WarnickeVLIB_REGISTER_NODE (tuntap_tx_node,static) = {
222cb9cadadSEd Warnicke  .function = tuntap_tx,
223cb9cadadSEd Warnicke  .name = "tuntap-tx",
224cb9cadadSEd Warnicke  .type = VLIB_NODE_TYPE_INTERNAL,
225cb9cadadSEd Warnicke  .vector_size = 4,
226cb9cadadSEd Warnicke};
2271b0c9837Ssharath reddy/* *INDENT-ON* */
228cb9cadadSEd Warnicke
22907203afeSKeith Burns (alagalah)/**
23007203afeSKeith Burns (alagalah) * @brief TUNTAP receive node
23107203afeSKeith Burns (alagalah) * @node tuntap-rx
23207203afeSKeith Burns (alagalah) *
23307203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
23407203afeSKeith Burns (alagalah) * @param *node - vlib_node_runtime_t
23507203afeSKeith Burns (alagalah) * @param *frame - vlib_frame_t
23607203afeSKeith Burns (alagalah) *
23707203afeSKeith Burns (alagalah) * @return rc - uword
23807203afeSKeith Burns (alagalah) *
23907203afeSKeith Burns (alagalah) */
240cb9cadadSEd Warnickestatic uword
2411b0c9837Ssharath reddytuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
242cb9cadadSEd Warnicke{
2431b0c9837Ssharath reddy  tuntap_main_t *tm = &tuntap_main;
2441b0c9837Ssharath reddy  vlib_buffer_t *b;
245cb9cadadSEd Warnicke  u32 bi;
2468934a045SDamjan Marion  const uword buffer_size = vlib_buffer_get_default_data_size (vm);
247067cd622SDamjan Marion  u16 thread_index = vm->thread_index;
248cb9cadadSEd Warnicke
24907203afeSKeith Burns (alagalah)  /** Make sure we have some RX buffers. */
250cb9cadadSEd Warnicke  {
2514cd25766SSteven    uword n_left = vec_len (tm->threads[thread_index].rx_buffers);
252cb9cadadSEd Warnicke    uword n_alloc;
253cb9cadadSEd Warnicke
254cb9cadadSEd Warnicke    if (n_left < VLIB_FRAME_SIZE / 2)
255cb9cadadSEd Warnicke      {
2561b0c9837Ssharath reddy	if (!tm->threads[thread_index].rx_buffers)
2574cd25766SSteven	  vec_alloc (tm->threads[thread_index].rx_buffers, VLIB_FRAME_SIZE);
258cb9cadadSEd Warnicke
2591b0c9837Ssharath reddy	n_alloc =
2601b0c9837Ssharath reddy	  vlib_buffer_alloc (vm,
2611b0c9837Ssharath reddy			     tm->threads[thread_index].rx_buffers + n_left,
2621b0c9837Ssharath reddy			     VLIB_FRAME_SIZE - n_left);
2634cd25766SSteven	_vec_len (tm->threads[thread_index].rx_buffers) = n_left + n_alloc;
264cb9cadadSEd Warnicke      }
265cb9cadadSEd Warnicke  }
266cb9cadadSEd Warnicke
26707203afeSKeith Burns (alagalah)  /** Allocate RX buffers from end of rx_buffers.
268cb9cadadSEd Warnicke     Turn them into iovecs to pass to readv. */
269cb9cadadSEd Warnicke  {
2704cd25766SSteven    uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1;
2711b0c9837Ssharath reddy    vlib_buffer_t *b;
272cb9cadadSEd Warnicke    word i, n_bytes_left, n_bytes_in_packet;
273cb9cadadSEd Warnicke
27407203afeSKeith Burns (alagalah)    /** We should have enough buffers left for an MTU sized packet. */
2751b0c9837Ssharath reddy    ASSERT (vec_len (tm->threads[thread_index].rx_buffers) >=
2761b0c9837Ssharath reddy	    tm->mtu_buffers);
277cb9cadadSEd Warnicke
2784cd25766SSteven    vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1);
279cb9cadadSEd Warnicke    for (i = 0; i < tm->mtu_buffers; i++)
280cb9cadadSEd Warnicke      {
2811b0c9837Ssharath reddy	b =
2821b0c9837Ssharath reddy	  vlib_get_buffer (vm,
2831b0c9837Ssharath reddy			   tm->threads[thread_index].rx_buffers[i_rx - i]);
2844cd25766SSteven	tm->threads[thread_index].iovecs[i].iov_base = b->data;
2854cd25766SSteven	tm->threads[thread_index].iovecs[i].iov_len = buffer_size;
286cb9cadadSEd Warnicke      }
287cb9cadadSEd Warnicke
2881b0c9837Ssharath reddy    n_bytes_left =
2891b0c9837Ssharath reddy      readv (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs,
2901b0c9837Ssharath reddy	     tm->mtu_buffers);
291cb9cadadSEd Warnicke    n_bytes_in_packet = n_bytes_left;
292cb9cadadSEd Warnicke    if (n_bytes_left <= 0)
293cb9cadadSEd Warnicke      {
2941b0c9837Ssharath reddy	if (errno != EAGAIN)
2951b0c9837Ssharath reddy	  clib_unix_warning ("readv %d", n_bytes_left);
296cb9cadadSEd Warnicke	return 0;
297cb9cadadSEd Warnicke      }
298cb9cadadSEd Warnicke
2994cd25766SSteven    bi = tm->threads[thread_index].rx_buffers[i_rx];
300cb9cadadSEd Warnicke
301cb9cadadSEd Warnicke    while (1)
302cb9cadadSEd Warnicke      {
3034cd25766SSteven	b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx]);
304cb9cadadSEd Warnicke	b->flags = 0;
305cb9cadadSEd Warnicke	b->current_data = 0;
3061b0c9837Ssharath reddy	b->current_length =
3071b0c9837Ssharath reddy	  n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
308cb9cadadSEd Warnicke
309cb9cadadSEd Warnicke	n_bytes_left -= buffer_size;
310cb9cadadSEd Warnicke
311cb9cadadSEd Warnicke	if (n_bytes_left <= 0)
3121b0c9837Ssharath reddy	  {
3131b0c9837Ssharath reddy	    break;
3141b0c9837Ssharath reddy	  }
315cb9cadadSEd Warnicke
316cb9cadadSEd Warnicke	i_rx--;
317cb9cadadSEd Warnicke	b->flags |= VLIB_BUFFER_NEXT_PRESENT;
3184cd25766SSteven	b->next_buffer = tm->threads[thread_index].rx_buffers[i_rx];
319cb9cadadSEd Warnicke      }
320cb9cadadSEd Warnicke
32107203afeSKeith Burns (alagalah)    /** Interface counters for tuntap interface. */
32207203afeSKeith Burns (alagalah)    vlib_increment_combined_counter
3231b0c9837Ssharath reddy      (vnet_main.interface_main.combined_sw_if_counters
3241b0c9837Ssharath reddy       + VNET_INTERFACE_COUNTER_RX,
3251b0c9837Ssharath reddy       thread_index, tm->sw_if_index, 1, n_bytes_in_packet);
32607203afeSKeith Burns (alagalah)
3274cd25766SSteven    _vec_len (tm->threads[thread_index].rx_buffers) = i_rx;
328cb9cadadSEd Warnicke  }
329cb9cadadSEd Warnicke
330cb9cadadSEd Warnicke  b = vlib_get_buffer (vm, bi);
331cb9cadadSEd Warnicke
332cb9cadadSEd Warnicke  {
333cb9cadadSEd Warnicke    u32 next_index;
334cb9cadadSEd Warnicke    uword n_trace = vlib_get_trace_count (vm, node);
335cb9cadadSEd Warnicke
336cb9cadadSEd Warnicke    vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index;
3371b0c9837Ssharath reddy    vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
338cb9cadadSEd Warnicke
339cb9cadadSEd Warnicke    /*
340cb9cadadSEd Warnicke     * Turn this on if you run into
341cb9cadadSEd Warnicke     * "bad monkey" contexts, and you want to know exactly
342cb9cadadSEd Warnicke     * which nodes they've visited...
343cb9cadadSEd Warnicke     */
344cb9cadadSEd Warnicke    if (VLIB_BUFFER_TRACE_TRAJECTORY)
3451b0c9837Ssharath reddy      b->pre_data[0] = 0;
346cb9cadadSEd Warnicke
347cb9cadadSEd Warnicke    b->error = node->errors[0];
348cb9cadadSEd Warnicke
349cb9cadadSEd Warnicke    if (tm->is_ether)
350cb9cadadSEd Warnicke      {
3518bdc63b6SDamjan Marion	next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
352cb9cadadSEd Warnicke      }
353cb9cadadSEd Warnicke    else
354cb9cadadSEd Warnicke      switch (b->data[0] & 0xf0)
3551b0c9837Ssharath reddy	{
3561b0c9837Ssharath reddy	case 0x40:
3571b0c9837Ssharath reddy	  next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
3581b0c9837Ssharath reddy	  break;
3591b0c9837Ssharath reddy	case 0x60:
3601b0c9837Ssharath reddy	  next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
3611b0c9837Ssharath reddy	  break;
3621b0c9837Ssharath reddy	default:
3631b0c9837Ssharath reddy	  next_index = VNET_DEVICE_INPUT_NEXT_DROP;
3641b0c9837Ssharath reddy	  break;
3651b0c9837Ssharath reddy	}
366cb9cadadSEd Warnicke
367cb9cadadSEd Warnicke    /* The linux kernel couldn't care less if our interface is up */
368cb9cadadSEd Warnicke    if (tm->have_normal_interface)
369cb9cadadSEd Warnicke      {
3701b0c9837Ssharath reddy	vnet_main_t *vnm = vnet_get_main ();
3711b0c9837Ssharath reddy	vnet_sw_interface_t *si;
3721b0c9837Ssharath reddy	si = vnet_get_sw_interface (vnm, tm->sw_if_index);
3731b0c9837Ssharath reddy	if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
3741b0c9837Ssharath reddy	  next_index = VNET_DEVICE_INPUT_NEXT_DROP;
375cb9cadadSEd Warnicke      }
376cb9cadadSEd Warnicke
37735af9e50SDamjan Marion    vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b);
3782231150bSDamjan Marion
379cb9cadadSEd Warnicke    vlib_set_next_frame_buffer (vm, node, next_index, bi);
380cb9cadadSEd Warnicke
381cb9cadadSEd Warnicke    if (n_trace > 0)
382cb9cadadSEd Warnicke      {
3831b0c9837Ssharath reddy	vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 1);
3841b0c9837Ssharath reddy	vlib_set_trace_count (vm, node, n_trace - 1);
385cb9cadadSEd Warnicke      }
386cb9cadadSEd Warnicke  }
387cb9cadadSEd Warnicke
388cb9cadadSEd Warnicke  return 1;
389cb9cadadSEd Warnicke}
390cb9cadadSEd Warnicke
/**
 * @brief TUNTAP_RX error strings
 *
 * Index 0 is the error tuntap_rx attaches to every received buffer.
 */
static char *tuntap_rx_error_strings[] = {
  "unknown packet type",
};

3981b0c9837Ssharath reddy/* *INDENT-OFF* */
399cb9cadadSEd WarnickeVLIB_REGISTER_NODE (tuntap_rx_node,static) = {
400cb9cadadSEd Warnicke  .function = tuntap_rx,
4017ca5aaacSDamjan Marion  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
402cb9cadadSEd Warnicke  .name = "tuntap-rx",
40351327ac5SDamjan Marion  .sibling_of = "device-input",
404cb9cadadSEd Warnicke  .type = VLIB_NODE_TYPE_INPUT,
405cb9cadadSEd Warnicke  .state = VLIB_NODE_STATE_INTERRUPT,
406cb9cadadSEd Warnicke  .vector_size = 4,
407cb9cadadSEd Warnicke  .n_errors = 1,
408cb9cadadSEd Warnicke  .error_strings = tuntap_rx_error_strings,
409cb9cadadSEd Warnicke};
4101b0c9837Ssharath reddy/* *INDENT-ON* */
411cb9cadadSEd Warnicke
41207203afeSKeith Burns (alagalah)/**
41307203afeSKeith Burns (alagalah) * @brief Gets called when file descriptor is ready from epoll.
41407203afeSKeith Burns (alagalah) *
41556dd5438SDamjan Marion * @param *uf - clib_file_t
41607203afeSKeith Burns (alagalah) *
41707203afeSKeith Burns (alagalah) * @return error - clib_error_t
41807203afeSKeith Burns (alagalah) */
4191b0c9837Ssharath reddystatic clib_error_t *
4201b0c9837Ssharath reddytuntap_read_ready (clib_file_t * uf)
421cb9cadadSEd Warnicke{
4221b0c9837Ssharath reddy  vlib_main_t *vm = vlib_get_main ();
423cb9cadadSEd Warnicke  vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index);
424cb9cadadSEd Warnicke  return 0;
425cb9cadadSEd Warnicke}
426cb9cadadSEd Warnicke
42707203afeSKeith Burns (alagalah)/**
42807203afeSKeith Burns (alagalah) * @brief Clean up the tun/tap device
42907203afeSKeith Burns (alagalah) *
43007203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
43107203afeSKeith Burns (alagalah) *
43207203afeSKeith Burns (alagalah) * @return error - clib_error_t
43307203afeSKeith Burns (alagalah) *
434cb9cadadSEd Warnicke */
435cb9cadadSEd Warnickestatic clib_error_t *
436cb9cadadSEd Warnicketuntap_exit (vlib_main_t * vm)
437cb9cadadSEd Warnicke{
438cb9cadadSEd Warnicke  tuntap_main_t *tm = &tuntap_main;
439cb9cadadSEd Warnicke  struct ifreq ifr;
440cb9cadadSEd Warnicke  int sfd;
441cb9cadadSEd Warnicke
442cb9cadadSEd Warnicke  /* Not present. */
4431b0c9837Ssharath reddy  if (!tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0)
444cb9cadadSEd Warnicke    return 0;
445cb9cadadSEd Warnicke
446cb9cadadSEd Warnicke  sfd = socket (AF_INET, SOCK_STREAM, 0);
447cb9cadadSEd Warnicke  if (sfd < 0)
4481b0c9837Ssharath reddy    clib_unix_warning ("provisioning socket");
449cb9cadadSEd Warnicke
450b7b92993SDave Barach  clib_memset (&ifr, 0, sizeof (ifr));
4511b0c9837Ssharath reddy  strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
452cb9cadadSEd Warnicke
453cb9cadadSEd Warnicke  /* get flags, modify to bring down interface... */
454cb9cadadSEd Warnicke  if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0)
455cb9cadadSEd Warnicke    clib_unix_warning ("SIOCGIFFLAGS");
456cb9cadadSEd Warnicke
457cb9cadadSEd Warnicke  ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
458cb9cadadSEd Warnicke
459cb9cadadSEd Warnicke  if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0)
460cb9cadadSEd Warnicke    clib_unix_warning ("SIOCSIFFLAGS");
461cb9cadadSEd Warnicke
462cb9cadadSEd Warnicke  /* Turn off persistence */
463cb9cadadSEd Warnicke  if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0)
464cb9cadadSEd Warnicke    clib_unix_warning ("TUNSETPERSIST");
4651b0c9837Ssharath reddy  close (tm->dev_tap_fd);
466f9c231ecSDave Barach  if (tm->dev_net_tun_fd >= 0)
4671b0c9837Ssharath reddy    close (tm->dev_net_tun_fd);
4686f6f34f6SDave Barach  if (sfd >= 0)
4696f6f34f6SDave Barach    close (sfd);
470cb9cadadSEd Warnicke
471cb9cadadSEd Warnicke  return 0;
472cb9cadadSEd Warnicke}
473cb9cadadSEd Warnicke

/* Run tuntap_exit when the VLIB main loop exits. */
VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit);

/**
 * @brief CLI function for tun/tap config
 *
 * @param *vm - vlib_main_t
 * @param *input - unformat_input_t
 *
 * @return error - clib_error_t
 *
 */
485cb9cadadSEd Warnickestatic clib_error_t *
486cb9cadadSEd Warnicketuntap_config (vlib_main_t * vm, unformat_input_t * input)
487cb9cadadSEd Warnicke{
488cb9cadadSEd Warnicke  tuntap_main_t *tm = &tuntap_main;
4891b0c9837Ssharath reddy  clib_error_t *error = 0;
490cb9cadadSEd Warnicke  struct ifreq ifr;
4911b0c9837Ssharath reddy  u8 *name;
492cb9cadadSEd Warnicke  int flags = IFF_TUN | IFF_NO_PI;
493cb9cadadSEd Warnicke  int is_enabled = 0, is_ether = 0, have_normal_interface = 0;
4948934a045SDamjan Marion  const uword buffer_size = vlib_buffer_get_default_data_size (vm);
495cb9cadadSEd Warnicke
496cb9cadadSEd Warnicke  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
497cb9cadadSEd Warnicke    {
498cb9cadadSEd Warnicke      if (unformat (input, "mtu %d", &tm->mtu_bytes))
499cb9cadadSEd Warnicke	;
500cb9cadadSEd Warnicke      else if (unformat (input, "enable"))
5011b0c9837Ssharath reddy	is_enabled = 1;
502cb9cadadSEd Warnicke      else if (unformat (input, "disable"))
5031b0c9837Ssharath reddy	is_enabled = 0;
5041b0c9837Ssharath reddy      else if (unformat (input, "ethernet") || unformat (input, "ether"))
5051b0c9837Ssharath reddy	is_ether = 1;
506cb9cadadSEd Warnicke      else if (unformat (input, "have-normal-interface") ||
5071b0c9837Ssharath reddy	       unformat (input, "have-normal"))
5081b0c9837Ssharath reddy	have_normal_interface = 1;
509cb9cadadSEd Warnicke      else if (unformat (input, "name %s", &name))
510cb9cadadSEd Warnicke	tm->tun_name = (char *) name;
511cb9cadadSEd Warnicke      else
512cb9cadadSEd Warnicke	return clib_error_return (0, "unknown input `%U'",
513cb9cadadSEd Warnicke				  format_unformat_error, input);
514cb9cadadSEd Warnicke    }
515cb9cadadSEd Warnicke
516cb9cadadSEd Warnicke  tm->dev_net_tun_fd = -1;
517cb9cadadSEd Warnicke  tm->dev_tap_fd = -1;
518cb9cadadSEd Warnicke
519cb9cadadSEd Warnicke  if (is_enabled == 0)
520cb9cadadSEd Warnicke    return 0;
521cb9cadadSEd Warnicke
5221b0c9837Ssharath reddy  if (geteuid ())
523cb9cadadSEd Warnicke    {
524cb9cadadSEd Warnicke      clib_warning ("tuntap disabled: must be superuser");
525cb9cadadSEd Warnicke      return 0;
5261b0c9837Ssharath reddy    }
527cb9cadadSEd Warnicke
528cb9cadadSEd Warnicke  tm->is_ether = is_ether;
529cb9cadadSEd Warnicke  tm->have_normal_interface = have_normal_interface;
530cb9cadadSEd Warnicke
531cb9cadadSEd Warnicke  if (is_ether)
532cb9cadadSEd Warnicke    flags = IFF_TAP | IFF_NO_PI;
533cb9cadadSEd Warnicke
534cb9cadadSEd Warnicke  if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
535cb9cadadSEd Warnicke    {
536cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "open /dev/net/tun");
537cb9cadadSEd Warnicke      goto done;
538cb9cadadSEd Warnicke    }
539cb9cadadSEd Warnicke
540b7b92993SDave Barach  clib_memset (&ifr, 0, sizeof (ifr));
5411b0c9837Ssharath reddy  strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
542cb9cadadSEd Warnicke  ifr.ifr_flags = flags;
5431b0c9837Ssharath reddy  if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *) &ifr) < 0)
544cb9cadadSEd Warnicke    {
545cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "ioctl TUNSETIFF");
546cb9cadadSEd Warnicke      goto done;
547cb9cadadSEd Warnicke    }
5481b0c9837Ssharath reddy
549cb9cadadSEd Warnicke  /* Make it persistent, at least until we split. */
550cb9cadadSEd Warnicke  if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0)
551cb9cadadSEd Warnicke    {
552cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "TUNSETPERSIST");
553cb9cadadSEd Warnicke      goto done;
554cb9cadadSEd Warnicke    }
555cb9cadadSEd Warnicke
556cb9cadadSEd Warnicke  /* Open a provisioning socket */
5571b0c9837Ssharath reddy  if ((tm->dev_tap_fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
558cb9cadadSEd Warnicke    {
559cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "socket");
560cb9cadadSEd Warnicke      goto done;
561cb9cadadSEd Warnicke    }
562cb9cadadSEd Warnicke
563cb9cadadSEd Warnicke  /* Find the interface index. */
564cb9cadadSEd Warnicke  {
565cb9cadadSEd Warnicke    struct ifreq ifr;
566cb9cadadSEd Warnicke    struct sockaddr_ll sll;
567cb9cadadSEd Warnicke
568b7b92993SDave Barach    clib_memset (&ifr, 0, sizeof (ifr));
5691b0c9837Ssharath reddy    strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name) - 1);
5701b0c9837Ssharath reddy    if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0)
571cb9cadadSEd Warnicke      {
572cb9cadadSEd Warnicke	error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX");
573cb9cadadSEd Warnicke	goto done;
574cb9cadadSEd Warnicke      }
575cb9cadadSEd Warnicke
576cb9cadadSEd Warnicke    /* Bind the provisioning socket to the interface. */
577b7b92993SDave Barach    clib_memset (&sll, 0, sizeof (sll));
5781b0c9837Ssharath reddy    sll.sll_family = AF_PACKET;
5791b0c9837Ssharath reddy    sll.sll_ifindex = ifr.ifr_ifindex;
5801b0c9837Ssharath reddy    sll.sll_protocol = htons (ETH_P_ALL);
581cb9cadadSEd Warnicke
5821b0c9837Ssharath reddy    if (bind (tm->dev_tap_fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
583cb9cadadSEd Warnicke      {
584cb9cadadSEd Warnicke	error = clib_error_return_unix (0, "bind");
585cb9cadadSEd Warnicke	goto done;
586cb9cadadSEd Warnicke      }
587cb9cadadSEd Warnicke  }
588cb9cadadSEd Warnicke
589cb9cadadSEd Warnicke  /* non-blocking I/O on /dev/tapX */
590cb9cadadSEd Warnicke  {
591cb9cadadSEd Warnicke    int one = 1;
592cb9cadadSEd Warnicke    if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0)
593cb9cadadSEd Warnicke      {
594cb9cadadSEd Warnicke	error = clib_error_return_unix (0, "ioctl FIONBIO");
595cb9cadadSEd Warnicke	goto done;
596cb9cadadSEd Warnicke      }
597cb9cadadSEd Warnicke  }
598cb9cadadSEd Warnicke
599cb9cadadSEd Warnicke  tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
600cb9cadadSEd Warnicke
601cb9cadadSEd Warnicke  ifr.ifr_mtu = tm->mtu_bytes;
602cb9cadadSEd Warnicke  if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
603cb9cadadSEd Warnicke    {
604cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "ioctl SIOCSIFMTU");
605cb9cadadSEd Warnicke      goto done;
606cb9cadadSEd Warnicke    }
607cb9cadadSEd Warnicke
608cb9cadadSEd Warnicke  /* get flags, modify to bring up interface... */
609cb9cadadSEd Warnicke  if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
610cb9cadadSEd Warnicke    {
611cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS");
612cb9cadadSEd Warnicke      goto done;
613cb9cadadSEd Warnicke    }
614cb9cadadSEd Warnicke
615cb9cadadSEd Warnicke  ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
616cb9cadadSEd Warnicke
617cb9cadadSEd Warnicke  if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
618cb9cadadSEd Warnicke    {
619cb9cadadSEd Warnicke      error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS");
620cb9cadadSEd Warnicke      goto done;
621cb9cadadSEd Warnicke    }
622cb9cadadSEd Warnicke
623cb9cadadSEd Warnicke  if (is_ether)
624cb9cadadSEd Warnicke    {
625cb9cadadSEd Warnicke      if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0)
6261b0c9837Ssharath reddy	{
6271b0c9837Ssharath reddy	  error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR");
6281b0c9837Ssharath reddy	  goto done;
6291b0c9837Ssharath reddy	}
630cb9cadadSEd Warnicke      else
631178cf493SDave Barach	clib_memcpy_fast (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6);
632cb9cadadSEd Warnicke    }
633cb9cadadSEd Warnicke
634cb9cadadSEd Warnicke  if (have_normal_interface)
635cb9cadadSEd Warnicke    {
6361b0c9837Ssharath reddy      vnet_main_t *vnm = vnet_get_main ();
637cb9cadadSEd Warnicke      error = ethernet_register_interface
6381b0c9837Ssharath reddy	(vnm, tuntap_dev_class.index, 0 /* device instance */ ,
6391b0c9837Ssharath reddy	 tm->ether_dst_mac /* ethernet address */ ,
6401b0c9837Ssharath reddy	 &tm->hw_if_index, 0 /* flag change */ );
641cb9cadadSEd Warnicke      if (error)
6421b0c9837Ssharath reddy	clib_error_report (error);
643cb9cadadSEd Warnicke      tm->sw_if_index = tm->hw_if_index;
644cb9cadadSEd Warnicke      vm->os_punt_frame = tuntap_nopunt_frame;
645cb9cadadSEd Warnicke    }
646cb9cadadSEd Warnicke  else
647cb9cadadSEd Warnicke    {
6481b0c9837Ssharath reddy      vnet_main_t *vnm = vnet_get_main ();
6491b0c9837Ssharath reddy      vnet_hw_interface_t *hi;
6501b0c9837Ssharath reddy
651cb9cadadSEd Warnicke      vm->os_punt_frame = tuntap_punt_frame;
6521b0c9837Ssharath reddy
653cb9cadadSEd Warnicke      tm->hw_if_index = vnet_register_interface
6541b0c9837Ssharath reddy	(vnm, tuntap_dev_class.index, 0 /* device instance */ ,
6551b0c9837Ssharath reddy	 tuntap_interface_class.index, 0);
656cb9cadadSEd Warnicke      hi = vnet_get_hw_interface (vnm, tm->hw_if_index);
657cb9cadadSEd Warnicke      tm->sw_if_index = hi->sw_if_index;
6581b0c9837Ssharath reddy
659cb9cadadSEd Warnicke      /* Interface is always up. */
6601b0c9837Ssharath reddy      vnet_hw_interface_set_flags (vnm, tm->hw_if_index,
6611b0c9837Ssharath reddy				   VNET_HW_INTERFACE_FLAG_LINK_UP);
6621b0c9837Ssharath reddy      vnet_sw_interface_set_flags (vnm, tm->sw_if_index,
6631b0c9837Ssharath reddy				   VNET_SW_INTERFACE_FLAG_ADMIN_UP);
664cb9cadadSEd Warnicke    }
665cb9cadadSEd Warnicke
666cb9cadadSEd Warnicke  {
6671b0c9837Ssharath reddy    clib_file_t template = { 0 };
668cb9cadadSEd Warnicke    template.read_function = tuntap_read_ready;
669cb9cadadSEd Warnicke    template.file_descriptor = tm->dev_net_tun_fd;
67056dd5438SDamjan Marion    tm->clib_file_index = clib_file_add (&file_main, &template);
671cb9cadadSEd Warnicke  }
672cb9cadadSEd Warnicke
6731b0c9837Ssharath reddydone:
674cb9cadadSEd Warnicke  if (error)
675cb9cadadSEd Warnicke    {
676cb9cadadSEd Warnicke      if (tm->dev_net_tun_fd >= 0)
677cb9cadadSEd Warnicke	close (tm->dev_net_tun_fd);
678cb9cadadSEd Warnicke      if (tm->dev_tap_fd >= 0)
679cb9cadadSEd Warnicke	close (tm->dev_tap_fd);
680cb9cadadSEd Warnicke    }
681cb9cadadSEd Warnicke
682cb9cadadSEd Warnicke  return error;
683cb9cadadSEd Warnicke}
684cb9cadadSEd Warnicke
/* Hand tuntap_config to VLIB_CONFIG_FUNCTION under the name "tuntap". */
VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap");
686cb9cadadSEd Warnicke
68707203afeSKeith Burns (alagalah)/**
68807203afeSKeith Burns (alagalah) * @brief Add or Del IP4 address to tun/tap interface
68907203afeSKeith Burns (alagalah) *
69007203afeSKeith Burns (alagalah) * @param *im - ip4_main_t
69107203afeSKeith Burns (alagalah) * @param opaque - uword
69207203afeSKeith Burns (alagalah) * @param sw_if_index - u32
69307203afeSKeith Burns (alagalah) * @param *address - ip4_address_t
69407203afeSKeith Burns (alagalah) * @param is_delete - u32
69507203afeSKeith Burns (alagalah) *
69607203afeSKeith Burns (alagalah) */
697cb9cadadSEd Warnickevoid
698cb9cadadSEd Warnicketuntap_ip4_add_del_interface_address (ip4_main_t * im,
699cb9cadadSEd Warnicke				      uword opaque,
700cb9cadadSEd Warnicke				      u32 sw_if_index,
701cb9cadadSEd Warnicke				      ip4_address_t * address,
702cb9cadadSEd Warnicke				      u32 address_length,
7031b0c9837Ssharath reddy				      u32 if_address_index, u32 is_delete)
704cb9cadadSEd Warnicke{
7051b0c9837Ssharath reddy  tuntap_main_t *tm = &tuntap_main;
706cb9cadadSEd Warnicke  struct ifreq ifr;
7071b0c9837Ssharath reddy  subif_address_t subif_addr, *ap;
7081b0c9837Ssharath reddy  uword *p;
709cb9cadadSEd Warnicke
71007203afeSKeith Burns (alagalah)  /** Tuntap disabled, or using a "normal" interface. */
7111b0c9837Ssharath reddy  if (tm->have_normal_interface || tm->dev_tap_fd < 0)
712cb9cadadSEd Warnicke    return;
713cb9cadadSEd Warnicke
714e8bad978SNeale Ranns  /* if the address is being applied to an interface that is not in
715e8bad978SNeale Ranns   * the same table/VRF as this tap, then ignore it.
716bdc0e6b7SPaul Vinciguerra   * If we don't do this overlapping address spaces in the different tables
717e8bad978SNeale Ranns   * breaks the linux host's routing tables */
7181b0c9837Ssharath reddy  if (fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
7191b0c9837Ssharath reddy					   sw_if_index) !=
7201b0c9837Ssharath reddy      fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, tm->sw_if_index))
7211b0c9837Ssharath reddy    return;
722e8bad978SNeale Ranns
72307203afeSKeith Burns (alagalah)  /** See if we already know about this subif */
724b7b92993SDave Barach  clib_memset (&subif_addr, 0, sizeof (subif_addr));
725cb9cadadSEd Warnicke  subif_addr.sw_if_index = sw_if_index;
726178cf493SDave Barach  clib_memcpy_fast (&subif_addr.addr, address, sizeof (*address));
72707203afeSKeith Burns (alagalah)
728cb9cadadSEd Warnicke  p = mhash_get (&tm->subif_mhash, &subif_addr);
729cb9cadadSEd Warnicke
730cb9cadadSEd Warnicke  if (p)
731cb9cadadSEd Warnicke    ap = pool_elt_at_index (tm->subifs, p[0]);
732cb9cadadSEd Warnicke  else
733cb9cadadSEd Warnicke    {
734cb9cadadSEd Warnicke      pool_get (tm->subifs, ap);
735cb9cadadSEd Warnicke      *ap = subif_addr;
736cb9cadadSEd Warnicke      mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
737cb9cadadSEd Warnicke    }
738cb9cadadSEd Warnicke
739cb9cadadSEd Warnicke  /* Use subif pool index to select alias device. */
740b7b92993SDave Barach  clib_memset (&ifr, 0, sizeof (ifr));
7411b0c9837Ssharath reddy  snprintf (ifr.ifr_name, sizeof (ifr.ifr_name),
7421b0c9837Ssharath reddy	    "%s:%d", tm->tun_name, (int) (ap - tm->subifs));
743cb9cadadSEd Warnicke
74480e8816bSIgor Mikhailov (imichail)  /* the tuntap punt/inject is enabled for IPv4 RX so long as
74580e8816bSIgor Mikhailov (imichail)   * any vpp interface has an IPv4 address.
74680e8816bSIgor Mikhailov (imichail)   * this is also ref counted.
74780e8816bSIgor Mikhailov (imichail)   */
74880e8816bSIgor Mikhailov (imichail)  ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
74980e8816bSIgor Mikhailov (imichail)
7501b0c9837Ssharath reddy  if (!is_delete)
751cb9cadadSEd Warnicke    {
7521b0c9837Ssharath reddy      struct sockaddr_in *sin;
753cb9cadadSEd Warnicke
7541b0c9837Ssharath reddy      sin = (struct sockaddr_in *) &ifr.ifr_addr;
755cb9cadadSEd Warnicke
756cb9cadadSEd Warnicke      /* Set ipv4 address, netmask. */
757cb9cadadSEd Warnicke      sin->sin_family = AF_INET;
758178cf493SDave Barach      clib_memcpy_fast (&sin->sin_addr.s_addr, address, 4);
759cb9cadadSEd Warnicke      if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
760cb9cadadSEd Warnicke	clib_unix_warning ("ioctl SIOCSIFADDR");
7611b0c9837Ssharath reddy
762cb9cadadSEd Warnicke      sin->sin_addr.s_addr = im->fib_masks[address_length];
763cb9cadadSEd Warnicke      if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
764cb9cadadSEd Warnicke	clib_unix_warning ("ioctl SIOCSIFNETMASK");
765cb9cadadSEd Warnicke    }
766cb9cadadSEd Warnicke  else
767cb9cadadSEd Warnicke    {
7681b0c9837Ssharath reddy      mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */ );
769cb9cadadSEd Warnicke      pool_put (tm->subifs, ap);
770cb9cadadSEd Warnicke    }
771cb9cadadSEd Warnicke
772cb9cadadSEd Warnicke  /* get flags, modify to bring up interface... */
773cb9cadadSEd Warnicke  if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
774cb9cadadSEd Warnicke    clib_unix_warning ("ioctl SIOCGIFFLAGS");
775cb9cadadSEd Warnicke
776cb9cadadSEd Warnicke  if (is_delete)
777cb9cadadSEd Warnicke    ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
778cb9cadadSEd Warnicke  else
779cb9cadadSEd Warnicke    ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
780cb9cadadSEd Warnicke
781cb9cadadSEd Warnicke  if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
782cb9cadadSEd Warnicke    clib_unix_warning ("ioctl SIOCSIFFLAGS");
783cb9cadadSEd Warnicke}
784cb9cadadSEd Warnicke
/**
 * @brief workaround for a known include file bug.
 * including @c <linux/ipv6.h> causes multiple definitions if
 * @c <netinet/in.h> is also included, so the request structure used
 * for the IPv6 address ioctls is declared locally instead.
 */
struct in6_ifreq
{
  /* IPv6 address to add or delete */
  struct in6_addr ifr6_addr;
  /* prefix length of ifr6_addr */
  u32 ifr6_prefixlen;
  /* index of the interface the address applies to */
  int ifr6_ifindex;
};
796cb9cadadSEd Warnicke
79707203afeSKeith Burns (alagalah)/**
79816bcf7d8SChris Luke * @brief Add or Del tun/tap interface address.
79907203afeSKeith Burns (alagalah) *
800cb9cadadSEd Warnicke * Both the v6 interface address API and the way ifconfig
801bdc0e6b7SPaul Vinciguerra * displays subinterfaces differ from their v4 counterparts.
802cb9cadadSEd Warnicke * The code given here seems to work but YMMV.
80307203afeSKeith Burns (alagalah) *
80407203afeSKeith Burns (alagalah) * @param *im - ip6_main_t
80507203afeSKeith Burns (alagalah) * @param opaque - uword
80607203afeSKeith Burns (alagalah) * @param sw_if_index - u32
80707203afeSKeith Burns (alagalah) * @param *address - ip6_address_t
80807203afeSKeith Burns (alagalah) * @param address_length - u32
80907203afeSKeith Burns (alagalah) * @param if_address_index - u32
81007203afeSKeith Burns (alagalah) * @param is_delete - u32
811cb9cadadSEd Warnicke */
812cb9cadadSEd Warnickevoid
813cb9cadadSEd Warnicketuntap_ip6_add_del_interface_address (ip6_main_t * im,
814cb9cadadSEd Warnicke				      uword opaque,
815cb9cadadSEd Warnicke				      u32 sw_if_index,
816cb9cadadSEd Warnicke				      ip6_address_t * address,
817cb9cadadSEd Warnicke				      u32 address_length,
8181b0c9837Ssharath reddy				      u32 if_address_index, u32 is_delete)
819cb9cadadSEd Warnicke{
8201b0c9837Ssharath reddy  tuntap_main_t *tm = &tuntap_main;
821cb9cadadSEd Warnicke  struct ifreq ifr;
822cb9cadadSEd Warnicke  struct in6_ifreq ifr6;
8231b0c9837Ssharath reddy  subif_address_t subif_addr, *ap;
8241b0c9837Ssharath reddy  uword *p;
825cb9cadadSEd Warnicke
826cb9cadadSEd Warnicke  /* Tuntap disabled, or using a "normal" interface. */
8271b0c9837Ssharath reddy  if (tm->have_normal_interface || tm->dev_tap_fd < 0)
828cb9cadadSEd Warnicke    return;
829cb9cadadSEd Warnicke
830e8bad978SNeale Ranns  /* if the address is being applied to an interface that is not in
831e8bad978SNeale Ranns   * the same table/VRF as this tap, then ignore it.
832bdc0e6b7SPaul Vinciguerra   * If we don't do this overlapping address spaces in the different tables
833e8bad978SNeale Ranns   * breaks the linux host's routing tables */
8341b0c9837Ssharath reddy  if (fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
8351b0c9837Ssharath reddy					   sw_if_index) !=
8361b0c9837Ssharath reddy      fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, tm->sw_if_index))
8371b0c9837Ssharath reddy    return;
838e8bad978SNeale Ranns
839cb9cadadSEd Warnicke  /* See if we already know about this subif */
840b7b92993SDave Barach  clib_memset (&subif_addr, 0, sizeof (subif_addr));
841cb9cadadSEd Warnicke  subif_addr.sw_if_index = sw_if_index;
842cb9cadadSEd Warnicke  subif_addr.is_v6 = 1;
843178cf493SDave Barach  clib_memcpy_fast (&subif_addr.addr, address, sizeof (*address));
84407203afeSKeith Burns (alagalah)
845cb9cadadSEd Warnicke  p = mhash_get (&tm->subif_mhash, &subif_addr);
846cb9cadadSEd Warnicke
847cb9cadadSEd Warnicke  if (p)
848cb9cadadSEd Warnicke    ap = pool_elt_at_index (tm->subifs, p[0]);
849cb9cadadSEd Warnicke  else
850cb9cadadSEd Warnicke    {
851cb9cadadSEd Warnicke      pool_get (tm->subifs, ap);
852cb9cadadSEd Warnicke      *ap = subif_addr;
853cb9cadadSEd Warnicke      mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
854cb9cadadSEd Warnicke    }
855cb9cadadSEd Warnicke
856cb9cadadSEd Warnicke  /* Use subif pool index to select alias device. */
857b7b92993SDave Barach  clib_memset (&ifr, 0, sizeof (ifr));
858b7b92993SDave Barach  clib_memset (&ifr6, 0, sizeof (ifr6));
8591b0c9837Ssharath reddy  snprintf (ifr.ifr_name, sizeof (ifr.ifr_name),
8601b0c9837Ssharath reddy	    "%s:%d", tm->tun_name, (int) (ap - tm->subifs));
861cb9cadadSEd Warnicke
86280e8816bSIgor Mikhailov (imichail)  /* the tuntap punt/inject is enabled for IPv6 RX so long as
86380e8816bSIgor Mikhailov (imichail)   * any vpp interface has an IPv6 address.
86480e8816bSIgor Mikhailov (imichail)   * this is also ref counted.
86580e8816bSIgor Mikhailov (imichail)   */
86680e8816bSIgor Mikhailov (imichail)  ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete);
86780e8816bSIgor Mikhailov (imichail)
8681b0c9837Ssharath reddy  if (!is_delete)
869cb9cadadSEd Warnicke    {
870cb9cadadSEd Warnicke      int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
871cb9cadadSEd Warnicke      if (sockfd < 0)
8721b0c9837Ssharath reddy	clib_unix_warning ("get ifindex socket");
873cb9cadadSEd Warnicke
874cb9cadadSEd Warnicke      if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
8751b0c9837Ssharath reddy	clib_unix_warning ("get ifindex");
876cb9cadadSEd Warnicke
877cb9cadadSEd Warnicke      ifr6.ifr6_ifindex = ifr.ifr_ifindex;
878cb9cadadSEd Warnicke      ifr6.ifr6_prefixlen = address_length;
879178cf493SDave Barach      clib_memcpy_fast (&ifr6.ifr6_addr, address, 16);
880cb9cadadSEd Warnicke
881cb9cadadSEd Warnicke      if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0)
8821b0c9837Ssharath reddy	clib_unix_warning ("set address");
883cb9cadadSEd Warnicke
8846f6f34f6SDave Barach      if (sockfd >= 0)
8851b0c9837Ssharath reddy	close (sockfd);
886cb9cadadSEd Warnicke    }
887cb9cadadSEd Warnicke  else
888cb9cadadSEd Warnicke    {
889cb9cadadSEd Warnicke      int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
890cb9cadadSEd Warnicke      if (sockfd < 0)
8911b0c9837Ssharath reddy	clib_unix_warning ("get ifindex socket");
892cb9cadadSEd Warnicke
893cb9cadadSEd Warnicke      if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
8941b0c9837Ssharath reddy	clib_unix_warning ("get ifindex");
895cb9cadadSEd Warnicke
896cb9cadadSEd Warnicke      ifr6.ifr6_ifindex = ifr.ifr_ifindex;
897cb9cadadSEd Warnicke      ifr6.ifr6_prefixlen = address_length;
898178cf493SDave Barach      clib_memcpy_fast (&ifr6.ifr6_addr, address, 16);
899cb9cadadSEd Warnicke
900cb9cadadSEd Warnicke      if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0)
9011b0c9837Ssharath reddy	clib_unix_warning ("del address");
902cb9cadadSEd Warnicke
903f9c231ecSDave Barach      if (sockfd >= 0)
9041b0c9837Ssharath reddy	close (sockfd);
905cb9cadadSEd Warnicke
9061b0c9837Ssharath reddy      mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */ );
907cb9cadadSEd Warnicke      pool_put (tm->subifs, ap);
908cb9cadadSEd Warnicke    }
909cb9cadadSEd Warnicke}
910cb9cadadSEd Warnicke
91107203afeSKeith Burns (alagalah)/**
91207203afeSKeith Burns (alagalah) * @brief TX the tun/tap frame
91307203afeSKeith Burns (alagalah) *
91407203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
91507203afeSKeith Burns (alagalah) * @param *node - vlib_node_runtime_t
91607203afeSKeith Burns (alagalah) * @param *frame - vlib_frame_t
91707203afeSKeith Burns (alagalah) *
91807203afeSKeith Burns (alagalah) */
919cb9cadadSEd Warnickestatic void
920cb9cadadSEd Warnicketuntap_punt_frame (vlib_main_t * vm,
9211b0c9837Ssharath reddy		   vlib_node_runtime_t * node, vlib_frame_t * frame)
922cb9cadadSEd Warnicke{
923cb9cadadSEd Warnicke  tuntap_tx (vm, node, frame);
924cb9cadadSEd Warnicke  vlib_frame_free (vm, node, frame);
925cb9cadadSEd Warnicke}
926cb9cadadSEd Warnicke
92707203afeSKeith Burns (alagalah)/**
92807203afeSKeith Burns (alagalah) * @brief Free the tun/tap frame
92907203afeSKeith Burns (alagalah) *
93007203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
93107203afeSKeith Burns (alagalah) * @param *node - vlib_node_runtime_t
93207203afeSKeith Burns (alagalah) * @param *frame - vlib_frame_t
93307203afeSKeith Burns (alagalah) *
93407203afeSKeith Burns (alagalah) */
935cb9cadadSEd Warnickestatic void
936cb9cadadSEd Warnicketuntap_nopunt_frame (vlib_main_t * vm,
9371b0c9837Ssharath reddy		     vlib_node_runtime_t * node, vlib_frame_t * frame)
938cb9cadadSEd Warnicke{
939a3d5986aSDamjan Marion  u32 *buffers = vlib_frame_vector_args (frame);
940cb9cadadSEd Warnicke  uword n_packets = frame->n_vectors;
941cb9cadadSEd Warnicke  vlib_buffer_free (vm, buffers, n_packets);
942cb9cadadSEd Warnicke  vlib_frame_free (vm, node, frame);
943cb9cadadSEd Warnicke}
944cb9cadadSEd Warnicke
/*
 * Hardware interface class named "tuntap", flagged point-to-point.
 * File-local (declared static).
 */
/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
  .name = "tuntap",
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/* *INDENT-ON* */
951cb9cadadSEd Warnicke
95207203afeSKeith Burns (alagalah)/**
95307203afeSKeith Burns (alagalah) * @brief Format tun/tap interface name
95407203afeSKeith Burns (alagalah) *
95507203afeSKeith Burns (alagalah) * @param *s - u8 - formatter string
95607203afeSKeith Burns (alagalah) * @param *args - va_list
95707203afeSKeith Burns (alagalah) *
95807203afeSKeith Burns (alagalah) * @return *s - u8 - formatted string
95907203afeSKeith Burns (alagalah) *
96007203afeSKeith Burns (alagalah) */
9611b0c9837Ssharath reddystatic u8 *
9621b0c9837Ssharath reddyformat_tuntap_interface_name (u8 * s, va_list * args)
963cb9cadadSEd Warnicke{
964cb9cadadSEd Warnicke  u32 i = va_arg (*args, u32);
965cb9cadadSEd Warnicke
966cb9cadadSEd Warnicke  s = format (s, "tuntap-%d", i);
967cb9cadadSEd Warnicke  return s;
968cb9cadadSEd Warnicke}
969cb9cadadSEd Warnicke
97007203afeSKeith Burns (alagalah)/**
97107203afeSKeith Burns (alagalah) * @brief TX packet out tun/tap
97207203afeSKeith Burns (alagalah) *
97307203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
97407203afeSKeith Burns (alagalah) * @param *node - vlib_node_runtime_t
97507203afeSKeith Burns (alagalah) * @param *frame - vlib_frame_t
97607203afeSKeith Burns (alagalah) *
97707203afeSKeith Burns (alagalah) * @return n_buffers - uword - Packets transmitted
97807203afeSKeith Burns (alagalah) *
97907203afeSKeith Burns (alagalah) */
980cb9cadadSEd Warnickestatic uword
981cb9cadadSEd Warnicketuntap_intfc_tx (vlib_main_t * vm,
9821b0c9837Ssharath reddy		 vlib_node_runtime_t * node, vlib_frame_t * frame)
983cb9cadadSEd Warnicke{
9841b0c9837Ssharath reddy  tuntap_main_t *tm = &tuntap_main;
985a3d5986aSDamjan Marion  u32 *buffers = vlib_frame_vector_args (frame);
986cb9cadadSEd Warnicke  uword n_buffers = frame->n_vectors;
987cb9cadadSEd Warnicke
988cb9cadadSEd Warnicke  /* Normal interface transmit happens only on the normal interface... */
989cb9cadadSEd Warnicke  if (tm->have_normal_interface)
990cb9cadadSEd Warnicke    return tuntap_tx (vm, node, frame);
991cb9cadadSEd Warnicke
992cb9cadadSEd Warnicke  vlib_buffer_free (vm, buffers, n_buffers);
993cb9cadadSEd Warnicke  return n_buffers;
994cb9cadadSEd Warnicke}
995cb9cadadSEd Warnicke
/*
 * Device class named "tuntap": transmit goes through tuntap_intfc_tx and
 * device names are produced by format_tuntap_interface_name.
 * File-local (declared static).
 */
/* *INDENT-OFF* */
VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
  .name = "tuntap",
  .tx_function = tuntap_intfc_tx,
  .format_device_name = format_tuntap_interface_name,
};
/* *INDENT-ON* */
1003cb9cadadSEd Warnicke
100407203afeSKeith Burns (alagalah)/**
100507203afeSKeith Burns (alagalah) * @brief tun/tap node init
100607203afeSKeith Burns (alagalah) *
100707203afeSKeith Burns (alagalah) * @param *vm - vlib_main_t
100807203afeSKeith Burns (alagalah) *
100907203afeSKeith Burns (alagalah) * @return error - clib_error_t
101007203afeSKeith Burns (alagalah) *
101107203afeSKeith Burns (alagalah) */
1012cb9cadadSEd Warnickestatic clib_error_t *
1013cb9cadadSEd Warnicketuntap_init (vlib_main_t * vm)
1014cb9cadadSEd Warnicke{
10151b0c9837Ssharath reddy  ip4_main_t *im4 = &ip4_main;
10161b0c9837Ssharath reddy  ip6_main_t *im6 = &ip6_main;
1017cb9cadadSEd Warnicke  ip4_add_del_interface_address_callback_t cb4;
1018cb9cadadSEd Warnicke  ip6_add_del_interface_address_callback_t cb6;
10191b0c9837Ssharath reddy  tuntap_main_t *tm = &tuntap_main;
10201b0c9837Ssharath reddy  vlib_thread_main_t *m = vlib_get_thread_main ();
1021cb9cadadSEd Warnicke
10221b0c9837Ssharath reddy  mhash_init (&tm->subif_mhash, sizeof (u32), sizeof (subif_address_t));
1023cb9cadadSEd Warnicke
1024cb9cadadSEd Warnicke  cb4.function = tuntap_ip4_add_del_interface_address;
1025cb9cadadSEd Warnicke  cb4.function_opaque = 0;
1026cb9cadadSEd Warnicke  vec_add1 (im4->add_del_interface_address_callbacks, cb4);
1027cb9cadadSEd Warnicke
1028cb9cadadSEd Warnicke  cb6.function = tuntap_ip6_add_del_interface_address;
1029cb9cadadSEd Warnicke  cb6.function_opaque = 0;
1030cb9cadadSEd Warnicke  vec_add1 (im6->add_del_interface_address_callbacks, cb6);
10314cd25766SSteven  vec_validate_aligned (tm->threads, m->n_vlib_mains - 1,
10324cd25766SSteven			CLIB_CACHE_LINE_BYTES);
1033cb9cadadSEd Warnicke
1034cb9cadadSEd Warnicke  return 0;
1035cb9cadadSEd Warnicke}
1036cb9cadadSEd Warnicke
/* Register tuntap_init as an init function ordered after "ip4_init". */
/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tuntap_init) =
{
  .runs_after = VLIB_INITS("ip4_init"),
};
/* *INDENT-ON* */
10431b0c9837Ssharath reddy
10441b0c9837Ssharath reddy/*
10451b0c9837Ssharath reddy * fd.io coding-style-patch-verification: ON
10461b0c9837Ssharath reddy *
10471b0c9837Ssharath reddy * Local Variables:
10481b0c9837Ssharath reddy * eval: (c-set-style "gnu")
10491b0c9837Ssharath reddy * End:
10501b0c9837Ssharath reddy */