device.c revision adff8bfb
1/*
2 *------------------------------------------------------------------
3 * Copyright (c) 2016 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
16 */
17
18#define _GNU_SOURCE
19
20#include <stdint.h>
21#include <net/if.h>
22#include <sys/ioctl.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
26#include <sys/uio.h>            /* for iovec */
27#include <netinet/in.h>
28
29#include <vlib/vlib.h>
30#include <vnet/ip/ip.h>
31#include <vlib/unix/unix.h>
32#include <vnet/ethernet/ethernet.h>
33
34#if DPDK == 1
35#include <vnet/devices/dpdk/dpdk.h>
36#endif
37
38#include "turbotap.h"
39
40vnet_device_class_t turbotap_dev_class;
41
42static u8 * format_turbotap_interface_name (u8 * s, va_list * args)
43{
44  u32 i = va_arg (*args, u32);
45  u32 show_dev_instance = ~0;
46  turbotap_main_t * tr = &turbotap_main;
47
48  if (i < vec_len (tr->show_dev_instance_by_real_dev_instance))
49    show_dev_instance = tr->show_dev_instance_by_real_dev_instance[i];
50
51  if (show_dev_instance != ~0)
52    i = show_dev_instance;
53
54  s = format (s, "turbotap-%d", i);
55  return s;
56}
57
58static void turbotap_set_interface_next_node (vnet_main_t *vnm,
59                                            u32 hw_if_index,
60                                            u32 node_index)
61{
62  turbotap_main_t *tr = &turbotap_main;
63  turbotap_interface_t *ti;
64  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
65
66  ti = vec_elt_at_index (tr->turbotap_interfaces, hw->dev_instance);
67
68  /* Shut off redirection */
69  if (node_index == ~0)
70    {
71      ti->per_interface_next_index = node_index;
72      return;
73    }
74
75  ti->per_interface_next_index =
76    vlib_node_add_next (tr->vlib_main, turbotap_rx_node.index, node_index);
77}
78
79static_always_inline uword
80turbotap_tx_iface(vlib_main_t * vm,
81                vlib_node_runtime_t * node,
82                vlib_frame_t * frame,
83                turbotap_interface_t * ti)
84{
85  u32 * buffers = vlib_frame_args (frame);
86  uword n_packets = frame->n_vectors;
87  vlib_buffer_t * b;
88  int i = 0;
89
90  vnet_sw_interface_t *si = vnet_get_sw_interface (vnet_get_main(), ti->sw_if_index);
91  if (PREDICT_FALSE(!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))) {
92    //Drop if interface is down
93    vlib_buffer_free(vm, vlib_frame_vector_args(frame), frame->n_vectors);
94    return 0;
95  }
96
97  u32 n_tx = (n_packets > MAX_SEND)?MAX_SEND:n_packets;
98  u32 total_bytes = 0;
99  for (i = 0; i < n_tx; i++) {
100    struct iovec * iov;
101    b = vlib_get_buffer(vm, buffers[i]);
102
103    if (ti->tx_msg[i].msg_hdr.msg_iov)
104      _vec_len(ti->tx_msg[i].msg_hdr.msg_iov) = 0; //Reset vector
105
106    /* VLIB buffer chain -> Unix iovec(s). */
107    vec_add2 (ti->tx_msg[i].msg_hdr.msg_iov, iov, 1);
108    iov->iov_base = b->data + b->current_data;
109    iov->iov_len = b->current_length;
110    ti->tx_msg[i].msg_len = b->current_length;
111    if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) {
112      do {
113        b = vlib_get_buffer (vm, b->next_buffer);
114        vec_add2 (ti->tx_msg[i].msg_hdr.msg_iov, iov, 1);
115        iov->iov_base = b->data + b->current_data;
116        iov->iov_len = b->current_length;
117        ti->tx_msg[i].msg_len += b->current_length;
118      } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
119    }
120
121    ti->tx_msg[i].msg_hdr.msg_name = NULL;
122    ti->tx_msg[i].msg_hdr.msg_namelen = 0;
123    ti->tx_msg[i].msg_hdr.msg_iovlen = _vec_len(ti->tx_msg[i].msg_hdr.msg_iov);
124    ti->tx_msg[i].msg_hdr.msg_control = NULL;
125    ti->tx_msg[i].msg_hdr.msg_controllen = 0;
126    ti->tx_msg[i].msg_hdr.msg_flags = MSG_DONTWAIT;
127    total_bytes += ti->tx_msg[i].msg_len;
128  }
129
130  if (n_tx) {
131    int tx;
132    if ((tx = sendmmsg(ti->sock_fd, ti->tx_msg, n_tx, MSG_DONTWAIT)) < 1) {
133      vlib_increment_simple_counter
134      (vnet_main.interface_main.sw_if_counters
135       + VNET_INTERFACE_COUNTER_TX_ERROR, os_get_cpu_number(),
136       ti->sw_if_index, n_tx);
137    } else {
138      vlib_increment_combined_counter(
139          vnet_main.interface_main.combined_sw_if_counters
140          + VNET_INTERFACE_COUNTER_TX,
141          os_get_cpu_number(), ti->sw_if_index,
142          tx, total_bytes);
143    }
144  }
145
146  vlib_buffer_free(vm, vlib_frame_vector_args(frame), frame->n_vectors);
147  return n_packets;
148}
149
150/*
151 * turbotap_tx
152 * Output node, writes the buffers comprising the incoming frame
153 * to the tun/tap device, aka hands them to the Linux kernel stack.
154 *
155 */
156static uword
157turbotap_tx (vlib_main_t * vm,
158           vlib_node_runtime_t * node,
159           vlib_frame_t * frame)
160{
161  u32 * buffers = vlib_frame_args (frame);
162  turbotap_main_t * tr = &turbotap_main;
163  turbotap_interface_t * ti;
164
165  if (!frame->n_vectors)
166    return 0;
167
168  vlib_buffer_t *b = vlib_get_buffer(vm, buffers[0]);
169  u32 tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_TX];
170  if (tx_sw_if_index == (u32)~0)
171    tx_sw_if_index = vnet_buffer(b)->sw_if_index[VLIB_RX];
172
173  ASSERT(tx_sw_if_index != (u32)~0);
174
175  /* Use the sup intfc to finesse vlan subifs */
176  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (tr->vnet_main, tx_sw_if_index);
177  tx_sw_if_index = hw->sw_if_index;
178
179  uword * p = hash_get (tr->turbotap_interface_index_by_sw_if_index,
180                        tx_sw_if_index);
181  if (p == 0) {
182    clib_warning ("sw_if_index %d unknown", tx_sw_if_index);
183    return 0;
184  } else {
185    ti = vec_elt_at_index (tr->turbotap_interfaces, p[0]);
186  }
187
188  return turbotap_tx_iface(vm, node, frame, ti);
189}
190
191VLIB_REGISTER_NODE (turbotap_tx_node,static) = {
192  .function = turbotap_tx,
193  .name = "turbotap-tx",
194  .type = VLIB_NODE_TYPE_INTERNAL,
195  .vector_size = 4,
196};
197
198/*
199 * Mainly exists to set link_state == admin_state
200 * otherwise, e.g. ip6 neighbor discovery breaks
201 */
202static clib_error_t *
203turbotap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
204{
205  uword is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
206  u32 hw_flags;
207  u32 speed_duplex = VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
208    | VNET_HW_INTERFACE_FLAG_SPEED_40G;
209
210  if (is_admin_up)
211    hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP | speed_duplex;
212  else
213    hw_flags = speed_duplex;
214
215  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
216  return 0;
217}
218
219VNET_DEVICE_CLASS (turbotap_dev_class) = {
220  .name = "turbotap",
221  .tx_function = turbotap_tx,
222  .format_device_name = format_turbotap_interface_name,
223  .rx_redirect_to_node = turbotap_set_interface_next_node,
224  .admin_up_down_function = turbotap_interface_admin_up_down,
225  .no_flatten_output_chains = 1,
226};
227
228