tap_inject_node.c revision dfae7756
/*
 * Copyright 2016 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "tap_inject.h"
18
19#include <netinet/in.h>
20#include <vnet/ethernet/arp_packet.h>
21
22vlib_node_registration_t tap_inject_rx_node;
23vlib_node_registration_t tap_inject_tx_node;
24vlib_node_registration_t tap_inject_neighbor_node;
25
/*
 * Next-node slots of the "tap-inject-neighbor" node.  The values index the
 * .next_nodes table of that node's registration.
 */
enum {
  NEXT_NEIGHBOR_ARP = 0,
  NEXT_NEIGHBOR_ICMP6 = 1,
};
30
31
32static inline void
33tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
34{
35  struct iovec iov;
36  ssize_t n_bytes;
37
38  iov.iov_base = vlib_buffer_get_current (b);
39  iov.iov_len = b->current_length;
40
41  n_bytes = writev (fd, &iov, 1);
42
43  if (n_bytes < 0)
44    clib_warning ("writev failed");
45  else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT)
46    clib_warning ("buffer truncated");
47}
48
49static uword
50tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
51{
52  vlib_buffer_t * b;
53  u32 * pkts;
54  u32 fd;
55  u32 i;
56
57  pkts = vlib_frame_vector_args (f);
58
59  for (i = 0; i < f->n_vectors; ++i)
60    {
61      b = vlib_get_buffer (vm, pkts[i]);
62
63      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
64      if (fd == ~0)
65        continue;
66
67      /* Re-wind the buffer to the start of the Ethernet header. */
68      vlib_buffer_advance (b, -b->current_data);
69
70      tap_inject_tap_send_buffer (fd, b);
71    }
72
73  vlib_buffer_free (vm, pkts, f->n_vectors);
74  return f->n_vectors;
75}
76
77VLIB_REGISTER_NODE (tap_inject_tx_node) = {
78  .function = tap_inject_tx,
79  .name = "tap-inject-tx",
80  .vector_size = sizeof (u32),
81  .type = VLIB_NODE_TYPE_INTERNAL,
82};
83
84
85static uword
86tap_inject_neighbor (vlib_main_t * vm,
87                     vlib_node_runtime_t * node, vlib_frame_t * f)
88{
89  vlib_buffer_t * b;
90  u32 * pkts;
91  u32 fd;
92  u32 i;
93  u32 bi;
94  u32 next_index = node->cached_next_index;
95  u32 next = ~0;
96  u32 n_left;
97  u32 * to_next;
98
99  pkts = vlib_frame_vector_args (f);
100
101  for (i = 0; i < f->n_vectors; ++i)
102    {
103      bi = pkts[i];
104      b = vlib_get_buffer (vm, bi);
105
106      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
107      if (fd == ~0)
108        {
109          vlib_buffer_free (vm, &bi, 1);
110          continue;
111        }
112
113      /* Re-wind the buffer to the start of the Ethernet header. */
114      vlib_buffer_advance (b, -b->current_data);
115
116      tap_inject_tap_send_buffer (fd, b);
117
118      /* Send the buffer to a neighbor node too? */
119      {
120        ethernet_header_t * eth = vlib_buffer_get_current (b);
121        u16 ether_type = htons (eth->type);
122
123        if (ether_type == ETHERNET_TYPE_ARP)
124          {
125            ethernet_arp_header_t * arp = (void *)(eth + 1);
126
127            if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply))
128              next = NEXT_NEIGHBOR_ARP;
129          }
130        else if (ether_type == ETHERNET_TYPE_IP6)
131          {
132            ip6_header_t * ip = (void *)(eth + 1);
133            icmp46_header_t * icmp = (void *)(ip + 1);
134
135            if (ip->protocol == IP_PROTOCOL_ICMP6 &&
136                icmp->type == ICMP6_neighbor_advertisement)
137              next = NEXT_NEIGHBOR_ICMP6;
138          }
139      }
140
141      if (next == ~0)
142        {
143          vlib_buffer_free (vm, &bi, 1);
144          continue;
145        }
146
147      /* ARP and ICMP6 expect to start processing after the Ethernet header. */
148      vlib_buffer_advance (b, sizeof (ethernet_header_t));
149
150      vlib_get_next_frame (vm, node, next_index, to_next, n_left);
151
152      *(to_next++) = bi;
153      --n_left;
154
155      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
156                                       n_left, bi, next);
157      vlib_put_next_frame (vm, node, next_index, n_left);
158    }
159
160  return f->n_vectors;
161}
162
163VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
164  .function = tap_inject_neighbor,
165  .name = "tap-inject-neighbor",
166  .vector_size = sizeof (u32),
167  .type = VLIB_NODE_TYPE_INTERNAL,
168  .n_next_nodes = 2,
169  .next_nodes = {
170    [NEXT_NEIGHBOR_ARP] = "arp-input",
171    [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
172  },
173};
174
175
/* Largest packet size, in bytes, read from a tap in one go. */
#define MTU (1500)
/* Number of vlib buffers needed to hold MTU bytes (rounded up). */
#define MTU_BUFFERS \
  (((MTU) + (VLIB_BUFFER_DATA_SIZE) - 1) / (VLIB_BUFFER_DATA_SIZE))
/* Number of buffer indices requested when the rx buffer cache is refilled. */
#define NUM_BUFFERS_TO_ALLOC (32)
179
180static inline uword
181tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
182{
183  tap_inject_main_t * im = tap_inject_get_main ();
184  u32 sw_if_index;
185  struct iovec iov[MTU_BUFFERS];
186  u32 bi[MTU_BUFFERS];
187  vlib_buffer_t * b;
188  ssize_t n_bytes;
189  ssize_t n_bytes_left;
190  u32 i, j;
191
192  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
193  if (sw_if_index == ~0)
194    return 0;
195
196  /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
197  if (vec_len (im->rx_buffers) < MTU_BUFFERS)
198    {
199      u32 len = vec_len (im->rx_buffers);
200
201      len = vlib_buffer_alloc_from_free_list (vm,
202                    &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC,
203                    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
204
205      _vec_len (im->rx_buffers) += len;
206
207      if (vec_len (im->rx_buffers) < MTU_BUFFERS)
208        {
209          clib_warning ("failed to allocate buffers");
210          return 0;
211        }
212    }
213
214  /* Fill buffers from the end of the list to make it easier to resize. */
215  for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
216    {
217      vlib_buffer_t * b;
218
219      bi[i] = im->rx_buffers[j];
220
221      b = vlib_get_buffer (vm, bi[i]);
222
223      iov[i].iov_base = b->data;
224      iov[i].iov_len = VLIB_BUFFER_DATA_SIZE;
225    }
226
227  n_bytes = readv (fd, iov, MTU_BUFFERS);
228  if (n_bytes < 0)
229    {
230      clib_warning ("readv failed");
231      return 0;
232    }
233
234  b = vlib_get_buffer (vm, bi[0]);
235
236  vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0;
237  vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
238
239  n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE;
240
241  if (n_bytes_left > 0)
242    {
243      b->total_length_not_including_first_buffer = n_bytes_left;
244      b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
245    }
246
247  b->current_length = n_bytes;
248
249  /* If necessary, configure any remaining buffers in the chain. */
250  for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE)
251    {
252      b = vlib_get_buffer (vm, bi[i - 1]);
253      b->current_length = VLIB_BUFFER_DATA_SIZE;
254      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
255      b->next_buffer = bi[i];
256
257      b = vlib_get_buffer (vm, bi[i]);
258      b->current_length = n_bytes_left;
259    }
260
261  _vec_len (im->rx_buffers) -= i;
262
263  vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0]));
264
265  /* Get the packet to the output node. */
266  {
267    vnet_hw_interface_t * hw;
268    vlib_frame_t * new_frame;
269    u32 * to_next;
270
271    hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
272
273    new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
274    to_next = vlib_frame_vector_args (new_frame);
275    to_next[0] = bi[0];
276    new_frame->n_vectors = 1;
277
278    vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
279  }
280
281  return 1;
282}
283
284static uword
285tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
286{
287  tap_inject_main_t * im = tap_inject_get_main ();
288  u32 * fd;
289  uword count = 0;
290
291  vec_foreach (fd, im->rx_file_descriptors)
292    {
293      if (tap_rx (vm, node, f, *fd) != 1)
294        {
295          clib_warning ("rx failed");
296          count = 0;
297          break;
298        }
299      ++count;
300    }
301
302  vec_free (im->rx_file_descriptors);
303
304  return count;
305}
306
307VLIB_REGISTER_NODE (tap_inject_rx_node) = {
308  .function = tap_inject_rx,
309  .name = "tap-inject-rx",
310  .type = VLIB_NODE_TYPE_INPUT,
311  .state = VLIB_NODE_STATE_INTERRUPT,
312  .vector_size = sizeof (u32),
313};
314
315
316static clib_error_t *
317tap_inject_init (vlib_main_t * vm)
318{
319  tap_inject_main_t * im = tap_inject_get_main ();
320
321  im->rx_node_index = tap_inject_rx_node.index;
322  im->tx_node_index = tap_inject_tx_node.index;
323  im->neighbor_node_index = tap_inject_neighbor_node.index;
324
325  vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC);
326  vec_reset_length (im->rx_buffers);
327
328  return 0;
329}
330
331VLIB_INIT_FUNCTION (tap_inject_init);
332