tap_inject_node.c revision 50a852c1
1/*
2 * Copyright 2016 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *   http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "tap_inject.h"
18
19#include <netinet/in.h>
20#include <vnet/ethernet/arp_packet.h>
21
22vlib_node_registration_t tap_inject_rx_node;
23vlib_node_registration_t tap_inject_tx_node;
24vlib_node_registration_t tap_inject_neighbor_node;
25
/* Next-node slots used by the "tap-inject-neighbor" node. */
enum {
  NEXT_NEIGHBOR_ARP = 0,    /* slot registered as "arp-input" */
  NEXT_NEIGHBOR_ICMP6 = 1,  /* slot registered as "icmp6-neighbor-solicitation" */
};
30
31/**
32 * @brief Dynamically added tap_inject DPO type
33 */
34dpo_type_t tap_inject_dpo_type;
35
36static inline void
37tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
38{
39  struct iovec iov;
40  ssize_t n_bytes;
41
42  iov.iov_base = vlib_buffer_get_current (b);
43  iov.iov_len = b->current_length;
44
45  n_bytes = writev (fd, &iov, 1);
46
47  if (n_bytes < 0)
48    clib_warning ("writev failed");
49  else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT)
50    clib_warning ("buffer truncated");
51}
52
53static uword
54tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
55{
56  vlib_buffer_t * b;
57  u32 * pkts;
58  u32 fd;
59  u32 i;
60
61  pkts = vlib_frame_vector_args (f);
62
63  for (i = 0; i < f->n_vectors; ++i)
64    {
65      b = vlib_get_buffer (vm, pkts[i]);
66
67      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
68      if (fd == ~0)
69        continue;
70
71      /* Re-wind the buffer to the start of the Ethernet header. */
72      vlib_buffer_advance (b, -b->current_data);
73
74      tap_inject_tap_send_buffer (fd, b);
75    }
76
77  vlib_buffer_free (vm, pkts, f->n_vectors);
78  return f->n_vectors;
79}
80
81VLIB_REGISTER_NODE (tap_inject_tx_node) = {
82  .function = tap_inject_tx,
83  .name = "tap-inject-tx",
84  .vector_size = sizeof (u32),
85  .type = VLIB_NODE_TYPE_INTERNAL,
86};
87
88
89static uword
90tap_inject_neighbor (vlib_main_t * vm,
91                     vlib_node_runtime_t * node, vlib_frame_t * f)
92{
93  vlib_buffer_t * b;
94  u32 * pkts;
95  u32 fd;
96  u32 i;
97  u32 bi;
98  u32 next_index = node->cached_next_index;
99  u32 next = ~0;
100  u32 n_left;
101  u32 * to_next;
102
103  pkts = vlib_frame_vector_args (f);
104
105  for (i = 0; i < f->n_vectors; ++i)
106    {
107      bi = pkts[i];
108      b = vlib_get_buffer (vm, bi);
109
110      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
111      if (fd == ~0)
112        {
113          vlib_buffer_free (vm, &bi, 1);
114          continue;
115        }
116
117      /* Re-wind the buffer to the start of the Ethernet header. */
118      vlib_buffer_advance (b, -b->current_data);
119
120      tap_inject_tap_send_buffer (fd, b);
121
122      /* Send the buffer to a neighbor node too? */
123      {
124        ethernet_header_t * eth = vlib_buffer_get_current (b);
125        u16 ether_type = htons (eth->type);
126
127        if (ether_type == ETHERNET_TYPE_ARP)
128          {
129            ethernet_arp_header_t * arp = (void *)(eth + 1);
130
131            if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply))
132              next = NEXT_NEIGHBOR_ARP;
133          }
134        else if (ether_type == ETHERNET_TYPE_IP6)
135          {
136            ip6_header_t * ip = (void *)(eth + 1);
137            icmp46_header_t * icmp = (void *)(ip + 1);
138
139            if (ip->protocol == IP_PROTOCOL_ICMP6 &&
140                icmp->type == ICMP6_neighbor_advertisement)
141              next = NEXT_NEIGHBOR_ICMP6;
142          }
143      }
144
145      if (next == ~0)
146        {
147          vlib_buffer_free (vm, &bi, 1);
148          continue;
149        }
150
151      /* ARP and ICMP6 expect to start processing after the Ethernet header. */
152      vlib_buffer_advance (b, sizeof (ethernet_header_t));
153
154      vlib_get_next_frame (vm, node, next_index, to_next, n_left);
155
156      *(to_next++) = bi;
157      --n_left;
158
159      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
160                                       n_left, bi, next);
161      vlib_put_next_frame (vm, node, next_index, n_left);
162    }
163
164  return f->n_vectors;
165}
166
167VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
168  .function = tap_inject_neighbor,
169  .name = "tap-inject-neighbor",
170  .vector_size = sizeof (u32),
171  .type = VLIB_NODE_TYPE_INTERNAL,
172  .n_next_nodes = 2,
173  .next_nodes = {
174    [NEXT_NEIGHBOR_ARP] = "arp-input",
175    [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
176  },
177};
178
179
180#define MTU 1500
181#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE)
182#define NUM_BUFFERS_TO_ALLOC 32
183
184static inline uword
185tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
186{
187  tap_inject_main_t * im = tap_inject_get_main ();
188  u32 sw_if_index;
189  struct iovec iov[MTU_BUFFERS];
190  u32 bi[MTU_BUFFERS];
191  vlib_buffer_t * b;
192  ssize_t n_bytes;
193  ssize_t n_bytes_left;
194  u32 i, j;
195
196  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
197  if (sw_if_index == ~0)
198    return 0;
199
200  /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
201  if (vec_len (im->rx_buffers) < MTU_BUFFERS)
202    {
203      u32 len = vec_len (im->rx_buffers);
204
205      len = vlib_buffer_alloc_from_free_list (vm,
206                    &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC,
207                    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
208
209      _vec_len (im->rx_buffers) += len;
210
211      if (vec_len (im->rx_buffers) < MTU_BUFFERS)
212        {
213          clib_warning ("failed to allocate buffers");
214          return 0;
215        }
216    }
217
218  /* Fill buffers from the end of the list to make it easier to resize. */
219  for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
220    {
221      vlib_buffer_t * b;
222
223      bi[i] = im->rx_buffers[j];
224
225      b = vlib_get_buffer (vm, bi[i]);
226
227      iov[i].iov_base = b->data;
228      iov[i].iov_len = VLIB_BUFFER_DATA_SIZE;
229    }
230
231  n_bytes = readv (fd, iov, MTU_BUFFERS);
232  if (n_bytes < 0)
233    {
234      clib_warning ("readv failed");
235      return 0;
236    }
237
238  b = vlib_get_buffer (vm, bi[0]);
239
240  vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index;
241  vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
242
243  n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE;
244
245  if (n_bytes_left > 0)
246    {
247      b->total_length_not_including_first_buffer = n_bytes_left;
248      b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
249    }
250
251  b->current_length = n_bytes;
252
253  /* If necessary, configure any remaining buffers in the chain. */
254  for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE)
255    {
256      b = vlib_get_buffer (vm, bi[i - 1]);
257      b->current_length = VLIB_BUFFER_DATA_SIZE;
258      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
259      b->next_buffer = bi[i];
260
261      b = vlib_get_buffer (vm, bi[i]);
262      b->current_length = n_bytes_left;
263    }
264
265  _vec_len (im->rx_buffers) -= i;
266
267  vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0]));
268
269  /* Get the packet to the output node. */
270  {
271    vnet_hw_interface_t * hw;
272    vlib_frame_t * new_frame;
273    u32 * to_next;
274
275    hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
276
277    new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
278    to_next = vlib_frame_vector_args (new_frame);
279    to_next[0] = bi[0];
280    new_frame->n_vectors = 1;
281
282    vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
283  }
284
285  return 1;
286}
287
288static uword
289tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
290{
291  tap_inject_main_t * im = tap_inject_get_main ();
292  u32 * fd;
293  uword count = 0;
294
295  vec_foreach (fd, im->rx_file_descriptors)
296    {
297      if (tap_rx (vm, node, f, *fd) != 1)
298        {
299          clib_warning ("rx failed");
300          count = 0;
301          break;
302        }
303      ++count;
304    }
305
306  vec_free (im->rx_file_descriptors);
307
308  return count;
309}
310
311VLIB_REGISTER_NODE (tap_inject_rx_node) = {
312  .function = tap_inject_rx,
313  .name = "tap-inject-rx",
314  .type = VLIB_NODE_TYPE_INPUT,
315  .state = VLIB_NODE_STATE_INTERRUPT,
316  .vector_size = sizeof (u32),
317};
318
319/**
320 * @brief no-op lock function.
321 */
322static void
323tap_inject_dpo_lock (dpo_id_t * dpo)
324{
325}
326
327/**
328 * @brief no-op unlock function.
329 */
330static void
331tap_inject_dpo_unlock (dpo_id_t * dpo)
332{
333}
334
335u8 *
336format_tap_inject_dpo (u8 * s, va_list * args)
337{
338  return (format (s, "tap-inject:[%d]", 0));
339}
340
341const static dpo_vft_t tap_inject_vft = {
342  .dv_lock = tap_inject_dpo_lock,
343  .dv_unlock = tap_inject_dpo_unlock,
344  .dv_format = format_tap_inject_dpo,
345};
346
/* NULL-terminated list of node names for the tap-inject DPO. */
static const char *const tap_inject_tx_nodes[] = {
  "tap-inject-tx",
  NULL,
};
351
352const static char *const *const tap_inject_nodes[DPO_PROTO_NUM] = {
353  [DPO_PROTO_IP6] = tap_inject_tx_nodes,
354};
355
356static clib_error_t *
357tap_inject_init (vlib_main_t * vm)
358{
359  tap_inject_main_t * im = tap_inject_get_main ();
360
361  im->rx_node_index = tap_inject_rx_node.index;
362  im->tx_node_index = tap_inject_tx_node.index;
363  im->neighbor_node_index = tap_inject_neighbor_node.index;
364
365  tap_inject_dpo_type = dpo_register_new_type (&tap_inject_vft, tap_inject_nodes);
366
367  vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC);
368  vec_reset_length (im->rx_buffers);
369
370  return 0;
371}
372
373VLIB_INIT_FUNCTION (tap_inject_init);
374