punt_node.c revision 7ca5aaac
1/*
2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16/**
17 * @file
18 * @brief Local TCP/IP stack punt infrastructure.
19 *
 * Provides a set of VPP nodes, together with the relevant APIs and CLI
 * commands, to adjust and dispatch packets from the VPP data plane
 * to the local TCP/IP stack.
23 */
24
25#include <vnet/ip/ip.h>
26#include <vlib/vlib.h>
27#include <vnet/pg/pg.h>
28#include <vnet/udp/udp.h>
29#include <vnet/tcp/tcp.h>
30#include <vnet/ip/punt.h>
31#include <vlib/unix/unix.h>
32
33#include <stdio.h>
34#include <unistd.h>
35#include <sys/socket.h>
36#include <sys/uio.h>
37#include <stdlib.h>
38
39typedef enum
40{
41#define punt_error(n,s) PUNT_ERROR_##n,
42#include <vnet/ip/punt_error.def>
43#undef punt_error
44  PUNT_N_ERROR,
45} punt_error_t;
46
/*
 * Next-node list of the "ip4-udp-punt" and "ip6-udp-punt" nodes.
 * Each entry is _(enumerator suffix, next node name); the list is expanded
 * with different definitions of _() to build both the punt_next_t enum and
 * the .next_nodes tables of the node registrations.
 */
#define foreach_punt_next			\
  _ (PUNT4, "ip4-punt")                         \
  _ (PUNT6, "ip6-punt")
50
51typedef enum
52{
53#define _(s,n) PUNT_NEXT_##s,
54  foreach_punt_next
55#undef _
56    PUNT_N_NEXT,
57} punt_next_t;
58
/** Next-node slots of the "punt-socket-rx" input node. */
enum punt_socket_rx_next_e
{
  PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT = 0,	/* "interface-output" */
  PUNT_SOCKET_RX_NEXT_IP4_LOOKUP,	/* "ip4-lookup" */
  PUNT_SOCKET_RX_NEXT_IP6_LOOKUP,	/* "ip6-lookup" */
  PUNT_SOCKET_RX_N_NEXT		/* number of next-node slots */
};
66
/*
 * Select the punt next-node slot for an address family: PUNT_NEXT_PUNT4
 * when is_ip4 is non-zero, PUNT_NEXT_PUNT6 otherwise. The argument is
 * parenthesized so that compound expressions (e.g. another conditional)
 * are evaluated as a whole before the selection is made.
 */
#define punt_next_punt(is_ip4) ((is_ip4) ? PUNT_NEXT_PUNT4 : PUNT_NEXT_PUNT6)
68
69/** @brief IPv4/IPv6 UDP punt node main loop.
70
71    This is the main loop inline function for IPv4/IPv6 UDP punt
72    transition node.
73
74    @param vm vlib_main_t corresponding to the current thread
75    @param node vlib_node_runtime_t
76    @param frame vlib_frame_t whose contents should be dispatched
77    @param is_ipv4 indicates if called for IPv4 or IPv6 node
78*/
79always_inline uword
80udp46_punt_inline (vlib_main_t * vm,
81		   vlib_node_runtime_t * node,
82		   vlib_frame_t * from_frame, int is_ip4)
83{
84  u32 n_left_from, *from, *to_next;
85  word advance;
86
87  from = vlib_frame_vector_args (from_frame);
88  n_left_from = from_frame->n_vectors;
89
90  /* udp[46]_lookup hands us the data payload, not the IP header */
91  if (is_ip4)
92    advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t));
93  else
94    advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t));
95
96  while (n_left_from > 0)
97    {
98      u32 n_left_to_next;
99
100      vlib_get_next_frame (vm, node, punt_next_punt (is_ip4), to_next,
101			   n_left_to_next);
102
103      while (n_left_from > 0 && n_left_to_next > 0)
104	{
105	  u32 bi0;
106	  vlib_buffer_t *b0;
107
108	  bi0 = from[0];
109	  to_next[0] = bi0;
110	  from += 1;
111	  to_next += 1;
112	  n_left_from -= 1;
113	  n_left_to_next -= 1;
114
115	  b0 = vlib_get_buffer (vm, bi0);
116	  vlib_buffer_advance (b0, advance);
117	  b0->error = node->errors[PUNT_ERROR_UDP_PORT];
118	}
119
120      vlib_put_next_frame (vm, node, punt_next_punt (is_ip4), n_left_to_next);
121    }
122
123  return from_frame->n_vectors;
124}
125
126static char *punt_error_strings[] = {
127#define punt_error(n,s) s,
128#include "punt_error.def"
129#undef punt_error
130};
131
132/** @brief IPv4 UDP punt node.
133    @node ip4-udp-punt
134
135    This is the IPv4 UDP punt transition node. It is registered as a next
136    node for the "ip4-udp-lookup" handling UDP port(s) requested for punt.
137    The buffer's current data pointer is adjusted to the original packet
138    IPv4 header. All buffers are dispatched to "error-punt".
139
140    @param vm vlib_main_t corresponding to the current thread
141    @param node vlib_node_runtime_t
142    @param frame vlib_frame_t whose contents should be dispatched
143
144    @par Graph mechanics: next index usage
145
146    @em Sets:
147    - <code>vnet_buffer(b)->current_data</code>
148    - <code>vnet_buffer(b)->current_len</code>
149
150    <em>Next Index:</em>
151    - Dispatches the packet to the "error-punt" node
152*/
153VLIB_NODE_FN (udp4_punt_node) (vlib_main_t * vm,
154			       vlib_node_runtime_t * node,
155			       vlib_frame_t * from_frame)
156{
157  return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
158}
159
160/** @brief IPv6 UDP punt node.
161    @node ip6-udp-punt
162
163    This is the IPv6 UDP punt transition node. It is registered as a next
164    node for the "ip6-udp-lookup" handling UDP port(s) requested for punt.
165    The buffer's current data pointer is adjusted to the original packet
166    IPv6 header. All buffers are dispatched to "error-punt".
167
168    @param vm vlib_main_t corresponding to the current thread
169    @param node vlib_node_runtime_t
170    @param frame vlib_frame_t whose contents should be dispatched
171
172    @par Graph mechanics: next index usage
173
174    @em Sets:
175    - <code>vnet_buffer(b)->current_data</code>
176    - <code>vnet_buffer(b)->current_len</code>
177
178    <em>Next Index:</em>
179    - Dispatches the packet to the "error-punt" node
180*/
181VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
182			       vlib_node_runtime_t * node,
183			       vlib_frame_t * from_frame)
184{
185  return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
186}
187
188/* *INDENT-OFF* */
189VLIB_REGISTER_NODE (udp4_punt_node) = {
190  .name = "ip4-udp-punt",
191  /* Takes a vector of packets. */
192  .vector_size = sizeof (u32),
193
194  .n_errors = PUNT_N_ERROR,
195  .error_strings = punt_error_strings,
196
197  .n_next_nodes = PUNT_N_NEXT,
198  .next_nodes = {
199#define _(s,n) [PUNT_NEXT_##s] = n,
200     foreach_punt_next
201#undef _
202  },
203};
204
205VLIB_REGISTER_NODE (udp6_punt_node) = {
206  .name = "ip6-udp-punt",
207  /* Takes a vector of packets. */
208  .vector_size = sizeof (u32),
209
210  .n_errors = PUNT_N_ERROR,
211  .error_strings = punt_error_strings,
212
213  .n_next_nodes = PUNT_N_NEXT,
214  .next_nodes = {
215#define _(s,n) [PUNT_NEXT_##s] = n,
216     foreach_punt_next
217#undef _
218  },
219};
220/* *INDENT-ON* */
221
/** Trace record written by the punt socket nodes and rendered by
    format_udp_punt_trace. */
typedef struct
{
  /* Copy of the client the packet is sent to */
  punt_client_t client;
  /* Non-zero when the record belongs to a non-head buffer of a chain */
  u8 is_midchain;
  /* Bytes captured from the traced buffer's current data */
  u8 packet_data[64];
} udp_punt_trace_t;
228
229static u8 *
230format_udp_punt_trace (u8 * s, va_list * args)
231{
232  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
233  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
234  udp_punt_trace_t *t = va_arg (*args, udp_punt_trace_t *);
235  u32 indent = format_get_indent (s);
236  s = format (s, "to: %s", t->client.caddr.sun_path);
237  if (t->is_midchain)
238    {
239      s = format (s, "\n%U(buffer is part of chain)", format_white_space,
240		  indent);
241    }
242  s = format (s, "\n%U%U", format_white_space, indent,
243	      format_hex_bytes, t->packet_data, sizeof (t->packet_data));
244
245  return s;
246}
247
248always_inline uword
249punt_socket_inline (vlib_main_t * vm,
250		    vlib_node_runtime_t * node,
251		    vlib_frame_t * frame,
252		    punt_type_t pt, ip_address_family_t af)
253{
254  u32 *buffers = vlib_frame_vector_args (frame);
255  u32 thread_index = vm->thread_index;
256  uword n_packets = frame->n_vectors;
257  punt_main_t *pm = &punt_main;
258  int i;
259
260  punt_thread_data_t *ptd = &pm->thread_data[thread_index];
261  u32 node_index = (AF_IP4 == af ?
262		    udp4_punt_socket_node.index :
263		    udp6_punt_socket_node.index);
264
265  for (i = 0; i < n_packets; i++)
266    {
267      struct iovec *iov;
268      vlib_buffer_t *b;
269      uword l;
270      punt_packetdesc_t packetdesc;
271      punt_client_t *c;
272
273      b = vlib_get_buffer (vm, buffers[i]);
274
275      if (PUNT_TYPE_L4 == pt)
276	{
277	  /* Reverse UDP Punt advance */
278	  udp_header_t *udp;
279	  if (AF_IP4 == af)
280	    {
281	      vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
282					sizeof (udp_header_t)));
283	      ip4_header_t *ip = vlib_buffer_get_current (b);
284	      udp = (udp_header_t *) (ip + 1);
285	    }
286	  else
287	    {
288	      vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
289					sizeof (udp_header_t)));
290	      ip6_header_t *ip = vlib_buffer_get_current (b);
291	      udp = (udp_header_t *) (ip + 1);
292	    }
293
294	  /*
295	   * Find registerered client
296	   * If no registered client, drop packet and count
297	   */
298	  c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port));
299	}
300      else if (PUNT_TYPE_IP_PROTO == pt)
301	{
302	  /* Reverse UDP Punt advance */
303	  ip_protocol_t proto;
304
305	  if (AF_IP4 == af)
306	    {
307	      ip4_header_t *ip = vlib_buffer_get_current (b);
308	      proto = ip->protocol;
309	    }
310	  else
311	    {
312	      ip6_header_t *ip = vlib_buffer_get_current (b);
313	      proto = ip->protocol;
314	    }
315
316	  c = punt_client_ip_proto_get (af, proto);
317	}
318      else if (PUNT_TYPE_EXCEPTION == pt)
319	{
320	  c = punt_client_exception_get (b->punt_reason);
321	}
322      else
323	c = NULL;
324
325      if (PREDICT_FALSE (NULL == c))
326	{
327	  vlib_node_increment_counter (vm, node_index,
328				       PUNT_ERROR_SOCKET_TX_ERROR, 1);
329	  goto error;
330	}
331
332      struct sockaddr_un *caddr = &c->caddr;
333
334      /* Re-set iovecs */
335      vec_reset_length (ptd->iovecs);
336
337      /* Add packet descriptor */
338      packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
339      packetdesc.action = 0;
340      vec_add2 (ptd->iovecs, iov, 1);
341      iov->iov_base = &packetdesc;
342      iov->iov_len = sizeof (packetdesc);
343
344      /** VLIB buffer chain -> Unix iovec(s). */
345      vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
346      vec_add2 (ptd->iovecs, iov, 1);
347      iov->iov_base = b->data + b->current_data;
348      iov->iov_len = l = b->current_length;
349
350      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
351	{
352	  udp_punt_trace_t *t;
353	  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
354	  clib_memcpy_fast (&t->client, c, sizeof (t->client));
355	  clib_memcpy_fast (t->packet_data,
356			    vlib_buffer_get_current (b),
357			    sizeof (t->packet_data));
358	}
359
360      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
361	{
362	  do
363	    {
364	      b = vlib_get_buffer (vm, b->next_buffer);
365	      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
366		{
367		  udp_punt_trace_t *t;
368		  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
369		  clib_memcpy_fast (&t->client, c, sizeof (t->client));
370		  t->is_midchain = 1;
371		}
372
373	      vec_add2 (ptd->iovecs, iov, 1);
374
375	      iov->iov_base = b->data + b->current_data;
376	      iov->iov_len = b->current_length;
377	      l += b->current_length;
378	    }
379	  while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
380	}
381
382      struct msghdr msg = {
383	.msg_name = caddr,
384	.msg_namelen = sizeof (*caddr),
385	.msg_iov = ptd->iovecs,
386	.msg_iovlen = vec_len (ptd->iovecs),
387      };
388
389      if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l)
390	vlib_node_increment_counter (vm, node_index,
391				     PUNT_ERROR_SOCKET_TX_ERROR, 1);
392      else
393	vlib_node_increment_counter (vm, node_index, PUNT_ERROR_SOCKET_TX, 1);
394    }
395
396error:
397  vlib_buffer_free (vm, buffers, n_packets);
398
399  return n_packets;
400}
401
402static uword
403udp4_punt_socket (vlib_main_t * vm,
404		  vlib_node_runtime_t * node, vlib_frame_t * from_frame)
405{
406  return punt_socket_inline (vm, node, from_frame, PUNT_TYPE_L4, AF_IP4);
407}
408
409static uword
410udp6_punt_socket (vlib_main_t * vm,
411		  vlib_node_runtime_t * node, vlib_frame_t * from_frame)
412{
413  return punt_socket_inline (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6);
414}
415
416static uword
417ip4_proto_punt_socket (vlib_main_t * vm,
418		       vlib_node_runtime_t * node, vlib_frame_t * from_frame)
419{
420  return punt_socket_inline (vm, node, from_frame,
421			     PUNT_TYPE_IP_PROTO, AF_IP4);
422}
423
424static uword
425ip6_proto_punt_socket (vlib_main_t * vm,
426		       vlib_node_runtime_t * node, vlib_frame_t * from_frame)
427{
428  return punt_socket_inline (vm, node, from_frame,
429			     PUNT_TYPE_IP_PROTO, AF_IP6);
430}
431
432static uword
433exception_punt_socket (vlib_main_t * vm,
434		       vlib_node_runtime_t * node, vlib_frame_t * from_frame)
435{
436  return punt_socket_inline (vm, node, from_frame,
437			     PUNT_TYPE_EXCEPTION, AF_IP4);
438}
439
440
441/* *INDENT-OFF* */
442VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
443  .function = udp4_punt_socket,
444  .name = "ip4-udp-punt-socket",
445  .format_trace = format_udp_punt_trace,
446  .flags = VLIB_NODE_FLAG_IS_DROP,
447  /* Takes a vector of packets. */
448  .vector_size = sizeof (u32),
449  .n_errors = PUNT_N_ERROR,
450  .error_strings = punt_error_strings,
451};
452VLIB_REGISTER_NODE (udp6_punt_socket_node) = {
453  .function = udp6_punt_socket,
454  .name = "ip6-udp-punt-socket",
455  .format_trace = format_udp_punt_trace,
456  .flags = VLIB_NODE_FLAG_IS_DROP,
457  .vector_size = sizeof (u32),
458  .n_errors = PUNT_N_ERROR,
459  .error_strings = punt_error_strings,
460};
461VLIB_REGISTER_NODE (ip4_proto_punt_socket_node) = {
462  .function = ip4_proto_punt_socket,
463  .name = "ip4-proto-punt-socket",
464  .format_trace = format_udp_punt_trace,
465  .flags = VLIB_NODE_FLAG_IS_DROP,
466  /* Takes a vector of packets. */
467  .vector_size = sizeof (u32),
468  .n_errors = PUNT_N_ERROR,
469  .error_strings = punt_error_strings,
470};
471VLIB_REGISTER_NODE (ip6_proto_punt_socket_node) = {
472  .function = ip6_proto_punt_socket,
473  .name = "ip6-proto-punt-socket",
474  .format_trace = format_udp_punt_trace,
475  .flags = VLIB_NODE_FLAG_IS_DROP,
476  .vector_size = sizeof (u32),
477  .n_errors = PUNT_N_ERROR,
478  .error_strings = punt_error_strings,
479};
480VLIB_REGISTER_NODE (exception_punt_socket_node) = {
481  .function = exception_punt_socket,
482  .name = "exception-punt-socket",
483  .format_trace = format_udp_punt_trace,
484  .flags = VLIB_NODE_FLAG_IS_DROP,
485  .vector_size = sizeof (u32),
486  .n_errors = PUNT_N_ERROR,
487  .error_strings = punt_error_strings,
488};
489/* *INDENT-ON* */
490
/** Trace record of the "punt-socket-rx" node, rendered by
    format_punt_trace. */
typedef struct
{
  /* Action taken from the received packet descriptor */
  enum punt_action_e action;
  /* Interface index taken from the received packet descriptor */
  u32 sw_if_index;
} punt_trace_t;
496
497static u8 *
498format_punt_trace (u8 * s, va_list * va)
499{
500  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
501  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
502  vnet_main_t *vnm = vnet_get_main ();
503  punt_trace_t *t = va_arg (*va, punt_trace_t *);
504  s = format (s, "%U Action: %d", format_vnet_sw_if_index_name,
505	      vnm, t->sw_if_index, t->action);
506  return s;
507}
508
509static uword
510punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd)
511{
512  const uword buffer_size = vlib_buffer_get_default_data_size (vm);
513  u32 n_trace = vlib_get_trace_count (vm, node);
514  u32 next = node->cached_next_index;
515  u32 n_left_to_next, next_index;
516  u32 *to_next;
517  u32 error = PUNT_ERROR_NONE;
518  vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
519
520  /* $$$$ Only dealing with one buffer at the time for now */
521
522  u32 bi;
523  vlib_buffer_t *b;
524  punt_packetdesc_t packetdesc;
525  ssize_t size;
526  struct iovec io[2];
527
528  if (vlib_buffer_alloc (vm, &bi, 1) != 1)
529    {
530      error = PUNT_ERROR_NOBUFFER;
531      goto error;
532    }
533
534  b = vlib_get_buffer (vm, bi);
535  io[0].iov_base = &packetdesc;
536  io[0].iov_len = sizeof (packetdesc);
537  io[1].iov_base = b->data;
538  io[1].iov_len = buffer_size;
539
540  size = readv (fd, io, 2);
541  /* We need at least the packet descriptor plus a header */
542  if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t)))
543    {
544      vlib_buffer_free (vm, &bi, 1);
545      error = PUNT_ERROR_READV;
546      goto error;
547    }
548
549  b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
550  b->current_length = size - sizeof (packetdesc);
551
552  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
553
554  switch (packetdesc.action)
555    {
556    case PUNT_L2:
557      vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index;
558      next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT;
559      break;
560
561    case PUNT_IP4_ROUTED:
562      vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
563      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
564      next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP;
565      break;
566
567    case PUNT_IP6_ROUTED:
568      vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
569      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
570      next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP;
571      break;
572
573    default:
574      error = PUNT_ERROR_ACTION;
575      vlib_buffer_free (vm, &bi, 1);
576      goto error;
577    }
578
579  if (PREDICT_FALSE (n_trace > 0))
580    {
581      punt_trace_t *t;
582      vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ );
583      vlib_set_trace_count (vm, node, --n_trace);
584      t = vlib_add_trace (vm, node, b, sizeof (*t));
585      t->sw_if_index = packetdesc.sw_if_index;
586      t->action = packetdesc.action;
587    }
588
589  to_next[0] = bi;
590  to_next++;
591  n_left_to_next--;
592
593  vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next,
594				   bi, next_index);
595  vlib_put_next_frame (vm, node, next, n_left_to_next);
596  return 1;
597
598error:
599  vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1);
600  return 0;
601}
602
603static uword
604punt_socket_rx (vlib_main_t * vm,
605		vlib_node_runtime_t * node, vlib_frame_t * frame)
606{
607  punt_main_t *pm = &punt_main;
608  u32 total_count = 0;
609  int i;
610
611  for (i = 0; i < vec_len (pm->ready_fds); i++)
612    {
613      total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]);
614      vec_del1 (pm->ready_fds, i);
615    }
616  return total_count;
617}
618
619/* *INDENT-OFF* */
620VLIB_REGISTER_NODE (punt_socket_rx_node) =
621{
622 .function = punt_socket_rx,
623 .name = "punt-socket-rx",
624 .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
625 .type = VLIB_NODE_TYPE_INPUT,
626 .state = VLIB_NODE_STATE_INTERRUPT,
627 .vector_size = 1,
628 .n_errors = PUNT_N_ERROR,
629 .error_strings = punt_error_strings,
630 .n_next_nodes = PUNT_SOCKET_RX_N_NEXT,
631 .next_nodes = {
632    [PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output",
633    [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup",
634    [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",
635  },
636 .format_trace = format_punt_trace,
637};
638/* *INDENT-ON* */
639
640/*
641 * fd.io coding-style-patch-verification: ON
642 *
643 * Local Variables:
644 * eval: (c-set-style "gnu")
645 * End:
646 */
647