/* ip4_input.c revision 896c896a */
1/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * ip/ip4_input.c: IP v4 input node
17 *
18 * Copyright (c) 2008 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vnet/ip/ip4_input.h>
41#include <vnet/ethernet/ethernet.h>
42#include <vnet/ppp/ppp.h>
43#include <vnet/hdlc/hdlc.h>
44#include <vnet/util/throttle.h>
45
/* Per-packet trace record: the first 64 bytes of packet data,
   captured at trace time and later rendered as an IPv4 header. */
typedef struct
{
  u8 packet_data[64];
} ip4_input_trace_t;
50
51static u8 *
52format_ip4_input_trace (u8 * s, va_list * va)
53{
54  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
55  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
56  ip4_input_trace_t *t = va_arg (*va, ip4_input_trace_t *);
57
58  s = format (s, "%U",
59	      format_ip4_header, t->packet_data, sizeof (t->packet_data));
60
61  return s;
62}
63
64static_always_inline u32
65ip4_input_set_next (u32 sw_if_index, vlib_buffer_t * b, int arc_enabled)
66{
67  ip4_main_t *im = &ip4_main;
68  ip_lookup_main_t *lm = &im->lookup_main;
69  u32 next;
70  u8 arc;
71
72  ip4_header_t *ip = vlib_buffer_get_current (b);
73
74  if (PREDICT_FALSE (ip4_address_is_multicast (&ip->dst_address)))
75    {
76      next = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
77      arc = lm->mcast_feature_arc_index;
78    }
79  else
80    {
81      next = IP4_INPUT_NEXT_LOOKUP;
82      arc = lm->ucast_feature_arc_index;
83    }
84
85  if (arc_enabled)
86    vnet_feature_arc_start (arc, sw_if_index, &next, b);
87
88  return next;
89}
90
91static_always_inline void
92ip4_input_check_sw_if_index (vlib_main_t * vm,
93			     vlib_simple_counter_main_t * cm, u32 sw_if_index,
94			     u32 * last_sw_if_index, u32 * cnt,
95			     int *arc_enabled)
96{
97  ip4_main_t *im = &ip4_main;
98  ip_lookup_main_t *lm = &im->lookup_main;
99  u32 thread_index;
100  if (*last_sw_if_index == sw_if_index)
101    {
102      (*cnt)++;
103      return;
104    }
105
106  thread_index = vm->thread_index;
107  if (*cnt)
108    vlib_increment_simple_counter (cm, thread_index, *last_sw_if_index, *cnt);
109  *cnt = 1;
110  *last_sw_if_index = sw_if_index;
111
112  if (vnet_have_features (lm->ucast_feature_arc_index, sw_if_index) ||
113      vnet_have_features (lm->mcast_feature_arc_index, sw_if_index))
114    *arc_enabled = 1;
115  else
116    *arc_enabled = 0;
117}
118
119/* Validate IP v4 packets and pass them either to forwarding code
120   or drop/punt exception packets. */
always_inline uword
ip4_input_inline (vlib_main_t * vm,
		  vlib_node_runtime_t * node,
		  vlib_frame_t * frame, int verify_checksum)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 n_left_from, *from;
  u32 thread_index = vm->thread_index;
  /* Errors are always attributed to ip4-input, even when this inline
     runs on behalf of ip4-input-no-checksum (a sibling node). */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);
  vlib_simple_counter_main_t *cm;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  ip4_header_t *ip[4];
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u32 sw_if_index[4];
  /* Counter-batching state: packets are counted per RX interface, and
     updates are coalesced while consecutive packets share an interface. */
  u32 last_sw_if_index = ~0;
  u32 cnt = 0;
  int arc_enabled = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
				   /* stride */ 1,
				   sizeof (ip4_input_trace_t));

  /* Per-interface IP4 RX counters. */
  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
			 VNET_INTERFACE_COUNTER_IP4);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;
#if (CLIB_N_PREFETCHES >= 8)
  /* Quad-loop variant for targets with enough prefetch slots. */
  while (n_left_from >= 4)
    {
      u32 x = 0;

      /* Prefetch next iteration. */
      if (n_left_from >= 12)
	{
	  vlib_prefetch_buffer_header (b[8], LOAD);
	  vlib_prefetch_buffer_header (b[9], LOAD);
	  vlib_prefetch_buffer_header (b[10], LOAD);
	  vlib_prefetch_buffer_header (b[11], LOAD);

	  vlib_prefetch_buffer_data (b[4], LOAD);
	  vlib_prefetch_buffer_data (b[5], LOAD);
	  vlib_prefetch_buffer_data (b[6], LOAD);
	  vlib_prefetch_buffer_data (b[7], LOAD);
	}

      /* Invalidate any stale RX adjacency left by a previous graph walk. */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = ~0;
      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = ~0;
      vnet_buffer (b[2])->ip.adj_index[VLIB_RX] = ~0;
      vnet_buffer (b[3])->ip.adj_index[VLIB_RX] = ~0;

      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
      sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
      sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];

      /* x stays 0 iff all four packets arrived on last_sw_if_index. */
      x |= sw_if_index[0] ^ last_sw_if_index;
      x |= sw_if_index[1] ^ last_sw_if_index;
      x |= sw_if_index[2] ^ last_sw_if_index;
      x |= sw_if_index[3] ^ last_sw_if_index;

      if (PREDICT_TRUE (x == 0))
	{
	  /* we deal with 4 more packets sharing the same sw_if_index
	     with the previous one, so we can optimize */
	  cnt += 4;
	  if (arc_enabled)
	    {
	      next[0] = ip4_input_set_next (sw_if_index[0], b[0], 1);
	      next[1] = ip4_input_set_next (sw_if_index[1], b[1], 1);
	      next[2] = ip4_input_set_next (sw_if_index[2], b[2], 1);
	      next[3] = ip4_input_set_next (sw_if_index[3], b[3], 1);
	    }
	  else
	    {
	      next[0] = ip4_input_set_next (sw_if_index[0], b[0], 0);
	      next[1] = ip4_input_set_next (sw_if_index[1], b[1], 0);
	      next[2] = ip4_input_set_next (sw_if_index[2], b[2], 0);
	      next[3] = ip4_input_set_next (sw_if_index[3], b[3], 0);
	    }
	}
      else
	{
	  /* Mixed interfaces: fall back to per-packet counter batching
	     and (conservatively) per-packet feature-arc checks. */
	  ip4_input_check_sw_if_index (vm, cm, sw_if_index[0],
				       &last_sw_if_index, &cnt, &arc_enabled);
	  ip4_input_check_sw_if_index (vm, cm, sw_if_index[1],
				       &last_sw_if_index, &cnt, &arc_enabled);
	  ip4_input_check_sw_if_index (vm, cm, sw_if_index[2],
				       &last_sw_if_index, &cnt, &arc_enabled);
	  ip4_input_check_sw_if_index (vm, cm, sw_if_index[3],
				       &last_sw_if_index, &cnt, &arc_enabled);

	  next[0] = ip4_input_set_next (sw_if_index[0], b[0], 1);
	  next[1] = ip4_input_set_next (sw_if_index[1], b[1], 1);
	  next[2] = ip4_input_set_next (sw_if_index[2], b[2], 1);
	  next[3] = ip4_input_set_next (sw_if_index[3], b[3], 1);
	}

      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);
      ip[2] = vlib_buffer_get_current (b[2]);
      ip[3] = vlib_buffer_get_current (b[3]);

      /* Validate headers (version, length, TTL, optional checksum);
	 may rewrite next[] to drop/punt/options/icmp-error. */
      ip4_input_check_x4 (vm, error_node, b, ip, next, verify_checksum);

      /* next */
      b += 4;
      next += 4;
      n_left_from -= 4;
    }
#elif (CLIB_N_PREFETCHES >= 4)
  /* Dual-loop variant for targets with fewer prefetch slots. */
  while (n_left_from >= 2)
    {
      u32 x = 0;
      u32 next0, next1;

      /* Prefetch next iteration. */
      if (n_left_from >= 6)
	{
	  vlib_prefetch_buffer_header (b[4], LOAD);
	  vlib_prefetch_buffer_header (b[5], LOAD);

	  vlib_prefetch_buffer_data (b[2], LOAD);
	  vlib_prefetch_buffer_data (b[3], LOAD);
	}

      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = ~0;
      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = ~0;

      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];

      /* x stays 0 iff both packets arrived on last_sw_if_index. */
      x |= sw_if_index[0] ^ last_sw_if_index;
      x |= sw_if_index[1] ^ last_sw_if_index;

      if (PREDICT_TRUE (x == 0))
	{
	  /* we deal with 2 more packets sharing the same sw_if_index
	     with the previous one, so we can optimize */
	  cnt += 2;
	  if (arc_enabled)
	    {
	      next0 = ip4_input_set_next (sw_if_index[0], b[0], 1);
	      next1 = ip4_input_set_next (sw_if_index[1], b[1], 1);
	    }
	  else
	    {
	      next0 = ip4_input_set_next (sw_if_index[0], b[0], 0);
	      next1 = ip4_input_set_next (sw_if_index[1], b[1], 0);
	    }
	}
      else
	{
	  ip4_input_check_sw_if_index (vm, cm, sw_if_index[0],
				       &last_sw_if_index, &cnt, &arc_enabled);
	  ip4_input_check_sw_if_index (vm, cm, sw_if_index[1],
				       &last_sw_if_index, &cnt, &arc_enabled);

	  next0 = ip4_input_set_next (sw_if_index[0], b[0], 1);
	  next1 = ip4_input_set_next (sw_if_index[1], b[1], 1);
	}

      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);

      ip4_input_check_x2 (vm, error_node, b[0], b[1], ip[0], ip[1],
			  &next0, &next1, verify_checksum);
      next[0] = (u16) next0;
      next[1] = (u16) next1;

      /* next */
      b += 2;
      next += 2;
      n_left_from -= 2;
    }
#endif

  /* Scalar tail: remaining packets, one at a time. */
  while (n_left_from)
    {
      u32 next0;
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = ~0;
      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      ip4_input_check_sw_if_index (vm, cm, sw_if_index[0], &last_sw_if_index,
				   &cnt, &arc_enabled);
      next0 = ip4_input_set_next (sw_if_index[0], b[0], arc_enabled);
      ip[0] = vlib_buffer_get_current (b[0]);
      ip4_input_check_x1 (vm, error_node, b[0], ip[0], &next0,
			  verify_checksum);
      next[0] = next0;

      /* next */
      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  /* Flush the final batched counter update for the last interface seen. */
  vlib_increment_simple_counter (cm, thread_index, last_sw_if_index, cnt);
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
327
328/** \brief IPv4 input node.
329    @node ip4-input
330
331    This is the IPv4 input node: validates ip4 header checksums,
332    verifies ip header lengths, discards pkts with expired TTLs,
333    and sends pkts to the set of ip feature nodes configured on
334    the rx interface.
335
336    @param vm vlib_main_t corresponding to the current thread
337    @param node vlib_node_runtime_t
338    @param frame vlib_frame_t whose contents should be dispatched
339
340    @par Graph mechanics: buffer metadata, next index usage
341
342    @em Uses:
343    - vnet_feature_config_main_t cm corresponding to each pkt's dst address unicast /
344      multicast status.
345    - <code>b->current_config_index</code> corresponding to each pkt's
346      rx sw_if_index.
347         - This sets the per-packet graph trajectory, ensuring that
348           each packet visits the per-interface features in order.
349
350    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
351        - Indicates the @c sw_if_index value of the interface that the
352	  packet was received on.
353
354    @em Sets:
355    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
356        - The lookup result adjacency index.
357
358    <em>Next Indices:</em>
359    - Dispatches pkts to the (first) feature node:
360      <code> vnet_get_config_data (... &next0 ...); </code>
361      or @c error-drop
362*/
/* ip4-input entry point: full validation including header checksum. */
VLIB_NODE_FN (ip4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
			       vlib_frame_t * frame)
{
  return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1);
}
368
/* ip4-input-no-checksum entry point: identical validation except the
   header checksum is not verified (e.g. when hardware already did). */
VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm,
					   vlib_node_runtime_t * node,
					   vlib_frame_t * frame)
{
  return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
}
375
#ifndef CLIB_MARCH_VARIANT
/* Human-readable strings for IP4 error counters, expanded from the
   foreach_ip4_error list; defined once (not per march variant). */
char *ip4_error_strings[] = {
#define _(sym,string) string,
  foreach_ip4_error
#undef _
};
#endif
383
384/* *INDENT-OFF* */
/* Graph node registration for ip4-input: declares the error counters
   and the fixed set of next nodes the input path can dispatch to. */
VLIB_REGISTER_NODE (ip4_input_node) = {
  .name = "ip4-input",
  .vector_size = sizeof (u32),
  .protocol_hint = VLIB_NODE_PROTO_HINT_IP4,

  .n_errors = IP4_N_ERROR,
  .error_strings = ip4_error_strings,

  .n_next_nodes = IP4_INPUT_N_NEXT,
  .next_nodes = {
    [IP4_INPUT_NEXT_DROP] = "error-drop",
    [IP4_INPUT_NEXT_PUNT] = "error-punt",
    [IP4_INPUT_NEXT_OPTIONS] = "ip4-options",
    [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
    [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
    [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_INPUT_NEXT_REASSEMBLY] = "ip4-full-reassembly",
  },

  .format_buffer = format_ip4_header,
  .format_trace = format_ip4_input_trace,
};
407
/* Registration for ip4-input-no-checksum; sibling_of shares ip4-input's
   next nodes and error counters. */
VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = {
  .name = "ip4-input-no-checksum",
  .vector_size = sizeof (u32),

  .sibling_of = "ip4-input",
  .format_buffer = format_ip4_header,
  .format_trace = format_ip4_input_trace,
};
416/* *INDENT-ON* */
417
/* One-time IPv4 initialization: hook ip4-input up to the L2 protocols
   that can carry IPv4, register packet-generator edit support, run
   dependent init functions, and set IPv4 host defaults.
   Returns NULL on success or the first failing init's error. */
static clib_error_t *
ip4_init (vlib_main_t * vm)
{
  /* NOTE: 'error' is first assigned by the init-call chain below; the
     final 'return error' therefore returns the last (NULL) result. */
  clib_error_t *error;

  /* Dispatch IPv4 payloads from ethernet/PPP/HDLC into ip4-input. */
  ethernet_register_input_type (vm, ETHERNET_TYPE_IP4, ip4_input_node.index);
  ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, ip4_input_node.index);
  hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, ip4_input_node.index);

  {
    /* Teach the packet generator how to parse/edit ip4 headers for
       both input nodes. */
    extern vlib_node_registration_t ip4_input_no_checksum_node;
    pg_node_t *pn;
    pn = pg_get_node (ip4_input_node.index);
    pn->unformat_edit = unformat_pg_ip4_header;
    pn = pg_get_node (ip4_input_no_checksum_node.index);
    pn->unformat_edit = unformat_pg_ip4_header;
  }

  if ((error = vlib_call_init_function (vm, ip4_cli_init)))
    return error;

  if ((error = vlib_call_init_function (vm, ip4_source_check_init)))
    return error;

  if ((error = vlib_call_init_function
       (vm, ip4_source_and_port_range_check_init)))
    return error;

  /* Set flow hash to something non-zero. */
  ip4_main.flow_hash_seed = 0xdeadbeef;

  /* Default TTL for packets we generate. */
  ip4_main.host_config.ttl = 64;

  return error;
}
454
455VLIB_INIT_FUNCTION (ip4_init);
456
457static clib_error_t *
458ip4_main_loop_enter (vlib_main_t * vm)
459{
460  ip4_main_t *im = &ip4_main;
461  vlib_thread_main_t *tm = &vlib_thread_main;
462  u32 n_vlib_mains = tm->n_vlib_mains;
463
464  throttle_init (&im->arp_throttle, n_vlib_mains, 1e-3);
465
466  return (NULL);
467}
468
469VLIB_MAIN_LOOP_ENTER_FUNCTION (ip4_main_loop_enter);
470
471/*
472 * fd.io coding-style-patch-verification: ON
473 *
474 * Local Variables:
475 * eval: (c-set-style "gnu")
476 * End:
477 */
478