ip4_forward.c revision 896c896a
1/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * ip/ip4_forward.c: IP v4 forwarding
17 *
18 * Copyright (c) 2008 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vnet/vnet.h>
41#include <vnet/ip/ip.h>
42#include <vnet/ip/ip_frag.h>
43#include <vnet/ethernet/ethernet.h>	/* for ethernet_header_t */
44#include <vnet/ethernet/arp_packet.h>	/* for ethernet_arp_header_t */
45#include <vnet/ppp/ppp.h>
46#include <vnet/srp/srp.h>	/* for srp_hw_interface_class */
47#include <vnet/api_errno.h>	/* for API error numbers */
48#include <vnet/fib/fib_table.h>	/* for FIB table and entry creation */
49#include <vnet/fib/fib_entry.h>	/* for FIB table and entry creation */
50#include <vnet/fib/fib_urpf_list.h>	/* for FIB uRPF check */
51#include <vnet/fib/ip4_fib.h>
52#include <vnet/dpo/load_balance.h>
53#include <vnet/dpo/load_balance_map.h>
54#include <vnet/dpo/classify_dpo.h>
55#include <vnet/mfib/mfib_table.h>	/* for mFIB table and entry creation */
56
57#include <vnet/ip/ip4_forward.h>
58#include <vnet/interface_output.h>
59
60/** @brief IPv4 lookup node.
61    @node ip4-lookup
62
63    This is the main IPv4 lookup dispatch node.
64
65    @param vm vlib_main_t corresponding to the current thread
66    @param node vlib_node_runtime_t
67    @param frame vlib_frame_t whose contents should be dispatched
68
69    @par Graph mechanics: buffer metadata, next index usage
70
71    @em Uses:
72    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73        - Indicates the @c sw_if_index value of the interface that the
74	  packet was received on.
75    - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76        - When the value is @c ~0 then the node performs a longest prefix
77          match (LPM) for the packet destination address in the FIB attached
78          to the receive interface.
79        - Otherwise perform LPM for the packet destination address in the
80          indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81          value (0, 1, ...) and not a VRF id.
82
83    @em Sets:
84    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85        - The lookup result adjacency index.
86
87    <em>Next Index:</em>
88    - Dispatches the packet to the node index found in
89      ip_adjacency_t @c adj->lookup_next_index
90      (where @c adj is the lookup result adjacency).
91*/
VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
				vlib_frame_t * frame)
{
  /* The node function is a thin wrapper: the whole frame is handed to
   * ip4_lookup_inline() and its return value is passed back unchanged. */
  return ip4_lookup_inline (vm, node, frame);
}
97
/* Forward declaration of the trace formatter referenced by the node
 * registrations in this file. */
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);

/* *INDENT-OFF* */
/* Registration of the "ip4-lookup" graph node: frame elements are u32
 * wide, and the next-node table has IP_LOOKUP_N_NEXT entries taken from
 * IP4_LOOKUP_NEXT_NODES. */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
  .name = "ip4-lookup",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_lookup_trace,
  .n_next_nodes = IP_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};
109/* *INDENT-ON* */
110
111VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112				      vlib_node_runtime_t * node,
113				      vlib_frame_t * frame)
114{
115  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116  u32 n_left, *from;
117  u32 thread_index = vm->thread_index;
118  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119  u16 nexts[VLIB_FRAME_SIZE], *next;
120
121  from = vlib_frame_vector_args (frame);
122  n_left = frame->n_vectors;
123  next = nexts;
124
125  vlib_get_buffers (vm, from, bufs, n_left);
126
127  while (n_left >= 4)
128    {
129      const load_balance_t *lb0, *lb1;
130      const ip4_header_t *ip0, *ip1;
131      u32 lbi0, hc0, lbi1, hc1;
132      const dpo_id_t *dpo0, *dpo1;
133
134      /* Prefetch next iteration. */
135      {
136	vlib_prefetch_buffer_header (b[2], LOAD);
137	vlib_prefetch_buffer_header (b[3], LOAD);
138
139	CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140	CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141      }
142
143      ip0 = vlib_buffer_get_current (b[0]);
144      ip1 = vlib_buffer_get_current (b[1]);
145      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146      lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148      lb0 = load_balance_get (lbi0);
149      lb1 = load_balance_get (lbi1);
150
151      /*
152       * this node is for via FIBs we can re-use the hash value from the
153       * to node if present.
154       * We don't want to use the same hash value at each level in the recursion
155       * graph as that would lead to polarisation
156       */
157      hc0 = hc1 = 0;
158
159      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160	{
161	  if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162	    {
163	      hc0 = vnet_buffer (b[0])->ip.flow_hash =
164		vnet_buffer (b[0])->ip.flow_hash >> 1;
165	    }
166	  else
167	    {
168	      hc0 = vnet_buffer (b[0])->ip.flow_hash =
169		ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170	    }
171	  dpo0 = load_balance_get_fwd_bucket
172	    (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173	}
174      else
175	{
176	  dpo0 = load_balance_get_bucket_i (lb0, 0);
177	}
178      if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179	{
180	  if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181	    {
182	      hc1 = vnet_buffer (b[1])->ip.flow_hash =
183		vnet_buffer (b[1])->ip.flow_hash >> 1;
184	    }
185	  else
186	    {
187	      hc1 = vnet_buffer (b[1])->ip.flow_hash =
188		ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189	    }
190	  dpo1 = load_balance_get_fwd_bucket
191	    (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192	}
193      else
194	{
195	  dpo1 = load_balance_get_bucket_i (lb1, 0);
196	}
197
198      next[0] = dpo0->dpoi_next_node;
199      next[1] = dpo1->dpoi_next_node;
200
201      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204      vlib_increment_combined_counter
205	(cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206      vlib_increment_combined_counter
207	(cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209      b += 2;
210      next += 2;
211      n_left -= 2;
212    }
213
214  while (n_left > 0)
215    {
216      const load_balance_t *lb0;
217      const ip4_header_t *ip0;
218      const dpo_id_t *dpo0;
219      u32 lbi0, hc0;
220
221      ip0 = vlib_buffer_get_current (b[0]);
222      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224      lb0 = load_balance_get (lbi0);
225
226      hc0 = 0;
227      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228	{
229	  if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230	    {
231	      hc0 = vnet_buffer (b[0])->ip.flow_hash =
232		vnet_buffer (b[0])->ip.flow_hash >> 1;
233	    }
234	  else
235	    {
236	      hc0 = vnet_buffer (b[0])->ip.flow_hash =
237		ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238	    }
239	  dpo0 = load_balance_get_fwd_bucket
240	    (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241	}
242      else
243	{
244	  dpo0 = load_balance_get_bucket_i (lb0, 0);
245	}
246
247      next[0] = dpo0->dpoi_next_node;
248      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250      vlib_increment_combined_counter
251	(cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253      b += 1;
254      next += 1;
255      n_left -= 1;
256    }
257
258  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259  if (node->flags & VLIB_NODE_FLAG_TRACE)
260    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262  return frame->n_vectors;
263}
264
265/* *INDENT-OFF* */
/* Registration of the "ip4-load-balance" graph node.  It is declared a
 * sibling of "ip4-lookup" and uses the same trace formatter as that
 * node. */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
  .name = "ip4-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip4-lookup",
  .format_trace = format_ip4_lookup_trace,
};
273/* *INDENT-ON* */
274
275#ifndef CLIB_MARCH_VARIANT
/**
 * Get the first IPv4 address of an interface.
 *
 * Iterates the addresses of @c sw_if_index (asking the iteration to honor
 * unnumbered interfaces) and stops at the first one visited.
 *
 * @param im          IPv4 main; only @c im->lookup_main is used.
 * @param sw_if_index interface whose addresses are walked.
 * @param result_ia   optional out-parameter.  When non-NULL it receives the
 *                    ip_interface_address_t of the returned address, or 0
 *                    when no address was found.
 * @return the first address visited, or 0 if the iteration visits none.
 */
ip4_address_t *
ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
			     ip_interface_address_t ** result_ia)
{
  ip_lookup_main_t *lm = &im->lookup_main;
  ip_interface_address_t *ia = 0;
  ip4_address_t *result = 0;

  /* *INDENT-OFF* */
  foreach_ip_interface_address
    (lm, ia, sw_if_index,
     1 /* honor unnumbered */ ,
     ({
       ip4_address_t * a =
         ip_interface_address_get_address (lm, ia);
       result = a;
       /* only the first address is wanted */
       break;
     }));
  /* *INDENT-ON* */
  if (result_ia)
    *result_ia = result ? ia : 0;
  return result;
}
300#endif
301
302static void
303ip4_add_subnet_bcast_route (u32 fib_index,
304                            fib_prefix_t *pfx,
305                            u32 sw_if_index)
306{
307  vnet_sw_interface_flags_t iflags;
308
309  iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311  fib_table_entry_special_remove(fib_index,
312                                 pfx,
313                                 FIB_SOURCE_INTERFACE);
314
315  if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316    {
317      fib_table_entry_update_one_path (fib_index, pfx,
318                                       FIB_SOURCE_INTERFACE,
319                                       FIB_ENTRY_FLAG_NONE,
320                                       DPO_PROTO_IP4,
321                                       /* No next-hop address */
322                                       &ADJ_BCAST_ADDR,
323                                       sw_if_index,
324                                       // invalid FIB index
325                                       ~0,
326                                       1,
327                                       // no out-label stack
328                                       NULL,
329                                       FIB_ROUTE_PATH_FLAG_NONE);
330    }
331  else
332    {
333        fib_table_entry_special_add(fib_index,
334                                    pfx,
335                                    FIB_SOURCE_INTERFACE,
336                                    (FIB_ENTRY_FLAG_DROP |
337                                     FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338    }
339}
340
341static void
342ip4_add_interface_prefix_routes (ip4_main_t *im,
343				 u32 sw_if_index,
344				 u32 fib_index,
345				 ip_interface_address_t * a)
346{
347  ip_lookup_main_t *lm = &im->lookup_main;
348  ip_interface_prefix_t *if_prefix;
349  ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351  ip_interface_prefix_key_t key = {
352    .prefix = {
353      .fp_len = a->address_length,
354      .fp_proto = FIB_PROTOCOL_IP4,
355      .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356    },
357    .sw_if_index = sw_if_index,
358  };
359
360  fib_prefix_t pfx_special = {
361    .fp_proto = FIB_PROTOCOL_IP4,
362  };
363
364  /* If prefix already set on interface, just increment ref count & return */
365  if_prefix = ip_get_interface_prefix (lm, &key);
366  if (if_prefix)
367    {
368      if_prefix->ref_count += 1;
369      return;
370    }
371
372  /* New prefix - allocate a pool entry, initialize it, add to the hash */
373  pool_get (lm->if_prefix_pool, if_prefix);
374  if_prefix->ref_count = 1;
375  if_prefix->src_ia_index = a - lm->if_address_pool;
376  clib_memcpy (&if_prefix->key, &key, sizeof (key));
377  mhash_set (&lm->prefix_to_if_prefix_index, &key,
378	     if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380  /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381  if (a->address_length <= 30)
382    {
383      pfx_special.fp_len = a->address_length;
384      pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386      /* set the glean route for the prefix */
387      fib_table_entry_update_one_path (fib_index, &pfx_special,
388				       FIB_SOURCE_INTERFACE,
389				       (FIB_ENTRY_FLAG_CONNECTED |
390					FIB_ENTRY_FLAG_ATTACHED),
391				       DPO_PROTO_IP4,
392				       /* No next-hop address */
393				       NULL,
394				       sw_if_index,
395                                       /* invalid FIB index */
396                                       ~0,
397                                       1,
398                                       /* no out-label stack */
399                                       NULL,
400                                       FIB_ROUTE_PATH_FLAG_NONE);
401
402      /* set a drop route for the base address of the prefix */
403      pfx_special.fp_len = 32;
404      pfx_special.fp_addr.ip4.as_u32 =
405	address->as_u32 & im->fib_masks[a->address_length];
406
407      if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408	fib_table_entry_special_add (fib_index, &pfx_special,
409				     FIB_SOURCE_INTERFACE,
410				     (FIB_ENTRY_FLAG_DROP |
411				      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413      /* set a route for the broadcast address of the prefix */
414      pfx_special.fp_len = 32;
415      pfx_special.fp_addr.ip4.as_u32 =
416	address->as_u32 | ~im->fib_masks[a->address_length];
417      if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418	ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421    }
422  /* length == 31 - add an attached route for the other address */
423  else if (a->address_length == 31)
424    {
425      pfx_special.fp_len = 32;
426      pfx_special.fp_addr.ip4.as_u32 =
427	address->as_u32 ^ clib_host_to_net_u32(1);
428
429      fib_table_entry_update_one_path (fib_index, &pfx_special,
430				       FIB_SOURCE_INTERFACE,
431				       (FIB_ENTRY_FLAG_ATTACHED),
432				       DPO_PROTO_IP4,
433				       &pfx_special.fp_addr,
434				       sw_if_index,
435                                       /* invalid FIB index */
436                                       ~0,
437                                       1,
438                                       NULL,
439                                       FIB_ROUTE_PATH_FLAG_NONE);
440    }
441}
442
443static void
444ip4_add_interface_routes (u32 sw_if_index,
445			  ip4_main_t * im, u32 fib_index,
446			  ip_interface_address_t * a)
447{
448  ip_lookup_main_t *lm = &im->lookup_main;
449  ip4_address_t *address = ip_interface_address_get_address (lm, a);
450  fib_prefix_t pfx = {
451    .fp_len = 32,
452    .fp_proto = FIB_PROTOCOL_IP4,
453    .fp_addr.ip4 = *address,
454  };
455
456  /* set special routes for the prefix if needed */
457  ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459  if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460    {
461      u32 classify_table_index =
462	lm->classify_table_index_by_sw_if_index[sw_if_index];
463      if (classify_table_index != (u32) ~ 0)
464	{
465	  dpo_id_t dpo = DPO_INVALID;
466
467	  dpo_set (&dpo,
468		   DPO_CLASSIFY,
469		   DPO_PROTO_IP4,
470		   classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472	  fib_table_entry_special_dpo_add (fib_index,
473					   &pfx,
474					   FIB_SOURCE_CLASSIFY,
475					   FIB_ENTRY_FLAG_NONE, &dpo);
476	  dpo_reset (&dpo);
477	}
478    }
479
480  fib_table_entry_update_one_path (fib_index, &pfx,
481                                   FIB_SOURCE_INTERFACE,
482                                   (FIB_ENTRY_FLAG_CONNECTED |
483                                    FIB_ENTRY_FLAG_LOCAL),
484                                   DPO_PROTO_IP4,
485                                   &pfx.fp_addr,
486                                   sw_if_index,
487                                   // invalid FIB index
488                                   ~0,
489				   1, NULL,
490				   FIB_ROUTE_PATH_FLAG_NONE);
491}
492
493static void
494ip4_del_interface_prefix_routes (ip4_main_t * im,
495				 u32 sw_if_index,
496				 u32 fib_index,
497				 ip4_address_t * address,
498				 u32 address_length)
499{
500  ip_lookup_main_t *lm = &im->lookup_main;
501  ip_interface_prefix_t *if_prefix;
502
503  ip_interface_prefix_key_t key = {
504    .prefix = {
505      .fp_len = address_length,
506      .fp_proto = FIB_PROTOCOL_IP4,
507      .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508    },
509    .sw_if_index = sw_if_index,
510  };
511
512  fib_prefix_t pfx_special = {
513    .fp_len = 32,
514    .fp_proto = FIB_PROTOCOL_IP4,
515  };
516
517  if_prefix = ip_get_interface_prefix (lm, &key);
518  if (!if_prefix)
519    {
520      clib_warning ("Prefix not found while deleting %U",
521		    format_ip4_address_and_length, address, address_length);
522      return;
523    }
524
525  if_prefix->ref_count -= 1;
526
527  /*
528   * Routes need to be adjusted if:
529   * - deleting last intf addr in prefix
530   * - deleting intf addr used as default source address in glean adjacency
531   *
532   * We're done now otherwise
533   */
534  if ((if_prefix->ref_count > 0) &&
535      !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
536    return;
537
538  /* length <= 30, delete glean route, first address, last address */
539  if (address_length <= 30)
540    {
541
542      /* remove glean route for prefix */
543      pfx_special.fp_addr.ip4 = *address;
544      pfx_special.fp_len = address_length;
545      fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
546
547      /* if no more intf addresses in prefix, remove other special routes */
548      if (!if_prefix->ref_count)
549	{
550	  /* first address in prefix */
551	  pfx_special.fp_addr.ip4.as_u32 =
552	    address->as_u32 & im->fib_masks[address_length];
553	  pfx_special.fp_len = 32;
554
555	  if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556	  fib_table_entry_special_remove (fib_index,
557					  &pfx_special,
558					  FIB_SOURCE_INTERFACE);
559
560	  /* prefix broadcast address */
561	  pfx_special.fp_addr.ip4.as_u32 =
562	    address->as_u32 | ~im->fib_masks[address_length];
563	  pfx_special.fp_len = 32;
564
565	  if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566	  fib_table_entry_special_remove (fib_index,
567					  &pfx_special,
568					  FIB_SOURCE_INTERFACE);
569	}
570      else
571	/* default source addr just got deleted, find another */
572	{
573	  ip_interface_address_t *new_src_ia = NULL;
574	  ip4_address_t *new_src_addr = NULL;
575
576	  new_src_addr =
577	    ip4_interface_address_matching_destination
578	      (im, address, sw_if_index, &new_src_ia);
579
580	  if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
581
582	  pfx_special.fp_len = address_length;
583	  pfx_special.fp_addr.ip4 = *new_src_addr;
584
585	  /* set new glean route for the prefix */
586	  fib_table_entry_update_one_path (fib_index, &pfx_special,
587					   FIB_SOURCE_INTERFACE,
588					   (FIB_ENTRY_FLAG_CONNECTED |
589					    FIB_ENTRY_FLAG_ATTACHED),
590					   DPO_PROTO_IP4,
591					   /* No next-hop address */
592					   NULL,
593					   sw_if_index,
594					   /* invalid FIB index */
595					   ~0,
596					   1,
597					   /* no out-label stack */
598					   NULL,
599					   FIB_ROUTE_PATH_FLAG_NONE);
600	  return;
601	}
602    }
603  /* length == 31, delete attached route for the other address */
604  else if (address_length == 31)
605    {
606      pfx_special.fp_addr.ip4.as_u32 =
607	address->as_u32 ^ clib_host_to_net_u32(1);
608
609      fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
610    }
611
612  mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613  pool_put (lm->if_prefix_pool, if_prefix);
614}
615
616static void
617ip4_del_interface_routes (u32 sw_if_index,
618			  ip4_main_t * im,
619			  u32 fib_index,
620			  ip4_address_t * address, u32 address_length)
621{
622  fib_prefix_t pfx = {
623    .fp_len = address_length,
624    .fp_proto = FIB_PROTOCOL_IP4,
625    .fp_addr.ip4 = *address,
626  };
627
628  ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629				   address, address_length);
630
631  pfx.fp_len = 32;
632  fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
633}
634
635#ifndef CLIB_MARCH_VARIANT
636void
637ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
638{
639  ip4_main_t *im = &ip4_main;
640
641  vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
642
643  /*
644   * enable/disable only on the 1<->0 transition
645   */
646  if (is_enable)
647    {
648      if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
649	return;
650    }
651  else
652    {
653      ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654      if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
655	return;
656    }
657  vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
658			       !is_enable, 0, 0);
659
660
661  vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662			       sw_if_index, !is_enable, 0, 0);
663
664  {
665    ip4_enable_disable_interface_callback_t *cb;
666    vec_foreach (cb, im->enable_disable_interface_callbacks)
667      cb->function (im, cb->function_opaque, sw_if_index, is_enable);
668  }
669}
670
671static clib_error_t *
672ip4_add_del_interface_address_internal (vlib_main_t * vm,
673					u32 sw_if_index,
674					ip4_address_t * address,
675					u32 address_length, u32 is_del)
676{
677  vnet_main_t *vnm = vnet_get_main ();
678  ip4_main_t *im = &ip4_main;
679  ip_lookup_main_t *lm = &im->lookup_main;
680  clib_error_t *error = 0;
681  u32 if_address_index, elts_before;
682  ip4_address_fib_t ip4_af, *addr_fib = 0;
683
684  /* local0 interface doesn't support IP addressing  */
685  if (sw_if_index == 0)
686    {
687      return
688       clib_error_create ("local0 interface doesn't support IP addressing");
689    }
690
691  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692  ip4_addr_fib_init (&ip4_af, address,
693		     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694  vec_add1 (addr_fib, ip4_af);
695
696  /*
697   * there is no support for adj-fib handling in the presence of overlapping
698   * subnets on interfaces. Easy fix - disallow overlapping subnets, like
699   * most routers do.
700   */
701  /* *INDENT-OFF* */
702  if (!is_del)
703    {
704      /* When adding an address check that it does not conflict
705         with an existing address on any interface in this table. */
706      ip_interface_address_t *ia;
707      vnet_sw_interface_t *sif;
708
709      pool_foreach(sif, vnm->interface_main.sw_interfaces,
710      ({
711          if (im->fib_index_by_sw_if_index[sw_if_index] ==
712              im->fib_index_by_sw_if_index[sif->sw_if_index])
713            {
714              foreach_ip_interface_address
715                (&im->lookup_main, ia, sif->sw_if_index,
716                 0 /* honor unnumbered */ ,
717                 ({
718                   ip4_address_t * x =
719                     ip_interface_address_get_address
720                     (&im->lookup_main, ia);
721                   if (ip4_destination_matches_route
722                       (im, address, x, ia->address_length) ||
723                       ip4_destination_matches_route (im,
724                                                      x,
725                                                      address,
726                                                      address_length))
727                     {
728		       /* an intf may have >1 addr from the same prefix */
729		       if ((sw_if_index == sif->sw_if_index) &&
730			   (ia->address_length == address_length) &&
731			   (x->as_u32 != address->as_u32))
732		         continue;
733
734		       /* error if the length or intf was different */
735                       vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
736
737                       return
738                         clib_error_create
739                         ("failed to add %U on %U which conflicts with %U for interface %U",
740                          format_ip4_address_and_length, address,
741                          address_length,
742			  format_vnet_sw_if_index_name, vnm,
743			  sw_if_index,
744                          format_ip4_address_and_length, x,
745                          ia->address_length,
746                          format_vnet_sw_if_index_name, vnm,
747                          sif->sw_if_index);
748                     }
749                 }));
750            }
751      }));
752    }
753  /* *INDENT-ON* */
754
755  elts_before = pool_elts (lm->if_address_pool);
756
757  error = ip_interface_address_add_del
758    (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
759  if (error)
760    goto done;
761
762  ip4_sw_interface_enable_disable (sw_if_index, !is_del);
763
764  /* intf addr routes are added/deleted on admin up/down */
765  if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766    {
767      if (is_del)
768	ip4_del_interface_routes (sw_if_index,
769				  im, ip4_af.fib_index, address,
770				  address_length);
771      else
772	ip4_add_interface_routes (sw_if_index,
773				  im, ip4_af.fib_index,
774				  pool_elt_at_index
775				  (lm->if_address_pool, if_address_index));
776    }
777
778  /* If pool did not grow/shrink: add duplicate address. */
779  if (elts_before != pool_elts (lm->if_address_pool))
780    {
781      ip4_add_del_interface_address_callback_t *cb;
782      vec_foreach (cb, im->add_del_interface_address_callbacks)
783	cb->function (im, cb->function_opaque, sw_if_index,
784		      address, address_length, if_address_index, is_del);
785    }
786
787done:
788  vec_free (addr_fib);
789  return error;
790}
791
/*
 * Public entry point for adding or deleting an IPv4 interface address.
 * All arguments are forwarded unchanged to
 * ip4_add_del_interface_address_internal() and its result is returned.
 */
clib_error_t *
ip4_add_del_interface_address (vlib_main_t * vm,
			       u32 sw_if_index,
			       ip4_address_t * address,
			       u32 address_length, u32 is_del)
{
  return ip4_add_del_interface_address_internal
    (vm, sw_if_index, address, address_length, is_del);
}
801
802void
803ip4_directed_broadcast (u32 sw_if_index, u8 enable)
804{
805  ip_interface_address_t *ia;
806  ip4_main_t *im;
807
808  im = &ip4_main;
809
810  /*
811   * when directed broadcast is enabled, the subnet braodcast route will forward
812   * packets using an adjacency with a broadcast MAC. otherwise it drops
813   */
814  /* *INDENT-OFF* */
815  foreach_ip_interface_address(&im->lookup_main, ia,
816                               sw_if_index, 0,
817     ({
818       if (ia->address_length <= 30)
819         {
820           ip4_address_t *ipa;
821
822           ipa = ip_interface_address_get_address (&im->lookup_main, ia);
823
824           fib_prefix_t pfx = {
825             .fp_len = 32,
826             .fp_proto = FIB_PROTOCOL_IP4,
827             .fp_addr = {
828               .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
829             },
830           };
831
832           ip4_add_subnet_bcast_route
833             (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
834                                                  sw_if_index),
835              &pfx, sw_if_index);
836         }
837     }));
838  /* *INDENT-ON* */
839}
840#endif
841
842static clib_error_t *
843ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
844{
845  ip4_main_t *im = &ip4_main;
846  ip_interface_address_t *ia;
847  ip4_address_t *a;
848  u32 is_admin_up, fib_index;
849
850  /* Fill in lookup tables with default table (0). */
851  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
852
853  vec_validate_init_empty (im->
854			   lookup_main.if_address_pool_index_by_sw_if_index,
855			   sw_if_index, ~0);
856
857  is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
858
859  fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860
861  /* *INDENT-OFF* */
862  foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
863                                0 /* honor unnumbered */,
864  ({
865    a = ip_interface_address_get_address (&im->lookup_main, ia);
866    if (is_admin_up)
867      ip4_add_interface_routes (sw_if_index,
868				im, fib_index,
869				ia);
870    else
871      ip4_del_interface_routes (sw_if_index,
872				im, fib_index,
873				a, ia->address_length);
874  }));
875  /* *INDENT-ON* */
876
877  return 0;
878}
879
880VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
881
882/* Built-in ip4 unicast rx feature path definition */
883/* *INDENT-OFF* */
/* The "ip4-unicast" feature arc.  It starts at the listed input nodes and
 * names "ip4-lookup" as its last node; the arc index is stored in
 * ip4_main.lookup_main.ucast_feature_arc_index. */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

/* Features on the "ip4-unicast" arc.  Each entry names its node and, via
 * .runs_before, the feature node(s) it must precede. */
VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* "ip4-not-enabled" is the feature toggled by
 * ip4_sw_interface_enable_disable() with the inverse of its is_enable
 * argument. */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,	/* not before any other features */
};
968
969/* Built-in ip4 multicast rx feature path definition */
970VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
971{
972  .arc_name = "ip4-multicast",
973  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
974  .last_in_arc = "ip4-mfib-forward-lookup",
975  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976};
977
978VNET_FEATURE_INIT (ip4_vpath_mc, static) =
979{
980  .arc_name = "ip4-multicast",
981  .node_name = "vpath-input-ip4",
982  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983};
984
985VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
986{
987  .arc_name = "ip4-multicast",
988  .node_name = "ip4-not-enabled",
989  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990};
991
992VNET_FEATURE_INIT (ip4_lookup_mc, static) =
993{
994  .arc_name = "ip4-multicast",
995  .node_name = "ip4-mfib-forward-lookup",
996  .runs_before = 0,	/* last feature */
997};
998
999/* Source and port-range check ip4 tx feature path definition */
1000VNET_FEATURE_ARC_INIT (ip4_output, static) =
1001{
1002  .arc_name = "ip4-output",
1003  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1004  .last_in_arc = "interface-output",
1005  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006};
1007
1008VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1009{
1010  .arc_name = "ip4-output",
1011  .node_name = "ip4-source-and-port-range-check-tx",
1012  .runs_before = VNET_FEATURES ("ip4-outacl"),
1013};
1014
1015VNET_FEATURE_INIT (ip4_outacl, static) =
1016{
1017  .arc_name = "ip4-output",
1018  .node_name = "ip4-outacl",
1019  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020};
1021
1022VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1023{
1024  .arc_name = "ip4-output",
1025  .node_name = "ipsec4-output-feature",
1026  .runs_before = VNET_FEATURES ("interface-output"),
1027};
1028
1029/* Built-in ip4 tx feature path definition */
1030VNET_FEATURE_INIT (ip4_interface_output, static) =
1031{
1032  .arc_name = "ip4-output",
1033  .node_name = "interface-output",
1034  .runs_before = 0,	/* not before any other features */
1035};
1036/* *INDENT-ON* */
1037
1038static clib_error_t *
1039ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1040{
1041  ip4_main_t *im = &ip4_main;
1042
1043  /* Fill in lookup tables with default table (0). */
1044  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1045  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1046
1047  if (!is_add)
1048    {
1049      ip4_main_t *im4 = &ip4_main;
1050      ip_lookup_main_t *lm4 = &im4->lookup_main;
1051      ip_interface_address_t *ia = 0;
1052      ip4_address_t *address;
1053      vlib_main_t *vm = vlib_get_main ();
1054
1055      vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1056      /* *INDENT-OFF* */
1057      foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1058      ({
1059        address = ip_interface_address_get_address (lm4, ia);
1060        ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1061      }));
1062      /* *INDENT-ON* */
1063    }
1064
1065  vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066			       is_add, 0, 0);
1067
1068  vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1069			       sw_if_index, is_add, 0, 0);
1070
1071  return /* no error */ 0;
1072}
1073
1074VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1075
1076/* Global IP4 main. */
1077#ifndef CLIB_MARCH_VARIANT
1078ip4_main_t ip4_main;
1079#endif /* CLIB_MARCH_VARIANT */
1080
1081static clib_error_t *
1082ip4_lookup_init (vlib_main_t * vm)
1083{
1084  ip4_main_t *im = &ip4_main;
1085  clib_error_t *error;
1086  uword i;
1087
1088  if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1089    return error;
1090  if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1091    return (error);
1092  if ((error = vlib_call_init_function (vm, fib_module_init)))
1093    return error;
1094  if ((error = vlib_call_init_function (vm, mfib_module_init)))
1095    return error;
1096
1097  for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1098    {
1099      u32 m;
1100
1101      if (i < 32)
1102	m = pow2_mask (i) << (32 - i);
1103      else
1104	m = ~0;
1105      im->fib_masks[i] = clib_host_to_net_u32 (m);
1106    }
1107
1108  ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1109
1110  /* Create FIB with index 0 and table id of 0. */
1111  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112				     FIB_SOURCE_DEFAULT_ROUTE);
1113  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1114				      MFIB_SOURCE_DEFAULT_ROUTE);
1115
1116  {
1117    pg_node_t *pn;
1118    pn = pg_get_node (ip4_lookup_node.index);
1119    pn->unformat_edit = unformat_pg_ip4_header;
1120  }
1121
1122  {
1123    ethernet_arp_header_t h;
1124
1125    clib_memset (&h, 0, sizeof (h));
1126
1127#define _16(f,v) h.f = clib_host_to_net_u16 (v);
1128#define _8(f,v) h.f = v;
1129    _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1130    _16 (l3_type, ETHERNET_TYPE_IP4);
1131    _8 (n_l2_address_bytes, 6);
1132    _8 (n_l3_address_bytes, 4);
1133    _16 (opcode, ETHERNET_ARP_OPCODE_request);
1134#undef _16
1135#undef _8
1136
1137    vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138			       /* data */ &h,
1139			       sizeof (h),
1140			       /* alloc chunk size */ 8,
1141			       "ip4 arp");
1142  }
1143
1144  return error;
1145}
1146
1147VLIB_INIT_FUNCTION (ip4_lookup_init);
1148
1149typedef struct
1150{
1151  /* Adjacency taken. */
1152  u32 dpo_index;
1153  u32 flow_hash;
1154  u32 fib_index;
1155
1156  /* Packet data, possibly *after* rewrite. */
1157  u8 packet_data[64 - 1 * sizeof (u32)];
1158}
1159ip4_forward_next_trace_t;
1160
1161#ifndef CLIB_MARCH_VARIANT
1162u8 *
1163format_ip4_forward_next_trace (u8 * s, va_list * args)
1164{
1165  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1166  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1167  ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1168  u32 indent = format_get_indent (s);
1169  s = format (s, "%U%U",
1170	      format_white_space, indent,
1171	      format_ip4_header, t->packet_data, sizeof (t->packet_data));
1172  return s;
1173}
1174#endif
1175
1176static u8 *
1177format_ip4_lookup_trace (u8 * s, va_list * args)
1178{
1179  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181  ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182  u32 indent = format_get_indent (s);
1183
1184  s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1185	      t->fib_index, t->dpo_index, t->flow_hash);
1186  s = format (s, "\n%U%U",
1187	      format_white_space, indent,
1188	      format_ip4_header, t->packet_data, sizeof (t->packet_data));
1189  return s;
1190}
1191
1192static u8 *
1193format_ip4_rewrite_trace (u8 * s, va_list * args)
1194{
1195  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1196  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1197  ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1198  u32 indent = format_get_indent (s);
1199
1200  s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1201	      t->fib_index, t->dpo_index, format_ip_adjacency,
1202	      t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1203  s = format (s, "\n%U%U",
1204	      format_white_space, indent,
1205	      format_ip_adjacency_packet_data,
1206	      t->dpo_index, t->packet_data, sizeof (t->packet_data));
1207  return s;
1208}
1209
1210#ifndef CLIB_MARCH_VARIANT
1211/* Common trace function for all ip4-forward next nodes. */
1212void
1213ip4_forward_next_trace (vlib_main_t * vm,
1214			vlib_node_runtime_t * node,
1215			vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216{
1217  u32 *from, n_left;
1218  ip4_main_t *im = &ip4_main;
1219
1220  n_left = frame->n_vectors;
1221  from = vlib_frame_vector_args (frame);
1222
1223  while (n_left >= 4)
1224    {
1225      u32 bi0, bi1;
1226      vlib_buffer_t *b0, *b1;
1227      ip4_forward_next_trace_t *t0, *t1;
1228
1229      /* Prefetch next iteration. */
1230      vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1231      vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1232
1233      bi0 = from[0];
1234      bi1 = from[1];
1235
1236      b0 = vlib_get_buffer (vm, bi0);
1237      b1 = vlib_get_buffer (vm, bi1);
1238
1239      if (b0->flags & VLIB_BUFFER_IS_TRACED)
1240	{
1241	  t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1242	  t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1243	  t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1244	  t0->fib_index =
1245	    (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1246	     (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1247	    vec_elt (im->fib_index_by_sw_if_index,
1248		     vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1249
1250	  clib_memcpy_fast (t0->packet_data,
1251			    vlib_buffer_get_current (b0),
1252			    sizeof (t0->packet_data));
1253	}
1254      if (b1->flags & VLIB_BUFFER_IS_TRACED)
1255	{
1256	  t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1257	  t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1258	  t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1259	  t1->fib_index =
1260	    (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1261	     (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1262	    vec_elt (im->fib_index_by_sw_if_index,
1263		     vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1264	  clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1265			    sizeof (t1->packet_data));
1266	}
1267      from += 2;
1268      n_left -= 2;
1269    }
1270
1271  while (n_left >= 1)
1272    {
1273      u32 bi0;
1274      vlib_buffer_t *b0;
1275      ip4_forward_next_trace_t *t0;
1276
1277      bi0 = from[0];
1278
1279      b0 = vlib_get_buffer (vm, bi0);
1280
1281      if (b0->flags & VLIB_BUFFER_IS_TRACED)
1282	{
1283	  t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1284	  t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1285	  t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1286	  t0->fib_index =
1287	    (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1288	     (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1289	    vec_elt (im->fib_index_by_sw_if_index,
1290		     vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1291	  clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1292			    sizeof (t0->packet_data));
1293	}
1294      from += 1;
1295      n_left -= 1;
1296    }
1297}
1298
1299/* Compute TCP/UDP/ICMP4 checksum in software. */
1300u16
1301ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1302			      ip4_header_t * ip0)
1303{
1304  ip_csum_t sum0;
1305  u32 ip_header_length, payload_length_host_byte_order;
1306  u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1307  u16 sum16;
1308  u8 *data_this_buffer;
1309  u8 length_odd;
1310
1311  /* Initialize checksum with ip header. */
1312  ip_header_length = ip4_header_bytes (ip0);
1313  payload_length_host_byte_order =
1314    clib_net_to_host_u16 (ip0->length) - ip_header_length;
1315  sum0 =
1316    clib_host_to_net_u32 (payload_length_host_byte_order +
1317			  (ip0->protocol << 16));
1318
1319  if (BITS (uword) == 32)
1320    {
1321      sum0 =
1322	ip_csum_with_carry (sum0,
1323			    clib_mem_unaligned (&ip0->src_address, u32));
1324      sum0 =
1325	ip_csum_with_carry (sum0,
1326			    clib_mem_unaligned (&ip0->dst_address, u32));
1327    }
1328  else
1329    sum0 =
1330      ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1331
1332  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1333  data_this_buffer = (u8 *) ip0 + ip_header_length;
1334  n_ip_bytes_this_buffer =
1335    p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1336  if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1337    {
1338      n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1339	n_ip_bytes_this_buffer - ip_header_length : 0;
1340    }
1341
1342  while (1)
1343    {
1344      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1345      n_bytes_left -= n_this_buffer;
1346      if (n_bytes_left == 0)
1347	break;
1348
1349      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1350      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1351	return 0xfefe;
1352
1353      length_odd = (n_this_buffer & 1);
1354
1355      p0 = vlib_get_buffer (vm, p0->next_buffer);
1356      data_this_buffer = vlib_buffer_get_current (p0);
1357      n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1358
1359      if (PREDICT_FALSE (length_odd))
1360	{
1361	  /* Prepend a 0 or the resulting checksum will be incorrect. */
1362	  data_this_buffer--;
1363	  n_this_buffer++;
1364	  n_bytes_left++;
1365	  data_this_buffer[0] = 0;
1366	}
1367    }
1368
1369  sum16 = ~ip_csum_fold (sum0);
1370  return sum16;
1371}
1372
1373u32
1374ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1375{
1376  ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1377  udp_header_t *udp0;
1378  u16 sum16;
1379
1380  ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1381	  || ip0->protocol == IP_PROTOCOL_UDP);
1382
1383  udp0 = (void *) (ip0 + 1);
1384  if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1385    {
1386      p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1387		    | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1388      return p0->flags;
1389    }
1390
1391  sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1392
1393  p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1394		| ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1395
1396  return p0->flags;
1397}
1398#endif
1399
1400/* *INDENT-OFF* */
1401VNET_FEATURE_ARC_INIT (ip4_local) =
1402{
1403  .arc_name  = "ip4-local",
1404  .start_nodes = VNET_FEATURES ("ip4-local"),
1405  .last_in_arc = "ip4-local-end-of-arc",
1406};
1407/* *INDENT-ON* */
1408
1409static inline void
1410ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1411			    ip4_header_t * ip, u8 is_udp, u8 * error,
1412			    u8 * good_tcp_udp)
1413{
1414  u32 flags0;
1415  flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1416  *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1417  if (is_udp)
1418    {
1419      udp_header_t *udp;
1420      u32 ip_len, udp_len;
1421      i32 len_diff;
1422      udp = ip4_next_header (ip);
1423      /* Verify UDP length. */
1424      ip_len = clib_net_to_host_u16 (ip->length);
1425      udp_len = clib_net_to_host_u16 (udp->length);
1426
1427      len_diff = ip_len - udp_len;
1428      *good_tcp_udp &= len_diff >= 0;
1429      *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1430    }
1431}
1432
/*
 * Checksum-state predicates on a buffer pointer _b.  Every argument and
 * every expansion is fully parenthesised so the macros can be negated,
 * combined with other operators, or given an expression such as b + 1.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)					\
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)			\
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
   the checksum is neither already computed nor offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)			\
    ((is_tcp_udp)							\
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)		\
	  || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)					\
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)		\
      || ip4_local_csum_is_offloaded (_b)) != 0)
1444
1445static inline void
1446ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1447			 ip4_header_t * ih, u8 * error)
1448{
1449  u8 is_udp, is_tcp_udp, good_tcp_udp;
1450
1451  is_udp = ih->protocol == IP_PROTOCOL_UDP;
1452  is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1453
1454  if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1455    ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1456  else
1457    good_tcp_udp = ip4_local_csum_is_valid (b);
1458
1459  ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1460  *error = (is_tcp_udp && !good_tcp_udp
1461	    ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1462}
1463
1464static inline void
1465ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1466			    ip4_header_t ** ih, u8 * error)
1467{
1468  u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1469
1470  is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1471  is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1472
1473  is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1474  is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1475
1476  good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1477  good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1478
1479  if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1480		     || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1481    {
1482      if (is_tcp_udp[0])
1483	ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1484				    &good_tcp_udp[0]);
1485      if (is_tcp_udp[1])
1486	ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1487				    &good_tcp_udp[1]);
1488    }
1489
1490  error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1491	      IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1492  error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1493	      IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1494}
1495
1496static inline void
1497ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1498			      vlib_buffer_t * b, u16 * next, u8 error,
1499			      u8 head_of_feature_arc)
1500{
1501  u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1502  u32 next_index;
1503
1504  *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1505  b->error = error ? error_node->errors[error] : 0;
1506  if (head_of_feature_arc)
1507    {
1508      next_index = *next;
1509      if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1510	{
1511	  vnet_feature_arc_start (arc_index,
1512				  vnet_buffer (b)->sw_if_index[VLIB_RX],
1513				  &next_index, b);
1514	  *next = next_index;
1515	}
1516    }
1517}
1518
1519typedef struct
1520{
1521  ip4_address_t src;
1522  u32 lbi;
1523  u8 error;
1524  u8 first;
1525} ip4_local_last_check_t;
1526
1527static inline void
1528ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1529		     ip4_local_last_check_t * last_check, u8 * error0)
1530{
1531  ip4_fib_mtrie_leaf_t leaf0;
1532  ip4_fib_mtrie_t *mtrie0;
1533  const dpo_id_t *dpo0;
1534  load_balance_t *lb0;
1535  u32 lbi0;
1536
1537  vnet_buffer (b)->ip.fib_index =
1538    vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1539    vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1540
1541  /*
1542   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1543   *  adjacency for the destination address (the local interface address).
1544   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1545   *  adjacency for the source address (the remote sender's address)
1546   */
1547  if (PREDICT_FALSE (last_check->first ||
1548		     (last_check->src.as_u32 != ip0->src_address.as_u32)))
1549    {
1550      mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1551      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1552      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1553      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1554      lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1555
1556      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1557	vnet_buffer (b)->ip.adj_index[VLIB_TX];
1558      vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1559
1560      lb0 = load_balance_get (lbi0);
1561      dpo0 = load_balance_get_bucket_i (lb0, 0);
1562
1563      /*
1564       * Must have a route to source otherwise we drop the packet.
1565       * ip4 broadcasts are accepted, e.g. to make dhcp client work
1566       *
1567       * The checks are:
1568       *  - the source is a recieve => it's from us => bogus, do this
1569       *    first since it sets a different error code.
1570       *  - uRPF check for any route to source - accept if passes.
1571       *  - allow packets destined to the broadcast address from unknown sources
1572       */
1573
1574      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1575		  && dpo0->dpoi_type == DPO_RECEIVE) ?
1576		 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1577      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1578		  && !fib_urpf_check_size (lb0->lb_urpf)
1579		  && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1580		 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1581
1582      last_check->src.as_u32 = ip0->src_address.as_u32;
1583      last_check->lbi = lbi0;
1584      last_check->error = *error0;
1585    }
1586  else
1587    {
1588      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1589	vnet_buffer (b)->ip.adj_index[VLIB_TX];
1590      vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1591      *error0 = last_check->error;
1592      last_check->first = 0;
1593    }
1594}
1595
1596static inline void
1597ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1598			ip4_local_last_check_t * last_check, u8 * error)
1599{
1600  ip4_fib_mtrie_leaf_t leaf[2];
1601  ip4_fib_mtrie_t *mtrie[2];
1602  const dpo_id_t *dpo[2];
1603  load_balance_t *lb[2];
1604  u32 not_last_hit;
1605  u32 lbi[2];
1606
1607  not_last_hit = last_check->first;
1608  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1609  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1610
1611  vnet_buffer (b[0])->ip.fib_index =
1612    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1613    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1614    vnet_buffer (b[0])->ip.fib_index;
1615
1616  vnet_buffer (b[1])->ip.fib_index =
1617    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1618    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1619    vnet_buffer (b[1])->ip.fib_index;
1620
1621  /*
1622   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1623   *  adjacency for the destination address (the local interface address).
1624   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1625   *  adjacency for the source address (the remote sender's address)
1626   */
1627  if (PREDICT_FALSE (not_last_hit))
1628    {
1629      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1630      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1631
1632      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1633      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1634
1635      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1636					   &ip[0]->src_address, 2);
1637      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1638					   &ip[1]->src_address, 2);
1639
1640      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1641					   &ip[0]->src_address, 3);
1642      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1643					   &ip[1]->src_address, 3);
1644
1645      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1646      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1647
1648      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1649	vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1650      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1651
1652      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1653	vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1654      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1655
1656      lb[0] = load_balance_get (lbi[0]);
1657      lb[1] = load_balance_get (lbi[1]);
1658
1659      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1660      dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1661
1662      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1663		   dpo[0]->dpoi_type == DPO_RECEIVE) ?
1664		  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1665      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1666		   !fib_urpf_check_size (lb[0]->lb_urpf) &&
1667		   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1668		  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1669
1670      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1671		   dpo[1]->dpoi_type == DPO_RECEIVE) ?
1672		  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1673      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1674		   !fib_urpf_check_size (lb[1]->lb_urpf) &&
1675		   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1676		  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1677
1678      last_check->src.as_u32 = ip[1]->src_address.as_u32;
1679      last_check->lbi = lbi[1];
1680      last_check->error = error[1];
1681    }
1682  else
1683    {
1684      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1685	vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1686      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1687
1688      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1689	vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1690      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1691
1692      error[0] = last_check->error;
1693      error[1] = last_check->error;
1694      last_check->first = 0;
1695    }
1696}
1697
/*
 * Packet classes distinguished by ip4_local_classify().  The L4 class
 * must stay 0: ip4_local_inline() tests the class as a boolean to
 * decide whether checks apply.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT,
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1704
1705/**
1706 * Determine packet type and next node.
1707 *
1708 * The expectation is that all packets that are not L4 will skip
1709 * checksums and source checks.
1710 */
1711always_inline u8
1712ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1713{
1714  ip_lookup_main_t *lm = &ip4_main.lookup_main;
1715
1716  if (PREDICT_FALSE (ip4_is_fragment (ip)))
1717    {
1718      *next = IP_LOCAL_NEXT_REASSEMBLY;
1719      return IP_LOCAL_PACKET_TYPE_FRAG;
1720    }
1721  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1722    {
1723      *next = lm->local_next_by_ip_protocol[ip->protocol];
1724      return IP_LOCAL_PACKET_TYPE_NAT;
1725    }
1726
1727  *next = lm->local_next_by_ip_protocol[ip->protocol];
1728  return IP_LOCAL_PACKET_TYPE_L4;
1729}
1730
1731static inline uword
1732ip4_local_inline (vlib_main_t * vm,
1733		  vlib_node_runtime_t * node,
1734		  vlib_frame_t * frame, int head_of_feature_arc)
1735{
1736  u32 *from, n_left_from;
1737  vlib_node_runtime_t *error_node =
1738    vlib_node_get_runtime (vm, ip4_input_node.index);
1739  u16 nexts[VLIB_FRAME_SIZE], *next;
1740  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1741  ip4_header_t *ip[2];
1742  u8 error[2], pt[2];
1743
1744  ip4_local_last_check_t last_check = {
1745    /*
1746     * 0.0.0.0 can appear as the source address of an IP packet,
1747     * as can any other address, hence the need to use the 'first'
1748     * member to make sure the .lbi is initialised for the first
1749     * packet.
1750     */
1751    .src = {.as_u32 = 0},
1752    .lbi = ~0,
1753    .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1754    .first = 1,
1755  };
1756
1757  from = vlib_frame_vector_args (frame);
1758  n_left_from = frame->n_vectors;
1759
1760  if (node->flags & VLIB_NODE_FLAG_TRACE)
1761    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1762
1763  vlib_get_buffers (vm, from, bufs, n_left_from);
1764  b = bufs;
1765  next = nexts;
1766
1767  while (n_left_from >= 6)
1768    {
1769      u8 not_batch = 0;
1770
1771      /* Prefetch next iteration. */
1772      {
1773	vlib_prefetch_buffer_header (b[4], LOAD);
1774	vlib_prefetch_buffer_header (b[5], LOAD);
1775
1776	CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1777	CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1778      }
1779
1780      error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1781
1782      ip[0] = vlib_buffer_get_current (b[0]);
1783      ip[1] = vlib_buffer_get_current (b[1]);
1784
1785      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1786      vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1787
1788      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1789      pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1790
1791      not_batch = pt[0] ^ pt[1];
1792
1793      if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1794	goto skip_checks;
1795
1796      if (PREDICT_TRUE (not_batch == 0))
1797	{
1798	  ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1799	  ip4_local_check_src_x2 (b, ip, &last_check, error);
1800	}
1801      else
1802	{
1803	  if (!pt[0])
1804	    {
1805	      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1806	      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1807	    }
1808	  if (!pt[1])
1809	    {
1810	      ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1811	      ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1812	    }
1813	}
1814
1815    skip_checks:
1816
1817      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1818				    head_of_feature_arc);
1819      ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1820				    head_of_feature_arc);
1821
1822      b += 2;
1823      next += 2;
1824      n_left_from -= 2;
1825    }
1826
1827  while (n_left_from > 0)
1828    {
1829      error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1830
1831      ip[0] = vlib_buffer_get_current (b[0]);
1832      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1833      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1834
1835      if (head_of_feature_arc == 0 || pt[0])
1836	goto skip_check;
1837
1838      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1839      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1840
1841    skip_check:
1842
1843      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1844				    head_of_feature_arc);
1845
1846      b += 1;
1847      next += 1;
1848      n_left_from -= 1;
1849    }
1850
1851  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1852  return frame->n_vectors;
1853}
1854
1855VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1856			       vlib_frame_t * frame)
1857{
1858  return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1859}
1860
1861/* *INDENT-OFF* */
1862VLIB_REGISTER_NODE (ip4_local_node) =
1863{
1864  .name = "ip4-local",
1865  .vector_size = sizeof (u32),
1866  .format_trace = format_ip4_forward_next_trace,
1867  .n_next_nodes = IP_LOCAL_N_NEXT,
1868  .next_nodes =
1869  {
1870    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1871    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1872    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1873    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1874    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1875  },
1876};
1877/* *INDENT-ON* */
1878
1879
1880VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1881					  vlib_node_runtime_t * node,
1882					  vlib_frame_t * frame)
1883{
1884  return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1885}
1886
1887/* *INDENT-OFF* */
1888VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1889  .name = "ip4-local-end-of-arc",
1890  .vector_size = sizeof (u32),
1891
1892  .format_trace = format_ip4_forward_next_trace,
1893  .sibling_of = "ip4-local",
1894};
1895
1896VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1897  .arc_name = "ip4-local",
1898  .node_name = "ip4-local-end-of-arc",
1899  .runs_before = 0, /* not before any other features */
1900};
1901/* *INDENT-ON* */
1902
1903#ifndef CLIB_MARCH_VARIANT
1904void
1905ip4_register_protocol (u32 protocol, u32 node_index)
1906{
1907  vlib_main_t *vm = vlib_get_main ();
1908  ip4_main_t *im = &ip4_main;
1909  ip_lookup_main_t *lm = &im->lookup_main;
1910
1911  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1912  lm->local_next_by_ip_protocol[protocol] =
1913    vlib_node_add_next (vm, ip4_local_node.index, node_index);
1914}
1915
1916void
1917ip4_unregister_protocol (u32 protocol)
1918{
1919  ip4_main_t *im = &ip4_main;
1920  ip_lookup_main_t *lm = &im->lookup_main;
1921
1922  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1923  lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1924}
1925#endif
1926
1927static clib_error_t *
1928show_ip_local_command_fn (vlib_main_t * vm,
1929			  unformat_input_t * input, vlib_cli_command_t * cmd)
1930{
1931  ip4_main_t *im = &ip4_main;
1932  ip_lookup_main_t *lm = &im->lookup_main;
1933  int i;
1934
1935  vlib_cli_output (vm, "Protocols handled by ip4_local");
1936  for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1937    {
1938      if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1939	{
1940	  u32 node_index = vlib_get_node (vm,
1941					  ip4_local_node.index)->
1942	    next_nodes[lm->local_next_by_ip_protocol[i]];
1943	  vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1944			   format_vlib_node_name, vm, node_index);
1945	}
1946    }
1947  return 0;
1948}
1949
1950
1951
1952/*?
1953 * Display the set of protocols handled by the local IPv4 stack.
1954 *
1955 * @cliexpar
1956 * Example of how to display local protocol table:
1957 * @cliexstart{show ip local}
1958 * Protocols handled by ip4_local
1959 * 1
1960 * 17
1961 * 47
1962 * @cliexend
1963?*/
1964/* *INDENT-OFF* */
1965VLIB_CLI_COMMAND (show_ip_local, static) =
1966{
1967  .path = "show ip local",
1968  .function = show_ip_local_command_fn,
1969  .short_help = "show ip local",
1970};
1971/* *INDENT-ON* */
1972
1973always_inline uword
1974ip4_arp_inline (vlib_main_t * vm,
1975		vlib_node_runtime_t * node,
1976		vlib_frame_t * frame, int is_glean)
1977{
1978  vnet_main_t *vnm = vnet_get_main ();
1979  ip4_main_t *im = &ip4_main;
1980  ip_lookup_main_t *lm = &im->lookup_main;
1981  u32 *from, *to_next_drop;
1982  uword n_left_from, n_left_to_next_drop, next_index;
1983  u32 thread_index = vm->thread_index;
1984  u64 seed;
1985
1986  if (node->flags & VLIB_NODE_FLAG_TRACE)
1987    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1988
1989  seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1990
1991  from = vlib_frame_vector_args (frame);
1992  n_left_from = frame->n_vectors;
1993  next_index = node->cached_next_index;
1994  if (next_index == IP4_ARP_NEXT_DROP)
1995    next_index = IP4_ARP_N_NEXT;	/* point to first interface */
1996
1997  while (n_left_from > 0)
1998    {
1999      vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2000			   to_next_drop, n_left_to_next_drop);
2001
2002      while (n_left_from > 0 && n_left_to_next_drop > 0)
2003	{
2004	  u32 pi0, bi0, adj_index0, sw_if_index0;
2005	  ip_adjacency_t *adj0;
2006	  vlib_buffer_t *p0, *b0;
2007	  ip4_address_t resolve0;
2008	  ethernet_arp_header_t *h0;
2009	  vnet_hw_interface_t *hw_if0;
2010	  u64 r0;
2011
2012	  pi0 = from[0];
2013	  p0 = vlib_get_buffer (vm, pi0);
2014
2015	  from += 1;
2016	  n_left_from -= 1;
2017	  to_next_drop[0] = pi0;
2018	  to_next_drop += 1;
2019	  n_left_to_next_drop -= 1;
2020
2021	  adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2022	  adj0 = adj_get (adj_index0);
2023
2024	  if (is_glean)
2025	    {
2026	      /* resolve the packet's destination */
2027	      ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2028	      resolve0 = ip0->dst_address;
2029	    }
2030	  else
2031	    {
2032	      /* resolve the incomplete adj */
2033	      resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2034	    }
2035
2036	  /* combine the address and interface for the hash key */
2037	  sw_if_index0 = adj0->rewrite_header.sw_if_index;
2038	  r0 = (u64) resolve0.data_u32 << 32;
2039	  r0 |= sw_if_index0;
2040
2041	  if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2042	    {
2043	      p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2044	      continue;
2045	    }
2046
2047	  /*
2048	   * the adj has been updated to a rewrite but the node the DPO that got
2049	   * us here hasn't - yet. no big deal. we'll drop while we wait.
2050	   */
2051	  if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2052	    {
2053	      p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2054	      continue;
2055	    }
2056
2057	  /*
2058	   * Can happen if the control-plane is programming tables
2059	   * with traffic flowing; at least that's today's lame excuse.
2060	   */
2061	  if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2062	      || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2063	    {
2064	      p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2065	      continue;
2066	    }
2067	  /* Send ARP request. */
2068	  h0 =
2069	    vlib_packet_template_get_packet (vm,
2070					     &im->ip4_arp_request_packet_template,
2071					     &bi0);
2072	  /* Seems we're out of buffers */
2073	  if (PREDICT_FALSE (!h0))
2074	    {
2075	      p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2076	      continue;
2077	    }
2078
2079	  b0 = vlib_get_buffer (vm, bi0);
2080
2081	  /* copy the persistent fields from the original */
2082	  clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2083
2084	  /* Add rewrite/encap string for ARP packet. */
2085	  vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2086
2087	  hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2088
2089	  /* Src ethernet address in ARP header. */
2090	  mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2091				  hw_if0->hw_address);
2092	  if (is_glean)
2093	    {
2094	      /* The interface's source address is stashed in the Glean Adj */
2095	      h0->ip4_over_ethernet[0].ip4 =
2096		adj0->sub_type.glean.receive_addr.ip4;
2097	    }
2098	  else
2099	    {
2100	      /* Src IP address in ARP header. */
2101	      if (ip4_src_address_for_packet (lm, sw_if_index0,
2102					      &h0->ip4_over_ethernet[0].ip4))
2103		{
2104		  /* No source address available */
2105		  p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2106		  vlib_buffer_free (vm, &bi0, 1);
2107		  continue;
2108		}
2109	    }
2110	  h0->ip4_over_ethernet[1].ip4 = resolve0;
2111
2112	  p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2113
2114	  vlib_buffer_copy_trace_flag (vm, p0, bi0);
2115	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2116	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2117
2118	  vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2119
2120	  vlib_set_next_frame_buffer (vm, node,
2121				      adj0->rewrite_header.next_index, bi0);
2122	}
2123
2124      vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2125    }
2126
2127  return frame->n_vectors;
2128}
2129
2130VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2131			     vlib_frame_t * frame)
2132{
2133  return (ip4_arp_inline (vm, node, frame, 0));
2134}
2135
2136VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2137			       vlib_frame_t * frame)
2138{
2139  return (ip4_arp_inline (vm, node, frame, 1));
2140}
2141
2142static char *ip4_arp_error_strings[] = {
2143  [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2144  [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2145  [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2146  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2147  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2148  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2149};
2150
2151/* *INDENT-OFF* */
2152VLIB_REGISTER_NODE (ip4_arp_node) =
2153{
2154  .name = "ip4-arp",
2155  .vector_size = sizeof (u32),
2156  .format_trace = format_ip4_forward_next_trace,
2157  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2158  .error_strings = ip4_arp_error_strings,
2159  .n_next_nodes = IP4_ARP_N_NEXT,
2160  .next_nodes =
2161  {
2162    [IP4_ARP_NEXT_DROP] = "error-drop",
2163  },
2164};
2165
2166VLIB_REGISTER_NODE (ip4_glean_node) =
2167{
2168  .name = "ip4-glean",
2169  .vector_size = sizeof (u32),
2170  .format_trace = format_ip4_forward_next_trace,
2171  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2172  .error_strings = ip4_arp_error_strings,
2173  .n_next_nodes = IP4_ARP_N_NEXT,
2174  .next_nodes = {
2175  [IP4_ARP_NEXT_DROP] = "error-drop",
2176  },
2177};
2178/* *INDENT-ON* */
2179
2180#define foreach_notrace_ip4_arp_error           \
2181_(THROTTLED)                                    \
2182_(RESOLVED)                                     \
2183_(NO_BUFFERS)                                   \
2184_(REQUEST_SENT)                                 \
2185_(NON_ARP_ADJ)                                  \
2186_(NO_SOURCE_ADDRESS)
2187
2188static clib_error_t *
2189arp_notrace_init (vlib_main_t * vm)
2190{
2191  vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2192
2193  /* don't trace ARP request packets */
2194#define _(a)                                    \
2195    vnet_pcap_drop_trace_filter_add_del         \
2196        (rt->errors[IP4_ARP_ERROR_##a],         \
2197         1 /* is_add */);
2198  foreach_notrace_ip4_arp_error;
2199#undef _
2200  return 0;
2201}
2202
2203VLIB_INIT_FUNCTION (arp_notrace_init);
2204
2205
2206#ifndef CLIB_MARCH_VARIANT
2207/* Send an ARP request to see if given destination is reachable on given interface. */
2208clib_error_t *
2209ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2210		    u8 refresh)
2211{
2212  vnet_main_t *vnm = vnet_get_main ();
2213  ip4_main_t *im = &ip4_main;
2214  ethernet_arp_header_t *h;
2215  ip4_address_t *src;
2216  ip_interface_address_t *ia;
2217  ip_adjacency_t *adj;
2218  vnet_hw_interface_t *hi;
2219  vnet_sw_interface_t *si;
2220  vlib_buffer_t *b;
2221  adj_index_t ai;
2222  u32 bi = 0;
2223  u8 unicast_rewrite = 0;
2224
2225  si = vnet_get_sw_interface (vnm, sw_if_index);
2226
2227  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2228    {
2229      return clib_error_return (0, "%U: interface %U down",
2230				format_ip4_address, dst,
2231				format_vnet_sw_if_index_name, vnm,
2232				sw_if_index);
2233    }
2234
2235  src =
2236    ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2237  if (!src)
2238    {
2239      vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2240      return clib_error_return
2241	(0,
2242	 "no matching interface address for destination %U (interface %U)",
2243	 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2244	 sw_if_index);
2245    }
2246
2247  h = vlib_packet_template_get_packet (vm,
2248				       &im->ip4_arp_request_packet_template,
2249				       &bi);
2250
2251  if (!h)
2252    return clib_error_return (0, "ARP request packet allocation failed");
2253
2254  hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2255  if (PREDICT_FALSE (!hi->hw_address))
2256    {
2257      return clib_error_return (0, "%U: interface %U do not support ip probe",
2258				format_ip4_address, dst,
2259				format_vnet_sw_if_index_name, vnm,
2260				sw_if_index);
2261    }
2262
2263  mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2264
2265  h->ip4_over_ethernet[0].ip4 = src[0];
2266  h->ip4_over_ethernet[1].ip4 = dst[0];
2267
2268  b = vlib_get_buffer (vm, bi);
2269  vnet_buffer (b)->sw_if_index[VLIB_RX] =
2270    vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2271
2272  ip46_address_t nh = {
2273    .ip4 = *dst,
2274  };
2275
2276  ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2277			    VNET_LINK_IP4, &nh, sw_if_index);
2278  adj = adj_get (ai);
2279
2280  /* Peer has been previously resolved, retrieve glean adj instead */
2281  if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2282    {
2283      if (refresh)
2284	unicast_rewrite = 1;
2285      else
2286	{
2287	  adj_unlock (ai);
2288	  ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2289				      VNET_LINK_IP4, sw_if_index, &nh);
2290	  adj = adj_get (ai);
2291	}
2292    }
2293
2294  /* Add encapsulation string for software interface (e.g. ethernet header). */
2295  vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2296  if (unicast_rewrite)
2297    {
2298      u16 *etype = vlib_buffer_get_current (b) - 2;
2299      etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2300    }
2301  vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2302
2303  {
2304    vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2305    u32 *to_next = vlib_frame_vector_args (f);
2306    to_next[0] = bi;
2307    f->n_vectors = 1;
2308    vlib_put_frame_to_node (vm, hi->output_node_index, f);
2309  }
2310
2311  adj_unlock (ai);
2312  return /* no error */ 0;
2313}
2314#endif
2315
/* Static next-node slots of the IPv4 rewrite nodes. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* Number of static nexts; must stay last. */
  IP4_REWRITE_N_NEXT = 3
} ip4_rewrite_next_t;
2323
/**
 * The bits of an IPv4 address to keep (mask) when constructing a
 * multicast MAC address.  The constant depends on host byte order.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2333
2334always_inline void
2335ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2336	       u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2337{
2338  if (packet_len > adj_packet_bytes)
2339    {
2340      *error = IP4_ERROR_MTU_EXCEEDED;
2341      if (df)
2342	{
2343	  icmp4_error_set_vnet_buffer
2344	    (b, ICMP4_destination_unreachable,
2345	     ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2346	     adj_packet_bytes);
2347	  *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2348	}
2349      else
2350	{
2351	  /* IP fragmentation */
2352	  ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2353				   IP4_FRAG_NEXT_IP4_REWRITE, 0);
2354	  *next = IP4_REWRITE_NEXT_FRAGMENT;
2355	}
2356    }
2357}
2358
2359/* Decrement TTL & update checksum.
2360   Works either endian, so no need for byte swap. */
2361static_always_inline void
2362ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2363			    u32 * error)
2364{
2365  i32 ttl;
2366  u32 checksum;
2367  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2368    {
2369      b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2370      return;
2371    }
2372
2373  ttl = ip->ttl;
2374
2375  /* Input node should have reject packets with ttl 0. */
2376  ASSERT (ip->ttl > 0);
2377
2378  checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2379  checksum += checksum >= 0xffff;
2380
2381  ip->checksum = checksum;
2382  ttl -= 1;
2383  ip->ttl = ttl;
2384
2385  /*
2386   * If the ttl drops below 1 when forwarding, generate
2387   * an ICMP response.
2388   */
2389  if (PREDICT_FALSE (ttl <= 0))
2390    {
2391      *error = IP4_ERROR_TIME_EXPIRED;
2392      vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2393      icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2394				   ICMP4_time_exceeded_ttl_exceeded_in_transit,
2395				   0);
2396      *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2397    }
2398
2399  /* Verify checksum. */
2400  ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2401	  (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2402}
2403
2404
2405always_inline uword
2406ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2407			     vlib_node_runtime_t * node,
2408			     vlib_frame_t * frame,
2409			     int do_counters, int is_midchain, int is_mcast,
2410			     int do_gso)
2411{
2412  ip_lookup_main_t *lm = &ip4_main.lookup_main;
2413  u32 *from = vlib_frame_vector_args (frame);
2414  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2415  u16 nexts[VLIB_FRAME_SIZE], *next;
2416  u32 n_left_from;
2417  vlib_node_runtime_t *error_node =
2418    vlib_node_get_runtime (vm, ip4_input_node.index);
2419
2420  n_left_from = frame->n_vectors;
2421  u32 thread_index = vm->thread_index;
2422
2423  vlib_get_buffers (vm, from, bufs, n_left_from);
2424  clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2425
2426#if (CLIB_N_PREFETCHES >= 8)
2427  if (n_left_from >= 6)
2428    {
2429      int i;
2430      for (i = 2; i < 6; i++)
2431	vlib_prefetch_buffer_header (bufs[i], LOAD);
2432    }
2433
2434  next = nexts;
2435  b = bufs;
2436  while (n_left_from >= 8)
2437    {
2438      ip_adjacency_t *adj0, *adj1;
2439      ip4_header_t *ip0, *ip1;
2440      u32 rw_len0, error0, adj_index0;
2441      u32 rw_len1, error1, adj_index1;
2442      u32 tx_sw_if_index0, tx_sw_if_index1;
2443      u8 *p;
2444
2445      vlib_prefetch_buffer_header (b[6], LOAD);
2446      vlib_prefetch_buffer_header (b[7], LOAD);
2447
2448      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2449      adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2450
2451      /*
2452       * pre-fetch the per-adjacency counters
2453       */
2454      if (do_counters)
2455	{
2456	  vlib_prefetch_combined_counter (&adjacency_counters,
2457					  thread_index, adj_index0);
2458	  vlib_prefetch_combined_counter (&adjacency_counters,
2459					  thread_index, adj_index1);
2460	}
2461
2462      ip0 = vlib_buffer_get_current (b[0]);
2463      ip1 = vlib_buffer_get_current (b[1]);
2464
2465      error0 = error1 = IP4_ERROR_NONE;
2466
2467      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2468      ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2469
2470      /* Rewrite packet header and updates lengths. */
2471      adj0 = adj_get (adj_index0);
2472      adj1 = adj_get (adj_index1);
2473
2474      /* Worth pipelining. No guarantee that adj0,1 are hot... */
2475      rw_len0 = adj0[0].rewrite_header.data_bytes;
2476      rw_len1 = adj1[0].rewrite_header.data_bytes;
2477      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2478      vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2479
2480      p = vlib_buffer_get_current (b[2]);
2481      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2482      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2483
2484      p = vlib_buffer_get_current (b[3]);
2485      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2486      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2487
2488      /* Check MTU of outgoing interface. */
2489      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2490      u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2491
2492      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2493	ip0_len = gso_mtu_sz (b[0]);
2494      if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2495	ip1_len = gso_mtu_sz (b[1]);
2496
2497      ip4_mtu_check (b[0], ip0_len,
2498		     adj0[0].rewrite_header.max_l3_packet_bytes,
2499		     ip0->flags_and_fragment_offset &
2500		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2501		     next + 0, &error0);
2502      ip4_mtu_check (b[1], ip1_len,
2503		     adj1[0].rewrite_header.max_l3_packet_bytes,
2504		     ip1->flags_and_fragment_offset &
2505		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2506		     next + 1, &error1);
2507
2508      if (is_mcast)
2509	{
2510	  error0 = ((adj0[0].rewrite_header.sw_if_index ==
2511		     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2512		    IP4_ERROR_SAME_INTERFACE : error0);
2513	  error1 = ((adj1[0].rewrite_header.sw_if_index ==
2514		     vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2515		    IP4_ERROR_SAME_INTERFACE : error1);
2516	}
2517
2518      /* Don't adjust the buffer for ttl issue; icmp-error node wants
2519       * to see the IP header */
2520      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2521	{
2522	  u32 next_index = adj0[0].rewrite_header.next_index;
2523	  vlib_buffer_advance (b[0], -(word) rw_len0);
2524	  tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2525	  vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2526
2527	  if (PREDICT_FALSE
2528	      (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2529	    vnet_feature_arc_start (lm->output_feature_arc_index,
2530				    tx_sw_if_index0, &next_index, b[0]);
2531	  next[0] = next_index;
2532	}
2533      else
2534	{
2535	  b[0]->error = error_node->errors[error0];
2536	}
2537      if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2538	{
2539	  u32 next_index = adj1[0].rewrite_header.next_index;
2540	  vlib_buffer_advance (b[1], -(word) rw_len1);
2541
2542	  tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2543	  vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2544
2545	  if (PREDICT_FALSE
2546	      (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2547	    vnet_feature_arc_start (lm->output_feature_arc_index,
2548				    tx_sw_if_index1, &next_index, b[1]);
2549	  next[1] = next_index;
2550	}
2551      else
2552	{
2553	  b[1]->error = error_node->errors[error1];
2554	}
2555      if (is_midchain)
2556	{
2557	  calc_checksums (vm, b[0]);
2558	  calc_checksums (vm, b[1]);
2559	}
2560      /* Guess we are only writing on simple Ethernet header. */
2561      vnet_rewrite_two_headers (adj0[0], adj1[0],
2562				ip0, ip1, sizeof (ethernet_header_t));
2563
2564      /*
2565       * Bump the per-adjacency counters
2566       */
2567      if (do_counters)
2568	{
2569	  vlib_increment_combined_counter
2570	    (&adjacency_counters,
2571	     thread_index,
2572	     adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2573
2574	  vlib_increment_combined_counter
2575	    (&adjacency_counters,
2576	     thread_index,
2577	     adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2578	}
2579
2580      if (is_midchain)
2581	{
2582	  if (adj0->sub_type.midchain.fixup_func)
2583	    adj0->sub_type.midchain.fixup_func
2584	      (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2585	  if (adj1->sub_type.midchain.fixup_func)
2586	    adj1->sub_type.midchain.fixup_func
2587	      (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2588	}
2589
2590      if (is_mcast)
2591	{
2592	  /*
2593	   * copy bytes from the IP address into the MAC rewrite
2594	   */
2595	  vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2596				      adj0->rewrite_header.dst_mcast_offset,
2597				      &ip0->dst_address.as_u32, (u8 *) ip0);
2598	  vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2599				      adj1->rewrite_header.dst_mcast_offset,
2600				      &ip1->dst_address.as_u32, (u8 *) ip1);
2601	}
2602
2603      next += 2;
2604      b += 2;
2605      n_left_from -= 2;
2606    }
2607#elif (CLIB_N_PREFETCHES >= 4)
2608  next = nexts;
2609  b = bufs;
2610  while (n_left_from >= 1)
2611    {
2612      ip_adjacency_t *adj0;
2613      ip4_header_t *ip0;
2614      u32 rw_len0, error0, adj_index0;
2615      u32 tx_sw_if_index0;
2616      u8 *p;
2617
2618      /* Prefetch next iteration */
2619      if (PREDICT_TRUE (n_left_from >= 4))
2620	{
2621	  ip_adjacency_t *adj2;
2622	  u32 adj_index2;
2623
2624	  vlib_prefetch_buffer_header (b[3], LOAD);
2625	  vlib_prefetch_buffer_data (b[2], LOAD);
2626
2627	  /* Prefetch adj->rewrite_header */
2628	  adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2629	  adj2 = adj_get (adj_index2);
2630	  p = (u8 *) adj2;
2631	  CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2632			 LOAD);
2633	}
2634
2635      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2636
2637      /*
2638       * Prefetch the per-adjacency counters
2639       */
2640      if (do_counters)
2641	{
2642	  vlib_prefetch_combined_counter (&adjacency_counters,
2643					  thread_index, adj_index0);
2644	}
2645
2646      ip0 = vlib_buffer_get_current (b[0]);
2647
2648      error0 = IP4_ERROR_NONE;
2649
2650      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2651
2652      /* Rewrite packet header and updates lengths. */
2653      adj0 = adj_get (adj_index0);
2654
2655      /* Rewrite header was prefetched. */
2656      rw_len0 = adj0[0].rewrite_header.data_bytes;
2657      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2658
2659      /* Check MTU of outgoing interface. */
2660      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2661
2662      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2663	ip0_len = gso_mtu_sz (b[0]);
2664
2665      ip4_mtu_check (b[0], ip0_len,
2666		     adj0[0].rewrite_header.max_l3_packet_bytes,
2667		     ip0->flags_and_fragment_offset &
2668		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2669		     next + 0, &error0);
2670
2671      if (is_mcast)
2672	{
2673	  error0 = ((adj0[0].rewrite_header.sw_if_index ==
2674		     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2675		    IP4_ERROR_SAME_INTERFACE : error0);
2676	}
2677
2678      /* Don't adjust the buffer for ttl issue; icmp-error node wants
2679       * to see the IP header */
2680      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2681	{
2682	  u32 next_index = adj0[0].rewrite_header.next_index;
2683	  vlib_buffer_advance (b[0], -(word) rw_len0);
2684	  tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2685	  vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2686
2687	  if (PREDICT_FALSE
2688	      (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2689	    vnet_feature_arc_start (lm->output_feature_arc_index,
2690				    tx_sw_if_index0, &next_index, b[0]);
2691	  next[0] = next_index;
2692	}
2693      else
2694	{
2695	  b[0]->error = error_node->errors[error0];
2696	}
2697      if (is_midchain)
2698	{
2699	  calc_checksums (vm, b[0]);
2700	}
2701      /* Guess we are only writing on simple Ethernet header. */
2702      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2703
2704      /*
2705       * Bump the per-adjacency counters
2706       */
2707      if (do_counters)
2708	{
2709	  vlib_increment_combined_counter
2710	    (&adjacency_counters,
2711	     thread_index,
2712	     adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2713	}
2714
2715      if (is_midchain)
2716	{
2717	  if (adj0->sub_type.midchain.fixup_func)
2718	    adj0->sub_type.midchain.fixup_func
2719	      (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2720	}
2721
2722      if (is_mcast)
2723	{
2724	  /*
2725	   * copy bytes from the IP address into the MAC rewrite
2726	   */
2727	  vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2728				      adj0->rewrite_header.dst_mcast_offset,
2729				      &ip0->dst_address.as_u32, (u8 *) ip0);
2730	}
2731
2732      next += 1;
2733      b += 1;
2734      n_left_from -= 1;
2735    }
2736#endif
2737
2738  while (n_left_from > 0)
2739    {
2740      ip_adjacency_t *adj0;
2741      ip4_header_t *ip0;
2742      u32 rw_len0, adj_index0, error0;
2743      u32 tx_sw_if_index0;
2744
2745      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2746
2747      adj0 = adj_get (adj_index0);
2748
2749      if (do_counters)
2750	vlib_prefetch_combined_counter (&adjacency_counters,
2751					thread_index, adj_index0);
2752
2753      ip0 = vlib_buffer_get_current (b[0]);
2754
2755      error0 = IP4_ERROR_NONE;
2756
2757      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2758
2759
2760      /* Update packet buffer attributes/set output interface. */
2761      rw_len0 = adj0[0].rewrite_header.data_bytes;
2762      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2763
2764      /* Check MTU of outgoing interface. */
2765      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2766      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2767	ip0_len = gso_mtu_sz (b[0]);
2768
2769      ip4_mtu_check (b[0], ip0_len,
2770		     adj0[0].rewrite_header.max_l3_packet_bytes,
2771		     ip0->flags_and_fragment_offset &
2772		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2773		     next + 0, &error0);
2774
2775      if (is_mcast)
2776	{
2777	  error0 = ((adj0[0].rewrite_header.sw_if_index ==
2778		     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2779		    IP4_ERROR_SAME_INTERFACE : error0);
2780	}
2781
2782      /* Don't adjust the buffer for ttl issue; icmp-error node wants
2783       * to see the IP header */
2784      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2785	{
2786	  u32 next_index = adj0[0].rewrite_header.next_index;
2787	  vlib_buffer_advance (b[0], -(word) rw_len0);
2788	  tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2789	  vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2790
2791	  if (PREDICT_FALSE
2792	      (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2793	    vnet_feature_arc_start (lm->output_feature_arc_index,
2794				    tx_sw_if_index0, &next_index, b[0]);
2795	  next[0] = next_index;
2796	}
2797      else
2798	{
2799	  b[0]->error = error_node->errors[error0];
2800	}
2801      if (is_midchain)
2802	{
2803	  calc_checksums (vm, b[0]);
2804	}
2805      /* Guess we are only writing on simple Ethernet header. */
2806      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2807
2808      if (do_counters)
2809	vlib_increment_combined_counter
2810	  (&adjacency_counters,
2811	   thread_index, adj_index0, 1,
2812	   vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2813
2814      if (is_midchain)
2815	{
2816	  if (adj0->sub_type.midchain.fixup_func)
2817	    adj0->sub_type.midchain.fixup_func
2818	      (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2819	}
2820
2821      if (is_mcast)
2822	{
2823	  /*
2824	   * copy bytes from the IP address into the MAC rewrite
2825	   */
2826	  vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2827				      adj0->rewrite_header.dst_mcast_offset,
2828				      &ip0->dst_address.as_u32, (u8 *) ip0);
2829	}
2830
2831      next += 1;
2832      b += 1;
2833      n_left_from -= 1;
2834    }
2835
2836
2837  /* Need to do trace after rewrites to pick up new packet data. */
2838  if (node->flags & VLIB_NODE_FLAG_TRACE)
2839    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2840
2841  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2842  return frame->n_vectors;
2843}
2844
2845always_inline uword
2846ip4_rewrite_inline (vlib_main_t * vm,
2847		    vlib_node_runtime_t * node,
2848		    vlib_frame_t * frame,
2849		    int do_counters, int is_midchain, int is_mcast)
2850{
2851  vnet_main_t *vnm = vnet_get_main ();
2852  if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2853    return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2854					is_midchain, is_mcast,
2855					1 /* do_gso */ );
2856  else
2857    return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2858					is_midchain, is_mcast,
2859					0 /* no do_gso */ );
2860}
2861
2862
2863/** @brief IPv4 rewrite node.
2864    @node ip4-rewrite
2865
2866    This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2867    header checksum, fetch the ip adjacency, check the outbound mtu,
2868    apply the adjacency rewrite, and send pkts to the adjacency
2869    rewrite header's rewrite_next_index.
2870
2871    @param vm vlib_main_t corresponding to the current thread
2872    @param node vlib_node_runtime_t
2873    @param frame vlib_frame_t whose contents should be dispatched
2874
2875    @par Graph mechanics: buffer metadata, next index usage
2876
2877    @em Uses:
2878    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2879        - the rewrite adjacency index
2880    - <code>adj->lookup_next_index</code>
2881        - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2882          the packet will be dropped.
2883    - <code>adj->rewrite_header</code>
2884        - Rewrite string length, rewrite string, next_index
2885
2886    @em Sets:
2887    - <code>b->current_data, b->current_length</code>
2888        - Updated net of applying the rewrite string
2889
2890    <em>Next Indices:</em>
2891    - <code> adj->rewrite_header.next_index </code>
2892      or @c ip4-drop
2893*/
2894
2895VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2896				 vlib_frame_t * frame)
2897{
2898  if (adj_are_counters_enabled ())
2899    return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2900  else
2901    return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2902}
2903
2904VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2905				       vlib_node_runtime_t * node,
2906				       vlib_frame_t * frame)
2907{
2908  if (adj_are_counters_enabled ())
2909    return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2910  else
2911    return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2912}
2913
2914VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2915				  vlib_node_runtime_t * node,
2916				  vlib_frame_t * frame)
2917{
2918  if (adj_are_counters_enabled ())
2919    return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2920  else
2921    return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2922}
2923
2924VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2925				       vlib_node_runtime_t * node,
2926				       vlib_frame_t * frame)
2927{
2928  if (adj_are_counters_enabled ())
2929    return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2930  else
2931    return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2932}
2933
2934VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2935					vlib_node_runtime_t * node,
2936					vlib_frame_t * frame)
2937{
2938  if (adj_are_counters_enabled ())
2939    return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2940  else
2941    return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2942}
2943
2944/* *INDENT-OFF* */
2945VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2946  .name = "ip4-rewrite",
2947  .vector_size = sizeof (u32),
2948
2949  .format_trace = format_ip4_rewrite_trace,
2950
2951  .n_next_nodes = IP4_REWRITE_N_NEXT,
2952  .next_nodes = {
2953    [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2954    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2955    [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2956  },
2957};
2958
2959VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2960  .name = "ip4-rewrite-bcast",
2961  .vector_size = sizeof (u32),
2962
2963  .format_trace = format_ip4_rewrite_trace,
2964  .sibling_of = "ip4-rewrite",
2965};
2966
2967VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2968  .name = "ip4-rewrite-mcast",
2969  .vector_size = sizeof (u32),
2970
2971  .format_trace = format_ip4_rewrite_trace,
2972  .sibling_of = "ip4-rewrite",
2973};
2974
2975VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2976  .name = "ip4-mcast-midchain",
2977  .vector_size = sizeof (u32),
2978
2979  .format_trace = format_ip4_rewrite_trace,
2980  .sibling_of = "ip4-rewrite",
2981};
2982
2983VLIB_REGISTER_NODE (ip4_midchain_node) = {
2984  .name = "ip4-midchain",
2985  .vector_size = sizeof (u32),
2986  .format_trace = format_ip4_forward_next_trace,
2987  .sibling_of =  "ip4-rewrite",
2988};
/* *INDENT-ON* */
2990
2991static int
2992ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2993{
2994  ip4_fib_mtrie_t *mtrie0;
2995  ip4_fib_mtrie_leaf_t leaf0;
2996  u32 lbi0;
2997
2998  mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2999
3000  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
3001  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3002  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3003
3004  lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3005
3006  return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
3007}
3008
3009static clib_error_t *
3010test_lookup_command_fn (vlib_main_t * vm,
3011			unformat_input_t * input, vlib_cli_command_t * cmd)
3012{
3013  ip4_fib_t *fib;
3014  u32 table_id = 0;
3015  f64 count = 1;
3016  u32 n;
3017  int i;
3018  ip4_address_t ip4_base_address;
3019  u64 errors = 0;
3020
3021  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3022    {
3023      if (unformat (input, "table %d", &table_id))
3024	{
3025	  /* Make sure the entry exists. */
3026	  fib = ip4_fib_get (table_id);
3027	  if ((fib) && (fib->index != table_id))
3028	    return clib_error_return (0, "<fib-index> %d does not exist",
3029				      table_id);
3030	}
3031      else if (unformat (input, "count %f", &count))
3032	;
3033
3034      else if (unformat (input, "%U",
3035			 unformat_ip4_address, &ip4_base_address))
3036	;
3037      else
3038	return clib_error_return (0, "unknown input `%U'",
3039				  format_unformat_error, input);
3040    }
3041
3042  n = count;
3043
3044  for (i = 0; i < n; i++)
3045    {
3046      if (!ip4_lookup_validate (&ip4_base_address, table_id))
3047	errors++;
3048
3049      ip4_base_address.as_u32 =
3050	clib_host_to_net_u32 (1 +
3051			      clib_net_to_host_u32 (ip4_base_address.as_u32));
3052    }
3053
3054  if (errors)
3055    vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3056  else
3057    vlib_cli_output (vm, "No errors in %d lookups\n", n);
3058
3059  return 0;
3060}
3061
3062/*?
3063 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3064 * given FIB table to determine if there is a conflict with the
3065 * adjacency table. The fib-id can be determined by using the
3066 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3067 * of 0 is used.
3068 *
3069 * @todo This command uses fib-id, other commands use table-id (not
3070 * just a name, they are different indexes). Would like to change this
3071 * to table-id for consistency.
3072 *
3073 * @cliexpar
3074 * Example of how to run the test lookup command:
3075 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3076 * No errors in 2 lookups
3077 * @cliexend
3078?*/
3079/* *INDENT-OFF* */
3080VLIB_CLI_COMMAND (lookup_test_command, static) =
3081{
3082  .path = "test lookup",
3083  .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3084  .function = test_lookup_command_fn,
3085};
3086/* *INDENT-ON* */
3087
3088#ifndef CLIB_MARCH_VARIANT
3089int
3090vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3091{
3092  u32 fib_index;
3093
3094  fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3095
3096  if (~0 == fib_index)
3097    return VNET_API_ERROR_NO_SUCH_FIB;
3098
3099  fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
3100				  flow_hash_config);
3101
3102  return 0;
3103}
3104#endif
3105
3106static clib_error_t *
3107set_ip_flow_hash_command_fn (vlib_main_t * vm,
3108			     unformat_input_t * input,
3109			     vlib_cli_command_t * cmd)
3110{
3111  int matched = 0;
3112  u32 table_id = 0;
3113  u32 flow_hash_config = 0;
3114  int rv;
3115
3116  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3117    {
3118      if (unformat (input, "table %d", &table_id))
3119	matched = 1;
3120#define _(a,v) \
3121    else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3122      foreach_flow_hash_bit
3123#undef _
3124	else
3125	break;
3126    }
3127
3128  if (matched == 0)
3129    return clib_error_return (0, "unknown input `%U'",
3130			      format_unformat_error, input);
3131
3132  rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3133  switch (rv)
3134    {
3135    case 0:
3136      break;
3137
3138    case VNET_API_ERROR_NO_SUCH_FIB:
3139      return clib_error_return (0, "no such FIB table %d", table_id);
3140
3141    default:
3142      clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3143      break;
3144    }
3145
3146  return 0;
3147}
3148
3149/*?
3150 * Configure the set of IPv4 fields used by the flow hash.
3151 *
3152 * @cliexpar
3153 * Example of how to set the flow hash on a given table:
3154 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3156 * @cliexstart{show ip fib}
3157 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3158 * 0.0.0.0/0
3159 *   unicast-ip4-chain
3160 *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3161 *     [0] [@0]: dpo-drop ip6
3162 * 0.0.0.0/32
3163 *   unicast-ip4-chain
3164 *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3165 *     [0] [@0]: dpo-drop ip6
3166 * 224.0.0.0/8
3167 *   unicast-ip4-chain
3168 *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3169 *     [0] [@0]: dpo-drop ip6
3170 * 6.0.1.2/32
3171 *   unicast-ip4-chain
3172 *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3173 *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3174 * 7.0.0.1/32
3175 *   unicast-ip4-chain
3176 *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3177 *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3178 *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3179 *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3180 *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3181 * 240.0.0.0/8
3182 *   unicast-ip4-chain
3183 *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3184 *     [0] [@0]: dpo-drop ip6
3185 * 255.255.255.255/32
3186 *   unicast-ip4-chain
3187 *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3188 *     [0] [@0]: dpo-drop ip6
3189 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3190 * 0.0.0.0/0
3191 *   unicast-ip4-chain
3192 *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3193 *     [0] [@0]: dpo-drop ip6
3194 * 0.0.0.0/32
3195 *   unicast-ip4-chain
3196 *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3197 *     [0] [@0]: dpo-drop ip6
3198 * 172.16.1.0/24
3199 *   unicast-ip4-chain
3200 *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3201 *     [0] [@4]: ipv4-glean: af_packet0
3202 * 172.16.1.1/32
3203 *   unicast-ip4-chain
3204 *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3205 *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3206 * 172.16.1.2/32
3207 *   unicast-ip4-chain
3208 *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3209 *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3210 * 172.16.2.0/24
3211 *   unicast-ip4-chain
3212 *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3213 *     [0] [@4]: ipv4-glean: af_packet1
3214 * 172.16.2.1/32
3215 *   unicast-ip4-chain
3216 *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3217 *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3218 * 224.0.0.0/8
3219 *   unicast-ip4-chain
3220 *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3221 *     [0] [@0]: dpo-drop ip6
3222 * 240.0.0.0/8
3223 *   unicast-ip4-chain
3224 *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3225 *     [0] [@0]: dpo-drop ip6
3226 * 255.255.255.255/32
3227 *   unicast-ip4-chain
3228 *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3229 *     [0] [@0]: dpo-drop ip6
3230 * @cliexend
3231?*/
3232/* *INDENT-OFF* */
3233VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3234{
3235  .path = "set ip flow-hash",
3236  .short_help =
3237  "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3238  .function = set_ip_flow_hash_command_fn,
3239};
3240/* *INDENT-ON* */
3241
3242#ifndef CLIB_MARCH_VARIANT
3243int
3244vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3245			     u32 table_index)
3246{
3247  vnet_main_t *vnm = vnet_get_main ();
3248  vnet_interface_main_t *im = &vnm->interface_main;
3249  ip4_main_t *ipm = &ip4_main;
3250  ip_lookup_main_t *lm = &ipm->lookup_main;
3251  vnet_classify_main_t *cm = &vnet_classify_main;
3252  ip4_address_t *if_addr;
3253
3254  if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3255    return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3256
3257  if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3258    return VNET_API_ERROR_NO_SUCH_ENTRY;
3259
3260  vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3261  lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3262
3263  if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3264
3265  if (NULL != if_addr)
3266    {
3267      fib_prefix_t pfx = {
3268	.fp_len = 32,
3269	.fp_proto = FIB_PROTOCOL_IP4,
3270	.fp_addr.ip4 = *if_addr,
3271      };
3272      u32 fib_index;
3273
3274      fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3275						       sw_if_index);
3276
3277
3278      if (table_index != (u32) ~ 0)
3279	{
3280	  dpo_id_t dpo = DPO_INVALID;
3281
3282	  dpo_set (&dpo,
3283		   DPO_CLASSIFY,
3284		   DPO_PROTO_IP4,
3285		   classify_dpo_create (DPO_PROTO_IP4, table_index));
3286
3287	  fib_table_entry_special_dpo_add (fib_index,
3288					   &pfx,
3289					   FIB_SOURCE_CLASSIFY,
3290					   FIB_ENTRY_FLAG_NONE, &dpo);
3291	  dpo_reset (&dpo);
3292	}
3293      else
3294	{
3295	  fib_table_entry_special_remove (fib_index,
3296					  &pfx, FIB_SOURCE_CLASSIFY);
3297	}
3298    }
3299
3300  return 0;
3301}
3302#endif
3303
3304static clib_error_t *
3305set_ip_classify_command_fn (vlib_main_t * vm,
3306			    unformat_input_t * input,
3307			    vlib_cli_command_t * cmd)
3308{
3309  u32 table_index = ~0;
3310  int table_index_set = 0;
3311  u32 sw_if_index = ~0;
3312  int rv;
3313
3314  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3315    {
3316      if (unformat (input, "table-index %d", &table_index))
3317	table_index_set = 1;
3318      else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3319			 vnet_get_main (), &sw_if_index))
3320	;
3321      else
3322	break;
3323    }
3324
3325  if (table_index_set == 0)
3326    return clib_error_return (0, "classify table-index must be specified");
3327
3328  if (sw_if_index == ~0)
3329    return clib_error_return (0, "interface / subif must be specified");
3330
3331  rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3332
3333  switch (rv)
3334    {
3335    case 0:
3336      break;
3337
3338    case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3339      return clib_error_return (0, "No such interface");
3340
3341    case VNET_API_ERROR_NO_SUCH_ENTRY:
3342      return clib_error_return (0, "No such classifier table");
3343    }
3344  return 0;
3345}
3346
3347/*?
3348 * Assign a classification table to an interface. The classification
3349 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3351 * on an interface.
3352 *
3353 * @cliexpar
3354 * Example of how to assign a classification table to an interface:
3355 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3356?*/
3357/* *INDENT-OFF* */
3358VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3359{
3360    .path = "set ip classify",
3361    .short_help =
3362    "set ip classify intfc <interface> table-index <classify-idx>",
3363    .function = set_ip_classify_command_fn,
3364};
3365/* *INDENT-ON* */
3366
3367static clib_error_t *
3368ip4_config (vlib_main_t * vm, unformat_input_t * input)
3369{
3370  ip4_main_t *im = &ip4_main;
3371  uword heapsize = 0;
3372
3373  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3374    {
3375      if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3376	;
3377      else
3378	return clib_error_return (0,
3379				  "invalid heap-size parameter `%U'",
3380				  format_unformat_error, input);
3381    }
3382
3383  im->mtrie_heap_size = heapsize;
3384
3385  return 0;
3386}
3387
3388VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3389
3390/*
3391 * fd.io coding-style-patch-verification: ON
3392 *
3393 * Local Variables:
3394 * eval: (c-set-style "gnu")
3395 * End:
3396 */
3397