node.c revision 42bde459
1/*
2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * ethernet_node.c: ethernet packet processing
17 *
18 * Copyright (c) 2008 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vlib/vlib.h>
41#include <vnet/pg/pg.h>
42#include <vnet/ethernet/ethernet.h>
43#include <vnet/ethernet/p2p_ethernet.h>
44#include <vnet/devices/pipe/pipe.h>
45#include <vppinfra/sparse_vec.h>
46#include <vnet/l2/l2_bvi.h>
47#include <vnet/classify/trace_classify.h>
48
/* List of next nodes reachable from ethernet-input; each entry expands to
   an enum value (ETHERNET_INPUT_NEXT_<name>, below) and a graph node name. */
#define foreach_ethernet_input_next		\
  _ (PUNT, "error-punt")			\
  _ (DROP, "error-drop")			\
  _ (LLC, "llc-input")				\
  _ (IP4_INPUT, "ip4-input")			\
  _ (IP4_INPUT_NCS, "ip4-input-no-checksum")

/* Next-node indices generated from the list above. */
typedef enum
{
#define _(s,n) ETHERNET_INPUT_NEXT_##s,
  foreach_ethernet_input_next
#undef _
    ETHERNET_INPUT_N_NEXT,
} ethernet_input_next_t;
63
/* Per-packet trace record for the ethernet-input node. */
typedef struct
{
  u8 packet_data[32];		/* leading packet bytes, formatted as an ethernet header */
  u16 frame_flags;		/* ETH_INPUT_FRAME_F_* flags of the input frame */
  ethernet_input_frame_t frame_data;	/* hw/sw if-index, shown when SINGLE_SW_IF_IDX is set */
} ethernet_input_trace_t;
70
71static u8 *
72format_ethernet_input_trace (u8 * s, va_list * va)
73{
74  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
75  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
76  ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *);
77  u32 indent = format_get_indent (s);
78
79  if (t->frame_flags)
80    {
81      s = format (s, "frame: flags 0x%x", t->frame_flags);
82      if (t->frame_flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
83	s = format (s, ", hw-if-index %u, sw-if-index %u",
84		    t->frame_data.hw_if_index, t->frame_data.sw_if_index);
85      s = format (s, "\n%U", format_white_space, indent);
86    }
87  s = format (s, "%U", format_ethernet_header, t->packet_data);
88
89  return s;
90}
91
92extern vlib_node_registration_t ethernet_input_node;
93
/* Flavor of header parsing requested by the caller (see parse_header). */
typedef enum
{
  ETHERNET_INPUT_VARIANT_ETHERNET,	/* full ethernet header present */
  ETHERNET_INPUT_VARIANT_ETHERNET_TYPE,	/* only 2-byte type field; prior node was LLC/SNAP */
  ETHERNET_INPUT_VARIANT_NOT_L2,	/* parsed like ETHERNET (full header present) */
} ethernet_input_variant_t;
100
101
// Parse the ethernet header to extract vlan tags and innermost ethertype.
//
// Advances b0 past the ethernet header (or just the 2-byte type field for
// the ETHERNET_TYPE variant) and past every vlan tag found.  Outputs:
//   type        - innermost ethertype, host byte order
//   orig_type   - outermost ethertype, to tell dot1q from dot1ad later
//   outer_id    - outer vlan id (0 if untagged or vlan id is 0)
//   inner_id    - inner vlan id (0 if at most one tag)
//   match_flags - SUBINT_CONFIG_* flags describing how many tags were seen
static_always_inline void
parse_header (ethernet_input_variant_t variant,
	      vlib_buffer_t * b0,
	      u16 * type,
	      u16 * orig_type,
	      u16 * outer_id, u16 * inner_id, u32 * match_flags)
{
  u8 vlan_count;

  if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
      || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
    {
      ethernet_header_t *e0;

      e0 = (void *) (b0->data + b0->current_data);

      // remember where the L2 header starts for later rewind/length math
      vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
      b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;

      vlib_buffer_advance (b0, sizeof (e0[0]));

      *type = clib_net_to_host_u16 (e0->type);
    }
  else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
    {
      // here when prior node was LLC/SNAP processing
      u16 *e0;

      e0 = (void *) (b0->data + b0->current_data);

      vlib_buffer_advance (b0, sizeof (e0[0]));

      *type = clib_net_to_host_u16 (e0[0]);
    }

  // save for distinguishing between dot1q and dot1ad later
  *orig_type = *type;

  // default the tags to 0 (used if there is no corresponding tag)
  *outer_id = 0;
  *inner_id = 0;

  *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
  vlan_count = 0;

  // check for vlan encaps
  if (ethernet_frame_is_tagged (*type))
    {
      ethernet_vlan_header_t *h0;
      u16 tag;

      *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;

      h0 = (void *) (b0->data + b0->current_data);

      tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);

      *outer_id = tag & 0xfff;
      // vlan id 0 does not select a vlan subinterface - drop the 1-tag match
      if (0 == *outer_id)
	*match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG;

      *type = clib_net_to_host_u16 (h0->type);

      vlib_buffer_advance (b0, sizeof (h0[0]));
      vlan_count = 1;

      if (*type == ETHERNET_TYPE_VLAN)
	{
	  // Double tagged packet
	  *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;

	  h0 = (void *) (b0->data + b0->current_data);

	  tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);

	  *inner_id = tag & 0xfff;

	  *type = clib_net_to_host_u16 (h0->type);

	  vlib_buffer_advance (b0, sizeof (h0[0]));
	  vlan_count = 2;
	  if (*type == ETHERNET_TYPE_VLAN)
	    {
	      // More than double tagged packet
	      *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;

	      vlib_buffer_advance (b0, sizeof (h0[0]));
	      vlan_count = 3;	// "unknown" number, aka, 3-or-more
	    }
	}
    }
  ethernet_buffer_set_vlan_count (b0, vlan_count);
}
196
/* Forward declaration: dmac filter helper used by identify_subint();
   defined further below. */
static_always_inline void
ethernet_input_inline_dmac_check (vnet_hw_interface_t * hi,
				  u64 * dmacs, u8 * dmacs_bad,
				  u32 n_packets, ethernet_interface_t * ei,
				  u8 have_sec_dmac);
202
// Determine the subinterface for this packet, given the result of the
// vlan table lookups and vlan header parsing. Check the most specific
// matches first.
//
// On a match, packets destined to an L3 (sub)interface are additionally
// dmac-filtered against the interface MAC address(es); a match with
// sw_if_index ~0 is flagged as ETHERNET_ERROR_DOWN.
static_always_inline void
identify_subint (vnet_hw_interface_t * hi,
		 vlib_buffer_t * b0,
		 u32 match_flags,
		 main_intf_t * main_intf,
		 vlan_intf_t * vlan_intf,
		 qinq_intf_t * qinq_intf,
		 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
{
  u32 matched;

  matched = eth_identify_subint (hi, match_flags, main_intf, vlan_intf,
				 qinq_intf, new_sw_if_index, error0, is_l2);

  if (matched)
    {
      // Perform L3 my-mac filter
      // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac.
      // This is required for promiscuous mode, else we will forward packets we aren't supposed to.
      if (!(*is_l2))
	{
	  // the dmac-check helper expects 2-entry arrays even for 1 packet
	  u64 dmacs[2];
	  u8 dmacs_bad[2];
	  ethernet_header_t *e0;
	  ethernet_interface_t *ei0;

	  e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
	  dmacs[0] = *(u64 *) e0;
	  ei0 = ethernet_get_interface (&ethernet_main, hi->hw_if_index);

	  // separate calls keep have_sec_dmac a compile-time constant
	  if (ei0 && vec_len (ei0->secondary_addrs))
	    ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
					      1 /* n_packets */ , ei0,
					      1 /* have_sec_dmac */ );
	  else
	    ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
					      1 /* n_packets */ , ei0,
					      0 /* have_sec_dmac */ );

	  if (dmacs_bad[0])
	    *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
	}

      // Check for down subinterface
      *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN;
    }
}
253
254static_always_inline void
255determine_next_node (ethernet_main_t * em,
256		     ethernet_input_variant_t variant,
257		     u32 is_l20,
258		     u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
259{
260  vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
261  b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
262
263  if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
264    {
265      // some error occurred
266      *next0 = ETHERNET_INPUT_NEXT_DROP;
267    }
268  else if (is_l20)
269    {
270      // record the L2 len and reset the buffer so the L2 header is preserved
271      u32 eth_start = vnet_buffer (b0)->l2_hdr_offset;
272      vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start;
273      *next0 = em->l2_next;
274      ASSERT (vnet_buffer (b0)->l2.l2_len ==
275	      ethernet_buffer_header_size (b0));
276      vlib_buffer_advance (b0, -(vnet_buffer (b0)->l2.l2_len));
277
278      // check for common IP/MPLS ethertypes
279    }
280  else if (type0 == ETHERNET_TYPE_IP4)
281    {
282      *next0 = em->l3_next.input_next_ip4;
283    }
284  else if (type0 == ETHERNET_TYPE_IP6)
285    {
286      *next0 = em->l3_next.input_next_ip6;
287    }
288  else if (type0 == ETHERNET_TYPE_MPLS)
289    {
290      *next0 = em->l3_next.input_next_mpls;
291
292    }
293  else if (em->redirect_l3)
294    {
295      // L3 Redirect is on, the cached common next nodes will be
296      // pointing to the redirect node, catch the uncommon types here
297      *next0 = em->redirect_l3_next;
298    }
299  else
300    {
301      // uncommon ethertype, check table
302      u32 i0;
303      i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0);
304      *next0 = vec_elt (em->l3_next.input_next_by_type, i0);
305      *error0 =
306	i0 ==
307	SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0;
308
309      // The table is not populated with LLC values, so check that now.
310      // If variant is variant_ethernet then we came from LLC processing. Don't
311      // go back there; drop instead using by keeping the drop/bad table result.
312      if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
313	{
314	  *next0 = ETHERNET_INPUT_NEXT_LLC;
315	}
316    }
317}
318
319
/* following vector code relies on following assumptions */
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_data, 0);
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, current_length, 2);
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, flags, 4);
/* l2_hdr_offset and l3_hdr_offset must be adjacent u16s so both can be
   written with a single 32-bit store in the vector path below */
STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l2_hdr_offset) ==
	       STRUCT_OFFSET_OF (vnet_buffer_opaque_t, l3_hdr_offset) - 2,
	       "l3_hdr_offset must follow l2_hdr_offset");
327
/* Set l2_hdr_offset/l3_hdr_offset and the offset-valid flags on 4 buffers;
   in L3 mode (is_l3) also advance past the ethernet header, otherwise
   record l2.l2_len.  Assumes untagged packets - tagged packets have their
   metadata fixed up later (see eth_input_tag_lookup). */
static_always_inline void
eth_input_adv_and_flags_x4 (vlib_buffer_t ** b, int is_l3)
{
  i16 adv = sizeof (ethernet_header_t);
  u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
    VNET_BUFFER_F_L3_HDR_OFFSET_VALID;

#ifdef CLIB_HAVE_VEC256
  /* to reduce number of small loads/stores we are loading first 64 bits
     of each buffer metadata into 256-bit register so we can advance
     current_data, current_length and flags.
     Observed saving of this code is ~2 clocks per packet */
  u64x4 r, radv;

  /* vector of signed 16 bit integers used in signed vector add operation
     to advance current_data and current_length */
  u32x8 flags4 = { 0, flags, 0, flags, 0, flags, 0, flags };
  i16x16 adv4 = {
    adv, -adv, 0, 0, adv, -adv, 0, 0,
    adv, -adv, 0, 0, adv, -adv, 0, 0
  };

  /* load 4 x 64 bits */
  r = u64x4_gather (b[0], b[1], b[2], b[3]);

  /* set flags */
  r |= (u64x4) flags4;

  /* advance buffer */
  radv = (u64x4) ((i16x16) r + adv4);

  /* write 4 x 64 bits */
  u64x4_scatter (is_l3 ? radv : r, b[0], b[1], b[2], b[3]);

  /* use old current_data as l2_hdr_offset and new current_data as
     l3_hdr_offset */
  r = (u64x4) u16x16_blend (r, radv << 16, 0xaa);

  /* store both l2_hdr_offset and l3_hdr_offset in single store operation */
  u32x8_scatter_one ((u32x8) r, 0, &vnet_buffer (b[0])->l2_hdr_offset);
  u32x8_scatter_one ((u32x8) r, 2, &vnet_buffer (b[1])->l2_hdr_offset);
  u32x8_scatter_one ((u32x8) r, 4, &vnet_buffer (b[2])->l2_hdr_offset);
  u32x8_scatter_one ((u32x8) r, 6, &vnet_buffer (b[3])->l2_hdr_offset);

  if (is_l3)
    {
      ASSERT (b[0]->current_data == vnet_buffer (b[0])->l3_hdr_offset);
      ASSERT (b[1]->current_data == vnet_buffer (b[1])->l3_hdr_offset);
      ASSERT (b[2]->current_data == vnet_buffer (b[2])->l3_hdr_offset);
      ASSERT (b[3]->current_data == vnet_buffer (b[3])->l3_hdr_offset);

      ASSERT (b[0]->current_data - vnet_buffer (b[0])->l2_hdr_offset == adv);
      ASSERT (b[1]->current_data - vnet_buffer (b[1])->l2_hdr_offset == adv);
      ASSERT (b[2]->current_data - vnet_buffer (b[2])->l2_hdr_offset == adv);
      ASSERT (b[3]->current_data - vnet_buffer (b[3])->l2_hdr_offset == adv);
    }
  else
    {
      ASSERT (b[0]->current_data == vnet_buffer (b[0])->l2_hdr_offset);
      ASSERT (b[1]->current_data == vnet_buffer (b[1])->l2_hdr_offset);
      ASSERT (b[2]->current_data == vnet_buffer (b[2])->l2_hdr_offset);
      ASSERT (b[3]->current_data == vnet_buffer (b[3])->l2_hdr_offset);

      ASSERT (b[0]->current_data - vnet_buffer (b[0])->l3_hdr_offset == -adv);
      ASSERT (b[1]->current_data - vnet_buffer (b[1])->l3_hdr_offset == -adv);
      ASSERT (b[2]->current_data - vnet_buffer (b[2])->l3_hdr_offset == -adv);
      ASSERT (b[3]->current_data - vnet_buffer (b[3])->l3_hdr_offset == -adv);
    }

#else
  /* scalar fallback: same metadata updates, one buffer at a time */
  vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
  vnet_buffer (b[1])->l2_hdr_offset = b[1]->current_data;
  vnet_buffer (b[2])->l2_hdr_offset = b[2]->current_data;
  vnet_buffer (b[3])->l2_hdr_offset = b[3]->current_data;
  vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
  vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data + adv;
  vnet_buffer (b[2])->l3_hdr_offset = b[2]->current_data + adv;
  vnet_buffer (b[3])->l3_hdr_offset = b[3]->current_data + adv;

  if (is_l3)
    {
      vlib_buffer_advance (b[0], adv);
      vlib_buffer_advance (b[1], adv);
      vlib_buffer_advance (b[2], adv);
      vlib_buffer_advance (b[3], adv);
    }

  b[0]->flags |= flags;
  b[1]->flags |= flags;
  b[2]->flags |= flags;
  b[3]->flags |= flags;
#endif

  if (!is_l3)
    {
      vnet_buffer (b[0])->l2.l2_len = adv;
      vnet_buffer (b[1])->l2.l2_len = adv;
      vnet_buffer (b[2])->l2.l2_len = adv;
      vnet_buffer (b[3])->l2.l2_len = adv;
    }
}
429
430static_always_inline void
431eth_input_adv_and_flags_x1 (vlib_buffer_t ** b, int is_l3)
432{
433  i16 adv = sizeof (ethernet_header_t);
434  u32 flags = VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
435    VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
436
437  vnet_buffer (b[0])->l2_hdr_offset = b[0]->current_data;
438  vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data + adv;
439
440  if (is_l3)
441    vlib_buffer_advance (b[0], adv);
442  b[0]->flags |= flags;
443  if (!is_l3)
444    vnet_buffer (b[0])->l2.l2_len = adv;
445}
446
447
/* Pull per-packet parse data for buffer b[offset] into the frame-wide
   arrays: etype (ethertype, kept in network byte order), tags (the 8 bytes
   following the ethertype - room for up to two vlan tags) and, when
   dmac_check is set, dmacs (first 8 bytes of the header: dmac plus the
   first 2 smac bytes, masked off later by DMAC_MASK). */
static_always_inline void
eth_input_get_etype_and_tags (vlib_buffer_t ** b, u16 * etype, u64 * tags,
			      u64 * dmacs, int offset, int dmac_check)
{
  ethernet_header_t *e;
  e = vlib_buffer_get_current (b[offset]);
#ifdef CLIB_HAVE_VEC128
  /* one 16-byte unaligned load starting 6 bytes before the type field
     covers both the type (u16 lane 3) and the tag data (u64 lane 1) */
  u64x2 r = u64x2_load_unaligned (((u8 *) & e->type) - 6);
  etype[offset] = ((u16x8) r)[3];
  tags[offset] = r[1];
#else
  etype[offset] = e->type;
  tags[offset] = *(u64 *) (e + 1);
#endif

  if (dmac_check)
    dmacs[offset] = *(u64 *) e;
}
466
467static_always_inline u16
468eth_input_next_by_type (u16 etype)
469{
470  ethernet_main_t *em = &ethernet_main;
471
472  return (etype < 0x600) ? ETHERNET_INPUT_NEXT_LLC :
473    vec_elt (em->l3_next.input_next_by_type,
474	     sparse_vec_index (em->l3_next.input_next_by_type, etype));
475}
476
/* State cached between tagged packets so the vlan/qinq lookup is only
   redone when the tag data changes (see eth_input_tag_lookup). */
typedef struct
{
  u64 tag, mask;		/* last tag data and its significant bits */
  u32 sw_if_index;		/* matched subinterface, ~0 if none */
  u16 type, len, next;		/* inner ethertype, L2 header len, next node */
  i16 adv;			/* buffer advance applied per packet */
  u8 err, n_tags;		/* error code and number of vlan tags */
  u64 n_packets, n_bytes;	/* rx counters accumulated for sw_if_index */
} eth_input_tag_lookup_t;
486
487static_always_inline void
488eth_input_update_if_counters (vlib_main_t * vm, vnet_main_t * vnm,
489			      eth_input_tag_lookup_t * l)
490{
491  if (l->n_packets == 0 || l->sw_if_index == ~0)
492    return;
493
494  if (l->adv > 0)
495    l->n_bytes += l->n_packets * l->len;
496
497  vlib_increment_combined_counter
498    (vnm->interface_main.combined_sw_if_counters +
499     VNET_INTERFACE_COUNTER_RX, vm->thread_index, l->sw_if_index,
500     l->n_packets, l->n_bytes);
501}
502
/* Slow path for one vlan-tagged packet.  l caches the previous lookup
   result; the vlan/qinq tables are consulted again only when the tag data
   (masked to the significant tags) differs from the cached tag.  When the
   matched sw_if_index changes, the accumulated rx counters are flushed via
   eth_input_update_if_counters.  Sets the packet's next node, applies the
   buffer advance and updates L2/L3 metadata and counters. */
static_always_inline void
eth_input_tag_lookup (vlib_main_t * vm, vnet_main_t * vnm,
		      vlib_node_runtime_t * node, vnet_hw_interface_t * hi,
		      u64 tag, u16 * next, vlib_buffer_t * b,
		      eth_input_tag_lookup_t * l, u8 dmac_bad, int is_dot1ad,
		      int main_is_l3, int check_dmac)
{
  ethernet_main_t *em = &ethernet_main;

  if ((tag ^ l->tag) & l->mask)
    {
      main_intf_t *mif = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
      vlan_intf_t *vif;
      qinq_intf_t *qif;
      vlan_table_t *vlan_table;
      qinq_table_t *qinq_table;
      u16 *t = (u16 *) & tag;
      u16 vlan1 = clib_net_to_host_u16 (t[0]) & 0xFFF;
      u16 vlan2 = clib_net_to_host_u16 (t[2]) & 0xFFF;
      u32 matched, is_l2, new_sw_if_index;

      vlan_table = vec_elt_at_index (em->vlan_pool, is_dot1ad ?
				     mif->dot1ad_vlans : mif->dot1q_vlans);
      vif = &vlan_table->vlans[vlan1];
      qinq_table = vec_elt_at_index (em->qinq_pool, vif->qinqs);
      qif = &qinq_table->vlans[vlan2];
      l->err = ETHERNET_ERROR_NONE;
      l->type = clib_net_to_host_u16 (t[1]);

      if (l->type == ETHERNET_TYPE_VLAN)
	{
	  /* inner tag present - real ethertype is after the second tag */
	  l->type = clib_net_to_host_u16 (t[3]);
	  l->n_tags = 2;
	  matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
					 SUBINT_CONFIG_MATCH_2_TAG, mif, vif,
					 qif, &new_sw_if_index, &l->err,
					 &is_l2);
	}
      else
	{
	  l->n_tags = 1;
	  if (vlan1 == 0)
	    {
	      /* vlan id 0 - no subinterface, use the main interface */
	      new_sw_if_index = hi->sw_if_index;
	      l->err = ETHERNET_ERROR_NONE;
	      matched = 1;
	      is_l2 = main_is_l3 == 0;
	    }
	  else
	    matched = eth_identify_subint (hi, SUBINT_CONFIG_VALID |
					   SUBINT_CONFIG_MATCH_1_TAG, mif,
					   vif, qif, &new_sw_if_index,
					   &l->err, &is_l2);
	}

      if (l->sw_if_index != new_sw_if_index)
	{
	  /* flush counters accumulated for the previous subinterface */
	  eth_input_update_if_counters (vm, vnm, l);
	  l->n_packets = 0;
	  l->n_bytes = 0;
	  l->sw_if_index = new_sw_if_index;
	}
      l->tag = tag;
      /* with a single tag only the first 4 bytes of tag data matter */
      l->mask = (l->n_tags == 2) ?
	clib_net_to_host_u64 (0xffffffffffffffff) :
	clib_net_to_host_u64 (0xffffffff00000000);

      if (matched && l->sw_if_index == ~0)
	l->err = ETHERNET_ERROR_DOWN;

      l->len = sizeof (ethernet_header_t) +
	l->n_tags * sizeof (ethernet_vlan_header_t);
      if (main_is_l3)
	l->adv = is_l2 ? -(int) sizeof (ethernet_header_t) :
	  l->n_tags * sizeof (ethernet_vlan_header_t);
      else
	l->adv = is_l2 ? 0 : l->len;

      if (PREDICT_FALSE (l->err != ETHERNET_ERROR_NONE))
	l->next = ETHERNET_INPUT_NEXT_DROP;
      else if (is_l2)
	l->next = em->l2_next;
      else if (l->type == ETHERNET_TYPE_IP4)
	l->next = em->l3_next.input_next_ip4;
      else if (l->type == ETHERNET_TYPE_IP6)
	l->next = em->l3_next.input_next_ip6;
      else if (l->type == ETHERNET_TYPE_MPLS)
	l->next = em->l3_next.input_next_mpls;
      else if (em->redirect_l3)
	l->next = em->redirect_l3_next;
      else
	{
	  l->next = eth_input_next_by_type (l->type);
	  if (l->next == ETHERNET_INPUT_NEXT_PUNT)
	    l->err = ETHERNET_ERROR_UNKNOWN_TYPE;
	}
    }

  /* l->adv > 0 means the packet is headed to L3 - apply the dmac filter */
  if (check_dmac && l->adv > 0 && dmac_bad)
    {
      l->err = ETHERNET_ERROR_L3_MAC_MISMATCH;
      next[0] = ETHERNET_INPUT_NEXT_PUNT;
    }
  else
    next[0] = l->next;

  vlib_buffer_advance (b, l->adv);
  vnet_buffer (b)->l2.l2_len = l->len;
  vnet_buffer (b)->l3_hdr_offset = vnet_buffer (b)->l2_hdr_offset + l->len;

  if (l->err == ETHERNET_ERROR_NONE)
    {
      vnet_buffer (b)->sw_if_index[VLIB_RX] = l->sw_if_index;
      ethernet_buffer_set_vlan_count (b, l->n_tags);
    }
  else
    b->error = node->errors[l->err];

  /* update counters */
  l->n_packets += 1;
  l->n_bytes += vlib_buffer_length_in_chain (vm, b);
}
625
/* Masks applied to the u64 loaded from the start of the ethernet header
   (network byte order): DMAC_MASK keeps the 6-byte destination MAC,
   DMAC_IGBIT selects its individual/group (multicast) bit. */
#define DMAC_MASK clib_net_to_host_u64 (0xFFFFFFFFFFFF0000)
#define DMAC_IGBIT clib_net_to_host_u64 (0x0100000000000000)
628
#ifdef CLIB_HAVE_VEC256
/* Compare 4 dmacs against hwaddr at once; returns a 32-bit byte mask with
   0xff per lane for each mismatching unicast dmac ("bad").  Dmacs with the
   IG (multicast) bit set are never bad. */
static_always_inline u32
is_dmac_bad_x4 (u64 * dmacs, u64 hwaddr)
{
  u64x4 r0 = u64x4_load_unaligned (dmacs) & u64x4_splat (DMAC_MASK);
  r0 = (r0 != u64x4_splat (hwaddr)) & ((r0 & u64x4_splat (DMAC_IGBIT)) == 0);
  return u8x32_msb_mask ((u8x32) (r0));
}
#endif
638
639static_always_inline u8
640is_dmac_bad (u64 dmac, u64 hwaddr)
641{
642  u64 r0 = dmac & DMAC_MASK;
643  return (r0 != hwaddr) && ((r0 & DMAC_IGBIT) == 0);
644}
645
646static_always_inline u8
647is_sec_dmac_bad (u64 dmac, u64 hwaddr)
648{
649  return ((dmac & DMAC_MASK) != hwaddr);
650}
651
#ifdef CLIB_HAVE_VEC256
/* Compare 4 dmacs against a secondary address; returns a 32-bit byte mask
   with 0xff per lane for each mismatch (no multicast exemption here). */
static_always_inline u32
is_sec_dmac_bad_x4 (u64 * dmacs, u64 hwaddr)
{
  u64x4 r0 = u64x4_load_unaligned (dmacs) & u64x4_splat (DMAC_MASK);
  r0 = (r0 != u64x4_splat (hwaddr));
  return u8x32_msb_mask ((u8x32) (r0));
}
#endif
661
662static_always_inline u8
663eth_input_sec_dmac_check_x1 (u64 hwaddr, u64 * dmac, u8 * dmac_bad)
664{
665  dmac_bad[0] &= is_sec_dmac_bad (dmac[0], hwaddr);
666  return dmac_bad[0];
667}
668
/* And-accumulate the "still bad" state of 4 packets against one secondary
   address; returns nonzero if any of the 4 remains bad. */
static_always_inline u32
eth_input_sec_dmac_check_x4 (u64 hwaddr, u64 * dmac, u8 * dmac_bad)
{
#ifdef CLIB_HAVE_VEC256
  *(u32 *) (dmac_bad + 0) &= is_sec_dmac_bad_x4 (dmac + 0, hwaddr);
#else
  dmac_bad[0] &= is_sec_dmac_bad (dmac[0], hwaddr);
  dmac_bad[1] &= is_sec_dmac_bad (dmac[1], hwaddr);
  dmac_bad[2] &= is_sec_dmac_bad (dmac[2], hwaddr);
  dmac_bad[3] &= is_sec_dmac_bad (dmac[3], hwaddr);
#endif
  return *(u32 *) dmac_bad;
}
682
683/*
684 * DMAC check for ethernet_input_inline()
685 *
686 * dmacs and dmacs_bad are arrays that are 2 elements long
687 * n_packets should be 1 or 2 for ethernet_input_inline()
688 */
689static_always_inline void
690ethernet_input_inline_dmac_check (vnet_hw_interface_t * hi,
691				  u64 * dmacs, u8 * dmacs_bad,
692				  u32 n_packets, ethernet_interface_t * ei,
693				  u8 have_sec_dmac)
694{
695  u64 hwaddr = (*(u64 *) hi->hw_address) & DMAC_MASK;
696  u8 bad = 0;
697
698  dmacs_bad[0] = is_dmac_bad (dmacs[0], hwaddr);
699  dmacs_bad[1] = ((n_packets > 1) & is_dmac_bad (dmacs[1], hwaddr));
700
701  bad = dmacs_bad[0] | dmacs_bad[1];
702
703  if (PREDICT_FALSE (bad && have_sec_dmac))
704    {
705      mac_address_t *sec_addr;
706
707      vec_foreach (sec_addr, ei->secondary_addrs)
708      {
709	hwaddr = (*(u64 *) sec_addr) & DMAC_MASK;
710
711	bad = (eth_input_sec_dmac_check_x1 (hwaddr, dmacs, dmacs_bad) |
712	       eth_input_sec_dmac_check_x1 (hwaddr, dmacs + 1,
713					    dmacs_bad + 1));
714
715	if (!bad)
716	  return;
717      }
718    }
719}
720
/* Frame-wide dmac filter: set dmacs_bad[i] nonzero for every packet whose
   unicast dmac matches neither the interface primary address nor (when
   have_sec_dmac) any of its secondary addresses.  Works in strides of 4
   (8 with VEC256), so it may touch entries a little past n_packets - the
   caller sizes both arrays to VLIB_FRAME_SIZE. */
static_always_inline void
eth_input_process_frame_dmac_check (vnet_hw_interface_t * hi,
				    u64 * dmacs, u8 * dmacs_bad,
				    u32 n_packets, ethernet_interface_t * ei,
				    u8 have_sec_dmac)
{
  u64 hwaddr = (*(u64 *) hi->hw_address) & DMAC_MASK;
  u64 *dmac = dmacs;
  u8 *dmac_bad = dmacs_bad;
  u32 bad = 0;
  i32 n_left = n_packets;

#ifdef CLIB_HAVE_VEC256
  /* pass 1: compare everything against the primary address */
  while (n_left > 0)
    {
      bad |= *(u32 *) (dmac_bad + 0) = is_dmac_bad_x4 (dmac + 0, hwaddr);
      bad |= *(u32 *) (dmac_bad + 4) = is_dmac_bad_x4 (dmac + 4, hwaddr);

      /* next */
      dmac += 8;
      dmac_bad += 8;
      n_left -= 8;
    }
#else
  /* pass 1: compare everything against the primary address */
  while (n_left > 0)
    {
      bad |= dmac_bad[0] = is_dmac_bad (dmac[0], hwaddr);
      bad |= dmac_bad[1] = is_dmac_bad (dmac[1], hwaddr);
      bad |= dmac_bad[2] = is_dmac_bad (dmac[2], hwaddr);
      bad |= dmac_bad[3] = is_dmac_bad (dmac[3], hwaddr);

      /* next */
      dmac += 4;
      dmac_bad += 4;
      n_left -= 4;
    }
#endif

  if (have_sec_dmac && bad)
    {
      mac_address_t *addr;

      /* pass 2: for each secondary address, clear the bad flag of any
         packet that matches it; stop once nothing is left bad */
      vec_foreach (addr, ei->secondary_addrs)
      {
	u64 hwaddr = ((u64 *) addr)[0] & DMAC_MASK;
	i32 n_left = n_packets;
	u64 *dmac = dmacs;
	u8 *dmac_bad = dmacs_bad;

	bad = 0;

	while (n_left > 0)
	  {
	    int adv = 0;
	    int n_bad;

	    /* skip any that have already matched */
	    if (!dmac_bad[0])
	      {
		dmac += 1;
		dmac_bad += 1;
		n_left -= 1;
		continue;
	      }

	    n_bad = clib_min (4, n_left);

	    /* If >= 4 left, compare 4 together */
	    if (n_bad == 4)
	      {
		bad |= eth_input_sec_dmac_check_x4 (hwaddr, dmac, dmac_bad);
		adv = 4;
		n_bad = 0;
	      }

	    /* handle individually */
	    while (n_bad > 0)
	      {
		bad |= eth_input_sec_dmac_check_x1 (hwaddr, dmac + adv,
						    dmac_bad + adv);
		adv += 1;
		n_bad -= 1;
	      }

	    dmac += adv;
	    dmac_bad += adv;
	    n_left -= adv;
	  }

	if (!bad)		/* can stop looping if everything matched */
	  break;
      }
    }
}
815
/* process frame of buffers, store ethertype into array and update
   buffer metadata fields depending on interface being l2 or l3 assuming that
   packets are untagged. For tagged packets those fields are updated later.
   Optionally store Destination MAC address and tag data into arrays
   for further processing */
821
822STATIC_ASSERT (VLIB_FRAME_SIZE % 8 == 0,
823	       "VLIB_FRAME_SIZE must be power of 8");
824static_always_inline void
825eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
826			 vnet_hw_interface_t * hi,
827			 u32 * buffer_indices, u32 n_packets, int main_is_l3,
828			 int ip4_cksum_ok, int dmac_check)
829{
830  ethernet_main_t *em = &ethernet_main;
831  u16 nexts[VLIB_FRAME_SIZE], *next;
832  u16 etypes[VLIB_FRAME_SIZE], *etype = etypes;
833  u64 dmacs[VLIB_FRAME_SIZE], *dmac = dmacs;
834  u8 dmacs_bad[VLIB_FRAME_SIZE];
835  u64 tags[VLIB_FRAME_SIZE], *tag = tags;
836  u16 slowpath_indices[VLIB_FRAME_SIZE];
837  u16 n_slowpath, i;
838  u16 next_ip4, next_ip6, next_mpls, next_l2;
839  u16 et_ip4 = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
840  u16 et_ip6 = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
841  u16 et_mpls = clib_host_to_net_u16 (ETHERNET_TYPE_MPLS);
842  u16 et_vlan = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
843  u16 et_dot1ad = clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD);
844  i32 n_left = n_packets;
845  vlib_buffer_t *b[20];
846  u32 *from;
847  ethernet_interface_t *ei = ethernet_get_interface (em, hi->hw_if_index);
848
849  from = buffer_indices;
850
851  while (n_left >= 20)
852    {
853      vlib_buffer_t **ph = b + 16, **pd = b + 8;
854      vlib_get_buffers (vm, from, b, 4);
855      vlib_get_buffers (vm, from + 8, pd, 4);
856      vlib_get_buffers (vm, from + 16, ph, 4);
857
858      vlib_prefetch_buffer_header (ph[0], LOAD);
859      vlib_prefetch_buffer_data (pd[0], LOAD);
860      eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
861
862      vlib_prefetch_buffer_header (ph[1], LOAD);
863      vlib_prefetch_buffer_data (pd[1], LOAD);
864      eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
865
866      vlib_prefetch_buffer_header (ph[2], LOAD);
867      vlib_prefetch_buffer_data (pd[2], LOAD);
868      eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
869
870      vlib_prefetch_buffer_header (ph[3], LOAD);
871      vlib_prefetch_buffer_data (pd[3], LOAD);
872      eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
873
874      eth_input_adv_and_flags_x4 (b, main_is_l3);
875
876      /* next */
877      n_left -= 4;
878      etype += 4;
879      tag += 4;
880      dmac += 4;
881      from += 4;
882    }
883  while (n_left >= 4)
884    {
885      vlib_get_buffers (vm, from, b, 4);
886      eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
887      eth_input_get_etype_and_tags (b, etype, tag, dmac, 1, dmac_check);
888      eth_input_get_etype_and_tags (b, etype, tag, dmac, 2, dmac_check);
889      eth_input_get_etype_and_tags (b, etype, tag, dmac, 3, dmac_check);
890      eth_input_adv_and_flags_x4 (b, main_is_l3);
891
892      /* next */
893      n_left -= 4;
894      etype += 4;
895      tag += 4;
896      dmac += 4;
897      from += 4;
898    }
899  while (n_left)
900    {
901      vlib_get_buffers (vm, from, b, 1);
902      eth_input_get_etype_and_tags (b, etype, tag, dmac, 0, dmac_check);
903      eth_input_adv_and_flags_x1 (b, main_is_l3);
904
905      /* next */
906      n_left -= 1;
907      etype += 1;
908      tag += 1;
909      dmac += 4;
910      from += 1;
911    }
912
913  if (dmac_check)
914    {
915      if (ei && vec_len (ei->secondary_addrs))
916	eth_input_process_frame_dmac_check (hi, dmacs, dmacs_bad, n_packets,
917					    ei, 1 /* have_sec_dmac */ );
918      else
919	eth_input_process_frame_dmac_check (hi, dmacs, dmacs_bad, n_packets,
920					    ei, 0 /* have_sec_dmac */ );
921    }
922
923  next_ip4 = em->l3_next.input_next_ip4;
924  next_ip6 = em->l3_next.input_next_ip6;
925  next_mpls = em->l3_next.input_next_mpls;
926  next_l2 = em->l2_next;
927
928  if (next_ip4 == ETHERNET_INPUT_NEXT_IP4_INPUT && ip4_cksum_ok)
929    next_ip4 = ETHERNET_INPUT_NEXT_IP4_INPUT_NCS;
930
931#ifdef CLIB_HAVE_VEC256
932  u16x16 et16_ip4 = u16x16_splat (et_ip4);
933  u16x16 et16_ip6 = u16x16_splat (et_ip6);
934  u16x16 et16_mpls = u16x16_splat (et_mpls);
935  u16x16 et16_vlan = u16x16_splat (et_vlan);
936  u16x16 et16_dot1ad = u16x16_splat (et_dot1ad);
937  u16x16 next16_ip4 = u16x16_splat (next_ip4);
938  u16x16 next16_ip6 = u16x16_splat (next_ip6);
939  u16x16 next16_mpls = u16x16_splat (next_mpls);
940  u16x16 next16_l2 = u16x16_splat (next_l2);
941  u16x16 zero = { 0 };
942  u16x16 stairs = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
943#endif
944
945  etype = etypes;
946  n_left = n_packets;
947  next = nexts;
948  n_slowpath = 0;
949  i = 0;
950
951  /* fastpath - in l3 mode hadles ip4, ip6 and mpls packets, other packets
952     are considered as slowpath, in l2 mode all untagged packets are
953     considered as fastpath */
954  while (n_left > 0)
955    {
956#ifdef CLIB_HAVE_VEC256
957      if (n_left >= 16)
958	{
959	  u16x16 r = zero;
960	  u16x16 e16 = u16x16_load_unaligned (etype);
961	  if (main_is_l3)
962	    {
963	      r += (e16 == et16_ip4) & next16_ip4;
964	      r += (e16 == et16_ip6) & next16_ip6;
965	      r += (e16 == et16_mpls) & next16_mpls;
966	    }
967	  else
968	    r = ((e16 != et16_vlan) & (e16 != et16_dot1ad)) & next16_l2;
969	  u16x16_store_unaligned (r, next);
970
971	  if (!u16x16_is_all_zero (r == zero))
972	    {
973	      if (u16x16_is_all_zero (r))
974		{
975		  u16x16_store_unaligned (u16x16_splat (i) + stairs,
976					  slowpath_indices + n_slowpath);
977		  n_slowpath += 16;
978		}
979	      else
980		{
981		  for (int j = 0; j < 16; j++)
982		    if (next[j] == 0)
983		      slowpath_indices[n_slowpath++] = i + j;
984		}
985	    }
986
987	  etype += 16;
988	  next += 16;
989	  n_left -= 16;
990	  i += 16;
991	  continue;
992	}
993#endif
994      if (main_is_l3 && etype[0] == et_ip4)
995	next[0] = next_ip4;
996      else if (main_is_l3 && etype[0] == et_ip6)
997	next[0] = next_ip6;
998      else if (main_is_l3 && etype[0] == et_mpls)
999	next[0] = next_mpls;
1000      else if (main_is_l3 == 0 &&
1001	       etype[0] != et_vlan && etype[0] != et_dot1ad)
1002	next[0] = next_l2;
1003      else
1004	{
1005	  next[0] = 0;
1006	  slowpath_indices[n_slowpath++] = i;
1007	}
1008
1009      etype += 1;
1010      next += 1;
1011      n_left -= 1;
1012      i += 1;
1013    }
1014
1015  if (n_slowpath)
1016    {
1017      vnet_main_t *vnm = vnet_get_main ();
1018      n_left = n_slowpath;
1019      u16 *si = slowpath_indices;
1020      u32 last_unknown_etype = ~0;
1021      u32 last_unknown_next = ~0;
1022      eth_input_tag_lookup_t dot1ad_lookup, dot1q_lookup = {
1023	.mask = -1LL,
1024	.tag = tags[si[0]] ^ -1LL,
1025	.sw_if_index = ~0
1026      };
1027
1028      clib_memcpy_fast (&dot1ad_lookup, &dot1q_lookup, sizeof (dot1q_lookup));
1029
1030      while (n_left)
1031	{
1032	  i = si[0];
1033	  u16 etype = etypes[i];
1034
1035	  if (etype == et_vlan)
1036	    {
1037	      vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
1038	      eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
1039				    &dot1q_lookup, dmacs_bad[i], 0,
1040				    main_is_l3, dmac_check);
1041
1042	    }
1043	  else if (etype == et_dot1ad)
1044	    {
1045	      vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
1046	      eth_input_tag_lookup (vm, vnm, node, hi, tags[i], nexts + i, b,
1047				    &dot1ad_lookup, dmacs_bad[i], 1,
1048				    main_is_l3, dmac_check);
1049	    }
1050	  else
1051	    {
1052	      /* untagged packet with not well known etyertype */
1053	      if (last_unknown_etype != etype)
1054		{
1055		  last_unknown_etype = etype;
1056		  etype = clib_host_to_net_u16 (etype);
1057		  last_unknown_next = eth_input_next_by_type (etype);
1058		}
1059	      if (dmac_check && main_is_l3 && dmacs_bad[i])
1060		{
1061		  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
1062		  b->error = node->errors[ETHERNET_ERROR_L3_MAC_MISMATCH];
1063		  nexts[i] = ETHERNET_INPUT_NEXT_PUNT;
1064		}
1065	      else
1066		nexts[i] = last_unknown_next;
1067	    }
1068
1069	  /* next */
1070	  n_left--;
1071	  si++;
1072	}
1073
1074      eth_input_update_if_counters (vm, vnm, &dot1q_lookup);
1075      eth_input_update_if_counters (vm, vnm, &dot1ad_lookup);
1076    }
1077
1078  vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts, n_packets);
1079}
1080
1081static_always_inline void
1082eth_input_single_int (vlib_main_t * vm, vlib_node_runtime_t * node,
1083		      vnet_hw_interface_t * hi, u32 * from, u32 n_pkts,
1084		      int ip4_cksum_ok)
1085{
1086  ethernet_main_t *em = &ethernet_main;
1087  ethernet_interface_t *ei;
1088  ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
1089  main_intf_t *intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1090  subint_config_t *subint0 = &intf0->untagged_subint;
1091
1092  int main_is_l3 = (subint0->flags & SUBINT_CONFIG_L2) == 0;
1093  int promisc = (ei->flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) != 0;
1094
1095  if (main_is_l3)
1096    {
1097      /* main interface is L3, we dont expect tagged packets and interface
1098         is not in promisc node, so we dont't need to check DMAC */
1099      int is_l3 = 1;
1100
1101      if (promisc == 0)
1102	eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
1103				 ip4_cksum_ok, 0);
1104      else
1105	/* subinterfaces and promisc mode so DMAC check is needed */
1106	eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
1107				 ip4_cksum_ok, 1);
1108      return;
1109    }
1110  else
1111    {
1112      /* untagged packets are treated as L2 */
1113      int is_l3 = 0;
1114      eth_input_process_frame (vm, node, hi, from, n_pkts, is_l3,
1115			       ip4_cksum_ok, 1);
1116      return;
1117    }
1118}
1119
/* Per-packet tracing and rx pcap capture for the ethernet input nodes.
   Runs before packet processing; it reads but does not modify the
   buffers. */
static_always_inline void
ethernet_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
		      vlib_frame_t * from_frame)
{
  u32 *from, n_left;
  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
    {
      from = vlib_frame_vector_args (from_frame);
      n_left = from_frame->n_vectors;

      while (n_left)
	{
	  ethernet_input_trace_t *t0;
	  vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);

	  if (b0->flags & VLIB_BUFFER_IS_TRACED)
	    {
	      t0 = vlib_add_trace (vm, node, b0,
				   sizeof (ethernet_input_trace_t));
	      clib_memcpy_fast (t0->packet_data, b0->data + b0->current_data,
				sizeof (t0->packet_data));
	      t0->frame_flags = from_frame->flags;
	      /* NOTE(review): the frame scalar args are copied
	         unconditionally, but they only carry a valid
	         ethernet_input_frame_t when the frame was built with
	         ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX set - confirm that all
	         callers reserve the scalar area */
	      clib_memcpy_fast (&t0->frame_data,
				vlib_frame_scalar_args (from_frame),
				sizeof (ethernet_input_frame_t));
	    }
	  from += 1;
	  n_left -= 1;
	}
    }

  /* rx pcap capture if enabled */
  if (PREDICT_FALSE (vlib_global_main.pcap.pcap_rx_enable))
    {
      u32 bi0;
      vnet_pcap_t *pp = &vlib_global_main.pcap;

      from = vlib_frame_vector_args (from_frame);
      n_left = from_frame->n_vectors;
      while (n_left > 0)
	{
	  int classify_filter_result;
	  vlib_buffer_t *b0;
	  bi0 = from[0];
	  from++;
	  n_left--;
	  b0 = vlib_get_buffer (vm, bi0);
	  /* a configured global classify filter takes precedence over the
	     per-interface filtering below */
	  if (pp->filter_classify_table_index != ~0)
	    {
	      classify_filter_result =
		vnet_is_packet_traced_inline
		(b0, pp->filter_classify_table_index, 0 /* full classify */ );
	      if (classify_filter_result)
		pcap_add_buffer (&pp->pcap_main, vm, bi0,
				 pp->max_bytes_per_pkt);
	      continue;
	    }

	  /* capture on "any" interface (pcap_sw_if_index == 0) or only on
	     the configured rx interface */
	  if (pp->pcap_sw_if_index == 0 ||
	      pp->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_RX])
	    {
	      vnet_main_t *vnm = vnet_get_main ();
	      vnet_hw_interface_t *hi =
		vnet_get_sup_hw_interface
		(vnm, vnet_buffer (b0)->sw_if_index[VLIB_RX]);

	      /* Capture pkt if not filtered, or if filter hits */
	      if (hi->trace_classify_table_index == ~0 ||
		  vnet_is_packet_traced_inline
		  (b0, hi->trace_classify_table_index,
		   0 /* full classify */ ))
		pcap_add_buffer (&pp->pcap_main, vm, bi0,
				 pp->max_bytes_per_pkt);
	    }
	}
    }
}
1197
/* Generic ethernet input path, used when a frame may carry packets from
   multiple interfaces (or for the "type" / "not-l2" variants).  For each
   packet: parse the ethernet header, resolve the receiving
   (sub)interface via the vlan/qinq tables, batch subinterface rx
   counters, and enqueue to the next node.  Untagged packets on a cached
   interface take a speed path that avoids the table lookups. */
static_always_inline void
ethernet_input_inline (vlib_main_t * vm,
		       vlib_node_runtime_t * node,
		       u32 * from, u32 n_packets,
		       ethernet_input_variant_t variant)
{
  vnet_main_t *vnm = vnet_get_main ();
  ethernet_main_t *em = &ethernet_main;
  vlib_node_runtime_t *error_node;
  u32 n_left_from, next_index, *to_next;
  u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
  u32 thread_index = vm->thread_index;
  u32 cached_sw_if_index = ~0;
  u32 cached_is_l2 = 0;		/* shut up gcc */
  vnet_hw_interface_t *hi = NULL;	/* used for main interface only */
  ethernet_interface_t *ei = NULL;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
  vlib_buffer_t **b = bufs;

  /* errors are always accounted to the plain ethernet-input node */
  if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
    error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
  else
    error_node = node;

  n_left_from = n_packets;

  next_index = node->cached_next_index;
  /* stats batching state survives across calls via runtime_data[0] */
  stats_sw_if_index = node->runtime_data[0];
  stats_n_packets = stats_n_bytes = 0;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* dual loop - two packets per iteration */
      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  u32 bi0, bi1;
	  vlib_buffer_t *b0, *b1;
	  u8 next0, next1, error0, error1;
	  u16 type0, orig_type0, type1, orig_type1;
	  u16 outer_id0, inner_id0, outer_id1, inner_id1;
	  u32 match_flags0, match_flags1;
	  u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
	    new_sw_if_index1, len1;
	  vnet_hw_interface_t *hi0, *hi1;
	  main_intf_t *main_intf0, *main_intf1;
	  vlan_intf_t *vlan_intf0, *vlan_intf1;
	  qinq_intf_t *qinq_intf0, *qinq_intf1;
	  u32 is_l20, is_l21;
	  ethernet_header_t *e0, *e1;
	  u64 dmacs[2];
	  u8 dmacs_bad[2];

	  /* Prefetch next iteration. */
	  {
	    vlib_prefetch_buffer_header (b[2], STORE);
	    vlib_prefetch_buffer_header (b[3], STORE);

	    CLIB_PREFETCH (b[2]->data, sizeof (ethernet_header_t), LOAD);
	    CLIB_PREFETCH (b[3]->data, sizeof (ethernet_header_t), LOAD);
	  }

	  /* speculative enqueue to the current next frame */
	  bi0 = from[0];
	  bi1 = from[1];
	  to_next[0] = bi0;
	  to_next[1] = bi1;
	  from += 2;
	  to_next += 2;
	  n_left_to_next -= 2;
	  n_left_from -= 2;

	  b0 = b[0];
	  b1 = b[1];
	  b += 2;

	  error0 = error1 = ETHERNET_ERROR_NONE;
	  e0 = vlib_buffer_get_current (b0);
	  type0 = clib_net_to_host_u16 (e0->type);
	  e1 = vlib_buffer_get_current (b1);
	  type1 = clib_net_to_host_u16 (e1->type);

	  /* Set the L2 header offset for all packets */
	  vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
	  vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
	  b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;
	  b1->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;

	  /* Speed-path for the untagged case */
	  if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
			    && !ethernet_frame_is_any_tagged_x2 (type0,
								 type1)))
	    {
	      main_intf_t *intf0;
	      subint_config_t *subint0;
	      u32 sw_if_index0, sw_if_index1;

	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
	      is_l20 = cached_is_l2;

	      /* This is probably wholly unnecessary */
	      if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
		goto slowpath;

	      /* Now sw_if_index0 == sw_if_index1  */
	      if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
		{
		  /* cache miss - refresh the per-interface cache */
		  cached_sw_if_index = sw_if_index0;
		  hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
		  ei = ethernet_get_interface (em, hi->hw_if_index);
		  intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
		  subint0 = &intf0->untagged_subint;
		  cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
		}

	      if (PREDICT_TRUE (is_l20 != 0))
		{
		  /* L2 path: set l3 offset / l2_len, go to the l2 next */
		  vnet_buffer (b0)->l3_hdr_offset =
		    vnet_buffer (b0)->l2_hdr_offset +
		    sizeof (ethernet_header_t);
		  vnet_buffer (b1)->l3_hdr_offset =
		    vnet_buffer (b1)->l2_hdr_offset +
		    sizeof (ethernet_header_t);
		  b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
		  b1->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
		  next0 = em->l2_next;
		  vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
		  next1 = em->l2_next;
		  vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
		}
	      else
		{
		  /* L3 path: verify the destination MAC before accepting */
		  dmacs[0] = *(u64 *) e0;
		  dmacs[1] = *(u64 *) e1;

		  if (ei && vec_len (ei->secondary_addrs))
		    ethernet_input_inline_dmac_check (hi, dmacs,
						      dmacs_bad,
						      2 /* n_packets */ ,
						      ei,
						      1 /* have_sec_dmac */ );
		  else
		    ethernet_input_inline_dmac_check (hi, dmacs,
						      dmacs_bad,
						      2 /* n_packets */ ,
						      ei,
						      0 /* have_sec_dmac */ );

		  if (dmacs_bad[0])
		    error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
		  if (dmacs_bad[1])
		    error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;

		  vlib_buffer_advance (b0, sizeof (ethernet_header_t));
		  determine_next_node (em, variant, 0, type0, b0,
				       &error0, &next0);
		  vlib_buffer_advance (b1, sizeof (ethernet_header_t));
		  determine_next_node (em, variant, 0, type1, b1,
				       &error1, &next1);
		}
	      goto ship_it01;
	    }

	  /* Slow-path for the tagged case */
	slowpath:
	  parse_header (variant,
			b0,
			&type0,
			&orig_type0, &outer_id0, &inner_id0, &match_flags0);

	  parse_header (variant,
			b1,
			&type1,
			&orig_type1, &outer_id1, &inner_id1, &match_flags1);

	  old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];

	  eth_vlan_table_lookups (em,
				  vnm,
				  old_sw_if_index0,
				  orig_type0,
				  outer_id0,
				  inner_id0,
				  &hi0,
				  &main_intf0, &vlan_intf0, &qinq_intf0);

	  eth_vlan_table_lookups (em,
				  vnm,
				  old_sw_if_index1,
				  orig_type1,
				  outer_id1,
				  inner_id1,
				  &hi1,
				  &main_intf1, &vlan_intf1, &qinq_intf1);

	  identify_subint (hi0,
			   b0,
			   match_flags0,
			   main_intf0,
			   vlan_intf0,
			   qinq_intf0, &new_sw_if_index0, &error0, &is_l20);

	  identify_subint (hi1,
			   b1,
			   match_flags1,
			   main_intf1,
			   vlan_intf1,
			   qinq_intf1, &new_sw_if_index1, &error1, &is_l21);

	  // Save RX sw_if_index for later nodes
	  vnet_buffer (b0)->sw_if_index[VLIB_RX] =
	    error0 !=
	    ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
	  vnet_buffer (b1)->sw_if_index[VLIB_RX] =
	    error1 !=
	    ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;

	  // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
	  if (((new_sw_if_index0 != ~0)
	       && (new_sw_if_index0 != old_sw_if_index0))
	      || ((new_sw_if_index1 != ~0)
		  && (new_sw_if_index1 != old_sw_if_index1)))
	    {

	      len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
		- vnet_buffer (b0)->l2_hdr_offset;
	      len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
		- vnet_buffer (b1)->l2_hdr_offset;

	      /* optimistically batch both packets into the running stats */
	      stats_n_packets += 2;
	      stats_n_bytes += len0 + len1;

	      if (PREDICT_FALSE
		  (!(new_sw_if_index0 == stats_sw_if_index
		     && new_sw_if_index1 == stats_sw_if_index)))
		{
		  /* at least one packet is not on the batched subint - undo
		     the batch and count individually */
		  stats_n_packets -= 2;
		  stats_n_bytes -= len0 + len1;

		  if (new_sw_if_index0 != old_sw_if_index0
		      && new_sw_if_index0 != ~0)
		    vlib_increment_combined_counter (vnm->
						     interface_main.combined_sw_if_counters
						     +
						     VNET_INTERFACE_COUNTER_RX,
						     thread_index,
						     new_sw_if_index0, 1,
						     len0);
		  if (new_sw_if_index1 != old_sw_if_index1
		      && new_sw_if_index1 != ~0)
		    vlib_increment_combined_counter (vnm->
						     interface_main.combined_sw_if_counters
						     +
						     VNET_INTERFACE_COUNTER_RX,
						     thread_index,
						     new_sw_if_index1, 1,
						     len1);

		  /* both packets on the same (new) subint: flush the old
		     batch and re-target batching to the new subint */
		  if (new_sw_if_index0 == new_sw_if_index1)
		    {
		      if (stats_n_packets > 0)
			{
			  vlib_increment_combined_counter
			    (vnm->interface_main.combined_sw_if_counters
			     + VNET_INTERFACE_COUNTER_RX,
			     thread_index,
			     stats_sw_if_index,
			     stats_n_packets, stats_n_bytes);
			  stats_n_packets = stats_n_bytes = 0;
			}
		      stats_sw_if_index = new_sw_if_index0;
		    }
		}
	    }

	  if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
	    is_l20 = is_l21 = 0;

	  determine_next_node (em, variant, is_l20, type0, b0, &error0,
			       &next0);
	  determine_next_node (em, variant, is_l21, type1, b1, &error1,
			       &next1);

	ship_it01:
	  b0->error = error_node->errors[error0];
	  b1->error = error_node->errors[error1];

	  // verify speculative enqueue
	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, bi1, next0,
					   next1);
	}

      /* single loop - one packet per iteration, same structure as above */
      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u8 error0, next0;
	  u16 type0, orig_type0;
	  u16 outer_id0, inner_id0;
	  u32 match_flags0;
	  u32 old_sw_if_index0, new_sw_if_index0, len0;
	  vnet_hw_interface_t *hi0;
	  main_intf_t *main_intf0;
	  vlan_intf_t *vlan_intf0;
	  qinq_intf_t *qinq_intf0;
	  ethernet_header_t *e0;
	  u32 is_l20;
	  u64 dmacs[2];
	  u8 dmacs_bad[2];

	  // Prefetch next iteration
	  if (n_left_from > 1)
	    {
	      vlib_prefetch_buffer_header (b[1], STORE);
	      CLIB_PREFETCH (b[1]->data, CLIB_CACHE_LINE_BYTES, LOAD);
	    }

	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = b[0];
	  b += 1;

	  error0 = ETHERNET_ERROR_NONE;
	  e0 = vlib_buffer_get_current (b0);
	  type0 = clib_net_to_host_u16 (e0->type);

	  /* Set the L2 header offset for all packets */
	  vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
	  b0->flags |= VNET_BUFFER_F_L2_HDR_OFFSET_VALID;

	  /* Speed-path for the untagged case */
	  if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
			    && !ethernet_frame_is_tagged (type0)))
	    {
	      main_intf_t *intf0;
	      subint_config_t *subint0;
	      u32 sw_if_index0;

	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      is_l20 = cached_is_l2;

	      if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
		{
		  /* cache miss - refresh the per-interface cache */
		  cached_sw_if_index = sw_if_index0;
		  hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
		  ei = ethernet_get_interface (em, hi->hw_if_index);
		  intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
		  subint0 = &intf0->untagged_subint;
		  cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
		}


	      if (PREDICT_TRUE (is_l20 != 0))
		{
		  vnet_buffer (b0)->l3_hdr_offset =
		    vnet_buffer (b0)->l2_hdr_offset +
		    sizeof (ethernet_header_t);
		  b0->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
		  next0 = em->l2_next;
		  vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
		}
	      else
		{
		  /* L3 path: verify the destination MAC before accepting */
		  dmacs[0] = *(u64 *) e0;

		  if (ei && vec_len (ei->secondary_addrs))
		    ethernet_input_inline_dmac_check (hi, dmacs,
						      dmacs_bad,
						      1 /* n_packets */ ,
						      ei,
						      1 /* have_sec_dmac */ );
		  else
		    ethernet_input_inline_dmac_check (hi, dmacs,
						      dmacs_bad,
						      1 /* n_packets */ ,
						      ei,
						      0 /* have_sec_dmac */ );

		  if (dmacs_bad[0])
		    error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;

		  vlib_buffer_advance (b0, sizeof (ethernet_header_t));
		  determine_next_node (em, variant, 0, type0, b0,
				       &error0, &next0);
		}
	      goto ship_it0;
	    }

	  /* Slow-path for the tagged case */
	  parse_header (variant,
			b0,
			&type0,
			&orig_type0, &outer_id0, &inner_id0, &match_flags0);

	  old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

	  eth_vlan_table_lookups (em,
				  vnm,
				  old_sw_if_index0,
				  orig_type0,
				  outer_id0,
				  inner_id0,
				  &hi0,
				  &main_intf0, &vlan_intf0, &qinq_intf0);

	  identify_subint (hi0,
			   b0,
			   match_flags0,
			   main_intf0,
			   vlan_intf0,
			   qinq_intf0, &new_sw_if_index0, &error0, &is_l20);

	  // Save RX sw_if_index for later nodes
	  vnet_buffer (b0)->sw_if_index[VLIB_RX] =
	    error0 !=
	    ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;

	  // Increment subinterface stats
	  // Note that interface-level counters have already been incremented
	  // prior to calling this function. Thus only subinterface counters
	  // are incremented here.
	  //
	  // Interface level counters include packets received on the main
	  // interface and all subinterfaces. Subinterface level counters
	  // include only those packets received on that subinterface
	  // Increment stats if the subint is valid and it is not the main intf
	  if ((new_sw_if_index0 != ~0)
	      && (new_sw_if_index0 != old_sw_if_index0))
	    {

	      len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
		- vnet_buffer (b0)->l2_hdr_offset;

	      stats_n_packets += 1;
	      stats_n_bytes += len0;

	      // Batch stat increments from the same subinterface so counters
	      // don't need to be incremented for every packet.
	      if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
		{
		  stats_n_packets -= 1;
		  stats_n_bytes -= len0;

		  if (new_sw_if_index0 != ~0)
		    vlib_increment_combined_counter
		      (vnm->interface_main.combined_sw_if_counters
		       + VNET_INTERFACE_COUNTER_RX,
		       thread_index, new_sw_if_index0, 1, len0);
		  if (stats_n_packets > 0)
		    {
		      vlib_increment_combined_counter
			(vnm->interface_main.combined_sw_if_counters
			 + VNET_INTERFACE_COUNTER_RX,
			 thread_index,
			 stats_sw_if_index, stats_n_packets, stats_n_bytes);
		      stats_n_packets = stats_n_bytes = 0;
		    }
		  stats_sw_if_index = new_sw_if_index0;
		}
	    }

	  if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
	    is_l20 = 0;

	  determine_next_node (em, variant, is_l20, type0, b0, &error0,
			       &next0);

	ship_it0:
	  b0->error = error_node->errors[error0];

	  // verify speculative enqueue
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  // Increment any remaining batched stats
  if (stats_n_packets > 0)
    {
      vlib_increment_combined_counter
	(vnm->interface_main.combined_sw_if_counters
	 + VNET_INTERFACE_COUNTER_RX,
	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
      /* remember the batched subint for the next frame */
      node->runtime_data[0] = stats_sw_if_index;
    }
}
1697
1698VLIB_NODE_FN (ethernet_input_node) (vlib_main_t * vm,
1699				    vlib_node_runtime_t * node,
1700				    vlib_frame_t * frame)
1701{
1702  vnet_main_t *vnm = vnet_get_main ();
1703  u32 *from = vlib_frame_vector_args (frame);
1704  u32 n_packets = frame->n_vectors;
1705
1706  ethernet_input_trace (vm, node, frame);
1707
1708  if (frame->flags & ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX)
1709    {
1710      ethernet_input_frame_t *ef = vlib_frame_scalar_args (frame);
1711      int ip4_cksum_ok = (frame->flags & ETH_INPUT_FRAME_F_IP4_CKSUM_OK) != 0;
1712      vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ef->hw_if_index);
1713      eth_input_single_int (vm, node, hi, from, n_packets, ip4_cksum_ok);
1714    }
1715  else
1716    ethernet_input_inline (vm, node, from, n_packets,
1717			   ETHERNET_INPUT_VARIANT_ETHERNET);
1718  return n_packets;
1719}
1720
1721VLIB_NODE_FN (ethernet_input_type_node) (vlib_main_t * vm,
1722					 vlib_node_runtime_t * node,
1723					 vlib_frame_t * from_frame)
1724{
1725  u32 *from = vlib_frame_vector_args (from_frame);
1726  u32 n_packets = from_frame->n_vectors;
1727  ethernet_input_trace (vm, node, from_frame);
1728  ethernet_input_inline (vm, node, from, n_packets,
1729			 ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
1730  return n_packets;
1731}
1732
1733VLIB_NODE_FN (ethernet_input_not_l2_node) (vlib_main_t * vm,
1734					   vlib_node_runtime_t * node,
1735					   vlib_frame_t * from_frame)
1736{
1737  u32 *from = vlib_frame_vector_args (from_frame);
1738  u32 n_packets = from_frame->n_vectors;
1739  ethernet_input_trace (vm, node, from_frame);
1740  ethernet_input_inline (vm, node, from, n_packets,
1741			 ETHERNET_INPUT_VARIANT_NOT_L2);
1742  return n_packets;
1743}
1744
1745
1746// Return the subinterface config struct for the given sw_if_index
1747// Also return via parameter the appropriate match flags for the
1748// configured number of tags.
1749// On error (unsupported or not ethernet) return 0.
1750static subint_config_t *
1751ethernet_sw_interface_get_config (vnet_main_t * vnm,
1752				  u32 sw_if_index,
1753				  u32 * flags, u32 * unsupported)
1754{
1755  ethernet_main_t *em = &ethernet_main;
1756  vnet_hw_interface_t *hi;
1757  vnet_sw_interface_t *si;
1758  main_intf_t *main_intf;
1759  vlan_table_t *vlan_table;
1760  qinq_table_t *qinq_table;
1761  subint_config_t *subint = 0;
1762
1763  hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1764
1765  if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
1766    {
1767      *unsupported = 0;
1768      goto done;		// non-ethernet interface
1769    }
1770
1771  // ensure there's an entry for the main intf (shouldn't really be necessary)
1772  vec_validate (em->main_intfs, hi->hw_if_index);
1773  main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
1774
1775  // Locate the subint for the given ethernet config
1776  si = vnet_get_sw_interface (vnm, sw_if_index);
1777
1778  if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
1779    {
1780      p2p_ethernet_main_t *p2pm = &p2p_main;
1781      u32 p2pe_sw_if_index =
1782	p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
1783      if (p2pe_sw_if_index == ~0)
1784	{
1785	  pool_get (p2pm->p2p_subif_pool, subint);
1786	  si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
1787	}
1788      else
1789	subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
1790      *flags = SUBINT_CONFIG_P2P;
1791    }
1792  else if (si->type == VNET_SW_INTERFACE_TYPE_PIPE)
1793    {
1794      pipe_t *pipe;
1795
1796      pipe = pipe_get (sw_if_index);
1797      subint = &pipe->subint;
1798      *flags = SUBINT_CONFIG_P2P;
1799    }
1800  else if (si->sub.eth.flags.default_sub)
1801    {
1802      subint = &main_intf->default_subint;
1803      *flags = SUBINT_CONFIG_MATCH_1_TAG |
1804	SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1805    }
1806  else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
1807    {
1808      // if no flags are set then this is a main interface
1809      // so treat as untagged
1810      subint = &main_intf->untagged_subint;
1811      *flags = SUBINT_CONFIG_MATCH_0_TAG;
1812    }
1813  else
1814    {
1815      // one or two tags
1816      // first get the vlan table
1817      if (si->sub.eth.flags.dot1ad)
1818	{
1819	  if (main_intf->dot1ad_vlans == 0)
1820	    {
1821	      // Allocate a vlan table from the pool
1822	      pool_get (em->vlan_pool, vlan_table);
1823	      main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
1824	    }
1825	  else
1826	    {
1827	      // Get ptr to existing vlan table
1828	      vlan_table =
1829		vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
1830	    }
1831	}
1832      else
1833	{			// dot1q
1834	  if (main_intf->dot1q_vlans == 0)
1835	    {
1836	      // Allocate a vlan table from the pool
1837	      pool_get (em->vlan_pool, vlan_table);
1838	      main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
1839	    }
1840	  else
1841	    {
1842	      // Get ptr to existing vlan table
1843	      vlan_table =
1844		vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
1845	    }
1846	}
1847
1848      if (si->sub.eth.flags.one_tag)
1849	{
1850	  *flags = si->sub.eth.flags.exact_match ?
1851	    SUBINT_CONFIG_MATCH_1_TAG :
1852	    (SUBINT_CONFIG_MATCH_1_TAG |
1853	     SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1854
1855	  if (si->sub.eth.flags.outer_vlan_id_any)
1856	    {
1857	      // not implemented yet
1858	      *unsupported = 1;
1859	      goto done;
1860	    }
1861	  else
1862	    {
1863	      // a single vlan, a common case
1864	      subint =
1865		&vlan_table->vlans[si->sub.eth.
1866				   outer_vlan_id].single_tag_subint;
1867	    }
1868
1869	}
1870      else
1871	{
1872	  // Two tags
1873	  *flags = si->sub.eth.flags.exact_match ?
1874	    SUBINT_CONFIG_MATCH_2_TAG :
1875	    (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
1876
1877	  if (si->sub.eth.flags.outer_vlan_id_any
1878	      && si->sub.eth.flags.inner_vlan_id_any)
1879	    {
1880	      // not implemented yet
1881	      *unsupported = 1;
1882	      goto done;
1883	    }
1884
1885	  if (si->sub.eth.flags.inner_vlan_id_any)
1886	    {
1887	      // a specific outer and "any" inner
1888	      // don't need a qinq table for this
1889	      subint =
1890		&vlan_table->vlans[si->sub.eth.
1891				   outer_vlan_id].inner_any_subint;
1892	      if (si->sub.eth.flags.exact_match)
1893		{
1894		  *flags = SUBINT_CONFIG_MATCH_2_TAG;
1895		}
1896	      else
1897		{
1898		  *flags = SUBINT_CONFIG_MATCH_2_TAG |
1899		    SUBINT_CONFIG_MATCH_3_TAG;
1900		}
1901	    }
1902	  else
1903	    {
1904	      // a specific outer + specifc innner vlan id, a common case
1905
1906	      // get the qinq table
1907	      if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
1908		{
1909		  // Allocate a qinq table from the pool
1910		  pool_get (em->qinq_pool, qinq_table);
1911		  vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
1912		    qinq_table - em->qinq_pool;
1913		}
1914	      else
1915		{
1916		  // Get ptr to existing qinq table
1917		  qinq_table =
1918		    vec_elt_at_index (em->qinq_pool,
1919				      vlan_table->vlans[si->sub.
1920							eth.outer_vlan_id].
1921				      qinqs);
1922		}
1923	      subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
1924	    }
1925	}
1926    }
1927
1928done:
1929  return subint;
1930}
1931
1932static clib_error_t *
1933ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
1934{
1935  subint_config_t *subint;
1936  u32 dummy_flags;
1937  u32 dummy_unsup;
1938  clib_error_t *error = 0;
1939
1940  // Find the config for this subinterface
1941  subint =
1942    ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1943				      &dummy_unsup);
1944
1945  if (subint == 0)
1946    {
1947      // not implemented yet or not ethernet
1948      goto done;
1949    }
1950
1951  subint->sw_if_index =
1952    ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? sw_if_index : ~0);
1953
1954done:
1955  return error;
1956}
1957
1958VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
1959
1960
1961#ifndef CLIB_MARCH_VARIANT
1962// Set the L2/L3 mode for the subinterface
1963void
1964ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
1965{
1966  subint_config_t *subint;
1967  u32 dummy_flags;
1968  u32 dummy_unsup;
1969  int is_port;
1970  vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
1971
1972  is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB);
1973
1974  // Find the config for this subinterface
1975  subint =
1976    ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
1977				      &dummy_unsup);
1978
1979  if (subint == 0)
1980    {
1981      // unimplemented or not ethernet
1982      goto done;
1983    }
1984
1985  // Double check that the config we found is for our interface (or the interface is down)
1986  ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
1987
1988  if (l2)
1989    {
1990      subint->flags |= SUBINT_CONFIG_L2;
1991      if (is_port)
1992	subint->flags |=
1993	  SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
1994	  | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
1995    }
1996  else
1997    {
1998      subint->flags &= ~SUBINT_CONFIG_L2;
1999      if (is_port)
2000	subint->flags &=
2001	  ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
2002	    | SUBINT_CONFIG_MATCH_3_TAG);
2003    }
2004
2005done:
2006  return;
2007}
2008
2009/*
2010 * Set the L2/L3 mode for the subinterface regardless of port
2011 */
2012void
2013ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
2014					  u32 sw_if_index, u32 l2)
2015{
2016  subint_config_t *subint;
2017  u32 dummy_flags;
2018  u32 dummy_unsup;
2019
2020  /* Find the config for this subinterface */
2021  subint =
2022    ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags,
2023				      &dummy_unsup);
2024
2025  if (subint == 0)
2026    {
2027      /* unimplemented or not ethernet */
2028      goto done;
2029    }
2030
2031  /*
2032   * Double check that the config we found is for our interface (or the
2033   * interface is down)
2034   */
2035  ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0));
2036
2037  if (l2)
2038    {
2039      subint->flags |= SUBINT_CONFIG_L2;
2040    }
2041  else
2042    {
2043      subint->flags &= ~SUBINT_CONFIG_L2;
2044    }
2045
2046done:
2047  return;
2048}
2049#endif
2050
2051static clib_error_t *
2052ethernet_sw_interface_add_del (vnet_main_t * vnm,
2053			       u32 sw_if_index, u32 is_create)
2054{
2055  clib_error_t *error = 0;
2056  subint_config_t *subint;
2057  u32 match_flags;
2058  u32 unsupported = 0;
2059
2060  // Find the config for this subinterface
2061  subint =
2062    ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
2063				      &unsupported);
2064
2065  if (subint == 0)
2066    {
2067      // not implemented yet or not ethernet
2068      if (unsupported)
2069	{
2070	  // this is the NYI case
2071	  error = clib_error_return (0, "not implemented yet");
2072	}
2073      goto done;
2074    }
2075
2076  if (!is_create)
2077    {
2078      subint->flags = 0;
2079      return error;
2080    }
2081
2082  // Initialize the subint
2083  if (subint->flags & SUBINT_CONFIG_VALID)
2084    {
2085      // Error vlan already in use
2086      error = clib_error_return (0, "vlan is already in use");
2087    }
2088  else
2089    {
2090      // Note that config is L3 by default
2091      subint->flags = SUBINT_CONFIG_VALID | match_flags;
2092      subint->sw_if_index = ~0;	// because interfaces are initially down
2093    }
2094
2095done:
2096  return error;
2097}
2098
2099VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
2100
/* Human-readable string for each ethernet input error counter,
 * generated by expanding the ethernet_error() x-macro over error.def
 * (the third macro argument is the string). Indexed by error code;
 * referenced by ethernet_input_node's .error_strings below. */
static char *ethernet_error_strings[] = {
#define ethernet_error(n,c,s) s,
#include "error.def"
#undef ethernet_error
};
2106
/* *INDENT-OFF* */
/* Primary ethernet input node. Unlike the two variants below, it
 * declares per-frame scalar data (ethernet_input_frame_t), error
 * counters, and buffer/trace formatting hooks. */
VLIB_REGISTER_NODE (ethernet_input_node) = {
  .name = "ethernet-input",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .scalar_size = sizeof (ethernet_input_frame_t),
  .n_errors = ETHERNET_N_ERROR,
  .error_strings = ethernet_error_strings,
  .n_next_nodes = ETHERNET_INPUT_N_NEXT,
  .next_nodes = {
#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
    foreach_ethernet_input_next
#undef _
  },
  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_ethernet_input_trace,
  .unformat_buffer = unformat_ethernet_header,
};

/* Variant input node; shares the same next-node arcs as ethernet-input
 * (the registration functions below assert the arc indices stay aligned
 * across all three nodes). */
VLIB_REGISTER_NODE (ethernet_input_type_node) = {
  .name = "ethernet-input-type",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .n_next_nodes = ETHERNET_INPUT_N_NEXT,
  .next_nodes = {
#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
    foreach_ethernet_input_next
#undef _
  },
};

/* Variant input node for the not-L2 path; arcs kept aligned with the
 * other two input nodes (see the ASSERTs in the register functions). */
VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
  .name = "ethernet-input-not-l2",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),
  .n_next_nodes = ETHERNET_INPUT_N_NEXT,
  .next_nodes = {
#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
    foreach_ethernet_input_next
#undef _
  },
};
/* *INDENT-ON* */
2150
2151#ifndef CLIB_MARCH_VARIANT
2152void
2153ethernet_set_rx_redirect (vnet_main_t * vnm,
2154			  vnet_hw_interface_t * hi, u32 enable)
2155{
2156  // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets
2157  // don't go directly to ip4-input)
2158  vnet_hw_interface_rx_redirect_to_node
2159    (vnm, hi->hw_if_index, enable ? ethernet_input_node.index : ~0);
2160}
2161
2162
2163/*
2164 * Initialization and registration for the next_by_ethernet structure
2165 */
2166
2167clib_error_t *
2168next_by_ethertype_init (next_by_ethertype_t * l3_next)
2169{
2170  l3_next->input_next_by_type = sparse_vec_new
2171    ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]),
2172     /* bits in index */ BITS (((ethernet_header_t *) 0)->type));
2173
2174  vec_validate (l3_next->sparse_index_by_input_next_index,
2175		ETHERNET_INPUT_NEXT_DROP);
2176  vec_validate (l3_next->sparse_index_by_input_next_index,
2177		ETHERNET_INPUT_NEXT_PUNT);
2178  l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
2179    SPARSE_VEC_INVALID_INDEX;
2180  l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
2181    SPARSE_VEC_INVALID_INDEX;
2182
2183  /*
2184   * Make sure we don't wipe out an ethernet registration by mistake
2185   * Can happen if init function ordering constraints are missing.
2186   */
2187  if (CLIB_DEBUG > 0)
2188    {
2189      ethernet_main_t *em = &ethernet_main;
2190      ASSERT (em->next_by_ethertype_register_called == 0);
2191    }
2192
2193  return 0;
2194}
2195
2196// Add an ethertype -> next index mapping to the structure
2197clib_error_t *
2198next_by_ethertype_register (next_by_ethertype_t * l3_next,
2199			    u32 ethertype, u32 next_index)
2200{
2201  u32 i;
2202  u16 *n;
2203  ethernet_main_t *em = &ethernet_main;
2204
2205  if (CLIB_DEBUG > 0)
2206    {
2207      ethernet_main_t *em = &ethernet_main;
2208      em->next_by_ethertype_register_called = 1;
2209    }
2210
2211  /* Setup ethernet type -> next index sparse vector mapping. */
2212  n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
2213  n[0] = next_index;
2214
2215  /* Rebuild next index -> sparse index inverse mapping when sparse vector
2216     is updated. */
2217  vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
2218  for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
2219    l3_next->
2220      sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
2221
2222  // do not allow the cached next index's to be updated if L3
2223  // redirect is enabled, as it will have overwritten them
2224  if (!em->redirect_l3)
2225    {
2226      // Cache common ethertypes directly
2227      if (ethertype == ETHERNET_TYPE_IP4)
2228	{
2229	  l3_next->input_next_ip4 = next_index;
2230	}
2231      else if (ethertype == ETHERNET_TYPE_IP6)
2232	{
2233	  l3_next->input_next_ip6 = next_index;
2234	}
2235      else if (ethertype == ETHERNET_TYPE_MPLS)
2236	{
2237	  l3_next->input_next_mpls = next_index;
2238	}
2239    }
2240  return 0;
2241}
2242
2243void
2244ethernet_input_init (vlib_main_t * vm, ethernet_main_t * em)
2245{
2246  __attribute__ ((unused)) vlan_table_t *invalid_vlan_table;
2247  __attribute__ ((unused)) qinq_table_t *invalid_qinq_table;
2248
2249  ethernet_setup_node (vm, ethernet_input_node.index);
2250  ethernet_setup_node (vm, ethernet_input_type_node.index);
2251  ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
2252
2253  next_by_ethertype_init (&em->l3_next);
2254
2255  // Initialize pools and vector for vlan parsing
2256  vec_validate (em->main_intfs, 10);	// 10 main interfaces
2257  pool_alloc (em->vlan_pool, 10);
2258  pool_alloc (em->qinq_pool, 1);
2259
2260  // The first vlan pool will always be reserved for an invalid table
2261  pool_get (em->vlan_pool, invalid_vlan_table);	// first id = 0
2262  // The first qinq pool will always be reserved for an invalid table
2263  pool_get (em->qinq_pool, invalid_qinq_table);	// first id = 0
2264}
2265
2266void
2267ethernet_register_input_type (vlib_main_t * vm,
2268			      ethernet_type_t type, u32 node_index)
2269{
2270  ethernet_main_t *em = &ethernet_main;
2271  ethernet_type_info_t *ti;
2272  u32 i;
2273
2274  {
2275    clib_error_t *error = vlib_call_init_function (vm, ethernet_init);
2276    if (error)
2277      clib_error_report (error);
2278  }
2279
2280  ti = ethernet_get_type_info (em, type);
2281  if (ti == 0)
2282    {
2283      clib_warning ("type_info NULL for type %d", type);
2284      return;
2285    }
2286  ti->node_index = node_index;
2287  ti->next_index = vlib_node_add_next (vm,
2288				       ethernet_input_node.index, node_index);
2289  i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2290  ASSERT (i == ti->next_index);
2291
2292  i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2293  ASSERT (i == ti->next_index);
2294
2295  // Add the L3 node for this ethertype to the next nodes structure
2296  next_by_ethertype_register (&em->l3_next, type, ti->next_index);
2297
2298  // Call the registration functions for other nodes that want a mapping
2299  l2bvi_register_input_type (vm, type, node_index);
2300}
2301
2302void
2303ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
2304{
2305  ethernet_main_t *em = &ethernet_main;
2306  u32 i;
2307
2308  em->l2_next =
2309    vlib_node_add_next (vm, ethernet_input_node.index, node_index);
2310
2311  /*
2312   * Even if we never use these arcs, we have to align the next indices...
2313   */
2314  i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2315
2316  ASSERT (i == em->l2_next);
2317
2318  i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2319  ASSERT (i == em->l2_next);
2320}
2321
2322// Register a next node for L3 redirect, and enable L3 redirect
2323void
2324ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
2325{
2326  ethernet_main_t *em = &ethernet_main;
2327  u32 i;
2328
2329  em->redirect_l3 = 1;
2330  em->redirect_l3_next = vlib_node_add_next (vm,
2331					     ethernet_input_node.index,
2332					     node_index);
2333  /*
2334   * Change the cached next nodes to the redirect node
2335   */
2336  em->l3_next.input_next_ip4 = em->redirect_l3_next;
2337  em->l3_next.input_next_ip6 = em->redirect_l3_next;
2338  em->l3_next.input_next_mpls = em->redirect_l3_next;
2339
2340  /*
2341   * Even if we never use these arcs, we have to align the next indices...
2342   */
2343  i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
2344
2345  ASSERT (i == em->redirect_l3_next);
2346
2347  i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
2348
2349  ASSERT (i == em->redirect_l3_next);
2350}
2351#endif
2352
2353/*
2354 * fd.io coding-style-patch-verification: ON
2355 *
2356 * Local Variables:
2357 * eval: (c-set-style "gnu")
2358 * End:
2359 */
2360