nsh_output.c revision 2fd17601
1/*
2 * nsh_output.c: NSH Adj rewrite
3 *
4 * Copyright (c) 2017-2019 Intel and/or its affiliates.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include <vlib/vlib.h>
19#include <vnet/pg/pg.h>
20#include <vnet/ip/ip.h>
21#include <nsh/nsh.h>
22
23typedef struct {
24  /* Adjacency taken. */
25  u32 adj_index;
26  u32 flow_hash;
27
28  /* Packet data, possibly *after* rewrite. */
29  u8 packet_data[64 - 1*sizeof(u32)];
30} nsh_output_trace_t;
31
/* X-macro: (symbol suffix, graph node name) for each next node. */
#define foreach_nsh_output_next         \
  _(DROP, "error-drop")                 \
  _(INTERFACE, "interface-output")

/* Next-node indices, generated from foreach_nsh_output_next. */
typedef enum
{
#define _(sym, node_name) NSH_OUTPUT_NEXT_##sym,
  foreach_nsh_output_next
#undef _
    NSH_OUTPUT_N_NEXT,
} nsh_output_next_t;
42
43static u8 *
44format_nsh_output_trace (u8 * s, va_list * args)
45{
46  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48  nsh_output_trace_t * t = va_arg (*args, nsh_output_trace_t *);
49  uword indent = format_get_indent (s);
50
51  s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
52              t->adj_index,
53              format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
54              t->flow_hash);
55  s = format (s, "\n%U%U",
56              format_white_space, indent,
57              format_ip_adjacency_packet_data,
58              t->adj_index, t->packet_data, sizeof (t->packet_data));
59  return s;
60}
61
62static inline uword
63nsh_output_inline (vlib_main_t * vm,
64                   vlib_node_runtime_t * node,
65                   vlib_frame_t * from_frame,
66                   int is_midchain)
67{
68  u32 n_left_from, next_index, * from, * to_next, thread_index;
69  vlib_node_runtime_t * error_node;
70  u32 n_left_to_next;
71  nsh_main_t *nm;
72
73  thread_index = vlib_get_thread_index();
74  error_node = vlib_node_get_runtime (vm, nsh_eth_output_node.index);
75  from = vlib_frame_vector_args (from_frame);
76  n_left_from = from_frame->n_vectors;
77  next_index = node->cached_next_index;
78  nm = &nsh_main;
79
80  while (n_left_from > 0)
81    {
82      vlib_get_next_frame (vm, node, next_index,
83                           to_next, n_left_to_next);
84
85      while (n_left_from >= 4 && n_left_to_next >= 2)
86        {
87          ip_adjacency_t * adj0;
88          nsh_base_header_t *hdr0;
89          ethernet_header_t * eth_hdr0;
90          vlib_buffer_t * p0;
91          u32 pi0, rw_len0, adj_index0, next0, error0;
92
93          ip_adjacency_t * adj1;
94          nsh_base_header_t *hdr1;
95          ethernet_header_t * eth_hdr1;
96          vlib_buffer_t * p1;
97          u32 pi1, rw_len1, adj_index1, next1, error1;
98
99          /* Prefetch next iteration. */
100          {
101            vlib_buffer_t * p2, * p3;
102
103            p2 = vlib_get_buffer (vm, from[2]);
104            p3 = vlib_get_buffer (vm, from[3]);
105
106            vlib_prefetch_buffer_header (p2, STORE);
107            vlib_prefetch_buffer_header (p3, STORE);
108
109            CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
110            CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
111          }
112
113          pi0 = to_next[0] = from[0];
114          pi1 = to_next[1] = from[1];
115
116          from += 2;
117          n_left_from -= 2;
118          to_next += 2;
119          n_left_to_next -= 2;
120
121          p0 = vlib_get_buffer (vm, pi0);
122          p1 = vlib_get_buffer (vm, pi1);
123
124          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
125          adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
126
127          adj0 = adj_get(adj_index0);
128          adj1 = adj_get(adj_index1);
129          hdr0 = vlib_buffer_get_current (p0);
130          hdr1 = vlib_buffer_get_current (p1);
131
132          /* Guess we are only writing on simple Ethernet header. */
133          vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
134                                   sizeof (ethernet_header_t));
135
136          eth_hdr0 = (ethernet_header_t*)((u8 *)hdr0-sizeof(ethernet_header_t));
137          eth_hdr0->type = clib_host_to_net_u16(ETHERNET_TYPE_NSH);
138          eth_hdr1 = (ethernet_header_t*)((u8 *)hdr1-sizeof(ethernet_header_t));
139          eth_hdr1->type = clib_host_to_net_u16(ETHERNET_TYPE_NSH);
140
141          /* Update packet buffer attributes/set output interface. */
142          rw_len0 = adj0[0].rewrite_header.data_bytes;
143          rw_len1 = adj1[0].rewrite_header.data_bytes;
144
145          /* Bump the adj counters for packet and bytes */
146          vlib_increment_combined_counter
147              (&adjacency_counters,
148               thread_index,
149               adj_index0,
150               1,
151               vlib_buffer_length_in_chain (vm, p0) + rw_len0);
152          vlib_increment_combined_counter
153              (&adjacency_counters,
154               thread_index,
155               adj_index1,
156               1,
157               vlib_buffer_length_in_chain (vm, p1) + rw_len1);
158
159          /* Check MTU of outgoing interface. */
160          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
161                           adj0[0].rewrite_header.max_l3_packet_bytes))
162            {
163              p0->current_data -= rw_len0;
164              p0->current_length += rw_len0;
165
166              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
167                  adj0[0].rewrite_header.sw_if_index;
168              next0 = NSH_OUTPUT_NEXT_INTERFACE;
169              error0 = IP4_ERROR_NONE;
170
171              if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
172                vnet_feature_arc_start (nm->output_feature_arc_index,
173                                        adj0[0].rewrite_header.sw_if_index,
174                                        &next0, p0);
175            }
176          else
177            {
178              error0 = IP4_ERROR_MTU_EXCEEDED;
179              next0 = NSH_OUTPUT_NEXT_DROP;
180            }
181          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
182                           adj1[0].rewrite_header.max_l3_packet_bytes))
183            {
184              p1->current_data -= rw_len1;
185              p1->current_length += rw_len1;
186
187              vnet_buffer (p1)->sw_if_index[VLIB_TX] =
188                  adj1[0].rewrite_header.sw_if_index;
189              next1 = NSH_OUTPUT_NEXT_INTERFACE;
190              error1 = IP4_ERROR_NONE;
191
192              if (PREDICT_FALSE(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
193                vnet_feature_arc_start (nm->output_feature_arc_index,
194                                        adj1[0].rewrite_header.sw_if_index,
195                                        &next1, p1);
196            }
197          else
198            {
199              error1 = IP4_ERROR_MTU_EXCEEDED;
200              next1 = NSH_OUTPUT_NEXT_DROP;
201            }
202          if (is_midchain)
203          {
204              adj0->sub_type.midchain.fixup_func
205                (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
206              adj1->sub_type.midchain.fixup_func
207                (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
208          }
209
210          p0->error = error_node->errors[error0];
211          p1->error = error_node->errors[error1];
212
213          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
214            {
215              nsh_output_trace_t *tr = vlib_add_trace (vm, node,
216                                                        p0, sizeof (*tr));
217              tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
218              tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
219            }
220          if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
221            {
222              nsh_output_trace_t *tr = vlib_add_trace (vm, node,
223                                                        p1, sizeof (*tr));
224              tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
225              tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
226            }
227
228          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
229                                           to_next, n_left_to_next,
230                                           pi0, pi1, next0, next1);
231        }
232
233      while (n_left_from > 0 && n_left_to_next > 0)
234        {
235          ip_adjacency_t * adj0;
236          nsh_base_header_t *hdr0;
237          ethernet_header_t * eth_hdr0;
238          vlib_buffer_t * p0;
239          u32 pi0, rw_len0, adj_index0, next0, error0;
240
241          pi0 = to_next[0] = from[0];
242
243          p0 = vlib_get_buffer (vm, pi0);
244
245          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
246
247          adj0 = adj_get(adj_index0);
248          hdr0 = vlib_buffer_get_current (p0);
249
250          /* Guess we are only writing on simple Ethernet header. */
251          vnet_rewrite_one_header (adj0[0], hdr0,
252                                   sizeof (ethernet_header_t));
253
254          eth_hdr0 = (ethernet_header_t*)((u8 *)hdr0-sizeof(ethernet_header_t));
255          eth_hdr0->type = clib_host_to_net_u16(ETHERNET_TYPE_NSH);
256
257          /* Update packet buffer attributes/set output interface. */
258          rw_len0 = adj0[0].rewrite_header.data_bytes;
259
260          vlib_increment_combined_counter
261              (&adjacency_counters,
262               thread_index,
263               adj_index0,
264               1,
265               vlib_buffer_length_in_chain (vm, p0) + rw_len0);
266
267          /* Check MTU of outgoing interface. */
268          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
269                           adj0[0].rewrite_header.max_l3_packet_bytes))
270            {
271              p0->current_data -= rw_len0;
272              p0->current_length += rw_len0;
273
274              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
275                  adj0[0].rewrite_header.sw_if_index;
276              next0 = NSH_OUTPUT_NEXT_INTERFACE;
277              error0 = IP4_ERROR_NONE;
278
279              if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
280                vnet_feature_arc_start (nm->output_feature_arc_index,
281                                        adj0[0].rewrite_header.sw_if_index,
282                                        &next0, p0);
283            }
284          else
285            {
286              error0 = IP4_ERROR_MTU_EXCEEDED;
287              next0 = NSH_OUTPUT_NEXT_DROP;
288            }
289          if (is_midchain)
290          {
291              adj0->sub_type.midchain.fixup_func
292                (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
293          }
294
295          p0->error = error_node->errors[error0];
296
297          from += 1;
298          n_left_from -= 1;
299          to_next += 1;
300          n_left_to_next -= 1;
301
302          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
303            {
304              nsh_output_trace_t *tr = vlib_add_trace (vm, node,
305                                                        p0, sizeof (*tr));
306              tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
307              tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
308            }
309
310          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
311                                           to_next, n_left_to_next,
312                                           pi0, next0);
313        }
314
315      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
316    }
317
318  return from_frame->n_vectors;
319}
320
/* Next-node indices declared by the nsh-midchain node registration. */
typedef enum nsh_midchain_next_t_
{
  NSH_MIDCHAIN_NEXT_DROP,
} nsh_midchain_next_t;
325
326static inline uword
327nsh_eth_output (vlib_main_t * vm,
328                vlib_node_runtime_t * node,
329                vlib_frame_t * from_frame)
330{
331    return (nsh_output_inline(vm, node, from_frame, /* is_midchain */ 0));
332}
333
334VLIB_REGISTER_NODE (nsh_eth_output_node) = {
335  .function = nsh_eth_output,
336  .name = "nsh-eth-output",
337  /* Takes a vector of packets. */
338  .vector_size = sizeof (u32),
339  .n_next_nodes = NSH_OUTPUT_N_NEXT,
340  .next_nodes = {
341#define _(s,n) [NSH_OUTPUT_NEXT_##s] = n,
342    foreach_nsh_output_next
343#undef _
344  },
345
346  .format_trace = format_nsh_output_trace,
347};
348
349VLIB_NODE_FUNCTION_MULTIARCH (nsh_eth_output_node, nsh_eth_output)
350
351static inline uword
352nsh_midchain (vlib_main_t * vm,
353               vlib_node_runtime_t * node,
354               vlib_frame_t * from_frame)
355{
356    return (nsh_output_inline(vm, node, from_frame, /* is_midchain */ 1));
357}
358
359VLIB_REGISTER_NODE (nsh_midchain_node) = {
360  .function = nsh_midchain,
361  .name = "nsh-midchain",
362  .vector_size = sizeof (u32),
363  .format_trace = format_nsh_output_trace,
364  .n_next_nodes = 1,
365  .next_nodes = {
366      [NSH_MIDCHAIN_NEXT_DROP] = "error-drop",
367  },
368};
369
370VLIB_NODE_FUNCTION_MULTIARCH (nsh_midchain_node, nsh_midchain)
371
372/* Built-in nsh tx feature path definition */
373VNET_FEATURE_INIT (nsh_interface_output, static) = {
374  .arc_name = "nsh-eth-output",
375  .node_name = "interface-output",
376  .runs_before = 0, /* not before any other features */
377};
378
379/* Built-in ip4 tx feature path definition */
380/* *INDENT-OFF* */
381VNET_FEATURE_ARC_INIT (nsh_eth_output, static) =
382{
383  .arc_name  = "nsh-eth-output",
384  .start_nodes = VNET_FEATURES ("nsh-midchain"),
385};
386
387VNET_FEATURE_INIT (nsh_eth_tx_drop, static) =
388{
389  .arc_name = "nsh-eth-output",
390  .node_name = "error-drop",
391  .runs_before = 0,     /* not before any other features */
392};
393/* *INDENT-ON* */
/**
 * @brief Next nodes of the NSH incomplete-adjacency node, as
 * (symbol suffix, graph node name) pairs.
 */
#define foreach_nsh_adj_incomplete_next         \
  _(DROP, "error-drop")                         \
  _(IP4,  "ip4-arp")                            \
  _(IP6,  "ip6-discover-neighbor")

/* Next-node indices, generated from foreach_nsh_adj_incomplete_next. */
typedef enum
{
#define _(sym, node_name) NSH_ADJ_INCOMPLETE_NEXT_##sym,
  foreach_nsh_adj_incomplete_next
#undef _
    NSH_ADJ_INCOMPLETE_N_NEXT,
} nsh_adj_incomplete_next_t;
408
409/**
410 * @brief A struct to hold tracing information for the NSH label imposition
411 * node.
412 */
413typedef struct nsh_adj_incomplete_trace_t_
414{
415    u32 next;
416} nsh_adj_incomplete_trace_t;
417
418
419/**
420 * @brief Graph node for incomplete NSH adjacency.
421 * This node will push traffic to either the v4-arp or v6-nd node
422 * based on the next-hop proto of the adj.
423 * We pay a cost for this 'routing' node, but an incomplete adj is the
424 * exception case.
425 */
426static inline uword
427nsh_adj_incomplete (vlib_main_t * vm,
428                     vlib_node_runtime_t * node,
429                     vlib_frame_t * from_frame)
430{
431  u32 n_left_from, next_index, * from, * to_next;
432
433  from = vlib_frame_vector_args (from_frame);
434  n_left_from = from_frame->n_vectors;
435  next_index = node->cached_next_index;
436
437  while (n_left_from > 0)
438    {
439      u32 n_left_to_next;
440
441      vlib_get_next_frame (vm, node, next_index,
442                           to_next, n_left_to_next);
443
444      while (n_left_from > 0 && n_left_to_next > 0)
445        {
446          u32 pi0, next0, adj_index0;
447          ip_adjacency_t * adj0;
448          vlib_buffer_t * p0;
449
450          pi0 = to_next[0] = from[0];
451          p0 = vlib_get_buffer (vm, pi0);
452          from += 1;
453          n_left_from -= 1;
454          to_next += 1;
455          n_left_to_next -= 1;
456
457          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
458
459          adj0 = adj_get(adj_index0);
460
461          if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto))
462          {
463              next0 = NSH_ADJ_INCOMPLETE_NEXT_IP4;
464          }
465          else
466          {
467              next0 = NSH_ADJ_INCOMPLETE_NEXT_IP6;
468          }
469
470          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
471          {
472              nsh_adj_incomplete_trace_t *tr =
473                 vlib_add_trace (vm, node, p0, sizeof (*tr));
474              tr->next = next0;
475          }
476
477          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
478                                           to_next, n_left_to_next,
479                                           pi0, next0);
480        }
481
482      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
483    }
484
485  return from_frame->n_vectors;
486}
487
488static u8 *
489format_nsh_adj_incomplete_trace (u8 * s, va_list * args)
490{
491    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
492    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
493    nsh_adj_incomplete_trace_t * t;
494    uword indent;
495
496    t = va_arg (*args, nsh_adj_incomplete_trace_t *);
497    indent = format_get_indent (s);
498
499    s = format (s, "%Unext:%d",
500                format_white_space, indent,
501                t->next);
502    return (s);
503}
504
505VLIB_REGISTER_NODE (nsh_adj_incomplete_node) = {
506  .function = nsh_adj_incomplete,
507  .name = "nsh-adj-incomplete",
508  .format_trace = format_nsh_adj_incomplete_trace,
509  /* Takes a vector of packets. */
510  .vector_size = sizeof (u32),
511  .n_next_nodes = NSH_ADJ_INCOMPLETE_N_NEXT,
512  .next_nodes = {
513#define _(s,n) [NSH_ADJ_INCOMPLETE_NEXT_##s] = n,
514    foreach_nsh_adj_incomplete_next
515#undef _
516  },
517};
518
519VLIB_NODE_FUNCTION_MULTIARCH (nsh_adj_incomplete_node,
520                              nsh_adj_incomplete)
521