/* buffer_node.h revision 9fb6d40e */
1/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * buffer_node.h: VLIB buffer handling node helper macros/inlines
17 *
18 * Copyright (c) 2008 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#ifndef included_vlib_buffer_node_h
41#define included_vlib_buffer_node_h
42
43/** \file
44    vlib buffer/node functions
45*/
46
47/** \brief Finish enqueueing two buffers forward in the graph.
48 Standard dual loop boilerplate element. This is a MACRO,
49 with MULTIPLE SIDE EFFECTS. In the ideal case,
50 <code>next_index == next0 == next1</code>,
51 which means that the speculative enqueue at the top of the dual loop
52 has correctly dealt with both packets. In that case, the macro does
53 nothing at all.
54
55 @param vm vlib_main_t pointer, varies by thread
56 @param node current node vlib_node_runtime_t pointer
57 @param next_index speculated next index used for both packets
58 @param to_next speculated vector pointer used for both packets
59 @param n_left_to_next number of slots left in speculated vector
60 @param bi0 first buffer index
61 @param bi1 second buffer index
62 @param next0 actual next index to be used for the first packet
63 @param next1 actual next index to be used for the second packet
64
65 @return @c next_index -- speculative next index to be used for future packets
66 @return @c to_next -- speculative frame to be used for future packets
67 @return @c n_left_to_next -- number of slots left in speculative frame
68*/
69
#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {									\
  /* Bit 0 set => packet 0 mis-speculated; bit 1 set => packet 1 did. */ \
  int enqueue_code = (next0 != next_index) + 2*(next1 != next_index);	\
									\
  if (PREDICT_FALSE (enqueue_code != 0))				\
    {									\
      switch (enqueue_code)						\
	{								\
	case 1:								\
	  /* A B A: bi0 was wrong. Keep bi1 in the speculated frame by	\
	     moving it into bi0's slot, give one slot back, and enqueue	\
	     bi0 via the slow path. */					\
	  to_next[-2] = bi1;						\
	  to_next -= 1;							\
	  n_left_to_next += 1;						\
	  vlib_set_next_frame_buffer (vm, node, next0, bi0);		\
	  break;							\
									\
	case 2:								\
	  /* A A B: bi1 was wrong. Un-enqueue it and send it to next1. */ \
	  to_next -= 1;							\
	  n_left_to_next += 1;						\
	  vlib_set_next_frame_buffer (vm, node, next1, bi1);		\
	  break;							\
									\
	case 3:								\
	  /* A B B or A B C: both wrong. Un-enqueue both and enqueue	\
	     each via the slow path. */					\
	  to_next -= 2;							\
	  n_left_to_next += 2;						\
	  vlib_set_next_frame_buffer (vm, node, next0, bi0);		\
	  vlib_set_next_frame_buffer (vm, node, next1, bi1);		\
	  /* A B B: both packets went to the same (new) next node, so	\
	     change the speculation to that node for future packets. */	\
	  if (next0 == next1)						\
	    {								\
	      vlib_put_next_frame (vm, node, next_index,		\
				   n_left_to_next);			\
	      next_index = next1;					\
	      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
	    }								\
	}								\
    }									\
} while (0)
109
110
111/** \brief Finish enqueueing four buffers forward in the graph.
112 Standard quad loop boilerplate element. This is a MACRO,
113 with MULTIPLE SIDE EFFECTS. In the ideal case,
114 <code>next_index == next0 == next1 == next2 == next3</code>,
115 which means that the speculative enqueue at the top of the quad loop
116 has correctly dealt with all four packets. In that case, the macro does
117 nothing at all.
118
119 @param vm vlib_main_t pointer, varies by thread
120 @param node current node vlib_node_runtime_t pointer
121 @param next_index speculated next index used for both packets
122 @param to_next speculated vector pointer used for both packets
123 @param n_left_to_next number of slots left in speculated vector
124 @param bi0 first buffer index
125 @param bi1 second buffer index
126 @param bi2 third buffer index
127 @param bi3 fourth buffer index
128 @param next0 actual next index to be used for the first packet
129 @param next1 actual next index to be used for the second packet
130 @param next2 actual next index to be used for the third packet
131 @param next3 actual next index to be used for the fourth packet
132
133 @return @c next_index -- speculative next index to be used for future packets
134 @return @c to_next -- speculative frame to be used for future packets
135 @return @c n_left_to_next -- number of slots left in speculative frame
136*/
137
/* Note: like the x1/x2 variants, this macro deliberately does NOT end
   with a semicolon -- the invocation supplies it, so the expansion stays
   legal inside "if (x) MACRO (...); else ...".  (The previous version
   ended "} while(0);", whose embedded semicolon broke that usage.) */
#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next"; the OR  \
     of the XORs is non-zero iff at least one packet mis-speculated. */ \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)     \
    | (next_index ^ next2) | (next_index ^ next3);                      \
  if (PREDICT_FALSE(fix_speculation))                                   \
    {                                                                   \
      /* rewind the speculative enqueue of all four buffers... */       \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
                                                                        \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        {                                                               \
          to_next[0] = bi0;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else              /* send it where it needs to go */              \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
                                                                        \
      if (next_index == next1)                                          \
        {                                                               \
          to_next[0] = bi1;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
                                                                        \
      if (next_index == next2)                                          \
        {                                                               \
          to_next[0] = bi2;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
                                                                        \
      if (next_index == next3)                                          \
        {                                                               \
          to_next[0] = bi3;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        {                                                               \
          vlib_set_next_frame_buffer (vm, node, next3, bi3);            \
                                                                        \
          /* Change speculation: last 2 packets went to the same node*/ \
          if (next2 == next3)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
              next_index = next3;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
197
198/** \brief Finish enqueueing one buffer forward in the graph.
199 Standard single loop boilerplate element. This is a MACRO,
200 with MULTIPLE SIDE EFFECTS. In the ideal case,
201 <code>next_index == next0</code>,
202 which means that the speculative enqueue at the top of the single loop
203 has correctly dealt with the packet in hand. In that case, the macro does
204 nothing at all.
205
206 @param vm vlib_main_t pointer, varies by thread
207 @param node current node vlib_node_runtime_t pointer
208 @param next_index speculated next index used for both packets
209 @param to_next speculated vector pointer used for both packets
210 @param n_left_to_next number of slots left in speculated vector
211 @param bi0 first buffer index
212 @param next0 actual next index to be used for the first packet
213
214 @return @c next_index -- speculative next index to be used for future packets
215 @return @c to_next -- speculative frame to be used for future packets
216 @return @c n_left_to_next -- number of slots left in speculative frame
217*/
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {									\
  /* Mis-speculation: ship the current frame (the "+ 1" gives back	\
     the slot speculatively consumed for bi0), re-speculate on next0,	\
     and enqueue bi0 into the freshly acquired frame. */		\
  if (PREDICT_FALSE (next0 != next_index))				\
    {									\
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);	\
      next_index = next0;						\
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
									\
      to_next[0] = bi0;							\
      to_next += 1;							\
      n_left_to_next -= 1;						\
    }									\
} while (0)
231
/** Generic node dispatch loop: classifies each buffer of the input
    frame through the caller-supplied callbacks and enqueues it to the
    chosen next node, using the standard dual/single-loop speculative
    enqueue pattern.

    @param vm            vlib_main_t pointer, varies by thread
    @param node          current node runtime
    @param frame         input frame of buffer indices
    @param sizeof_trace  per-packet trace record size, used when tracing
    @param opaque1       passed through unchanged to the callbacks
    @param opaque2       passed through unchanged to the callbacks
    @param two_buffers   dual-packet classifier; writes next indices
                         through next0/next1
    @param one_buffer    single-packet classifier; writes next index
                         through next0
    @return number of vectors processed (frame->n_vectors) */
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
			    vlib_node_runtime_t * node,
			    vlib_frame_t * frame,
			    uword sizeof_trace,
			    void *opaque1,
			    uword opaque2,
			    void (*two_buffers) (vlib_main_t * vm,
						 void *opaque1,
						 uword opaque2,
						 vlib_buffer_t * b0,
						 vlib_buffer_t * b1,
						 u32 * next0, u32 * next1),
			    void (*one_buffer) (vlib_main_t * vm,
						void *opaque1, uword opaque2,
						vlib_buffer_t * b0,
						u32 * next0))
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Speculate that packets go where the last ones went. */
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
				   /* stride */ 1, sizeof_trace);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: needs 4 input buffers so the next iteration's pair
         can be prefetched while processing the current pair. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  vlib_buffer_t *p0, *p1;
	  u32 pi0, next0;
	  u32 pi1, next1;

	  /* Prefetch next iteration. */
	  {
	    vlib_buffer_t *p2, *p3;

	    p2 = vlib_get_buffer (vm, from[2]);
	    p3 = vlib_get_buffer (vm, from[3]);

	    vlib_prefetch_buffer_header (p2, LOAD);
	    vlib_prefetch_buffer_header (p3, LOAD);

	    CLIB_PREFETCH (p2->data, 64, LOAD);
	    CLIB_PREFETCH (p3->data, 64, LOAD);
	  }

	  /* Speculatively enqueue both packets to next_index. */
	  pi0 = to_next[0] = from[0];
	  pi1 = to_next[1] = from[1];
	  from += 2;
	  to_next += 2;
	  n_left_from -= 2;
	  n_left_to_next -= 2;

	  p0 = vlib_get_buffer (vm, pi0);
	  p1 = vlib_get_buffer (vm, pi1);

	  two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);

	  /* Repair the speculative enqueue if next0/next1 disagree. */
	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
					   to_next, n_left_to_next,
					   pi0, pi1, next0, next1);
	}

      /* Single loop: leftovers and frame-boundary cases. */
      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  vlib_buffer_t *p0;
	  u32 pi0, next0;

	  pi0 = from[0];
	  to_next[0] = pi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  p0 = vlib_get_buffer (vm, pi0);

	  one_buffer (vm, opaque1, opaque2, p0, &next0);

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   pi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
330
331static_always_inline void
332vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
333			     u32 * buffers, u16 * nexts, uword count)
334{
335  u32 *to_next, n_left_to_next, max;
336  u16 next_index;
337
338  next_index = nexts[0];
339  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
340  max = clib_min (n_left_to_next, count);
341
342  while (count)
343    {
344      u32 n_enqueued;
345      if ((nexts[0] != next_index) || n_left_to_next == 0)
346	{
347	  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
348	  next_index = nexts[0];
349	  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
350	  max = clib_min (n_left_to_next, count);
351	}
352#if defined(CLIB_HAVE_VEC512)
353      u16x32 next32 = CLIB_MEM_OVERFLOW_LOAD (u16x32_load_unaligned, nexts);
354      next32 = (next32 == u16x32_splat (next32[0]));
355      u64 bitmap = u16x32_msb_mask (next32);
356      n_enqueued = count_trailing_zeros (~bitmap);
357#elif defined(CLIB_HAVE_VEC256)
358      u16x16 next16 = CLIB_MEM_OVERFLOW_LOAD (u16x16_load_unaligned, nexts);
359      next16 = (next16 == u16x16_splat (next16[0]));
360      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
361      n_enqueued = count_trailing_zeros (~bitmap) / 2;
362#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
363      u16x8 next8 = CLIB_MEM_OVERFLOW_LOAD (u16x8_load_unaligned, nexts);
364      next8 = (next8 == u16x8_splat (next8[0]));
365      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
366      n_enqueued = count_trailing_zeros (~bitmap) / 2;
367#else
368      u16 x = 0;
369      if (count + 3 < max)
370	{
371	  x |= next_index ^ nexts[1];
372	  x |= next_index ^ nexts[2];
373	  x |= next_index ^ nexts[3];
374	  n_enqueued = (x == 0) ? 4 : 1;
375	}
376      else
377	n_enqueued = 1;
378#endif
379
380      if (PREDICT_FALSE (n_enqueued > max))
381	n_enqueued = max;
382
383#ifdef CLIB_HAVE_VEC512
384      if (n_enqueued >= 32)
385	{
386	  vlib_buffer_copy_indices (to_next, buffers, 32);
387	  nexts += 32;
388	  to_next += 32;
389	  buffers += 32;
390	  n_left_to_next -= 32;
391	  count -= 32;
392	  max -= 32;
393	  continue;
394	}
395#endif
396
397#ifdef CLIB_HAVE_VEC256
398      if (n_enqueued >= 16)
399	{
400	  vlib_buffer_copy_indices (to_next, buffers, 16);
401	  nexts += 16;
402	  to_next += 16;
403	  buffers += 16;
404	  n_left_to_next -= 16;
405	  count -= 16;
406	  max -= 16;
407	  continue;
408	}
409#endif
410
411#ifdef CLIB_HAVE_VEC128
412      if (n_enqueued >= 8)
413	{
414	  vlib_buffer_copy_indices (to_next, buffers, 8);
415	  nexts += 8;
416	  to_next += 8;
417	  buffers += 8;
418	  n_left_to_next -= 8;
419	  count -= 8;
420	  max -= 8;
421	  continue;
422	}
423#endif
424
425      if (n_enqueued >= 4)
426	{
427	  vlib_buffer_copy_indices (to_next, buffers, 4);
428	  nexts += 4;
429	  to_next += 4;
430	  buffers += 4;
431	  n_left_to_next -= 4;
432	  count -= 4;
433	  max -= 4;
434	  continue;
435	}
436
437      /* copy */
438      to_next[0] = buffers[0];
439
440      /* next */
441      nexts += 1;
442      to_next += 1;
443      buffers += 1;
444      n_left_to_next -= 1;
445      count -= 1;
446      max -= 1;
447    }
448  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
449}
450
451static_always_inline void
452vlib_buffer_enqueue_to_single_next (vlib_main_t * vm,
453				    vlib_node_runtime_t * node, u32 * buffers,
454				    u16 next_index, u32 count)
455{
456  u32 *to_next, n_left_to_next, n_enq;
457
458  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
459
460  if (PREDICT_TRUE (n_left_to_next >= count))
461    {
462      vlib_buffer_copy_indices (to_next, buffers, count);
463      n_left_to_next -= count;
464      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
465      return;
466    }
467
468  n_enq = n_left_to_next;
469next:
470  vlib_buffer_copy_indices (to_next, buffers, n_enq);
471  n_left_to_next -= n_enq;
472
473  if (PREDICT_FALSE (count > n_enq))
474    {
475      count -= n_enq;
476      buffers += n_enq;
477
478      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
479      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
480      n_enq = clib_min (n_left_to_next, count);
481      goto next;
482    }
483  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
484}
485
486static_always_inline u32
487vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
488			       u32 * buffer_indices, u16 * thread_indices,
489			       u32 n_packets, int drop_on_congestion)
490{
491  vlib_thread_main_t *tm = vlib_get_thread_main ();
492  vlib_frame_queue_main_t *fqm;
493  vlib_frame_queue_per_thread_data_t *ptd;
494  u32 n_left = n_packets;
495  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
496  vlib_frame_queue_elt_t *hf = 0;
497  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
498  u32 next_thread_index, current_thread_index = ~0;
499  int i;
500
501  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
502  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);
503
504  while (n_left)
505    {
506      next_thread_index = thread_indices[0];
507
508      if (next_thread_index != current_thread_index)
509	{
510	  if (drop_on_congestion &&
511	      is_vlib_frame_queue_congested
512	      (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
513	       ptd->congested_handoff_queue_by_thread_index))
514	    {
515	      dbi[0] = buffer_indices[0];
516	      dbi++;
517	      n_drop++;
518	      goto next;
519	    }
520
521	  if (hf)
522	    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
523
524	  hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
525						  next_thread_index,
526						  ptd->handoff_queue_elt_by_thread_index);
527
528	  n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
529	  to_next_thread = &hf->buffer_index[hf->n_vectors];
530	  current_thread_index = next_thread_index;
531	}
532
533      to_next_thread[0] = buffer_indices[0];
534      to_next_thread++;
535      n_left_to_next_thread--;
536
537      if (n_left_to_next_thread == 0)
538	{
539	  hf->n_vectors = VLIB_FRAME_SIZE;
540	  vlib_put_frame_queue_elt (hf);
541	  vlib_mains[current_thread_index]->check_frame_queues = 1;
542	  current_thread_index = ~0;
543	  ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
544	  hf = 0;
545	}
546
547      /* next */
548    next:
549      thread_indices += 1;
550      buffer_indices += 1;
551      n_left -= 1;
552    }
553
554  if (hf)
555    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
556
557  /* Ship frames to the thread nodes */
558  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
559    {
560      if (ptd->handoff_queue_elt_by_thread_index[i])
561	{
562	  hf = ptd->handoff_queue_elt_by_thread_index[i];
563	  /*
564	   * It works better to let the handoff node
565	   * rate-adapt, always ship the handoff queue element.
566	   */
567	  if (1 || hf->n_vectors == hf->last_n_vectors)
568	    {
569	      vlib_put_frame_queue_elt (hf);
570	      vlib_mains[i]->check_frame_queues = 1;
571	      ptd->handoff_queue_elt_by_thread_index[i] = 0;
572	    }
573	  else
574	    hf->last_n_vectors = hf->n_vectors;
575	}
576      ptd->congested_handoff_queue_by_thread_index[i] =
577	(vlib_frame_queue_t *) (~0);
578    }
579
580  if (drop_on_congestion && n_drop)
581    vlib_buffer_free (vm, drop_list, n_drop);
582
583  return n_packets - n_drop;
584}
585
586#endif /* included_vlib_buffer_node_h */
587
588/*
589 * fd.io coding-style-patch-verification: ON
590 *
591 * Local Variables:
592 * eval: (c-set-style "gnu")
593 * End:
594 */
595