/*
 *------------------------------------------------------------------
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>

#include <rdma/rdma.h>

#define foreach_rdma_input_error \
  _(BUFFER_ALLOC, "buffer alloc error")

typedef enum
{
#define _(f,s) RDMA_INPUT_ERROR_##f,
  foreach_rdma_input_error
#undef _
    RDMA_INPUT_N_ERROR,
} rdma_input_error_t;

static __clib_unused char *rdma_input_error_strings[] = {
#define _(n,s) s,
  foreach_rdma_input_error
#undef _
};


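/* fill one receive work request and its single scatter-gather element
   with a buffer address; WRs are pre-chained to the next array entry,
   the caller terminates the list */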
static_always_inline void
ibv_set_recv_wr_and_sge (struct ibv_recv_wr *w, struct ibv_sge *s, u64 va,
			 u32 data_size, u32 lkey)
{
  s[0].addr = va;
  s[0].length = data_size;
  s[0].lkey = lkey;
  w[0].next = w + 1;
  w[0].sg_list = s;
  w[0].num_sge = 1;
}

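/* refill the RX ring with freshly allocated buffers, in multiples of 8
   and without wrapping within a single call; the mlx5dv path writes WQEs
   and rings the doorbell record directly, the plain verbs path posts the
   work requests with ibv_post_wq_recv() */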
static_always_inline void
rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
			  rdma_rxq_t * rxq, int is_mlx5dv)
{
  u32 n_alloc, n;
  struct ibv_recv_wr wr[VLIB_FRAME_SIZE], *w = wr;
  struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
  u32 mask = rxq->size - 1;
  u32 slot = rxq->tail & mask;
  u32 *bufs = rxq->bufs + slot;
  u32 data_size = vlib_buffer_get_default_data_size (vm);
  u32 lkey = rd->lkey;

  /* do not enqueue more packets than ring space */
  n_alloc = clib_min (VLIB_FRAME_SIZE, rxq->size - (rxq->tail - rxq->head));

  /* do not bother to allocate if too small */
  if (n_alloc < 16)
    return;

  /* avoid wrap-around logic in core loop */
  n_alloc = clib_min (n_alloc, rxq->size - slot);

  n_alloc &= ~7;		/* round to 8 */

  n = vlib_buffer_alloc_to_ring_from_pool (vm, rxq->bufs, slot, rxq->size,
					   n_alloc, rd->pool);

  if (PREDICT_FALSE (n != n_alloc))
    {
      u32 n_free;
      if (n < 8)
	{
	  if (n)
	    vlib_buffer_free_from_ring (vm, rxq->bufs, slot, rxq->size, n);
	  return;
	}

      /* partial allocation, round and return rest */
      n_free = n & 7;
      n -= n_free;
      if (n_free)
	vlib_buffer_free_from_ring (vm, rxq->bufs, (slot + n) & mask,
				    rxq->size, n_free);
    }

  n_alloc = n;

  if (is_mlx5dv)
    {
      u64 __clib_aligned (32) va[8];
      mlx5dv_rwq_t *wqe = rxq->wqes + slot;

      while (n >= 8)
	{
	  vlib_get_buffers_with_offset (vm, rxq->bufs + slot, (void **) va, 8,
					sizeof (vlib_buffer_t));
#ifdef CLIB_HAVE_VEC256
	  *(u64x4 *) va = u64x4_byte_swap (*(u64x4 *) va);
	  *(u64x4 *) (va + 4) = u64x4_byte_swap (*(u64x4 *) (va + 4));
#else
	  for (int i = 0; i < 8; i++)
	    va[i] = clib_host_to_net_u64 (va[i]);
#endif
	  wqe[0].addr = va[0];
	  wqe[1].addr = va[1];
	  wqe[2].addr = va[2];
	  wqe[3].addr = va[3];
	  wqe[4].addr = va[4];
	  wqe[5].addr = va[5];
	  wqe[6].addr = va[6];
	  wqe[7].addr = va[7];
	  wqe += 8;
	  slot += 8;
	  n -= 8;
	}

      CLIB_MEMORY_STORE_BARRIER ();
      rxq->tail += n_alloc;
      rxq->wq_db[MLX5_RCV_DBR] = clib_host_to_net_u32 (rxq->tail);
      return;
    }

  while (n >= 8)
    {
      u64 va[8];
      if (PREDICT_TRUE (n >= 16))
	{
	  clib_prefetch_store (s + 16);
	  clib_prefetch_store (w + 16);
	}

      vlib_get_buffers_with_offset (vm, bufs, (void **) va, 8,
				    sizeof (vlib_buffer_t));

      ibv_set_recv_wr_and_sge (w++, s++, va[0], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[1], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[2], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[3], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[4], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[5], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[6], data_size, lkey);
      ibv_set_recv_wr_and_sge (w++, s++, va[7], data_size, lkey);

      bufs += 8;
      n -= 8;
    }

  w[-1].next = 0;		/* fix next pointer in WR linked-list last item */

  n = n_alloc;
  if (ibv_post_wq_recv (rxq->wq, wr, &w) != 0)
    {
      n = w - wr;
      vlib_buffer_free_from_ring (vm, rxq->bufs, slot + n, rxq->size,
				  n_alloc - n);
    }

  rxq->tail += n;
}

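/* add per-packet traces while tracing is active, recording next node,
   interface and (for mlx5dv) the CQE flags */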
static_always_inline void
rdma_device_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
			 const rdma_device_t * rd, u32 n_left, const u32 * bi,
			 u32 next_index, u16 * cqe_flags, int is_mlx5dv)
{
  u32 n_trace, i;

  if (PREDICT_TRUE (0 == (n_trace = vlib_get_trace_count (vm, node))))
    return;

  i = 0;
  while (n_trace && n_left)
    {
      vlib_buffer_t *b;
      rdma_input_trace_t *tr;
      b = vlib_get_buffer (vm, bi[0]);
      vlib_trace_buffer (vm, node, next_index, b,
			 /* follow_chain */ 0);
      tr = vlib_add_trace (vm, node, b, sizeof (*tr));
      tr->next_index = next_index;
      tr->hw_if_index = rd->hw_if_index;
      tr->cqe_flags = is_mlx5dv ? clib_net_to_host_u16 (cqe_flags[0]) : 0;

      /* next */
      n_trace--;
      n_left--;
      cqe_flags++;
      bi++;
      i++;
    }
  vlib_set_trace_count (vm, node, n_trace);
}

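/* if the next node is ethernet-input, mark the frame as coming from a
   single interface and optionally as ip4-checksum-verified so that
   ethernet-input can take its fast path */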
static_always_inline void
rdma_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
			    const rdma_device_t * rd, u32 next_index,
			    int skip_ip4_cksum)
{
  vlib_next_frame_t *nf;
  vlib_frame_t *f;
  ethernet_input_frame_t *ef;

  if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
    return;

  nf =
    vlib_node_runtime_get_next_frame (vm, node,
				      VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
  f = vlib_get_frame (vm, nf->frame);
  f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
  if (skip_ip4_cksum)
    f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;

  ef = vlib_frame_scalar_args (f);
  ef->sw_if_index = rd->sw_if_index;
  ef->hw_if_index = rd->hw_if_index;
}

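/* plain verbs path: apply the buffer template and set per-packet length
   from the work completions, returning the total number of bytes
   received */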
static_always_inline u32
rdma_device_input_bufs (vlib_main_t * vm, const rdma_device_t * rd,
			vlib_buffer_t ** b, struct ibv_wc *wc,
			u32 n_left_from, vlib_buffer_t * bt)
{
  u32 n_rx_bytes = 0;

  while (n_left_from >= 4)
    {
      if (PREDICT_TRUE (n_left_from >= 8))
	{
	  CLIB_PREFETCH (&wc[4 + 0], CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (&wc[4 + 1], CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (&wc[4 + 2], CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (&wc[4 + 3], CLIB_CACHE_LINE_BYTES, LOAD);
	  vlib_prefetch_buffer_header (b[4 + 0], STORE);
	  vlib_prefetch_buffer_header (b[4 + 1], STORE);
	  vlib_prefetch_buffer_header (b[4 + 2], STORE);
	  vlib_prefetch_buffer_header (b[4 + 3], STORE);
	}

      vlib_buffer_copy_template (b[0], bt);
      vlib_buffer_copy_template (b[1], bt);
      vlib_buffer_copy_template (b[2], bt);
      vlib_buffer_copy_template (b[3], bt);

      n_rx_bytes += b[0]->current_length = wc[0].byte_len;
      n_rx_bytes += b[1]->current_length = wc[1].byte_len;
      n_rx_bytes += b[2]->current_length = wc[2].byte_len;
      n_rx_bytes += b[3]->current_length = wc[3].byte_len;

      b += 4;
      wc += 4;
      n_left_from -= 4;
    }

  while (n_left_from >= 1)
    {
      vlib_buffer_copy_template (b[0], bt);
      n_rx_bytes += b[0]->current_length = wc[0].byte_len;

      b += 1;
      wc += 1;
      n_left_from -= 1;
    }

  return n_rx_bytes;
}

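/* copy per-packet byte counts out of the mini-CQE array of a compressed
   session; the array starts in the slot following the title CQE and
   'skip' allows resuming a partially consumed session */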
static_always_inline void
process_mini_cqes (rdma_rxq_t * rxq, u32 skip, u32 n_left, u32 cq_ci,
		   u32 mask, u32 * byte_cnt)
{
  mlx5dv_mini_cqe_t *mcqe;
  u32 mcqe_array_index = (cq_ci + 1) & mask;
  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);

  mcqe_array_index = cq_ci;

  if (skip)
    {
      u32 n = skip & ~7;

      if (n)
	{
	  mcqe_array_index = (mcqe_array_index + n) & mask;
	  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
	  skip -= n;
	}

      if (skip)
	{
	  n = clib_min (8 - skip, n_left);
	  for (int i = 0; i < n; i++)
	    byte_cnt[i] = mcqe[skip + i].byte_count;
	  mcqe_array_index = (mcqe_array_index + 8) & mask;
	  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
	  n_left -= n;
	  byte_cnt += n;
	}

    }

  while (n_left >= 8)
    {
      for (int i = 0; i < 8; i++)
	byte_cnt[i] = mcqe[i].byte_count;

      n_left -= 8;
      byte_cnt += 8;
      mcqe_array_index = (mcqe_array_index + 8) & mask;
      mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
    }

  if (n_left)
    {
      for (int i = 0; i < n_left; i++)
	byte_cnt[i] = mcqe[i].byte_count;
    }
}

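/* set the opcode/format/owner byte of n_left consecutive CQEs */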
static_always_inline void
cqe_set_owner (mlx5dv_cqe_t * cqe, u32 n_left, u8 owner)
{
  while (n_left >= 8)
    {
      cqe[0].opcode_cqefmt_se_owner = owner;
      cqe[1].opcode_cqefmt_se_owner = owner;
      cqe[2].opcode_cqefmt_se_owner = owner;
      cqe[3].opcode_cqefmt_se_owner = owner;
      cqe[4].opcode_cqefmt_se_owner = owner;
      cqe[5].opcode_cqefmt_se_owner = owner;
      cqe[6].opcode_cqefmt_se_owner = owner;
      cqe[7].opcode_cqefmt_se_owner = owner;
      n_left -= 8;
      cqe += 8;
    }
  while (n_left)
    {
      cqe[0].opcode_cqefmt_se_owner = owner;
      n_left--;
      cqe++;
    }
}

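/* rewrite the owner byte of the CQE slots consumed by a compressed
   session (0xf0 plus the expected owner bit) so that a later poll does
   not mistake stale mini-CQE data for valid completions; the first CQE
   of the session is reset by hardware */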
static_always_inline void
compressed_cqe_reset_owner (rdma_rxq_t * rxq, u32 n_mini_cqes, u32 cq_ci,
			    u32 mask, u32 log2_cq_size)
{
  u8 owner;
  u32 offset, cq_size = 1 << log2_cq_size;


  /* first CQE is reset by hardware */
  cq_ci++;
  n_mini_cqes--;

  offset = cq_ci & mask;
  owner = 0xf0 | ((cq_ci >> log2_cq_size) & 1);

  if (offset + n_mini_cqes < cq_size)
    {
      cqe_set_owner (rxq->cqes + offset, n_mini_cqes, owner);
    }
  else
    {
      u32 n = cq_size - offset;
      cqe_set_owner (rxq->cqes + offset, n, owner);
      cqe_set_owner (rxq->cqes, n_mini_cqes - n, owner ^ 1);
    }

}

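/* poll the completion queue using mlx5 direct verbs: returns the number
   of packets received (up to VLIB_FRAME_SIZE) and fills the per-packet
   byte counts and CQE flags, handling both regular and compressed CQEs
   and remembering partially consumed mini-CQE arrays across calls */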
static_always_inline uword
rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
			    u32 * byte_cnt, u16 * cqe_flags)
{
  u32 n_rx_packets = 0;
  u32 log2_cq_size = rxq->log2_cq_size;
  u32 mask = pow2_mask (log2_cq_size);
  u32 cq_ci = rxq->cq_ci;

  if (rxq->n_mini_cqes_left)
    {
      /* partially processed mini-cqe array */
      u32 n_mini_cqes = rxq->n_mini_cqes;
      u32 n_mini_cqes_left = rxq->n_mini_cqes_left;
      process_mini_cqes (rxq, n_mini_cqes - n_mini_cqes_left,
			 n_mini_cqes_left, cq_ci, mask, byte_cnt);
      compressed_cqe_reset_owner (rxq, n_mini_cqes, cq_ci, mask,
				  log2_cq_size);
      clib_memset_u16 (cqe_flags, rxq->last_cqe_flags, n_mini_cqes_left);
      n_rx_packets = n_mini_cqes_left;
      byte_cnt += n_mini_cqes_left;
      cqe_flags += n_mini_cqes_left;
      rxq->n_mini_cqes_left = 0;
      rxq->cq_ci = cq_ci = cq_ci + n_mini_cqes;
    }

  while (n_rx_packets < VLIB_FRAME_SIZE)
    {
      u8 cqe_last_byte, owner;
      mlx5dv_cqe_t *cqe = rxq->cqes + (cq_ci & mask);

      clib_prefetch_load (rxq->cqes + ((cq_ci + 8) & mask));

      owner = (cq_ci >> log2_cq_size) & 1;
      cqe_last_byte = cqe->opcode_cqefmt_se_owner;

      if ((cqe_last_byte & 0x1) != owner)
	break;

      cqe_last_byte &= 0xfe;	/* remove owner bit */

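      /* a CQE whose opcode/format byte is 0x2c (owner bit masked off)
         starts a compressed session: the following ring slots hold an
         array of mini-CQEs that all share this CQE's flags */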
      if (cqe_last_byte == 0x2c)
	{
	  u32 n_mini_cqes = clib_net_to_host_u32 (cqe->mini_cqe_num);
	  u32 n_left = VLIB_FRAME_SIZE - n_rx_packets;
	  u16 flags = cqe->flags;

	  if (n_left >= n_mini_cqes)
	    {
	      process_mini_cqes (rxq, 0, n_mini_cqes, cq_ci, mask, byte_cnt);
	      clib_memset_u16 (cqe_flags, flags, n_mini_cqes);
	      compressed_cqe_reset_owner (rxq, n_mini_cqes, cq_ci, mask,
					  log2_cq_size);
	      n_rx_packets += n_mini_cqes;
	      byte_cnt += n_mini_cqes;
	      cqe_flags += n_mini_cqes;
	      cq_ci += n_mini_cqes;
	    }
	  else
	    {
	      process_mini_cqes (rxq, 0, n_left, cq_ci, mask, byte_cnt);
	      clib_memset_u16 (cqe_flags, flags, n_left);
	      n_rx_packets = VLIB_FRAME_SIZE;
	      rxq->n_mini_cqes = n_mini_cqes;
	      rxq->n_mini_cqes_left = n_mini_cqes - n_left;
	      rxq->last_cqe_flags = flags;
	      goto done;
	    }
	  continue;
	}

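      /* opcode/format byte 0x20 (owner bit masked off) is a regular,
         non-compressed receive completion */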
      if (cqe_last_byte == 0x20)
	{
	  byte_cnt[0] = cqe->byte_cnt;
	  cqe_flags[0] = cqe->flags;
	  n_rx_packets++;
	  cq_ci++;
	  byte_cnt++;
	  cqe_flags++;
	  continue;
	}

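      /* any other opcode is unexpected: mark the device as errored and
         stop polling */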
      rd->flags |= RDMA_DEVICE_F_ERROR;
      break;
    }

done:
  if (n_rx_packets)
    rxq->cq_db[0] = rxq->cq_ci = cq_ci;
  return n_rx_packets;
}

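/* per-queue receive path: poll completions, copy the buffer template and
   per-packet lengths, enqueue the frame to the next node, update
   counters and traces, then refill the RX ring */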
static_always_inline uword
rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
			  vlib_frame_t * frame, rdma_device_t * rd, u16 qid,
			  int use_mlx5dv)
{
  rdma_main_t *rm = &rdma_main;
  vnet_main_t *vnm = vnet_get_main ();
  rdma_per_thread_data_t *ptd = vec_elt_at_index (rm->per_thread_data,
						  vm->thread_index);
  rdma_rxq_t *rxq = vec_elt_at_index (rd->rxqs, qid);
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  struct ibv_wc wc[VLIB_FRAME_SIZE];
  u32 __clib_aligned (32) byte_cnts[VLIB_FRAME_SIZE];
  vlib_buffer_t bt;
  u32 next_index, *to_next, n_left_to_next, n_rx_bytes = 0;
  int n_rx_packets, skip_ip4_cksum = 0;
  u32 mask = rxq->size - 1;

  if (use_mlx5dv)
    n_rx_packets = rdma_device_poll_cq_mlx5dv (rd, rxq, byte_cnts,
					       ptd->cqe_flags);
  else
    n_rx_packets = ibv_poll_cq (rxq->cq, VLIB_FRAME_SIZE, wc);

  if (PREDICT_FALSE (n_rx_packets <= 0))
    goto refill;

  /* init buffer template */
  vlib_buffer_copy_template (&bt, &ptd->buffer_template);
  vnet_buffer (&bt)->sw_if_index[VLIB_RX] = rd->sw_if_index;
  bt.buffer_pool_index = rd->pool;

  /* update buffer template for input feature arcs if any */
  next_index = rd->per_interface_next_index;
  if (PREDICT_FALSE (vnet_device_input_have_features (rd->sw_if_index)))
    vnet_feature_start_device_input_x1 (rd->sw_if_index, &next_index, &bt);

  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);

  vlib_buffer_copy_indices_from_ring (to_next, rxq->bufs, rxq->head & mask,
				      rxq->size, n_rx_packets);

  vlib_get_buffers (vm, to_next, bufs, n_rx_packets);

  if (use_mlx5dv)
    {
      u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK;
      u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT;
      u32 n_left = n_rx_packets;
      u32 *bc = byte_cnts;

      /* verify that all ip4 packets have l3_ok flag set and convert packet
         length from network to host byte order */
      skip_ip4_cksum = 1;

#if defined CLIB_HAVE_VEC256
      u16x16 mask16 = u16x16_splat (mask);
      u16x16 match16 = u16x16_splat (match);
      u16x16 r = { };

      for (int i = 0; i * 16 < n_rx_packets; i++)
	r |= (ptd->cqe_flags16[i] & mask16) != match16;

      if (!u16x16_is_all_zero (r))
	skip_ip4_cksum = 0;

      for (int i = 0; i < n_rx_packets; i += 8)
	*(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
#elif defined CLIB_HAVE_VEC128
      u16x8 mask8 = u16x8_splat (mask);
      u16x8 match8 = u16x8_splat (match);
      u16x8 r = { };

      for (int i = 0; i * 8 < n_rx_packets; i++)
	r |= (ptd->cqe_flags8[i] & mask8) != match8;

      if (!u16x8_is_all_zero (r))
	skip_ip4_cksum = 0;

      for (int i = 0; i < n_rx_packets; i += 4)
	*(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
#else
      for (int i = 0; i < n_rx_packets; i++)
	if ((ptd->cqe_flags[i] & mask) != match)
	  skip_ip4_cksum = 0;

      for (int i = 0; i < n_rx_packets; i++)
	bc[i] = clib_net_to_host_u32 (bc[i]);
#endif

      while (n_left >= 8)
	{
	  clib_prefetch_store (b[4]);
	  vlib_buffer_copy_template (b[0], &bt);
	  n_rx_bytes += b[0]->current_length = bc[0];
	  clib_prefetch_store (b[5]);
	  vlib_buffer_copy_template (b[1], &bt);
	  n_rx_bytes += b[1]->current_length = bc[1];
	  clib_prefetch_store (b[6]);
	  vlib_buffer_copy_template (b[2], &bt);
	  n_rx_bytes += b[2]->current_length = bc[2];
	  clib_prefetch_store (b[7]);
	  vlib_buffer_copy_template (b[3], &bt);
	  n_rx_bytes += b[3]->current_length = bc[3];

	  /* next */
	  bc += 4;
	  b += 4;
	  n_left -= 4;
	}
      while (n_left)
	{
	  vlib_buffer_copy_template (b[0], &bt);
	  n_rx_bytes += b[0]->current_length = bc[0];

	  /* next */
	  bc++;
	  b++;
	  n_left--;
	}
    }
  else
    n_rx_bytes = rdma_device_input_bufs (vm, rd, bufs, wc, n_rx_packets, &bt);

  rdma_device_input_ethernet (vm, node, rd, next_index, skip_ip4_cksum);

  vlib_put_next_frame (vm, node, next_index, n_left_to_next - n_rx_packets);

  rxq->head += n_rx_packets;

  rdma_device_input_trace (vm, node, rd, n_rx_packets, to_next, next_index,
			   ptd->cqe_flags, use_mlx5dv);

  /* reset flags to zero for the next run */
  if (use_mlx5dv)
    clib_memset_u16 (ptd->cqe_flags, 0, VLIB_FRAME_SIZE);

  vlib_increment_combined_counter
    (vnm->interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_RX, vm->thread_index,
     rd->hw_if_index, n_rx_packets, n_rx_bytes);

refill:
  rdma_device_input_refill (vm, rd, rxq, use_mlx5dv);

  return n_rx_packets;
}

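/* input node function: polls each device queue assigned to this thread,
   dispatching to the mlx5dv or plain verbs variant based on device
   flags */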
VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
				vlib_node_runtime_t * node,
				vlib_frame_t * frame)
{
  u32 n_rx = 0;
  rdma_main_t *rm = &rdma_main;
  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
  vnet_device_and_queue_t *dq;

  foreach_device_and_queue (dq, rt->devices_and_queues)
  {
    rdma_device_t *rd;
    rd = vec_elt_at_index (rm->devices, dq->dev_instance);
    if (PREDICT_FALSE ((rd->flags & RDMA_DEVICE_F_ADMIN_UP) == 0))
      continue;

    if (PREDICT_FALSE (rd->flags & RDMA_DEVICE_F_ERROR))
      continue;

    if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
      n_rx += rdma_device_input_inline (vm, node, frame, rd, dq->queue_id, 1);
    else
      n_rx += rdma_device_input_inline (vm, node, frame, rd, dq->queue_id, 0);
  }
  return n_rx;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rdma_input_node) = {
  .name = "rdma-input",
  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
  .sibling_of = "device-input",
  .format_trace = format_rdma_input_trace,
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_DISABLED,
  .n_errors = RDMA_INPUT_N_ERROR,
  .error_strings = rdma_input_error_strings,
};

/* *INDENT-ON* */


/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
