/*
 *------------------------------------------------------------------
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
17fe750c24SBenoît Ganne
18fe750c24SBenoît Ganne#include <vlib/vlib.h>
19fe750c24SBenoît Ganne#include <vlib/unix/unix.h>
20fe750c24SBenoît Ganne#include <vlib/pci/pci.h>
21fe750c24SBenoît Ganne#include <vnet/ethernet/ethernet.h>
22fe750c24SBenoît Ganne#include <vnet/devices/devices.h>
23fe750c24SBenoît Ganne
24fe750c24SBenoît Ganne#include <rdma/rdma.h>
25fe750c24SBenoît Ganne
26fe750c24SBenoît Ganne#define foreach_rdma_input_error \
27fe750c24SBenoît Ganne  _(BUFFER_ALLOC, "buffer alloc error")
28fe750c24SBenoît Ganne
29fe750c24SBenoît Gannetypedef enum
30fe750c24SBenoît Ganne{
31fe750c24SBenoît Ganne#define _(f,s) RDMA_INPUT_ERROR_##f,
32fe750c24SBenoît Ganne  foreach_rdma_input_error
33fe750c24SBenoît Ganne#undef _
34fe750c24SBenoît Ganne    RDMA_INPUT_N_ERROR,
35fe750c24SBenoît Ganne} rdma_input_error_t;
36fe750c24SBenoît Ganne
37fe750c24SBenoît Gannestatic __clib_unused char *rdma_input_error_strings[] = {
38fe750c24SBenoît Ganne#define _(n,s) s,
39fe750c24SBenoît Ganne  foreach_rdma_input_error
40fe750c24SBenoît Ganne#undef _
41fe750c24SBenoît Ganne};
42fe750c24SBenoît Ganne
43599d0888SDamjan Marion
44599d0888SDamjan Marionstatic_always_inline void
45599d0888SDamjan Marionibv_set_recv_wr_and_sge (struct ibv_recv_wr *w, struct ibv_sge *s, u64 va,
46599d0888SDamjan Marion			 u32 data_size, u32 lkey)
47599d0888SDamjan Marion{
48599d0888SDamjan Marion  s[0].addr = va;
49599d0888SDamjan Marion  s[0].length = data_size;
50599d0888SDamjan Marion  s[0].lkey = lkey;
51599d0888SDamjan Marion  w[0].next = w + 1;
52599d0888SDamjan Marion  w[0].sg_list = s;
53599d0888SDamjan Marion  w[0].num_sge = 1;
54599d0888SDamjan Marion}
55599d0888SDamjan Marion
56fe750c24SBenoît Gannestatic_always_inline void
57fe750c24SBenoît Gannerdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
58dd648aacSDamjan Marion			  rdma_rxq_t * rxq, int is_mlx5dv)
59fe750c24SBenoît Ganne{
60fe750c24SBenoît Ganne  u32 n_alloc, n;
61dc195d68SBenoît Ganne  struct ibv_recv_wr wr[VLIB_FRAME_SIZE], *w = wr;
62dc195d68SBenoît Ganne  struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
63599d0888SDamjan Marion  u32 mask = rxq->size - 1;
64599d0888SDamjan Marion  u32 slot = rxq->tail & mask;
65599d0888SDamjan Marion  u32 *bufs = rxq->bufs + slot;
66599d0888SDamjan Marion  u32 data_size = vlib_buffer_get_default_data_size (vm);
67599d0888SDamjan Marion  u32 lkey = rd->lkey;
68fe750c24SBenoît Ganne
69e7e8bf37SBenoît Ganne  /* do not enqueue more packet than ring space */
70e7e8bf37SBenoît Ganne  n_alloc = clib_min (VLIB_FRAME_SIZE, rxq->size - (rxq->tail - rxq->head));
71e7e8bf37SBenoît Ganne
72e7e8bf37SBenoît Ganne  /* do not bother to allocate if too small */
73e7e8bf37SBenoît Ganne  if (n_alloc < 16)
74e7e8bf37SBenoît Ganne    return;
75e7e8bf37SBenoît Ganne
76e7e8bf37SBenoît Ganne  /* avoid wrap-around logic in core loop */
77e7e8bf37SBenoît Ganne  n_alloc = clib_min (n_alloc, rxq->size - slot);
78e7e8bf37SBenoît Ganne
79599d0888SDamjan Marion  n_alloc &= ~7;		/* round to 8 */
80e7e8bf37SBenoît Ganne
81599d0888SDamjan Marion  n = vlib_buffer_alloc_to_ring_from_pool (vm, rxq->bufs, slot, rxq->size,
82599d0888SDamjan Marion					   n_alloc, rd->pool);
83dc195d68SBenoît Ganne
84599d0888SDamjan Marion  if (PREDICT_FALSE (n != n_alloc))
85fe750c24SBenoît Ganne    {
86599d0888SDamjan Marion      u32 n_free;
87599d0888SDamjan Marion      if (n < 8)
88dc195d68SBenoît Ganne	{
89599d0888SDamjan Marion	  if (n)
90599d0888SDamjan Marion	    vlib_buffer_free_from_ring (vm, rxq->bufs, slot, rxq->size, n);
91599d0888SDamjan Marion	  return;
92dc195d68SBenoît Ganne	}
93dc195d68SBenoît Ganne
94599d0888SDamjan Marion      /* partial allocation, round and return rest */
95599d0888SDamjan Marion      n_free = n - (n & 7);
96599d0888SDamjan Marion      n -= n_free;
97599d0888SDamjan Marion      if (n_free)
98599d0888SDamjan Marion	vlib_buffer_free_from_ring (vm, rxq->bufs, (slot + n) & mask,
99599d0888SDamjan Marion				    rxq->size, n_free);
100fe750c24SBenoît Ganne    }
101dc195d68SBenoît Ganne
102599d0888SDamjan Marion  n_alloc = n;
103599d0888SDamjan Marion
104dd648aacSDamjan Marion  if (is_mlx5dv)
105dd648aacSDamjan Marion    {
106a1e03d4dSDamjan Marion      u64 __clib_aligned (32) va[8];
107dd648aacSDamjan Marion      mlx5dv_rwq_t *wqe = rxq->wqes + slot;
108dd648aacSDamjan Marion
109dd648aacSDamjan Marion      while (n >= 1)
110dd648aacSDamjan Marion	{
111dd648aacSDamjan Marion	  vlib_get_buffers_with_offset (vm, rxq->bufs + slot, (void **) va, 8,
112dd648aacSDamjan Marion					sizeof (vlib_buffer_t));
113dd648aacSDamjan Marion#ifdef CLIB_HAVE_VEC256
114dd648aacSDamjan Marion	  *(u64x4 *) va = u64x4_byte_swap (*(u64x4 *) va);
115dd648aacSDamjan Marion	  *(u64x4 *) (va + 4) = u64x4_byte_swap (*(u64x4 *) (va + 4));
116dd648aacSDamjan Marion#else
117dd648aacSDamjan Marion	  for (int i = 0; i < 8; i++)
118dd648aacSDamjan Marion	    va[i] = clib_host_to_net_u64 (va[i]);
119dd648aacSDamjan Marion#endif
120dd648aacSDamjan Marion	  wqe[0].addr = va[0];
121dd648aacSDamjan Marion	  wqe[1].addr = va[1];
122dd648aacSDamjan Marion	  wqe[2].addr = va[2];
123dd648aacSDamjan Marion	  wqe[3].addr = va[3];
124dd648aacSDamjan Marion	  wqe[4].addr = va[4];
125dd648aacSDamjan Marion	  wqe[5].addr = va[5];
126dd648aacSDamjan Marion	  wqe[6].addr = va[6];
127dd648aacSDamjan Marion	  wqe[7].addr = va[7];
128dd648aacSDamjan Marion	  wqe += 8;
129dd648aacSDamjan Marion	  slot += 8;
130dd648aacSDamjan Marion	  n -= 8;
131dd648aacSDamjan Marion	}
132dd648aacSDamjan Marion
133dd648aacSDamjan Marion      CLIB_MEMORY_STORE_BARRIER ();
134dd648aacSDamjan Marion      rxq->tail += n_alloc;
135dd648aacSDamjan Marion      rxq->wq_db[MLX5_RCV_DBR] = clib_host_to_net_u32 (rxq->tail);
136dd648aacSDamjan Marion      return;
137dd648aacSDamjan Marion    }
138dd648aacSDamjan Marion
139599d0888SDamjan Marion  while (n >= 8)
140dc195d68SBenoît Ganne    {
141599d0888SDamjan Marion      u64 va[8];
142599d0888SDamjan Marion      if (PREDICT_TRUE (n >= 16))
143599d0888SDamjan Marion	{
144599d0888SDamjan Marion	  clib_prefetch_store (s + 16);
145599d0888SDamjan Marion	  clib_prefetch_store (w + 16);
146599d0888SDamjan Marion	}
147dc195d68SBenoît Ganne
148599d0888SDamjan Marion      vlib_get_buffers_with_offset (vm, bufs, (void **) va, 8,
149599d0888SDamjan Marion				    sizeof (vlib_buffer_t));
150dc195d68SBenoît Ganne
151599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[0], data_size, lkey);
152599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[1], data_size, lkey);
153599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[2], data_size, lkey);
154599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[3], data_size, lkey);
155599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[4], data_size, lkey);
156599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[5], data_size, lkey);
157599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[6], data_size, lkey);
158599d0888SDamjan Marion      ibv_set_recv_wr_and_sge (w++, s++, va[7], data_size, lkey);
159599d0888SDamjan Marion
160599d0888SDamjan Marion      bufs += 8;
161599d0888SDamjan Marion      n -= 8;
162dc195d68SBenoît Ganne    }
163dc195d68SBenoît Ganne
164dc195d68SBenoît Ganne  w[-1].next = 0;		/* fix next pointer in WR linked-list last item */
165dc195d68SBenoît Ganne
166dd1ccb4fSBenoît Ganne  n = n_alloc;
167dd1ccb4fSBenoît Ganne  if (ibv_post_wq_recv (rxq->wq, wr, &w) != 0)
168dd1ccb4fSBenoît Ganne    {
169dd1ccb4fSBenoît Ganne      n = w - wr;
170e7e8bf37SBenoît Ganne      vlib_buffer_free_from_ring (vm, rxq->bufs, slot + n, rxq->size,
171e7e8bf37SBenoît Ganne				  n_alloc - n);
172dd1ccb4fSBenoît Ganne    }
173dc195d68SBenoît Ganne
174e7e8bf37SBenoît Ganne  rxq->tail += n;
175fe750c24SBenoît Ganne}
176fe750c24SBenoît Ganne
177dc195d68SBenoît Gannestatic_always_inline void
178dc195d68SBenoît Gannerdma_device_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
17974eba446SBenoît Ganne			 const rdma_device_t * rd, u32 n_left, const u32 * bi,
180dd648aacSDamjan Marion			 u32 next_index, u16 * cqe_flags, int is_mlx5dv)
181fe750c24SBenoît Ganne{
182dc195d68SBenoît Ganne  u32 n_trace, i;
183fe750c24SBenoît Ganne
184dc195d68SBenoît Ganne  if (PREDICT_TRUE (0 == (n_trace = vlib_get_trace_count (vm, node))))
185dc195d68SBenoît Ganne    return;
186dc195d68SBenoît Ganne
187dc195d68SBenoît Ganne  i = 0;
188dc195d68SBenoît Ganne  while (n_trace && n_left)
189dc195d68SBenoît Ganne    {
190dc195d68SBenoît Ganne      vlib_buffer_t *b;
191dc195d68SBenoît Ganne      rdma_input_trace_t *tr;
192dc195d68SBenoît Ganne      b = vlib_get_buffer (vm, bi[0]);
19374eba446SBenoît Ganne      vlib_trace_buffer (vm, node, next_index, b,
194dc195d68SBenoît Ganne			 /* follow_chain */ 0);
195dc195d68SBenoît Ganne      tr = vlib_add_trace (vm, node, b, sizeof (*tr));
19674eba446SBenoît Ganne      tr->next_index = next_index;
197dc195d68SBenoît Ganne      tr->hw_if_index = rd->hw_if_index;
198dd648aacSDamjan Marion      tr->cqe_flags = is_mlx5dv ? clib_net_to_host_u16 (cqe_flags[0]) : 0;
199dc195d68SBenoît Ganne
200dc195d68SBenoît Ganne      /* next */
201dc195d68SBenoît Ganne      n_trace--;
202dc195d68SBenoît Ganne      n_left--;
203dd648aacSDamjan Marion      cqe_flags++;
204dc195d68SBenoît Ganne      bi++;
205dc195d68SBenoît Ganne      i++;
206dc195d68SBenoît Ganne    }
207dc195d68SBenoît Ganne  vlib_set_trace_count (vm, node, n_trace);
208dc195d68SBenoît Ganne}
209dc195d68SBenoît Ganne
210dc195d68SBenoît Gannestatic_always_inline void
211dc195d68SBenoît Gannerdma_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
212dd648aacSDamjan Marion			    const rdma_device_t * rd, u32 next_index,
213dd648aacSDamjan Marion			    int skip_ip4_cksum)
214dc195d68SBenoît Ganne{
215dc195d68SBenoît Ganne  vlib_next_frame_t *nf;
216dc195d68SBenoît Ganne  vlib_frame_t *f;
217dc195d68SBenoît Ganne  ethernet_input_frame_t *ef;
218fe750c24SBenoît Ganne
21974eba446SBenoît Ganne  if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
220dc195d68SBenoît Ganne    return;
221fe750c24SBenoît Ganne
222dc195d68SBenoît Ganne  nf =
22374eba446SBenoît Ganne    vlib_node_runtime_get_next_frame (vm, node,
22474eba446SBenoît Ganne				      VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
22558b2eb1aSAndreas Schultz  f = vlib_get_frame (vm, nf->frame);
226dc195d68SBenoît Ganne  f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
227dd648aacSDamjan Marion  if (skip_ip4_cksum)
228dd648aacSDamjan Marion    f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
229fe750c24SBenoît Ganne
230dc195d68SBenoît Ganne  ef = vlib_frame_scalar_args (f);
231dc195d68SBenoît Ganne  ef->sw_if_index = rd->sw_if_index;
232dc195d68SBenoît Ganne  ef->hw_if_index = rd->hw_if_index;
233dc195d68SBenoît Ganne}
234fe750c24SBenoît Ganne
235dc195d68SBenoît Gannestatic_always_inline u32
236dd648aacSDamjan Marionrdma_device_input_bufs (vlib_main_t * vm, const rdma_device_t * rd,
237dd648aacSDamjan Marion			vlib_buffer_t ** b, struct ibv_wc *wc,
238dd648aacSDamjan Marion			u32 n_left_from, vlib_buffer_t * bt)
239dc195d68SBenoît Ganne{
240b9250a9eSDamjan Marion  u32 n_rx_bytes = 0;
241dc195d68SBenoît Ganne
242dc195d68SBenoît Ganne  while (n_left_from >= 4)
243fe750c24SBenoît Ganne    {
244dc195d68SBenoît Ganne      if (PREDICT_TRUE (n_left_from >= 8))
245dc195d68SBenoît Ganne	{
246dc195d68SBenoît Ganne	  CLIB_PREFETCH (&wc[4 + 0], CLIB_CACHE_LINE_BYTES, LOAD);
247dc195d68SBenoît Ganne	  CLIB_PREFETCH (&wc[4 + 1], CLIB_CACHE_LINE_BYTES, LOAD);
248dc195d68SBenoît Ganne	  CLIB_PREFETCH (&wc[4 + 2], CLIB_CACHE_LINE_BYTES, LOAD);
249dc195d68SBenoît Ganne	  CLIB_PREFETCH (&wc[4 + 3], CLIB_CACHE_LINE_BYTES, LOAD);
250e7e8bf37SBenoît Ganne	  vlib_prefetch_buffer_header (b[4 + 0], STORE);
251e7e8bf37SBenoît Ganne	  vlib_prefetch_buffer_header (b[4 + 1], STORE);
252e7e8bf37SBenoît Ganne	  vlib_prefetch_buffer_header (b[4 + 2], STORE);
253e7e8bf37SBenoît Ganne	  vlib_prefetch_buffer_header (b[4 + 3], STORE);
254dc195d68SBenoît Ganne	}
255dc195d68SBenoît Ganne
256e7e8bf37SBenoît Ganne      vlib_buffer_copy_template (b[0], bt);
257e7e8bf37SBenoît Ganne      vlib_buffer_copy_template (b[1], bt);
258e7e8bf37SBenoît Ganne      vlib_buffer_copy_template (b[2], bt);
259e7e8bf37SBenoît Ganne      vlib_buffer_copy_template (b[3], bt);
260e7e8bf37SBenoît Ganne
261b9250a9eSDamjan Marion      n_rx_bytes += b[0]->current_length = wc[0].byte_len;
262b9250a9eSDamjan Marion      n_rx_bytes += b[1]->current_length = wc[1].byte_len;
263b9250a9eSDamjan Marion      n_rx_bytes += b[2]->current_length = wc[2].byte_len;
264b9250a9eSDamjan Marion      n_rx_bytes += b[3]->current_length = wc[3].byte_len;
265dc195d68SBenoît Ganne
266e7e8bf37SBenoît Ganne      b += 4;
267dc195d68SBenoît Ganne      wc += 4;
268dc195d68SBenoît Ganne      n_left_from -= 4;
269fe750c24SBenoît Ganne    }
270fe750c24SBenoît Ganne
271dc195d68SBenoît Ganne  while (n_left_from >= 1)
272fe750c24SBenoît Ganne    {
273e7e8bf37SBenoît Ganne      vlib_buffer_copy_template (b[0], bt);
274b9250a9eSDamjan Marion      n_rx_bytes += b[0]->current_length = wc[0].byte_len;
275fe750c24SBenoît Ganne
276e7e8bf37SBenoît Ganne      b += 1;
277dc195d68SBenoît Ganne      wc += 1;
278dc195d68SBenoît Ganne      n_left_from -= 1;
279dc195d68SBenoît Ganne    }
280dc195d68SBenoît Ganne
281b9250a9eSDamjan Marion  return n_rx_bytes;
282dc195d68SBenoît Ganne}
283dc195d68SBenoît Ganne
284dd648aacSDamjan Marionstatic_always_inline void
285dd648aacSDamjan Marionprocess_mini_cqes (rdma_rxq_t * rxq, u32 skip, u32 n_left, u32 cq_ci,
286dd648aacSDamjan Marion		   u32 mask, u32 * byte_cnt)
287dd648aacSDamjan Marion{
288dd648aacSDamjan Marion  mlx5dv_mini_cqe_t *mcqe;
289dd648aacSDamjan Marion  u32 mcqe_array_index = (cq_ci + 1) & mask;
290dd648aacSDamjan Marion  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
291dd648aacSDamjan Marion
292dd648aacSDamjan Marion  mcqe_array_index = cq_ci;
293dd648aacSDamjan Marion
294dd648aacSDamjan Marion  if (skip)
295dd648aacSDamjan Marion    {
296dd648aacSDamjan Marion      u32 n = skip & ~7;
297dd648aacSDamjan Marion
298dd648aacSDamjan Marion      if (n)
299dd648aacSDamjan Marion	{
300dd648aacSDamjan Marion	  mcqe_array_index = (mcqe_array_index + n) & mask;
301dd648aacSDamjan Marion	  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
302dd648aacSDamjan Marion	  skip -= n;
303dd648aacSDamjan Marion	}
304dd648aacSDamjan Marion
305dd648aacSDamjan Marion      if (skip)
306dd648aacSDamjan Marion	{
307dd648aacSDamjan Marion	  n = clib_min (8 - skip, n_left);
308dd648aacSDamjan Marion	  for (int i = 0; i < n; i++)
309dd648aacSDamjan Marion	    byte_cnt[i] = mcqe[skip + i].byte_count;
310dd648aacSDamjan Marion	  mcqe_array_index = (mcqe_array_index + 8) & mask;
311dd648aacSDamjan Marion	  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
312dd648aacSDamjan Marion	  n_left -= n;
313dd648aacSDamjan Marion	  byte_cnt += n;
314dd648aacSDamjan Marion	}
315dd648aacSDamjan Marion
316dd648aacSDamjan Marion    }
317dd648aacSDamjan Marion
318dd648aacSDamjan Marion  while (n_left >= 8)
319dd648aacSDamjan Marion    {
320dd648aacSDamjan Marion      for (int i = 0; i < 8; i++)
321dd648aacSDamjan Marion	byte_cnt[i] = mcqe[i].byte_count;
322dd648aacSDamjan Marion
323dd648aacSDamjan Marion      n_left -= 8;
324dd648aacSDamjan Marion      byte_cnt += 8;
325dd648aacSDamjan Marion      mcqe_array_index = (mcqe_array_index + 8) & mask;
326dd648aacSDamjan Marion      mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
327dd648aacSDamjan Marion    }
328dd648aacSDamjan Marion
329dd648aacSDamjan Marion  if (n_left)
330dd648aacSDamjan Marion    {
331dd648aacSDamjan Marion      for (int i = 0; i < n_left; i++)
332dd648aacSDamjan Marion	byte_cnt[i] = mcqe[i].byte_count;
333dd648aacSDamjan Marion    }
334dd648aacSDamjan Marion}
335dd648aacSDamjan Marion
336dd648aacSDamjan Marionstatic_always_inline void
337dd648aacSDamjan Marioncqe_set_owner (mlx5dv_cqe_t * cqe, u32 n_left, u8 owner)
338dd648aacSDamjan Marion{
339dd648aacSDamjan Marion  while (n_left >= 8)
340dd648aacSDamjan Marion    {
341dd648aacSDamjan Marion      cqe[0].opcode_cqefmt_se_owner = owner;
342dd648aacSDamjan Marion      cqe[1].opcode_cqefmt_se_owner = owner;
343dd648aacSDamjan Marion      cqe[2].opcode_cqefmt_se_owner = owner;
344dd648aacSDamjan Marion      cqe[3].opcode_cqefmt_se_owner = owner;
345dd648aacSDamjan Marion      cqe[4].opcode_cqefmt_se_owner = owner;
346dd648aacSDamjan Marion      cqe[5].opcode_cqefmt_se_owner = owner;
347dd648aacSDamjan Marion      cqe[6].opcode_cqefmt_se_owner = owner;
348dd648aacSDamjan Marion      cqe[7].opcode_cqefmt_se_owner = owner;
349dd648aacSDamjan Marion      n_left -= 8;
350dd648aacSDamjan Marion      cqe += 8;
351dd648aacSDamjan Marion    }
352dd648aacSDamjan Marion  while (n_left)
353dd648aacSDamjan Marion    {
354dd648aacSDamjan Marion      cqe[0].opcode_cqefmt_se_owner = owner;
355dd648aacSDamjan Marion      n_left--;
356dd648aacSDamjan Marion      cqe++;
357dd648aacSDamjan Marion    }
358dd648aacSDamjan Marion}
359dd648aacSDamjan Marion
360dd648aacSDamjan Marionstatic_always_inline void
361dd648aacSDamjan Marioncompressed_cqe_reset_owner (rdma_rxq_t * rxq, u32 n_mini_cqes, u32 cq_ci,
362dd648aacSDamjan Marion			    u32 mask, u32 log2_cq_size)
363dd648aacSDamjan Marion{
364dd648aacSDamjan Marion  u8 owner;
365dd648aacSDamjan Marion  u32 offset, cq_size = 1 << log2_cq_size;
366dd648aacSDamjan Marion
367dd648aacSDamjan Marion
368dd648aacSDamjan Marion  /* first CQE is reset by hardware */
369dd648aacSDamjan Marion  cq_ci++;
370dd648aacSDamjan Marion  n_mini_cqes--;
371dd648aacSDamjan Marion
372dd648aacSDamjan Marion  offset = cq_ci & mask;
373dd648aacSDamjan Marion  owner = 0xf0 | ((cq_ci >> log2_cq_size) & 1);
374dd648aacSDamjan Marion
375dd648aacSDamjan Marion  if (offset + n_mini_cqes < cq_size)
376dd648aacSDamjan Marion    {
377dd648aacSDamjan Marion      cqe_set_owner (rxq->cqes + offset, n_mini_cqes, owner);
378dd648aacSDamjan Marion    }
379dd648aacSDamjan Marion  else
380dd648aacSDamjan Marion    {
381dd648aacSDamjan Marion      u32 n = cq_size - offset;
382dd648aacSDamjan Marion      cqe_set_owner (rxq->cqes + offset, n, owner);
383dd648aacSDamjan Marion      cqe_set_owner (rxq->cqes, n_mini_cqes - n, owner ^ 1);
384dd648aacSDamjan Marion    }
385dd648aacSDamjan Marion
386dd648aacSDamjan Marion}
387dd648aacSDamjan Marion
388dd648aacSDamjan Marionstatic_always_inline uword
389dd648aacSDamjan Marionrdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
390dd648aacSDamjan Marion			    u32 * byte_cnt, u16 * cqe_flags)
391dd648aacSDamjan Marion{
392dd648aacSDamjan Marion  u32 n_rx_packets = 0;
393dd648aacSDamjan Marion  u32 log2_cq_size = rxq->log2_cq_size;
394dd648aacSDamjan Marion  u32 mask = pow2_mask (log2_cq_size);
395dd648aacSDamjan Marion  u32 cq_ci = rxq->cq_ci;
396dd648aacSDamjan Marion
397dd648aacSDamjan Marion  if (rxq->n_mini_cqes_left)
398dd648aacSDamjan Marion    {
399dd648aacSDamjan Marion      /* partially processed mini-cqe array */
400dd648aacSDamjan Marion      u32 n_mini_cqes = rxq->n_mini_cqes;
401dd648aacSDamjan Marion      u32 n_mini_cqes_left = rxq->n_mini_cqes_left;
402dd648aacSDamjan Marion      process_mini_cqes (rxq, n_mini_cqes - n_mini_cqes_left,
403dd648aacSDamjan Marion			 n_mini_cqes_left, cq_ci, mask, byte_cnt);
404dd648aacSDamjan Marion      compressed_cqe_reset_owner (rxq, n_mini_cqes, cq_ci, mask,
405dd648aacSDamjan Marion				  log2_cq_size);
406dd648aacSDamjan Marion      clib_memset_u16 (cqe_flags, rxq->last_cqe_flags, n_mini_cqes_left);
407dd648aacSDamjan Marion      n_rx_packets = n_mini_cqes_left;
408dd648aacSDamjan Marion      byte_cnt += n_mini_cqes_left;
409dd648aacSDamjan Marion      cqe_flags += n_mini_cqes_left;
410dd648aacSDamjan Marion      rxq->n_mini_cqes_left = 0;
411dd648aacSDamjan Marion      rxq->cq_ci = cq_ci = cq_ci + n_mini_cqes;
412dd648aacSDamjan Marion    }
413dd648aacSDamjan Marion
414dd648aacSDamjan Marion  while (n_rx_packets < VLIB_FRAME_SIZE)
415dd648aacSDamjan Marion    {
416dd648aacSDamjan Marion      u8 cqe_last_byte, owner;
417dd648aacSDamjan Marion      mlx5dv_cqe_t *cqe = rxq->cqes + (cq_ci & mask);
418dd648aacSDamjan Marion
419dd648aacSDamjan Marion      clib_prefetch_load (rxq->cqes + ((cq_ci + 8) & mask));
420dd648aacSDamjan Marion
421dd648aacSDamjan Marion      owner = (cq_ci >> log2_cq_size) & 1;
422dd648aacSDamjan Marion      cqe_last_byte = cqe->opcode_cqefmt_se_owner;
423dd648aacSDamjan Marion
424dd648aacSDamjan Marion      if ((cqe_last_byte & 0x1) != owner)
425dd648aacSDamjan Marion	break;
426dd648aacSDamjan Marion
427dd648aacSDamjan Marion      cqe_last_byte &= 0xfe;	/* remove owner bit */
428dd648aacSDamjan Marion
429dd648aacSDamjan Marion      if (cqe_last_byte == 0x2c)
430dd648aacSDamjan Marion	{
431dd648aacSDamjan Marion	  u32 n_mini_cqes = clib_net_to_host_u32 (cqe->mini_cqe_num);
432dd648aacSDamjan Marion	  u32 n_left = VLIB_FRAME_SIZE - n_rx_packets;
433dd648aacSDamjan Marion	  u16 flags = cqe->flags;
434dd648aacSDamjan Marion
435dd648aacSDamjan Marion	  if (n_left >= n_mini_cqes)
436dd648aacSDamjan Marion	    {
437dd648aacSDamjan Marion	      process_mini_cqes (rxq, 0, n_mini_cqes, cq_ci, mask, byte_cnt);
438dd648aacSDamjan Marion	      clib_memset_u16 (cqe_flags, flags, n_mini_cqes);
439dd648aacSDamjan Marion	      compressed_cqe_reset_owner (rxq, n_mini_cqes, cq_ci, mask,
440dd648aacSDamjan Marion					  log2_cq_size);
441dd648aacSDamjan Marion	      n_rx_packets += n_mini_cqes;
442dd648aacSDamjan Marion	      byte_cnt += n_mini_cqes;
443dd648aacSDamjan Marion	      cqe_flags += n_mini_cqes;
444dd648aacSDamjan Marion	      cq_ci += n_mini_cqes;
445dd648aacSDamjan Marion	    }
446dd648aacSDamjan Marion	  else
447dd648aacSDamjan Marion	    {
448dd648aacSDamjan Marion	      process_mini_cqes (rxq, 0, n_left, cq_ci, mask, byte_cnt);
449dd648aacSDamjan Marion	      clib_memset_u16 (cqe_flags, flags, n_left);
450dd648aacSDamjan Marion	      n_rx_packets = VLIB_FRAME_SIZE;
451dd648aacSDamjan Marion	      rxq->n_mini_cqes = n_mini_cqes;
452dd648aacSDamjan Marion	      rxq->n_mini_cqes_left = n_mini_cqes - n_left;
453dd648aacSDamjan Marion	      rxq->last_cqe_flags = flags;
454dd648aacSDamjan Marion	      goto done;
455dd648aacSDamjan Marion	    }
456dd648aacSDamjan Marion	  continue;
457dd648aacSDamjan Marion	}
458dd648aacSDamjan Marion
459dd648aacSDamjan Marion      if (cqe_last_byte == 0x20)
460dd648aacSDamjan Marion	{
461dd648aacSDamjan Marion	  byte_cnt[0] = cqe->byte_cnt;
462dd648aacSDamjan Marion	  cqe_flags[0] = cqe->flags;
463dd648aacSDamjan Marion	  n_rx_packets++;
464dd648aacSDamjan Marion	  cq_ci++;
465dd648aacSDamjan Marion	  byte_cnt++;
466dd648aacSDamjan Marion	  continue;
467dd648aacSDamjan Marion	}
468dd648aacSDamjan Marion
469dd648aacSDamjan Marion      rd->flags |= RDMA_DEVICE_F_ERROR;
470dd648aacSDamjan Marion      break;
471dd648aacSDamjan Marion    }
472dd648aacSDamjan Marion
473dd648aacSDamjan Mariondone:
474dd648aacSDamjan Marion  if (n_rx_packets)
475dd648aacSDamjan Marion    rxq->cq_db[0] = rxq->cq_ci = cq_ci;
476dd648aacSDamjan Marion  return n_rx_packets;
477dd648aacSDamjan Marion}
478dd648aacSDamjan Marion
479dc195d68SBenoît Gannestatic_always_inline uword
480dc195d68SBenoît Gannerdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
481dd648aacSDamjan Marion			  vlib_frame_t * frame, rdma_device_t * rd, u16 qid,
482dd648aacSDamjan Marion			  int use_mlx5dv)
483dc195d68SBenoît Ganne{
484a3188febSDamjan Marion  rdma_main_t *rm = &rdma_main;
485dc195d68SBenoît Ganne  vnet_main_t *vnm = vnet_get_main ();
486a3188febSDamjan Marion  rdma_per_thread_data_t *ptd = vec_elt_at_index (rm->per_thread_data,
487a3188febSDamjan Marion						  vm->thread_index);
488dc195d68SBenoît Ganne  rdma_rxq_t *rxq = vec_elt_at_index (rd->rxqs, qid);
489dd648aacSDamjan Marion  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
490dc195d68SBenoît Ganne  struct ibv_wc wc[VLIB_FRAME_SIZE];
491a1e03d4dSDamjan Marion  u32 __clib_aligned (32) byte_cnts[VLIB_FRAME_SIZE];
492e7e8bf37SBenoît Ganne  vlib_buffer_t bt;
493dd648aacSDamjan Marion  u32 next_index, *to_next, n_left_to_next, n_rx_bytes = 0;
494dd648aacSDamjan Marion  int n_rx_packets, skip_ip4_cksum = 0;
495b9250a9eSDamjan Marion  u32 mask = rxq->size - 1;
496e7e8bf37SBenoît Ganne
497dd648aacSDamjan Marion  if (use_mlx5dv)
498dd648aacSDamjan Marion    n_rx_packets = rdma_device_poll_cq_mlx5dv (rd, rxq, byte_cnts,
499dd648aacSDamjan Marion					       ptd->cqe_flags);
500dd648aacSDamjan Marion  else
501dd648aacSDamjan Marion    n_rx_packets = ibv_poll_cq (rxq->cq, VLIB_FRAME_SIZE, wc);
502dc195d68SBenoît Ganne
503dc195d68SBenoît Ganne  if (PREDICT_FALSE (n_rx_packets <= 0))
504599d0888SDamjan Marion    goto refill;
505dc195d68SBenoît Ganne
506e7e8bf37SBenoît Ganne  /* init buffer template */
507a3188febSDamjan Marion  vlib_buffer_copy_template (&bt, &ptd->buffer_template);
508e7e8bf37SBenoît Ganne  vnet_buffer (&bt)->sw_if_index[VLIB_RX] = rd->sw_if_index;
509e7e8bf37SBenoît Ganne  bt.buffer_pool_index = rd->pool;
510e7e8bf37SBenoît Ganne
511e7e8bf37SBenoît Ganne  /* update buffer template for input feature arcs if any */
51274eba446SBenoît Ganne  next_index = rd->per_interface_next_index;
51374eba446SBenoît Ganne  if (PREDICT_FALSE (vnet_device_input_have_features (rd->sw_if_index)))
51474eba446SBenoît Ganne    vnet_feature_start_device_input_x1 (rd->sw_if_index, &next_index, &bt);
51574eba446SBenoît Ganne
51674eba446SBenoît Ganne  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
517e7e8bf37SBenoît Ganne
518b9250a9eSDamjan Marion  vlib_buffer_copy_indices_from_ring (to_next, rxq->bufs, rxq->head & mask,
519b9250a9eSDamjan Marion				      rxq->size, n_rx_packets);
520b9250a9eSDamjan Marion
521dd648aacSDamjan Marion  vlib_get_buffers (vm, to_next, bufs, n_rx_packets);
522dd648aacSDamjan Marion
523dd648aacSDamjan Marion  if (use_mlx5dv)
524dd648aacSDamjan Marion    {
525dd648aacSDamjan Marion      u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK;
526dd648aacSDamjan Marion      u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT;
527dd648aacSDamjan Marion      u32 n_left = n_rx_packets;
528dd648aacSDamjan Marion      u32 *bc = byte_cnts;
529dd648aacSDamjan Marion
530dd648aacSDamjan Marion      /* verify that all ip4 packets have l3_ok flag set and convert packet
531dd648aacSDamjan Marion         length from network to host byte order */
532dd648aacSDamjan Marion      skip_ip4_cksum = 1;
533dd648aacSDamjan Marion
534dd648aacSDamjan Marion#if defined CLIB_HAVE_VEC256
535dd648aacSDamjan Marion      u16x16 mask16 = u16x16_splat (mask);
536dd648aacSDamjan Marion      u16x16 match16 = u16x16_splat (match);
537dd648aacSDamjan Marion      u16x16 r = { };
538dd648aacSDamjan Marion
539dd648aacSDamjan Marion      for (int i = 0; i * 16 < n_rx_packets; i++)
540dd648aacSDamjan Marion	r |= (ptd->cqe_flags16[i] & mask16) != match16;
541dd648aacSDamjan Marion
542dd648aacSDamjan Marion      if (!u16x16_is_all_zero (r))
543dd648aacSDamjan Marion	skip_ip4_cksum = 0;
544dd648aacSDamjan Marion
545dd648aacSDamjan Marion      for (int i = 0; i < n_rx_packets; i += 8)
546dd648aacSDamjan Marion	*(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
547dd648aacSDamjan Marion#elif defined CLIB_HAVE_VEC128
548dd648aacSDamjan Marion      u16x8 mask8 = u16x8_splat (mask);
549dd648aacSDamjan Marion      u16x8 match8 = u16x8_splat (match);
550dd648aacSDamjan Marion      u16x8 r = { };
551dd648aacSDamjan Marion
552dd648aacSDamjan Marion      for (int i = 0; i * 8 < n_rx_packets; i++)
553dd648aacSDamjan Marion	r |= (ptd->cqe_flags8[i] & mask8) != match8;
554dd648aacSDamjan Marion
555dd648aacSDamjan Marion      if (!u16x8_is_all_zero (r))
556dd648aacSDamjan Marion	skip_ip4_cksum = 0;
557dd648aacSDamjan Marion
558dd648aacSDamjan Marion      for (int i = 0; i < n_rx_packets; i += 4)
559dd648aacSDamjan Marion	*(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
560dd648aacSDamjan Marion#else
561dd648aacSDamjan Marion      for (int i = 0; i < n_rx_packets; i++)
562dd648aacSDamjan Marion	if ((ptd->cqe_flags[i] & mask) == match)
563dd648aacSDamjan Marion	  skip_ip4_cksum = 0;
564dd648aacSDamjan Marion
565dd648aacSDamjan Marion      for (int i = 0; i < n_rx_packets; i++)
566dd648aacSDamjan Marion	bc[i] = clib_net_to_host_u32 (bc[i]);
567dd648aacSDamjan Marion#endif
568dd648aacSDamjan Marion
569dd648aacSDamjan Marion      while (n_left >= 8)
570dd648aacSDamjan Marion	{
571dd648aacSDamjan Marion	  clib_prefetch_store (b[4]);
572dd648aacSDamjan Marion	  vlib_buffer_copy_template (b[0], &bt);
573dd648aacSDamjan Marion	  n_rx_bytes += b[0]->current_length = bc[0];
574dd648aacSDamjan Marion	  clib_prefetch_store (b[5]);
575dd648aacSDamjan Marion	  vlib_buffer_copy_template (b[1], &bt);
576dd648aacSDamjan Marion	  n_rx_bytes += b[1]->current_length = bc[1];
577dd648aacSDamjan Marion	  clib_prefetch_store (b[6]);
578dd648aacSDamjan Marion	  vlib_buffer_copy_template (b[2], &bt);
579dd648aacSDamjan Marion	  n_rx_bytes += b[2]->current_length = bc[2];
580dd648aacSDamjan Marion	  clib_prefetch_store (b[7]);
581dd648aacSDamjan Marion	  vlib_buffer_copy_template (b[3], &bt);
582dd648aacSDamjan Marion	  n_rx_bytes += b[3]->current_length = bc[3];
583dd648aacSDamjan Marion
584dd648aacSDamjan Marion	  /* next */
585dd648aacSDamjan Marion	  bc += 4;
586dd648aacSDamjan Marion	  b += 4;
587dd648aacSDamjan Marion	  n_left -= 4;
588dd648aacSDamjan Marion	}
589dd648aacSDamjan Marion      while (n_left)
590dd648aacSDamjan Marion	{
591dd648aacSDamjan Marion	  vlib_buffer_copy_template (b[0], &bt);
592dd648aacSDamjan Marion	  n_rx_bytes += b[0]->current_length = bc[0];
593dd648aacSDamjan Marion
594dd648aacSDamjan Marion	  /* next */
595dd648aacSDamjan Marion	  bc++;
596dd648aacSDamjan Marion	  b++;
597dd648aacSDamjan Marion	  n_left--;
598dd648aacSDamjan Marion	}
599dd648aacSDamjan Marion    }
600dd648aacSDamjan Marion  else
601dd648aacSDamjan Marion    n_rx_bytes = rdma_device_input_bufs (vm, rd, bufs, wc, n_rx_packets, &bt);
602dd648aacSDamjan Marion
603dd648aacSDamjan Marion  rdma_device_input_ethernet (vm, node, rd, next_index, skip_ip4_cksum);
604dc195d68SBenoît Ganne
60574eba446SBenoît Ganne  vlib_put_next_frame (vm, node, next_index, n_left_to_next - n_rx_packets);
606fe750c24SBenoît Ganne
607e7e8bf37SBenoît Ganne  rxq->head += n_rx_packets;
608e7e8bf37SBenoît Ganne
609dd648aacSDamjan Marion  rdma_device_input_trace (vm, node, rd, n_rx_packets, to_next, next_index,
610dd648aacSDamjan Marion			   ptd->cqe_flags, use_mlx5dv);
611dd648aacSDamjan Marion
612dd648aacSDamjan Marion  /* reset flags to zero for the next run */
613dd648aacSDamjan Marion  if (use_mlx5dv)
614dd648aacSDamjan Marion    clib_memset_u16 (ptd->cqe_flags, 0, VLIB_FRAME_SIZE);
615e7e8bf37SBenoît Ganne
616fe750c24SBenoît Ganne  vlib_increment_combined_counter
617fe750c24SBenoît Ganne    (vnm->interface_main.combined_sw_if_counters +
618fe750c24SBenoît Ganne     VNET_INTERFACE_COUNTER_RX, vm->thread_index,
619fe750c24SBenoît Ganne     rd->hw_if_index, n_rx_packets, n_rx_bytes);
620fe750c24SBenoît Ganne
621599d0888SDamjan Marionrefill:
622dd648aacSDamjan Marion  rdma_device_input_refill (vm, rd, rxq, use_mlx5dv);
623fe750c24SBenoît Ganne
624fe750c24SBenoît Ganne  return n_rx_packets;
625fe750c24SBenoît Ganne}
626fe750c24SBenoît Ganne
627fe750c24SBenoît GanneVLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
628fe750c24SBenoît Ganne				vlib_node_runtime_t * node,
629fe750c24SBenoît Ganne				vlib_frame_t * frame)
630fe750c24SBenoît Ganne{
631fe750c24SBenoît Ganne  u32 n_rx = 0;
632fe750c24SBenoît Ganne  rdma_main_t *rm = &rdma_main;
633fe750c24SBenoît Ganne  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
634fe750c24SBenoît Ganne  vnet_device_and_queue_t *dq;
635fe750c24SBenoît Ganne
636fe750c24SBenoît Ganne  foreach_device_and_queue (dq, rt->devices_and_queues)
637fe750c24SBenoît Ganne  {
638fe750c24SBenoît Ganne    rdma_device_t *rd;
639fe750c24SBenoît Ganne    rd = vec_elt_at_index (rm->devices, dq->dev_instance);
640dd648aacSDamjan Marion    if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ADMIN_UP) == 0)
641dd648aacSDamjan Marion      continue;
642dd648aacSDamjan Marion
643dd648aacSDamjan Marion    if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ERROR))
644dd648aacSDamjan Marion      continue;
645dd648aacSDamjan Marion
646dd648aacSDamjan Marion    if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
647dd648aacSDamjan Marion      n_rx += rdma_device_input_inline (vm, node, frame, rd, dq->queue_id, 1);
648dd648aacSDamjan Marion    else
649dd648aacSDamjan Marion      n_rx += rdma_device_input_inline (vm, node, frame, rd, dq->queue_id, 0);
650fe750c24SBenoît Ganne  }
651fe750c24SBenoît Ganne  return n_rx;
652fe750c24SBenoît Ganne}
653fe750c24SBenoît Ganne
654fe750c24SBenoît Ganne/* *INDENT-OFF* */
655fe750c24SBenoît GanneVLIB_REGISTER_NODE (rdma_input_node) = {
656fe750c24SBenoît Ganne  .name = "rdma-input",
6577ca5aaacSDamjan Marion  .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
658fe750c24SBenoît Ganne  .sibling_of = "device-input",
659fe750c24SBenoît Ganne  .format_trace = format_rdma_input_trace,
660fe750c24SBenoît Ganne  .type = VLIB_NODE_TYPE_INPUT,
661fe750c24SBenoît Ganne  .state = VLIB_NODE_STATE_DISABLED,
662fe750c24SBenoît Ganne  .n_errors = RDMA_INPUT_N_ERROR,
663fe750c24SBenoît Ganne  .error_strings = rdma_input_error_strings,
664fe750c24SBenoît Ganne};
665fe750c24SBenoît Ganne
666fe750c24SBenoît Ganne/* *INDENT-ON* */
667fe750c24SBenoît Ganne
668fe750c24SBenoît Ganne
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */