/*
 *------------------------------------------------------------------
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#ifndef _RDMA_H_
#define _RDMA_H_

#include <infiniband/verbs.h>
#include <vlib/log.h>
#include <vlib/pci/pci.h>
#include <vnet/interface.h>
#include <vnet/ethernet/mac_address.h>
#include <rdma/rdma_mlx5dv.h>

#define foreach_rdma_device_flags \
  _(0, ERROR, "error") \
  _(1, ADMIN_UP, "admin-up") \
  _(2, LINK_UP, "link-up") \
  _(3, PROMISC, "promiscuous") \
  _(4, MLX5DV, "mlx5dv")

enum
{
#define _(a, b, c) RDMA_DEVICE_F_##b = (1 << a),
  foreach_rdma_device_flags
#undef _
};

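/*
 * Illustrative sketch (not part of this header): the X-macro above
 * expands to bit flags such as RDMA_DEVICE_F_LINK_UP = (1 << 2).
 * Assuming a rdma_device_t pointer 'rd' (defined later in this file),
 * a typical flag test and update would look like:
 *
 *   if (rd->flags & RDMA_DEVICE_F_ERROR)
 *     return;                               // device is unusable
 *   rd->flags |= RDMA_DEVICE_F_ADMIN_UP;    // set a flag
 *   rd->flags &= ~RDMA_DEVICE_F_PROMISC;    // clear a flag
 */
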
#ifndef MLX5_ETH_L2_INLINE_HEADER_SIZE
#define MLX5_ETH_L2_INLINE_HEADER_SIZE  18
#endif

typedef struct
{
  CLIB_ALIGN_MARK (align0, MLX5_SEND_WQE_BB);
  union
  {
    struct mlx5_wqe_ctrl_seg ctrl;
    struct
    {
      u8 opc_mod;
      u8 wqe_index_hi;
      u8 wqe_index_lo;
      u8 opcode;
    };
  };
  struct mlx5_wqe_eth_seg eseg;
  struct mlx5_wqe_data_seg dseg;
} rdma_mlx5_wqe_t;
#define RDMA_MLX5_WQE_SZ        sizeof(rdma_mlx5_wqe_t)
#define RDMA_MLX5_WQE_DS        (RDMA_MLX5_WQE_SZ/sizeof(struct mlx5_wqe_data_seg))
STATIC_ASSERT (RDMA_MLX5_WQE_SZ == MLX5_SEND_WQE_BB &&
	       RDMA_MLX5_WQE_SZ % sizeof (struct mlx5_wqe_data_seg) == 0,
	       "bad size");

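/*
 * Sizing note (informational, derived from the definitions above):
 * the static assert pins the whole WQE to exactly one send basic
 * block, i.e. RDMA_MLX5_WQE_SZ == MLX5_SEND_WQE_BB (64 bytes). With a
 * 16-byte struct mlx5_wqe_data_seg this gives
 *
 *   RDMA_MLX5_WQE_DS = 64 / 16 = 4
 *
 * data-segment units, the DS count programmed into the control
 * segment when such a WQE is posted.
 */
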
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  struct ibv_cq *cq;
  struct ibv_wq *wq;
  u32 *bufs;			/* vlib_buffer ring buffer */
  u32 size;			/* number of entries in bufs */
  u32 head;
  u32 tail;
  u32 cq_ci;			/* CQ consumer index (direct verbs) */
  u16 log2_cq_size;
  u16 n_mini_cqes;
  u16 n_mini_cqes_left;
  u16 last_cqe_flags;
  mlx5dv_cqe_t *cqes;
  mlx5dv_rwq_t *wqes;
  volatile u32 *wq_db;		/* WQ doorbell record */
  volatile u32 *cq_db;		/* CQ doorbell record */
  u32 cqn;
  u32 wqe_cnt;
  u32 wq_stride;
} rdma_rxq_t;

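/*
 * Note on CQE compression (direct verbs RX path, informational): with
 * mlx5 CQE compression the NIC may deliver a "title" CQE followed by
 * an array of compressed mini-CQEs. n_mini_cqes / n_mini_cqes_left
 * track the mini-CQE array currently being decompressed, while
 * last_cqe_flags caches the title CQE flags that apply to it.
 */
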
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  /* following fields are accessed in datapath */
  clib_spinlock_t lock;

  union
  {
    struct
    {
      /* ibverb datapath. Cache of cq, qp below */
      struct ibv_cq *ibv_cq;
      struct ibv_qp *ibv_qp;
    };
    struct
    {
      /* direct verbs datapath */
      rdma_mlx5_wqe_t *dv_sq_wqes;
      volatile u32 *dv_sq_dbrec;
      volatile u64 *dv_sq_db;
      struct mlx5_cqe64 *dv_cq_cqes;
      volatile u32 *dv_cq_dbrec;
    };
  };

  u32 *bufs;			/* vlib_buffer ring buffer */
  u16 head;
  u16 tail;
  u16 dv_cq_idx;		/* monotonic CQE index (valid only for direct verbs) */
  u8 bufs_log2sz;		/* log2 vlib_buffer entries */
  u8 dv_sq_log2sz:4;		/* log2 SQ WQE entries (valid only for direct verbs) */
  u8 dv_cq_log2sz:4;		/* log2 CQ CQE entries (valid only for direct verbs) */
    STRUCT_MARK (cacheline1);

  /* WQE template (valid only for direct verbs) */
  u8 dv_wqe_tmpl[64];

  /* end of 2nd 64-byte cacheline (or 1st 128-byte cacheline) */
    STRUCT_MARK (cacheline2);

  /* fields below are not accessed in datapath */
  struct ibv_cq *cq;
  struct ibv_qp *qp;

} rdma_txq_t;
STATIC_ASSERT_OFFSET_OF (rdma_txq_t, cacheline1, 64);
STATIC_ASSERT_OFFSET_OF (rdma_txq_t, cacheline2, 128);

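/*
 * Illustrative sketch (not part of this header): the union above is
 * discriminated by the per-device RDMA_DEVICE_F_MLX5DV flag, so a TX
 * routine selects its datapath once per burst, e.g.:
 *
 *   if (rd->flags & RDMA_DEVICE_F_MLX5DV)
 *     n_tx = ...;  // ring dv_sq_wqes, bump dv_sq_dbrec, write dv_sq_db
 *   else
 *     n_tx = ...;  // fall back to ibv_post_send () on ibv_qp
 */
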
#define RDMA_TXQ_DV_INVALID_ID  0xffffffff

#define RDMA_TXQ_BUF_SZ(txq)    (1U << (txq)->bufs_log2sz)
#define RDMA_TXQ_DV_SQ_SZ(txq)  (1U << (txq)->dv_sq_log2sz)
#define RDMA_TXQ_DV_CQ_SZ(txq)  (1U << (txq)->dv_cq_log2sz)

#define RDMA_TXQ_USED_SZ(head, tail)            ((u16)((u16)(tail) - (u16)(head)))
#define RDMA_TXQ_AVAIL_SZ(txq, head, tail)      ((u16)(RDMA_TXQ_BUF_SZ (txq) - RDMA_TXQ_USED_SZ (head, tail)))

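/*
 * Worked example (informational): head and tail are free-running u16
 * counters, so the modular subtraction in RDMA_TXQ_USED_SZ stays
 * correct across wraparound. With head = 0xfffe and tail = 0x0003:
 *
 *   RDMA_TXQ_USED_SZ (0xfffe, 0x0003) = (u16) (0x0003 - 0xfffe) = 5
 *
 * and with a 4096-entry ring (bufs_log2sz = 12):
 *
 *   RDMA_TXQ_AVAIL_SZ (txq, 0xfffe, 0x0003) = 4096 - 5 = 4091
 */
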
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  /* following fields are accessed in datapath */
  rdma_rxq_t *rxqs;
  rdma_txq_t *txqs;
  u32 flags;
  u32 per_interface_next_index;
  u32 sw_if_index;
  u32 hw_if_index;
  u32 lkey;			/* cache of mr->lkey */
  u8 pool;			/* buffer pool index */

  /* fields below are not accessed in datapath */
  vlib_pci_device_info_t *pci;
  u8 *name;
  u8 *linux_ifname;
  mac_address_t hwaddr;
  u32 async_event_clib_file_index;
  u32 dev_instance;

  struct ibv_context *ctx;
  struct ibv_pd *pd;
  struct ibv_mr *mr;
  struct ibv_qp *rx_qp;
  struct ibv_rwq_ind_table *rx_rwq_ind_tbl;
  struct ibv_flow *flow_ucast;
  struct ibv_flow *flow_mcast;

  clib_error_t *error;
} rdma_device_t;

typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  union
  {
    u16 cqe_flags[VLIB_FRAME_SIZE];
    u16x8 cqe_flags8[VLIB_FRAME_SIZE / 8];
    u16x16 cqe_flags16[VLIB_FRAME_SIZE / 16];
  };
  vlib_buffer_t buffer_template;
} rdma_per_thread_data_t;

typedef struct
{
  rdma_per_thread_data_t *per_thread_data;
  rdma_device_t *devices;
  vlib_log_class_t log_class;
  u16 msg_id_base;
} rdma_main_t;

extern rdma_main_t rdma_main;

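/*
 * Illustrative sketch (not part of this header): per_thread_data is a
 * vector with one entry per worker thread, so datapath nodes typically
 * index it with the current thread, e.g.:
 *
 *   rdma_main_t *rm = &rdma_main;
 *   rdma_per_thread_data_t *ptd =
 *     vec_elt_at_index (rm->per_thread_data, vm->thread_index);
 */
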
typedef enum
{
  RDMA_MODE_AUTO = 0,
  RDMA_MODE_IBV,
  RDMA_MODE_DV,
} rdma_mode_t;

typedef struct
{
  u8 *ifname;
  u8 *name;
  u32 rxq_size;
  u32 txq_size;
  u32 rxq_num;
  rdma_mode_t mode;

  /* return */
  int rv;
  u32 sw_if_index;
  clib_error_t *error;
} rdma_create_if_args_t;

void rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args);
void rdma_delete_if (vlib_main_t * vm, rdma_device_t * rd);

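/*
 * Illustrative usage sketch (assumes a Linux netdev named "enp1s0f0";
 * not part of this header): callers fill rdma_create_if_args_t and
 * check the return members afterwards, e.g.:
 *
 *   rdma_create_if_args_t args = { 0 };
 *   args.ifname = (u8 *) "enp1s0f0";
 *   args.rxq_size = 1024;
 *   args.txq_size = 1024;
 *   args.rxq_num = 2;
 *   args.mode = RDMA_MODE_AUTO;
 *   rdma_create_if (vm, &args);
 *   if (args.rv != 0)
 *     clib_error_report (args.error);
 */
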
extern vlib_node_registration_t rdma_input_node;
extern vnet_device_class_t rdma_device_class;

format_function_t format_rdma_device;
format_function_t format_rdma_device_name;
format_function_t format_rdma_input_trace;
format_function_t format_rdma_rxq;
unformat_function_t unformat_rdma_create_if_args;

typedef struct
{
  u32 next_index;
  u32 hw_if_index;
  u16 cqe_flags;
} rdma_input_trace_t;

#define foreach_rdma_tx_func_error \
_(SEGMENT_SIZE_EXCEEDED, "segment size exceeded") \
_(NO_FREE_SLOTS, "no free tx slots") \
_(SUBMISSION, "tx submission errors") \
_(COMPLETION, "tx completion errors")

typedef enum
{
#define _(f,s) RDMA_TX_ERROR_##f,
  foreach_rdma_tx_func_error
#undef _
    RDMA_TX_N_ERROR,
} rdma_tx_func_error_t;

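/*
 * Illustrative sketch (not part of this header): a companion .c file
 * can reuse the same X-macro to build the counter-name table that goes
 * with this enum, e.g.:
 *
 *   static char *rdma_tx_func_error_strings[] = {
 *   #define _(n, s) s,
 *     foreach_rdma_tx_func_error
 *   #undef _
 *   };
 */
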
#endif /* _RDMA_H_ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */