vppcom.c revision 64cf459b
1/*
2 * Copyright (c) 2017-2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <vcl/vppcom.h>
19#include <vcl/vcl_debug.h>
20#include <vcl/vcl_private.h>
21#include <svm/fifo_segment.h>
22
/* Thread-local VCL worker index, initialized to ~0.
 * NOTE(review): ~0 is also the value vppcom_app_exit() hands to
 * vcl_set_worker_index(), so it presumably means "no worker bound to this
 * thread" -- confirm against vcl_private.h. */
__thread uword __vcl_worker_index = ~0;
24
25static int
26vcl_wait_for_segment (u64 segment_handle)
27{
28  vcl_worker_t *wrk = vcl_worker_get_current ();
29  u32 wait_for_seconds = 10, segment_index;
30  f64 timeout;
31
32  if (segment_handle == VCL_INVALID_SEGMENT_HANDLE)
33    return 0;
34
35  timeout = clib_time_now (&wrk->clib_time) + wait_for_seconds;
36  while (clib_time_now (&wrk->clib_time) < timeout)
37    {
38      segment_index = vcl_segment_table_lookup (segment_handle);
39      if (segment_index != VCL_INVALID_SEGMENT_INDEX)
40	return 0;
41      usleep (10);
42    }
43  return 1;
44}
45
46static inline int
47vcl_mq_dequeue_batch (vcl_worker_t * wrk, svm_msg_q_t * mq, u32 n_max_msg)
48{
49  svm_msg_q_msg_t *msg;
50  u32 n_msgs;
51  int i;
52
53  n_msgs = clib_min (svm_msg_q_size (mq), n_max_msg);
54  for (i = 0; i < n_msgs; i++)
55    {
56      vec_add2 (wrk->mq_msg_vector, msg, 1);
57      svm_msg_q_sub_w_lock (mq, msg);
58    }
59  return n_msgs;
60}
61
62const char *
63vppcom_session_state_str (vcl_session_state_t state)
64{
65  char *st;
66
67  switch (state)
68    {
69    case STATE_START:
70      st = "STATE_START";
71      break;
72
73    case STATE_CONNECT:
74      st = "STATE_CONNECT";
75      break;
76
77    case STATE_LISTEN:
78      st = "STATE_LISTEN";
79      break;
80
81    case STATE_ACCEPT:
82      st = "STATE_ACCEPT";
83      break;
84
85    case STATE_VPP_CLOSING:
86      st = "STATE_VPP_CLOSING";
87      break;
88
89    case STATE_DISCONNECT:
90      st = "STATE_DISCONNECT";
91      break;
92
93    case STATE_FAILED:
94      st = "STATE_FAILED";
95      break;
96
97    case STATE_UPDATED:
98      st = "STATE_UPDATED";
99      break;
100
101    case STATE_LISTEN_NO_MQ:
102      st = "STATE_LISTEN_NO_MQ";
103      break;
104
105    default:
106      st = "UNKNOWN_STATE";
107      break;
108    }
109
110  return st;
111}
112
113u8 *
114format_ip4_address (u8 * s, va_list * args)
115{
116  u8 *a = va_arg (*args, u8 *);
117  return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
118}
119
120u8 *
121format_ip6_address (u8 * s, va_list * args)
122{
123  ip6_address_t *a = va_arg (*args, ip6_address_t *);
124  u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
125
126  i_max_n_zero = ARRAY_LEN (a->as_u16);
127  max_n_zeros = 0;
128  i_first_zero = i_max_n_zero;
129  n_zeros = 0;
130  for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
131    {
132      u32 is_zero = a->as_u16[i] == 0;
133      if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
134	{
135	  i_first_zero = i;
136	  n_zeros = 0;
137	}
138      n_zeros += is_zero;
139      if ((!is_zero && n_zeros > max_n_zeros)
140	  || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
141	{
142	  i_max_n_zero = i_first_zero;
143	  max_n_zeros = n_zeros;
144	  i_first_zero = ARRAY_LEN (a->as_u16);
145	  n_zeros = 0;
146	}
147    }
148
149  last_double_colon = 0;
150  for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
151    {
152      if (i == i_max_n_zero && max_n_zeros > 1)
153	{
154	  s = format (s, "::");
155	  i += max_n_zeros - 1;
156	  last_double_colon = 1;
157	}
158      else
159	{
160	  s = format (s, "%s%x",
161		      (last_double_colon || i == 0) ? "" : ":",
162		      clib_net_to_host_u16 (a->as_u16[i]));
163	  last_double_colon = 0;
164	}
165    }
166
167  return s;
168}
169
170/* Format an IP46 address. */
171u8 *
172format_ip46_address (u8 * s, va_list * args)
173{
174  ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
175  ip46_type_t type = va_arg (*args, ip46_type_t);
176  int is_ip4 = 1;
177
178  switch (type)
179    {
180    case IP46_TYPE_ANY:
181      is_ip4 = ip46_address_is_ip4 (ip46);
182      break;
183    case IP46_TYPE_IP4:
184      is_ip4 = 1;
185      break;
186    case IP46_TYPE_IP6:
187      is_ip4 = 0;
188      break;
189    }
190
191  return is_ip4 ?
192    format (s, "%U", format_ip4_address, &ip46->ip4) :
193    format (s, "%U", format_ip6_address, &ip46->ip6);
194}
195
196/*
197 * VPPCOM Utility Functions
198 */
199
200static void
201vcl_send_session_listen (vcl_worker_t * wrk, vcl_session_t * s)
202{
203  app_session_evt_t _app_evt, *app_evt = &_app_evt;
204  session_listen_msg_t *mp;
205  svm_msg_q_t *mq;
206
207  mq = vcl_worker_ctrl_mq (wrk);
208  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_LISTEN);
209  mp = (session_listen_msg_t *) app_evt->evt->data;
210  memset (mp, 0, sizeof (*mp));
211  mp->client_index = wrk->my_client_index;
212  mp->context = s->session_index;
213  mp->wrk_index = wrk->vpp_wrk_index;
214  mp->is_ip4 = s->transport.is_ip4;
215  clib_memcpy_fast (&mp->ip, &s->transport.lcl_ip, sizeof (mp->ip));
216  mp->port = s->transport.lcl_port;
217  mp->proto = s->session_type;
218  app_send_ctrl_evt_to_vpp (mq, app_evt);
219}
220
221static void
222vcl_send_session_connect (vcl_worker_t * wrk, vcl_session_t * s)
223{
224  app_session_evt_t _app_evt, *app_evt = &_app_evt;
225  session_connect_msg_t *mp;
226  svm_msg_q_t *mq;
227
228  mq = vcl_worker_ctrl_mq (wrk);
229  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_CONNECT);
230  mp = (session_connect_msg_t *) app_evt->evt->data;
231  memset (mp, 0, sizeof (*mp));
232  mp->client_index = wrk->my_client_index;
233  mp->context = s->session_index;
234  mp->wrk_index = wrk->vpp_wrk_index;
235  mp->is_ip4 = s->transport.is_ip4;
236  mp->parent_handle = s->parent_handle;
237  clib_memcpy_fast (&mp->ip, &s->transport.rmt_ip, sizeof (mp->ip));
238  clib_memcpy_fast (&mp->lcl_ip, &s->transport.lcl_ip, sizeof (mp->lcl_ip));
239  mp->port = s->transport.rmt_port;
240  mp->proto = s->session_type;
241  app_send_ctrl_evt_to_vpp (mq, app_evt);
242}
243
244void
245vcl_send_session_unlisten (vcl_worker_t * wrk, vcl_session_t * s)
246{
247  app_session_evt_t _app_evt, *app_evt = &_app_evt;
248  session_unlisten_msg_t *mp;
249  svm_msg_q_t *mq;
250
251  mq = vcl_worker_ctrl_mq (wrk);
252  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_UNLISTEN);
253  mp = (session_unlisten_msg_t *) app_evt->evt->data;
254  memset (mp, 0, sizeof (*mp));
255  mp->client_index = wrk->my_client_index;
256  mp->wrk_index = wrk->vpp_wrk_index;
257  mp->handle = s->vpp_handle;
258  mp->context = wrk->wrk_index;
259  app_send_ctrl_evt_to_vpp (mq, app_evt);
260}
261
262static void
263vcl_send_session_disconnect (vcl_worker_t * wrk, vcl_session_t * s)
264{
265  app_session_evt_t _app_evt, *app_evt = &_app_evt;
266  session_disconnect_msg_t *mp;
267  svm_msg_q_t *mq;
268
269  /* Send to thread that owns the session */
270  mq = s->vpp_evt_q;
271  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_DISCONNECT);
272  mp = (session_disconnect_msg_t *) app_evt->evt->data;
273  memset (mp, 0, sizeof (*mp));
274  mp->client_index = wrk->my_client_index;
275  mp->handle = s->vpp_handle;
276  app_send_ctrl_evt_to_vpp (mq, app_evt);
277}
278
279static void
280vcl_send_app_detach (vcl_worker_t * wrk)
281{
282  app_session_evt_t _app_evt, *app_evt = &_app_evt;
283  session_app_detach_msg_t *mp;
284  svm_msg_q_t *mq;
285
286  mq = vcl_worker_ctrl_mq (wrk);
287  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_APP_DETACH);
288  mp = (session_app_detach_msg_t *) app_evt->evt->data;
289  memset (mp, 0, sizeof (*mp));
290  mp->client_index = wrk->my_client_index;
291  app_send_ctrl_evt_to_vpp (mq, app_evt);
292}
293
294static void
295vcl_send_session_accepted_reply (svm_msg_q_t * mq, u32 context,
296				 session_handle_t handle, int retval)
297{
298  app_session_evt_t _app_evt, *app_evt = &_app_evt;
299  session_accepted_reply_msg_t *rmp;
300  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_ACCEPTED_REPLY);
301  rmp = (session_accepted_reply_msg_t *) app_evt->evt->data;
302  rmp->handle = handle;
303  rmp->context = context;
304  rmp->retval = retval;
305  app_send_ctrl_evt_to_vpp (mq, app_evt);
306}
307
308static void
309vcl_send_session_disconnected_reply (svm_msg_q_t * mq, u32 context,
310				     session_handle_t handle, int retval)
311{
312  app_session_evt_t _app_evt, *app_evt = &_app_evt;
313  session_disconnected_reply_msg_t *rmp;
314  app_alloc_ctrl_evt_to_vpp (mq, app_evt,
315			     SESSION_CTRL_EVT_DISCONNECTED_REPLY);
316  rmp = (session_disconnected_reply_msg_t *) app_evt->evt->data;
317  rmp->handle = handle;
318  rmp->context = context;
319  rmp->retval = retval;
320  app_send_ctrl_evt_to_vpp (mq, app_evt);
321}
322
323static void
324vcl_send_session_reset_reply (svm_msg_q_t * mq, u32 context,
325			      session_handle_t handle, int retval)
326{
327  app_session_evt_t _app_evt, *app_evt = &_app_evt;
328  session_reset_reply_msg_t *rmp;
329  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_RESET_REPLY);
330  rmp = (session_reset_reply_msg_t *) app_evt->evt->data;
331  rmp->handle = handle;
332  rmp->context = context;
333  rmp->retval = retval;
334  app_send_ctrl_evt_to_vpp (mq, app_evt);
335}
336
337void
338vcl_send_session_worker_update (vcl_worker_t * wrk, vcl_session_t * s,
339				u32 wrk_index)
340{
341  app_session_evt_t _app_evt, *app_evt = &_app_evt;
342  session_worker_update_msg_t *mp;
343  svm_msg_q_t *mq;
344
345  mq = vcl_session_vpp_evt_q (wrk, s);
346  app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_WORKER_UPDATE);
347  mp = (session_worker_update_msg_t *) app_evt->evt->data;
348  mp->client_index = wrk->my_client_index;
349  mp->handle = s->vpp_handle;
350  mp->req_wrk_index = wrk->vpp_wrk_index;
351  mp->wrk_index = wrk_index;
352  app_send_ctrl_evt_to_vpp (mq, app_evt);
353}
354
355static u32
356vcl_session_accepted_handler (vcl_worker_t * wrk, session_accepted_msg_t * mp,
357			      u32 ls_index)
358{
359  vcl_session_t *session, *listen_session;
360  svm_fifo_t *rx_fifo, *tx_fifo;
361  u32 vpp_wrk_index;
362  svm_msg_q_t *evt_q;
363
364  session = vcl_session_alloc (wrk);
365
366  listen_session = vcl_session_get (wrk, ls_index);
367  if (listen_session->vpp_handle != mp->listener_handle)
368    {
369      VDBG (0, "ERROR: listener handle %lu does not match session %u",
370	    mp->listener_handle, ls_index);
371      goto error;
372    }
373
374  if (vcl_wait_for_segment (mp->segment_handle))
375    {
376      VDBG (0, "ERROR: segment for session %u couldn't be mounted!",
377	    session->session_index);
378      goto error;
379    }
380
381  rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
382  tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
383  session->vpp_evt_q = uword_to_pointer (mp->vpp_event_queue_address,
384					 svm_msg_q_t *);
385  rx_fifo->client_session_index = session->session_index;
386  tx_fifo->client_session_index = session->session_index;
387  rx_fifo->client_thread_index = vcl_get_worker_index ();
388  tx_fifo->client_thread_index = vcl_get_worker_index ();
389  vpp_wrk_index = tx_fifo->master_thread_index;
390  vec_validate (wrk->vpp_event_queues, vpp_wrk_index);
391  wrk->vpp_event_queues[vpp_wrk_index] = session->vpp_evt_q;
392
393  session->vpp_handle = mp->handle;
394  session->vpp_thread_index = rx_fifo->master_thread_index;
395  session->rx_fifo = rx_fifo;
396  session->tx_fifo = tx_fifo;
397
398  session->session_state = STATE_ACCEPT;
399  session->transport.rmt_port = mp->rmt.port;
400  session->transport.is_ip4 = mp->rmt.is_ip4;
401  clib_memcpy_fast (&session->transport.rmt_ip, &mp->rmt.ip,
402		    sizeof (ip46_address_t));
403
404  vcl_session_table_add_vpp_handle (wrk, mp->handle, session->session_index);
405  session->transport.lcl_port = listen_session->transport.lcl_port;
406  session->transport.lcl_ip = listen_session->transport.lcl_ip;
407  session->session_type = listen_session->session_type;
408  session->is_dgram = vcl_proto_is_dgram (session->session_type);
409  session->listener_index = listen_session->session_index;
410  listen_session->n_accepted_sessions++;
411
412  VDBG (1, "session %u [0x%llx]: client accept request from %s address %U"
413	" port %d queue %p!", session->session_index, mp->handle,
414	mp->rmt.is_ip4 ? "IPv4" : "IPv6", format_ip46_address, &mp->rmt.ip,
415	mp->rmt.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
416	clib_net_to_host_u16 (mp->rmt.port), session->vpp_evt_q);
417  vcl_evt (VCL_EVT_ACCEPT, session, listen_session, session_index);
418
419  vcl_send_session_accepted_reply (session->vpp_evt_q, mp->context,
420				   session->vpp_handle, 0);
421
422  return session->session_index;
423
424error:
425  evt_q = uword_to_pointer (mp->vpp_event_queue_address, svm_msg_q_t *);
426  vcl_send_session_accepted_reply (evt_q, mp->context, mp->handle,
427				   VNET_API_ERROR_INVALID_ARGUMENT);
428  vcl_session_free (wrk, session);
429  return VCL_INVALID_SESSION_INDEX;
430}
431
432static u32
433vcl_session_connected_handler (vcl_worker_t * wrk,
434			       session_connected_msg_t * mp)
435{
436  u32 session_index, vpp_wrk_index;
437  svm_fifo_t *rx_fifo, *tx_fifo;
438  vcl_session_t *session = 0;
439
440  session_index = mp->context;
441  session = vcl_session_get (wrk, session_index);
442  if (!session)
443    {
444      VDBG (0, "ERROR: vpp handle 0x%llx has no session index (%u)!",
445	    mp->handle, session_index);
446      return VCL_INVALID_SESSION_INDEX;
447    }
448  if (mp->retval)
449    {
450      VDBG (0, "ERROR: session index %u: connect failed! %U",
451	    session_index, format_api_error, ntohl (mp->retval));
452      session->session_state = STATE_FAILED | STATE_DISCONNECT;
453      session->vpp_handle = mp->handle;
454      return session_index;
455    }
456
457  session->vpp_handle = mp->handle;
458  session->vpp_evt_q = uword_to_pointer (mp->vpp_event_queue_address,
459					 svm_msg_q_t *);
460  rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
461  tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
462  if (vcl_wait_for_segment (mp->segment_handle))
463    {
464      VDBG (0, "segment for session %u couldn't be mounted!",
465	    session->session_index);
466      session->session_state = STATE_FAILED | STATE_DISCONNECT;
467      vcl_send_session_disconnect (wrk, session);
468      return session_index;
469    }
470
471  rx_fifo->client_session_index = session_index;
472  tx_fifo->client_session_index = session_index;
473  rx_fifo->client_thread_index = vcl_get_worker_index ();
474  tx_fifo->client_thread_index = vcl_get_worker_index ();
475
476  vpp_wrk_index = tx_fifo->master_thread_index;
477  vec_validate (wrk->vpp_event_queues, vpp_wrk_index);
478  wrk->vpp_event_queues[vpp_wrk_index] = session->vpp_evt_q;
479
480  if (mp->ct_rx_fifo)
481    {
482      session->ct_rx_fifo = uword_to_pointer (mp->ct_rx_fifo, svm_fifo_t *);
483      session->ct_tx_fifo = uword_to_pointer (mp->ct_tx_fifo, svm_fifo_t *);
484      if (vcl_wait_for_segment (mp->ct_segment_handle))
485	{
486	  VDBG (0, "ct segment for session %u couldn't be mounted!",
487		session->session_index);
488	  session->session_state = STATE_FAILED | STATE_DISCONNECT;
489	  vcl_send_session_disconnect (wrk, session);
490	  return session_index;
491	}
492    }
493
494  session->rx_fifo = rx_fifo;
495  session->tx_fifo = tx_fifo;
496  session->vpp_thread_index = rx_fifo->master_thread_index;
497  session->transport.is_ip4 = mp->lcl.is_ip4;
498  clib_memcpy_fast (&session->transport.lcl_ip, &mp->lcl.ip,
499		    sizeof (session->transport.lcl_ip));
500  session->transport.lcl_port = mp->lcl.port;
501  session->session_state = STATE_CONNECT;
502
503  /* Add it to lookup table */
504  vcl_session_table_add_vpp_handle (wrk, mp->handle, session_index);
505
506  VDBG (1, "session %u [0x%llx] connected! rx_fifo %p, refcnt %d, tx_fifo %p,"
507	" refcnt %d", session_index, mp->handle, session->rx_fifo,
508	session->rx_fifo->refcnt, session->tx_fifo, session->tx_fifo->refcnt);
509
510  return session_index;
511}
512
513static int
514vcl_flag_accepted_session (vcl_session_t * session, u64 handle, u32 flags)
515{
516  vcl_session_msg_t *accepted_msg;
517  int i;
518
519  for (i = 0; i < vec_len (session->accept_evts_fifo); i++)
520    {
521      accepted_msg = &session->accept_evts_fifo[i];
522      if (accepted_msg->accepted_msg.handle == handle)
523	{
524	  accepted_msg->flags |= flags;
525	  return 1;
526	}
527    }
528  return 0;
529}
530
531static u32
532vcl_session_reset_handler (vcl_worker_t * wrk,
533			   session_reset_msg_t * reset_msg)
534{
535  vcl_session_t *session;
536  u32 sid;
537
538  sid = vcl_session_index_from_vpp_handle (wrk, reset_msg->handle);
539  session = vcl_session_get (wrk, sid);
540  if (!session)
541    {
542      VDBG (0, "request to reset unknown handle 0x%llx", reset_msg->handle);
543      return VCL_INVALID_SESSION_INDEX;
544    }
545
546  /* Caught a reset before actually accepting the session */
547  if (session->session_state == STATE_LISTEN)
548    {
549
550      if (!vcl_flag_accepted_session (session, reset_msg->handle,
551				      VCL_ACCEPTED_F_RESET))
552	VDBG (0, "session was not accepted!");
553      return VCL_INVALID_SESSION_INDEX;
554    }
555
556  session->session_state = STATE_DISCONNECT;
557  VDBG (0, "reset session %u [0x%llx]", sid, reset_msg->handle);
558  return sid;
559}
560
561static u32
562vcl_session_bound_handler (vcl_worker_t * wrk, session_bound_msg_t * mp)
563{
564  vcl_session_t *session;
565  u32 sid = mp->context;
566
567  session = vcl_session_get (wrk, sid);
568  if (mp->retval)
569    {
570      VERR ("session %u [0x%llx]: bind failed: %U", sid, mp->handle,
571	    format_api_error, mp->retval);
572      if (session)
573	{
574	  session->session_state = STATE_FAILED;
575	  session->vpp_handle = mp->handle;
576	  return sid;
577	}
578      else
579	{
580	  VDBG (0, "ERROR: session %u [0x%llx]: Invalid session index!",
581		sid, mp->handle);
582	  return VCL_INVALID_SESSION_INDEX;
583	}
584    }
585
586  session->vpp_handle = mp->handle;
587  session->transport.is_ip4 = mp->lcl_is_ip4;
588  clib_memcpy_fast (&session->transport.lcl_ip, mp->lcl_ip,
589		    sizeof (ip46_address_t));
590  session->transport.lcl_port = mp->lcl_port;
591  vcl_session_table_add_listener (wrk, mp->handle, sid);
592  session->session_state = STATE_LISTEN;
593
594  session->vpp_evt_q = uword_to_pointer (mp->vpp_evt_q, svm_msg_q_t *);
595  vec_validate (wrk->vpp_event_queues, 0);
596  wrk->vpp_event_queues[0] = session->vpp_evt_q;
597
598  if (session->is_dgram)
599    {
600      svm_fifo_t *rx_fifo, *tx_fifo;
601      session->vpp_evt_q = uword_to_pointer (mp->vpp_evt_q, svm_msg_q_t *);
602      rx_fifo = uword_to_pointer (mp->rx_fifo, svm_fifo_t *);
603      rx_fifo->client_session_index = sid;
604      tx_fifo = uword_to_pointer (mp->tx_fifo, svm_fifo_t *);
605      tx_fifo->client_session_index = sid;
606      session->rx_fifo = rx_fifo;
607      session->tx_fifo = tx_fifo;
608    }
609
610  VDBG (0, "session %u [0x%llx]: listen succeeded!", sid, mp->handle);
611  return sid;
612}
613
614static void
615vcl_session_unlisten_reply_handler (vcl_worker_t * wrk, void *data)
616{
617  session_unlisten_reply_msg_t *mp = (session_unlisten_reply_msg_t *) data;
618  vcl_session_t *s;
619
620  s = vcl_session_get_w_vpp_handle (wrk, mp->handle);
621  if (!s || s->session_state != STATE_DISCONNECT)
622    {
623      VDBG (0, "Unlisten reply with wrong handle %llx", mp->handle);
624      return;
625    }
626
627  if (mp->retval)
628    VDBG (0, "ERROR: session %u [0xllx]: unlisten failed: %U",
629	  s->session_index, mp->handle, format_api_error, ntohl (mp->retval));
630
631  if (mp->context != wrk->wrk_index)
632    VDBG (0, "wrong context");
633
634  vcl_session_table_del_vpp_handle (wrk, mp->handle);
635  vcl_session_free (wrk, s);
636}
637
638static vcl_session_t *
639vcl_session_accepted (vcl_worker_t * wrk, session_accepted_msg_t * msg)
640{
641  vcl_session_msg_t *vcl_msg;
642  vcl_session_t *session;
643
644  session = vcl_session_get_w_vpp_handle (wrk, msg->handle);
645  if (PREDICT_FALSE (session != 0))
646    VWRN ("session overlap handle %lu state %u!", msg->handle,
647	  session->session_state);
648
649  session = vcl_session_table_lookup_listener (wrk, msg->listener_handle);
650  if (!session)
651    {
652      VERR ("couldn't find listen session: listener handle %llx",
653	    msg->listener_handle);
654      return 0;
655    }
656
657  clib_fifo_add2 (session->accept_evts_fifo, vcl_msg);
658  vcl_msg->accepted_msg = *msg;
659  /* Session handle points to listener until fully accepted by app */
660  vcl_session_table_add_vpp_handle (wrk, msg->handle, session->session_index);
661
662  return session;
663}
664
665static vcl_session_t *
666vcl_session_disconnected_handler (vcl_worker_t * wrk,
667				  session_disconnected_msg_t * msg)
668{
669  vcl_session_t *session;
670
671  session = vcl_session_get_w_vpp_handle (wrk, msg->handle);
672  if (!session)
673    {
674      VDBG (0, "request to disconnect unknown handle 0x%llx", msg->handle);
675      return 0;
676    }
677
678  /* Caught a disconnect before actually accepting the session */
679  if (session->session_state == STATE_LISTEN)
680    {
681      if (!vcl_flag_accepted_session (session, msg->handle,
682				      VCL_ACCEPTED_F_CLOSED))
683	VDBG (0, "session was not accepted!");
684      return 0;
685    }
686
687  session->session_state = STATE_VPP_CLOSING;
688  return session;
689}
690
691static void
692vcl_session_req_worker_update_handler (vcl_worker_t * wrk, void *data)
693{
694  session_req_worker_update_msg_t *msg;
695  vcl_session_t *s;
696
697  msg = (session_req_worker_update_msg_t *) data;
698  s = vcl_session_get_w_vpp_handle (wrk, msg->session_handle);
699  if (!s)
700    return;
701
702  vec_add1 (wrk->pending_session_wrk_updates, s->session_index);
703}
704
705static void
706vcl_session_worker_update_reply_handler (vcl_worker_t * wrk, void *data)
707{
708  session_worker_update_reply_msg_t *msg;
709  vcl_session_t *s;
710
711  msg = (session_worker_update_reply_msg_t *) data;
712  s = vcl_session_get_w_vpp_handle (wrk, msg->handle);
713  if (!s)
714    {
715      VDBG (0, "unknown handle 0x%llx", msg->handle);
716      return;
717    }
718  if (vcl_wait_for_segment (msg->segment_handle))
719    {
720      clib_warning ("segment for session %u couldn't be mounted!",
721		    s->session_index);
722      return;
723    }
724
725  if (s->rx_fifo)
726    {
727      s->rx_fifo = uword_to_pointer (msg->rx_fifo, svm_fifo_t *);
728      s->tx_fifo = uword_to_pointer (msg->tx_fifo, svm_fifo_t *);
729      s->rx_fifo->client_session_index = s->session_index;
730      s->tx_fifo->client_session_index = s->session_index;
731      s->rx_fifo->client_thread_index = wrk->wrk_index;
732      s->tx_fifo->client_thread_index = wrk->wrk_index;
733    }
734  s->session_state = STATE_UPDATED;
735
736  VDBG (0, "session %u[0x%llx] moved to worker %u", s->session_index,
737	s->vpp_handle, wrk->wrk_index);
738}
739
740static int
741vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e)
742{
743  session_disconnected_msg_t *disconnected_msg;
744  vcl_session_t *session;
745
746  switch (e->event_type)
747    {
748    case SESSION_IO_EVT_RX:
749    case SESSION_IO_EVT_TX:
750      session = vcl_session_get (wrk, e->session_index);
751      if (!session || !(session->session_state & STATE_OPEN))
752	break;
753      vec_add1 (wrk->unhandled_evts_vector, *e);
754      break;
755    case SESSION_CTRL_EVT_ACCEPTED:
756      vcl_session_accepted (wrk, (session_accepted_msg_t *) e->data);
757      break;
758    case SESSION_CTRL_EVT_CONNECTED:
759      vcl_session_connected_handler (wrk,
760				     (session_connected_msg_t *) e->data);
761      break;
762    case SESSION_CTRL_EVT_DISCONNECTED:
763      disconnected_msg = (session_disconnected_msg_t *) e->data;
764      session = vcl_session_disconnected_handler (wrk, disconnected_msg);
765      if (!session)
766	break;
767      VDBG (0, "disconnected session %u [0x%llx]", session->session_index,
768	    session->vpp_handle);
769      break;
770    case SESSION_CTRL_EVT_RESET:
771      vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
772      break;
773    case SESSION_CTRL_EVT_BOUND:
774      vcl_session_bound_handler (wrk, (session_bound_msg_t *) e->data);
775      break;
776    case SESSION_CTRL_EVT_UNLISTEN_REPLY:
777      vcl_session_unlisten_reply_handler (wrk, e->data);
778      break;
779    case SESSION_CTRL_EVT_REQ_WORKER_UPDATE:
780      vcl_session_req_worker_update_handler (wrk, e->data);
781      break;
782    case SESSION_CTRL_EVT_WORKER_UPDATE_REPLY:
783      vcl_session_worker_update_reply_handler (wrk, e->data);
784      break;
785    default:
786      clib_warning ("unhandled %u", e->event_type);
787    }
788  return VPPCOM_OK;
789}
790
791static int
792vppcom_wait_for_session_state_change (u32 session_index,
793				      vcl_session_state_t state,
794				      f64 wait_for_time)
795{
796  vcl_worker_t *wrk = vcl_worker_get_current ();
797  f64 timeout = clib_time_now (&wrk->clib_time) + wait_for_time;
798  vcl_session_t *volatile session;
799  svm_msg_q_msg_t msg;
800  session_event_t *e;
801
802  do
803    {
804      session = vcl_session_get (wrk, session_index);
805      if (PREDICT_FALSE (!session))
806	{
807	  return VPPCOM_EBADFD;
808	}
809      if (session->session_state & state)
810	{
811	  return VPPCOM_OK;
812	}
813      if (session->session_state & STATE_FAILED)
814	{
815	  return VPPCOM_ECONNREFUSED;
816	}
817
818      if (svm_msg_q_sub (wrk->app_event_queue, &msg, SVM_Q_NOWAIT, 0))
819	{
820	  usleep (100);
821	  continue;
822	}
823      e = svm_msg_q_msg_data (wrk->app_event_queue, &msg);
824      vcl_handle_mq_event (wrk, e);
825      svm_msg_q_free_msg (wrk->app_event_queue, &msg);
826    }
827  while (clib_time_now (&wrk->clib_time) < timeout);
828
829  VDBG (0, "timeout waiting for state 0x%x (%s)", state,
830	vppcom_session_state_str (state));
831  vcl_evt (VCL_EVT_SESSION_TIMEOUT, session, session_state);
832
833  return VPPCOM_ETIMEDOUT;
834}
835
836static void
837vcl_handle_pending_wrk_updates (vcl_worker_t * wrk)
838{
839  vcl_session_state_t state;
840  vcl_session_t *s;
841  u32 *sip;
842
843  if (PREDICT_TRUE (vec_len (wrk->pending_session_wrk_updates) == 0))
844    return;
845
846  vec_foreach (sip, wrk->pending_session_wrk_updates)
847  {
848    s = vcl_session_get (wrk, *sip);
849    vcl_send_session_worker_update (wrk, s, wrk->wrk_index);
850    state = s->session_state;
851    vppcom_wait_for_session_state_change (s->session_index, STATE_UPDATED, 5);
852    s->session_state = state;
853  }
854  vec_reset_length (wrk->pending_session_wrk_updates);
855}
856
857void
858vcl_flush_mq_events (void)
859{
860  vcl_worker_t *wrk = vcl_worker_get_current ();
861  svm_msg_q_msg_t *msg;
862  session_event_t *e;
863  svm_msg_q_t *mq;
864  int i;
865
866  mq = wrk->app_event_queue;
867  svm_msg_q_lock (mq);
868  vcl_mq_dequeue_batch (wrk, mq, ~0);
869  svm_msg_q_unlock (mq);
870
871  for (i = 0; i < vec_len (wrk->mq_msg_vector); i++)
872    {
873      msg = vec_elt_at_index (wrk->mq_msg_vector, i);
874      e = svm_msg_q_msg_data (mq, msg);
875      vcl_handle_mq_event (wrk, e);
876      svm_msg_q_free_msg (mq, msg);
877    }
878  vec_reset_length (wrk->mq_msg_vector);
879  vcl_handle_pending_wrk_updates (wrk);
880}
881
882static int
883vppcom_app_session_enable (void)
884{
885  int rv;
886
887  if (vcm->app_state != STATE_APP_ENABLED)
888    {
889      vppcom_send_session_enable_disable (1 /* is_enabled == TRUE */ );
890      rv = vcl_wait_for_app_state_change (STATE_APP_ENABLED);
891      if (PREDICT_FALSE (rv))
892	{
893	  VDBG (0, "application session enable timed out! returning %d (%s)",
894		rv, vppcom_retval_str (rv));
895	  return rv;
896	}
897    }
898  return VPPCOM_OK;
899}
900
901static int
902vppcom_app_attach (void)
903{
904  int rv;
905
906  vppcom_app_send_attach ();
907  rv = vcl_wait_for_app_state_change (STATE_APP_ATTACHED);
908  if (PREDICT_FALSE (rv))
909    {
910      VDBG (0, "application attach timed out! returning %d (%s)", rv,
911	    vppcom_retval_str (rv));
912      return rv;
913    }
914
915  return VPPCOM_OK;
916}
917
918static int
919vppcom_session_unbind (u32 session_handle)
920{
921  vcl_worker_t *wrk = vcl_worker_get_current ();
922  session_accepted_msg_t *accepted_msg;
923  vcl_session_t *session = 0;
924  vcl_session_msg_t *evt;
925
926  session = vcl_session_get_w_handle (wrk, session_handle);
927  if (!session)
928    return VPPCOM_EBADFD;
929
930  /* Flush pending accept events, if any */
931  while (clib_fifo_elts (session->accept_evts_fifo))
932    {
933      clib_fifo_sub2 (session->accept_evts_fifo, evt);
934      accepted_msg = &evt->accepted_msg;
935      vcl_session_table_del_vpp_handle (wrk, accepted_msg->handle);
936      vcl_send_session_accepted_reply (session->vpp_evt_q,
937				       accepted_msg->context,
938				       session->vpp_handle, -1);
939    }
940  clib_fifo_free (session->accept_evts_fifo);
941
942  vcl_send_session_unlisten (wrk, session);
943
944  VDBG (1, "session %u [0x%llx]: sending unbind!", session->session_index,
945	session->vpp_handle);
946  vcl_evt (VCL_EVT_UNBIND, session);
947
948  session->vpp_handle = ~0;
949  session->session_state = STATE_DISCONNECT;
950
951  return VPPCOM_OK;
952}
953
954static int
955vppcom_session_disconnect (u32 session_handle)
956{
957  vcl_worker_t *wrk = vcl_worker_get_current ();
958  svm_msg_q_t *vpp_evt_q;
959  vcl_session_t *session, *listen_session;
960  vcl_session_state_t state;
961  u64 vpp_handle;
962
963  session = vcl_session_get_w_handle (wrk, session_handle);
964  if (!session)
965    return VPPCOM_EBADFD;
966
967  vpp_handle = session->vpp_handle;
968  state = session->session_state;
969
970  VDBG (1, "session %u [0x%llx] state 0x%x (%s)", session->session_index,
971	vpp_handle, state, vppcom_session_state_str (state));
972
973  if (PREDICT_FALSE (state & STATE_LISTEN))
974    {
975      VDBG (0, "ERROR: Cannot disconnect a listen socket!");
976      return VPPCOM_EBADFD;
977    }
978
979  if (state & STATE_VPP_CLOSING)
980    {
981      vpp_evt_q = vcl_session_vpp_evt_q (wrk, session);
982      vcl_send_session_disconnected_reply (vpp_evt_q, wrk->my_client_index,
983					   vpp_handle, 0);
984      VDBG (1, "session %u [0x%llx]: sending disconnect REPLY...",
985	    session->session_index, vpp_handle);
986    }
987  else
988    {
989      VDBG (1, "session %u [0x%llx]: sending disconnect...",
990	    session->session_index, vpp_handle);
991      vcl_send_session_disconnect (wrk, session);
992    }
993
994  if (session->listener_index != VCL_INVALID_SESSION_INDEX)
995    {
996      listen_session = vcl_session_get (wrk, session->listener_index);
997      listen_session->n_accepted_sessions--;
998    }
999
1000  return VPPCOM_OK;
1001}
1002
1003/**
1004 * Handle app exit
1005 *
1006 * Notify vpp of the disconnect and mark the worker as free. If we're the
1007 * last worker, do a full cleanup otherwise, since we're probably a forked
1008 * child, avoid syscalls as much as possible. We might've lost privileges.
1009 */
1010void
1011vppcom_app_exit (void)
1012{
1013  if (!pool_elts (vcm->workers))
1014    return;
1015  vcl_worker_cleanup (vcl_worker_get_current (), 1 /* notify vpp */ );
1016  vcl_set_worker_index (~0);
1017  vcl_elog_stop (vcm);
1018}
1019
1020/*
1021 * VPPCOM Public API functions
1022 */
1023int
1024vppcom_app_create (char *app_name)
1025{
1026  vppcom_cfg_t *vcl_cfg = &vcm->cfg;
1027  int rv;
1028
1029  if (vcm->is_init)
1030    {
1031      VDBG (1, "already initialized");
1032      return VPPCOM_EEXIST;
1033    }
1034
1035  vcm->is_init = 1;
1036  vppcom_cfg (&vcm->cfg);
1037  vcl_cfg = &vcm->cfg;
1038
1039  vcm->main_cpu = pthread_self ();
1040  vcm->main_pid = getpid ();
1041  vcm->app_name = format (0, "%s", app_name);
1042  vppcom_init_error_string_table ();
1043  fifo_segment_main_init (&vcm->segment_main, vcl_cfg->segment_baseva,
1044			  20 /* timeout in secs */ );
1045  pool_alloc (vcm->workers, vcl_cfg->max_workers);
1046  clib_spinlock_init (&vcm->workers_lock);
1047  clib_rwlock_init (&vcm->segment_table_lock);
1048  atexit (vppcom_app_exit);
1049
1050  /* Allocate default worker */
1051  vcl_worker_alloc_and_init ();
1052
1053  /* API hookup and connect to VPP */
1054  vcl_elog_init (vcm);
1055  vcm->app_state = STATE_APP_START;
1056  rv = vppcom_connect_to_vpp (app_name);
1057  if (rv)
1058    {
1059      VERR ("couldn't connect to VPP!");
1060      return rv;
1061    }
1062  VDBG (0, "sending session enable");
1063  rv = vppcom_app_session_enable ();
1064  if (rv)
1065    {
1066      VERR ("vppcom_app_session_enable() failed!");
1067      return rv;
1068    }
1069
1070  VDBG (0, "sending app attach");
1071  rv = vppcom_app_attach ();
1072  if (rv)
1073    {
1074      VERR ("vppcom_app_attach() failed!");
1075      return rv;
1076    }
1077
1078  VDBG (0, "app_name '%s', my_client_index %d (0x%x)", app_name,
1079	vcm->workers[0].my_client_index, vcm->workers[0].my_client_index);
1080
1081  return VPPCOM_OK;
1082}
1083
1084void
1085vppcom_app_destroy (void)
1086{
1087  int rv;
1088  f64 orig_app_timeout;
1089
1090  if (!pool_elts (vcm->workers))
1091    return;
1092
1093  vcl_evt (VCL_EVT_DETACH, vcm);
1094
1095  if (pool_elts (vcm->workers) == 1)
1096    {
1097      vcl_send_app_detach (vcl_worker_get_current ());
1098      orig_app_timeout = vcm->cfg.app_timeout;
1099      vcm->cfg.app_timeout = 2.0;
1100      rv = vcl_wait_for_app_state_change (STATE_APP_ENABLED);
1101      vcm->cfg.app_timeout = orig_app_timeout;
1102      if (PREDICT_FALSE (rv))
1103	VDBG (0, "application detach timed out! returning %d (%s)", rv,
1104	      vppcom_retval_str (rv));
1105      vec_free (vcm->app_name);
1106      vcl_worker_cleanup (vcl_worker_get_current (), 0 /* notify vpp */ );
1107    }
1108  else
1109    {
1110      vcl_worker_cleanup (vcl_worker_get_current (), 1 /* notify vpp */ );
1111    }
1112
1113  vcl_set_worker_index (~0);
1114  vcl_elog_stop (vcm);
1115  vl_client_disconnect_from_vlib ();
1116}
1117
1118int
1119vppcom_session_create (u8 proto, u8 is_nonblocking)
1120{
1121  vcl_worker_t *wrk = vcl_worker_get_current ();
1122  vcl_session_t *session;
1123
1124  session = vcl_session_alloc (wrk);
1125
1126  session->session_type = proto;
1127  session->session_state = STATE_START;
1128  session->vpp_handle = ~0;
1129  session->is_dgram = vcl_proto_is_dgram (proto);
1130
1131  if (is_nonblocking)
1132    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_NONBLOCK);
1133
1134  vcl_evt (VCL_EVT_CREATE, session, session_type, session->session_state,
1135	   is_nonblocking, session_index);
1136
1137  VDBG (0, "created session %u", session->session_index);
1138
1139  return vcl_session_handle (session);
1140}
1141
1142int
1143vcl_session_cleanup (vcl_worker_t * wrk, vcl_session_t * session,
1144		     vcl_session_handle_t sh, u8 do_disconnect)
1145{
1146  vcl_session_state_t state;
1147  u32 next_sh, vep_sh;
1148  int rv = VPPCOM_OK;
1149  u64 vpp_handle;
1150  u8 is_vep;
1151
1152  is_vep = session->is_vep;
1153  next_sh = session->vep.next_sh;
1154  vep_sh = session->vep.vep_sh;
1155  state = session->session_state;
1156  vpp_handle = session->vpp_handle;
1157
1158  VDBG (1, "session %u [0x%llx] closing", session->session_index, vpp_handle);
1159
1160  if (is_vep)
1161    {
1162      while (next_sh != ~0)
1163	{
1164	  rv = vppcom_epoll_ctl (sh, EPOLL_CTL_DEL, next_sh, 0);
1165	  if (PREDICT_FALSE (rv < 0))
1166	    VDBG (0, "vpp handle 0x%llx, sh %u: EPOLL_CTL_DEL vep_idx %u"
1167		  " failed! rv %d (%s)", vpp_handle, next_sh, vep_sh, rv,
1168		  vppcom_retval_str (rv));
1169
1170	  next_sh = session->vep.next_sh;
1171	}
1172    }
1173  else
1174    {
1175      if (session->is_vep_session)
1176	{
1177	  rv = vppcom_epoll_ctl (vep_sh, EPOLL_CTL_DEL, sh, 0);
1178	  if (rv < 0)
1179	    VDBG (0, "session %u [0x%llx]: EPOLL_CTL_DEL vep_idx %u "
1180		  "failed! rv %d (%s)", session->session_index, vpp_handle,
1181		  vep_sh, rv, vppcom_retval_str (rv));
1182	}
1183
1184      if (!do_disconnect)
1185	{
1186	  VDBG (1, "session %u [0x%llx] disconnect skipped",
1187		session->session_index, vpp_handle);
1188	  goto cleanup;
1189	}
1190
1191      if (state & STATE_LISTEN)
1192	{
1193	  rv = vppcom_session_unbind (sh);
1194	  if (PREDICT_FALSE (rv < 0))
1195	    VDBG (0, "session %u [0x%llx]: listener unbind failed! "
1196		  "rv %d (%s)", session->session_index, vpp_handle, rv,
1197		  vppcom_retval_str (rv));
1198	  return rv;
1199	}
1200      else if ((state & STATE_OPEN)
1201	       || (vcl_session_is_connectable_listener (wrk, session)))
1202	{
1203	  rv = vppcom_session_disconnect (sh);
1204	  if (PREDICT_FALSE (rv < 0))
1205	    VDBG (0, "ERROR: session %u [0x%llx]: disconnect failed!"
1206		  " rv %d (%s)", session->session_index, vpp_handle,
1207		  rv, vppcom_retval_str (rv));
1208	}
1209      else if (state == STATE_DISCONNECT)
1210	{
1211	  svm_msg_q_t *mq = vcl_session_vpp_evt_q (wrk, session);
1212	  vcl_send_session_reset_reply (mq, wrk->my_client_index,
1213					session->vpp_handle, 0);
1214	}
1215    }
1216
1217  VDBG (0, "session %u [0x%llx] removed", session->session_index, vpp_handle);
1218
1219cleanup:
1220  vcl_session_table_del_vpp_handle (wrk, vpp_handle);
1221  vcl_session_free (wrk, session);
1222  vcl_evt (VCL_EVT_CLOSE, session, rv);
1223
1224  return rv;
1225}
1226
1227int
1228vppcom_session_close (uint32_t session_handle)
1229{
1230  vcl_worker_t *wrk = vcl_worker_get_current ();
1231  vcl_session_t *session;
1232
1233  session = vcl_session_get_w_handle (wrk, session_handle);
1234  if (!session)
1235    return VPPCOM_EBADFD;
1236  return vcl_session_cleanup (wrk, session, session_handle,
1237			      1 /* do_disconnect */ );
1238}
1239
1240int
1241vppcom_session_bind (uint32_t session_handle, vppcom_endpt_t * ep)
1242{
1243  vcl_worker_t *wrk = vcl_worker_get_current ();
1244  vcl_session_t *session = 0;
1245
1246  if (!ep || !ep->ip)
1247    return VPPCOM_EINVAL;
1248
1249  session = vcl_session_get_w_handle (wrk, session_handle);
1250  if (!session)
1251    return VPPCOM_EBADFD;
1252
1253  if (session->is_vep)
1254    {
1255      VDBG (0, "ERROR: cannot bind to epoll session %u!",
1256	    session->session_index);
1257      return VPPCOM_EBADFD;
1258    }
1259
1260  session->transport.is_ip4 = ep->is_ip4;
1261  if (ep->is_ip4)
1262    clib_memcpy_fast (&session->transport.lcl_ip.ip4, ep->ip,
1263		      sizeof (ip4_address_t));
1264  else
1265    clib_memcpy_fast (&session->transport.lcl_ip.ip6, ep->ip,
1266		      sizeof (ip6_address_t));
1267  session->transport.lcl_port = ep->port;
1268
1269  VDBG (0, "session %u handle %u: binding to local %s address %U port %u, "
1270	"proto %s", session->session_index, session_handle,
1271	session->transport.is_ip4 ? "IPv4" : "IPv6",
1272	format_ip46_address, &session->transport.lcl_ip,
1273	session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
1274	clib_net_to_host_u16 (session->transport.lcl_port),
1275	vppcom_proto_str (session->session_type));
1276  vcl_evt (VCL_EVT_BIND, session);
1277
1278  if (session->session_type == VPPCOM_PROTO_UDP)
1279    vppcom_session_listen (session_handle, 10);
1280
1281  return VPPCOM_OK;
1282}
1283
1284int
1285vppcom_session_listen (uint32_t listen_sh, uint32_t q_len)
1286{
1287  vcl_worker_t *wrk = vcl_worker_get_current ();
1288  vcl_session_t *listen_session = 0;
1289  u64 listen_vpp_handle;
1290  int rv;
1291
1292  listen_session = vcl_session_get_w_handle (wrk, listen_sh);
1293  if (!listen_session || listen_session->is_vep)
1294    return VPPCOM_EBADFD;
1295
1296  if (q_len == 0 || q_len == ~0)
1297    q_len = vcm->cfg.listen_queue_size;
1298
1299  listen_vpp_handle = listen_session->vpp_handle;
1300  if (listen_session->session_state & STATE_LISTEN)
1301    {
1302      VDBG (0, "session %u [0x%llx]: already in listen state!",
1303	    listen_sh, listen_vpp_handle);
1304      return VPPCOM_OK;
1305    }
1306
1307  VDBG (0, "session %u: sending vpp listen request...", listen_sh);
1308
1309  /*
1310   * Send listen request to vpp and wait for reply
1311   */
1312  vcl_send_session_listen (wrk, listen_session);
1313  rv = vppcom_wait_for_session_state_change (listen_session->session_index,
1314					     STATE_LISTEN,
1315					     vcm->cfg.session_timeout);
1316
1317  if (PREDICT_FALSE (rv))
1318    {
1319      listen_session = vcl_session_get_w_handle (wrk, listen_sh);
1320      VDBG (0, "session %u [0x%llx]: listen failed! returning %d (%s)",
1321	    listen_sh, listen_session->vpp_handle, rv,
1322	    vppcom_retval_str (rv));
1323      return rv;
1324    }
1325
1326  return VPPCOM_OK;
1327}
1328
1329int
1330vppcom_session_tls_add_cert (uint32_t session_handle, char *cert,
1331			     uint32_t cert_len)
1332{
1333
1334  vcl_worker_t *wrk = vcl_worker_get_current ();
1335  vcl_session_t *session = 0;
1336
1337  session = vcl_session_get_w_handle (wrk, session_handle);
1338  if (!session)
1339    return VPPCOM_EBADFD;
1340
1341  if (cert_len == 0 || cert_len == ~0)
1342    return VPPCOM_EBADFD;
1343
1344  /*
1345   * Send listen request to vpp and wait for reply
1346   */
1347  vppcom_send_application_tls_cert_add (session, cert, cert_len);
1348  vcm->app_state = STATE_APP_ADDING_TLS_DATA;
1349  vcl_wait_for_app_state_change (STATE_APP_READY);
1350  return VPPCOM_OK;
1351
1352}
1353
1354int
1355vppcom_session_tls_add_key (uint32_t session_handle, char *key,
1356			    uint32_t key_len)
1357{
1358
1359  vcl_worker_t *wrk = vcl_worker_get_current ();
1360  vcl_session_t *session = 0;
1361
1362  session = vcl_session_get_w_handle (wrk, session_handle);
1363  if (!session)
1364    return VPPCOM_EBADFD;
1365
1366  if (key_len == 0 || key_len == ~0)
1367    return VPPCOM_EBADFD;
1368
1369  vppcom_send_application_tls_key_add (session, key, key_len);
1370  vcm->app_state = STATE_APP_ADDING_TLS_DATA;
1371  vcl_wait_for_app_state_change (STATE_APP_READY);
1372  return VPPCOM_OK;
1373}
1374
1375static int
1376validate_args_session_accept_ (vcl_worker_t * wrk, vcl_session_t * ls)
1377{
1378  if (ls->is_vep)
1379    {
1380      VDBG (0, "ERROR: cannot accept on epoll session %u!",
1381	    ls->session_index);
1382      return VPPCOM_EBADFD;
1383    }
1384
1385  if ((ls->session_state != STATE_LISTEN)
1386      && (!vcl_session_is_connectable_listener (wrk, ls)))
1387    {
1388      VDBG (0,
1389	    "ERROR: session [0x%llx]: not in listen state! state 0x%x"
1390	    " (%s)", ls->vpp_handle, ls->session_state,
1391	    vppcom_session_state_str (ls->session_state));
1392      return VPPCOM_EBADFD;
1393    }
1394  return VPPCOM_OK;
1395}
1396
1397int
1398vppcom_unformat_proto (uint8_t * proto, char *proto_str)
1399{
1400  if (!strcmp (proto_str, "TCP"))
1401    *proto = VPPCOM_PROTO_TCP;
1402  else if (!strcmp (proto_str, "tcp"))
1403    *proto = VPPCOM_PROTO_TCP;
1404  else if (!strcmp (proto_str, "UDP"))
1405    *proto = VPPCOM_PROTO_UDP;
1406  else if (!strcmp (proto_str, "udp"))
1407    *proto = VPPCOM_PROTO_UDP;
1408  else if (!strcmp (proto_str, "UDPC"))
1409    *proto = VPPCOM_PROTO_UDPC;
1410  else if (!strcmp (proto_str, "udpc"))
1411    *proto = VPPCOM_PROTO_UDPC;
1412  else if (!strcmp (proto_str, "SCTP"))
1413    *proto = VPPCOM_PROTO_SCTP;
1414  else if (!strcmp (proto_str, "sctp"))
1415    *proto = VPPCOM_PROTO_SCTP;
1416  else if (!strcmp (proto_str, "TLS"))
1417    *proto = VPPCOM_PROTO_TLS;
1418  else if (!strcmp (proto_str, "tls"))
1419    *proto = VPPCOM_PROTO_TLS;
1420  else if (!strcmp (proto_str, "QUIC"))
1421    *proto = VPPCOM_PROTO_QUIC;
1422  else if (!strcmp (proto_str, "quic"))
1423    *proto = VPPCOM_PROTO_QUIC;
1424  else
1425    return 1;
1426  return 0;
1427}
1428
1429int
1430vppcom_session_accept (uint32_t listen_session_handle, vppcom_endpt_t * ep,
1431		       uint32_t flags)
1432{
1433  u32 client_session_index = ~0, listen_session_index, accept_flags = 0;
1434  vcl_worker_t *wrk = vcl_worker_get_current ();
1435  session_accepted_msg_t accepted_msg;
1436  vcl_session_t *listen_session = 0;
1437  vcl_session_t *client_session = 0;
1438  vcl_session_msg_t *evt;
1439  svm_msg_q_msg_t msg;
1440  session_event_t *e;
1441  u8 is_nonblocking;
1442  int rv;
1443
1444  listen_session = vcl_session_get_w_handle (wrk, listen_session_handle);
1445  if (!listen_session)
1446    return VPPCOM_EBADFD;
1447
1448  listen_session_index = listen_session->session_index;
1449  if ((rv = validate_args_session_accept_ (wrk, listen_session)))
1450    return rv;
1451
1452  if (clib_fifo_elts (listen_session->accept_evts_fifo))
1453    {
1454      clib_fifo_sub2 (listen_session->accept_evts_fifo, evt);
1455      accept_flags = evt->flags;
1456      accepted_msg = evt->accepted_msg;
1457      goto handle;
1458    }
1459
1460  is_nonblocking = VCL_SESS_ATTR_TEST (listen_session->attr,
1461				       VCL_SESS_ATTR_NONBLOCK);
1462  while (1)
1463    {
1464      if (svm_msg_q_is_empty (wrk->app_event_queue) && is_nonblocking)
1465	return VPPCOM_EAGAIN;
1466
1467      if (svm_msg_q_sub (wrk->app_event_queue, &msg, SVM_Q_WAIT, 0))
1468	return VPPCOM_EAGAIN;
1469
1470      e = svm_msg_q_msg_data (wrk->app_event_queue, &msg);
1471      if (e->event_type != SESSION_CTRL_EVT_ACCEPTED)
1472	{
1473	  VDBG (0, "discarded event: %u", e->event_type);
1474	  svm_msg_q_free_msg (wrk->app_event_queue, &msg);
1475	  continue;
1476	}
1477      clib_memcpy_fast (&accepted_msg, e->data, sizeof (accepted_msg));
1478      svm_msg_q_free_msg (wrk->app_event_queue, &msg);
1479      break;
1480    }
1481
1482handle:
1483
1484  client_session_index = vcl_session_accepted_handler (wrk, &accepted_msg,
1485						       listen_session_index);
1486  if (client_session_index == VCL_INVALID_SESSION_INDEX)
1487    return VPPCOM_ECONNABORTED;
1488
1489  listen_session = vcl_session_get (wrk, listen_session_index);
1490  client_session = vcl_session_get (wrk, client_session_index);
1491
1492  if (flags & O_NONBLOCK)
1493    VCL_SESS_ATTR_SET (client_session->attr, VCL_SESS_ATTR_NONBLOCK);
1494
1495  VDBG (1, "listener %u [0x%llx]: Got a connect request! session %u [0x%llx],"
1496	" flags %d, is_nonblocking %u", listen_session->session_index,
1497	listen_session->vpp_handle, client_session_index,
1498	client_session->vpp_handle, flags,
1499	VCL_SESS_ATTR_TEST (client_session->attr, VCL_SESS_ATTR_NONBLOCK));
1500
1501  if (ep)
1502    {
1503      ep->is_ip4 = client_session->transport.is_ip4;
1504      ep->port = client_session->transport.rmt_port;
1505      if (client_session->transport.is_ip4)
1506	clib_memcpy_fast (ep->ip, &client_session->transport.rmt_ip.ip4,
1507			  sizeof (ip4_address_t));
1508      else
1509	clib_memcpy_fast (ep->ip, &client_session->transport.rmt_ip.ip6,
1510			  sizeof (ip6_address_t));
1511    }
1512
1513  VDBG (0, "listener %u [0x%llx] accepted %u [0x%llx] peer: %U:%u "
1514	"local: %U:%u", listen_session_handle, listen_session->vpp_handle,
1515	client_session_index, client_session->vpp_handle,
1516	format_ip46_address, &client_session->transport.rmt_ip,
1517	client_session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
1518	clib_net_to_host_u16 (client_session->transport.rmt_port),
1519	format_ip46_address, &client_session->transport.lcl_ip,
1520	client_session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
1521	clib_net_to_host_u16 (client_session->transport.lcl_port));
1522  vcl_evt (VCL_EVT_ACCEPT, client_session, listen_session,
1523	   client_session_index);
1524
1525  /*
1526   * Session might have been closed already
1527   */
1528  if (accept_flags)
1529    {
1530      if (accept_flags & VCL_ACCEPTED_F_CLOSED)
1531	client_session->session_state = STATE_VPP_CLOSING;
1532      else if (accept_flags & VCL_ACCEPTED_F_RESET)
1533	client_session->session_state = STATE_DISCONNECT;
1534    }
1535  return vcl_session_handle (client_session);
1536}
1537
1538static void
1539vcl_ip_copy_from_ep (ip46_address_t * ip, vppcom_endpt_t * ep)
1540{
1541  if (ep->is_ip4)
1542    clib_memcpy_fast (&ip->ip4, ep->ip, sizeof (ip4_address_t));
1543  else
1544    clib_memcpy_fast (&ip->ip6, ep->ip, sizeof (ip6_address_t));
1545}
1546
1547void
1548vcl_ip_copy_to_ep (ip46_address_t * ip, vppcom_endpt_t * ep, u8 is_ip4)
1549{
1550  ep->is_ip4 = is_ip4;
1551  if (is_ip4)
1552    clib_memcpy_fast (ep->ip, &ip->ip4, sizeof (ip4_address_t));
1553  else
1554    clib_memcpy_fast (ep->ip, &ip->ip6, sizeof (ip6_address_t));
1555}
1556
1557int
1558vppcom_session_connect (uint32_t session_handle, vppcom_endpt_t * server_ep)
1559{
1560  vcl_worker_t *wrk = vcl_worker_get_current ();
1561  vcl_session_t *session = 0;
1562  u32 session_index;
1563  int rv;
1564
1565  session = vcl_session_get_w_handle (wrk, session_handle);
1566  if (!session)
1567    return VPPCOM_EBADFD;
1568  session_index = session->session_index;
1569
1570  if (PREDICT_FALSE (session->is_vep))
1571    {
1572      VDBG (0, "ERROR: cannot connect epoll session %u!",
1573	    session->session_index);
1574      return VPPCOM_EBADFD;
1575    }
1576
1577  if (PREDICT_FALSE (session->session_state & CLIENT_STATE_OPEN))
1578    {
1579      VDBG (0, "session handle %u [0x%llx]: session already "
1580	    "connected to %s %U port %d proto %s, state 0x%x (%s)",
1581	    session_handle, session->vpp_handle,
1582	    session->transport.is_ip4 ? "IPv4" : "IPv6", format_ip46_address,
1583	    &session->transport.rmt_ip, session->transport.is_ip4 ?
1584	    IP46_TYPE_IP4 : IP46_TYPE_IP6,
1585	    clib_net_to_host_u16 (session->transport.rmt_port),
1586	    vppcom_proto_str (session->session_type), session->session_state,
1587	    vppcom_session_state_str (session->session_state));
1588      return VPPCOM_OK;
1589    }
1590
1591  session->transport.is_ip4 = server_ep->is_ip4;
1592  vcl_ip_copy_from_ep (&session->transport.rmt_ip, server_ep);
1593  session->transport.rmt_port = server_ep->port;
1594  session->parent_handle = VCL_INVALID_SESSION_HANDLE;
1595
1596  VDBG (0, "session handle %u: connecting to server %s %U "
1597	"port %d proto %s", session_handle,
1598	session->transport.is_ip4 ? "IPv4" : "IPv6",
1599	format_ip46_address,
1600	&session->transport.rmt_ip, session->transport.is_ip4 ?
1601	IP46_TYPE_IP4 : IP46_TYPE_IP6,
1602	clib_net_to_host_u16 (session->transport.rmt_port),
1603	vppcom_proto_str (session->session_type));
1604
1605  vcl_send_session_connect (wrk, session);
1606
1607  if (VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_NONBLOCK))
1608    return VPPCOM_EINPROGRESS;
1609
1610  /*
1611   * Wait for reply from vpp if blocking
1612   */
1613  rv = vppcom_wait_for_session_state_change (session_index, STATE_CONNECT,
1614					     vcm->cfg.session_timeout);
1615
1616  session = vcl_session_get (wrk, session_index);
1617  VDBG (0, "session %u [0x%llx]: connect %s!", session->session_index,
1618	session->vpp_handle, rv ? "failed" : "succeeded");
1619
1620  return rv;
1621}
1622
1623int
1624vppcom_session_stream_connect (uint32_t session_handle,
1625			       uint32_t parent_session_handle)
1626{
1627  vcl_worker_t *wrk = vcl_worker_get_current ();
1628  vcl_session_t *session, *parent_session;
1629  u32 session_index, parent_session_index;
1630  int rv;
1631
1632  session = vcl_session_get_w_handle (wrk, session_handle);
1633  if (!session)
1634    return VPPCOM_EBADFD;
1635  parent_session = vcl_session_get_w_handle (wrk, parent_session_handle);
1636  if (!parent_session)
1637    return VPPCOM_EBADFD;
1638
1639  session_index = session->session_index;
1640  parent_session_index = parent_session->session_index;
1641  if (PREDICT_FALSE (session->is_vep))
1642    {
1643      VDBG (0, "ERROR: cannot connect epoll session %u!",
1644	    session->session_index);
1645      return VPPCOM_EBADFD;
1646    }
1647
1648  if (PREDICT_FALSE (session->session_state & CLIENT_STATE_OPEN))
1649    {
1650      VDBG (0, "session handle %u [0x%llx]: session already "
1651	    "connected to session %u [0x%llx] proto %s, state 0x%x (%s)",
1652	    session_handle, session->vpp_handle,
1653	    parent_session_handle, parent_session->vpp_handle,
1654	    vppcom_proto_str (session->session_type), session->session_state,
1655	    vppcom_session_state_str (session->session_state));
1656      return VPPCOM_OK;
1657    }
1658
1659  /* Connect to quic session specifics */
1660  session->transport.is_ip4 = parent_session->transport.is_ip4;
1661  session->transport.rmt_ip.ip4.as_u32 = (uint32_t) 1;
1662  session->transport.rmt_port = 0;
1663  session->parent_handle = parent_session->vpp_handle;
1664
1665  VDBG (0, "session handle %u: connecting to session %u [0x%llx]",
1666	session_handle, parent_session_handle, parent_session->vpp_handle);
1667
1668  /*
1669   * Send connect request and wait for reply from vpp
1670   */
1671  vcl_send_session_connect (wrk, session);
1672  rv = vppcom_wait_for_session_state_change (session_index, STATE_CONNECT,
1673					     vcm->cfg.session_timeout);
1674
1675  session->listener_index = parent_session_index;
1676  parent_session = vcl_session_get_w_handle (wrk, parent_session_handle);
1677  if (parent_session)
1678    parent_session->n_accepted_sessions++;
1679
1680  session = vcl_session_get (wrk, session_index);
1681  VDBG (0, "session %u [0x%llx]: connect %s!", session->session_index,
1682	session->vpp_handle, rv ? "failed" : "succeeded");
1683
1684  return rv;
1685}
1686
1687static u8
1688vcl_is_rx_evt_for_session (session_event_t * e, u32 sid, u8 is_ct)
1689{
1690  return (e->event_type == SESSION_IO_EVT_RX && e->session_index == sid);
1691}
1692
1693static inline int
1694vppcom_session_read_internal (uint32_t session_handle, void *buf, int n,
1695			      u8 peek)
1696{
1697  vcl_worker_t *wrk = vcl_worker_get_current ();
1698  int n_read = 0, is_nonblocking;
1699  vcl_session_t *s = 0;
1700  svm_fifo_t *rx_fifo;
1701  svm_msg_q_msg_t msg;
1702  session_event_t *e;
1703  svm_msg_q_t *mq;
1704  u8 is_ct;
1705
1706  if (PREDICT_FALSE (!buf))
1707    return VPPCOM_EINVAL;
1708
1709  s = vcl_session_get_w_handle (wrk, session_handle);
1710  if (PREDICT_FALSE (!s || s->is_vep))
1711    return VPPCOM_EBADFD;
1712
1713  if (PREDICT_FALSE (!vcl_session_is_open (s)))
1714    {
1715      VDBG (0, "session %u[0x%llx] is not open! state 0x%x (%s)",
1716	    s->session_index, s->vpp_handle, s->session_state,
1717	    vppcom_session_state_str (s->session_state));
1718      return vcl_session_closed_error (s);
1719    }
1720
1721  is_nonblocking = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK);
1722  is_ct = vcl_session_is_ct (s);
1723  mq = wrk->app_event_queue;
1724  rx_fifo = is_ct ? s->ct_rx_fifo : s->rx_fifo;
1725  s->has_rx_evt = 0;
1726
1727  if (svm_fifo_is_empty_cons (rx_fifo))
1728    {
1729      if (is_nonblocking)
1730	{
1731	  if (vcl_session_is_closing (s))
1732	    return vcl_session_closing_error (s);
1733	  svm_fifo_unset_event (s->rx_fifo);
1734	  return VPPCOM_EWOULDBLOCK;
1735	}
1736      while (svm_fifo_is_empty_cons (rx_fifo))
1737	{
1738	  if (vcl_session_is_closing (s))
1739	    return vcl_session_closing_error (s);
1740
1741	  svm_fifo_unset_event (s->rx_fifo);
1742	  svm_msg_q_lock (mq);
1743	  if (svm_msg_q_is_empty (mq))
1744	    svm_msg_q_wait (mq);
1745
1746	  svm_msg_q_sub_w_lock (mq, &msg);
1747	  e = svm_msg_q_msg_data (mq, &msg);
1748	  svm_msg_q_unlock (mq);
1749	  if (!vcl_is_rx_evt_for_session (e, s->session_index, is_ct))
1750	    vcl_handle_mq_event (wrk, e);
1751	  svm_msg_q_free_msg (mq, &msg);
1752	}
1753    }
1754
1755  if (s->is_dgram)
1756    n_read = app_recv_dgram_raw (rx_fifo, buf, n, &s->transport, 0, peek);
1757  else
1758    n_read = app_recv_stream_raw (rx_fifo, buf, n, 0, peek);
1759
1760  if (svm_fifo_is_empty_cons (rx_fifo))
1761    svm_fifo_unset_event (s->rx_fifo);
1762
1763  /* Cut-through sessions might request tx notifications on rx fifos */
1764  if (PREDICT_FALSE (rx_fifo->want_deq_ntf))
1765    {
1766      app_send_io_evt_to_vpp (s->vpp_evt_q, s->rx_fifo->master_session_index,
1767			      SESSION_IO_EVT_RX, SVM_Q_WAIT);
1768      svm_fifo_reset_has_deq_ntf (s->rx_fifo);
1769    }
1770
1771  VDBG (2, "session %u[0x%llx]: read %d bytes from (%p)", s->session_index,
1772	s->vpp_handle, n_read, rx_fifo);
1773
1774  return n_read;
1775}
1776
/*
 * Read up to n bytes from a session, consuming them.
 *
 * @return whatever vppcom_session_read_internal() returns
 */
int
vppcom_session_read (uint32_t session_handle, void *buf, size_t n)
{
  int rv;

  rv = vppcom_session_read_internal (session_handle, buf, n, 0 /* peek */ );
  return rv;
}
1782
/*
 * Same as a read, but with the peek flag set on the internal read.
 *
 * @return whatever vppcom_session_read_internal() returns
 */
static int
vppcom_session_peek (uint32_t session_handle, void *buf, int n)
{
  int rv;

  rv = vppcom_session_read_internal (session_handle, buf, n, 1 /* peek */ );
  return rv;
}
1788
1789int
1790vppcom_session_read_segments (uint32_t session_handle,
1791			      vppcom_data_segments_t ds)
1792{
1793  vcl_worker_t *wrk = vcl_worker_get_current ();
1794  int n_read = 0, is_nonblocking;
1795  vcl_session_t *s = 0;
1796  svm_fifo_t *rx_fifo;
1797  svm_msg_q_msg_t msg;
1798  session_event_t *e;
1799  svm_msg_q_t *mq;
1800  u8 is_ct;
1801
1802  s = vcl_session_get_w_handle (wrk, session_handle);
1803  if (PREDICT_FALSE (!s || s->is_vep))
1804    return VPPCOM_EBADFD;
1805
1806  if (PREDICT_FALSE (!vcl_session_is_open (s)))
1807    return vcl_session_closed_error (s);
1808
1809  is_nonblocking = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK);
1810  is_ct = vcl_session_is_ct (s);
1811  mq = is_ct ? s->our_evt_q : wrk->app_event_queue;
1812  rx_fifo = s->rx_fifo;
1813  s->has_rx_evt = 0;
1814
1815  if (is_ct)
1816    svm_fifo_unset_event (s->rx_fifo);
1817
1818  if (svm_fifo_is_empty_cons (rx_fifo))
1819    {
1820      if (is_nonblocking)
1821	{
1822	  svm_fifo_unset_event (rx_fifo);
1823	  return VPPCOM_EWOULDBLOCK;
1824	}
1825      while (svm_fifo_is_empty_cons (rx_fifo))
1826	{
1827	  if (vcl_session_is_closing (s))
1828	    return vcl_session_closing_error (s);
1829
1830	  svm_fifo_unset_event (rx_fifo);
1831	  svm_msg_q_lock (mq);
1832	  if (svm_msg_q_is_empty (mq))
1833	    svm_msg_q_wait (mq);
1834
1835	  svm_msg_q_sub_w_lock (mq, &msg);
1836	  e = svm_msg_q_msg_data (mq, &msg);
1837	  svm_msg_q_unlock (mq);
1838	  if (!vcl_is_rx_evt_for_session (e, s->session_index, is_ct))
1839	    vcl_handle_mq_event (wrk, e);
1840	  svm_msg_q_free_msg (mq, &msg);
1841	}
1842    }
1843
1844  n_read = svm_fifo_segments (rx_fifo, (svm_fifo_seg_t *) ds);
1845  svm_fifo_unset_event (rx_fifo);
1846
1847  return n_read;
1848}
1849
1850void
1851vppcom_session_free_segments (uint32_t session_handle,
1852			      vppcom_data_segments_t ds)
1853{
1854  vcl_worker_t *wrk = vcl_worker_get_current ();
1855  vcl_session_t *s;
1856
1857  s = vcl_session_get_w_handle (wrk, session_handle);
1858  if (PREDICT_FALSE (!s || s->is_vep))
1859    return;
1860
1861  svm_fifo_segments_free (s->rx_fifo, (svm_fifo_seg_t *) ds);
1862}
1863
1864int
1865vppcom_data_segment_copy (void *buf, vppcom_data_segments_t ds, u32 max_bytes)
1866{
1867  u32 first_copy = clib_min (ds[0].len, max_bytes);
1868  clib_memcpy_fast (buf, ds[0].data, first_copy);
1869  if (first_copy < max_bytes)
1870    {
1871      clib_memcpy_fast (buf + first_copy, ds[1].data,
1872			clib_min (ds[1].len, max_bytes - first_copy));
1873    }
1874  return 0;
1875}
1876
1877static u8
1878vcl_is_tx_evt_for_session (session_event_t * e, u32 sid, u8 is_ct)
1879{
1880  return (e->event_type == SESSION_IO_EVT_TX && e->session_index == sid);
1881}
1882
1883static inline int
1884vppcom_session_write_inline (uint32_t session_handle, void *buf, size_t n,
1885			     u8 is_flush)
1886{
1887  vcl_worker_t *wrk = vcl_worker_get_current ();
1888  int n_write, is_nonblocking;
1889  vcl_session_t *s = 0;
1890  session_evt_type_t et;
1891  svm_msg_q_msg_t msg;
1892  svm_fifo_t *tx_fifo;
1893  session_event_t *e;
1894  svm_msg_q_t *mq;
1895  u8 is_ct;
1896
1897  if (PREDICT_FALSE (!buf || n == 0))
1898    return VPPCOM_EINVAL;
1899
1900  s = vcl_session_get_w_handle (wrk, session_handle);
1901  if (PREDICT_FALSE (!s))
1902    return VPPCOM_EBADFD;
1903
1904  if (PREDICT_FALSE (s->is_vep))
1905    {
1906      VDBG (0, "ERROR: session %u [0x%llx]: cannot write to an epoll"
1907	    " session!", s->session_index, s->vpp_handle);
1908      return VPPCOM_EBADFD;
1909    }
1910
1911  if (PREDICT_FALSE (!vcl_session_is_open (s)))
1912    {
1913      VDBG (1, "session %u [0x%llx]: is not open! state 0x%x (%s)",
1914	    s->session_index, s->vpp_handle, s->session_state,
1915	    vppcom_session_state_str (s->session_state));
1916      return vcl_session_closed_error (s);;
1917    }
1918
1919  is_ct = vcl_session_is_ct (s);
1920  tx_fifo = is_ct ? s->ct_tx_fifo : s->tx_fifo;
1921  is_nonblocking = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK);
1922
1923  mq = wrk->app_event_queue;
1924  if (svm_fifo_is_full_prod (tx_fifo))
1925    {
1926      if (is_nonblocking)
1927	{
1928	  return VPPCOM_EWOULDBLOCK;
1929	}
1930      while (svm_fifo_is_full_prod (tx_fifo))
1931	{
1932	  svm_fifo_add_want_deq_ntf (tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
1933	  if (vcl_session_is_closing (s))
1934	    return vcl_session_closing_error (s);
1935	  svm_msg_q_lock (mq);
1936	  if (svm_msg_q_is_empty (mq))
1937	    svm_msg_q_wait (mq);
1938
1939	  svm_msg_q_sub_w_lock (mq, &msg);
1940	  e = svm_msg_q_msg_data (mq, &msg);
1941	  svm_msg_q_unlock (mq);
1942
1943	  if (!vcl_is_tx_evt_for_session (e, s->session_index, is_ct))
1944	    vcl_handle_mq_event (wrk, e);
1945	  svm_msg_q_free_msg (mq, &msg);
1946	}
1947    }
1948
1949  et = SESSION_IO_EVT_TX;
1950  if (is_flush && !is_ct)
1951    et = SESSION_IO_EVT_TX_FLUSH;
1952
1953  if (s->is_dgram)
1954    n_write = app_send_dgram_raw (tx_fifo, &s->transport,
1955				  s->vpp_evt_q, buf, n, et,
1956				  0 /* do_evt */ , SVM_Q_WAIT);
1957  else
1958    n_write = app_send_stream_raw (tx_fifo, s->vpp_evt_q, buf, n, et,
1959				   0 /* do_evt */ , SVM_Q_WAIT);
1960
1961  if (svm_fifo_set_event (s->tx_fifo))
1962    app_send_io_evt_to_vpp (s->vpp_evt_q, s->tx_fifo->master_session_index,
1963			    et, SVM_Q_WAIT);
1964
1965  ASSERT (n_write > 0);
1966
1967  VDBG (2, "session %u [0x%llx]: wrote %d bytes", s->session_index,
1968	s->vpp_handle, n_write);
1969
1970  return n_write;
1971}
1972
/*
 * Write n bytes to a session without requesting a flush.
 *
 * @return whatever vppcom_session_write_inline() returns
 */
int
vppcom_session_write (uint32_t session_handle, void *buf, size_t n)
{
  int rv;

  rv = vppcom_session_write_inline (session_handle, buf, n,
                                    0 /* is_flush */ );
  return rv;
}
1979
/*
 * Write n bytes to a session and request a flush.
 *
 * @return whatever vppcom_session_write_inline() returns
 */
int
vppcom_session_write_msg (uint32_t session_handle, void *buf, size_t n)
{
  int rv;

  rv = vppcom_session_write_inline (session_handle, buf, n,
                                    1 /* is_flush */ );
  return rv;
}
1986
/*
 * Leave the enclosing switch/loop (via break) when an rx event for
 * session _s is stale: the session has no rx fifo, or the fifo that is
 * checked is still empty after its event flag was cleared.
 *
 * Must expand to plain statements, not do { } while (0), because the
 * break has to act on the caller's construct.
 */
#define vcl_fifo_rx_evt_valid_or_break(_s)				\
if (PREDICT_FALSE (!(_s)->rx_fifo))					\
  break;								\
if (PREDICT_FALSE (svm_fifo_is_empty ((_s)->rx_fifo)))			\
  {									\
    if (!vcl_session_is_ct (_s))					\
      {									\
	svm_fifo_unset_event ((_s)->rx_fifo);				\
	if (svm_fifo_is_empty ((_s)->rx_fifo))				\
	  break;							\
      }									\
    else if (svm_fifo_is_empty ((_s)->ct_rx_fifo))			\
      {									\
	svm_fifo_unset_event ((_s)->ct_rx_fifo);			\
	if (svm_fifo_is_empty ((_s)->ct_rx_fifo))			\
	  break;							\
      }									\
  }
2005
2006static void
2007vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
2008			    unsigned long n_bits, unsigned long *read_map,
2009			    unsigned long *write_map,
2010			    unsigned long *except_map, u32 * bits_set)
2011{
2012  session_disconnected_msg_t *disconnected_msg;
2013  session_connected_msg_t *connected_msg;
2014  vcl_session_t *session;
2015  u32 sid;
2016
2017  switch (e->event_type)
2018    {
2019    case SESSION_IO_EVT_RX:
2020      sid = e->session_index;
2021      session = vcl_session_get (wrk, sid);
2022      if (!session)
2023	break;
2024      vcl_fifo_rx_evt_valid_or_break (session);
2025      if (sid < n_bits && read_map)
2026	{
2027	  clib_bitmap_set_no_check ((uword *) read_map, sid, 1);
2028	  *bits_set += 1;
2029	}
2030      break;
2031    case SESSION_IO_EVT_TX:
2032      sid = e->session_index;
2033      session = vcl_session_get (wrk, sid);
2034      if (!session)
2035	break;
2036      if (sid < n_bits && write_map)
2037	{
2038	  clib_bitmap_set_no_check ((uword *) write_map, sid, 1);
2039	  *bits_set += 1;
2040	}
2041      break;
2042    case SESSION_CTRL_EVT_ACCEPTED:
2043      session = vcl_session_accepted (wrk,
2044				      (session_accepted_msg_t *) e->data);
2045      if (!session)
2046	break;
2047      sid = session->session_index;
2048      if (sid < n_bits && read_map)
2049	{
2050	  clib_bitmap_set_no_check ((uword *) read_map, sid, 1);
2051	  *bits_set += 1;
2052	}
2053      break;
2054    case SESSION_CTRL_EVT_CONNECTED:
2055      connected_msg = (session_connected_msg_t *) e->data;
2056      sid = vcl_session_connected_handler (wrk, connected_msg);
2057      if (sid == VCL_INVALID_SESSION_INDEX)
2058	break;
2059      if (sid < n_bits && write_map)
2060	{
2061	  clib_bitmap_set_no_check ((uword *) write_map, sid, 1);
2062	  *bits_set += 1;
2063	}
2064      break;
2065    case SESSION_CTRL_EVT_DISCONNECTED:
2066      disconnected_msg = (session_disconnected_msg_t *) e->data;
2067      session = vcl_session_disconnected_handler (wrk, disconnected_msg);
2068      if (!session)
2069	break;
2070      sid = session->session_index;
2071      if (sid < n_bits && except_map)
2072	{
2073	  clib_bitmap_set_no_check ((uword *) except_map, sid, 1);
2074	  *bits_set += 1;
2075	}
2076      break;
2077    case SESSION_CTRL_EVT_RESET:
2078      sid = vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
2079      if (sid < n_bits && except_map)
2080	{
2081	  clib_bitmap_set_no_check ((uword *) except_map, sid, 1);
2082	  *bits_set += 1;
2083	}
2084      break;
2085    case SESSION_CTRL_EVT_UNLISTEN_REPLY:
2086      vcl_session_unlisten_reply_handler (wrk, e->data);
2087      break;
2088    case SESSION_CTRL_EVT_WORKER_UPDATE_REPLY:
2089      vcl_session_worker_update_reply_handler (wrk, e->data);
2090      break;
2091    case SESSION_CTRL_EVT_REQ_WORKER_UPDATE:
2092      vcl_session_req_worker_update_handler (wrk, e->data);
2093      break;
2094    default:
2095      clib_warning ("unhandled: %u", e->event_type);
2096      break;
2097    }
2098}
2099
2100static int
2101vcl_select_handle_mq (vcl_worker_t * wrk, svm_msg_q_t * mq,
2102		      unsigned long n_bits, unsigned long *read_map,
2103		      unsigned long *write_map, unsigned long *except_map,
2104		      double time_to_wait, u32 * bits_set)
2105{
2106  svm_msg_q_msg_t *msg;
2107  session_event_t *e;
2108  u32 i;
2109
2110  svm_msg_q_lock (mq);
2111  if (svm_msg_q_is_empty (mq))
2112    {
2113      if (*bits_set)
2114	{
2115	  svm_msg_q_unlock (mq);
2116	  return 0;
2117	}
2118
2119      if (!time_to_wait)
2120	{
2121	  svm_msg_q_unlock (mq);
2122	  return 0;
2123	}
2124      else if (time_to_wait < 0)
2125	{
2126	  svm_msg_q_wait (mq);
2127	}
2128      else
2129	{
2130	  if (svm_msg_q_timedwait (mq, time_to_wait))
2131	    {
2132	      svm_msg_q_unlock (mq);
2133	      return 0;
2134	    }
2135	}
2136    }
2137  vcl_mq_dequeue_batch (wrk, mq, ~0);
2138  svm_msg_q_unlock (mq);
2139
2140  for (i = 0; i < vec_len (wrk->mq_msg_vector); i++)
2141    {
2142      msg = vec_elt_at_index (wrk->mq_msg_vector, i);
2143      e = svm_msg_q_msg_data (mq, msg);
2144      vcl_select_handle_mq_event (wrk, e, n_bits, read_map, write_map,
2145				  except_map, bits_set);
2146      svm_msg_q_free_msg (mq, msg);
2147    }
2148  vec_reset_length (wrk->mq_msg_vector);
2149  vcl_handle_pending_wrk_updates (wrk);
2150  return *bits_set;
2151}
2152
2153static int
2154vppcom_select_condvar (vcl_worker_t * wrk, int n_bits,
2155		       vcl_si_set * read_map, vcl_si_set * write_map,
2156		       vcl_si_set * except_map, double time_to_wait,
2157		       u32 * bits_set)
2158{
2159  double wait = 0, start = 0;
2160
2161  if (!*bits_set)
2162    {
2163      wait = time_to_wait;
2164      start = clib_time_now (&wrk->clib_time);
2165    }
2166
2167  do
2168    {
2169      vcl_select_handle_mq (wrk, wrk->app_event_queue, n_bits, read_map,
2170			    write_map, except_map, wait, bits_set);
2171      if (*bits_set)
2172	return *bits_set;
2173      if (wait == -1)
2174	continue;
2175
2176      wait = wait - (clib_time_now (&wrk->clib_time) - start);
2177    }
2178  while (wait > 0);
2179
2180  return 0;
2181}
2182
2183static int
2184vppcom_select_eventfd (vcl_worker_t * wrk, int n_bits,
2185		       vcl_si_set * read_map, vcl_si_set * write_map,
2186		       vcl_si_set * except_map, double time_to_wait,
2187		       u32 * bits_set)
2188{
2189  vcl_mq_evt_conn_t *mqc;
2190  int __clib_unused n_read;
2191  int n_mq_evts, i;
2192  u64 buf;
2193
2194  vec_validate (wrk->mq_events, pool_elts (wrk->mq_evt_conns));
2195  n_mq_evts = epoll_wait (wrk->mqs_epfd, wrk->mq_events,
2196			  vec_len (wrk->mq_events), time_to_wait);
2197  for (i = 0; i < n_mq_evts; i++)
2198    {
2199      mqc = vcl_mq_evt_conn_get (wrk, wrk->mq_events[i].data.u32);
2200      n_read = read (mqc->mq_fd, &buf, sizeof (buf));
2201      vcl_select_handle_mq (wrk, mqc->mq, n_bits, read_map, write_map,
2202			    except_map, 0, bits_set);
2203    }
2204
2205  return (n_mq_evts > 0 ? (int) *bits_set : 0);
2206}
2207
2208int
2209vppcom_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map,
2210	       vcl_si_set * except_map, double time_to_wait)
2211{
2212  u32 sid, minbits = clib_max (n_bits, BITS (uword)), bits_set = 0;
2213  vcl_worker_t *wrk = vcl_worker_get_current ();
2214  vcl_session_t *session = 0;
2215  int rv, i;
2216
2217  if (n_bits && read_map)
2218    {
2219      clib_bitmap_validate (wrk->rd_bitmap, minbits);
2220      clib_memcpy_fast (wrk->rd_bitmap, read_map,
2221			vec_len (wrk->rd_bitmap) * sizeof (vcl_si_set));
2222      memset (read_map, 0, vec_len (wrk->rd_bitmap) * sizeof (vcl_si_set));
2223    }
2224  if (n_bits && write_map)
2225    {
2226      clib_bitmap_validate (wrk->wr_bitmap, minbits);
2227      clib_memcpy_fast (wrk->wr_bitmap, write_map,
2228			vec_len (wrk->wr_bitmap) * sizeof (vcl_si_set));
2229      memset (write_map, 0, vec_len (wrk->wr_bitmap) * sizeof (vcl_si_set));
2230    }
2231  if (n_bits && except_map)
2232    {
2233      clib_bitmap_validate (wrk->ex_bitmap, minbits);
2234      clib_memcpy_fast (wrk->ex_bitmap, except_map,
2235			vec_len (wrk->ex_bitmap) * sizeof (vcl_si_set));
2236      memset (except_map, 0, vec_len (wrk->ex_bitmap) * sizeof (vcl_si_set));
2237    }
2238
2239  if (!n_bits)
2240    return 0;
2241
2242  if (!write_map)
2243    goto check_rd;
2244
2245  /* *INDENT-OFF* */
2246  clib_bitmap_foreach (sid, wrk->wr_bitmap, ({
2247    if (!(session = vcl_session_get (wrk, sid)))
2248      {
2249        if (except_map && sid < minbits)
2250          clib_bitmap_set_no_check (except_map, sid, 1);
2251        continue;
2252      }
2253
2254    rv = svm_fifo_is_full_prod (session->tx_fifo);
2255    if (!rv)
2256      {
2257        clib_bitmap_set_no_check ((uword*)write_map, sid, 1);
2258        bits_set++;
2259      }
2260    else
2261      svm_fifo_add_want_deq_ntf (session->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
2262  }));
2263
2264check_rd:
2265  if (!read_map)
2266    goto check_mq;
2267
2268  clib_bitmap_foreach (sid, wrk->rd_bitmap, ({
2269    if (!(session = vcl_session_get (wrk, sid)))
2270      {
2271        if (except_map && sid < minbits)
2272          clib_bitmap_set_no_check (except_map, sid, 1);
2273        continue;
2274      }
2275
2276    rv = vcl_session_read_ready (session);
2277    if (rv)
2278      {
2279        clib_bitmap_set_no_check ((uword*)read_map, sid, 1);
2280        bits_set++;
2281      }
2282  }));
2283  /* *INDENT-ON* */
2284
2285check_mq:
2286
2287  for (i = 0; i < vec_len (wrk->unhandled_evts_vector); i++)
2288    {
2289      vcl_select_handle_mq_event (wrk, &wrk->unhandled_evts_vector[i], n_bits,
2290				  read_map, write_map, except_map, &bits_set);
2291    }
2292  vec_reset_length (wrk->unhandled_evts_vector);
2293
2294  if (vcm->cfg.use_mq_eventfd)
2295    vppcom_select_eventfd (wrk, n_bits, read_map, write_map, except_map,
2296			   time_to_wait, &bits_set);
2297  else
2298    vppcom_select_condvar (wrk, n_bits, read_map, write_map, except_map,
2299			   time_to_wait, &bits_set);
2300
2301  return (bits_set);
2302}
2303
2304static inline void
2305vep_verify_epoll_chain (vcl_worker_t * wrk, u32 vep_handle)
2306{
2307  vcl_session_t *session;
2308  vppcom_epoll_t *vep;
2309  u32 sh = vep_handle;
2310
2311  if (VPPCOM_DEBUG <= 2)
2312    return;
2313
2314  session = vcl_session_get_w_handle (wrk, vep_handle);
2315  if (PREDICT_FALSE (!session))
2316    {
2317      VDBG (0, "ERROR: Invalid vep_sh (%u)!", vep_handle);
2318      goto done;
2319    }
2320  if (PREDICT_FALSE (!session->is_vep))
2321    {
2322      VDBG (0, "ERROR: vep_sh (%u) is not a vep!", vep_handle);
2323      goto done;
2324    }
2325  vep = &session->vep;
2326  VDBG (0, "vep_sh (%u): Dumping epoll chain\n"
2327	"{\n"
2328	"   is_vep         = %u\n"
2329	"   is_vep_session = %u\n"
2330	"   next_sh        = 0x%x (%u)\n"
2331	"}\n", vep_handle, session->is_vep, session->is_vep_session,
2332	vep->next_sh, vep->next_sh);
2333
2334  for (sh = vep->next_sh; sh != ~0; sh = vep->next_sh)
2335    {
2336      session = vcl_session_get_w_handle (wrk, sh);
2337      if (PREDICT_FALSE (!session))
2338	{
2339	  VDBG (0, "ERROR: Invalid sh (%u)!", sh);
2340	  goto done;
2341	}
2342      if (PREDICT_FALSE (session->is_vep))
2343	{
2344	  VDBG (0, "ERROR: sh (%u) is a vep!", vep_handle);
2345	}
2346      else if (PREDICT_FALSE (!session->is_vep_session))
2347	{
2348	  VDBG (0, "ERROR: sh (%u) is not a vep session handle!", sh);
2349	  goto done;
2350	}
2351      vep = &session->vep;
2352      if (PREDICT_FALSE (vep->vep_sh != vep_handle))
2353	VDBG (0, "ERROR: session (%u) vep_sh (%u) != vep_sh (%u)!",
2354	      sh, session->vep.vep_sh, vep_handle);
2355      if (session->is_vep_session)
2356	{
2357	  VDBG (0, "vep_sh[%u]: sh 0x%x (%u)\n"
2358		"{\n"
2359		"   next_sh        = 0x%x (%u)\n"
2360		"   prev_sh        = 0x%x (%u)\n"
2361		"   vep_sh         = 0x%x (%u)\n"
2362		"   ev.events      = 0x%x\n"
2363		"   ev.data.u64    = 0x%llx\n"
2364		"   et_mask        = 0x%x\n"
2365		"}\n",
2366		vep_handle, sh, sh, vep->next_sh, vep->next_sh, vep->prev_sh,
2367		vep->prev_sh, vep->vep_sh, vep->vep_sh, vep->ev.events,
2368		vep->ev.data.u64, vep->et_mask);
2369	}
2370    }
2371
2372done:
2373  VDBG (0, "vep_sh (%u): Dump complete!\n", vep_handle);
2374}
2375
2376int
2377vppcom_epoll_create (void)
2378{
2379  vcl_worker_t *wrk = vcl_worker_get_current ();
2380  vcl_session_t *vep_session;
2381
2382  vep_session = vcl_session_alloc (wrk);
2383
2384  vep_session->is_vep = 1;
2385  vep_session->vep.vep_sh = ~0;
2386  vep_session->vep.next_sh = ~0;
2387  vep_session->vep.prev_sh = ~0;
2388  vep_session->vpp_handle = ~0;
2389
2390  vcl_evt (VCL_EVT_EPOLL_CREATE, vep_session, vep_session->session_index);
2391  VDBG (0, "Created vep_idx %u", vep_session->session_index);
2392
2393  return vcl_session_handle (vep_session);
2394}
2395
2396int
2397vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle,
2398		  struct epoll_event *event)
2399{
2400  vcl_worker_t *wrk = vcl_worker_get_current ();
2401  vcl_session_t *vep_session;
2402  vcl_session_t *session;
2403  int rv = VPPCOM_OK;
2404
2405  if (vep_handle == session_handle)
2406    {
2407      VDBG (0, "vep_sh == session handle (%u)!", vep_handle);
2408      return VPPCOM_EINVAL;
2409    }
2410
2411  vep_session = vcl_session_get_w_handle (wrk, vep_handle);
2412  if (PREDICT_FALSE (!vep_session))
2413    {
2414      VDBG (0, "Invalid vep_sh (%u)!", vep_handle);
2415      return VPPCOM_EBADFD;
2416    }
2417  if (PREDICT_FALSE (!vep_session->is_vep))
2418    {
2419      VDBG (0, "vep_sh (%u) is not a vep!", vep_handle);
2420      return VPPCOM_EINVAL;
2421    }
2422
2423  ASSERT (vep_session->vep.vep_sh == ~0);
2424  ASSERT (vep_session->vep.prev_sh == ~0);
2425
2426  session = vcl_session_get_w_handle (wrk, session_handle);
2427  if (PREDICT_FALSE (!session))
2428    {
2429      VDBG (0, "Invalid session_handle (%u)!", session_handle);
2430      return VPPCOM_EBADFD;
2431    }
2432  if (PREDICT_FALSE (session->is_vep))
2433    {
2434      VDBG (0, "session_handle (%u) is a vep!", vep_handle);
2435      return VPPCOM_EINVAL;
2436    }
2437
2438  switch (op)
2439    {
2440    case EPOLL_CTL_ADD:
2441      if (PREDICT_FALSE (!event))
2442	{
2443	  VDBG (0, "EPOLL_CTL_ADD: NULL pointer to epoll_event structure!");
2444	  return VPPCOM_EINVAL;
2445	}
2446      if (vep_session->vep.next_sh != ~0)
2447	{
2448	  vcl_session_t *next_session;
2449	  next_session = vcl_session_get_w_handle (wrk,
2450						   vep_session->vep.next_sh);
2451	  if (PREDICT_FALSE (!next_session))
2452	    {
2453	      VDBG (0, "EPOLL_CTL_ADD: Invalid vep.next_sh (%u) on "
2454		    "vep_idx (%u)!", vep_session->vep.next_sh, vep_handle);
2455	      return VPPCOM_EBADFD;
2456	    }
2457	  ASSERT (next_session->vep.prev_sh == vep_handle);
2458	  next_session->vep.prev_sh = session_handle;
2459	}
2460      session->vep.next_sh = vep_session->vep.next_sh;
2461      session->vep.prev_sh = vep_handle;
2462      session->vep.vep_sh = vep_handle;
2463      session->vep.et_mask = VEP_DEFAULT_ET_MASK;
2464      session->vep.ev = *event;
2465      session->is_vep = 0;
2466      session->is_vep_session = 1;
2467      vep_session->vep.next_sh = session_handle;
2468
2469      if (session->tx_fifo)
2470	svm_fifo_add_want_deq_ntf (session->tx_fifo,
2471				   SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
2472
2473      VDBG (1, "EPOLL_CTL_ADD: vep_sh %u, sh %u, events 0x%x, data 0x%llx!",
2474	    vep_handle, session_handle, event->events, event->data.u64);
2475      vcl_evt (VCL_EVT_EPOLL_CTLADD, session, event->events, event->data.u64);
2476      break;
2477
2478    case EPOLL_CTL_MOD:
2479      if (PREDICT_FALSE (!event))
2480	{
2481	  VDBG (0, "EPOLL_CTL_MOD: NULL pointer to epoll_event structure!");
2482	  rv = VPPCOM_EINVAL;
2483	  goto done;
2484	}
2485      else if (PREDICT_FALSE (!session->is_vep_session))
2486	{
2487	  VDBG (0, "sh %u EPOLL_CTL_MOD: not a vep session!", session_handle);
2488	  rv = VPPCOM_EINVAL;
2489	  goto done;
2490	}
2491      else if (PREDICT_FALSE (session->vep.vep_sh != vep_handle))
2492	{
2493	  VDBG (0, "EPOLL_CTL_MOD: sh %u vep_sh (%u) != vep_sh (%u)!",
2494		session_handle, session->vep.vep_sh, vep_handle);
2495	  rv = VPPCOM_EINVAL;
2496	  goto done;
2497	}
2498      session->vep.et_mask = VEP_DEFAULT_ET_MASK;
2499      session->vep.ev = *event;
2500      VDBG (1, "EPOLL_CTL_MOD: vep_sh %u, sh %u, events 0x%x, data 0x%llx!",
2501	    vep_handle, session_handle, event->events, event->data.u64);
2502      break;
2503
2504    case EPOLL_CTL_DEL:
2505      if (PREDICT_FALSE (!session->is_vep_session))
2506	{
2507	  VDBG (0, "EPOLL_CTL_DEL: %u not a vep session!", session_handle);
2508	  rv = VPPCOM_EINVAL;
2509	  goto done;
2510	}
2511      else if (PREDICT_FALSE (session->vep.vep_sh != vep_handle))
2512	{
2513	  VDBG (0, "EPOLL_CTL_DEL: sh %u vep_sh (%u) != vep_sh (%u)!",
2514		session_handle, session->vep.vep_sh, vep_handle);
2515	  rv = VPPCOM_EINVAL;
2516	  goto done;
2517	}
2518
2519      if (session->vep.prev_sh == vep_handle)
2520	vep_session->vep.next_sh = session->vep.next_sh;
2521      else
2522	{
2523	  vcl_session_t *prev_session;
2524	  prev_session = vcl_session_get_w_handle (wrk, session->vep.prev_sh);
2525	  if (PREDICT_FALSE (!prev_session))
2526	    {
2527	      VDBG (0, "EPOLL_CTL_DEL: Invalid prev_sh (%u) on sh (%u)!",
2528		    session->vep.prev_sh, session_handle);
2529	      return VPPCOM_EBADFD;
2530	    }
2531	  ASSERT (prev_session->vep.next_sh == session_handle);
2532	  prev_session->vep.next_sh = session->vep.next_sh;
2533	}
2534      if (session->vep.next_sh != ~0)
2535	{
2536	  vcl_session_t *next_session;
2537	  next_session = vcl_session_get_w_handle (wrk, session->vep.next_sh);
2538	  if (PREDICT_FALSE (!next_session))
2539	    {
2540	      VDBG (0, "EPOLL_CTL_DEL: Invalid next_sh (%u) on sh (%u)!",
2541		    session->vep.next_sh, session_handle);
2542	      return VPPCOM_EBADFD;
2543	    }
2544	  ASSERT (next_session->vep.prev_sh == session_handle);
2545	  next_session->vep.prev_sh = session->vep.prev_sh;
2546	}
2547
2548      memset (&session->vep, 0, sizeof (session->vep));
2549      session->vep.next_sh = ~0;
2550      session->vep.prev_sh = ~0;
2551      session->vep.vep_sh = ~0;
2552      session->is_vep_session = 0;
2553
2554      if (session->tx_fifo)
2555	svm_fifo_del_want_deq_ntf (session->tx_fifo, SVM_FIFO_NO_DEQ_NOTIF);
2556
2557      VDBG (1, "EPOLL_CTL_DEL: vep_idx %u, sh %u!", vep_handle,
2558	    session_handle);
2559      vcl_evt (VCL_EVT_EPOLL_CTLDEL, session, vep_sh);
2560      break;
2561
2562    default:
2563      VDBG (0, "Invalid operation (%d)!", op);
2564      rv = VPPCOM_EINVAL;
2565    }
2566
2567  vep_verify_epoll_chain (wrk, vep_handle);
2568
2569done:
2570  return rv;
2571}
2572
2573static inline void
2574vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
2575				struct epoll_event *events, u32 * num_ev)
2576{
2577  session_disconnected_msg_t *disconnected_msg;
2578  session_connected_msg_t *connected_msg;
2579  u32 sid = ~0, session_events;
2580  u64 session_evt_data = ~0;
2581  vcl_session_t *session;
2582  u8 add_event = 0;
2583
2584  switch (e->event_type)
2585    {
2586    case SESSION_IO_EVT_RX:
2587      sid = e->session_index;
2588      if (!(session = vcl_session_get (wrk, sid)))
2589	break;
2590      vcl_fifo_rx_evt_valid_or_break (session);
2591      session_events = session->vep.ev.events;
2592      if (!(EPOLLIN & session->vep.ev.events) || session->has_rx_evt)
2593	break;
2594      add_event = 1;
2595      events[*num_ev].events |= EPOLLIN;
2596      session_evt_data = session->vep.ev.data.u64;
2597      session->has_rx_evt = 1;
2598      break;
2599    case SESSION_IO_EVT_TX:
2600      sid = e->session_index;
2601      if (!(session = vcl_session_get (wrk, sid)))
2602	break;
2603      session_events = session->vep.ev.events;
2604      if (!(EPOLLOUT & session_events))
2605	break;
2606      add_event = 1;
2607      events[*num_ev].events |= EPOLLOUT;
2608      session_evt_data = session->vep.ev.data.u64;
2609      svm_fifo_reset_has_deq_ntf (session->tx_fifo);
2610      break;
2611    case SESSION_CTRL_EVT_ACCEPTED:
2612      session = vcl_session_accepted (wrk,
2613				      (session_accepted_msg_t *) e->data);
2614      if (!session)
2615	break;
2616
2617      session_events = session->vep.ev.events;
2618      if (!(EPOLLIN & session_events))
2619	break;
2620
2621      add_event = 1;
2622      events[*num_ev].events |= EPOLLIN;
2623      session_evt_data = session->vep.ev.data.u64;
2624      break;
2625    case SESSION_CTRL_EVT_CONNECTED:
2626      connected_msg = (session_connected_msg_t *) e->data;
2627      sid = vcl_session_connected_handler (wrk, connected_msg);
2628      /* Generate EPOLLOUT because there's no connected event */
2629      if (!(session = vcl_session_get (wrk, sid)))
2630	break;
2631      session_events = session->vep.ev.events;
2632      if (!(EPOLLOUT & session_events))
2633	break;
2634      add_event = 1;
2635      events[*num_ev].events |= EPOLLOUT;
2636      session_evt_data = session->vep.ev.data.u64;
2637      if (session->session_state & STATE_FAILED)
2638	events[*num_ev].events |= EPOLLHUP;
2639      break;
2640    case SESSION_CTRL_EVT_DISCONNECTED:
2641      disconnected_msg = (session_disconnected_msg_t *) e->data;
2642      session = vcl_session_disconnected_handler (wrk, disconnected_msg);
2643      if (!session)
2644	break;
2645      session_events = session->vep.ev.events;
2646      if (!((EPOLLHUP | EPOLLRDHUP) & session_events))
2647	break;
2648      add_event = 1;
2649      events[*num_ev].events |= EPOLLHUP | EPOLLRDHUP;
2650      session_evt_data = session->vep.ev.data.u64;
2651      break;
2652    case SESSION_CTRL_EVT_RESET:
2653      sid = vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
2654      if (!(session = vcl_session_get (wrk, sid)))
2655	break;
2656      session_events = session->vep.ev.events;
2657      if (!((EPOLLHUP | EPOLLRDHUP) & session_events))
2658	break;
2659      add_event = 1;
2660      events[*num_ev].events |= EPOLLHUP | EPOLLRDHUP;
2661      session_evt_data = session->vep.ev.data.u64;
2662      break;
2663    case SESSION_CTRL_EVT_UNLISTEN_REPLY:
2664      vcl_session_unlisten_reply_handler (wrk, e->data);
2665      break;
2666    case SESSION_CTRL_EVT_REQ_WORKER_UPDATE:
2667      vcl_session_req_worker_update_handler (wrk, e->data);
2668      break;
2669    case SESSION_CTRL_EVT_WORKER_UPDATE_REPLY:
2670      vcl_session_worker_update_reply_handler (wrk, e->data);
2671      break;
2672    default:
2673      VDBG (0, "unhandled: %u", e->event_type);
2674      break;
2675    }
2676
2677  if (add_event)
2678    {
2679      events[*num_ev].data.u64 = session_evt_data;
2680      if (EPOLLONESHOT & session_events)
2681	{
2682	  session = vcl_session_get (wrk, sid);
2683	  session->vep.ev.events = 0;
2684	}
2685      *num_ev += 1;
2686    }
2687}
2688
2689static int
2690vcl_epoll_wait_handle_mq (vcl_worker_t * wrk, svm_msg_q_t * mq,
2691			  struct epoll_event *events, u32 maxevents,
2692			  double wait_for_time, u32 * num_ev)
2693{
2694  svm_msg_q_msg_t *msg;
2695  session_event_t *e;
2696  int i;
2697
2698  if (vec_len (wrk->mq_msg_vector) && svm_msg_q_is_empty (mq))
2699    goto handle_dequeued;
2700
2701  svm_msg_q_lock (mq);
2702  if (svm_msg_q_is_empty (mq))
2703    {
2704      if (!wait_for_time)
2705	{
2706	  svm_msg_q_unlock (mq);
2707	  return 0;
2708	}
2709      else if (wait_for_time < 0)
2710	{
2711	  svm_msg_q_wait (mq);
2712	}
2713      else
2714	{
2715	  if (svm_msg_q_timedwait (mq, wait_for_time / 1e3))
2716	    {
2717	      svm_msg_q_unlock (mq);
2718	      return 0;
2719	    }
2720	}
2721    }
2722  ASSERT (maxevents > *num_ev);
2723  vcl_mq_dequeue_batch (wrk, mq, maxevents - *num_ev);
2724  svm_msg_q_unlock (mq);
2725
2726handle_dequeued:
2727  for (i = 0; i < vec_len (wrk->mq_msg_vector); i++)
2728    {
2729      msg = vec_elt_at_index (wrk->mq_msg_vector, i);
2730      e = svm_msg_q_msg_data (mq, msg);
2731      vcl_epoll_wait_handle_mq_event (wrk, e, events, num_ev);
2732      svm_msg_q_free_msg (mq, msg);
2733    }
2734  vec_reset_length (wrk->mq_msg_vector);
2735  vcl_handle_pending_wrk_updates (wrk);
2736  return *num_ev;
2737}
2738
2739static int
2740vppcom_epoll_wait_condvar (vcl_worker_t * wrk, struct epoll_event *events,
2741			   int maxevents, u32 n_evts, double wait_for_time)
2742{
2743  double wait = 0, start = 0, now;
2744
2745  if (!n_evts)
2746    {
2747      wait = wait_for_time;
2748      start = clib_time_now (&wrk->clib_time);
2749    }
2750
2751  do
2752    {
2753      vcl_epoll_wait_handle_mq (wrk, wrk->app_event_queue, events, maxevents,
2754				wait, &n_evts);
2755      if (n_evts)
2756	return n_evts;
2757      if (wait == -1)
2758	continue;
2759
2760      now = clib_time_now (&wrk->clib_time);
2761      wait -= now - start;
2762      start = now;
2763    }
2764  while (wait > 0);
2765
2766  return 0;
2767}
2768
2769static int
2770vppcom_epoll_wait_eventfd (vcl_worker_t * wrk, struct epoll_event *events,
2771			   int maxevents, u32 n_evts, double wait_for_time)
2772{
2773  vcl_mq_evt_conn_t *mqc;
2774  int __clib_unused n_read;
2775  int n_mq_evts, i;
2776  u64 buf;
2777
2778  vec_validate (wrk->mq_events, pool_elts (wrk->mq_evt_conns));
2779again:
2780  n_mq_evts = epoll_wait (wrk->mqs_epfd, wrk->mq_events,
2781			  vec_len (wrk->mq_events), wait_for_time);
2782  for (i = 0; i < n_mq_evts; i++)
2783    {
2784      mqc = vcl_mq_evt_conn_get (wrk, wrk->mq_events[i].data.u32);
2785      n_read = read (mqc->mq_fd, &buf, sizeof (buf));
2786      vcl_epoll_wait_handle_mq (wrk, mqc->mq, events, maxevents, 0, &n_evts);
2787    }
2788  if (!n_evts && n_mq_evts > 0)
2789    goto again;
2790
2791  return (int) n_evts;
2792}
2793
2794int
2795vppcom_epoll_wait (uint32_t vep_handle, struct epoll_event *events,
2796		   int maxevents, double wait_for_time)
2797{
2798  vcl_worker_t *wrk = vcl_worker_get_current ();
2799  vcl_session_t *vep_session;
2800  u32 n_evts = 0;
2801  int i;
2802
2803  if (PREDICT_FALSE (maxevents <= 0))
2804    {
2805      VDBG (0, "ERROR: Invalid maxevents (%d)!", maxevents);
2806      return VPPCOM_EINVAL;
2807    }
2808
2809  vep_session = vcl_session_get_w_handle (wrk, vep_handle);
2810  if (!vep_session)
2811    return VPPCOM_EBADFD;
2812
2813  if (PREDICT_FALSE (!vep_session->is_vep))
2814    {
2815      VDBG (0, "ERROR: vep_idx (%u) is not a vep!", vep_handle);
2816      return VPPCOM_EINVAL;
2817    }
2818
2819  memset (events, 0, sizeof (*events) * maxevents);
2820
2821  if (vec_len (wrk->unhandled_evts_vector))
2822    {
2823      for (i = 0; i < vec_len (wrk->unhandled_evts_vector); i++)
2824	{
2825	  vcl_epoll_wait_handle_mq_event (wrk, &wrk->unhandled_evts_vector[i],
2826					  events, &n_evts);
2827	  if (n_evts == maxevents)
2828	    {
2829	      vec_delete (wrk->unhandled_evts_vector, i + 1, 0);
2830	      return n_evts;
2831	    }
2832	}
2833      vec_reset_length (wrk->unhandled_evts_vector);
2834    }
2835
2836  if (vcm->cfg.use_mq_eventfd)
2837    return vppcom_epoll_wait_eventfd (wrk, events, maxevents, n_evts,
2838				      wait_for_time);
2839
2840  return vppcom_epoll_wait_condvar (wrk, events, maxevents, n_evts,
2841				    wait_for_time);
2842}
2843
2844int
2845vppcom_session_attr (uint32_t session_handle, uint32_t op,
2846		     void *buffer, uint32_t * buflen)
2847{
2848  vcl_worker_t *wrk = vcl_worker_get_current ();
2849  vcl_session_t *session;
2850  int rv = VPPCOM_OK;
2851  u32 *flags = buffer, tmp_flags = 0;
2852  vppcom_endpt_t *ep = buffer;
2853
2854  session = vcl_session_get_w_handle (wrk, session_handle);
2855  if (!session)
2856    return VPPCOM_EBADFD;
2857
2858  switch (op)
2859    {
2860    case VPPCOM_ATTR_GET_NREAD:
2861      rv = vcl_session_read_ready (session);
2862      VDBG (2, "VPPCOM_ATTR_GET_NREAD: sh %u, nread = %d", session_handle,
2863	    rv);
2864      break;
2865
2866    case VPPCOM_ATTR_GET_NWRITE:
2867      rv = vcl_session_write_ready (session);
2868      VDBG (2, "VPPCOM_ATTR_GET_NWRITE: sh %u, nwrite = %d", session_handle,
2869	    rv);
2870      break;
2871
2872    case VPPCOM_ATTR_GET_FLAGS:
2873      if (PREDICT_TRUE (buffer && buflen && (*buflen >= sizeof (*flags))))
2874	{
2875	  *flags = O_RDWR | (VCL_SESS_ATTR_TEST (session->attr,
2876						 VCL_SESS_ATTR_NONBLOCK));
2877	  *buflen = sizeof (*flags);
2878	  VDBG (2, "VPPCOM_ATTR_GET_FLAGS: sh %u, flags = 0x%08x, "
2879		"is_nonblocking = %u", session_handle, *flags,
2880		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_NONBLOCK));
2881	}
2882      else
2883	rv = VPPCOM_EINVAL;
2884      break;
2885
2886    case VPPCOM_ATTR_SET_FLAGS:
2887      if (PREDICT_TRUE (buffer && buflen && (*buflen == sizeof (*flags))))
2888	{
2889	  if (*flags & O_NONBLOCK)
2890	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_NONBLOCK);
2891	  else
2892	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_NONBLOCK);
2893
2894	  VDBG (2, "VPPCOM_ATTR_SET_FLAGS: sh %u, flags = 0x%08x,"
2895		" is_nonblocking = %u", session_handle, *flags,
2896		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_NONBLOCK));
2897	}
2898      else
2899	rv = VPPCOM_EINVAL;
2900      break;
2901
2902    case VPPCOM_ATTR_GET_PEER_ADDR:
2903      if (PREDICT_TRUE (buffer && buflen &&
2904			(*buflen >= sizeof (*ep)) && ep->ip))
2905	{
2906	  ep->is_ip4 = session->transport.is_ip4;
2907	  ep->port = session->transport.rmt_port;
2908	  if (session->transport.is_ip4)
2909	    clib_memcpy_fast (ep->ip, &session->transport.rmt_ip.ip4,
2910			      sizeof (ip4_address_t));
2911	  else
2912	    clib_memcpy_fast (ep->ip, &session->transport.rmt_ip.ip6,
2913			      sizeof (ip6_address_t));
2914	  *buflen = sizeof (*ep);
2915	  VDBG (1, "VPPCOM_ATTR_GET_PEER_ADDR: sh %u, is_ip4 = %u, "
2916		"addr = %U, port %u", session_handle, ep->is_ip4,
2917		format_ip46_address, &session->transport.rmt_ip,
2918		ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
2919		clib_net_to_host_u16 (ep->port));
2920	}
2921      else
2922	rv = VPPCOM_EINVAL;
2923      break;
2924
2925    case VPPCOM_ATTR_GET_LCL_ADDR:
2926      if (PREDICT_TRUE (buffer && buflen &&
2927			(*buflen >= sizeof (*ep)) && ep->ip))
2928	{
2929	  ep->is_ip4 = session->transport.is_ip4;
2930	  ep->port = session->transport.lcl_port;
2931	  if (session->transport.is_ip4)
2932	    clib_memcpy_fast (ep->ip, &session->transport.lcl_ip.ip4,
2933			      sizeof (ip4_address_t));
2934	  else
2935	    clib_memcpy_fast (ep->ip, &session->transport.lcl_ip.ip6,
2936			      sizeof (ip6_address_t));
2937	  *buflen = sizeof (*ep);
2938	  VDBG (1, "VPPCOM_ATTR_GET_LCL_ADDR: sh %u, is_ip4 = %u, addr = %U"
2939		" port %d", session_handle, ep->is_ip4, format_ip46_address,
2940		&session->transport.lcl_ip,
2941		ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
2942		clib_net_to_host_u16 (ep->port));
2943	}
2944      else
2945	rv = VPPCOM_EINVAL;
2946      break;
2947
2948    case VPPCOM_ATTR_SET_LCL_ADDR:
2949      if (PREDICT_TRUE (buffer && buflen &&
2950			(*buflen >= sizeof (*ep)) && ep->ip))
2951	{
2952	  session->transport.is_ip4 = ep->is_ip4;
2953	  session->transport.lcl_port = ep->port;
2954	  vcl_ip_copy_from_ep (&session->transport.lcl_ip, ep);
2955	  *buflen = sizeof (*ep);
2956	  VDBG (1, "VPPCOM_ATTR_SET_LCL_ADDR: sh %u, is_ip4 = %u, addr = %U"
2957		" port %d", session_handle, ep->is_ip4, format_ip46_address,
2958		&session->transport.lcl_ip,
2959		ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
2960		clib_net_to_host_u16 (ep->port));
2961	}
2962      else
2963	rv = VPPCOM_EINVAL;
2964      break;
2965
2966    case VPPCOM_ATTR_GET_LIBC_EPFD:
2967      rv = session->libc_epfd;
2968      VDBG (2, "VPPCOM_ATTR_GET_LIBC_EPFD: libc_epfd %d", rv);
2969      break;
2970
2971    case VPPCOM_ATTR_SET_LIBC_EPFD:
2972      if (PREDICT_TRUE (buffer && buflen &&
2973			(*buflen == sizeof (session->libc_epfd))))
2974	{
2975	  session->libc_epfd = *(int *) buffer;
2976	  *buflen = sizeof (session->libc_epfd);
2977
2978	  VDBG (2, "VPPCOM_ATTR_SET_LIBC_EPFD: libc_epfd %d, buflen %d",
2979		session->libc_epfd, *buflen);
2980	}
2981      else
2982	rv = VPPCOM_EINVAL;
2983      break;
2984
2985    case VPPCOM_ATTR_GET_PROTOCOL:
2986      if (buffer && buflen && (*buflen >= sizeof (int)))
2987	{
2988	  *(int *) buffer = session->session_type;
2989	  *buflen = sizeof (int);
2990
2991	  VDBG (2, "VPPCOM_ATTR_GET_PROTOCOL: %d (%s), buflen %d",
2992		*(int *) buffer, *(int *) buffer ? "UDP" : "TCP", *buflen);
2993	}
2994      else
2995	rv = VPPCOM_EINVAL;
2996      break;
2997
2998    case VPPCOM_ATTR_GET_LISTEN:
2999      if (buffer && buflen && (*buflen >= sizeof (int)))
3000	{
3001	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3002						VCL_SESS_ATTR_LISTEN);
3003	  *buflen = sizeof (int);
3004
3005	  VDBG (2, "VPPCOM_ATTR_GET_LISTEN: %d, buflen %d", *(int *) buffer,
3006		*buflen);
3007	}
3008      else
3009	rv = VPPCOM_EINVAL;
3010      break;
3011
3012    case VPPCOM_ATTR_GET_ERROR:
3013      if (buffer && buflen && (*buflen >= sizeof (int)))
3014	{
3015	  *(int *) buffer = 0;
3016	  *buflen = sizeof (int);
3017
3018	  VDBG (2, "VPPCOM_ATTR_GET_ERROR: %d, buflen %d, #VPP-TBD#",
3019		*(int *) buffer, *buflen);
3020	}
3021      else
3022	rv = VPPCOM_EINVAL;
3023      break;
3024
3025    case VPPCOM_ATTR_GET_TX_FIFO_LEN:
3026      if (buffer && buflen && (*buflen >= sizeof (u32)))
3027	{
3028
3029	  /* VPP-TBD */
3030	  *(size_t *) buffer = (session->sndbuf_size ? session->sndbuf_size :
3031				session->tx_fifo ? session->tx_fifo->nitems :
3032				vcm->cfg.tx_fifo_size);
3033	  *buflen = sizeof (u32);
3034
3035	  VDBG (2, "VPPCOM_ATTR_GET_TX_FIFO_LEN: %u (0x%x), buflen %d,"
3036		" #VPP-TBD#", *(size_t *) buffer, *(size_t *) buffer,
3037		*buflen);
3038	}
3039      else
3040	rv = VPPCOM_EINVAL;
3041      break;
3042
3043    case VPPCOM_ATTR_SET_TX_FIFO_LEN:
3044      if (buffer && buflen && (*buflen == sizeof (u32)))
3045	{
3046	  /* VPP-TBD */
3047	  session->sndbuf_size = *(u32 *) buffer;
3048	  VDBG (2, "VPPCOM_ATTR_SET_TX_FIFO_LEN: %u (0x%x), buflen %d,"
3049		" #VPP-TBD#", session->sndbuf_size, session->sndbuf_size,
3050		*buflen);
3051	}
3052      else
3053	rv = VPPCOM_EINVAL;
3054      break;
3055
3056    case VPPCOM_ATTR_GET_RX_FIFO_LEN:
3057      if (buffer && buflen && (*buflen >= sizeof (u32)))
3058	{
3059
3060	  /* VPP-TBD */
3061	  *(size_t *) buffer = (session->rcvbuf_size ? session->rcvbuf_size :
3062				session->rx_fifo ? session->rx_fifo->nitems :
3063				vcm->cfg.rx_fifo_size);
3064	  *buflen = sizeof (u32);
3065
3066	  VDBG (2, "VPPCOM_ATTR_GET_RX_FIFO_LEN: %u (0x%x), buflen %d, "
3067		"#VPP-TBD#", *(size_t *) buffer, *(size_t *) buffer, *buflen);
3068	}
3069      else
3070	rv = VPPCOM_EINVAL;
3071      break;
3072
3073    case VPPCOM_ATTR_SET_RX_FIFO_LEN:
3074      if (buffer && buflen && (*buflen == sizeof (u32)))
3075	{
3076	  /* VPP-TBD */
3077	  session->rcvbuf_size = *(u32 *) buffer;
3078	  VDBG (2, "VPPCOM_ATTR_SET_RX_FIFO_LEN: %u (0x%x), buflen %d,"
3079		" #VPP-TBD#", session->sndbuf_size, session->sndbuf_size,
3080		*buflen);
3081	}
3082      else
3083	rv = VPPCOM_EINVAL;
3084      break;
3085
3086    case VPPCOM_ATTR_GET_REUSEADDR:
3087      if (buffer && buflen && (*buflen >= sizeof (int)))
3088	{
3089	  /* VPP-TBD */
3090	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3091						VCL_SESS_ATTR_REUSEADDR);
3092	  *buflen = sizeof (int);
3093
3094	  VDBG (2, "VPPCOM_ATTR_GET_REUSEADDR: %d, buflen %d, #VPP-TBD#",
3095		*(int *) buffer, *buflen);
3096	}
3097      else
3098	rv = VPPCOM_EINVAL;
3099      break;
3100
3101    case VPPCOM_ATTR_SET_REUSEADDR:
3102      if (buffer && buflen && (*buflen == sizeof (int)) &&
3103	  !VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_LISTEN))
3104	{
3105	  /* VPP-TBD */
3106	  if (*(int *) buffer)
3107	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_REUSEADDR);
3108	  else
3109	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_REUSEADDR);
3110
3111	  VDBG (2, "VPPCOM_ATTR_SET_REUSEADDR: %d, buflen %d, #VPP-TBD#",
3112		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_REUSEADDR),
3113		*buflen);
3114	}
3115      else
3116	rv = VPPCOM_EINVAL;
3117      break;
3118
3119    case VPPCOM_ATTR_GET_REUSEPORT:
3120      if (buffer && buflen && (*buflen >= sizeof (int)))
3121	{
3122	  /* VPP-TBD */
3123	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3124						VCL_SESS_ATTR_REUSEPORT);
3125	  *buflen = sizeof (int);
3126
3127	  VDBG (2, "VPPCOM_ATTR_GET_REUSEPORT: %d, buflen %d, #VPP-TBD#",
3128		*(int *) buffer, *buflen);
3129	}
3130      else
3131	rv = VPPCOM_EINVAL;
3132      break;
3133
3134    case VPPCOM_ATTR_SET_REUSEPORT:
3135      if (buffer && buflen && (*buflen == sizeof (int)) &&
3136	  !VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_LISTEN))
3137	{
3138	  /* VPP-TBD */
3139	  if (*(int *) buffer)
3140	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_REUSEPORT);
3141	  else
3142	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_REUSEPORT);
3143
3144	  VDBG (2, "VPPCOM_ATTR_SET_REUSEPORT: %d, buflen %d, #VPP-TBD#",
3145		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_REUSEPORT),
3146		*buflen);
3147	}
3148      else
3149	rv = VPPCOM_EINVAL;
3150      break;
3151
3152    case VPPCOM_ATTR_GET_BROADCAST:
3153      if (buffer && buflen && (*buflen >= sizeof (int)))
3154	{
3155	  /* VPP-TBD */
3156	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3157						VCL_SESS_ATTR_BROADCAST);
3158	  *buflen = sizeof (int);
3159
3160	  VDBG (2, "VPPCOM_ATTR_GET_BROADCAST: %d, buflen %d, #VPP-TBD#",
3161		*(int *) buffer, *buflen);
3162	}
3163      else
3164	rv = VPPCOM_EINVAL;
3165      break;
3166
3167    case VPPCOM_ATTR_SET_BROADCAST:
3168      if (buffer && buflen && (*buflen == sizeof (int)))
3169	{
3170	  /* VPP-TBD */
3171	  if (*(int *) buffer)
3172	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_BROADCAST);
3173	  else
3174	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_BROADCAST);
3175
3176	  VDBG (2, "VPPCOM_ATTR_SET_BROADCAST: %d, buflen %d, #VPP-TBD#",
3177		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_BROADCAST),
3178		*buflen);
3179	}
3180      else
3181	rv = VPPCOM_EINVAL;
3182      break;
3183
3184    case VPPCOM_ATTR_GET_V6ONLY:
3185      if (buffer && buflen && (*buflen >= sizeof (int)))
3186	{
3187	  /* VPP-TBD */
3188	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3189						VCL_SESS_ATTR_V6ONLY);
3190	  *buflen = sizeof (int);
3191
3192	  VDBG (2, "VPPCOM_ATTR_GET_V6ONLY: %d, buflen %d, #VPP-TBD#",
3193		*(int *) buffer, *buflen);
3194	}
3195      else
3196	rv = VPPCOM_EINVAL;
3197      break;
3198
3199    case VPPCOM_ATTR_SET_V6ONLY:
3200      if (buffer && buflen && (*buflen == sizeof (int)))
3201	{
3202	  /* VPP-TBD */
3203	  if (*(int *) buffer)
3204	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_V6ONLY);
3205	  else
3206	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_V6ONLY);
3207
3208	  VDBG (2, "VPPCOM_ATTR_SET_V6ONLY: %d, buflen %d, #VPP-TBD#",
3209		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_V6ONLY),
3210		*buflen);
3211	}
3212      else
3213	rv = VPPCOM_EINVAL;
3214      break;
3215
3216    case VPPCOM_ATTR_GET_KEEPALIVE:
3217      if (buffer && buflen && (*buflen >= sizeof (int)))
3218	{
3219	  /* VPP-TBD */
3220	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3221						VCL_SESS_ATTR_KEEPALIVE);
3222	  *buflen = sizeof (int);
3223
3224	  VDBG (2, "VPPCOM_ATTR_GET_KEEPALIVE: %d, buflen %d, #VPP-TBD#",
3225		*(int *) buffer, *buflen);
3226	}
3227      else
3228	rv = VPPCOM_EINVAL;
3229      break;
3230
3231    case VPPCOM_ATTR_SET_KEEPALIVE:
3232      if (buffer && buflen && (*buflen == sizeof (int)))
3233	{
3234	  /* VPP-TBD */
3235	  if (*(int *) buffer)
3236	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_KEEPALIVE);
3237	  else
3238	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_KEEPALIVE);
3239
3240	  VDBG (2, "VPPCOM_ATTR_SET_KEEPALIVE: %d, buflen %d, #VPP-TBD#",
3241		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_KEEPALIVE),
3242		*buflen);
3243	}
3244      else
3245	rv = VPPCOM_EINVAL;
3246      break;
3247
3248    case VPPCOM_ATTR_GET_TCP_NODELAY:
3249      if (buffer && buflen && (*buflen >= sizeof (int)))
3250	{
3251	  /* VPP-TBD */
3252	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3253						VCL_SESS_ATTR_TCP_NODELAY);
3254	  *buflen = sizeof (int);
3255
3256	  VDBG (2, "VPPCOM_ATTR_GET_TCP_NODELAY: %d, buflen %d, #VPP-TBD#",
3257		*(int *) buffer, *buflen);
3258	}
3259      else
3260	rv = VPPCOM_EINVAL;
3261      break;
3262
3263    case VPPCOM_ATTR_SET_TCP_NODELAY:
3264      if (buffer && buflen && (*buflen == sizeof (int)))
3265	{
3266	  /* VPP-TBD */
3267	  if (*(int *) buffer)
3268	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_TCP_NODELAY);
3269	  else
3270	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_TCP_NODELAY);
3271
3272	  VDBG (2, "VPPCOM_ATTR_SET_TCP_NODELAY: %d, buflen %d, #VPP-TBD#",
3273		VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_TCP_NODELAY),
3274		*buflen);
3275	}
3276      else
3277	rv = VPPCOM_EINVAL;
3278      break;
3279
3280    case VPPCOM_ATTR_GET_TCP_KEEPIDLE:
3281      if (buffer && buflen && (*buflen >= sizeof (int)))
3282	{
3283	  /* VPP-TBD */
3284	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3285						VCL_SESS_ATTR_TCP_KEEPIDLE);
3286	  *buflen = sizeof (int);
3287
3288	  VDBG (2, "VPPCOM_ATTR_GET_TCP_KEEPIDLE: %d, buflen %d, #VPP-TBD#",
3289		*(int *) buffer, *buflen);
3290	}
3291      else
3292	rv = VPPCOM_EINVAL;
3293      break;
3294
3295    case VPPCOM_ATTR_SET_TCP_KEEPIDLE:
3296      if (buffer && buflen && (*buflen == sizeof (int)))
3297	{
3298	  /* VPP-TBD */
3299	  if (*(int *) buffer)
3300	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_TCP_KEEPIDLE);
3301	  else
3302	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_TCP_KEEPIDLE);
3303
3304	  VDBG (2, "VPPCOM_ATTR_SET_TCP_KEEPIDLE: %d, buflen %d, #VPP-TBD#",
3305		VCL_SESS_ATTR_TEST (session->attr,
3306				    VCL_SESS_ATTR_TCP_KEEPIDLE), *buflen);
3307	}
3308      else
3309	rv = VPPCOM_EINVAL;
3310      break;
3311
3312    case VPPCOM_ATTR_GET_TCP_KEEPINTVL:
3313      if (buffer && buflen && (*buflen >= sizeof (int)))
3314	{
3315	  /* VPP-TBD */
3316	  *(int *) buffer = VCL_SESS_ATTR_TEST (session->attr,
3317						VCL_SESS_ATTR_TCP_KEEPINTVL);
3318	  *buflen = sizeof (int);
3319
3320	  VDBG (2, "VPPCOM_ATTR_GET_TCP_KEEPINTVL: %d, buflen %d, #VPP-TBD#",
3321		*(int *) buffer, *buflen);
3322	}
3323      else
3324	rv = VPPCOM_EINVAL;
3325      break;
3326
3327    case VPPCOM_ATTR_SET_TCP_KEEPINTVL:
3328      if (buffer && buflen && (*buflen == sizeof (int)))
3329	{
3330	  /* VPP-TBD */
3331	  if (*(int *) buffer)
3332	    VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_TCP_KEEPINTVL);
3333	  else
3334	    VCL_SESS_ATTR_CLR (session->attr, VCL_SESS_ATTR_TCP_KEEPINTVL);
3335
3336	  VDBG (2, "VPPCOM_ATTR_SET_TCP_KEEPINTVL: %d, buflen %d, #VPP-TBD#",
3337		VCL_SESS_ATTR_TEST (session->attr,
3338				    VCL_SESS_ATTR_TCP_KEEPINTVL), *buflen);
3339	}
3340      else
3341	rv = VPPCOM_EINVAL;
3342      break;
3343
3344    case VPPCOM_ATTR_GET_TCP_USER_MSS:
3345      if (buffer && buflen && (*buflen >= sizeof (u32)))
3346	{
3347	  /* VPP-TBD */
3348	  *(u32 *) buffer = session->user_mss;
3349	  *buflen = sizeof (int);
3350
3351	  VDBG (2, "VPPCOM_ATTR_GET_TCP_USER_MSS: %d, buflen %d, #VPP-TBD#",
3352		*(int *) buffer, *buflen);
3353	}
3354      else
3355	rv = VPPCOM_EINVAL;
3356      break;
3357
3358    case VPPCOM_ATTR_SET_TCP_USER_MSS:
3359      if (buffer && buflen && (*buflen == sizeof (u32)))
3360	{
3361	  /* VPP-TBD */
3362	  session->user_mss = *(u32 *) buffer;
3363
3364	  VDBG (2, "VPPCOM_ATTR_SET_TCP_USER_MSS: %u, buflen %d, #VPP-TBD#",
3365		session->user_mss, *buflen);
3366	}
3367      else
3368	rv = VPPCOM_EINVAL;
3369      break;
3370
3371    case VPPCOM_ATTR_SET_SHUT:
3372      if (*flags == SHUT_RD || *flags == SHUT_RDWR)
3373	VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_SHUT_RD);
3374      if (*flags == SHUT_WR || *flags == SHUT_RDWR)
3375	VCL_SESS_ATTR_SET (session->attr, VCL_SESS_ATTR_SHUT_WR);
3376      break;
3377
3378    case VPPCOM_ATTR_GET_SHUT:
3379      if (VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_SHUT_RD))
3380	tmp_flags = 1;
3381      if (VCL_SESS_ATTR_TEST (session->attr, VCL_SESS_ATTR_SHUT_WR))
3382	tmp_flags |= 2;
3383      if (tmp_flags == 1)
3384	*(int *) buffer = SHUT_RD;
3385      else if (tmp_flags == 2)
3386	*(int *) buffer = SHUT_WR;
3387      else if (tmp_flags == 3)
3388	*(int *) buffer = SHUT_RDWR;
3389      *buflen = sizeof (int);
3390      break;
3391    default:
3392      rv = VPPCOM_EINVAL;
3393      break;
3394    }
3395
3396  return rv;
3397}
3398
3399int
3400vppcom_session_recvfrom (uint32_t session_handle, void *buffer,
3401			 uint32_t buflen, int flags, vppcom_endpt_t * ep)
3402{
3403  vcl_worker_t *wrk = vcl_worker_get_current ();
3404  int rv = VPPCOM_OK;
3405  vcl_session_t *session = 0;
3406
3407  if (ep)
3408    {
3409      session = vcl_session_get_w_handle (wrk, session_handle);
3410      if (PREDICT_FALSE (!session))
3411	{
3412	  VDBG (0, "sh 0x%llx is closed!", session_handle);
3413	  return VPPCOM_EBADFD;
3414	}
3415      ep->is_ip4 = session->transport.is_ip4;
3416      ep->port = session->transport.rmt_port;
3417    }
3418
3419  if (flags == 0)
3420    rv = vppcom_session_read (session_handle, buffer, buflen);
3421  else if (flags & MSG_PEEK)
3422    rv = vppcom_session_peek (session_handle, buffer, buflen);
3423  else
3424    {
3425      VDBG (0, "Unsupport flags for recvfrom %d", flags);
3426      return VPPCOM_EAFNOSUPPORT;
3427    }
3428
3429  if (ep)
3430    {
3431      if (session->transport.is_ip4)
3432	clib_memcpy_fast (ep->ip, &session->transport.rmt_ip.ip4,
3433			  sizeof (ip4_address_t));
3434      else
3435	clib_memcpy_fast (ep->ip, &session->transport.rmt_ip.ip6,
3436			  sizeof (ip6_address_t));
3437    }
3438
3439  return rv;
3440}
3441
3442int
3443vppcom_session_sendto (uint32_t session_handle, void *buffer,
3444		       uint32_t buflen, int flags, vppcom_endpt_t * ep)
3445{
3446  if (!buffer)
3447    return VPPCOM_EINVAL;
3448
3449  if (ep)
3450    {
3451      // TBD
3452      return VPPCOM_EINVAL;
3453    }
3454
3455  if (flags)
3456    {
3457      // TBD check the flags and do the right thing
3458      VDBG (2, "handling flags 0x%u (%d) not implemented yet.", flags, flags);
3459    }
3460
3461  return (vppcom_session_write_inline (session_handle, buffer, buflen, 1));
3462}
3463
3464int
3465vppcom_poll (vcl_poll_t * vp, uint32_t n_sids, double wait_for_time)
3466{
3467  vcl_worker_t *wrk = vcl_worker_get_current ();
3468  f64 timeout = clib_time_now (&wrk->clib_time) + wait_for_time;
3469  u32 i, keep_trying = 1;
3470  svm_msg_q_msg_t msg;
3471  session_event_t *e;
3472  int rv, num_ev = 0;
3473
3474  VDBG (3, "vp %p, nsids %u, wait_for_time %f", vp, n_sids, wait_for_time);
3475
3476  if (!vp)
3477    return VPPCOM_EFAULT;
3478
3479  do
3480    {
3481      vcl_session_t *session;
3482
3483      /* Dequeue all events and drop all unhandled io events */
3484      while (svm_msg_q_sub (wrk->app_event_queue, &msg, SVM_Q_NOWAIT, 0) == 0)
3485	{
3486	  e = svm_msg_q_msg_data (wrk->app_event_queue, &msg);
3487	  vcl_handle_mq_event (wrk, e);
3488	  svm_msg_q_free_msg (wrk->app_event_queue, &msg);
3489	}
3490      vec_reset_length (wrk->unhandled_evts_vector);
3491
3492      for (i = 0; i < n_sids; i++)
3493	{
3494	  session = vcl_session_get (wrk, vp[i].sh);
3495	  if (!session)
3496	    {
3497	      vp[i].revents = POLLHUP;
3498	      num_ev++;
3499	      continue;
3500	    }
3501
3502	  vp[i].revents = 0;
3503
3504	  if (POLLIN & vp[i].events)
3505	    {
3506	      rv = vcl_session_read_ready (session);
3507	      if (rv > 0)
3508		{
3509		  vp[i].revents |= POLLIN;
3510		  num_ev++;
3511		}
3512	      else if (rv < 0)
3513		{
3514		  switch (rv)
3515		    {
3516		    case VPPCOM_ECONNRESET:
3517		      vp[i].revents = POLLHUP;
3518		      break;
3519
3520		    default:
3521		      vp[i].revents = POLLERR;
3522		      break;
3523		    }
3524		  num_ev++;
3525		}
3526	    }
3527
3528	  if (POLLOUT & vp[i].events)
3529	    {
3530	      rv = vcl_session_write_ready (session);
3531	      if (rv > 0)
3532		{
3533		  vp[i].revents |= POLLOUT;
3534		  num_ev++;
3535		}
3536	      else if (rv < 0)
3537		{
3538		  switch (rv)
3539		    {
3540		    case VPPCOM_ECONNRESET:
3541		      vp[i].revents = POLLHUP;
3542		      break;
3543
3544		    default:
3545		      vp[i].revents = POLLERR;
3546		      break;
3547		    }
3548		  num_ev++;
3549		}
3550	    }
3551
3552	  if (0)		// Note "done:" label used by VCL_SESSION_LOCK_AND_GET()
3553	    {
3554	      vp[i].revents = POLLNVAL;
3555	      num_ev++;
3556	    }
3557	}
3558      if (wait_for_time != -1)
3559	keep_trying = (clib_time_now (&wrk->clib_time) <= timeout) ? 1 : 0;
3560    }
3561  while ((num_ev == 0) && keep_trying);
3562
3563  return num_ev;
3564}
3565
3566int
3567vppcom_mq_epoll_fd (void)
3568{
3569  vcl_worker_t *wrk = vcl_worker_get_current ();
3570  return wrk->mqs_epfd;
3571}
3572
3573int
3574vppcom_session_index (vcl_session_handle_t session_handle)
3575{
3576  return session_handle & 0xFFFFFF;
3577}
3578
3579int
3580vppcom_session_worker (vcl_session_handle_t session_handle)
3581{
3582  return session_handle >> 24;
3583}
3584
3585int
3586vppcom_worker_register (void)
3587{
3588  vcl_worker_t *wrk;
3589  u8 *wrk_name = 0;
3590  int rv;
3591
3592  if (!vcl_worker_alloc_and_init ())
3593    return VPPCOM_EEXIST;
3594
3595  wrk = vcl_worker_get_current ();
3596  wrk_name = format (0, "%s-wrk-%u", vcm->app_name, wrk->wrk_index);
3597
3598  rv = vppcom_connect_to_vpp ((char *) wrk_name);
3599  vec_free (wrk_name);
3600
3601  if (rv)
3602    return VPPCOM_EFAULT;
3603
3604  if (vcl_worker_register_with_vpp ())
3605    return VPPCOM_EEXIST;
3606
3607  return VPPCOM_OK;
3608}
3609
3610void
3611vppcom_worker_unregister (void)
3612{
3613  vcl_worker_cleanup (vcl_worker_get_current (), 1 /* notify vpp */ );
3614  vcl_set_worker_index (~0);
3615}
3616
/* Return the worker index reported by vcl_get_worker_index (). */
int
vppcom_worker_index (void)
{
  int wrk_index = vcl_get_worker_index ();

  return wrk_index;
}
3622
3623int
3624vppcom_worker_mqs_epfd (void)
3625{
3626  vcl_worker_t *wrk = vcl_worker_get_current ();
3627  if (!vcm->cfg.use_mq_eventfd)
3628    return -1;
3629  return wrk->mqs_epfd;
3630}
3631
3632int
3633vppcom_session_is_connectable_listener (uint32_t session_handle)
3634{
3635  vcl_session_t *session;
3636  vcl_worker_t *wrk = vcl_worker_get_current ();
3637  session = vcl_session_get_w_handle (wrk, session_handle);
3638  if (!session)
3639    return VPPCOM_EBADFD;
3640  return vcl_session_is_connectable_listener (wrk, session);
3641}
3642
3643int
3644vppcom_session_listener (uint32_t session_handle)
3645{
3646  vcl_worker_t *wrk = vcl_worker_get_current ();
3647  vcl_session_t *listen_session, *session;
3648  session = vcl_session_get_w_handle (wrk, session_handle);
3649  if (!session)
3650    return VPPCOM_EBADFD;
3651  if (session->listener_index == VCL_INVALID_SESSION_INDEX)
3652    return VPPCOM_EBADFD;
3653  listen_session = vcl_session_get_w_handle (wrk, session->listener_index);
3654  if (!listen_session)
3655    return VPPCOM_EBADFD;
3656  return vcl_session_handle (listen_session);
3657}
3658
3659int
3660vppcom_session_n_accepted (uint32_t session_handle)
3661{
3662  vcl_worker_t *wrk = vcl_worker_get_current ();
3663  vcl_session_t *session = vcl_session_get_w_handle (wrk, session_handle);
3664  if (!session)
3665    return VPPCOM_EBADFD;
3666  return session->n_accepted_sessions;
3667}
3668
3669/*
3670 * fd.io coding-style-patch-verification: ON
3671 *
3672 * Local Variables:
3673 * eval: (c-set-style "gnu")
3674 * End:
3675 */
3676