vcom_socket.c revision eedb87c1
1/*
2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15#include <unistd.h>
16#include <stdio.h>
17
18#include <vppinfra/types.h>
19#include <vppinfra/hash.h>
20#include <vppinfra/pool.h>
21
22#include <libvcl-ldpreload/vcom_socket.h>
23#include <libvcl-ldpreload/vcom_socket_wrapper.h>
24#include <libvcl-ldpreload/vcom.h>
25
26#include <uri/vppcom.h>
27
28
29/*
30 * VCOM_SOCKET Private definitions and functions.
31 */
32
33typedef struct vcom_socket_main_t_
34{
35  u8 init;
36
37  /* vcom_socket pool */
38  vcom_socket_t *vsockets;
39
40  /* Hash table for socketidx to fd mapping */
41  uword *sockidx_by_fd;
42
43  /* vcom_epoll pool */
44  vcom_epoll_t *vepolls;
45
46  /* Hash table for epollidx to epfd mapping */
47  uword *epollidx_by_epfd;
48
49
50  /* common epitem poll for all epfd */
51  /* TBD: epitem poll per epfd */
52  /* vcom_epitem pool */
53  vcom_epitem_t *vepitems;
54
55  /* Hash table for epitemidx to epfdfd mapping */
56  uword *epollidx_by_epfdfd;
57
58  /* Hash table - key:epfd, value:vec of epitemidx */
59  uword *epitemidxs_by_epfd;
60  /* Hash table - key:fd, value:vec of epitemidx */
61  uword *epitemidxs_by_fd;
62
63} vcom_socket_main_t;
64
65vcom_socket_main_t vcom_socket_main;
66
67
/*
 * Open a libc socket for the domain/type combinations vpp handles.
 *
 * Only AF_INET and AF_INET6 with SOCK_STREAM or SOCK_DGRAM are accepted.
 * The SOCK_NONBLOCK and SOCK_CLOEXEC modifier bits that may be OR'ed
 * into type are masked off for that check, but type is handed to
 * libc_socket unmodified so the modifiers still take effect.
 *
 * Returns the new fd, -errno when libc_socket fails, or -1 when the
 * domain or the socket type is not supported.
 */
static int
vcom_socket_open_socket (int domain, int type, int protocol)
{
  int base_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
  int fd;

  /* domains implemented by vpp */
  if (domain != AF_INET && domain != AF_INET6)
    return -1;

  /* socket types supported by vpp */
  if (base_type != SOCK_STREAM && base_type != SOCK_DGRAM)
    return -1;

  fd = libc_socket (domain, type, protocol);
  return (fd == -1) ? -errno : fd;
}
106
/*
 * Create a libc epoll instance.
 *
 * flags must be either zero or EPOLL_CLOEXEC; a negative value or any
 * other bit yields -EINVAL.
 *
 * Returns the new epoll fd, or -errno when libc_epoll_create1 fails.
 */
static int
vcom_socket_open_epoll (int flags)
{
  int epfd;

  if (flags < 0 || (flags & ~EPOLL_CLOEXEC) != 0)
    return -EINVAL;

  epfd = libc_epoll_create1 (flags);
  return (epfd == -1) ? -errno : epfd;
}
128
/*
 * Close a socket fd through libc.
 *
 * Returns the libc_close result, with a failure (-1) mapped to -errno.
 */
static int
vcom_socket_close_socket (int fd)
{
  int status = libc_close (fd);

  return (status == -1) ? -errno : status;
}
140
/*
 * Close an epoll fd through libc.
 *
 * Returns the libc_close result, with a failure (-1) mapped to -errno.
 */
static int
vcom_socket_close_epoll (int epfd)
{
  int status = libc_close (epfd);

  return (status == -1) ? -errno : status;
}
152
153/*
154 * Public API functions
155 */
156
157int
158vcom_socket_main_init (void)
159{
160  vcom_socket_main_t *vsm = &vcom_socket_main;
161
162  if (VCOM_DEBUG > 0)
163    printf ("vcom_socket_main_init\n");
164
165  if (!vsm->init)
166    {
167      /* TBD: define FD_MAXSIZE and use it here */
168      pool_alloc (vsm->vsockets, FD_SETSIZE);
169      vsm->sockidx_by_fd = hash_create (0, sizeof (i32));
170
171      pool_alloc (vsm->vepolls, FD_SETSIZE);
172      vsm->epollidx_by_epfd = hash_create (0, sizeof (i32));
173
174      pool_alloc (vsm->vepitems, FD_SETSIZE);
175      vsm->epitemidxs_by_epfd = hash_create (0, sizeof (uword *));
176      vsm->epitemidxs_by_fd = hash_create (0, sizeof (uword *));
177
178      vsm->init = 1;
179    }
180
181  return 0;
182}
183
184void
185vcom_socket_main_destroy (void)
186{
187  vcom_socket_main_t *vsm = &vcom_socket_main;
188  vcom_socket_t *vsock;
189
190  vcom_epoll_t *vepoll;
191
192  if (VCOM_DEBUG > 0)
193    printf ("vcom_socket_main_destroy\n");
194
195  if (vsm->init)
196    {
197      /*
198       * from active list of vsockets,
199       * close socket and vppcom session
200       * */
201
202      /* *INDENT-OFF* */
203      pool_foreach (vsock, vsm->vsockets,
204        ({
205          if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
206            {
207              vppcom_session_close (vsock->sid);
208              vcom_socket_close_socket (vsock->fd);
209              vsocket_init (vsock);
210            }
211        }));
212      /* *INDENT-ON* */
213
214      /*
215       * return vsocket element to the pool
216       * */
217
218      /* *INDENT-OFF* */
219      pool_flush (vsock, vsm->vsockets,
220        ({
221          // vsocket_init(vsock);
222          ;
223        }));
224      /* *INDENT-ON* */
225
226      pool_free (vsm->vsockets);
227      hash_free (vsm->sockidx_by_fd);
228
229      /*
230       * from active list of vepolls,
231       * close epoll and vppcom_epoll
232       * */
233
234      /* *INDENT-OFF* */
235      pool_foreach (vepoll, vsm->vepolls,
236        ({
237          if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
238            {
239              vppcom_session_close (vepoll->vep_idx);
240              vcom_socket_close_epoll (vepoll->epfd); /* TBD: */
241              vepoll_init (vepoll);
242            }
243        }));
244      /* *INDENT-ON* */
245
246      /*
247       * return vepoll element to the pool
248       * */
249
250      /* *INDENT-OFF* */
251      pool_flush (vepoll, vsm->vepolls,
252        ({
253          // vepoll_init(vepoll);
254          ;
255        }));
256      /* *INDENT-ON* */
257
258      pool_free (vsm->vepolls);
259      hash_free (vsm->epollidx_by_epfd);
260
261
262      vsm->init = 0;
263    }
264}
265
266void
267vcom_socket_main_show (void)
268{
269  vcom_socket_main_t *vsm = &vcom_socket_main;
270  vcom_socket_t *vsock;
271
272  vcom_epoll_t *vepoll;
273  vcom_epitem_t *vepitem;
274
275  if (vsm->init)
276    {
277      /* from active list of vsockets show vsock */
278
279      /* *INDENT-OFF* */
280      pool_foreach (vsock, vsm->vsockets,
281        ({
282          printf(
283                 "fd='%04d', sid='%08x',type='%-30s'\n",
284                 vsock->fd, vsock->sid,
285                 vcom_socket_type_str (vsock->type));
286        }));
287      /* *INDENT-ON* */
288
289      /* from active list of vepolls, show vepoll */
290
291      /* *INDENT-OFF* */
292      pool_foreach (vepoll, vsm->vepolls,
293        ({
294          printf(
295                 "epfd='%04d', vep_idx='%08x', "
296                 "type='%-30s', "
297                 "flags='%d', count='%d', close='%d'\n",
298                 vepoll->epfd, vepoll->vep_idx,
299                 vcom_socket_epoll_type_str (vepoll->type),
300                 vepoll->flags, vepoll->count, vepoll->close);
301        }));
302      /* *INDENT-ON* */
303
304      /* from active list of vepitems, show vepitem */
305
306      /* *INDENT-OFF* */
307      pool_foreach (vepitem, vsm->vepitems,
308        ({
309          printf(
310                 "epfd='%04d', fd='%04d', "
311                 "next_fd='%04d', prev_fd='%04d', "
312                 "type='%-30s', "
313                 "events='%04x', revents='%04x'\n",
314                 vepitem->epfd, vepitem->fd,
315                 vepitem->next_fd, vepitem->prev_fd,
316                 vcom_socket_vcom_fd_type_str (vepitem->type),
317                 vepitem->event.events, vepitem->revent.events);
318        }));
319      /* *INDENT-ON* */
320    }
321}
322
323int
324vcom_socket_is_vcom_fd (int fd)
325{
326  vcom_socket_main_t *vsm = &vcom_socket_main;
327  uword *p;
328  vcom_socket_t *vsock;
329
330  p = hash_get (vsm->sockidx_by_fd, fd);
331
332  if (p)
333    {
334      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
335      if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
336        return 1;
337    }
338  return 0;
339}
340
341int
342vcom_socket_is_vcom_epfd (int epfd)
343{
344  vcom_socket_main_t *vsm = &vcom_socket_main;
345  uword *p;
346  vcom_epoll_t *vepoll;
347
348  p = hash_get (vsm->epollidx_by_epfd, epfd);
349
350  if (p)
351    {
352      vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
353      if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
354        return 1;
355    }
356  return 0;
357}
358
359static inline int
360vcom_socket_get_sid (int fd)
361{
362  vcom_socket_main_t *vsm = &vcom_socket_main;
363  uword *p;
364  vcom_socket_t *vsock;
365
366  p = hash_get (vsm->sockidx_by_fd, fd);
367
368  if (p)
369    {
370      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
371      if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
372        return vsock->sid;
373    }
374  return INVALID_SESSION_ID;
375}
376
377static inline int
378vcom_socket_get_vep_idx (int epfd)
379{
380  vcom_socket_main_t *vsm = &vcom_socket_main;
381  uword *p;
382  vcom_epoll_t *vepoll;
383
384  p = hash_get (vsm->epollidx_by_epfd, epfd);
385
386  if (p)
387    {
388      vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
389      if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
390        return vepoll->vep_idx;
391    }
392  return INVALID_VEP_IDX;
393}
394
395static int
396vcom_socket_close_vsock (int fd)
397{
398  int rv = -1;
399  vcom_socket_main_t *vsm = &vcom_socket_main;
400  uword *p;
401  vcom_socket_t *vsock;
402
403  p = hash_get (vsm->sockidx_by_fd, fd);
404  if (!p)
405    return -EBADF;
406
407  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
408  if (!vsock)
409    return -ENOTSOCK;
410
411  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
412    return -EINVAL;
413
414  rv = vppcom_session_close (vsock->sid);
415  rv = vcom_socket_close_socket (vsock->fd);
416
417  vsocket_init (vsock);
418  hash_unset (vsm->sockidx_by_fd, fd);
419  pool_put (vsm->vsockets, vsock);
420
421  /*
422   * TBD:
423   * close all epoll instances that are marked as "close"
424   * of which this fd is the last remaining member
425   * */
426
427  return rv;
428}
429
430int
431vcom_socket_close_vepoll (int epfd)
432{
433  int rv = -1;
434  vcom_socket_main_t *vsm = &vcom_socket_main;
435  uword *p;
436  vcom_epoll_t *vepoll;
437
438  p = hash_get (vsm->epollidx_by_epfd, epfd);
439  if (!p)
440    return -EBADF;
441
442  vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
443  if (!vepoll)
444    return -EBADF;
445
446  if (vepoll->type != EPOLL_TYPE_VPPCOM_BOUND)
447    return -EINVAL;
448
449  if (vepoll->count)
450    {
451      if (!vepoll->close)
452        {
453          vepoll->close = 1;
454          return 0;
455        }
456      else
457        {
458          return -EBADF;
459        }
460    }
461
462  /* count is zero */
463  rv = vppcom_session_close (vepoll->vep_idx);
464  rv = vcom_socket_close_epoll (vepoll->epfd);
465
466  vepoll_init (vepoll);
467  hash_unset (vsm->epollidx_by_epfd, epfd);
468  pool_put (vsm->vepolls, vepoll);
469
470  return rv;
471}
472
/*
 * Close a descriptor owned by this layer, dispatching on what it is: a
 * vcom socket goes to vcom_socket_close_vsock, a vcom epoll descriptor
 * to vcom_socket_close_vepoll.
 *
 * Returns the result of the selected close routine, or -EBADF when __fd
 * is neither.
 */
int
vcom_socket_close (int __fd)
{
  if (vcom_socket_is_vcom_fd (__fd))
    return vcom_socket_close_vsock (__fd);

  if (vcom_socket_is_vcom_epfd (__fd))
    return vcom_socket_close_vepoll (__fd);

  return -EBADF;
}
493
494ssize_t
495vcom_socket_read (int __fd, void *__buf, size_t __nbytes)
496{
497  int rv = -1;
498  vcom_socket_main_t *vsm = &vcom_socket_main;
499  uword *p;
500  vcom_socket_t *vsock;
501
502  p = hash_get (vsm->sockidx_by_fd, __fd);
503  if (!p)
504    return -EBADF;
505
506  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
507  if (!vsock)
508    return -ENOTSOCK;
509
510  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
511    return -EINVAL;
512
513  if (!__buf || __nbytes < 0)
514    {
515      return -EINVAL;
516    }
517
518  rv = vcom_fcntl (__fd, F_GETFL, 0);
519  if (rv < 0)
520    {
521      return rv;
522
523    }
524
525  /* is blocking */
526  if (!(rv & O_NONBLOCK))
527    {
528      do
529        {
530          rv = vppcom_session_read (vsock->sid, __buf, __nbytes);
531        }
532      while (rv == -EAGAIN || rv == -EWOULDBLOCK);
533      return rv;
534    }
535  /* The file descriptor refers to a socket and has been
536   * marked nonblocking(O_NONBLOCK) and the read would
537   * block.
538   * */
539  /* is non blocking */
540  rv = vppcom_session_read (vsock->sid, __buf, __nbytes);
541  return rv;
542}
543
544ssize_t
545vcom_socket_write (int __fd, const void *__buf, size_t __n)
546{
547  int rv = -1;
548  vcom_socket_main_t *vsm = &vcom_socket_main;
549  uword *p;
550  vcom_socket_t *vsock;
551
552  p = hash_get (vsm->sockidx_by_fd, __fd);
553  if (!p)
554    return -EBADF;
555
556  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
557  if (!vsock)
558    return -ENOTSOCK;
559
560  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
561    return -EINVAL;
562
563  if (!__buf || __n < 0)
564    {
565      return -EINVAL;
566    }
567
568  rv = vppcom_session_write (vsock->sid, (void *) __buf, __n);
569  return rv;
570}
571
/*
 * Classify an fcntl command by who has to service it.
 *
 * RETURN:  0 - invalid cmd
 *          1 - cmd not handled by vcom and vppcom
 *          2 - cmd handled by vcom socket resource
 *          3 - cmd handled by vppcom (no cmd is routed there yet)
 * */
/* TBD: incomplete list of cmd */
static int
vcom_socket_check_fcntl_cmd (int __cmd)
{
  int handler;

  switch (__cmd)
    {
      /* descriptor duplication: not handled by vcom and vppcom */
    case F_DUPFD:
    case F_DUPFD_CLOEXEC:
      handler = 1;
      break;

      /* handled by the vcom socket resource (the libc fd) */
    case F_GETFD:
    case F_SETFD:
    case F_GETFL:
    case F_SETFL:
    case F_GETLK:
    case F_SETLK:
    case F_SETLKW:
    case F_GETOWN:
    case F_SETOWN:
      handler = 2;
      break;

      /* everything else is an invalid cmd */
    default:
      handler = 0;
      break;
    }

  return handler;
}
614
/* TBD: move it to vppcom */
/*
 * Placeholder for a vppcom-level fcntl.  No command is implemented, so
 * every call is rejected with -EINVAL and the arguments are ignored.
 */
static int
vppcom_session_fcntl_va (int __fd, int __cmd, va_list __ap)
{
  return -EINVAL;
}
625
626int
627vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap)
628{
629  int rv = -EBADF;
630  vcom_socket_main_t *vsm = &vcom_socket_main;
631  uword *p;
632  vcom_socket_t *vsock;
633
634  p = hash_get (vsm->sockidx_by_fd, __fd);
635  if (!p)
636    return -EBADF;
637
638  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
639  if (!vsock)
640    return -ENOTSOCK;
641
642  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
643    return -EINVAL;
644
645  switch (vcom_socket_check_fcntl_cmd (__cmd))
646    {
647      /* invalid cmd */
648    case 0:
649      rv = -EBADF;
650      break;
651      /*cmd not handled by vcom and vppcom */
652    case 1:
653      rv = -EBADF;
654      break;
655      /* cmd handled by vcom socket resource */
656    case 2:
657      rv = libc_vfcntl (vsock->fd, __cmd, __ap);
658      break;
659      /* cmd handled by vppcom */
660    case 3:
661      rv = vppcom_session_fcntl_va (vsock->sid, __cmd, __ap);
662      break;
663
664    default:
665      rv = -EINVAL;
666      break;
667    }
668
669  return rv;
670}
671
672static inline int
673vcom_socket_fds_2_sid_fds (
674                            /* dest */
675                            int *vcom_nsid_fds,
676                            fd_set * __restrict vcom_rd_sid_fds,
677                            fd_set * __restrict vcom_wr_sid_fds,
678                            fd_set * __restrict vcom_ex_sid_fds,
679                            /* src */
680                            int vcom_nfds,
681                            fd_set * __restrict vcom_readfds,
682                            fd_set * __restrict vcom_writefds,
683                            fd_set * __restrict vcom_exceptfds)
684{
685  int rv = 0;
686  int fd;
687  int sid;
688  /* invalid max_sid is -1 */
689  int max_sid = -1;
690  int nsid = 0;
691
692  /*
693   *  set sid in sid sets corresponding to fd's in fd sets
694   *  compute nsid and vcom_nsid_fds from sid sets
695   */
696
697  for (fd = 0; fd < vcom_nfds; fd++)
698    {
699      /*
700       * F fd set, src
701       * S sid set, dest
702       */
703#define _(S,F)                              \
704      if ((F) && (S) && FD_ISSET (fd, (F))) \
705        {                                   \
706          sid = vcom_socket_get_sid (fd);   \
707          if (sid != INVALID_SESSION_ID)    \
708            {                               \
709              FD_SET (sid, (S));            \
710              if (sid > max_sid)            \
711                {                           \
712                  max_sid = sid;            \
713                }                           \
714              ++nsid;                       \
715            }                               \
716          else                              \
717            {                               \
718              rv = -EBADFD;                 \
719              goto done;                    \
720            }                               \
721        }
722
723
724      _(vcom_rd_sid_fds, vcom_readfds);
725      _(vcom_wr_sid_fds, vcom_writefds);
726      _(vcom_ex_sid_fds, vcom_exceptfds);
727#undef _
728    }
729
730  *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0;
731  rv = nsid;
732
733done:
734  return rv;
735}
736
737/*
738 * PRE: 00. sid sets were derived from fd sets
739 *      01. sid sets were updated with sids that actually changed
740 *          status
741 *      02. fd sets still has watched fds
742 *
743 * This function will modify in place fd sets to indicate which fd's
744 * actually changed status(inferred from sid sets)
745 */
746static inline int
747vcom_socket_sid_fds_2_fds (
748                            /* dest */
749                            int *new_vcom_nfds,
750                            int vcom_nfds,
751                            fd_set * __restrict vcom_readfds,
752                            fd_set * __restrict vcom_writefds,
753                            fd_set * __restrict vcom_exceptfds,
754                            /* src */
755                            int vcom_nsid_fds,
756                            fd_set * __restrict vcom_rd_sid_fds,
757                            fd_set * __restrict vcom_wr_sid_fds,
758                            fd_set * __restrict vcom_ex_sid_fds)
759{
760  int rv = 0;
761  int fd;
762  int sid;
763  /* invalid max_fd is -1 */
764  int max_fd = -1;
765  int nfd = 0;
766
767
768  /*
769   *  modify in place fd sets to indicate which fd's
770   * actually changed status(inferred from sid sets)
771   */
772  for (fd = 0; fd < vcom_nfds; fd++)
773    {
774      /*
775       * F fd set, dest
776       * S sid set, src
777       */
778#define _(S,F)                              \
779      if ((F) && (S) && FD_ISSET (fd, (F))) \
780        {                                   \
781          sid = vcom_socket_get_sid (fd);   \
782          if (sid != INVALID_SESSION_ID)    \
783            {                               \
784              if (!FD_ISSET (sid, (S)))     \
785                {                           \
786                   FD_CLR(fd, (F));         \
787                }                           \
788            }                               \
789          else                              \
790            {                               \
791              rv = -EBADFD;                 \
792              goto done;                    \
793            }                               \
794        }
795
796
797      _(vcom_rd_sid_fds, vcom_readfds);
798      _(vcom_wr_sid_fds, vcom_writefds);
799      _(vcom_ex_sid_fds, vcom_exceptfds);
800#undef _
801    }
802
803  /*
804   *  compute nfd and new_vcom_nfds from fd sets
805   */
806  for (fd = 0; fd < vcom_nfds; fd++)
807    {
808
809#define _(F)                                \
810      if ((F) && FD_ISSET (fd, (F)))        \
811        {                                   \
812          if (fd > max_fd)                  \
813            {                               \
814              max_fd = fd;                  \
815            }                               \
816          ++nfd;                            \
817        }
818
819
820      _(vcom_readfds);
821      _(vcom_writefds);
822      _(vcom_exceptfds);
823#undef _
824
825    }
826
827  *new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0;
828  rv = nfd;
829
830done:
831  return rv;
832}
833
834/*
835 * PRE:
836 * vom_socket_select is always called with
837 * timeout->tv_sec and timeout->tv_usec set to zero.
838 * hence vppcom_select return immediately.
839 */
840/*
841 * TBD: do{body;} while(timeout conditional); timeout loop
842 */
843int
844vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds,
845                    fd_set * __restrict vcom_writefds,
846                    fd_set * __restrict vcom_exceptfds,
847                    struct timeval *__restrict timeout)
848{
849  int rv = -EBADF;
850  pid_t pid = getpid ();
851
852  int new_vcom_nfds = 0;
853  int new_vcom_nfd = 0;
854
855  /* vcom sid fds */
856  fd_set vcom_rd_sid_fds;
857  fd_set vcom_wr_sid_fds;
858  fd_set vcom_ex_sid_fds;
859  unsigned long vcom_nsid_fds = 0;
860  int vcom_nsid = 0;
861
862  /* in seconds eg. 3.123456789 seconds */
863  double time_to_wait = (double) 0;
864
865  /* validate inputs */
866  if (vcom_nfds < 0)
867    {
868      return -EINVAL;
869    }
870
871  /* convert timeval timeout to double time_to_wait */
872  if (timeout)
873    {
874      if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
875        {
876          /* polling: vppcom_select returns immediately */
877          time_to_wait = (double) 0;
878        }
879      else
880        {
881          /*TBD:  use timeval api */
882          time_to_wait = (double) timeout->tv_sec +
883            (double) timeout->tv_usec / (double) 1000000 +
884            (double) (timeout->tv_usec % 1000000) / (double) 1000000;
885        }
886    }
887  else
888    {
889      /*
890       * no timeout: vppcom_select can block indefinitely
891       * waiting for a file descriptor to become ready
892       * */
893      /* set to a phantom value */
894      time_to_wait = ~0;
895    }
896
897  /* zero the sid_sets */
898  /*
899   * F fd set
900   * S sid set
901   */
902#define _(S,F)                          \
903  if ((F))                              \
904    {                                   \
905      FD_ZERO ((S));                    \
906    }
907
908
909  _(&vcom_rd_sid_fds, vcom_readfds);
910  _(&vcom_wr_sid_fds, vcom_writefds);
911  _(&vcom_ex_sid_fds, vcom_exceptfds);
912#undef _
913
914  /* populate read, write and except sid_sets */
915  vcom_nsid = vcom_socket_fds_2_sid_fds (
916                                          /* dest */
917                                          vcom_readfds || vcom_writefds
918                                          || vcom_exceptfds ? (int *)
919                                          &vcom_nsid_fds : NULL,
920                                          vcom_readfds ? &vcom_rd_sid_fds :
921                                          NULL,
922                                          vcom_writefds ? &vcom_wr_sid_fds :
923                                          NULL,
924                                          vcom_exceptfds ? &vcom_ex_sid_fds :
925                                          NULL,
926                                          /* src */
927                                          vcom_nfds,
928                                          vcom_readfds,
929                                          vcom_writefds, vcom_exceptfds);
930  if (vcom_nsid < 0)
931    {
932      return vcom_nsid;
933    }
934  if (vcom_nsid_fds < 0)
935    {
936      return -EINVAL;
937    }
938
939  rv = vppcom_select (vcom_nsid_fds,
940                      vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds :
941                      NULL,
942                      vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds :
943                      NULL,
944                      vcom_exceptfds ? (unsigned long *) &vcom_ex_sid_fds :
945                      NULL, time_to_wait);
946  if (VCOM_DEBUG > 0)
947    fprintf (stderr, "[%d] vppcom_select: "
948             "'%04d'='%04d'\n", pid, rv, (int) vcom_nsid_fds);
949
950  /* check if any file descriptors changed status */
951  if (rv > 0)
952    {
953      /*
954       * on exit, sets are modified in place to indicate which
955       * file descriptors actually changed status
956       * */
957
958      /*
959       * comply with pre-condition
960       * do not clear vcom fd sets befor calling
961       * vcom_socket_sid_fds_2_fds
962       */
963      new_vcom_nfd = vcom_socket_sid_fds_2_fds (
964                                                 /* dest */
965                                                 &new_vcom_nfds,
966                                                 vcom_nfds,
967                                                 vcom_readfds,
968                                                 vcom_writefds,
969                                                 vcom_exceptfds,
970                                                 /* src */
971                                                 vcom_nsid_fds,
972                                                 vcom_readfds ?
973                                                 &vcom_rd_sid_fds : NULL,
974                                                 vcom_writefds ?
975                                                 &vcom_wr_sid_fds : NULL,
976                                                 vcom_exceptfds ?
977                                                 &vcom_ex_sid_fds : NULL);
978      if (new_vcom_nfd < 0)
979        {
980          return new_vcom_nfd;
981        }
982      if (new_vcom_nfds < 0)
983        {
984          return -EINVAL;
985        }
986      rv = new_vcom_nfd;
987    }
988  return rv;
989}
990
991
992int
993vcom_socket_socket (int __domain, int __type, int __protocol)
994{
995  int rv = -1;
996  vcom_socket_main_t *vsm = &vcom_socket_main;
997  vcom_socket_t *vsock;
998
999  i32 fd;
1000  i32 sid;
1001  i32 sockidx;
1002  u8 is_nonblocking = __type & SOCK_NONBLOCK ? 1 : 0;
1003  int type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC);
1004
1005  fd = vcom_socket_open_socket (__domain, __type, __protocol);
1006  if (fd < 0)
1007    {
1008      rv = fd;
1009      goto out;
1010    }
1011
1012  sid = vppcom_session_create (VPPCOM_VRF_DEFAULT,
1013                               (type == SOCK_DGRAM) ?
1014                               VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP,
1015                               is_nonblocking);
1016  if (sid < 0)
1017    {
1018      rv = sid;
1019      goto out_close_socket;
1020    }
1021
1022  pool_get (vsm->vsockets, vsock);
1023  vsocket_init (vsock);
1024
1025  sockidx = vsock - vsm->vsockets;
1026  hash_set (vsm->sockidx_by_fd, fd, sockidx);
1027
1028  vsocket_set (vsock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND);
1029  return fd;
1030
1031out_close_socket:
1032  vcom_socket_close_socket (fd);
1033out:
1034  return rv;
1035}
1036
/*
 * TBD: not implemented.  Every argument is ignored, __fds is left
 * untouched and 0 is returned.
 */
int
vcom_socket_socketpair (int __domain, int __type, int __protocol,
                        int __fds[2])
{
  (void) __domain;
  (void) __type;
  (void) __protocol;
  (void) __fds;

  return 0;
}
1044
1045int
1046vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1047{
1048  int rv = -1;
1049  vcom_socket_main_t *vsm = &vcom_socket_main;
1050  uword *p;
1051  vcom_socket_t *vsock;
1052
1053  vppcom_endpt_t ep;
1054
1055  p = hash_get (vsm->sockidx_by_fd, __fd);
1056  if (!p)
1057    return -EBADF;
1058
1059  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1060  if (!vsock)
1061    return -ENOTSOCK;
1062
1063  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1064    return -EINVAL;
1065
1066  if (!__addr)
1067    {
1068      return -EINVAL;
1069    }
1070
1071  ep.vrf = VPPCOM_VRF_DEFAULT;
1072  switch (__addr->sa_family)
1073    {
1074    case AF_INET:
1075      if (__len != sizeof (struct sockaddr_in))
1076        {
1077          return -EINVAL;
1078        }
1079      ep.is_ip4 = VPPCOM_IS_IP4;
1080      ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
1081      ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port;
1082      break;
1083
1084    case AF_INET6:
1085      if (__len != sizeof (struct sockaddr_in6))
1086        {
1087          return -EINVAL;
1088        }
1089      ep.is_ip4 = VPPCOM_IS_IP6;
1090      ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr;
1091      ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port;
1092      break;
1093
1094    default:
1095      return -1;
1096      break;
1097    }
1098
1099  rv = vppcom_session_bind (vsock->sid, &ep);
1100  /* TBD: remove libc_bind code snippet
1101   * once vppcom implements vppcom_session_getsockname */
1102  if (rv == 0)
1103    {
1104      rv = libc_bind (__fd, __addr, __len);
1105      if (rv != 0)
1106        {
1107          rv = -errno;
1108        }
1109    }
1110  return rv;
1111}
1112
1113int
1114vppcom_session_getsockname (int sid, vppcom_endpt_t * ep)
1115{
1116  /* TBD: move it to vppcom */
1117  return 0;
1118}
1119
1120int
1121vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr,
1122                         socklen_t * __restrict __len)
1123{
1124  int rv = -1;
1125  vcom_socket_main_t *vsm = &vcom_socket_main;
1126  uword *p;
1127  vcom_socket_t *vsock;
1128
1129
1130  p = hash_get (vsm->sockidx_by_fd, __fd);
1131  if (!p)
1132    return -EBADF;
1133
1134  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1135  if (!vsock)
1136    return -ENOTSOCK;
1137
1138  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1139    return -EINVAL;
1140
1141  if (!__addr || !__len)
1142    return -EFAULT;
1143
1144  if (*__len < 0)
1145    {
1146      return -EINVAL;
1147    }
1148
1149  /* TBD: remove libc_getsockname code snippet
1150   * once vppcom implements vppcom_session_getsockname */
1151  rv = libc_getsockname (__fd, __addr, __len);
1152  if (rv != 0)
1153    {
1154      rv = -errno;
1155      return rv;
1156    }
1157
1158  /* TBD: use the below code snippet when vppcom
1159   * implements vppcom_session_getsockname */
1160#if 0
1161  vppcom_endpt_t ep;
1162  ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
1163  rv = vppcom_session_getsockname (vsock->sid, &ep);
1164  if (rv == 0)
1165    {
1166      if (ep.vrf == VPPCOM_VRF_DEFAULT)
1167        {
1168          __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6;
1169          switch (__addr->sa_family)
1170            {
1171            case AF_INET:
1172              ((struct sockaddr_in *) __addr)->sin_port = ep.port;
1173              *__len = sizeof (struct sockaddr_in);
1174              break;
1175
1176            case AF_INET6:
1177              ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
1178              *__len = sizeof (struct sockaddr_in6);
1179              break;
1180
1181            default:
1182              break;
1183            }
1184        }
1185    }
1186#endif
1187
1188  return rv;
1189}
1190
1191int
1192vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1193{
1194  int rv = -1;
1195  vcom_socket_main_t *vsm = &vcom_socket_main;
1196  uword *p;
1197  vcom_socket_t *vsock;
1198
1199  vppcom_endpt_t ep;
1200
1201  p = hash_get (vsm->sockidx_by_fd, __fd);
1202  if (p)
1203    {
1204      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1205
1206      ep.vrf = VPPCOM_VRF_DEFAULT;
1207      switch (__addr->sa_family)
1208        {
1209        case AF_INET:
1210          ep.is_ip4 = VPPCOM_IS_IP4;
1211          ep.ip =
1212            (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr;
1213          ep.port =
1214            (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port;
1215          break;
1216
1217        case AF_INET6:
1218          ep.is_ip4 = VPPCOM_IS_IP6;
1219          ep.ip =
1220            (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr;
1221          ep.port =
1222            (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port;
1223          break;
1224
1225        default:
1226          return -1;
1227          break;
1228        }
1229
1230      rv = vppcom_session_connect (vsock->sid, &ep);
1231    }
1232  return rv;
1233}
1234
1235int
1236vppcom_session_getpeername (int sid, vppcom_endpt_t * ep)
1237{
1238  /* TBD: move it to vppcom */
1239  return 0;
1240}
1241
1242int
1243vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr,
1244                         socklen_t * __restrict __len)
1245{
1246  int rv = -1;
1247  vcom_socket_main_t *vsm = &vcom_socket_main;
1248  uword *p;
1249  vcom_socket_t *vsock;
1250
1251
1252  p = hash_get (vsm->sockidx_by_fd, __fd);
1253  if (!p)
1254    return -EBADF;
1255
1256  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1257  if (!vsock)
1258    return -ENOTSOCK;
1259
1260  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1261    return -EINVAL;
1262
1263  if (!__addr || !__len)
1264    return -EFAULT;
1265
1266  if (*__len < 0)
1267    {
1268      return -EINVAL;
1269    }
1270
1271  /* DAW: hack to allow iperf3 to be happy w/ getpeername output */
1272  {
1273    uint8_t *a;
1274    ((struct sockaddr_in *) __addr)->sin_family = AF_INET;
1275    ((struct sockaddr_in *) __addr)->sin_port = 0x1000;
1276    a = (uint8_t *) & ((struct sockaddr_in *) __addr)->sin_addr;
1277    a[0] = 0x7f;
1278    a[1] = 0x00;
1279    a[2] = 0x00;
1280    a[3] = 0x01;
1281    *__len = sizeof (struct sockaddr_in);
1282    return 0;
1283  }
1284
1285  /* TBD: remove libc_getpeername code snippet
1286   * once vppcom implements vppcom_session_getpeername */
1287  rv = libc_getpeername (__fd, __addr, __len);
1288  if (rv != 0)
1289    {
1290      rv = -errno;
1291      return rv;
1292    }
1293
1294  /* TBD: use the below code snippet when vppcom
1295   * implements vppcom_session_getpeername */
1296#if 0
1297  vppcom_endpt_t ep;
1298  ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
1299  rv = vppcom_session_getpeername (vsock->sid, &ep);
1300  if (rv == 0)
1301    {
1302      if (ep.vrf == VPPCOM_VRF_DEFAULT)
1303        {
1304          __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6;
1305          switch (__addr->sa_family)
1306            {
1307            case AF_INET:
1308              ((struct sockaddr_in *) __addr)->sin_port = ep.port;
1309              *__len = sizeof (struct sockaddr_in);
1310              break;
1311
1312            case AF_INET6:
1313              ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
1314              *__len = sizeof (struct sockaddr_in6);
1315              break;
1316
1317            default:
1318              break;
1319            }
1320        }
1321    }
1322#endif
1323
1324  return rv;
1325}
1326
1327ssize_t
1328vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags)
1329{
1330  return vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0);
1331}
1332
1333ssize_t
1334vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags)
1335{
1336  int rv = -1;
1337  rv = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0);
1338  return rv;
1339}
1340
1341/*
1342 * RETURN   1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET),
1343 * 0 otherwise
1344 * */
1345int
1346vcom_socket_is_connection_mode_socket (int __fd)
1347{
1348  int rv = -1;
1349  /* TBD define new vppcom api */
1350  vcom_socket_main_t *vsm = &vcom_socket_main;
1351  uword *p;
1352  vcom_socket_t *vsock;
1353
1354  int type;
1355  socklen_t optlen;
1356
1357  p = hash_get (vsm->sockidx_by_fd, __fd);
1358
1359  if (p)
1360    {
1361      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1362      if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
1363        {
1364          optlen = sizeof (type);
1365          rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &type, &optlen);
1366          if (rv != 0)
1367            {
1368              return 0;
1369            }
1370          /* get socket type */
1371          switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1372            {
1373            case SOCK_STREAM:
1374            case SOCK_SEQPACKET:
1375              return 1;
1376              break;
1377
1378            default:
1379              return 0;
1380              break;
1381            }
1382        }
1383    }
1384  return 0;
1385}
1386
/*
 * Write __n bytes from __buf to vppcom session __sid.
 *
 * __flags, __addr and __addr_len are accepted but not used: the data
 * is handed to vppcom_session_write as is.
 * TBD add new vpp api
 * TBD add flags parameter
 *
 * RETURN  the value of vppcom_session_write
 */
ssize_t
vvppcom_session_sendto (int __sid, const void *__buf, size_t __n,
                        int __flags, __CONST_SOCKADDR_ARG __addr,
                        socklen_t __addr_len)
{
  int written;

  written = vppcom_session_write (__sid, (void *) __buf, (int) __n);
  return written;
}
1398
1399ssize_t
1400vcom_socket_sendto (int __fd, const void *__buf, size_t __n,
1401                    int __flags, __CONST_SOCKADDR_ARG __addr,
1402                    socklen_t __addr_len)
1403{
1404  int rv = -1;
1405  vcom_socket_main_t *vsm = &vcom_socket_main;
1406  uword *p;
1407  vcom_socket_t *vsock;
1408
1409  p = hash_get (vsm->sockidx_by_fd, __fd);
1410  if (!p)
1411    return -EBADF;
1412
1413  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1414  if (!vsock)
1415    return -ENOTSOCK;
1416
1417  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1418    return -EINVAL;
1419
1420  if (!__buf || __n < 0)
1421    {
1422      return -EINVAL;
1423    }
1424
1425  if (vcom_socket_is_connection_mode_socket (__fd))
1426    {
1427      /* ignore __addr and _addr_len */
1428      /* and EISCONN may be returned when they are not NULL and 0 */
1429      if ((__addr != NULL) || (__addr_len != 0))
1430        {
1431          return -EISCONN;
1432        }
1433    }
1434  else
1435    {
1436      if (!__addr || __addr_len < 0)
1437        {
1438          return -EDESTADDRREQ;
1439        }
1440      /* not a vppcom supported address family */
1441      if ((__addr->sa_family != AF_INET) || (__addr->sa_family != AF_INET6))
1442        {
1443          return -EINVAL;
1444        }
1445    }
1446
1447  rv = vvppcom_session_sendto (vsock->sid, (void *) __buf, (int) __n,
1448                               __flags, __addr, __addr_len);
1449  return rv;
1450}
1451
/*
 * Read up to __n bytes from vppcom session __sid into __buf.
 * TBD: move it to vppcom
 *
 * __flags, __addr and __addr_len are accepted but not used.
 * TBD add flags parameter
 *
 * RETURN  the value of vppcom_session_read
 */
static ssize_t
vppcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n,
                         int __flags, __SOCKADDR_ARG __addr,
                         socklen_t * __restrict __addr_len)
{
  int nread;

  nread = vppcom_session_read (__sid, __buf, __n);
  return nread;
}
1464
1465ssize_t
1466vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n,
1467                      int __flags, __SOCKADDR_ARG __addr,
1468                      socklen_t * __restrict __addr_len)
1469{
1470  int rv = -1;
1471  vcom_socket_main_t *vsm = &vcom_socket_main;
1472  uword *p;
1473  vcom_socket_t *vsock;
1474
1475  p = hash_get (vsm->sockidx_by_fd, __fd);
1476  if (!p)
1477    return -EBADF;
1478
1479  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1480  if (!vsock)
1481    return -ENOTSOCK;
1482
1483  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1484    return -EINVAL;
1485
1486  if (!__buf || __n < 0)
1487    {
1488      return -EINVAL;
1489    }
1490
1491  if (__addr || __addr_len < 0)
1492    {
1493      return -EINVAL;
1494    }
1495
1496  rv = vppcom_session_recvfrom (vsock->sid, __buf, __n,
1497                                __flags, __addr, __addr_len);
1498  return rv;
1499}
1500
/*
 * Placeholder for a vppcom sendmsg call.
 * TBD: move it to vppcom
 *
 * Nothing is sent; all arguments are ignored.  The intended
 * implementation would write the message through vppcom_session_write.
 *
 * RETURN  always -1
 */
static ssize_t
vppcom_sendmsg (int __sid, const struct msghdr *__message, int __flags)
{
  (void) __sid;
  (void) __message;
  (void) __flags;

  return -1;
}
1510
1511ssize_t
1512vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags)
1513{
1514  int rv = -1;
1515  vcom_socket_main_t *vsm = &vcom_socket_main;
1516  uword *p;
1517  vcom_socket_t *vsock;
1518
1519  p = hash_get (vsm->sockidx_by_fd, __fd);
1520  if (!p)
1521    return -EBADF;
1522
1523  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1524  if (!vsock)
1525    return -ENOTSOCK;
1526
1527  if (vcom_socket_is_connection_mode_socket (__fd))
1528    {
1529      /* ignore __addr and _addr_len */
1530      /* and EISCONN may be returned when they are not NULL and 0 */
1531      if ((__message->msg_name != NULL) || (__message->msg_namelen != 0))
1532        {
1533          return -EISCONN;
1534        }
1535    }
1536  else
1537    {
1538      /* TBD: validate __message->msg_name and __message->msg_namelen
1539       * and return -EINVAL on validation error
1540       * */
1541      ;
1542    }
1543
1544  rv = vppcom_sendmsg (vsock->sid, __message, __flags);
1545
1546  return rv;
1547}
1548
#ifdef __USE_GNU
/*
 * Placeholder for sendmmsg() on a vcom socket.
 * TBD: define a new vppcom api
 *
 * Nothing is sent; all arguments are ignored.
 *
 * RETURN  always 0
 */
int
vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages,
                      unsigned int __vlen, int __flags)
{
  (void) __fd;
  (void) __vmessages;
  (void) __vlen;
  (void) __flags;

  return 0;
}
#endif
1559
/*
 * Placeholder for a vppcom recvmsg call.
 * TBD: move it to vppcom
 *
 * Nothing is received; all arguments are ignored.  The intended
 * implementation would read the message through vppcom_session_read.
 *
 * RETURN  always -1
 */
static ssize_t
vppcom_recvmsg (int __sid, struct msghdr *__message, int __flags)
{
  (void) __sid;
  (void) __message;
  (void) __flags;

  return -1;
}
1569
1570ssize_t
1571vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags)
1572{
1573  int rv = -1;
1574  vcom_socket_main_t *vsm = &vcom_socket_main;
1575  uword *p;
1576  vcom_socket_t *vsock;
1577
1578  p = hash_get (vsm->sockidx_by_fd, __fd);
1579  if (!p)
1580    return -EBADF;
1581
1582  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1583  if (!vsock)
1584    return -ENOTSOCK;
1585
1586  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1587    return -EINVAL;
1588
1589  if (!__message)
1590    {
1591      return -EINVAL;
1592    }
1593
1594  /* validate __flags */
1595
1596  rv = vppcom_recvmsg (vsock->sid, __message, __flags);
1597  return rv;
1598}
1599
#ifdef __USE_GNU
/*
 * Placeholder for recvmmsg() on a vcom socket.
 * TBD: define a new vppcom api
 *
 * Nothing is received; all arguments are ignored.
 *
 * RETURN  always 0
 */
int
vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages,
                      unsigned int __vlen, int __flags,
                      struct timespec *__tmo)
{
  (void) __fd;
  (void) __vmessages;
  (void) __vlen;
  (void) __flags;
  (void) __tmo;

  return 0;
}
#endif
1610
/*
 * Placeholder for a vppcom getsockopt call.
 * TBD: move it to vppcom
 *
 * Intended behaviour:
 *  1. for socket level options that are NOT socket attributes
 *     and that have corresponding vpp options, get from vppcom
 *     (not implemented yet)
 *  2. unhandled options are refused
 *
 * All arguments are currently ignored.
 *
 * RETURN  always -ENOPROTOOPT
 */
static int
vppcom_getsockopt (int __sid, int __level, int __optname,
                   void *__restrict __optval, socklen_t * __restrict __optlen)
{
  (void) __sid;
  (void) __level;
  (void) __optname;
  (void) __optval;
  (void) __optlen;

  return -ENOPROTOOPT;
}
1625
1626int
1627vcom_socket_getsockopt (int __fd, int __level, int __optname,
1628                        void *__restrict __optval,
1629                        socklen_t * __restrict __optlen)
1630{
1631  int rv = -1;
1632  vcom_socket_main_t *vsm = &vcom_socket_main;
1633  uword *p;
1634  vcom_socket_t *vsock;
1635
1636  p = hash_get (vsm->sockidx_by_fd, __fd);
1637  if (!p)
1638    return -EBADF;
1639
1640  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1641  if (!vsock)
1642    return -ENOTSOCK;
1643
1644  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1645    return -EINVAL;
1646
1647  if (!__optval && !__optlen)
1648    return -EFAULT;
1649
1650  if (*__optlen < 0)
1651    {
1652      return -EINVAL;
1653    }
1654
1655  switch (__level)
1656    {
1657      /* handle options at socket level */
1658    case SOL_SOCKET:
1659      switch (__optname)
1660        {
1661/*
1662 *  1. for socket level options that are socket attributes,
1663 *     get from libc_getsockopt.
1664 *  2. for socket level options that are NOT socket
1665 *     attributes and that has corresponding vpp options
1666 *     get from vppcom.
1667 *  3. for socket level options unimplemented
1668 *     return -ENOPROTOOPT */
1669        case SO_DEBUG:
1670        case SO_DONTROUTE:
1671        case SO_BROADCAST:
1672        case SO_SNDBUF:
1673        case SO_RCVBUF:
1674        case SO_REUSEADDR:
1675        case SO_REUSEPORT:
1676        case SO_KEEPALIVE:
1677        case SO_TYPE:
1678        case SO_PROTOCOL:
1679        case SO_DOMAIN:
1680        case SO_ERROR:
1681        case SO_OOBINLINE:
1682        case SO_NO_CHECK:
1683        case SO_PRIORITY:
1684        case SO_LINGER:
1685        case SO_BSDCOMPAT:
1686        case SO_TIMESTAMP:
1687        case SO_TIMESTAMPNS:
1688        case SO_TIMESTAMPING:
1689        case SO_RCVTIMEO:
1690        case SO_SNDTIMEO:
1691        case SO_RCVLOWAT:
1692        case SO_SNDLOWAT:
1693        case SO_PASSCRED:
1694        case SO_PEERCRED:
1695        case SO_PEERNAME:
1696        case SO_ACCEPTCONN:
1697        case SO_PASSSEC:
1698        case SO_PEERSEC:
1699        case SO_MARK:
1700        case SO_RXQ_OVFL:
1701        case SO_WIFI_STATUS:
1702        case SO_PEEK_OFF:
1703        case SO_NOFCS:
1704        case SO_BINDTODEVICE:
1705        case SO_GET_FILTER:
1706        case SO_LOCK_FILTER:
1707        case SO_BPF_EXTENSIONS:
1708        case SO_SELECT_ERR_QUEUE:
1709#ifdef CONFIG_NET_RX_BUSY_POLL
1710        case SO_BUSY_POLL:
1711#endif
1712        case SO_MAX_PACING_RATE:
1713        case SO_INCOMING_CPU:
1714          rv = libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
1715          if (rv != 0)
1716            {
1717              rv = -errno;
1718              return rv;
1719            }
1720          break;
1721
1722        default:
1723          /* We implement the SO_SNDLOWAT etc to not be settable
1724           * (1003.1g 7).
1725           */
1726          return -ENOPROTOOPT;
1727        }
1728
1729      break;
1730
1731    default:
1732      /* 1. handle options that are NOT socket level options,
1733       *    but have corresponding vpp otions. */
1734      rv = vppcom_getsockopt (vsock->sid, __level, __optname,
1735                              __optval, __optlen);
1736
1737      return rv;
1738#if 0
1739      /* 2. unhandled options */
1740      return -ENOPROTOOPT;
1741#endif
1742    }
1743
1744  return rv;
1745}
1746
/*
 * Placeholder for a vppcom setsockopt call.
 * TBD: move it to vppcom
 *
 * Intended behaviour:
 *  1. for socket level options that are NOT socket attributes
 *     and that have corresponding vpp options, set it from vppcom
 *     (not implemented yet)
 *  2. unhandled options are refused
 *
 * All arguments are currently ignored.
 *
 * RETURN  always -ENOPROTOOPT
 */
int
vppcom_setsockopt (int __fd, int __level, int __optname,
                   const void *__optval, socklen_t __optlen)
{
  (void) __fd;
  (void) __level;
  (void) __optname;
  (void) __optval;
  (void) __optlen;

  return -ENOPROTOOPT;
}
1761
1762int
1763vcom_socket_setsockopt (int __fd, int __level, int __optname,
1764                        const void *__optval, socklen_t __optlen)
1765{
1766  int rv = -1;
1767  vcom_socket_main_t *vsm = &vcom_socket_main;
1768  uword *p;
1769  vcom_socket_t *vsock;
1770
1771  p = hash_get (vsm->sockidx_by_fd, __fd);
1772  if (!p)
1773    return -EBADF;
1774
1775  vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1776  if (!vsock)
1777    return -ENOTSOCK;
1778
1779  if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
1780    return -EINVAL;
1781
1782  /*
1783   *      Options without arguments
1784   */
1785
1786  if (__optname == SO_BINDTODEVICE)
1787    {
1788      rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
1789      if (rv != 0)
1790        {
1791          rv = -errno;
1792        }
1793      return rv;
1794    }
1795
1796  if (!__optval)
1797    return -EFAULT;
1798
1799  if ((__optlen < 0) || (__optlen < sizeof (int)))
1800    return -EINVAL;
1801
1802  switch (__level)
1803    {
1804      /* handle options at socket level */
1805    case SOL_SOCKET:
1806      switch (__optname)
1807        {
1808          /*
1809           * 1. for socket level options that are socket attributes,
1810           *    set it from libc_getsockopt
1811           * 2. for socket level options that are NOT socket
1812           *    attributes and that has corresponding vpp options
1813           *    set it from vppcom
1814           * 3. for socket level options unimplemented
1815           *    return -ENOPROTOOPT */
1816        case SO_DEBUG:
1817        case SO_DONTROUTE:
1818        case SO_BROADCAST:
1819        case SO_SNDBUF:
1820        case SO_RCVBUF:
1821        case SO_REUSEADDR:
1822        case SO_REUSEPORT:
1823        case SO_KEEPALIVE:
1824        case SO_TYPE:
1825        case SO_PROTOCOL:
1826        case SO_DOMAIN:
1827        case SO_ERROR:
1828        case SO_OOBINLINE:
1829        case SO_NO_CHECK:
1830        case SO_PRIORITY:
1831        case SO_LINGER:
1832        case SO_BSDCOMPAT:
1833        case SO_TIMESTAMP:
1834        case SO_TIMESTAMPNS:
1835        case SO_TIMESTAMPING:
1836        case SO_RCVTIMEO:
1837        case SO_SNDTIMEO:
1838        case SO_RCVLOWAT:
1839        case SO_SNDLOWAT:
1840        case SO_PASSCRED:
1841        case SO_PEERCRED:
1842        case SO_PEERNAME:
1843        case SO_ACCEPTCONN:
1844        case SO_PASSSEC:
1845        case SO_PEERSEC:
1846        case SO_MARK:
1847        case SO_RXQ_OVFL:
1848        case SO_WIFI_STATUS:
1849        case SO_PEEK_OFF:
1850        case SO_NOFCS:
1851          /*
1852           * SO_BINDTODEVICE already handled as
1853           * "Options without arguments" */
1854          /* case SO_BINDTODEVICE: */
1855        case SO_GET_FILTER:
1856        case SO_LOCK_FILTER:
1857        case SO_BPF_EXTENSIONS:
1858        case SO_SELECT_ERR_QUEUE:
1859#ifdef CONFIG_NET_RX_BUSY_POLL
1860        case SO_BUSY_POLL:
1861#endif
1862        case SO_MAX_PACING_RATE:
1863        case SO_INCOMING_CPU:
1864          rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
1865          if (rv != 0)
1866            {
1867              rv = -errno;
1868              return rv;
1869            }
1870          break;
1871
1872        default:
1873          /* We implement the SO_SNDLOWAT etc to not be settable
1874           * (1003.1g 7).
1875           */
1876          return -ENOPROTOOPT;
1877        }
1878
1879      break;
1880
1881    default:
1882      /* 1. handle options that are NOT socket level options,
1883       *    but have corresponding vpp otions. */
1884      rv = vppcom_setsockopt (vsock->sid, __level, __optname,
1885                              __optval, __optlen);
1886      return rv;
1887#if 0
1888      /* 2. unhandled options */
1889      return -ENOPROTOOPT;
1890#endif
1891    }
1892
1893  return rv;
1894}
1895
1896int
1897vcom_socket_listen (int __fd, int __n)
1898{
1899  int rv = -1;
1900  vcom_socket_main_t *vsm = &vcom_socket_main;
1901  uword *p;
1902  vcom_socket_t *vsock;
1903
1904  p = hash_get (vsm->sockidx_by_fd, __fd);
1905  if (p)
1906    {
1907      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
1908
1909      /* TBD vppcom to accept __n parameter */
1910      rv = vppcom_session_listen (vsock->sid, __n);
1911    }
1912
1913  return rv;
1914}
1915
1916static int
1917vcom_socket_connected_socket (int __fd, int __sid,
1918                              int *__domain,
1919                              int *__type, int *__protocol, int flags)
1920{
1921  int rv = -1;
1922  vcom_socket_main_t *vsm = &vcom_socket_main;
1923  vcom_socket_t *vsock;
1924
1925  i32 fd;
1926  i32 sockidx;
1927
1928  socklen_t optlen;
1929
1930  optlen = sizeof (*__domain);
1931  rv = libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &optlen);
1932  if (rv != 0)
1933    {
1934      rv = -errno;
1935      goto out;
1936    }
1937
1938  optlen = sizeof (*__type);
1939  rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &optlen);
1940  if (rv != 0)
1941    {
1942      rv = -errno;
1943      goto out;
1944    }
1945
1946  optlen = sizeof (*__protocol);
1947  rv = libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &optlen);
1948  if (rv != 0)
1949    {
1950      rv = -errno;
1951      goto out;
1952    }
1953
1954  fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol);
1955  if (fd < 0)
1956    {
1957      rv = fd;
1958      goto out;
1959    }
1960
1961  pool_get (vsm->vsockets, vsock);
1962  vsocket_init (vsock);
1963
1964  sockidx = vsock - vsm->vsockets;
1965  hash_set (vsm->sockidx_by_fd, fd, sockidx);
1966
1967  vsocket_set (vsock, fd, __sid, SOCKET_TYPE_VPPCOM_BOUND);
1968  return fd;
1969
1970out:
1971  return rv;
1972}
1973
1974/* If flag is 0, then accept4() is the same as accept().
1975 * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags
1976 */
1977static int
1978vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr,
1979                          socklen_t * __restrict __addr_len, int flags)
1980{
1981  int rv = -1;
1982  vcom_socket_main_t *vsm = &vcom_socket_main;
1983  uword *p;
1984  vcom_socket_t *vsock;
1985
1986  int fd;
1987  int sid;
1988  int domain;
1989  int type;
1990  int protocol;
1991
1992  uint8_t addr8[sizeof (struct in6_addr)];
1993  vppcom_endpt_t ep;
1994
1995  ep.ip = addr8;
1996
1997  /* validate flags */
1998
1999  /*
2000   * for documentation
2001   *  switch (flags)
2002   *   {
2003   *   case 0:
2004   *   case SOCK_NONBLOCK:
2005   *   case SOCK_CLOEXEC:
2006   *   case SOCK_NONBLOCK | SOCK_CLOEXEC:
2007   *     break;
2008   *
2009   *   default:
2010   *     return -1;
2011   *   }
2012   */
2013  /* flags can be 0 or can be bitwise OR
2014   * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */
2015
2016  if (!(!flags || (flags & (SOCK_NONBLOCK | SOCK_CLOEXEC))))
2017    {
2018      /* TBD: return proper error code */
2019      return -1;
2020    }
2021
2022  /* TBD: return proper error code */
2023
2024  if (!vcom_socket_is_connection_mode_socket (__fd))
2025    {
2026      return -EOPNOTSUPP;
2027    }
2028
2029  p = hash_get (vsm->sockidx_by_fd, __fd);
2030  if (p)
2031    {
2032      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
2033
2034
2035      rv = vcom_fcntl (vsock->fd, F_GETFL, 0);
2036      if (rv < 0)
2037        {
2038          return rv;
2039        }
2040
2041      /* is blocking */
2042      if (!(rv & O_NONBLOCK))
2043        {
2044          /* socket is not marked as nonblocking
2045           * and no pending connections are present
2046           * on the queue, accept () blocks the caller
2047           * until a connection is present.
2048           */
2049          rv = vppcom_session_accept (vsock->sid, &ep,
2050                                      -1.0 /* wait forever */ );
2051        }
2052      else
2053        {
2054          /* The file descriptor refers to a socket and has been
2055           * marked nonblocking(O_NONBLOCK) and the accept would
2056           * block.
2057           * */
2058          /* is non blocking */
2059          rv = vppcom_session_accept (vsock->sid, &ep, 0);
2060          /* If the socket is marked nonblocking and
2061           * no pending connections are present on the
2062           * queue, accept fails with the error
2063           * EAGAIN or EWOULDBLOCK
2064           */
2065          if (rv == VPPCOM_ETIMEDOUT)
2066            {
2067              rv = VPPCOM_EAGAIN;
2068            }
2069        }
2070      if (rv < 0)
2071        {
2072          return rv;
2073        }
2074
2075      sid = rv;
2076
2077      /* create a new connected socket resource and set flags
2078       * on the new file descriptor.
2079       * update vsockets and sockidx_by_fd table
2080       * */
2081      fd = vcom_socket_connected_socket (__fd, sid,
2082                                         &domain, &type, &protocol, flags);
2083      if (fd < 0)
2084        {
2085          return fd;
2086        }
2087
2088      rv = fd;
2089
2090      /* TBD populate __addr and __addr_len */
2091      /* TBD: The returned address is truncated if the buffer
2092       * provided is too small, in this case, __addr_len will
2093       * return a value greater than was supplied to the call.*/
2094      if (__addr)
2095        {
2096          if (ep.is_cut_thru)
2097            {
2098              /* TBD populate __addr and __addr_len */
2099              switch (domain)
2100                {
2101                case AF_INET:
2102                  ((struct sockaddr_in *) __addr)->sin_family = AF_INET;
2103                  ((struct sockaddr_in *) __addr)->sin_port = ep.port;
2104                  memcpy (&((struct sockaddr_in *) __addr)->sin_addr,
2105                          addr8, sizeof (struct in_addr));
2106                  /* TBD: populate __addr_len */
2107                  if (__addr_len)
2108                    {
2109                      *__addr_len = sizeof (struct sockaddr_in);
2110                    }
2111                  break;
2112
2113                case AF_INET6:
2114                  ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6;
2115                  ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
2116                  memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr.
2117                          __in6_u.__u6_addr8, addr8,
2118                          sizeof (struct in6_addr));
2119                  /* TBD: populate __addr_len */
2120                  if (__addr_len)
2121                    {
2122                      *__addr_len = sizeof (struct sockaddr_in6);
2123                    }
2124                  break;
2125
2126                default:
2127                  return -EAFNOSUPPORT;
2128                }
2129            }
2130          else
2131            {
2132              switch (ep.is_ip4)
2133                {
2134                case VPPCOM_IS_IP4:
2135                  ((struct sockaddr_in *) __addr)->sin_family = AF_INET;
2136                  ((struct sockaddr_in *) __addr)->sin_port = ep.port;
2137                  memcpy (&((struct sockaddr_in *) __addr)->sin_addr,
2138                          addr8, sizeof (struct in_addr));
2139                  /* TBD: populate __addr_len */
2140                  if (__addr_len)
2141                    {
2142                      *__addr_len = sizeof (struct sockaddr_in);
2143                    }
2144                  break;
2145
2146                case VPPCOM_IS_IP6:
2147                  ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6;
2148                  ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
2149                  memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr.
2150                          __in6_u.__u6_addr8, addr8,
2151                          sizeof (struct in6_addr));
2152                  /* TBD: populate __addr_len */
2153                  if (__addr_len)
2154                    {
2155                      *__addr_len = sizeof (struct sockaddr_in6);
2156                    }
2157                  break;
2158
2159                default:
2160                  return -EAFNOSUPPORT;
2161                }
2162            }
2163        }
2164      else
2165        {
2166          /* when __addr is NULL, nothing is filled in,
2167           * in this case, __addr_len is not used,
2168           * and should also be null
2169           * */
2170          if (__addr_len)
2171            {
2172              /* TBD: return proper error code */
2173              return -1;
2174            }
2175        }
2176    }
2177
2178  return rv;
2179}
2180
/*
 * accept() for a vcom socket: vcom_socket_accept_flags with no flags.
 *
 * RETURN  whatever vcom_socket_accept_flags returns
 */
int
vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr,
                    socklen_t * __restrict __addr_len)
{
  const int no_flags = 0;

  return vcom_socket_accept_flags (__fd, __addr, __addr_len, no_flags);
}
2188
#ifdef __USE_GNU
/*
 * accept4() for a vcom socket: vcom_socket_accept_flags with the
 * caller's flags (SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed).
 *
 * RETURN  whatever vcom_socket_accept_flags returns
 */
int
vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr,
                     socklen_t * __restrict __addr_len, int __flags)
{
  int accepted_fd;

  accepted_fd = vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags);
  return accepted_fd;
}
#endif
2198
/*
 * Placeholder for a vppcom shutdown call.
 * TBD: move it to vppcom
 *
 * Both arguments are ignored.
 *
 * RETURN  always 0
 */
int
vppcom_session_shutdown (int __fd, int __how)
{
  (void) __fd;
  (void) __how;

  return 0;
}
2205
2206int
2207vcom_socket_shutdown (int __fd, int __how)
2208{
2209  int rv = -1;
2210  vcom_socket_main_t *vsm = &vcom_socket_main;
2211  uword *p;
2212  vcom_socket_t *vsock;
2213
2214  p = hash_get (vsm->sockidx_by_fd, __fd);
2215  if (p)
2216    {
2217      vsock = pool_elt_at_index (vsm->vsockets, p[0]);
2218      switch (__how)
2219        {
2220        case SHUT_RD:
2221        case SHUT_WR:
2222        case SHUT_RDWR:
2223          rv = vppcom_session_shutdown (vsock->sid, __how);
2224          return rv;
2225          break;
2226
2227        default:
2228          return -EINVAL;
2229          break;
2230        }
2231    }
2232
2233  return rv;
2234}
2235
/*
 * Placeholder for the vppcom epoll create call.
 * TBD: remove it once vppcom.h is committed.
 *
 * RETURN  always -ENOSYS
 */
int
vppcom_epoll_create (void)
{
  const int not_implemented = -ENOSYS;

  return not_implemented;
}
2243int
2244vcom_socket_epoll_create1 (int __flags)
2245{
2246  int rv = -1;
2247  vcom_socket_main_t *vsm = &vcom_socket_main;
2248  vcom_epoll_t *vepoll;
2249
2250  i32 epfd;
2251  i32 vep_idx;
2252  i32 epollidx;
2253
2254  epfd = vcom_socket_open_epoll (__flags);
2255  if (epfd < 0)
2256    {
2257      rv = epfd;
2258      goto out;
2259    }
2260
2261  vep_idx = vppcom_epoll_create ( );
2262  if (vep_idx < 0)
2263    {
2264      rv = vep_idx;
2265      goto out_close_epoll;
2266    }
2267
2268  pool_get (vsm->vepolls, vepoll);
2269  vepoll_init (vepoll);
2270
2271  epollidx = vepoll - vsm->vepolls;
2272  hash_set (vsm->epollidx_by_epfd, epfd, epollidx);
2273
2274  vepoll_set (vepoll, epfd, vep_idx,
2275              EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0);
2276
2277  return epfd;
2278
2279out_close_epoll:
2280  vcom_socket_close_epoll (epfd);
2281out:
2282  return rv;
2283}
2284
/*
 * Placeholder for epoll_ctl() on a vcom epoll descriptor.
 *
 * All arguments are ignored.
 *
 * RETURN  always -ENOSYS
 */
int
vcom_socket_epoll_ctl (int __epfd, int __op, int __fd,
                       struct epoll_event *__event)
{
  (void) __epfd;
  (void) __op;
  (void) __fd;
  (void) __event;

  return -ENOSYS;
}
2291
/*
 * Placeholder for epoll_pwait() on a vcom epoll descriptor.
 *
 * All arguments are ignored.
 *
 * RETURN  always -ENOSYS
 */
int
vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events,
                         int __maxevents, int __timeout,
                         const __sigset_t *__ss)
{
  (void) __epfd;
  (void) __events;
  (void) __maxevents;
  (void) __timeout;
  (void) __ss;

  return -ENOSYS;
}
2299
2300/*
2301 * fd.io coding-style-patch-verification: ON
2302 *
2303 * Local Variables:
2304 * eval: (c-set-style "gnu")
2305 * End:
2306 */
2307