rtnl.c revision 38f03b2d
1/*
2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#define _GNU_SOURCE
17#include <sched.h>
18
19#include <librtnl/rtnl.h>
20#include <librtnl/netns.h>
21
22#include <vlib/vlib.h>
23#include <vlib/unix/unix.h>
24#include <vppinfra/error.h>
25
26#include <sys/socket.h>
27#include <linux/netlink.h>
28#include <linux/rtnetlink.h>
29#include <float.h>
30#include <fcntl.h>
31
32#include <sys/types.h>
33#include <sys/wait.h>
34#include <errno.h>
35#include <string.h>
36
/*
 * Replace the <float.h> DBL_MAX with a much smaller finite value.
 * In this file it is the "no timeout pending" sentinel, and the process
 * loop subtracts the current time from it to get a wait duration.
 */
#undef DBL_MAX
#define DBL_MAX (1000000000.0)
39
/*
 * Event types signalled to the rtnl process node.
 * Each event carries the index of the stream it applies to.
 */
typedef enum {
  RTNL_E_OPEN = 0,   /* a stream was created and must start synchronizing */
  RTNL_E_CLOSE = 1,  /* a stream must be torn down */
  RTNL_E_READ = 2,   /* the stream's netlink socket is readable */
} rtnl_event_t;
45
/* Top-level life-cycle state of a stream. */
typedef enum {
  RTNL_S_INIT = 0,   /* allocated, open event not processed yet */
  RTNL_S_SYNC = 1,   /* initial dumps in progress (see rtnl_sync_state_t) */
  RTNL_S_READY = 2,  /* all dumps completed */
} rtnl_state_t;
51
/*
 * Progress of the initial synchronization. Apart from OPENING, each value
 * names the dump request whose completion is currently awaited.
 * RTNL_SS_OPENING must stay 0: the state is reset by assigning 0.
 */
typedef enum {
  RTNL_SS_OPENING = 0,  /* netlink socket not open yet */
  RTNL_SS_LINK = 1,     /* RTM_GETLINK dump outstanding */
  RTNL_SS_ADDR = 2,     /* RTM_GETADDR dump outstanding */
  RTNL_SS_ROUTE4 = 3,   /* RTM_GETROUTE (AF_INET) dump outstanding */
  RTNL_SS_ROUTE6 = 4,   /* RTM_GETROUTE (AF_INET6) dump outstanding */
  RTNL_SS_NEIGH = 5,    /* RTM_GETNEIGH dump outstanding */
} rtnl_sync_state_t;
60
61typedef struct {
62  rtnl_stream_t stream;
63  rtnl_state_t state;
64  rtnl_sync_state_t sync_state;
65  int ns_fd;
66  int rtnl_socket;
67  u32 unix_index;
68  u32 rtnl_seq;
69  f64 timeout;
70} rtnl_ns_t;
71
72typedef struct {
73  f64 now;
74  rtnl_ns_t *streams;
75} rtnl_main_t;
76
77static rtnl_main_t rtnl_main;
78static vlib_node_registration_t rtnl_process_node;
79
80#define RTNL_BUFFSIZ 16384
81#define RTNL_DUMP_TIMEOUT 1
82
83static inline u32 grpmask(u32 g)
84{
85  ASSERT (g <= 31);
86  if (g) {
87    return 1 << (g - 1);
88  } else
89    return 0;
90}
91
92
93u8 *format_rtnl_nsname2path(u8 *s, va_list *args)
94{
95  char *nsname = va_arg(*args, char *);
96  if (!nsname || !strlen(nsname)) {
97    return format(s, "/proc/self/ns/net");
98  } else if (strpbrk(nsname, "/") != NULL) {
99    return format(s, "%s", nsname);
100  } else {
101    return format(s, "/var/run/netns/%s", nsname);
102  }
103}
104
105static_always_inline void
106rtnl_schedule_timeout(rtnl_ns_t *ns, f64 when)
107{
108  ns->timeout = when;
109}
110
111static_always_inline void
112rtnl_cancel_timeout(rtnl_ns_t *ns)
113{
114  ns->timeout = DBL_MAX;
115}
116
117static clib_error_t *rtnl_read_cb(struct clib_file * f)
118{
119  rtnl_main_t *rm = &rtnl_main;
120  vlib_main_t *vm = vlib_get_main();
121  rtnl_ns_t *ns = &rm->streams[f->private_data];
122  vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_READ, (uword)(ns - rm->streams));
123  return 0;
124}
125
126int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len)
127{
128  struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
129  struct nlmsghdr nlh = {
130    .nlmsg_len = NLMSG_LENGTH(len),
131    .nlmsg_type = type,
132    .nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST,
133    .nlmsg_pid = 0,
134    .nlmsg_seq = ++ns->rtnl_seq,
135  };
136  struct iovec iov[2] = {
137    { .iov_base = &nlh, .iov_len = sizeof(nlh) },
138    { .iov_base = req, .iov_len = len }
139  };
140  struct msghdr msg = {
141    .msg_name = &nladdr,
142    .msg_namelen =  sizeof(nladdr),
143    .msg_iov = iov,
144    .msg_iovlen = 2,
145  };
146  if(sendmsg(ns->rtnl_socket, &msg, 0) < 0) {
147    clib_warning("sendmsg error: %s", strerror(errno));
148    return -1;
149  }
150  return 0;
151}
152
153static void rtnl_socket_close(rtnl_ns_t *ns)
154{
155  clib_file_del(&file_main, &file_main.file_pool[ns->unix_index]);
156  close(ns->rtnl_socket);
157}
158
/* Arguments handed to the helper thread that runs a callback in a namespace. */
struct rtnl_thread_exec {
  int fd;               /* namespace fd passed to setns() */
  void *(*fn)(void *);  /* callback to run after the namespace switch */
  void *arg;            /* argument passed to fn */
  void **ret;           /* where the value returned by fn is stored */
};
165
166static void *rtnl_exec_in_thread_fn(void *p)
167{
168  struct rtnl_thread_exec *ex = (struct rtnl_thread_exec *) p;
169  if (setns(ex->fd, 0))
170    return (void *) ((uword) (-errno));
171
172  *ex->ret = ex->fn(ex->arg);
173  return NULL;
174}
175
176static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, void **ret)
177{
178  pthread_t thread;
179  void *thread_ret;
180  struct rtnl_thread_exec ex = {
181    .fd = fd,
182    .fn = fn,
183    .arg = arg,
184    .ret = ret
185  };
186  if(pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex))
187    return -errno;
188
189  if(pthread_join(thread, &thread_ret))
190    return -errno;
191
192  if (thread_ret)
193    return (int) ((uword)thread_ret);
194
195  return 0;
196}
197
198int rtnl_exec_in_namespace(u32 stream_index, void *(*fn)(void *), void *arg, void **ret)
199{
200  rtnl_main_t *rm = &rtnl_main;
201  if (pool_is_free_index(rm->streams, stream_index))
202    return -EBADR;
203
204  rtnl_ns_t *ns = pool_elt_at_index(rm->streams, stream_index);
205  return rtnl_exec_in_namespace_byfd(ns->ns_fd, fn, arg, ret);
206}
207
208int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret)
209{
210  int fd;
211  u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, nsname);
212
213  if ((fd = open((char *)s, O_RDONLY)) < 0) {
214    vec_free(s);
215    return -errno;
216  }
217
218  int r = rtnl_exec_in_namespace_byfd(fd, fn, arg, ret);
219  vec_free(s);
220  close(fd);
221  return r;
222}
223
224/* this function is run by the second thread */
225static void *rtnl_thread_fn(void *p)
226{
227  rtnl_ns_t *ns = (rtnl_ns_t *) p;
228  if (setns(ns->ns_fd, 0)) {
229    clib_warning("setns(%d, %d) error %d", ns->ns_fd, CLONE_NEWNET, errno);
230    return (void *) -1;
231  }
232
233  if ((ns->rtnl_socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1) {
234    clib_warning("Cannot open socket");
235    return (void *) -2;
236  }
237
238  return NULL;
239}
240
241static int rtnl_socket_open(rtnl_ns_t *ns)
242{
243  rtnl_main_t *rm = &rtnl_main;
244  pthread_t thread;
245  void *thread_ret;
246  if(pthread_create(&thread, NULL, rtnl_thread_fn, ns)) {
247    clib_warning("Can't create opening thread");
248    return -1;
249  }
250
251  if(pthread_join(thread, &thread_ret)) {
252    clib_warning("Can't join opening thread");
253    return -2;
254  }
255
256  if (thread_ret) {
257    clib_warning("Could not open netlink socket");
258    return -3;
259  }
260
261  struct sockaddr_nl addr = {
262    .nl_family = AF_NETLINK,
263    .nl_pad = 0,
264    .nl_pid = 0,
265    /*add mpls message group*/
266    .nl_groups = grpmask(RTNLGRP_LINK)| grpmask(RTNLGRP_IPV6_IFADDR) |
267    grpmask(RTNLGRP_IPV4_IFADDR) | grpmask(RTNLGRP_IPV4_ROUTE) |
268    grpmask(RTNLGRP_IPV6_ROUTE) | grpmask(RTNLGRP_NEIGH) |
269    grpmask(RTNLGRP_NOTIFY) | grpmask(RTNLGRP_MPLS_ROUTE),
270  };
271
272  if (bind(ns->rtnl_socket, (struct sockaddr*) &addr, sizeof(addr))) {
273    close(ns->rtnl_socket);
274    return -3;
275  }
276
277  clib_file_t template = {0};
278  template.read_function = rtnl_read_cb;
279  template.file_descriptor = ns->rtnl_socket;
280  template.private_data = (uword) (ns - rm->streams);
281  ns->unix_index = clib_file_add (&file_main, &template);
282  return 0;
283}
284
285static int
286rtnl_rcv_error(rtnl_ns_t *ns, struct nlmsghdr *hdr, int *error)
287{
288  struct nlmsgerr *err = NLMSG_DATA(hdr);
289  size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr));
290  if(datalen < sizeof(*err))
291    return -1;
292
293  *error = err->error;
294  return 0;
295}
296
297static void
298rtnl_sync_reset(rtnl_ns_t *ns)
299{
300  if (ns->sync_state == RTNL_SS_OPENING)
301    return;
302
303  rtnl_socket_close(ns);
304  ns->sync_state = RTNL_SS_OPENING;
305}
306
307static void
308rtnl_sync_done(rtnl_ns_t *ns)
309{
310  rtnl_main_t *rm = &rtnl_main;
311  struct ifaddrmsg addrmsg;
312  struct rtmsg rtmsg;
313  struct ndmsg ndmsg;
314  switch (ns->sync_state) {
315  case RTNL_SS_OPENING:
316    //Cannot happen here
317    break;
318  case RTNL_SS_LINK:
319    memset(&addrmsg, 0, sizeof(addrmsg));
320    addrmsg.ifa_family = AF_UNSPEC;
321    if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) {
322      rtnl_sync_reset(ns);
323      rtnl_schedule_timeout(ns, rm->now + 1);
324      return;
325    }
326    rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
327    ns->sync_state = RTNL_SS_ADDR;
328    break;
329  case RTNL_SS_ADDR:
330  case RTNL_SS_ROUTE4:
331    memset(&rtmsg, 0, sizeof(rtmsg));
332    rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6;
333    rtmsg.rtm_table = RT_TABLE_UNSPEC;
334    if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) {
335      rtnl_sync_reset(ns);
336      rtnl_schedule_timeout(ns, rm->now + 1);
337      return;
338    }
339    rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
340    ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6;
341    break;
342  case RTNL_SS_ROUTE6:
343    memset(&ndmsg, 0, sizeof(ndmsg));
344    ndmsg.ndm_family = AF_UNSPEC;
345    if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) {
346      rtnl_sync_reset(ns);
347      rtnl_schedule_timeout(ns, rm->now + 1);
348      return;
349    }
350    rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
351    ns->sync_state = RTNL_SS_NEIGH;
352    break;
353  case RTNL_SS_NEIGH:
354    ns->state = RTNL_S_READY;
355    ns->sync_state = 0;
356    rtnl_cancel_timeout(ns);
357    break;
358  }
359}
360
361static void
362rtnl_sync_timeout(rtnl_ns_t *ns)
363{
364  rtnl_main_t *rm = &rtnl_main;
365  struct ifinfomsg imsg = {};
366  switch (ns->sync_state) {
367  case RTNL_SS_OPENING:
368    if (rtnl_socket_open(ns)) {
369      rtnl_schedule_timeout(ns, rm->now + 10);
370      return;
371    }
372    imsg.ifi_family = AF_UNSPEC;
373    if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) {
374      rtnl_sync_reset(ns);
375      rtnl_schedule_timeout(ns, rm->now + 10);
376    }
377    ns->sync_state = RTNL_SS_LINK;
378    rtnl_schedule_timeout(ns, rm->now + 2);
379    break;
380  case RTNL_SS_LINK:
381  case RTNL_SS_ADDR:
382  case RTNL_SS_ROUTE4:
383  case RTNL_SS_ROUTE6:
384  case RTNL_SS_NEIGH:
385    //Timeout happened while synchronizing
386    rtnl_sync_reset(ns);
387    rtnl_schedule_timeout(ns, rm->now + 1);
388    break;
389  }
390}
391
392static int
393rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr)
394{
395  rtnl_main_t *rm = &rtnl_main;
396  int ret, error = 0;
397
398  if (ns->state == RTNL_S_SYNC &&
399      ((hdr->nlmsg_flags & RTM_F_NOTIFY) ||
400       (hdr->nlmsg_seq != (ns->rtnl_seq)))) {
401    clib_warning("Received notification while in sync. Restart synchronization.");
402    rtnl_sync_reset(ns);
403    rtnl_schedule_timeout(ns, rm->now);
404  }
405
406  switch (hdr->nlmsg_type) {
407  case NLMSG_DONE:
408    rtnl_sync_done(ns);
409    break;
410  case NLMSG_ERROR:
411    if((ret = rtnl_rcv_error(ns, hdr, &error)))
412      return ret;
413    break;
414  case RTM_NEWROUTE:
415  case RTM_DELROUTE:
416  case RTM_NEWLINK:
417  case RTM_DELLINK:
418  case RTM_NEWADDR:
419  case RTM_DELADDR:
420  case RTM_NEWNEIGH:
421  case RTM_DELNEIGH:
422    if (ns->stream.recv_message)
423      ns->stream.recv_message(hdr, ns->stream.opaque);
424    break;
425  default:
426    clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type);
427    break;
428  }
429  return 0;
430}
431
432static void
433rtnl_process_open(rtnl_ns_t *ns)
434{
435  rtnl_main_t *rm = &rtnl_main;
436  if (ns->state != RTNL_S_INIT)
437    return;
438
439  ns->state = RTNL_S_SYNC;
440  ns->sync_state = RTNL_SS_OPENING;
441  rtnl_schedule_timeout(ns, rm->now);
442}
443
444static void
445rtnl_process_close(rtnl_ns_t *ns)
446{
447  rtnl_main_t *rm = &rtnl_main;
448  if (ns->state == RTNL_S_INIT)
449    return;
450
451  rtnl_socket_close(ns);
452  close(ns->ns_fd);
453  pool_put(rm->streams, ns);
454}
455
456static int
457rtnl_process_read(rtnl_ns_t *ns)
458{
459  uint8_t buff[RTNL_BUFFSIZ];
460  ssize_t len;
461  struct nlmsghdr *hdr;
462  while(1) {
463    if((len = recv(ns->rtnl_socket, buff, RTNL_BUFFSIZ, MSG_DONTWAIT)) < 0) {
464      if(errno != EAGAIN) {
465        clib_warning("rtnetlink recv error (%d) [%s]: %s", ns->rtnl_socket, ns->stream.name, strerror(errno));
466        return -1;
467      }
468      return 0;
469    }
470
471    for(hdr = (struct nlmsghdr *) buff;
472        len > 0;
473        len -= NLMSG_ALIGN(hdr->nlmsg_len),
474          hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) {
475      if((sizeof(*hdr) > (size_t)len) || (hdr->nlmsg_len > (size_t)len)) {
476        clib_warning("rtnetlink buffer too small (%d Vs %d)", (int) hdr->nlmsg_len, (int) len);
477        return -1;
478      }
479      if (rtnl_ns_recv(ns, hdr))
480        return -1;
481    }
482  }
483  return 0;
484}
485
486static void
487rtnl_process_timeout(rtnl_ns_t *ns)
488{
489  switch (ns->state) {
490  case RTNL_S_SYNC:
491    rtnl_sync_timeout(ns);
492    break;
493  case RTNL_S_INIT:
494  case RTNL_S_READY:
495    clib_warning("Should not happen");
496    break;
497  }
498}
499
500static uword
501rtnl_process (vlib_main_t * vm,
502              vlib_node_runtime_t * node,
503              vlib_frame_t * frame)
504{
505  rtnl_main_t *rm = &rtnl_main;
506  uword event_type;
507  uword *event_data = 0;
508  rm->now = vlib_time_now(vm);
509  f64 timeout = DBL_MAX;
510  rtnl_ns_t *ns;
511
512  //Setting up
513  while (1) {
514    vlib_process_wait_for_event_or_clock(vm, timeout - rm->now);
515    event_type = vlib_process_get_events(vm, &event_data);
516    rm->now = vlib_time_now(vm);
517
518    if (event_type == ~0) { //Clock event or no event
519      pool_foreach(ns, rm->streams, {
520          if (ns->timeout < rm->now) {
521            ns->timeout = DBL_MAX;
522            rtnl_process_timeout(ns);
523          }
524        });
525    } else {
526      rtnl_ns_t *ns;
527      uword *d;
528      vec_foreach(d, event_data) {
529        ns = &rm->streams[d[0]];
530        switch (event_type)
531          {
532          case RTNL_E_CLOSE:
533            rtnl_process_close(ns);
534            break;
535          case RTNL_E_OPEN:
536            rtnl_process_open(ns);
537            break;
538          case RTNL_E_READ:
539            rtnl_process_read(ns);
540            break;
541          }
542      }
543    }
544
545    vec_reset_length (event_data);
546
547    timeout = DBL_MAX;
548    pool_foreach(ns, rm->streams, {
549        if (ns->timeout < timeout)
550          timeout = ns->timeout;
551      });
552  }
553  return frame->n_vectors;
554}
555
556VLIB_REGISTER_NODE(rtnl_process_node, static) = {
557  .function = rtnl_process,
558  .name = "rtnl-process",
559  .type = VLIB_NODE_TYPE_PROCESS,
560};
561
562u32
563rtnl_stream_open(rtnl_stream_t *template)
564{
565  vlib_main_t *vm = vlib_get_main();
566  rtnl_main_t *rm = &rtnl_main;
567  rtnl_ns_t *ns;
568  int fd;
569  u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, template->name);
570  vec_add1(s, 0);
571
572  if ((fd = open((char *)s, O_RDONLY)) < 0) {
573    clib_unix_warning("open stream %s: ", s);
574    vec_free(s);
575    return ~0;
576  }
577
578  vec_free(s);
579  pool_get(rm->streams, ns);
580  ns->state = RTNL_S_INIT;
581  ns->ns_fd = fd;
582  ns->stream = *template;
583  vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_OPEN, (uword)(ns - rm->streams));
584  return ns - rm->streams;
585}
586
587void
588rtnl_stream_close(u32 stream_index)
589{
590  vlib_main_t *vm = vlib_get_main();
591  rtnl_main_t *rm = &rtnl_main;
592  ASSERT(!pool_is_free_index(rm->streams, stream_index));
593  vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_CLOSE, stream_index);
594}
595
596clib_error_t *
597rtnl_init (vlib_main_t * vm)
598{
599  rtnl_main_t *rm = &rtnl_main;
600  rm->streams = 0;
601  return 0;
602}
603
604VLIB_INIT_FUNCTION (rtnl_init);
605