nat64.c revision f126e746
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief NAT64 implementation
 */

#include <nat/nat64.h>
#include <nat/nat64_db.h>
#include <nat/nat_inlines.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/crc32.h>
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <vnet/ip/reass/ip6_sv_reass.h>


nat64_main_t nat64_main;

/* *INDENT-OFF* */

/* Hook up input features */
VNET_FEATURE_INIT (nat64_in2out, static) = {
  .arc_name = "ip6-unicast",
  .node_name = "nat64-in2out",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
  .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat64_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat64-out2in",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat64_in2out_handoff, static) = {
  .arc_name = "ip6-unicast",
  .node_name = "nat64-in2out-handoff",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
  .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
};
VNET_FEATURE_INIT (nat64_out2in_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat64-out2in-handoff",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
};


static u8 well_known_prefix[] = {
  0x00, 0x64, 0xff, 0x9b,
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00
};

/* *INDENT-ON* */

static void
nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque,
                                        u32 sw_if_index,
                                        ip4_address_t * address,
                                        u32 address_length,
                                        u32 if_address_index, u32 is_delete)
{
  nat64_main_t *nm = &nat64_main;
  int i, j;

  for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
    {
      if (sw_if_index == nm->auto_add_sw_if_indices[i])
        {
          if (!is_delete)
            {
              /* Don't trip over lease renewal, static config */
              for (j = 0; j < vec_len (nm->addr_pool); j++)
                if (nm->addr_pool[j].addr.as_u32 == address->as_u32)
                  return;

              (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                              address, ~0, 1);
              return;
            }
          else
            {
              (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                              address, ~0, 0);
              return;
            }
        }
    }
}
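/**
 * @brief Pick the in2out worker thread for a packet by hashing the inside
 * IPv6 source address over the configured worker set.
 */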
u32
nat64_get_worker_in2out (ip6_address_t * addr)
{
  nat64_main_t *nm = &nat64_main;
  snat_main_t *sm = nm->sm;
  u32 next_worker_index = nm->sm->first_worker_index;
  u32 hash;

#ifdef clib_crc32c_uses_intrinsics
  hash = clib_crc32c ((u8 *) addr->as_u32, 16);
#else
  u64 tmp = addr->as_u64[0] ^ addr->as_u64[1];
  hash = clib_xxhash (tmp);
#endif

  if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
    next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
  else
    next_worker_index += sm->workers[hash % _vec_len (sm->workers)];

  return next_worker_index;
}
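/**
 * @brief Pick the out2in worker thread for an IPv4 packet.
 *
 * Unknown protocols are mapped via a BIB lookup on the destination address,
 * ICMP uses the echo identifier (or the inner headers for error messages),
 * and TCP/UDP map the outside destination port to the worker owning that
 * port range.
 */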
u32
nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
{
  nat64_main_t *nm = &nat64_main;
  snat_main_t *sm = nm->sm;
  udp_header_t *udp;
  u16 port;
  u32 proto;

  proto = ip_proto_to_snat_proto (ip->protocol);
  udp = ip4_next_header (ip);
  port = udp->dst_port;

  /* unknown protocol */
  if (PREDICT_FALSE (proto == ~0))
    {
      nat64_db_t *db;
      ip46_address_t daddr;
      nat64_db_bib_entry_t *bibe;

      clib_memset (&daddr, 0, sizeof (daddr));
      daddr.ip4.as_u32 = ip->dst_address.as_u32;

      /* *INDENT-OFF* */
      vec_foreach (db, nm->db)
        {
          bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
          if (bibe)
            return (u32) (db - nm->db);
        }
      /* *INDENT-ON* */
      return vlib_get_thread_index ();
    }

  /* ICMP */
  if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
    {
      icmp46_header_t *icmp = (icmp46_header_t *) udp;
      icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
      if (!icmp_type_is_error_message
          (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
        port = vnet_buffer (b)->ip.reass.l4_src_port;
      else
        {
          /* an ICMP error message is never fragmented, so the inner headers
             are directly accessible */
          ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
          proto = ip_proto_to_snat_proto (inner_ip->protocol);
          void *l4_header = ip4_next_header (inner_ip);
          switch (proto)
            {
            case SNAT_PROTOCOL_ICMP:
              icmp = (icmp46_header_t *) l4_header;
              echo = (icmp_echo_header_t *) (icmp + 1);
              port = echo->identifier;
              break;
            case SNAT_PROTOCOL_UDP:
            case SNAT_PROTOCOL_TCP:
              port = ((tcp_udp_header_t *) l4_header)->src_port;
              break;
            default:
              return vlib_get_thread_index ();
            }
        }
    }

  /* select the worker by outside destination port (TCP/UDP) */
  port = clib_net_to_host_u16 (port);
  if (port > 1024)
    return nm->sm->first_worker_index + ((port - 1024) / sm->port_per_thread);

  return vlib_get_thread_index ();
}
clib_error_t *
nat64_init (vlib_main_t * vm)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  ip4_add_del_interface_address_callback_t cb4;
  ip4_main_t *im = &ip4_main;
  nm->sm = &snat_main;
  vlib_node_t *node;

  vec_validate (nm->db, tm->n_vlib_mains - 1);

  nm->fq_in2out_index = ~0;
  nm->fq_out2in_index = ~0;

  node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
  nm->error_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out");
  nm->in2out_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath");
  nm->in2out_slowpath_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in");
  nm->out2in_node_index = node->index;

  /* set session timeouts to default values */
  nm->udp_timeout = SNAT_UDP_TIMEOUT;
  nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
  nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
  nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;

  nm->total_enabled_count = 0;

  /* Set up the interface address add/del callback */
  cb4.function = nat64_ip4_add_del_interface_address_cb;
  cb4.function_opaque = 0;
  vec_add1 (im->add_del_interface_address_callbacks, cb4);
  nm->ip4_main = im;

  /* Init counters */
  nm->total_bibs.name = "total-bibs";
  nm->total_bibs.stat_segment_name = "/nat64/total-bibs";
  vlib_validate_simple_counter (&nm->total_bibs, 0);
  vlib_zero_simple_counter (&nm->total_bibs, 0);
  nm->total_sessions.name = "total-sessions";
  nm->total_sessions.stat_segment_name = "/nat64/total-sessions";
  vlib_validate_simple_counter (&nm->total_sessions, 0);
  vlib_zero_simple_counter (&nm->total_sessions, 0);

  return 0;
}

static void nat64_free_out_addr_and_port (struct nat64_db_s *db,
                                          ip4_address_t * addr, u16 port,
                                          u8 protocol);
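/**
 * @brief Set BIB and session-table sizes and (re)initialize the per-worker
 * NAT64 databases.
 */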
void
nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets,
                u32 st_memory_size)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_t *db;

  nm->bib_buckets = bib_buckets;
  nm->bib_memory_size = bib_memory_size;
  nm->st_buckets = st_buckets;
  nm->st_memory_size = st_memory_size;

  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets,
                         st_memory_size, nat64_free_out_addr_and_port))
        nat_elog_err ("NAT64 DB init failed");
    }
  /* *INDENT-ON* */
}
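/**
 * @brief Add/delete an IPv4 address to/from the NAT64 pool.
 *
 * On delete, sessions using the address are freed and the address is
 * withdrawn from the FIB on outside interfaces.
 */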
int
nat64_add_del_pool_addr (u32 thread_index,
                         ip4_address_t * addr, u32 vrf_id, u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  snat_address_t *a = 0;
  snat_interface_t *interface;
  int i;
  nat64_db_t *db;
  vlib_thread_main_t *tm = vlib_get_thread_main ();

  /* Check if address already exists */
  for (i = 0; i < vec_len (nm->addr_pool); i++)
    {
      if (nm->addr_pool[i].addr.as_u32 == addr->as_u32)
        {
          a = nm->addr_pool + i;
          break;
        }
    }

  if (is_add)
    {
      if (a)
        return VNET_API_ERROR_VALUE_EXIST;

      vec_add2 (nm->addr_pool, a, 1);
      a->addr = *addr;
      a->fib_index = ~0;
      if (vrf_id != ~0)
        a->fib_index =
          fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
                                             nat_fib_src_hi);
#define _(N, id, n, s) \
      clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \
      a->busy_##n##_ports = 0; \
      vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
      foreach_snat_protocol
#undef _
    }
  else
    {
      if (!a)
        return VNET_API_ERROR_NO_SUCH_ENTRY;

      if (a->fib_index != ~0)
        fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nat_fib_src_hi);
      /* Delete sessions using address */
        /* *INDENT-OFF* */
        vec_foreach (db, nm->db)
          {
            nat64_db_free_out_addr (thread_index, db, &a->addr);
            vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0,
                                     db->bib.bib_entries_num);
            vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
                                     db->st.st_entries_num);
          }
#define _(N, id, n, s) \
      clib_bitmap_free (a->busy_##n##_port_bitmap);
      foreach_snat_protocol
#undef _
        /* *INDENT-ON* */
      vec_del1 (nm->addr_pool, i);
    }

  /* Add/del external address to FIB */
  /* *INDENT-OFF* */
  pool_foreach (interface, nm->interfaces,
  ({
    if (nat_interface_is_inside(interface))
      continue;

    snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
    break;
  }));
  /* *INDENT-ON* */

  return 0;
}
void
nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
{
  nat64_main_t *nm = &nat64_main;
  snat_address_t *a = 0;

  /* *INDENT-OFF* */
  vec_foreach (a, nm->addr_pool)
    {
      if (fn (a, ctx))
        break;
    };
  /* *INDENT-ON* */
}
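/**
 * @brief Enable/disable automatic addition of an interface's first IPv4
 * address to the NAT64 pool.
 */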
int
nat64_add_interface_address (u32 sw_if_index, int is_add)
{
  nat64_main_t *nm = &nat64_main;
  ip4_main_t *ip4_main = nm->ip4_main;
  ip4_address_t *first_int_addr;
  int i;

  first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);

  for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
    {
      if (nm->auto_add_sw_if_indices[i] == sw_if_index)
        {
          if (is_add)
            return VNET_API_ERROR_VALUE_EXIST;
          else
            {
              /* if the interface has an address, remove it from the pool */
              if (first_int_addr)
                (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                                first_int_addr, ~0, 0);
              vec_del1 (nm->auto_add_sw_if_indices, i);
              return 0;
            }
        }
    }

  if (!is_add)
    return VNET_API_ERROR_NO_SUCH_ENTRY;

  /* add to the auto-address list */
  vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);

  /* If the address is already bound - or static - add it now */
  if (first_int_addr)
    (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                    first_int_addr, ~0, 1);

  return 0;
}
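/**
 * @brief Enable/disable NAT64 on an interface.
 *
 * Sets the inside/outside flags, enables shallow virtual reassembly and the
 * appropriate in2out/out2in (or handoff) feature on the interface.
 */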
int
nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  snat_interface_t *interface = 0, *i;
  snat_address_t *ap;
  const char *feature_name, *arc_name;

  /* Check if interface already exists */
  /* *INDENT-OFF* */
  pool_foreach (i, nm->interfaces,
  ({
    if (i->sw_if_index == sw_if_index)
      {
        interface = i;
        break;
      }
  }));
  /* *INDENT-ON* */

  if (is_add)
    {
      if (interface)
        goto set_flags;

      pool_get (nm->interfaces, interface);
      interface->sw_if_index = sw_if_index;
      interface->flags = 0;
    set_flags:
      if (is_inside)
        interface->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
      else
        interface->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;

      nm->total_enabled_count++;
      vlib_process_signal_event (nm->sm->vlib_main,
                                 nm->nat64_expire_walk_node_index,
                                 NAT64_CLEANER_RESCHEDULE, 0);

    }
  else
    {
      if (!interface)
        return VNET_API_ERROR_NO_SUCH_ENTRY;

      if ((nat_interface_is_inside (interface)
           && nat_interface_is_outside (interface)))
        interface->flags &=
          is_inside ? ~NAT_INTERFACE_FLAG_IS_INSIDE :
          ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
      else
        pool_put (nm->interfaces, interface);

      nm->total_enabled_count--;
    }

  if (!is_inside)
    {
      /* *INDENT-OFF* */
      vec_foreach (ap, nm->addr_pool)
        snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, is_add);
      /* *INDENT-ON* */
    }

  if (nm->sm->num_workers > 1)
    {
      feature_name =
        is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff";
      if (nm->fq_in2out_index == ~0)
        nm->fq_in2out_index =
          vlib_frame_queue_main_init (nat64_in2out_node.index, 0);
      if (nm->fq_out2in_index == ~0)
        nm->fq_out2in_index =
          vlib_frame_queue_main_init (nat64_out2in_node.index, 0);
    }
  else
    feature_name = is_inside ? "nat64-in2out" : "nat64-out2in";

  arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";

  if (is_inside)
    {
      int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
      if (rv)
        return rv;
    }
  else
    {
      int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
      if (rv)
        return rv;
    }

  return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
                                      is_add, 0, 0);
}

void
nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
{
  nat64_main_t *nm = &nat64_main;
  snat_interface_t *i = 0;

  /* *INDENT-OFF* */
  pool_foreach (i, nm->interfaces,
  ({
    if (fn (i, ctx))
      break;
  }));
  /* *INDENT-ON* */
}
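/**
 * @brief Allocate an outside IPv4 address and port for a new dynamic
 * mapping, using the configured address/port allocation callback.
 */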
int
nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
                               ip4_address_t * addr, u16 * port,
                               u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  snat_main_t *sm = nm->sm;
  snat_session_key_t k;
  u32 worker_index = 0;
  int rv;

  k.protocol = proto;

  if (sm->num_workers > 1)
    worker_index = thread_index - sm->first_worker_index;

  rv =
    sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k,
                             sm->port_per_thread, worker_index);

  if (!rv)
    {
      *port = k.port;
      addr->as_u32 = k.addr.as_u32;
    }

  return rv;
}
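/**
 * @brief Return an outside address/port to the pool; registered with the
 * per-worker database as its free callback.
 */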
static void
nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr,
                              u16 port, u8 protocol)
{
  nat64_main_t *nm = &nat64_main;
  int i;
  snat_address_t *a;
  u32 thread_index = db - nm->db;
  snat_protocol_t proto = ip_proto_to_snat_proto (protocol);
  u16 port_host_byte_order = clib_net_to_host_u16 (port);

  for (i = 0; i < vec_len (nm->addr_pool); i++)
    {
      a = nm->addr_pool + i;
      if (addr->as_u32 != a->addr.as_u32)
        continue;
      switch (proto)
        {
#define _(N, j, n, s) \
        case SNAT_PROTOCOL_##N: \
          ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
                  port_host_byte_order) == 1); \
          clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port_host_byte_order, 0); \
          a->busy_##n##_ports--; \
          a->busy_##n##_ports_per_thread[thread_index]--; \
          break;
          foreach_snat_protocol
#undef _
        default:
          nat_elog_notice ("unknown protocol");
          return;
        }
      break;
    }
}
/**
 * @brief Add/delete static BIB entry in worker thread.
 */
static uword
nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
                            vlib_frame_t * f)
{
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;
  nat64_db_t *db = &nm->db[thread_index];
  nat64_static_bib_to_update_t *static_bib;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t addr;

  /* *INDENT-OFF* */
  pool_foreach (static_bib, nm->static_bibs,
  ({
    if ((static_bib->thread_index != thread_index) || (static_bib->done))
      continue;

    if (static_bib->is_add)
      {
          (void) nat64_db_bib_entry_create (thread_index, db,
                                            &static_bib->in_addr,
                                            &static_bib->out_addr,
                                            static_bib->in_port,
                                            static_bib->out_port,
                                            static_bib->fib_index,
                                            static_bib->proto, 1);
          vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                                   db->bib.bib_entries_num);
      }
    else
      {
        addr.as_u64[0] = static_bib->in_addr.as_u64[0];
        addr.as_u64[1] = static_bib->in_addr.as_u64[1];
        bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port,
                                        static_bib->proto,
                                        static_bib->fib_index, 1);
        if (bibe)
          {
            nat64_db_bib_entry_free (thread_index, db, bibe);
            vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                                     db->bib.bib_entries_num);
            vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
                                     db->st.st_entries_num);
          }
      }

      static_bib->done = 1;
  }));
  /* *INDENT-ON* */

  return 0;
}

static vlib_node_registration_t nat64_static_bib_worker_node;

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
    .function = nat64_static_bib_worker_fn,
    .type = VLIB_NODE_TYPE_INPUT,
    .state = VLIB_NODE_STATE_INTERRUPT,
    .name = "nat64-static-bib-worker",
};
/* *INDENT-ON* */
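/**
 * @brief Add/delete a static BIB entry.
 *
 * With multiple workers the request is queued and the owning worker is
 * interrupted to apply it; otherwise the entry is applied directly.
 */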
int
nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
                                ip4_address_t * out_addr, u16 in_port,
                                u16 out_port, u8 proto, u32 vrf_id, u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
                                                     nat_fib_src_hi);
  snat_protocol_t p = ip_proto_to_snat_proto (proto);
  ip46_address_t addr;
  int i;
  snat_address_t *a;
  u32 thread_index = 0;
  nat64_db_t *db;
  nat64_static_bib_to_update_t *static_bib;
  vlib_main_t *worker_vm;
  u32 *to_be_free = 0, *index;

  if (nm->sm->num_workers > 1)
    {
      thread_index = nat64_get_worker_in2out (in_addr);
      db = &nm->db[thread_index];
    }
  else
    db = &nm->db[nm->sm->num_workers];

  addr.as_u64[0] = in_addr->as_u64[0];
  addr.as_u64[1] = in_addr->as_u64[1];
  bibe =
    nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port),
                             proto, fib_index, 1);

  if (is_add)
    {
      if (bibe)
        return VNET_API_ERROR_VALUE_EXIST;

      /* the outside port must be assigned to the same thread as the
         internal address */
      if ((out_port > 1024) && (nm->sm->num_workers > 1))
        {
          if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread))
            return VNET_API_ERROR_INVALID_VALUE_2;
        }

      for (i = 0; i < vec_len (nm->addr_pool); i++)
        {
          a = nm->addr_pool + i;
          if (out_addr->as_u32 != a->addr.as_u32)
            continue;
          switch (p)
            {
#define _(N, j, n, s) \
            case SNAT_PROTOCOL_##N: \
              if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
                                            out_port)) \
                return VNET_API_ERROR_INVALID_VALUE; \
              clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
                                        out_port, 1); \
              if (out_port > 1024) \
                { \
                  a->busy_##n##_ports++; \
                  a->busy_##n##_ports_per_thread[thread_index]++; \
                } \
              break;
              foreach_snat_protocol
#undef _
            default:
              clib_memset (&addr, 0, sizeof (addr));
              addr.ip4.as_u32 = out_addr->as_u32;
              if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0))
                return VNET_API_ERROR_INVALID_VALUE;
            }
          break;
        }
      if (!nm->sm->num_workers)
        {
          bibe =
            nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr,
                                       clib_host_to_net_u16 (in_port),
                                       clib_host_to_net_u16 (out_port),
                                       fib_index, proto, 1);
          if (!bibe)
            return VNET_API_ERROR_UNSPECIFIED;

          vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                                   db->bib.bib_entries_num);
        }
    }
  else
    {
      if (!bibe)
        return VNET_API_ERROR_NO_SUCH_ENTRY;

      if (!nm->sm->num_workers)
        {
          nat64_db_bib_entry_free (thread_index, db, bibe);
          vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                                   db->bib.bib_entries_num);
        }
    }

  if (nm->sm->num_workers)
    {
      /* *INDENT-OFF* */
      pool_foreach (static_bib, nm->static_bibs,
      ({
        if (static_bib->done)
          vec_add1 (to_be_free, static_bib - nm->static_bibs);
      }));
      vec_foreach (index, to_be_free)
        pool_put_index (nm->static_bibs, index[0]);
      /* *INDENT-ON* */
      vec_free (to_be_free);
      pool_get (nm->static_bibs, static_bib);
      static_bib->in_addr.as_u64[0] = in_addr->as_u64[0];
      static_bib->in_addr.as_u64[1] = in_addr->as_u64[1];
      static_bib->in_port = clib_host_to_net_u16 (in_port);
      static_bib->out_addr.as_u32 = out_addr->as_u32;
      static_bib->out_port = clib_host_to_net_u16 (out_port);
      static_bib->fib_index = fib_index;
      static_bib->proto = proto;
      static_bib->is_add = is_add;
      static_bib->thread_index = thread_index;
      static_bib->done = 0;
      worker_vm = vlib_mains[thread_index];
      if (worker_vm)
        vlib_node_set_interrupt_pending (worker_vm,
                                         nat64_static_bib_worker_node.index);
      else
        return VNET_API_ERROR_UNSPECIFIED;
    }

  return 0;
}
int
nat64_set_udp_timeout (u32 timeout)
{
  nat64_main_t *nm = &nat64_main;

  if (timeout == 0)
    nm->udp_timeout = SNAT_UDP_TIMEOUT;
  else
    nm->udp_timeout = timeout;

  return 0;
}

u32
nat64_get_udp_timeout (void)
{
  nat64_main_t *nm = &nat64_main;

  return nm->udp_timeout;
}

int
nat64_set_icmp_timeout (u32 timeout)
{
  nat64_main_t *nm = &nat64_main;

  if (timeout == 0)
    nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
  else
    nm->icmp_timeout = timeout;

  return 0;
}

u32
nat64_get_icmp_timeout (void)
{
  nat64_main_t *nm = &nat64_main;

  return nm->icmp_timeout;
}

int
nat64_set_tcp_timeouts (u32 trans, u32 est)
{
  nat64_main_t *nm = &nat64_main;

  if (trans == 0)
    nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
  else
    nm->tcp_trans_timeout = trans;

  if (est == 0)
    nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
  else
    nm->tcp_est_timeout = est;

  return 0;
}

u32
nat64_get_tcp_trans_timeout (void)
{
  nat64_main_t *nm = &nat64_main;

  return nm->tcp_trans_timeout;
}

u32
nat64_get_tcp_est_timeout (void)
{
  nat64_main_t *nm = &nat64_main;

  return nm->tcp_est_timeout;
}
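/**
 * @brief Reset a session's expiry time based on its protocol and, for TCP,
 * its connection state.
 */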
void
nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm)
{
  nat64_main_t *nm = &nat64_main;
  u32 now = (u32) vlib_time_now (vm);

  switch (ip_proto_to_snat_proto (ste->proto))
    {
    case SNAT_PROTOCOL_ICMP:
      ste->expire = now + nm->icmp_timeout;
      return;
    case SNAT_PROTOCOL_TCP:
      {
        switch (ste->tcp_state)
          {
          case NAT64_TCP_STATE_V4_INIT:
          case NAT64_TCP_STATE_V6_INIT:
          case NAT64_TCP_STATE_V4_FIN_RCV:
          case NAT64_TCP_STATE_V6_FIN_RCV:
          case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV:
          case NAT64_TCP_STATE_TRANS:
            ste->expire = now + nm->tcp_trans_timeout;
            return;
          case NAT64_TCP_STATE_ESTABLISHED:
            ste->expire = now + nm->tcp_est_timeout;
            return;
          default:
            return;
          }
      }
    case SNAT_PROTOCOL_UDP:
      ste->expire = now + nm->udp_timeout;
      return;
    default:
      ste->expire = now + nm->udp_timeout;
      return;
    }
}
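/**
 * @brief Advance the TCP state machine of a NAT64 session based on the
 * observed TCP flags and the packet direction (IPv6 vs. IPv4 side).
 */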
void
nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp,
                             u8 is_ip6)
{
  switch (ste->tcp_state)
    {
    case NAT64_TCP_STATE_CLOSED:
      {
        if (tcp->flags & TCP_FLAG_SYN)
          {
            if (is_ip6)
              ste->tcp_state = NAT64_TCP_STATE_V6_INIT;
            else
              ste->tcp_state = NAT64_TCP_STATE_V4_INIT;
          }
        return;
      }
    case NAT64_TCP_STATE_V4_INIT:
      {
        if (is_ip6 && (tcp->flags & TCP_FLAG_SYN))
          ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
        return;
      }
    case NAT64_TCP_STATE_V6_INIT:
      {
        if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN))
          ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
        return;
      }
    case NAT64_TCP_STATE_ESTABLISHED:
      {
        if (tcp->flags & TCP_FLAG_FIN)
          {
            if (is_ip6)
              ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV;
            else
              ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV;
          }
        else if (tcp->flags & TCP_FLAG_RST)
          {
            ste->tcp_state = NAT64_TCP_STATE_TRANS;
          }
        return;
      }
    case NAT64_TCP_STATE_V4_FIN_RCV:
      {
        if (is_ip6 && (tcp->flags & TCP_FLAG_FIN))
          ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
        return;
      }
    case NAT64_TCP_STATE_V6_FIN_RCV:
      {
        if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN))
          ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
        return;
      }
    case NAT64_TCP_STATE_TRANS:
      {
        if (!(tcp->flags & TCP_FLAG_RST))
          ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
        return;
      }
    default:
      return;
    }
}
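/**
 * @brief Add/delete the NAT64 translation prefix (Pref64) for a tenant VRF.
 */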
int
nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  nat64_prefix_t *p = 0;
  int i;

  /* Verify prefix length */
  if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64
      && plen != 96)
    return VNET_API_ERROR_INVALID_VALUE;

  /* Check if the tenant already has a prefix */
  for (i = 0; i < vec_len (nm->pref64); i++)
    {
      if (nm->pref64[i].vrf_id == vrf_id)
        {
          p = nm->pref64 + i;
          break;
        }
    }

  if (is_add)
    {
      if (!p)
        {
          vec_add2 (nm->pref64, p, 1);
          p->fib_index =
            fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
                                               nat_fib_src_hi);
          p->vrf_id = vrf_id;
        }

      p->prefix.as_u64[0] = prefix->as_u64[0];
      p->prefix.as_u64[1] = prefix->as_u64[1];
      p->plen = plen;
    }
  else
    {
      if (!p)
        return VNET_API_ERROR_NO_SUCH_ENTRY;

      vec_del1 (nm->pref64, i);
    }

  return 0;
}

void
nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
{
  nat64_main_t *nm = &nat64_main;
  nat64_prefix_t *p = 0;

  /* *INDENT-OFF* */
  vec_foreach (p, nm->pref64)
    {
      if (fn (p, ctx))
        break;
    };
  /* *INDENT-ON* */
}
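/**
 * @brief Synthesize an IPv6 address by embedding an IPv4 address into the
 * NAT64 prefix configured for the FIB (RFC 6052); the well-known prefix
 * 64:ff9b::/96 is used when no prefix is configured.
 */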
void
nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_prefix_t *p, *gp = 0, *prefix = 0;

  /* *INDENT-OFF* */
  vec_foreach (p, nm->pref64)
    {
      if (p->fib_index == fib_index)
        {
          prefix = p;
          break;
        }

      if (p->fib_index == 0)
        gp = p;
    };
  /* *INDENT-ON* */

  if (!prefix)
    prefix = gp;

  if (prefix)
    {
      clib_memcpy_fast (ip6, &prefix->prefix, sizeof (ip6_address_t));
      switch (prefix->plen)
        {
        case 32:
          ip6->as_u32[1] = ip4->as_u32;
          break;
        case 40:
          ip6->as_u8[5] = ip4->as_u8[0];
          ip6->as_u8[6] = ip4->as_u8[1];
          ip6->as_u8[7] = ip4->as_u8[2];
          ip6->as_u8[9] = ip4->as_u8[3];
          break;
        case 48:
          ip6->as_u8[6] = ip4->as_u8[0];
          ip6->as_u8[7] = ip4->as_u8[1];
          ip6->as_u8[9] = ip4->as_u8[2];
          ip6->as_u8[10] = ip4->as_u8[3];
          break;
        case 56:
          ip6->as_u8[7] = ip4->as_u8[0];
          ip6->as_u8[9] = ip4->as_u8[1];
          ip6->as_u8[10] = ip4->as_u8[2];
          ip6->as_u8[11] = ip4->as_u8[3];
          break;
        case 64:
          ip6->as_u8[9] = ip4->as_u8[0];
          ip6->as_u8[10] = ip4->as_u8[1];
          ip6->as_u8[11] = ip4->as_u8[2];
          ip6->as_u8[12] = ip4->as_u8[3];
          break;
        case 96:
          ip6->as_u32[3] = ip4->as_u32;
          break;
        default:
          nat_elog_notice ("invalid prefix length");
          break;
        }
    }
  else
    {
      clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t));
      ip6->as_u32[3] = ip4->as_u32;
    }
}
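/**
 * @brief Extract the embedded IPv4 address from a NAT64-synthesized IPv6
 * address, using the prefix length configured for the FIB (defaulting to
 * the /96 well-known prefix).
 */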
void
nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_prefix_t *p, *gp = 0;
  u8 plen = 0;

  /* *INDENT-OFF* */
  vec_foreach (p, nm->pref64)
    {
      if (p->fib_index == fib_index)
        {
          plen = p->plen;
          break;
        }

      if (p->vrf_id == 0)
        gp = p;
    };
  /* *INDENT-ON* */

  if (!plen)
    {
      if (gp)
        plen = gp->plen;
      else
        plen = 96;
    }

  switch (plen)
    {
    case 32:
      ip4->as_u32 = ip6->as_u32[1];
      break;
    case 40:
      ip4->as_u8[0] = ip6->as_u8[5];
      ip4->as_u8[1] = ip6->as_u8[6];
      ip4->as_u8[2] = ip6->as_u8[7];
      ip4->as_u8[3] = ip6->as_u8[9];
      break;
    case 48:
      ip4->as_u8[0] = ip6->as_u8[6];
      ip4->as_u8[1] = ip6->as_u8[7];
      ip4->as_u8[2] = ip6->as_u8[9];
      ip4->as_u8[3] = ip6->as_u8[10];
      break;
    case 56:
      ip4->as_u8[0] = ip6->as_u8[7];
      ip4->as_u8[1] = ip6->as_u8[9];
      ip4->as_u8[2] = ip6->as_u8[10];
      ip4->as_u8[3] = ip6->as_u8[11];
      break;
    case 64:
      ip4->as_u8[0] = ip6->as_u8[9];
      ip4->as_u8[1] = ip6->as_u8[10];
      ip4->as_u8[2] = ip6->as_u8[11];
      ip4->as_u8[3] = ip6->as_u8[12];
      break;
    case 96:
      ip4->as_u32 = ip6->as_u32[3];
      break;
    default:
      nat_elog_notice ("invalid prefix length");
      break;
    }
}
/**
 * @brief Per-worker input node that frees expired NAT64 sessions and
 * updates the BIB/session counters.
 */
static uword
nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
                             vlib_frame_t * f)
{
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;
  nat64_db_t *db = &nm->db[thread_index];
  u32 now = (u32) vlib_time_now (vm);

  nad64_db_st_free_expired (thread_index, db, now);
  vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                           db->bib.bib_entries_num);
  vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
                           db->st.st_entries_num);

  return 0;
}

static vlib_node_registration_t nat64_expire_worker_walk_node;

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = {
    .function = nat64_expire_worker_walk_fn,
    .type = VLIB_NODE_TYPE_INPUT,
    .state = VLIB_NODE_STATE_INTERRUPT,
    .name = "nat64-expire-worker-walk",
};
/* *INDENT-ON* */

static vlib_node_registration_t nat64_expire_walk_node;

/**
 * @brief Centralized process that drives the per-worker expire walk.
 */
static uword
nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
                      vlib_frame_t * f)
{
  nat64_main_t *nm = &nat64_main;
  vlib_main_t **worker_vms = 0, *worker_vm;
  int i;
  uword event_type, *event_data = 0;

  nm->nat64_expire_walk_node_index = nat64_expire_walk_node.index;

  if (vec_len (vlib_mains) == 0)
    vec_add1 (worker_vms, vm);
  else
    {
      for (i = 0; i < vec_len (vlib_mains); i++)
        {
          worker_vm = vlib_mains[i];
          if (worker_vm)
            vec_add1 (worker_vms, worker_vm);
        }
    }

  while (1)
    {
      if (nm->total_enabled_count)
        {
          vlib_process_wait_for_event_or_clock (vm, 10.0);
          event_type = vlib_process_get_events (vm, &event_data);
        }
      else
        {
          vlib_process_wait_for_event (vm);
          event_type = vlib_process_get_events (vm, &event_data);
        }

      switch (event_type)
        {
        case ~0:
          break;
        case NAT64_CLEANER_RESCHEDULE:
          break;
        default:
          nat_elog_notice_X1 ("unknown event %d", "i4", event_type);
          break;
        }

      for (i = 0; i < vec_len (worker_vms); i++)
        {
          worker_vm = worker_vms[i];
          vlib_node_set_interrupt_pending (worker_vm,
                                           nat64_expire_worker_walk_node.index);
        }
    }

  return 0;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_expire_walk_node, static) = {
    .function = nat64_expire_walk_fn,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "nat64-expire-walk",
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */