ip4_map.c revision 640edcd9
1/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * Defines used for testing various optimisation schemes
17 */
18
19#include "map.h"
20#include <vnet/ip/ip_frag.h>
21#include <vnet/ip/ip4_to_ip6.h>
22
/*
 * Next-node indices used by the ip4-map graph node.  Each enumerator is
 * the index of the matching entry in the .next_nodes table of the
 * ip4_map_node registration in this file.
 */
enum ip4_map_next_e
{
  IP4_MAP_NEXT_IP6_LOOKUP,	/* "ip6-lookup" (default next) */
#ifdef MAP_SKIP_IP6_LOOKUP
  IP4_MAP_NEXT_IP6_REWRITE,	/* "ip6-load-balance"; exists only with MAP_SKIP_IP6_LOOKUP */
#endif
  IP4_MAP_NEXT_IP4_FRAGMENT,	/* "ip4-frag" */
  IP4_MAP_NEXT_IP6_FRAGMENT,	/* "ip6-frag" */
  IP4_MAP_NEXT_ICMP_ERROR,	/* "ip4-icmp-error" */
  IP4_MAP_NEXT_DROP,		/* "error-drop" */
  IP4_MAP_N_NEXT,		/* count of next nodes, not a next itself */
};
35
36static_always_inline u16
37ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
38				 u8 * error)
39{
40  u16 port;
41  if (d->psid_length > 0)
42    {
43      ip4_header_t *ip = vlib_buffer_get_current (b0);
44
45      if (PREDICT_FALSE
46	  ((ip->ip_version_and_header_length != 0x45)
47	   || clib_host_to_net_u16 (ip->length) < 28))
48	{
49	  return 0;
50	}
51
52      port = vnet_buffer (b0)->ip.reass.l4_dst_port;
53
54      /* Verify that port is not among the well-known ports */
55      if ((d->psid_offset > 0)
56	  && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
57	{
58	  *error = MAP_ERROR_ENCAP_SEC_CHECK;
59	}
60      else
61	{
62	  return port;
63	}
64    }
65  return (0);
66}
67
68/*
69 * ip4_map_vtcfl
70 */
71static_always_inline u32
72ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
73{
74  map_main_t *mm = &map_main;
75  u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
76  u32 vtcfl = 0x6 << 28;
77  vtcfl |= tc << 20;
78  vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
79
80  return (clib_host_to_net_u32 (vtcfl));
81}
82
83static_always_inline bool
84ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
85{
86#ifdef MAP_SKIP_IP6_LOOKUP
87  if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
88    {
89      vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
90	pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
91      return (true);
92    }
93#endif
94  return (false);
95}
96
97/*
98 * ip4_map_ttl
99 */
100static inline void
101ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
102{
103  i32 ttl = ip->ttl;
104
105  /* Input node should have reject packets with ttl 0. */
106  ASSERT (ip->ttl > 0);
107
108  u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
109  checksum += checksum >= 0xffff;
110  ip->checksum = checksum;
111  ttl -= 1;
112  ip->ttl = ttl;
113  *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
114
115  /* Verify checksum. */
116  ASSERT (ip->checksum == ip4_header_checksum (ip));
117}
118
119static u32
120ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
121{
122  map_main_t *mm = &map_main;
123
124  if (mm->frag_inner)
125    {
126      // TODO: Fix inner fragmentation after removed inner support from ip-frag.
127      ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
128			       IP4_FRAG_NEXT_IP6_LOOKUP,
129			       IP_FRAG_FLAG_IP6_HEADER);
130      return (IP4_MAP_NEXT_IP4_FRAGMENT);
131    }
132  else
133    {
134      if (df && !mm->frag_ignore_df)
135	{
136	  icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
137				       ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
138				       mtu);
139	  vlib_buffer_advance (b, sizeof (ip6_header_t));
140	  *error = MAP_ERROR_DF_SET;
141	  return (IP4_MAP_NEXT_ICMP_ERROR);
142	}
143      ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
144			       IP_FRAG_FLAG_IP6_HEADER);
145      return (IP4_MAP_NEXT_IP6_FRAGMENT);
146    }
147}
148
149/*
150 * ip4_map
151 */
152static uword
153ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
154{
155  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
156  vlib_node_runtime_t *error_node =
157    vlib_node_get_runtime (vm, ip4_map_node.index);
158  from = vlib_frame_vector_args (frame);
159  n_left_from = frame->n_vectors;
160  next_index = node->cached_next_index;
161  map_main_t *mm = &map_main;
162  vlib_combined_counter_main_t *cm = mm->domain_counters;
163  u32 thread_index = vm->thread_index;
164
165  while (n_left_from > 0)
166    {
167      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
168
169      /* Dual loop */
170      while (n_left_from >= 4 && n_left_to_next >= 2)
171	{
172	  u32 pi0, pi1;
173	  vlib_buffer_t *p0, *p1;
174	  map_domain_t *d0, *d1;
175	  u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
176	  ip4_header_t *ip40, *ip41;
177	  u16 port0 = 0, port1 = 0;
178	  ip6_header_t *ip6h0, *ip6h1;
179	  u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
180	  u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
181	    IP4_MAP_NEXT_IP6_LOOKUP;
182
183	  /* Prefetch next iteration. */
184	  {
185	    vlib_buffer_t *p2, *p3;
186
187	    p2 = vlib_get_buffer (vm, from[2]);
188	    p3 = vlib_get_buffer (vm, from[3]);
189
190	    vlib_prefetch_buffer_header (p2, STORE);
191	    vlib_prefetch_buffer_header (p3, STORE);
192	    /* IPv4 + 8 = 28. possibly plus -40 */
193	    CLIB_PREFETCH (p2->data - 40, 68, STORE);
194	    CLIB_PREFETCH (p3->data - 40, 68, STORE);
195	  }
196
197	  pi0 = to_next[0] = from[0];
198	  pi1 = to_next[1] = from[1];
199	  from += 2;
200	  n_left_from -= 2;
201	  to_next += 2;
202	  n_left_to_next -= 2;
203
204	  p0 = vlib_get_buffer (vm, pi0);
205	  p1 = vlib_get_buffer (vm, pi1);
206	  ip40 = vlib_buffer_get_current (p0);
207	  ip41 = vlib_buffer_get_current (p1);
208	  d0 =
209	    ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
210				&error0);
211	  d1 =
212	    ip4_map_get_domain (&ip41->dst_address, &map_domain_index1,
213				&error1);
214
215	  /*
216	   * Shared IPv4 address
217	   */
218	  port0 = ip4_map_port_and_security_check (d0, p0, &error0);
219	  port1 = ip4_map_port_and_security_check (d1, p1, &error1);
220
221	  /* Decrement IPv4 TTL */
222	  ip4_map_decrement_ttl (ip40, &error0);
223	  ip4_map_decrement_ttl (ip41, &error1);
224	  bool df0 =
225	    ip40->flags_and_fragment_offset &
226	    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
227	  bool df1 =
228	    ip41->flags_and_fragment_offset &
229	    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
230
231	  /* MAP calc */
232	  u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
233	  u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
234	  u16 dp40 = clib_net_to_host_u16 (port0);
235	  u16 dp41 = clib_net_to_host_u16 (port1);
236	  u64 dal60 = map_get_pfx (d0, da40, dp40);
237	  u64 dal61 = map_get_pfx (d1, da41, dp41);
238	  u64 dar60 = map_get_sfx (d0, da40, dp40);
239	  u64 dar61 = map_get_sfx (d1, da41, dp41);
240	  if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
241	    error0 = MAP_ERROR_NO_BINDING;
242	  if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
243	    error1 = MAP_ERROR_NO_BINDING;
244
245	  /* construct ipv6 header */
246	  vlib_buffer_advance (p0, -sizeof (ip6_header_t));
247	  vlib_buffer_advance (p1, -sizeof (ip6_header_t));
248	  ip6h0 = vlib_buffer_get_current (p0);
249	  ip6h1 = vlib_buffer_get_current (p1);
250	  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
251	  vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
252
253	  ip6h0->ip_version_traffic_class_and_flow_label =
254	    ip4_map_vtcfl (ip40, p0);
255	  ip6h1->ip_version_traffic_class_and_flow_label =
256	    ip4_map_vtcfl (ip41, p1);
257	  ip6h0->payload_length = ip40->length;
258	  ip6h1->payload_length = ip41->length;
259	  ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
260	  ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
261	  ip6h0->hop_limit = 0x40;
262	  ip6h1->hop_limit = 0x40;
263	  ip6h0->src_address = d0->ip6_src;
264	  ip6h1->src_address = d1->ip6_src;
265	  ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
266	  ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
267	  ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
268	  ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
269
270	  /*
271	   * Determine next node. Can be one of:
272	   * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
273	   */
274	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
275	    {
276	      if (PREDICT_FALSE
277		  (d0->mtu
278		   && (clib_net_to_host_u16 (ip6h0->payload_length) +
279		       sizeof (*ip6h0) > d0->mtu)))
280		{
281		  next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
282		}
283	      else
284		{
285		  next0 =
286		    ip4_map_ip6_lookup_bypass (p0,
287					       ip40) ?
288		    IP4_MAP_NEXT_IP6_REWRITE : next0;
289		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
290						   thread_index,
291						   map_domain_index0, 1,
292						   clib_net_to_host_u16
293						   (ip6h0->payload_length) +
294						   40);
295		}
296	    }
297	  else
298	    {
299	      next0 = IP4_MAP_NEXT_DROP;
300	    }
301
302	  /*
303	   * Determine next node. Can be one of:
304	   * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
305	   */
306	  if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
307	    {
308	      if (PREDICT_FALSE
309		  (d1->mtu
310		   && (clib_net_to_host_u16 (ip6h1->payload_length) +
311		       sizeof (*ip6h1) > d1->mtu)))
312		{
313		  next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
314		}
315	      else
316		{
317		  next1 =
318		    ip4_map_ip6_lookup_bypass (p1,
319					       ip41) ?
320		    IP4_MAP_NEXT_IP6_REWRITE : next1;
321		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
322						   thread_index,
323						   map_domain_index1, 1,
324						   clib_net_to_host_u16
325						   (ip6h1->payload_length) +
326						   40);
327		}
328	    }
329	  else
330	    {
331	      next1 = IP4_MAP_NEXT_DROP;
332	    }
333
334	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
335	    {
336	      map_add_trace (vm, node, p0, map_domain_index0, port0);
337	    }
338	  if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
339	    {
340	      map_add_trace (vm, node, p1, map_domain_index1, port0);
341	    }
342
343	  p0->error = error_node->errors[error0];
344	  p1->error = error_node->errors[error1];
345
346	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
347					   n_left_to_next, pi0, pi1, next0,
348					   next1);
349	}
350
351      while (n_left_from > 0 && n_left_to_next > 0)
352	{
353	  u32 pi0;
354	  vlib_buffer_t *p0;
355	  map_domain_t *d0;
356	  u8 error0 = MAP_ERROR_NONE;
357	  ip4_header_t *ip40;
358	  u16 port0 = 0;
359	  ip6_header_t *ip6h0;
360	  u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
361	  u32 map_domain_index0 = ~0;
362
363	  pi0 = to_next[0] = from[0];
364	  from += 1;
365	  n_left_from -= 1;
366	  to_next += 1;
367	  n_left_to_next -= 1;
368
369	  p0 = vlib_get_buffer (vm, pi0);
370	  ip40 = vlib_buffer_get_current (p0);
371
372	  d0 =
373	    ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
374				&error0);
375	  if (!d0)
376	    {			/* Guess it wasn't for us */
377	      vnet_feature_next (&next0, p0);
378	      goto exit;
379	    }
380
381	  /*
382	   * Shared IPv4 address
383	   */
384	  port0 = ip4_map_port_and_security_check (d0, p0, &error0);
385
386	  /* Decrement IPv4 TTL */
387	  ip4_map_decrement_ttl (ip40, &error0);
388	  bool df0 =
389	    ip40->flags_and_fragment_offset &
390	    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
391
392	  /* MAP calc */
393	  u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
394	  u16 dp40 = clib_net_to_host_u16 (port0);
395	  u64 dal60 = map_get_pfx (d0, da40, dp40);
396	  u64 dar60 = map_get_sfx (d0, da40, dp40);
397	  if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
398	    error0 = MAP_ERROR_NO_BINDING;
399
400	  /* construct ipv6 header */
401	  vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
402	  ip6h0 = vlib_buffer_get_current (p0);
403	  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
404
405	  ip6h0->ip_version_traffic_class_and_flow_label =
406	    ip4_map_vtcfl (ip40, p0);
407	  ip6h0->payload_length = ip40->length;
408	  ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
409	  ip6h0->hop_limit = 0x40;
410	  ip6h0->src_address = d0->ip6_src;
411	  ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
412	  ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
413
414	  /*
415	   * Determine next node. Can be one of:
416	   * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
417	   */
418	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
419	    {
420	      if (PREDICT_FALSE
421		  (d0->mtu
422		   && (clib_net_to_host_u16 (ip6h0->payload_length) +
423		       sizeof (*ip6h0) > d0->mtu)))
424		{
425		  next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
426		}
427	      else
428		{
429		  next0 =
430		    ip4_map_ip6_lookup_bypass (p0,
431					       ip40) ?
432		    IP4_MAP_NEXT_IP6_REWRITE : next0;
433		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
434						   thread_index,
435						   map_domain_index0, 1,
436						   clib_net_to_host_u16
437						   (ip6h0->payload_length) +
438						   40);
439		}
440	    }
441	  else
442	    {
443	      next0 = IP4_MAP_NEXT_DROP;
444	    }
445
446	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
447	    {
448	      map_add_trace (vm, node, p0, map_domain_index0, port0);
449	    }
450
451	  p0->error = error_node->errors[error0];
452	exit:
453	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
454					   n_left_to_next, pi0, next0);
455	}
456      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
457    }
458
459  return frame->n_vectors;
460}
461
/*
 * Error strings of the ip4-map node.  The table is generated by expanding
 * foreach_map_error with _() defined to emit only the string argument, so
 * it holds one entry per _(sym, string) pair in list order.
 */
static char *map_error_strings[] = {
#define _(sym,string) string,
  foreach_map_error
#undef _
};
467
468
469/* *INDENT-OFF* */
/*
 * Register "ip4-map" as a feature on the "ip4-unicast" arc, ordered before
 * "ip4-flow-classify" and after "ip4-sv-reassembly-feature".
 * NOTE(review): the port check in this file reads ip.reass.l4_dst_port
 * from the buffer metadata; presumably that is why the node must run
 * after the reassembly feature - confirm.
 */
VNET_FEATURE_INIT (ip4_map_feature, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-map",
  .runs_before = VNET_FEATURES ("ip4-flow-classify"),
  .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
};
477
/*
 * Graph node registration for "ip4-map": an internal node whose frames
 * carry u32 elements, run by ip4_map and traced with format_map_trace.
 */
VLIB_REGISTER_NODE(ip4_map_node) = {
  .function = ip4_map,
  .name = "ip4-map",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  /* Error counters: one per entry of map_error_strings. */
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,

  /* Next nodes, indexed by enum ip4_map_next_e. */
  .n_next_nodes = IP4_MAP_N_NEXT,
  .next_nodes = {
    [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
    /* Used when the IPv6 lookup is bypassed (see ip4_map_ip6_lookup_bypass). */
    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
#endif
    [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
    [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_MAP_NEXT_DROP] = "error-drop",
  },
};
500/* *INDENT-ON* */
501
502/*
503 * fd.io coding-style-patch-verification: ON
504 *
505 * Local Variables:
506 * eval: (c-set-style "gnu")
507 * End:
508 */
509