1/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * Defines used for testing various optimisation schemes
17 */
18
19#include "map.h"
20#include <vnet/ip/ip_frag.h>
21#include <vnet/ip/ip4_to_ip6.h>
22
/*
 * Next-node indices of the ip4-map graph node.
 *
 * The enumerators index the next_nodes[] table of the node registration,
 * so their order must match that table.
 */
enum ip4_map_next_e
{
  /* "ip6-lookup" */
  IP4_MAP_NEXT_IP6_LOOKUP = 0,
#ifdef MAP_SKIP_IP6_LOOKUP
  /* "ip6-load-balance"; only exists when MAP_SKIP_IP6_LOOKUP is defined */
  IP4_MAP_NEXT_IP6_REWRITE,
#endif
  /* "ip4-icmp-error" */
  IP4_MAP_NEXT_ICMP_ERROR,
  /* "error-drop" */
  IP4_MAP_NEXT_DROP,
  /* Number of next nodes; keep last */
  IP4_MAP_N_NEXT,
};
33
34static_always_inline u16
35ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
36				 u8 * error)
37{
38  u16 port;
39  if (d->psid_length > 0)
40    {
41      ip4_header_t *ip = vlib_buffer_get_current (b0);
42
43      if (PREDICT_FALSE
44	  ((ip->ip_version_and_header_length != 0x45)
45	   || clib_host_to_net_u16 (ip->length) < 28))
46	{
47	  return 0;
48	}
49
50      port = vnet_buffer (b0)->ip.reass.l4_dst_port;
51
52      /* Verify that port is not among the well-known ports */
53      if ((d->psid_offset > 0)
54	  && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
55	{
56	  *error = MAP_ERROR_ENCAP_SEC_CHECK;
57	}
58      else
59	{
60	  return port;
61	}
62    }
63  return (0);
64}
65
66/*
67 * ip4_map_vtcfl
68 */
69static_always_inline u32
70ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
71{
72  map_main_t *mm = &map_main;
73  u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
74  u32 vtcfl = 0x6 << 28;
75  vtcfl |= tc << 20;
76  vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
77
78  return (clib_host_to_net_u32 (vtcfl));
79}
80
81/*
82 * ip4_map_ttl
83 */
84static inline void
85ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
86{
87  i32 ttl = ip->ttl;
88
89  /* Input node should have reject packets with ttl 0. */
90  ASSERT (ip->ttl > 0);
91
92  u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
93  checksum += checksum >= 0xffff;
94  ip->checksum = checksum;
95  ttl -= 1;
96  ip->ttl = ttl;
97  *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
98
99  /* Verify checksum. */
100  ASSERT (ip->checksum == ip4_header_checksum (ip));
101}
102
103static u32
104ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers,
105		  u8 * error)
106{
107  map_main_t *mm = &map_main;
108  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
109
110  if (mm->frag_inner)
111    {
112      /* IPv4 fragmented packets inside of IPv6 */
113      ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers);
114
115      /* Fixup */
116      u32 *i;
117      vec_foreach (i, *buffers)
118      {
119	vlib_buffer_t *p = vlib_get_buffer (vm, *i);
120	ip6_header_t *ip6 = vlib_buffer_get_current (p);
121	ip6->payload_length =
122	  clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t));
123      }
124    }
125  else
126    {
127      if (df && !mm->frag_ignore_df)
128	{
129	  icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
130				       ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
131				       mtu);
132	  vlib_buffer_advance (b, sizeof (ip6_header_t));
133	  *error = MAP_ERROR_DF_SET;
134	  return (IP4_MAP_NEXT_ICMP_ERROR);
135	}
136
137      /* Create IPv6 fragments here */
138      ip6_frag_do_fragment (vm, bi, mtu, 0, buffers);
139    }
140  return (IP4_MAP_NEXT_IP6_LOOKUP);
141}
142
143/*
144 * ip4_map
145 */
146static uword
147ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
148{
149  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
150  vlib_node_runtime_t *error_node =
151    vlib_node_get_runtime (vm, ip4_map_node.index);
152  from = vlib_frame_vector_args (frame);
153  n_left_from = frame->n_vectors;
154  next_index = node->cached_next_index;
155  map_main_t *mm = &map_main;
156  vlib_combined_counter_main_t *cm = mm->domain_counters;
157  u32 thread_index = vm->thread_index;
158
159  while (n_left_from > 0)
160    {
161      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
162      while (n_left_from > 0 && n_left_to_next > 0)
163	{
164	  u32 pi0;
165	  vlib_buffer_t *p0;
166	  map_domain_t *d0;
167	  u8 error0 = MAP_ERROR_NONE;
168	  ip4_header_t *ip40;
169	  u16 port0 = 0;
170	  ip6_header_t *ip6h0;
171	  u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
172	  u32 map_domain_index0 = ~0;
173	  u32 *buffer0 = 0;
174	  bool free_original_buffer0 = false;
175	  u32 *frag_from0, frag_left0;
176
177	  pi0 = to_next[0] = from[0];
178	  from += 1;
179	  n_left_from -= 1;
180
181	  p0 = vlib_get_buffer (vm, pi0);
182	  ip40 = vlib_buffer_get_current (p0);
183
184	  d0 =
185	    ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
186				&error0);
187	  if (!d0)
188	    {			/* Guess it wasn't for us */
189	      vnet_feature_next (&next0, p0);
190	      goto exit;
191	    }
192
193	  /*
194	   * Shared IPv4 address
195	   */
196	  port0 = ip4_map_port_and_security_check (d0, p0, &error0);
197
198	  /*
199	   * Clamp TCP MSS value.
200	   */
201	  if (ip40->protocol == IP_PROTOCOL_TCP)
202	    {
203	      tcp_header_t *tcp = ip4_next_header (ip40);
204	      if (mm->tcp_mss > 0 && tcp_syn (tcp))
205		{
206		  ip_csum_t csum = tcp->checksum;
207		  map_mss_clamping (tcp, &csum, mm->tcp_mss);
208		  tcp->checksum = ip_csum_fold (csum);
209		}
210	    }
211
212	  /* Decrement IPv4 TTL */
213	  ip4_map_decrement_ttl (ip40, &error0);
214	  bool df0 =
215	    ip40->flags_and_fragment_offset &
216	    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
217
218	  /* MAP calc */
219	  u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
220	  u16 dp40 = clib_net_to_host_u16 (port0);
221	  u64 dal60 = map_get_pfx (d0, da40, dp40);
222	  u64 dar60 = map_get_sfx (d0, da40, dp40);
223	  if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
224	    error0 = MAP_ERROR_NO_BINDING;
225
226	  /* construct ipv6 header */
227	  vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
228	  ip6h0 = vlib_buffer_get_current (p0);
229	  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
230
231	  ip6h0->ip_version_traffic_class_and_flow_label =
232	    ip4_map_vtcfl (ip40, p0);
233	  ip6h0->payload_length = ip40->length;
234	  ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
235	  ip6h0->hop_limit = 0x40;
236	  ip6h0->src_address = d0->ip6_src;
237	  ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
238	  ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
239
240	  /*
241	   * Determine next node. Can be one of:
242	   * ip6-lookup, ip6-rewrite, error-drop
243	   */
244	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
245	    {
246	      if (PREDICT_FALSE
247		  (d0->mtu
248		   && (clib_net_to_host_u16 (ip6h0->payload_length) +
249		       sizeof (*ip6h0) > d0->mtu)))
250		{
251		  next0 =
252		    ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0,
253				      &error0);
254
255		  if (error0 == MAP_ERROR_NONE)
256		    {
257		      free_original_buffer0 = true;
258		    }
259		}
260	      else
261		{
262		  next0 =
263		    ip4_map_ip6_lookup_bypass (p0,
264					       ip40) ?
265		    IP4_MAP_NEXT_IP6_REWRITE : next0;
266		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
267						   thread_index,
268						   map_domain_index0, 1,
269						   clib_net_to_host_u16
270						   (ip6h0->payload_length) +
271						   40);
272		}
273	    }
274	  else
275	    {
276	      next0 = IP4_MAP_NEXT_DROP;
277	    }
278
279	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
280	    {
281	      map_add_trace (vm, node, p0, map_domain_index0, port0);
282	    }
283
284	  p0->error = error_node->errors[error0];
285	exit:
286	  /* Send fragments that were added in the frame */
287	  if (free_original_buffer0)
288	    {
289	      vlib_buffer_free_one (vm, pi0);	/* Free original packet */
290	    }
291	  else
292	    {
293	      vec_add1 (buffer0, pi0);
294	    }
295
296	  frag_from0 = buffer0;
297	  frag_left0 = vec_len (buffer0);
298
299	  while (frag_left0 > 0)
300	    {
301	      while (frag_left0 > 0 && n_left_to_next > 0)
302		{
303		  u32 i0;
304		  i0 = to_next[0] = frag_from0[0];
305		  frag_from0 += 1;
306		  frag_left0 -= 1;
307		  to_next += 1;
308		  n_left_to_next -= 1;
309
310		  vlib_get_buffer (vm, i0)->error =
311		    error_node->errors[error0];
312		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
313						   to_next, n_left_to_next,
314						   i0, next0);
315		}
316	      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
317	      vlib_get_next_frame (vm, node, next_index, to_next,
318				   n_left_to_next);
319	    }
320	  vec_reset_length (buffer0);
321	}
322      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
323    }
324
325  return frame->n_vectors;
326}
327
328static char *map_error_strings[] = {
329#define _(sym,string) string,
330  foreach_map_error
331#undef _
332};
333
334
335/* *INDENT-OFF* */
336VNET_FEATURE_INIT (ip4_map_feature, static) =
337{
338  .arc_name = "ip4-unicast",
339  .node_name = "ip4-map",
340  .runs_before = VNET_FEATURES ("ip4-flow-classify"),
341  .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
342};
343
344VLIB_REGISTER_NODE(ip4_map_node) = {
345  .function = ip4_map,
346  .name = "ip4-map",
347  .vector_size = sizeof(u32),
348  .format_trace = format_map_trace,
349  .type = VLIB_NODE_TYPE_INTERNAL,
350
351  .n_errors = MAP_N_ERROR,
352  .error_strings = map_error_strings,
353
354  .n_next_nodes = IP4_MAP_N_NEXT,
355  .next_nodes = {
356    [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
357#ifdef MAP_SKIP_IP6_LOOKUP
358    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
359#endif
360    [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
361    [IP4_MAP_NEXT_DROP] = "error-drop",
362  },
363};
364/* *INDENT-ON* */
365
366/*
367 * fd.io coding-style-patch-verification: ON
368 *
369 * Local Variables:
370 * eval: (c-set-style "gnu")
371 * End:
372 */
373