tcp_misc.h revision c5f8f7f0
1/*
2 * Copyright (c) 2016-2017  Intel Corporation.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef _TCP_MISC_H_
17#define _TCP_MISC_H_
18
19#include "net_misc.h"
20#include <rte_tcp.h>
21#include <rte_cycles.h>
22
23#ifdef __cplusplus
24extern "C" {
25#endif
26
/*
 * TCP protocol related structure and function definitions.
 * Their main purpose is to simplify (and optimise) the processing and
 * representation of protocol related data.
 */
32
/* TCP window scale values: the default one and "no scaling". */
#define	TCP_WSCALE_DEFAULT	7
#define	TCP_WSCALE_NONE		0

/* max TX header size: base TCP header plus the largest option set we emit */
#define	TCP_TX_HDR_MAX	(sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_MAX)

/* max header size for normal data+ack packet */
#define	TCP_TX_HDR_DACK	(sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_TMS)
40
/* minimal MSS values for IPv4 and IPv6. */
#define	TCP4_MIN_MSS	536

#define	TCP6_MIN_MSS	1220

/* default MTU, no TCP options. */
#define TCP4_NOP_MSS	\
	(ETHER_MTU - sizeof(struct ipv4_hdr) - sizeof(struct tcp_hdr))

#define TCP6_NOP_MSS	\
	(ETHER_MTU - sizeof(struct ipv6_hdr) - sizeof(struct tcp_hdr))

/* default MTU, TCP options present */
#define TCP4_OP_MSS	(TCP4_NOP_MSS - TCP_TX_OPT_LEN_MAX)

#define TCP6_OP_MSS	(TCP6_NOP_MSS - TCP_TX_OPT_LEN_MAX)
56
/*
 * Initial Window configuration parameter; will probably become
 * configurable at startup in the future.
 */
#define TCP_INITIAL_CWND_MAX 14600
60
/*
 * TCP flags: one bit per flag, as stored in the flags octet
 * of the TCP header.
 */
#define	TCP_FLAG_FIN	0x01
#define	TCP_FLAG_SYN	0x02
#define	TCP_FLAG_RST	0x04
#define	TCP_FLAG_PSH	0x08
#define	TCP_FLAG_ACK	0x10
#define	TCP_FLAG_URG	0x20

/* TCP flags mask: all eight bits of the flags octet. */
#define	TCP_FLAG_MASK	UINT8_MAX
73
/*
 * packet type and TCP flags packed together, so that both can be
 * read/compared as one 16-bit value through 'raw'.
 */
union typflg {
	uint16_t raw;
	struct {
		uint8_t type;  /* TLE_V4/TLE_V6 */
		uint8_t flags; /* TCP header flags */
	};
};
81
82union pkt_info {
83	rte_xmm_t raw;
84	struct {
85		union typflg tf;
86		uint16_t csf;  /* checksum flags */
87		union l4_ports port;
88		union {
89			union ipv4_addrs addr4;
90			const union ipv6_addrs *addr6;
91		};
92	};
93};
94
95union seg_info {
96	rte_xmm_t raw;
97	struct {
98		uint32_t seq;
99		uint32_t ack;
100		uint16_t wnd;
101		uint16_t mss; /* valid only at SYN time */
102	};
103};
104
/* <sequence number, length> pair, accessible as one 64-bit value. */
union seqlen {
	uint64_t raw;
	struct {
		uint32_t seq;
		uint32_t len;
	};
};
112
/* alignment (in bytes) used when rounding up the TCP options length */
#define	TCP_DATA_ALIGN	4

#define	TCP_DATA_OFFSET	4
116
/*
 * recognizable options: value of the 'kind' octet.
 */
#define	TCP_OPT_KIND_EOL	0x00
#define	TCP_OPT_KIND_NOP	0x01
#define	TCP_OPT_KIND_MSS	0x02
#define	TCP_OPT_KIND_WSC	0x03
#define	TCP_OPT_KIND_TMS	0x08

/*
 * total length (in bytes) of each recognizable option.
 */
#define	TCP_OPT_LEN_EOL		0x01
#define	TCP_OPT_LEN_NOP		0x01
#define	TCP_OPT_LEN_MSS		0x04
#define	TCP_OPT_LEN_WSC		0x03
#define	TCP_OPT_LEN_TMS		0x0a

/*
 * max space taken by the options we generate:
 * MSS + WSC + TMS + EOL, rounded up to TCP_DATA_ALIGN.
 */
#define	TCP_TX_OPT_LEN_MAX	\
	RTE_ALIGN_CEIL(TCP_OPT_LEN_MSS + TCP_OPT_LEN_WSC + TCP_OPT_LEN_TMS + \
		TCP_OPT_LEN_EOL, TCP_DATA_ALIGN)
135
/*
 * recommended format for TSOPT from RFC 1323, appendix A:
 *  +--------+--------+--------+--------+
 *  |   NOP  |  NOP   |  TSopt |   10   |
 *  +--------+--------+--------+--------+
 *  |          TSval   timestamp        |
 *  +--------+--------+--------+--------+
 *  |          TSecr   timestamp        |
 *  +--------+--------+--------+--------+
 */
#define	TCP_TX_OPT_LEN_TMS	(TCP_OPT_LEN_TMS + 2 * TCP_OPT_LEN_NOP)

/*
 * first 32-bit word of the layout above (NOP, NOP, kind, length)
 * as it appears in the packet. The value is composed in CPU order and
 * then converted to big-endian, hence rte_cpu_to_be_32().
 */
#define TCP_OPT_TMS_HDR		(rte_cpu_to_be_32( \
	TCP_OPT_KIND_NOP << 3 * CHAR_BIT | \
	TCP_OPT_KIND_NOP << 2 * CHAR_BIT | \
	TCP_OPT_KIND_TMS << CHAR_BIT | \
	TCP_OPT_LEN_TMS))

/* 16-bit <kind, length> option prefix as it appears in the packet. */
#define	TCP_OPT_KL(k, l)	(rte_cpu_to_be_16((k) << CHAR_BIT | (l)))

#define	TCP_OPT_KL_MSS		TCP_OPT_KL(TCP_OPT_KIND_MSS, TCP_OPT_LEN_MSS)
#define	TCP_OPT_KL_WSC		TCP_OPT_KL(TCP_OPT_KIND_WSC, TCP_OPT_LEN_WSC)
#define	TCP_OPT_KL_TMS		TCP_OPT_KL(TCP_OPT_KIND_TMS, TCP_OPT_LEN_TMS)
159
/*
 * Timestamp option: TSval and TSecr, accessible as one 64-bit value
 * through 'raw'.
 */
union tsopt {
	uint64_t raw;
	struct {
		uint32_t val;
		uint32_t ecr;
	};
};
170
171struct tcpopt {
172	union {
173		uint16_t raw;
174		struct {
175			uint8_t kind;
176			uint8_t len;
177		};
178	} kl;
179	union {
180		uint16_t mss;
181		uint8_t  wscale;
182		union tsopt ts;
183	};
184} __attribute__((__packed__));
185
186struct syn_opts {
187	uint16_t mss;
188	uint8_t  wscale;
189	union tsopt ts;
190};
191
/* response information: set of flags. */
struct resp_info {
	uint32_t flags;
};
195
196
/*
 * window update information (RFC 793 WL1, WL2),
 * accessible as one 64-bit value through 'raw'.
 */
union wui {
	uint64_t raw;
	struct {
		uint32_t wl1;
		uint32_t wl2;
	};
};
205
206/*
207 * helper structure: holds aggregated information about group
208 * of processed data+ack packets.
209 */
210struct dack_info {
211	struct {                    /* # of received segments with: */
212		uint32_t data;      /* incoming data */
213		uint32_t ack;       /* newly acked data */
214		uint32_t dup;       /* duplicate acks */
215		uint32_t badseq;    /* bad seq/ack */
216		uint32_t ofo;       /* OFO incoming data */
217	} segs;
218	uint32_t ack;       /* highest received ACK */
219	union tsopt ts;     /* TS of highest ACK */
220	union wui wu;       /* window update information */
221	uint32_t wnd;
222	struct {               /* 3 duplicate ACKs were observed after */
223		uint32_t seg;  /* # of meaningful ACK segments */
224		uint32_t ack;  /* ACK sequence */
225	} dup3;
226};
227
/*
 * get current timestamp: TSC cycle counter shifted right by 'mshift',
 * truncated to 32 bits.
 * NOTE(review): the value is in milliseconds only if the caller picks
 * 'mshift' so that (1 << mshift) cycles is about 1 ms - confirm at the
 * call sites. 'mshift' has to be less than 64.
 */
static inline uint32_t
tcp_get_tms(uint32_t mshift)
{
	uint64_t ts;

	ts = rte_get_tsc_cycles() >> mshift;

	/* only the low 32 bits are kept, the timestamp wraps around */
	return (uint32_t)ts;
}
236
/*
 * compare two 32-bit sequence numbers with wrap-around taken into account:
 * returns non-zero if 'l' precedes 'r', i.e. the signed 32-bit difference
 * (l - r) is negative.
 */
static inline int
tcp_seq_lt(uint32_t l, uint32_t r)
{
	return (int32_t)(l - r) < 0;
}
242
/*
 * compare two 32-bit sequence numbers with wrap-around taken into account:
 * returns non-zero if 'l' precedes or equals 'r', i.e. the signed 32-bit
 * difference (l - r) is not positive.
 */
static inline int
tcp_seq_leq(uint32_t l, uint32_t r)
{
	return (int32_t)(l - r) <= 0;
}
248
249
250static inline void
251get_seg_info(const struct tcp_hdr *th, union seg_info *si)
252{
253	__m128i v;
254	const  __m128i bswap_mask =
255		_mm_set_epi8(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX,
256			UINT8_MAX, UINT8_MAX, 10, 11,
257			4, 5, 6, 7,
258			0, 1, 2, 3);
259
260	v = _mm_loadu_si128((const __m128i *)&th->sent_seq);
261	si->raw.x = _mm_shuffle_epi8(v, bswap_mask);
262}
263
264static inline void
265get_syn_opts(struct syn_opts *so, uintptr_t p, uint32_t len)
266{
267	uint32_t i, kind;
268	const struct tcpopt *opt;
269
270	memset(so, 0, sizeof(*so));
271
272	i = 0;
273	while (i < len) {
274		opt = (const struct tcpopt *)(p + i);
275		kind = opt->kl.kind;
276		if (kind == TCP_OPT_KIND_EOL)
277			return;
278		else if (kind == TCP_OPT_KIND_NOP)
279			i += sizeof(opt->kl.kind);
280		else {
281			i += opt->kl.len;
282			if (i <= len) {
283				if (opt->kl.raw == TCP_OPT_KL_MSS)
284					so->mss = rte_be_to_cpu_16(opt->mss);
285				else if (opt->kl.raw == TCP_OPT_KL_WSC)
286					so->wscale = opt->wscale;
287				else if (opt->kl.raw == TCP_OPT_KL_TMS) {
288					so->ts.val =
289						rte_be_to_cpu_32(opt->ts.val);
290					so->ts.ecr =
291						rte_be_to_cpu_32(opt->ts.ecr);
292				}
293			}
294		}
295	}
296}
297
298/*
299 * generates SYN options, assumes that there are
300 * at least TCP_TX_OPT_LEN_MAX bytes available.
301 */
302static inline void
303fill_syn_opts(void *p, const struct syn_opts *so)
304{
305	uint8_t *to;
306	struct tcpopt *opt;
307
308	to = (uint8_t *)p;
309
310	/* setup MSS*/
311	opt = (struct tcpopt *)to;
312	opt->kl.raw = TCP_OPT_KL_MSS;
313	opt->mss = rte_cpu_to_be_16(so->mss);
314
315	to += TCP_OPT_LEN_MSS;
316	opt = (struct tcpopt *)to;
317
318	/* setup TMS*/
319	if (so->ts.val != 0) {
320
321		opt->kl.raw = TCP_OPT_KL_TMS;
322		opt->ts.val = rte_cpu_to_be_32(so->ts.val);
323		opt->ts.ecr = rte_cpu_to_be_32(so->ts.ecr);
324
325		to += TCP_OPT_LEN_TMS;
326		opt = (struct tcpopt *)to;
327	}
328
329	/* setup TMS*/
330	if (so->wscale != 0) {
331
332		opt->kl.raw = TCP_OPT_KL_WSC;
333		opt->wscale = so->wscale;
334
335		to += TCP_OPT_LEN_WSC;
336		opt = (struct tcpopt *)to;
337	}
338
339	to[0] = TCP_OPT_KIND_EOL;
340}
341
342/*
343 * generate TMS option, for non SYN packet, make sure
344 * there at least TCP_TX_OPT_LEN_TMS available.
345 */
346static inline void
347fill_tms_opts(void *p, uint32_t val, uint32_t ecr)
348{
349	uint32_t *opt;
350
351	opt = (uint32_t *)p;
352	opt[0] = TCP_OPT_TMS_HDR;
353	opt[1] = rte_cpu_to_be_32(val);
354	opt[2] = rte_cpu_to_be_32(ecr);
355}
356
357static inline union tsopt
358get_tms_opts(uintptr_t p, uint32_t len)
359{
360	union tsopt ts;
361	uint32_t i, kind;
362	const uint32_t *opt;
363	const struct tcpopt *to;
364
365	opt = (const uint32_t *)p;
366
367	/* TS option is presented in recommended way */
368	if (len >= TCP_TX_OPT_LEN_TMS && opt[0] == TCP_OPT_TMS_HDR) {
369		ts.val = rte_be_to_cpu_32(opt[1]);
370		ts.ecr = rte_be_to_cpu_32(opt[2]);
371		return ts;
372	}
373
374	/* parse through whole list of options. */
375	ts.raw = 0;
376	i = 0;
377	while (i < len) {
378		to = (const struct tcpopt *)(p + i);
379		kind = to->kl.kind;
380		if (kind == TCP_OPT_KIND_EOL)
381			break;
382		else if (kind == TCP_OPT_KIND_NOP)
383			i += sizeof(to->kl.kind);
384		else {
385			i += to->kl.len;
386			if (i <= len && to->kl.raw == TCP_OPT_KL_TMS) {
387				ts.val = rte_be_to_cpu_32(to->ts.val);
388				ts.ecr = rte_be_to_cpu_32(to->ts.ecr);
389				break;
390			}
391		}
392	}
393
394	return ts;
395}
396
397static inline uint8_t
398get_pkt_type(const struct rte_mbuf *m)
399{
400	uint32_t v;
401
402	v = m->packet_type &
403		(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_MASK);
404	if (v == (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP))
405		return TLE_V4;
406	else if (v == (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP))
407		return TLE_V6;
408	else
409		return TLE_VNUM;
410}
411
412static inline void
413get_pkt_info(const struct rte_mbuf *m, union pkt_info *pi, union seg_info *si)
414{
415	uint32_t len, type;
416	const struct tcp_hdr *tcph;
417	const union l4_ports *prt;
418	const union ipv4_addrs *pa4;
419
420	type = get_pkt_type(m);
421	len = m->l2_len;
422
423	/*
424	 * this line is here just to avoid gcc warning:
425	 * error: .<U6098>.<U6000>.addr4.raw may be used uninitialized.
426	 */
427	pi->addr4.raw = 0;
428
429	if (type == TLE_V4) {
430		pa4 = rte_pktmbuf_mtod_offset(m, const union ipv4_addrs *,
431			len + offsetof(struct ipv4_hdr, src_addr));
432		pi->addr4.raw = pa4->raw;
433	} else if (type == TLE_V6) {
434		pi->addr6 = rte_pktmbuf_mtod_offset(m, const union ipv6_addrs *,
435			len + offsetof(struct ipv6_hdr, src_addr));
436	}
437
438	len += m->l3_len;
439	tcph = rte_pktmbuf_mtod_offset(m, const struct tcp_hdr *, len);
440	prt = (const union l4_ports *)
441		((uintptr_t)tcph + offsetof(struct tcp_hdr, src_port));
442	pi->tf.flags = tcph->tcp_flags;
443	pi->tf.type = type;
444	pi->csf = m->ol_flags & (PKT_RX_IP_CKSUM_MASK | PKT_RX_L4_CKSUM_MASK);
445	pi->port.raw = prt->raw;
446
447	get_seg_info(tcph, si);
448}
449
450static inline uint32_t
451tcp_mbuf_seq_free(struct rte_mbuf *mb[], uint32_t num)
452{
453	uint32_t i, len;
454
455	len = 0;
456	for (i = 0; i != num; i++) {
457		len += mb[i]->pkt_len;
458		rte_pktmbuf_free(mb[i]);
459	}
460
461	return len;
462}
463
464#ifdef __cplusplus
465}
466#endif
467
468#endif /* _TCP_MISC_H_ */
469