tcp_misc.h revision 6e95f5ec
1/*
2 * Copyright (c) 2016  Intel Corporation.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef _TCP_MISC_H_
17#define _TCP_MISC_H_
18
19#include "net_misc.h"
20#include <rte_tcp.h>
21#include <rte_cycles.h>
22
23#ifdef __cplusplus
24extern "C" {
25#endif
26
/*
 * TCP protocol related structure and function definitions.
 * The main purpose is to simplify (and optimise) processing and
 * representation of protocol related data.
 */
32
/*
 * Window scale values; presumably the shift count carried in the
 * WSC option (NONE == option not used) - confirm against callers.
 */
#define	TCP_WSCALE_DEFAULT	7
#define	TCP_WSCALE_NONE		0

/* TCP header plus the largest options area this code generates */
#define	TCP_TX_HDR_MAX	(sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_MAX)

/* max header size for normal data+ack packet */
#define	TCP_TX_HDR_DACK	(sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_TMS)

/*
 * Lower bounds for MSS over IPv4/IPv6.
 * NOTE(review): 536 == 576 - 40 and 1220 == 1280 - 60, i.e. presumably
 * the minimum IPv4 datagram / IPv6 MTU minus IP and TCP headers.
 */
#define	TCP4_MIN_MSS	536

#define	TCP6_MIN_MSS	1220

/* default MTU, no TCP options. */
#define TCP4_NOP_MSS	\
	(ETHER_MTU - sizeof(struct ipv4_hdr) - sizeof(struct tcp_hdr))

#define TCP6_NOP_MSS	\
	(ETHER_MTU - sizeof(struct ipv6_hdr) - sizeof(struct tcp_hdr))

/* default MTU, TCP options present */
#define TCP4_OP_MSS	(TCP4_NOP_MSS - TCP_TX_OPT_LEN_MAX)

#define TCP6_OP_MSS	(TCP6_NOP_MSS - TCP_TX_OPT_LEN_MAX)
56
/*
 * TCP flags: bit values inside the one-byte flags field of the TCP header
 * (the same byte that get_pkt_info() copies into typflg.flags).
 */
#define	TCP_FLAG_FIN	0x01
#define	TCP_FLAG_SYN	0x02
#define	TCP_FLAG_RST	0x04
#define	TCP_FLAG_PSH	0x08
#define	TCP_FLAG_ACK	0x10
#define	TCP_FLAG_URG	0x20

/*
 * TCP flags mask: covers all eight bits of the flags byte, including
 * the two upper bits that have no TCP_FLAG_* name above.
 */
#define	TCP_FLAG_MASK	UINT8_MAX
69
/*
 * Packet type and TCP header flags kept side by side;
 * 'raw' overlays both bytes so they can be accessed as one 16-bit value.
 */
union typflg {
	uint16_t raw;
	struct {
		uint8_t type;  /* TLE_V4/TLE_V6 */
		uint8_t flags; /* TCP header flags */
	};
};
77
/*
 * Per-packet information collected by get_pkt_info():
 * type/flags, checksum flags, L4 ports and L3 addresses.
 * 'raw' overlays the whole record with one rte_xmm_t
 * (presumably a 128-bit vector type - confirm against DPDK headers).
 */
union pkt_info {
	rte_xmm_t raw;
	struct {
		union typflg tf;
		uint16_t csf;  /* checksum flags */
		union l4_ports port;
		union {
			/* IPv4: addresses are copied by value */
			union ipv4_addrs addr4;
			/* IPv6: pointer to the addresses inside the packet */
			const union ipv6_addrs *addr6;
		};
	};
};
90
/*
 * Segment information filled by get_seg_info() from the TCP header.
 * The field layout must stay in sync with the shuffle mask used there:
 * seq, ack and wnd are stored byte-reversed relative to the header,
 * hole1 receives two header bytes unchanged.
 */
union seg_info {
	rte_xmm_t raw;
	struct {
		uint32_t seq;
		uint32_t ack;
		/*
		 * NOTE(review): presumably the data-offset and flags bytes
		 * of the header, not meant to be used - confirm.
		 */
		uint16_t hole1;
		uint16_t wnd;
	};
};
100
/*
 * A (sequence number, length) pair; 'raw' overlays both fields
 * so the pair can be accessed as a single 64-bit value.
 */
union seqlen {
	uint64_t raw;
	struct {
		uint32_t seq;
		uint32_t len;
	};
};
108
/* alignment (in bytes) applied to the generated options area */
#define	TCP_DATA_ALIGN	4

/*
 * NOTE(review): not used inside this header; presumably the bit position
 * of the data-offset field inside its header byte - confirm.
 */
#define	TCP_DATA_OFFSET	4

/*
 * recognizable options.
 */
#define	TCP_OPT_KIND_EOL	0x00
#define	TCP_OPT_KIND_NOP	0x01
#define	TCP_OPT_KIND_MSS	0x02
#define	TCP_OPT_KIND_WSC	0x03
#define	TCP_OPT_KIND_TMS	0x08

/*
 * option lengths in bytes; for MSS/WSC/TMS the value includes
 * the kind and length octets themselves.
 */
#define	TCP_OPT_LEN_EOL		0x01
#define	TCP_OPT_LEN_NOP		0x01
#define	TCP_OPT_LEN_MSS		0x04
#define	TCP_OPT_LEN_WSC		0x03
#define	TCP_OPT_LEN_TMS		0x0a

/*
 * space needed by fill_syn_opts(): MSS + WSC + TMS + EOL (18 bytes)
 * passed through RTE_ALIGN_CEIL with TCP_DATA_ALIGN
 * (presumably rounds up to 20 - confirm RTE_ALIGN_CEIL semantics).
 */
#define	TCP_TX_OPT_LEN_MAX	\
	RTE_ALIGN_CEIL(TCP_OPT_LEN_MSS + TCP_OPT_LEN_WSC + TCP_OPT_LEN_TMS + \
		TCP_OPT_LEN_EOL, TCP_DATA_ALIGN)
131
/*
 * recommended format for TSOPT from RFC 1323, appendix A:
 *  +--------+--------+--------+--------+
 *  |   NOP  |  NOP   |  TSopt |   10   |
 *  +--------+--------+--------+--------+
 *  |          TSval   timestamp        |
 *  +--------+--------+--------+--------+
 *  |          TSecr   timestamp        |
 *  +--------+--------+--------+--------+
 */
#define	TCP_TX_OPT_LEN_TMS	(TCP_OPT_LEN_TMS + 2 * TCP_OPT_LEN_NOP)

/*
 * First 32-bit word of the layout above (NOP, NOP, kind, len).
 * fill_tms_opts() stores it and get_tms_opts() compares against it
 * as a single word; the byte-order helper is presumably there to make
 * the in-memory byte sequence match the wire order - confirm.
 */
#define TCP_OPT_TMS_HDR		(rte_be_to_cpu_32( \
	TCP_OPT_KIND_NOP << 3 * CHAR_BIT | \
	TCP_OPT_KIND_NOP << 2 * CHAR_BIT | \
	TCP_OPT_KIND_TMS << CHAR_BIT | \
	TCP_OPT_LEN_TMS))

/*
 * 16-bit (kind, length) pair in the same representation as tcpopt.kl.raw,
 * so that kind and length can be checked with one comparison.
 */
#define	TCP_OPT_KL(k, l)	(rte_be_to_cpu_16((k) << CHAR_BIT | (l)))

#define	TCP_OPT_KL_MSS		TCP_OPT_KL(TCP_OPT_KIND_MSS, TCP_OPT_LEN_MSS)
#define	TCP_OPT_KL_WSC		TCP_OPT_KL(TCP_OPT_KIND_WSC, TCP_OPT_LEN_WSC)
#define	TCP_OPT_KL_TMS		TCP_OPT_KL(TCP_OPT_KIND_TMS, TCP_OPT_LEN_TMS)
155
/*
 * Timestamp option payload (TSval/TSecr);
 * 'raw' overlays both 32-bit fields as one 64-bit value.
 */
union tsopt {
	uint64_t raw;
	struct {
		uint32_t val;   /* TSval */
		uint32_t ecr;   /* TSecr */
	};
};
166
/*
 * Overlay for a single option inside the TCP header:
 * kind and length octets followed by the option value.
 * Packed, because the parsers below place it at arbitrary byte offsets.
 * Multi-byte values are read/written with explicit byte-order
 * conversion by the get_*()/fill_*() helpers.
 */
struct tcpopt {
	union {
		uint16_t raw;   /* compared against TCP_OPT_KL_* values */
		struct {
			uint8_t kind;
			uint8_t len;
		};
	} kl;
	union {
		uint16_t mss;
		uint8_t  wscale;
		union tsopt ts;
	};
} __attribute__((__packed__));
181
/*
 * Options carried by a SYN segment, as produced by get_syn_opts()
 * and consumed by fill_syn_opts().
 * get_syn_opts() zeroes the structure first, so a field stays zero
 * when the corresponding option was not found; fill_syn_opts() skips
 * the TMS option when ts.val is zero and WSC when wscale is zero.
 */
struct syn_opts {
	uint16_t mss;
	uint8_t  wscale;
	union tsopt ts;
};
187
/*
 * NOTE(review): no user of this structure is visible in this header;
 * presumably 'flags' holds TCP_FLAG_* bits for a response - confirm.
 */
struct resp_info {
	uint32_t flags;
};
191
192
/*
 * window update information (RFC 793 WL1, WL2);
 * 'raw' overlays both values as one 64-bit word.
 */
union wui {
	uint64_t raw;
	struct {
		uint32_t wl1;   /* RFC 793 SND.WL1 */
		uint32_t wl2;   /* RFC 793 SND.WL2 */
	};
};
201
/*
 * helper structure: holds aggregated information about group
 * of processed data+ack packets.
 */
struct dack_info {
	struct {                    /* # of received segments with: */
		uint32_t data;      /* incoming data */
		uint32_t ack;       /* newly acked data */
		uint32_t dup;       /* duplicate acks */
		uint32_t badseq;    /* bad seq/ack */
		uint32_t ofo;       /* OFO incoming data */
	} segs;
	uint32_t ack;       /* highest received ACK */
	union tsopt ts;     /* TS of highest ACK */
	union wui wu;       /* window update information */
	/* NOTE(review): presumably the window that goes with 'wu' - confirm */
	uint32_t wnd;
	struct {               /* 3 duplicate ACKs were observed after */
		uint32_t seg;  /* # of meaningful ACK segments */
		uint32_t ack;  /* ACK sequence */
	} dup3;
};
223
224/* get current timestamp in ms */
225static inline uint32_t
226tcp_get_tms(void)
227{
228	uint64_t ts, ms;
229	ms = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S;
230	ts = rte_get_tsc_cycles() / ms;
231	return ts;
232}
233
/*
 * Sequence number comparison with wrap-around:
 * returns non-zero when l precedes r, i.e. when the 32-bit difference
 * (l - r), reinterpreted as signed, is negative.
 */
static inline int
tcp_seq_lt(uint32_t l, uint32_t r)
{
	const int32_t delta = (int32_t)(l - r);

	return delta < 0;
}
239
/*
 * Sequence number comparison with wrap-around:
 * returns non-zero when l precedes or equals r, i.e. when the 32-bit
 * difference (l - r), reinterpreted as signed, is not positive.
 */
static inline int
tcp_seq_leq(uint32_t l, uint32_t r)
{
	const int32_t delta = (int32_t)(l - r);

	return delta <= 0;
}
245
246
247static inline void
248get_seg_info(const struct tcp_hdr *th, union seg_info *si)
249{
250	__m128i v;
251	const  __m128i bswap_mask = _mm_set_epi8(15, 14, 13, 12, 10, 11, 9, 8,
252			4, 5, 6, 7, 0, 1, 2, 3);
253
254	v = _mm_loadu_si128((const __m128i *)&th->sent_seq);
255	si->raw.x = _mm_shuffle_epi8(v, bswap_mask);
256}
257
258static inline void
259get_syn_opts(struct syn_opts *so, uintptr_t p, uint32_t len)
260{
261	uint32_t i, kind;
262	const struct tcpopt *opt;
263
264	memset(so, 0, sizeof(*so));
265
266	i = 0;
267	while (i < len) {
268		opt = (const struct tcpopt *)(p + i);
269		kind = opt->kl.kind;
270		if (kind == TCP_OPT_KIND_EOL)
271			return;
272		else if (kind == TCP_OPT_KIND_NOP)
273			i += sizeof(opt->kl.kind);
274		else {
275			i += opt->kl.len;
276			if (i <= len) {
277				if (opt->kl.raw == TCP_OPT_KL_MSS)
278					so->mss = rte_be_to_cpu_16(opt->mss);
279				else if (opt->kl.raw == TCP_OPT_KL_WSC)
280					so->wscale = opt->wscale;
281				else if (opt->kl.raw == TCP_OPT_KL_TMS) {
282					so->ts.val =
283						rte_be_to_cpu_32(opt->ts.val);
284					so->ts.ecr =
285						rte_be_to_cpu_32(opt->ts.ecr);
286				}
287			}
288		}
289	}
290}
291
292/*
293 * generates SYN options, assumes that there are
294 * at least TCP_TX_OPT_LEN_MAX bytes available.
295 */
296static inline void
297fill_syn_opts(void *p, const struct syn_opts *so)
298{
299	uint8_t *to;
300	struct tcpopt *opt;
301
302	to = (uint8_t *)p;
303
304	/* setup MSS*/
305	opt = (struct tcpopt *)to;
306	opt->kl.raw = TCP_OPT_KL_MSS;
307	opt->mss = rte_cpu_to_be_16(so->mss);
308
309	to += TCP_OPT_LEN_MSS;
310	opt = (struct tcpopt *)to;
311
312	/* setup TMS*/
313	if (so->ts.val != 0) {
314
315		opt->kl.raw = TCP_OPT_KL_TMS;
316		opt->ts.val = rte_cpu_to_be_32(so->ts.val);
317		opt->ts.ecr = rte_cpu_to_be_32(so->ts.ecr);
318
319		to += TCP_OPT_LEN_TMS;
320		opt = (struct tcpopt *)to;
321	}
322
323	/* setup TMS*/
324	if (so->wscale != 0) {
325
326		opt->kl.raw = TCP_OPT_KL_WSC;
327		opt->wscale = so->wscale;
328
329		to += TCP_OPT_LEN_WSC;
330		opt = (struct tcpopt *)to;
331	}
332
333	to[0] = TCP_OPT_KIND_EOL;
334}
335
336/*
337 * generate TMS option, for non SYN packet, make sure
338 * there at least TCP_TX_OPT_LEN_TMS available.
339 */
340static inline void
341fill_tms_opts(void *p, uint32_t val, uint32_t ecr)
342{
343	uint32_t *opt;
344
345	opt = (uint32_t *)p;
346	opt[0] = TCP_OPT_TMS_HDR;
347	opt[1] = rte_cpu_to_be_32(val);
348	opt[2] = rte_cpu_to_be_32(ecr);
349}
350
351static inline union tsopt
352get_tms_opts(uintptr_t p, uint32_t len)
353{
354	union tsopt ts;
355	uint32_t i, kind;
356	const uint32_t *opt;
357	const struct tcpopt *to;
358
359	opt = (const uint32_t *)p;
360
361	/* TS option is presented in recommended way */
362	if (len >= TCP_TX_OPT_LEN_TMS && opt[0] == TCP_OPT_TMS_HDR) {
363		ts.val = rte_be_to_cpu_32(opt[1]);
364		ts.ecr = rte_be_to_cpu_32(opt[2]);
365		return ts;
366	}
367
368	/* parse through whole list of options. */
369	ts.raw = 0;
370	i = 0;
371	while (i < len) {
372		to = (const struct tcpopt *)(p + i);
373		kind = to->kl.kind;
374		if (kind == TCP_OPT_KIND_EOL)
375			break;
376		else if (kind == TCP_OPT_KIND_NOP)
377			i += sizeof(to->kl.kind);
378		else {
379			i += to->kl.len;
380			if (i <= len && to->kl.raw == TCP_OPT_KL_TMS) {
381				ts.val = rte_be_to_cpu_32(to->ts.val);
382				ts.ecr = rte_be_to_cpu_32(to->ts.ecr);
383				break;
384			}
385		}
386	}
387
388	return ts;
389}
390
391static inline uint8_t
392get_pkt_type(const struct rte_mbuf *m)
393{
394	uint32_t v;
395
396	v = m->packet_type &
397		(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_MASK);
398	if (v == (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP))
399		return TLE_V4;
400	else if (v == (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP))
401		return TLE_V6;
402	else
403		return TLE_VNUM;
404}
405
406static inline void
407get_pkt_info(const struct rte_mbuf *m, union pkt_info *pi, union seg_info *si)
408{
409	uint32_t len, type;
410	const struct tcp_hdr *tcph;
411	const union l4_ports *prt;
412	const union ipv4_addrs *pa4;
413
414	type = get_pkt_type(m);
415	len = m->l2_len;
416
417	/*
418	 * this line is here just to avoid gcc warning:
419	 * error: .<U6098>.<U6000>.addr4.raw may be used uninitialized.
420	 */
421	pi->addr4.raw = 0;
422
423	if (type == TLE_V4) {
424		pa4 = rte_pktmbuf_mtod_offset(m, const union ipv4_addrs *,
425			len + offsetof(struct ipv4_hdr, src_addr));
426		pi->addr4.raw = pa4->raw;
427	} else if (type == TLE_V6) {
428		pi->addr6 = rte_pktmbuf_mtod_offset(m, const union ipv6_addrs *,
429			len + offsetof(struct ipv6_hdr, src_addr));
430	}
431
432	len += m->l3_len;
433	tcph = rte_pktmbuf_mtod_offset(m, const struct tcp_hdr *, len);
434	prt = (const union l4_ports *)
435		((uintptr_t)tcph + offsetof(struct tcp_hdr, src_port));
436	pi->tf.flags = tcph->tcp_flags;
437	pi->tf.type = type;
438	pi->csf = m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
439	pi->port.raw = prt->raw;
440
441	get_seg_info(tcph, si);
442}
443
444static inline uint32_t
445tcp_mbuf_seq_free(struct rte_mbuf *mb[], uint32_t num)
446{
447	uint32_t i, len;
448
449	len = 0;
450	for (i = 0; i != num; i++) {
451		len += mb[i]->pkt_len;
452		rte_pktmbuf_free(mb[i]);
453	}
454
455	return len;
456}
457
458#ifdef __cplusplus
459}
460#endif
461
462#endif /* _TCP_MISC_H_ */
463