/*
 * Copyright (c) 2016  Intel Corporation.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <netinet/ip6.h>
#include <rte_arp.h>

#include "netbe.h"
#include "dpdk_legacy.h"

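/* maps a set of HW-supported packet types onto the RX callback to install. */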
struct ptype2cb {
	uint32_t mask;
	const char *name;
	rte_rx_callback_fn fn;
};

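/* bit flags describing the packet types a port can recognize in HW. */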
enum {
	ETHER_PTYPE = 0x1,
	IPV4_PTYPE = 0x2,
	IPV4_EXT_PTYPE = 0x4,
	IPV6_PTYPE = 0x8,
	IPV6_EXT_PTYPE = 0x10,
	TCP_PTYPE = 0x20,
	UDP_PTYPE = 0x40,
};

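/*
 * pack header lengths into the mbuf tx_offload bit-fields:
 * l2_len:7, l3_len:9, l4_len:8, tso_segsz:16, outer_l3_len:9, outer_l2_len:7.
 */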
static inline uint64_t
_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
	uint64_t ol3, uint64_t ol2)
{
	return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
}

static inline void
fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
{
	m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
}

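/* non-zero if any bits other than DF are set (MF flag or fragment offset). */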
static inline int
is_ipv4_frag(const struct ipv4_hdr *iph)
{
	const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);

	return ((mask & iph->fragment_offset) != 0);
}

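/* TCP header length in bytes (including options), taken from data_off. */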
static inline uint32_t
get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
{
	const struct tcp_hdr *tcp;

	tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
	return (tcp->data_off >> 4) * 4;
}

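/*
 * trim any padding beyond the length reported in the IPv4 total_length
 * field; the IPv6 variant below does the same based on payload_len.
 */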
static inline void
adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len)
{
	uint32_t plen, trim;
	const struct ipv4_hdr *iph;

	iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len);
	plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
	if (plen < m->pkt_len) {
		trim = m->pkt_len - plen;
		rte_pktmbuf_trim(m, trim);
	}
}

static inline void
adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len)
{
	uint32_t plen, trim;
	const struct ipv6_hdr *iph;

	iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len);
	plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
	if (plen < m->pkt_len) {
		trim = m->pkt_len - plen;
		rte_pktmbuf_trim(m, trim);
	}
}

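/* account the packet under its TCP flags value in the per-lcore stats. */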
static inline void
tcp_stat_update(struct netbe_lcore *lc, const struct rte_mbuf *m,
	uint32_t l2_len, uint32_t l3_len)
{
	const struct tcp_hdr *th;

	th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
	lc->tcp_stat.flags[th->tcp_flags]++;
}

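/*
 * return the IPv4 header length (IHL * 4).
 * Marks the mbuf as a fragment when requested and as RTE_PTYPE_UNKNOWN
 * when the header exceeds the first segment or the L4 protocol differs
 * from the expected one (proto > IPPROTO_MAX disables that check).
 */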
static inline uint32_t
get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag)
{
	const struct ipv4_hdr *iph;
	int32_t dlen, len;

	dlen = rte_pktmbuf_data_len(m);
	dlen -= l2;

	iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
	len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;

	if (frag != 0 && is_ipv4_frag(iph)) {
		m->packet_type &= ~RTE_PTYPE_L4_MASK;
		m->packet_type |= RTE_PTYPE_L4_FRAG;
	}

	if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
		m->packet_type = RTE_PTYPE_UNKNOWN;

	return len;
}

static inline void
fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
	uint32_t frag, uint32_t l4_len)
{
	uint32_t len;

	len = get_ipv4_hdr_len(m, l2, proto, frag);
	fill_pkt_hdr_len(m, l2, len, l4_len);
	adjust_ipv4_pktlen(m, l2);
}

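/* true for IPv6 extension header protocol values. */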
static inline int
ipv6x_hdr(uint32_t proto)
{
	return (proto == IPPROTO_HOPOPTS ||
		proto == IPPROTO_ROUTING ||
		proto == IPPROTO_FRAGMENT ||
		proto == IPPROTO_AH ||
		proto == IPPROTO_NONE ||
		proto == IPPROTO_DSTOPTS);
}

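/*
 * walk the chain of IPv6 extension headers and return the total L3 length
 * (base header plus extensions); nproto is the first next-header value,
 * fproto the expected final L4 protocol.
 */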
static inline uint32_t
get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
	uint32_t fproto)
{
	const struct ip6_ext *ipx;
	int32_t dlen, len, ofs;

	len = sizeof(struct ipv6_hdr);

	dlen = rte_pktmbuf_data_len(m);
	dlen -= l2;

	ofs = l2 + len;
	ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);

	while (ofs > 0 && len < dlen) {

		switch (nproto) {
		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			ofs = (ipx->ip6e_len + 1) << 3;
			break;
		case IPPROTO_AH:
			ofs = (ipx->ip6e_len + 2) << 2;
			break;
		case IPPROTO_FRAGMENT:
			/*
			 * tso_segsz is not used by RX, so use it as temporary
			 * buffer to store the fragment offset.
			 */
			m->tso_segsz = ofs;
			ofs = sizeof(struct ip6_frag);
			m->packet_type &= ~RTE_PTYPE_L4_MASK;
			m->packet_type |= RTE_PTYPE_L4_FRAG;
			break;
		default:
			ofs = 0;
		}

		if (ofs > 0) {
			nproto = ipx->ip6e_nxt;
			len += ofs;
			ipx += ofs / sizeof(*ipx);
		}
	}

	/* unrecognized or invalid packet. */
	if ((ofs == 0 && nproto != fproto) || len > dlen)
		m->packet_type = RTE_PTYPE_UNKNOWN;

	return len;
}

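/* return the IPv6 header length, including any extension headers. */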
static inline uint32_t
get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
{
	const struct ipv6_hdr *iph;

	iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2);

	if (iph->proto == fproto)
		return sizeof(struct ipv6_hdr);
	else if (ipv6x_hdr(iph->proto) != 0)
		return get_ipv6x_hdr_len(m, l2, iph->proto, fproto);

	m->packet_type = RTE_PTYPE_UNKNOWN;
	return 0;
}

static inline void
fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto,
	uint32_t l4_len)
{
	uint32_t len;

	len = get_ipv6_hdr_len(m, l2, fproto);
	fill_pkt_hdr_len(m, l2, len, l4_len);
	adjust_ipv6_pktlen(m, l2);
}

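/*
 * queue an ARP request into the per-port arp_buf for later processing;
 * the mbuf is handed back to the caller if it is not consumed here.
 */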
static inline struct rte_mbuf *
handle_arp(struct rte_mbuf *m, struct netbe_lcore *lc, dpdk_port_t port,
	uint32_t l2len)
{
	const struct arp_hdr *ahdr;
	struct pkt_buf *abuf;

	ahdr = rte_pktmbuf_mtod_offset(m, const struct arp_hdr *, l2len);

	if (ahdr->arp_hrd != rte_be_to_cpu_16(ARP_HRD_ETHER) ||
		ahdr->arp_pro != rte_be_to_cpu_16(ETHER_TYPE_IPv4) ||
		ahdr->arp_op != rte_be_to_cpu_16(ARP_OP_REQUEST)) {

		m->packet_type = RTE_PTYPE_UNKNOWN;
		return m;
	}

	m->l2_len = l2len;
	abuf = &lc->prtq[port].arp_buf;
	if (abuf->num >= RTE_DIM(abuf->pkt))
		return m;

	abuf->pkt[abuf->num++] = m;

	return NULL;
}

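/*
 * SW parsing of ethernet/IP/TCP headers with ARP handling,
 * used when the HW provides no packet type information.
 */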
static inline struct rte_mbuf *
fill_eth_tcp_arp_hdr_len(struct rte_mbuf *m, struct netbe_lcore *lc,
	dpdk_port_t port)
{
	uint32_t dlen, l2_len, l3_len, l4_len;
	uint16_t etp;
	const struct ether_hdr *eth;

	dlen = rte_pktmbuf_data_len(m);

	/* check that first segment is at least 54B long. */
	if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
			sizeof(struct tcp_hdr)) {
		m->packet_type = RTE_PTYPE_UNKNOWN;
		return m;
	}

	l2_len = sizeof(*eth);

	eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
	etp = eth->ether_type;
	if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
		l2_len += sizeof(struct vlan_hdr);

	if (etp == rte_be_to_cpu_16(ETHER_TYPE_ARP))
		return handle_arp(m, lc, port, l2_len);

	if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
		m->packet_type = RTE_PTYPE_L4_TCP |
			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
			RTE_PTYPE_L2_ETHER;
		l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
		l4_len = get_tcp_header_size(m, l2_len, l3_len);
		fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
		adjust_ipv4_pktlen(m, l2_len);
	} else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
			dlen >= l2_len + sizeof(struct ipv6_hdr) +
			sizeof(struct tcp_hdr)) {
		m->packet_type = RTE_PTYPE_L4_TCP |
			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
			RTE_PTYPE_L2_ETHER;
		l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
		l4_len = get_tcp_header_size(m, l2_len, l3_len);
		fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
		adjust_ipv6_pktlen(m, l2_len);
	} else
		m->packet_type = RTE_PTYPE_UNKNOWN;

	return m;
}

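/* SW parsing of ethernet/IP/TCP headers (no HW ptype information). */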
static inline void
fill_eth_tcp_hdr_len(struct rte_mbuf *m)
{
	uint32_t dlen, l2_len, l3_len, l4_len;
	uint16_t etp;
	const struct ether_hdr *eth;

	dlen = rte_pktmbuf_data_len(m);

	/* check that first segment is at least 54B long. */
	if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
			sizeof(struct tcp_hdr)) {
		m->packet_type = RTE_PTYPE_UNKNOWN;
		return;
	}

	l2_len = sizeof(*eth);

	eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
	etp = eth->ether_type;
	if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
		l2_len += sizeof(struct vlan_hdr);

	if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
		m->packet_type = RTE_PTYPE_L4_TCP |
			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
			RTE_PTYPE_L2_ETHER;
		l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
		l4_len = get_tcp_header_size(m, l2_len, l3_len);
		fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
		adjust_ipv4_pktlen(m, l2_len);
	} else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
			dlen >= l2_len + sizeof(struct ipv6_hdr) +
			sizeof(struct tcp_hdr)) {
		m->packet_type = RTE_PTYPE_L4_TCP |
			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
			RTE_PTYPE_L2_ETHER;
		l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
		l4_len = get_tcp_header_size(m, l2_len, l3_len);
		fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
		adjust_ipv6_pktlen(m, l2_len);
	} else
		m->packet_type = RTE_PTYPE_UNKNOWN;
}

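/* SW parsing of ethernet/IP/UDP headers (no HW ptype information). */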
static inline void
fill_eth_udp_hdr_len(struct rte_mbuf *m)
{
	uint32_t dlen, l2_len;
	uint16_t etp;
	const struct ether_hdr *eth;

	dlen = rte_pktmbuf_data_len(m);

	/* check that first segment is at least 42B long. */
	if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
			sizeof(struct udp_hdr)) {
		m->packet_type = RTE_PTYPE_UNKNOWN;
		return;
	}

	l2_len = sizeof(*eth);

	eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
	etp = eth->ether_type;
	if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
		l2_len += sizeof(struct vlan_hdr);

	if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
		m->packet_type = RTE_PTYPE_L4_UDP |
			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
			RTE_PTYPE_L2_ETHER;
		fill_ipv4_hdr_len(m, l2_len, IPPROTO_UDP, 1,
			sizeof(struct udp_hdr));
	} else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
			dlen >= l2_len + sizeof(struct ipv6_hdr) +
			sizeof(struct udp_hdr)) {
		m->packet_type = RTE_PTYPE_L4_UDP |
			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
			RTE_PTYPE_L2_ETHER;
		fill_ipv6_hdr_len(m, l2_len, IPPROTO_UDP,
			sizeof(struct udp_hdr));
	} else
		m->packet_type = RTE_PTYPE_UNKNOWN;
}

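/* compute the IPv4 header checksum over len bytes of the header. */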
static inline uint16_t
ipv4x_cksum(const void *iph, size_t len)
{
	uint16_t cksum;

	cksum = rte_raw_cksum(iph, len);
	return (cksum == 0xffff) ? cksum : ~cksum;
}

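/*
 * restore mbuf metadata after reassembly: set the proper L4 packet type,
 * drop the TX checksum flag, adjust l3_len for IPv6 and, for IPv4 without
 * HW checksum offload, recalculate the header checksum.
 */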
static inline void
fix_reassembled(struct rte_mbuf *m, int32_t hwcsum, uint32_t proto)
{
	struct ipv4_hdr *iph;

	/* update packet type. */
	m->packet_type &= ~RTE_PTYPE_L4_MASK;

	if (proto == IPPROTO_TCP)
		m->packet_type |= RTE_PTYPE_L4_TCP;
	else
		m->packet_type |= RTE_PTYPE_L4_UDP;

	/* clear the TX IP checksum flag set by the reassembly library. */
	m->ol_flags &= ~PKT_TX_IP_CKSUM;

	/* fix l3_len after reassembly. */
	if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
		m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment);

	/* recalculate ipv4 cksum after reassembly. */
	else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
		iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len);
	}
}

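/*
 * feed the fragment into the IP reassembly table;
 * returns the reassembled packet when complete, NULL otherwise.
 */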
static struct rte_mbuf *
reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms,
	dpdk_port_t port, uint32_t proto)
{
	uint32_t l3cs;
	struct rte_ip_frag_tbl *tbl;
	struct rte_ip_frag_death_row *dr;

	tbl = lc->ftbl;
	dr = &lc->death_row;
	l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM;

	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {

		struct ipv4_hdr *iph;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);

		/* process this fragment. */
		m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph);

	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {

		struct ipv6_hdr *iph;
		struct ipv6_extension_fragment *fhdr;

		iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);

		/*
		 * the fragment header offset was stored temporarily in
		 * tso_segsz (see get_ipv6x_hdr_len) to avoid another scan
		 * of the ipv6 extension headers.
		 */
		fhdr = rte_pktmbuf_mtod_offset(m,
			struct ipv6_extension_fragment *, m->tso_segsz);
		m->tso_segsz = 0;

		/* process this fragment. */
		m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr);

	} else {
		rte_pktmbuf_free(m);
		m = NULL;
	}

	/* got reassembled packet. */
	if (m != NULL)
		fix_reassembled(m, l3cs, proto);

	return m;
}

/* exclude NULLs from the final list of packets. */
static inline uint32_t
compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
{
	uint32_t i, j, k, l;

	for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {

		/* found a hole. */
		if (pkt[j] == NULL) {

			/* find how big it is. */
			for (i = j; i-- != 0 && pkt[i] == NULL; )
				;
			/* fill the hole. */
			for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
				pkt[l] = pkt[k];

			nb_pkt -= j - i;
			nb_zero -= j - i;
			j = i + 1;
		}
	}

	return nb_pkt;
}

/*
 * if it is a fragment, try to reassemble it;
 * if for some reason that cannot be done, then
 * set the pkt[] entry to NULL.
 */
#define DO_REASSEMBLE(proto) \
do { \
	if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == \
			RTE_PTYPE_L4_FRAG) { \
		cts = (cts == 0) ? rte_rdtsc() : cts; \
		pkt[j] = reassemble(pkt[j], lc, cts, port, (proto)); \
		x += (pkt[j] == NULL); \
	} \
} while (0)

/*
 * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
 */
static uint16_t
type0_tcp_rx_callback(__rte_unused dpdk_port_t port,
	__rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	uint32_t j, tp;
	struct netbe_lcore *lc;
	uint32_t l4_len, l3_len, l2_len;
	const struct ether_hdr *eth;

	lc = user_param;
	l2_len = sizeof(*eth);

	RTE_SET_USED(lc);

	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);

		tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
			RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);

		switch (tp) {
		/* non-fragmented tcp packets. */
		case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 |
				RTE_PTYPE_L2_ETHER):
			l4_len = get_tcp_header_size(pkt[j], l2_len,
				sizeof(struct ipv4_hdr));
			fill_pkt_hdr_len(pkt[j], l2_len,
				sizeof(struct ipv4_hdr), l4_len);
			adjust_ipv4_pktlen(pkt[j], l2_len);
			break;
		case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 |
				RTE_PTYPE_L2_ETHER):
			l4_len = get_tcp_header_size(pkt[j], l2_len,
				sizeof(struct ipv6_hdr));
			fill_pkt_hdr_len(pkt[j], l2_len,
				sizeof(struct ipv6_hdr), l4_len);
			adjust_ipv6_pktlen(pkt[j], l2_len);
			break;
		case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT |
				RTE_PTYPE_L2_ETHER):
			l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
				IPPROTO_TCP, 0);
			l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
			fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
			adjust_ipv4_pktlen(pkt[j], l2_len);
			break;
		case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT |
				RTE_PTYPE_L2_ETHER):
			l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
			l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
			fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
			adjust_ipv6_pktlen(pkt[j], l2_len);
			break;
		default:
			/* treat packet types as invalid. */
			pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
			break;
		}
	}

	return nb_pkts;
}

/*
 * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
 */
static uint16_t
type0_udp_rx_callback(dpdk_port_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	uint32_t j, tp, x;
	uint64_t cts;
	struct netbe_lcore *lc;
	uint32_t l2_len;
	const struct ether_hdr *eth;

	lc = user_param;
	cts = 0;
	l2_len = sizeof(*eth);

	x = 0;
	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);

		tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
			RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);

		switch (tp) {
		/* non-fragmented udp packets. */
		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 |
				RTE_PTYPE_L2_ETHER):
			fill_pkt_hdr_len(pkt[j], l2_len,
				sizeof(struct ipv4_hdr),
				sizeof(struct udp_hdr));
			adjust_ipv4_pktlen(pkt[j], l2_len);
			break;
		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 |
				RTE_PTYPE_L2_ETHER):
			fill_pkt_hdr_len(pkt[j], l2_len,
				sizeof(struct ipv6_hdr),
				sizeof(struct udp_hdr));
			adjust_ipv6_pktlen(pkt[j], l2_len);
			break;
		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT |
				RTE_PTYPE_L2_ETHER):
			fill_ipv4_hdr_len(pkt[j], l2_len,
				UINT32_MAX, 0, sizeof(struct udp_hdr));
			break;
		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT |
				RTE_PTYPE_L2_ETHER):
			fill_ipv6_hdr_len(pkt[j], l2_len,
				IPPROTO_UDP, sizeof(struct udp_hdr));
			break;
		/* possibly fragmented udp packets. */
		case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER):
		case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER):
			fill_ipv4_hdr_len(pkt[j], l2_len,
				IPPROTO_UDP, 1, sizeof(struct udp_hdr));
			break;
		case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER):
		case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER):
			fill_ipv6_hdr_len(pkt[j], l2_len,
				IPPROTO_UDP, sizeof(struct udp_hdr));
			break;
		default:
			/* treat packet types as invalid. */
			pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
			break;
		}

		DO_REASSEMBLE(IPPROTO_UDP);
	}

	/* reassembly was invoked, clean up its death row. */
	if (cts != 0)
		rte_ip_frag_free_death_row(&lc->death_row, 0);

	if (x == 0)
		return nb_pkts;

	NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
		"%u non-reassembled fragments;\n",
		__func__, port, queue, nb_pkts, x);

	return compress_pkt_list(pkt, nb_pkts, x);
}

/*
 * HW can recognize L2/L3/L4 and fragments (i40e).
 */
static uint16_t
type1_tcp_rx_callback(__rte_unused dpdk_port_t port,
	__rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	uint32_t j, tp;
	struct netbe_lcore *lc;
	uint32_t l4_len, l3_len, l2_len;
	const struct ether_hdr *eth;

	lc = user_param;
	l2_len = sizeof(*eth);

	RTE_SET_USED(lc);

	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);

		tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
			RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);

		switch (tp) {
		case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
				RTE_PTYPE_L2_ETHER):
			l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
				IPPROTO_TCP, 0);
			l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
			fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
			adjust_ipv4_pktlen(pkt[j], l2_len);
			tcp_stat_update(lc, pkt[j], l2_len, l3_len);
			break;
		case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
				RTE_PTYPE_L2_ETHER):
			l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
			l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
			fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
			adjust_ipv6_pktlen(pkt[j], l2_len);
			tcp_stat_update(lc, pkt[j], l2_len, l3_len);
			break;
		default:
			/* treat packet types as invalid. */
			pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
			break;
		}

	}

	return nb_pkts;
}

/*
 * HW can recognize L2/L3/L4 and fragments (i40e).
 */
static uint16_t
type1_udp_rx_callback(dpdk_port_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	uint32_t j, tp, x;
	uint64_t cts;
	struct netbe_lcore *lc;
	uint32_t l2_len;
	const struct ether_hdr *eth;

	lc = user_param;
	cts = 0;
	l2_len = sizeof(*eth);

	x = 0;
	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);

		tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
			RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);

		switch (tp) {
		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
				RTE_PTYPE_L2_ETHER):
			fill_ipv4_hdr_len(pkt[j], l2_len,
				UINT32_MAX, 0, sizeof(struct udp_hdr));
			break;
		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
				RTE_PTYPE_L2_ETHER):
			fill_ipv6_hdr_len(pkt[j], l2_len,
				IPPROTO_UDP, sizeof(struct udp_hdr));
			break;
		case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
				RTE_PTYPE_L2_ETHER):
			fill_ipv4_hdr_len(pkt[j], l2_len,
				IPPROTO_UDP, 0, sizeof(struct udp_hdr));
			break;
		case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
				RTE_PTYPE_L2_ETHER):
			fill_ipv6_hdr_len(pkt[j], l2_len,
				IPPROTO_UDP, sizeof(struct udp_hdr));
			break;
		default:
			/* treat packet types as invalid. */
			pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
			break;
		}

		DO_REASSEMBLE(IPPROTO_UDP);
	}

	/* reassembly was invoked, clean up its death row. */
	if (cts != 0)
		rte_ip_frag_free_death_row(&lc->death_row, 0);

	if (x == 0)
		return nb_pkts;

	NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
		"%u non-reassembled fragments;\n",
		__func__, port, queue, nb_pkts, x);

	return compress_pkt_list(pkt, nb_pkts, x);
}

/*
 * generic, assumes HW doesn't recognize any packet type.
 */
static uint16_t
typen_tcp_arp_rx_callback(dpdk_port_t port, uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts, uint16_t max_pkts,
	void *user_param)
{
	uint32_t j, x;
	struct netbe_lcore *lc;

	lc = user_param;

	RTE_SET_USED(queue);
	RTE_SET_USED(max_pkts);

	x = 0;
	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);
		pkt[j] = fill_eth_tcp_arp_hdr_len(pkt[j], lc, port);
		x += (pkt[j] == NULL);
	}

	if (x == 0)
		return nb_pkts;

	return compress_pkt_list(pkt, nb_pkts, x);
}

static uint16_t
typen_tcp_rx_callback(__rte_unused dpdk_port_t port,
	__rte_unused uint16_t queue, struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	uint32_t j;
	struct netbe_lcore *lc;

	lc = user_param;

	RTE_SET_USED(lc);

	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);
		fill_eth_tcp_hdr_len(pkt[j]);
	}

	return nb_pkts;
}

static uint16_t
typen_udp_rx_callback(dpdk_port_t port, __rte_unused uint16_t queue,
	struct rte_mbuf *pkt[], uint16_t nb_pkts,
	__rte_unused uint16_t max_pkts, void *user_param)
{
	uint32_t j, x;
	uint64_t cts;
	struct netbe_lcore *lc;

	lc = user_param;
	cts = 0;

	x = 0;
	for (j = 0; j != nb_pkts; j++) {

		NETBE_PKT_DUMP(pkt[j]);
		fill_eth_udp_hdr_len(pkt[j]);

		DO_REASSEMBLE(IPPROTO_UDP);
	}

	/* reassembly was invoked, clean up its death row. */
	if (cts != 0)
		rte_ip_frag_free_death_row(&lc->death_row, 0);

	if (x == 0)
		return nb_pkts;

	NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
		"%u non-reassembled fragments;\n",
		__func__, port, queue, nb_pkts, x);

	return compress_pkt_list(pkt, nb_pkts, x);
}

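/*
 * query the port for the packet types it can recognize in HW and convert
 * them into the *_PTYPE bitmask used to select an RX callback.
 */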
static uint32_t
get_ptypes(const struct netbe_port *uprt)
{
	uint32_t smask;
	int32_t i, rc;
	const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
		RTE_PTYPE_L4_MASK;

	smask = 0;
	rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0);
	if (rc < 0) {
		RTE_LOG(ERR, USER1,
			"%s(port=%u) failed to get supported ptypes;\n",
			__func__, uprt->id);
		return smask;
	}

	uint32_t ptype[rc];
	rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc);

	for (i = 0; i != rc; i++) {
		switch (ptype[i]) {
		case RTE_PTYPE_L2_ETHER:
			smask |= ETHER_PTYPE;
			break;
		case RTE_PTYPE_L3_IPV4:
		case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN:
			smask |= IPV4_PTYPE;
			break;
		case RTE_PTYPE_L3_IPV4_EXT:
			smask |= IPV4_EXT_PTYPE;
			break;
		case RTE_PTYPE_L3_IPV6:
		case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN:
			smask |= IPV6_PTYPE;
			break;
		case RTE_PTYPE_L3_IPV6_EXT:
			smask |= IPV6_EXT_PTYPE;
			break;
		case RTE_PTYPE_L4_TCP:
			smask |= TCP_PTYPE;
			break;
		case RTE_PTYPE_L4_UDP:
			smask |= UDP_PTYPE;
			break;
		}
	}

	return smask;
}

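/*
 * select and install the RX callback for the given port/queue,
 * based on the L4 protocol and the packet types the HW can recognize.
 */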
int
setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
	uint16_t qid, uint32_t arp)
{
	int32_t rc;
	uint32_t i, n, smask;
	const void *cb;
	const struct ptype2cb *ptype2cb;

	static const struct ptype2cb tcp_ptype2cb[] = {
		{
			.mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
				IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE,
			.name = "HW l2/l3x/l4-tcp ptype",
			.fn = type0_tcp_rx_callback,
		},
		{
			.mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
				TCP_PTYPE,
			.name = "HW l2/l3/l4-tcp ptype",
			.fn = type1_tcp_rx_callback,
		},
		{
			.mask = 0,
			.name = "tcp no HW ptype",
			.fn = typen_tcp_rx_callback,
		},
	};

	static const struct ptype2cb tcp_arp_ptype2cb[] = {
		{
			.mask = 0,
			.name = "tcp with arp no HW ptype",
			.fn = typen_tcp_arp_rx_callback,
		},
	};

	static const struct ptype2cb udp_ptype2cb[] = {
		{
			.mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
				IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE,
			.name = "HW l2/l3x/l4-udp ptype",
			.fn = type0_udp_rx_callback,
		},
		{
			.mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
				UDP_PTYPE,
			.name = "HW l2/l3/l4-udp ptype",
			.fn = type1_udp_rx_callback,
		},
		{
			.mask = 0,
			.name = "udp no HW ptype",
			.fn = typen_udp_rx_callback,
		},
	};

	smask = get_ptypes(uprt);

	if (lc->proto == TLE_PROTO_TCP) {
		if (arp != 0) {
			ptype2cb = tcp_arp_ptype2cb;
			n = RTE_DIM(tcp_arp_ptype2cb);
		} else {
			ptype2cb = tcp_ptype2cb;
			n = RTE_DIM(tcp_ptype2cb);
		}
	} else if (lc->proto == TLE_PROTO_UDP) {
		ptype2cb = udp_ptype2cb;
		n = RTE_DIM(udp_ptype2cb);
	} else {
		RTE_LOG(ERR, USER1,
			"%s(lc=%u) unsupported proto: %u\n",
			__func__, lc->id, lc->proto);
		return -EINVAL;
	}

	for (i = 0; i != n; i++) {
		if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
			cb = rte_eth_add_rx_callback(uprt->id, qid,
				ptype2cb[i].fn, lc);
			rc = -rte_errno;
			RTE_LOG(ERR, USER1,
				"%s(port=%u), setup RX callback \"%s\" "
				"returns %p;\n",
				__func__, uprt->id, ptype2cb[i].name, cb);
			return ((cb == NULL) ? rc : 0);
		}
	}

	/* no proper callback found. */
	RTE_LOG(ERR, USER1,
		"%s(port=%u) failed to find an appropriate callback;\n",
		__func__, uprt->id);
	return -ENOENT;
}