1809f0800SChristian Ehrhardt/*-
2809f0800SChristian Ehrhardt *   BSD LICENSE
3809f0800SChristian Ehrhardt *
4809f0800SChristian Ehrhardt *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5809f0800SChristian Ehrhardt *   All rights reserved.
6809f0800SChristian Ehrhardt *
7809f0800SChristian Ehrhardt *   Redistribution and use in source and binary forms, with or without
8809f0800SChristian Ehrhardt *   modification, are permitted provided that the following conditions
9809f0800SChristian Ehrhardt *   are met:
10809f0800SChristian Ehrhardt *
11809f0800SChristian Ehrhardt *     * Redistributions of source code must retain the above copyright
12809f0800SChristian Ehrhardt *       notice, this list of conditions and the following disclaimer.
13809f0800SChristian Ehrhardt *     * Redistributions in binary form must reproduce the above copyright
14809f0800SChristian Ehrhardt *       notice, this list of conditions and the following disclaimer in
15809f0800SChristian Ehrhardt *       the documentation and/or other materials provided with the
16809f0800SChristian Ehrhardt *       distribution.
17809f0800SChristian Ehrhardt *     * Neither the name of Intel Corporation nor the names of its
18809f0800SChristian Ehrhardt *       contributors may be used to endorse or promote products derived
19809f0800SChristian Ehrhardt *       from this software without specific prior written permission.
20809f0800SChristian Ehrhardt *
21809f0800SChristian Ehrhardt *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22809f0800SChristian Ehrhardt *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23809f0800SChristian Ehrhardt *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24809f0800SChristian Ehrhardt *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25809f0800SChristian Ehrhardt *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26809f0800SChristian Ehrhardt *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27809f0800SChristian Ehrhardt *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28809f0800SChristian Ehrhardt *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29809f0800SChristian Ehrhardt *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30809f0800SChristian Ehrhardt *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31809f0800SChristian Ehrhardt *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32809f0800SChristian Ehrhardt */
33809f0800SChristian Ehrhardt
34809f0800SChristian Ehrhardt#include <stdint.h>
35809f0800SChristian Ehrhardt#include <rte_ethdev.h>
36809f0800SChristian Ehrhardt#include <rte_malloc.h>
37809f0800SChristian Ehrhardt
38809f0800SChristian Ehrhardt#include "ixgbe_ethdev.h"
39809f0800SChristian Ehrhardt#include "ixgbe_rxtx.h"
40809f0800SChristian Ehrhardt#include "ixgbe_rxtx_vec_common.h"
41809f0800SChristian Ehrhardt
42809f0800SChristian Ehrhardt#include <arm_neon.h>
43809f0800SChristian Ehrhardt
44809f0800SChristian Ehrhardt#pragma GCC diagnostic ignored "-Wcast-qual"
45809f0800SChristian Ehrhardt
46809f0800SChristian Ehrhardtstatic inline void
47809f0800SChristian Ehrhardtixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
48809f0800SChristian Ehrhardt{
49809f0800SChristian Ehrhardt	int i;
50809f0800SChristian Ehrhardt	uint16_t rx_id;
51809f0800SChristian Ehrhardt	volatile union ixgbe_adv_rx_desc *rxdp;
52809f0800SChristian Ehrhardt	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
53809f0800SChristian Ehrhardt	struct rte_mbuf *mb0, *mb1;
54809f0800SChristian Ehrhardt	uint64x2_t dma_addr0, dma_addr1;
55809f0800SChristian Ehrhardt	uint64x2_t zero = vdupq_n_u64(0);
56809f0800SChristian Ehrhardt	uint64_t paddr;
57809f0800SChristian Ehrhardt	uint8x8_t p;
58809f0800SChristian Ehrhardt
59809f0800SChristian Ehrhardt	rxdp = rxq->rx_ring + rxq->rxrearm_start;
60809f0800SChristian Ehrhardt
61809f0800SChristian Ehrhardt	/* Pull 'n' more MBUFs into the software ring */
62809f0800SChristian Ehrhardt	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
63809f0800SChristian Ehrhardt					  (void *)rxep,
64809f0800SChristian Ehrhardt					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
65809f0800SChristian Ehrhardt		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
66809f0800SChristian Ehrhardt		    rxq->nb_rx_desc) {
67809f0800SChristian Ehrhardt			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
68809f0800SChristian Ehrhardt				rxep[i].mbuf = &rxq->fake_mbuf;
69809f0800SChristian Ehrhardt				vst1q_u64((uint64_t *)&rxdp[i].read,
70809f0800SChristian Ehrhardt					  zero);
71809f0800SChristian Ehrhardt			}
72809f0800SChristian Ehrhardt		}
73809f0800SChristian Ehrhardt		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
74809f0800SChristian Ehrhardt			RTE_IXGBE_RXQ_REARM_THRESH;
75809f0800SChristian Ehrhardt		return;
76809f0800SChristian Ehrhardt	}
77809f0800SChristian Ehrhardt
78809f0800SChristian Ehrhardt	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);
79809f0800SChristian Ehrhardt
80809f0800SChristian Ehrhardt	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
81809f0800SChristian Ehrhardt	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
82809f0800SChristian Ehrhardt		mb0 = rxep[0].mbuf;
83809f0800SChristian Ehrhardt		mb1 = rxep[1].mbuf;
84809f0800SChristian Ehrhardt
85809f0800SChristian Ehrhardt		/*
86809f0800SChristian Ehrhardt		 * Flush mbuf with pkt template.
87809f0800SChristian Ehrhardt		 * Data to be rearmed is 6 bytes long.
88809f0800SChristian Ehrhardt		 * Though, RX will overwrite ol_flags that are coming next
89809f0800SChristian Ehrhardt		 * anyway. So overwrite whole 8 bytes with one load:
90809f0800SChristian Ehrhardt		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
91809f0800SChristian Ehrhardt		 */
92809f0800SChristian Ehrhardt		vst1_u8((uint8_t *)&mb0->rearm_data, p);
93809f0800SChristian Ehrhardt		paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
94809f0800SChristian Ehrhardt		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
95809f0800SChristian Ehrhardt		/* flush desc with pa dma_addr */
96809f0800SChristian Ehrhardt		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
97809f0800SChristian Ehrhardt
98809f0800SChristian Ehrhardt		vst1_u8((uint8_t *)&mb1->rearm_data, p);
99809f0800SChristian Ehrhardt		paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM;
100809f0800SChristian Ehrhardt		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
101809f0800SChristian Ehrhardt		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
102809f0800SChristian Ehrhardt	}
103809f0800SChristian Ehrhardt
104809f0800SChristian Ehrhardt	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
105809f0800SChristian Ehrhardt	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
106809f0800SChristian Ehrhardt		rxq->rxrearm_start = 0;
107809f0800SChristian Ehrhardt
108809f0800SChristian Ehrhardt	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;
109809f0800SChristian Ehrhardt
110809f0800SChristian Ehrhardt	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
111809f0800SChristian Ehrhardt			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
112809f0800SChristian Ehrhardt
113809f0800SChristian Ehrhardt	/* Update the tail pointer on the NIC */
114809f0800SChristian Ehrhardt	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
115809f0800SChristian Ehrhardt}
116809f0800SChristian Ehrhardt
/*
 * Deriving the offload flags (ol_flags) costs cycles on every received
 * packet. RTE_IXGBE_RX_OLFLAGS_ENABLE therefore gates that work: when it is
 * not defined, desc_to_olflags_v() expands to nothing, which is faster but
 * leaves the received mbufs without offload information.
 */
#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE

#define VTAG_SHIFT     (3)

/*
 * Compute ol_flags for four packets at once and store them in rx_pkts[0..3].
 *
 * sterr_tmp1 / sterr_tmp2: byte-wise zips of descriptors (0,2) and (1,3) as
 *                          built by the caller.
 * staterr:                 first four lanes carry one status byte per
 *                          descriptor.
 * rx_pkts:                 the four mbufs that receive the flags.
 *
 * Each packet ends up with a single flag byte: an RSS/FDIR flag looked up
 * from the low nibble of the first descriptor byte, OR-ed with the VLAN flag
 * taken from the status byte.
 */
static inline void
desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
		  uint8x16_t staterr, struct rte_mbuf **rx_pkts)
{
	/* keeps the low nibble of the first four lanes only */
	const uint8x16_t rsstype_msk = {
			0x0F, 0x0F, 0x0F, 0x0F,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	/* 16-entry lookup table: nibble value -> RSS/FDIR flag */
	const uint8x16_t rss_flags = {
			0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
			0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
			PKT_RX_RSS_HASH, 0, 0, 0,
			0, 0, 0, PKT_RX_FDIR};

	/* keeps the VLAN flag bit(s) of the first four lanes only */
	const uint8x16_t pkttype_msk = {
			PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
			PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	/* byte view of the four per-packet flag bytes */
	union {
		uint8_t e[4];
		uint32_t word;
	} vol;

	uint8x16_t rss_bits;
	uint8x16_t vlan_bits;
	int k;

	/* lanes 0..3 <- byte 0 of descriptors 0..3, reduced to a table index */
	rss_bits = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];
	rss_bits = vandq_u8(rss_bits, rsstype_msk);
	rss_bits = vqtbl1q_u8(rss_flags, rss_bits);

	/* shift the status byte so its VLAN bit lines up with the flag bit */
	vlan_bits = vshrq_n_u8(staterr, VTAG_SHIFT);
	vlan_bits = vandq_u8(vlan_bits, pkttype_msk);
	vlan_bits = vorrq_u8(rss_bits, vlan_bits);

	vol.word = vgetq_lane_u32(vreinterpretq_u32_u8(vlan_bits), 0);

	for (k = 0; k < 4; k++)
		rx_pkts[k]->ol_flags = vol.e[k];
}
#else
#define desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, rx_pkts)
#endif
176809f0800SChristian Ehrhardt
177809f0800SChristian Ehrhardt/*
178809f0800SChristian Ehrhardt * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
179809f0800SChristian Ehrhardt *
180809f0800SChristian Ehrhardt * Notice:
181809f0800SChristian Ehrhardt * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
182809f0800SChristian Ehrhardt * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
183809f0800SChristian Ehrhardt *   numbers of DD bit
184809f0800SChristian Ehrhardt * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
185809f0800SChristian Ehrhardt * - don't support ol_flags for rss and csum err
186809f0800SChristian Ehrhardt */
187809f0800SChristian Ehrhardt
188809f0800SChristian Ehrhardt#define IXGBE_VPMD_DESC_DD_MASK		0x01010101
189809f0800SChristian Ehrhardt#define IXGBE_VPMD_DESC_EOP_MASK	0x02020202
190809f0800SChristian Ehrhardt
191809f0800SChristian Ehrhardtstatic inline uint16_t
192809f0800SChristian Ehrhardt_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
193809f0800SChristian Ehrhardt		   uint16_t nb_pkts, uint8_t *split_packet)
194809f0800SChristian Ehrhardt{
195809f0800SChristian Ehrhardt	volatile union ixgbe_adv_rx_desc *rxdp;
196809f0800SChristian Ehrhardt	struct ixgbe_rx_entry *sw_ring;
197809f0800SChristian Ehrhardt	uint16_t nb_pkts_recd;
198809f0800SChristian Ehrhardt	int pos;
199809f0800SChristian Ehrhardt	uint8x16_t shuf_msk = {
200809f0800SChristian Ehrhardt		0xFF, 0xFF,
201809f0800SChristian Ehrhardt		0xFF, 0xFF,  /* skip 32 bits pkt_type */
202809f0800SChristian Ehrhardt		12, 13,      /* octet 12~13, low 16 bits pkt_len */
203809f0800SChristian Ehrhardt		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
204809f0800SChristian Ehrhardt		12, 13,      /* octet 12~13, 16 bits data_len */
205809f0800SChristian Ehrhardt		14, 15,      /* octet 14~15, low 16 bits vlan_macip */
206809f0800SChristian Ehrhardt		4, 5, 6, 7  /* octet 4~7, 32bits rss */
207809f0800SChristian Ehrhardt		};
208809f0800SChristian Ehrhardt	uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
209809f0800SChristian Ehrhardt				 rxq->crc_len, 0, 0, 0};
210809f0800SChristian Ehrhardt
211809f0800SChristian Ehrhardt	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
212809f0800SChristian Ehrhardt	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
213809f0800SChristian Ehrhardt
214809f0800SChristian Ehrhardt	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
215809f0800SChristian Ehrhardt	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
216809f0800SChristian Ehrhardt
217809f0800SChristian Ehrhardt	/* Just the act of getting into the function from the application is
218809f0800SChristian Ehrhardt	 * going to cost about 7 cycles
219809f0800SChristian Ehrhardt	 */
220809f0800SChristian Ehrhardt	rxdp = rxq->rx_ring + rxq->rx_tail;
221809f0800SChristian Ehrhardt
222809f0800SChristian Ehrhardt	rte_prefetch_non_temporal(rxdp);
223809f0800SChristian Ehrhardt
224809f0800SChristian Ehrhardt	/* See if we need to rearm the RX queue - gives the prefetch a bit
225809f0800SChristian Ehrhardt	 * of time to act
226809f0800SChristian Ehrhardt	 */
227809f0800SChristian Ehrhardt	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
228809f0800SChristian Ehrhardt		ixgbe_rxq_rearm(rxq);
229809f0800SChristian Ehrhardt
230809f0800SChristian Ehrhardt	/* Before we start moving massive data around, check to see if
231809f0800SChristian Ehrhardt	 * there is actually a packet available
232809f0800SChristian Ehrhardt	 */
233809f0800SChristian Ehrhardt	if (!(rxdp->wb.upper.status_error &
234809f0800SChristian Ehrhardt				rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
235809f0800SChristian Ehrhardt		return 0;
236809f0800SChristian Ehrhardt
237809f0800SChristian Ehrhardt	/* Cache is empty -> need to scan the buffer rings, but first move
238809f0800SChristian Ehrhardt	 * the next 'n' mbufs into the cache
239809f0800SChristian Ehrhardt	 */
240809f0800SChristian Ehrhardt	sw_ring = &rxq->sw_ring[rxq->rx_tail];
241809f0800SChristian Ehrhardt
242809f0800SChristian Ehrhardt	/* A. load 4 packet in one loop
243809f0800SChristian Ehrhardt	 * B. copy 4 mbuf point from swring to rx_pkts
244809f0800SChristian Ehrhardt	 * C. calc the number of DD bits among the 4 packets
245809f0800SChristian Ehrhardt	 * [C*. extract the end-of-packet bit, if requested]
246809f0800SChristian Ehrhardt	 * D. fill info. from desc to mbuf
247809f0800SChristian Ehrhardt	 */
248809f0800SChristian Ehrhardt	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
249809f0800SChristian Ehrhardt			pos += RTE_IXGBE_DESCS_PER_LOOP,
250809f0800SChristian Ehrhardt			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
251809f0800SChristian Ehrhardt		uint64x2_t descs[RTE_IXGBE_DESCS_PER_LOOP];
252809f0800SChristian Ehrhardt		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
253809f0800SChristian Ehrhardt		uint8x16x2_t sterr_tmp1, sterr_tmp2;
254809f0800SChristian Ehrhardt		uint64x2_t mbp1, mbp2;
255809f0800SChristian Ehrhardt		uint8x16_t staterr;
256809f0800SChristian Ehrhardt		uint16x8_t tmp;
2570c3ed7dcSChristian Ehrhardt		uint32_t var = 0;
258809f0800SChristian Ehrhardt		uint32_t stat;
259809f0800SChristian Ehrhardt
260809f0800SChristian Ehrhardt		/* B.1 load 1 mbuf point */
261809f0800SChristian Ehrhardt		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
262809f0800SChristian Ehrhardt
263809f0800SChristian Ehrhardt		/* B.2 copy 2 mbuf point into rx_pkts  */
264809f0800SChristian Ehrhardt		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
265809f0800SChristian Ehrhardt
266809f0800SChristian Ehrhardt		/* B.1 load 1 mbuf point */
267809f0800SChristian Ehrhardt		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
268809f0800SChristian Ehrhardt
2690c3ed7dcSChristian Ehrhardt		/* A. load 4 pkts descs */
270809f0800SChristian Ehrhardt		descs[0] =  vld1q_u64((uint64_t *)(rxdp));
2710c3ed7dcSChristian Ehrhardt		descs[1] =  vld1q_u64((uint64_t *)(rxdp + 1));
2720c3ed7dcSChristian Ehrhardt		descs[2] =  vld1q_u64((uint64_t *)(rxdp + 2));
2730c3ed7dcSChristian Ehrhardt		descs[3] =  vld1q_u64((uint64_t *)(rxdp + 3));
2740c3ed7dcSChristian Ehrhardt		rte_smp_rmb();
275809f0800SChristian Ehrhardt
276809f0800SChristian Ehrhardt		/* B.2 copy 2 mbuf point into rx_pkts  */
277809f0800SChristian Ehrhardt		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
278809f0800SChristian Ehrhardt
279809f0800SChristian Ehrhardt		if (split_packet) {
280809f0800SChristian Ehrhardt			rte_mbuf_prefetch_part2(rx_pkts[pos]);
281809f0800SChristian Ehrhardt			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
282809f0800SChristian Ehrhardt			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
283809f0800SChristian Ehrhardt			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
284809f0800SChristian Ehrhardt		}
285809f0800SChristian Ehrhardt
286809f0800SChristian Ehrhardt		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
287809f0800SChristian Ehrhardt		pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
288809f0800SChristian Ehrhardt		pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);
289809f0800SChristian Ehrhardt
290809f0800SChristian Ehrhardt		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
291809f0800SChristian Ehrhardt		pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
292809f0800SChristian Ehrhardt		pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);
293809f0800SChristian Ehrhardt
294809f0800SChristian Ehrhardt		/* C.1 4=>2 filter staterr info only */
295809f0800SChristian Ehrhardt		sterr_tmp2 = vzipq_u8(vreinterpretq_u8_u64(descs[1]),
296809f0800SChristian Ehrhardt				      vreinterpretq_u8_u64(descs[3]));
297809f0800SChristian Ehrhardt		/* C.1 4=>2 filter staterr info only */
298809f0800SChristian Ehrhardt		sterr_tmp1 = vzipq_u8(vreinterpretq_u8_u64(descs[0]),
299809f0800SChristian Ehrhardt				      vreinterpretq_u8_u64(descs[2]));
300809f0800SChristian Ehrhardt
301809f0800SChristian Ehrhardt		/* C.2 get 4 pkts staterr value  */
302809f0800SChristian Ehrhardt		staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];
303809f0800SChristian Ehrhardt		stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
304809f0800SChristian Ehrhardt
305809f0800SChristian Ehrhardt		/* set ol_flags with vlan packet type */
306809f0800SChristian Ehrhardt		desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr,
307809f0800SChristian Ehrhardt				  &rx_pkts[pos]);
308809f0800SChristian Ehrhardt
309809f0800SChristian Ehrhardt		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
310809f0800SChristian Ehrhardt		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
311809f0800SChristian Ehrhardt		pkt_mb4 = vreinterpretq_u8_u16(tmp);
312809f0800SChristian Ehrhardt		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
313809f0800SChristian Ehrhardt		pkt_mb3 = vreinterpretq_u8_u16(tmp);
314809f0800SChristian Ehrhardt
315809f0800SChristian Ehrhardt		/* D.3 copy final 3,4 data to rx_pkts */
316809f0800SChristian Ehrhardt		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
317809f0800SChristian Ehrhardt			 pkt_mb4);
318809f0800SChristian Ehrhardt		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
319809f0800SChristian Ehrhardt			 pkt_mb3);
320809f0800SChristian Ehrhardt
321809f0800SChristian Ehrhardt		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
322809f0800SChristian Ehrhardt		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
323809f0800SChristian Ehrhardt		pkt_mb2 = vreinterpretq_u8_u16(tmp);
324809f0800SChristian Ehrhardt		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
325809f0800SChristian Ehrhardt		pkt_mb1 = vreinterpretq_u8_u16(tmp);
326809f0800SChristian Ehrhardt
327809f0800SChristian Ehrhardt		/* C* extract and record EOP bit */
328809f0800SChristian Ehrhardt		if (split_packet) {
329809f0800SChristian Ehrhardt			/* and with mask to extract bits, flipping 1-0 */
330809f0800SChristian Ehrhardt			*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;
331809f0800SChristian Ehrhardt
332809f0800SChristian Ehrhardt			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
333809f0800SChristian Ehrhardt
334809f0800SChristian Ehrhardt			/* zero-out next pointers */
335809f0800SChristian Ehrhardt			rx_pkts[pos]->next = NULL;
336809f0800SChristian Ehrhardt			rx_pkts[pos + 1]->next = NULL;
337809f0800SChristian Ehrhardt			rx_pkts[pos + 2]->next = NULL;
338809f0800SChristian Ehrhardt			rx_pkts[pos + 3]->next = NULL;
339809f0800SChristian Ehrhardt		}
340809f0800SChristian Ehrhardt
341809f0800SChristian Ehrhardt		rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);
342809f0800SChristian Ehrhardt
343809f0800SChristian Ehrhardt		/* D.3 copy final 1,2 data to rx_pkts */
344809f0800SChristian Ehrhardt		vst1q_u8((uint8_t *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
345809f0800SChristian Ehrhardt			 pkt_mb2);
346809f0800SChristian Ehrhardt		vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
347809f0800SChristian Ehrhardt			 pkt_mb1);
348809f0800SChristian Ehrhardt
3490c3ed7dcSChristian Ehrhardt		stat &= IXGBE_VPMD_DESC_DD_MASK;
3500c3ed7dcSChristian Ehrhardt
351809f0800SChristian Ehrhardt		/* C.4 calc avaialbe number of desc */
3520c3ed7dcSChristian Ehrhardt		if (likely(stat != IXGBE_VPMD_DESC_DD_MASK)) {
3530c3ed7dcSChristian Ehrhardt			while (stat & 0x01) {
3540c3ed7dcSChristian Ehrhardt				++var;
3550c3ed7dcSChristian Ehrhardt				stat = stat >> 8;
3560c3ed7dcSChristian Ehrhardt			}
3570c3ed7dcSChristian Ehrhardt			nb_pkts_recd += var;
358809f0800SChristian Ehrhardt			break;
3590c3ed7dcSChristian Ehrhardt		} else {
3600c3ed7dcSChristian Ehrhardt			nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
3610c3ed7dcSChristian Ehrhardt		}
362809f0800SChristian Ehrhardt	}
363809f0800SChristian Ehrhardt
364809f0800SChristian Ehrhardt	/* Update our internal tail pointer */
365809f0800SChristian Ehrhardt	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
366809f0800SChristian Ehrhardt	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
367809f0800SChristian Ehrhardt	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);
368809f0800SChristian Ehrhardt
369809f0800SChristian Ehrhardt	return nb_pkts_recd;
370809f0800SChristian Ehrhardt}
371809f0800SChristian Ehrhardt
372809f0800SChristian Ehrhardt/*
373809f0800SChristian Ehrhardt * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
374809f0800SChristian Ehrhardt *
375809f0800SChristian Ehrhardt * Notice:
376809f0800SChristian Ehrhardt * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
377809f0800SChristian Ehrhardt * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
378809f0800SChristian Ehrhardt *   numbers of DD bit
379809f0800SChristian Ehrhardt * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
380809f0800SChristian Ehrhardt * - don't support ol_flags for rss and csum err
381809f0800SChristian Ehrhardt */
382809f0800SChristian Ehrhardtuint16_t
383809f0800SChristian Ehrhardtixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
384809f0800SChristian Ehrhardt		uint16_t nb_pkts)
385809f0800SChristian Ehrhardt{
386809f0800SChristian Ehrhardt	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
387809f0800SChristian Ehrhardt}
388809f0800SChristian Ehrhardt
389809f0800SChristian Ehrhardt/*
390809f0800SChristian Ehrhardt * vPMD receive routine that reassembles scattered packets
391809f0800SChristian Ehrhardt *
392809f0800SChristian Ehrhardt * Notice:
393809f0800SChristian Ehrhardt * - don't support ol_flags for rss and csum err
394809f0800SChristian Ehrhardt * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
395809f0800SChristian Ehrhardt * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
396809f0800SChristian Ehrhardt *   numbers of DD bit
397809f0800SChristian Ehrhardt * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
398809f0800SChristian Ehrhardt */
399809f0800SChristian Ehrhardtuint16_t
400809f0800SChristian Ehrhardtixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
401809f0800SChristian Ehrhardt		uint16_t nb_pkts)
402809f0800SChristian Ehrhardt{
403809f0800SChristian Ehrhardt	struct ixgbe_rx_queue *rxq = rx_queue;
404809f0800SChristian Ehrhardt	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
405809f0800SChristian Ehrhardt
406809f0800SChristian Ehrhardt	/* get some new buffers */
407809f0800SChristian Ehrhardt	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
408809f0800SChristian Ehrhardt			split_flags);
409809f0800SChristian Ehrhardt	if (nb_bufs == 0)
410809f0800SChristian Ehrhardt		return 0;
411809f0800SChristian Ehrhardt
412809f0800SChristian Ehrhardt	/* happy day case, full burst + no packets to be joined */
413809f0800SChristian Ehrhardt	const uint64_t *split_fl64 = (uint64_t *)split_flags;
414809f0800SChristian Ehrhardt	if (rxq->pkt_first_seg == NULL &&
415809f0800SChristian Ehrhardt			split_fl64[0] == 0 && split_fl64[1] == 0 &&
416809f0800SChristian Ehrhardt			split_fl64[2] == 0 && split_fl64[3] == 0)
417809f0800SChristian Ehrhardt		return nb_bufs;
418809f0800SChristian Ehrhardt
419809f0800SChristian Ehrhardt	/* reassemble any packets that need reassembly*/
420809f0800SChristian Ehrhardt	unsigned int i = 0;
421809f0800SChristian Ehrhardt	if (rxq->pkt_first_seg == NULL) {
422809f0800SChristian Ehrhardt		/* find the first split flag, and only reassemble then*/
423809f0800SChristian Ehrhardt		while (i < nb_bufs && !split_flags[i])
424809f0800SChristian Ehrhardt			i++;
425809f0800SChristian Ehrhardt		if (i == nb_bufs)
426809f0800SChristian Ehrhardt			return nb_bufs;
427809f0800SChristian Ehrhardt	}
428809f0800SChristian Ehrhardt	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
429809f0800SChristian Ehrhardt		&split_flags[i]);
430809f0800SChristian Ehrhardt}
431809f0800SChristian Ehrhardt
432809f0800SChristian Ehrhardtstatic inline void
433809f0800SChristian Ehrhardtvtx1(volatile union ixgbe_adv_tx_desc *txdp,
434809f0800SChristian Ehrhardt		struct rte_mbuf *pkt, uint64_t flags)
435809f0800SChristian Ehrhardt{
436809f0800SChristian Ehrhardt	uint64x2_t descriptor = {
437809f0800SChristian Ehrhardt			pkt->buf_physaddr + pkt->data_off,
438809f0800SChristian Ehrhardt			(uint64_t)pkt->pkt_len << 46 | flags | pkt->data_len};
439809f0800SChristian Ehrhardt
440809f0800SChristian Ehrhardt	vst1q_u64((uint64_t *)&txdp->read, descriptor);
441809f0800SChristian Ehrhardt}
442809f0800SChristian Ehrhardt
443809f0800SChristian Ehrhardtstatic inline void
444809f0800SChristian Ehrhardtvtx(volatile union ixgbe_adv_tx_desc *txdp,
445809f0800SChristian Ehrhardt		struct rte_mbuf **pkt, uint16_t nb_pkts,  uint64_t flags)
446809f0800SChristian Ehrhardt{
447809f0800SChristian Ehrhardt	int i;
448809f0800SChristian Ehrhardt
449809f0800SChristian Ehrhardt	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
450809f0800SChristian Ehrhardt		vtx1(txdp, *pkt, flags);
451809f0800SChristian Ehrhardt}
452809f0800SChristian Ehrhardt
453809f0800SChristian Ehrhardtuint16_t
454809f0800SChristian Ehrhardtixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
455809f0800SChristian Ehrhardt		       uint16_t nb_pkts)
456809f0800SChristian Ehrhardt{
457809f0800SChristian Ehrhardt	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
458809f0800SChristian Ehrhardt	volatile union ixgbe_adv_tx_desc *txdp;
459809f0800SChristian Ehrhardt	struct ixgbe_tx_entry_v *txep;
460809f0800SChristian Ehrhardt	uint16_t n, nb_commit, tx_id;
461809f0800SChristian Ehrhardt	uint64_t flags = DCMD_DTYP_FLAGS;
462809f0800SChristian Ehrhardt	uint64_t rs = IXGBE_ADVTXD_DCMD_RS | DCMD_DTYP_FLAGS;
463809f0800SChristian Ehrhardt	int i;
464809f0800SChristian Ehrhardt
465809f0800SChristian Ehrhardt	/* cross rx_thresh boundary is not allowed */
466809f0800SChristian Ehrhardt	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
467809f0800SChristian Ehrhardt
468809f0800SChristian Ehrhardt	if (txq->nb_tx_free < txq->tx_free_thresh)
469809f0800SChristian Ehrhardt		ixgbe_tx_free_bufs(txq);
470809f0800SChristian Ehrhardt
471809f0800SChristian Ehrhardt	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
472809f0800SChristian Ehrhardt	if (unlikely(nb_pkts == 0))
473809f0800SChristian Ehrhardt		return 0;
474809f0800SChristian Ehrhardt
475809f0800SChristian Ehrhardt	tx_id = txq->tx_tail;
476809f0800SChristian Ehrhardt	txdp = &txq->tx_ring[tx_id];
477809f0800SChristian Ehrhardt	txep = &txq->sw_ring_v[tx_id];
478809f0800SChristian Ehrhardt
479809f0800SChristian Ehrhardt	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
480809f0800SChristian Ehrhardt
481809f0800SChristian Ehrhardt	n = (uint16_t)(txq->nb_tx_desc - tx_id);
482809f0800SChristian Ehrhardt	if (nb_commit >= n) {
483809f0800SChristian Ehrhardt		tx_backlog_entry(txep, tx_pkts, n);
484809f0800SChristian Ehrhardt
485809f0800SChristian Ehrhardt		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
486809f0800SChristian Ehrhardt			vtx1(txdp, *tx_pkts, flags);
487809f0800SChristian Ehrhardt
488809f0800SChristian Ehrhardt		vtx1(txdp, *tx_pkts++, rs);
489809f0800SChristian Ehrhardt
490809f0800SChristian Ehrhardt		nb_commit = (uint16_t)(nb_commit - n);
491809f0800SChristian Ehrhardt
492809f0800SChristian Ehrhardt		tx_id = 0;
493809f0800SChristian Ehrhardt		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
494809f0800SChristian Ehrhardt
495809f0800SChristian Ehrhardt		/* avoid reach the end of ring */
496809f0800SChristian Ehrhardt		txdp = &txq->tx_ring[tx_id];
497809f0800SChristian Ehrhardt		txep = &txq->sw_ring_v[tx_id];
498809f0800SChristian Ehrhardt	}
499809f0800SChristian Ehrhardt
500809f0800SChristian Ehrhardt	tx_backlog_entry(txep, tx_pkts, nb_commit);
501809f0800SChristian Ehrhardt
502809f0800SChristian Ehrhardt	vtx(txdp, tx_pkts, nb_commit, flags);
503809f0800SChristian Ehrhardt
504809f0800SChristian Ehrhardt	tx_id = (uint16_t)(tx_id + nb_commit);
505809f0800SChristian Ehrhardt	if (tx_id > txq->tx_next_rs) {
506809f0800SChristian Ehrhardt		txq->tx_ring[txq->tx_next_rs].read.cmd_type_len |=
507809f0800SChristian Ehrhardt			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
508809f0800SChristian Ehrhardt		txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
509809f0800SChristian Ehrhardt			txq->tx_rs_thresh);
510809f0800SChristian Ehrhardt	}
511809f0800SChristian Ehrhardt
512809f0800SChristian Ehrhardt	txq->tx_tail = tx_id;
513809f0800SChristian Ehrhardt
514809f0800SChristian Ehrhardt	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
515809f0800SChristian Ehrhardt
516809f0800SChristian Ehrhardt	return nb_pkts;
517809f0800SChristian Ehrhardt}
518809f0800SChristian Ehrhardt
/*
 * release_mbufs callback of vec_txq_ops; forwards to the shared helper
 * _ixgbe_tx_queue_release_mbufs_vec().
 */
static void __attribute__((cold))
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
	_ixgbe_tx_queue_release_mbufs_vec(txq);
}
524809f0800SChristian Ehrhardt
/*
 * Release the mbufs of a vector-path RX queue; forwards to the shared helper
 * _ixgbe_rx_queue_release_mbufs_vec().
 */
void __attribute__((cold))
ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
{
	_ixgbe_rx_queue_release_mbufs_vec(rxq);
}
530809f0800SChristian Ehrhardt
/*
 * free_swring callback of vec_txq_ops; forwards to the shared helper
 * _ixgbe_tx_free_swring_vec().
 */
static void __attribute__((cold))
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
	_ixgbe_tx_free_swring_vec(txq);
}
536809f0800SChristian Ehrhardt
/*
 * reset callback of vec_txq_ops; forwards to the shared helper
 * _ixgbe_reset_tx_queue_vec().
 */
static void __attribute__((cold))
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
	_ixgbe_reset_tx_queue_vec(txq);
}
542809f0800SChristian Ehrhardt
543809f0800SChristian Ehrhardtstatic const struct ixgbe_txq_ops vec_txq_ops = {
544809f0800SChristian Ehrhardt	.release_mbufs = ixgbe_tx_queue_release_mbufs_vec,
545809f0800SChristian Ehrhardt	.free_swring = ixgbe_tx_free_swring,
546809f0800SChristian Ehrhardt	.reset = ixgbe_reset_tx_queue,
547809f0800SChristian Ehrhardt};
548809f0800SChristian Ehrhardt
/*
 * Prepare an RX queue for the vector receive path. Everything is delegated
 * to ixgbe_rxq_vec_setup_default(), whose return value is passed through.
 */
int __attribute__((cold))
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
{
	int ret = ixgbe_rxq_vec_setup_default(rxq);

	return ret;
}
554809f0800SChristian Ehrhardt
555809f0800SChristian Ehrhardtint __attribute__((cold))
556809f0800SChristian Ehrhardtixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
557809f0800SChristian Ehrhardt{
558809f0800SChristian Ehrhardt	return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
559809f0800SChristian Ehrhardt}
560809f0800SChristian Ehrhardt
561809f0800SChristian Ehrhardtint __attribute__((cold))
562809f0800SChristian Ehrhardtixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
563809f0800SChristian Ehrhardt{
5643d9b7210SChristian Ehrhardt	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5653d9b7210SChristian Ehrhardt
5663d9b7210SChristian Ehrhardt	/* no csum error report support */
5673d9b7210SChristian Ehrhardt	if (rxmode->hw_ip_checksum == 1)
5683d9b7210SChristian Ehrhardt		return -1;
5693d9b7210SChristian Ehrhardt
570809f0800SChristian Ehrhardt	return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
571809f0800SChristian Ehrhardt}
572