rte_mbuf.c revision 3d9b7210
1/*-
2 *   BSD LICENSE
3 *
4 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 *   Copyright 2014 6WIND S.A.
6 *   All rights reserved.
7 *
8 *   Redistribution and use in source and binary forms, with or without
9 *   modification, are permitted provided that the following conditions
10 *   are met:
11 *
12 *     * Redistributions of source code must retain the above copyright
13 *       notice, this list of conditions and the following disclaimer.
14 *     * Redistributions in binary form must reproduce the above copyright
15 *       notice, this list of conditions and the following disclaimer in
16 *       the documentation and/or other materials provided with the
17 *       distribution.
18 *     * Neither the name of Intel Corporation nor the names of its
19 *       contributors may be used to endorse or promote products derived
20 *       from this software without specific prior written permission.
21 *
22 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35#include <string.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <stdint.h>
39#include <stdarg.h>
40#include <inttypes.h>
41#include <errno.h>
42#include <ctype.h>
43#include <sys/queue.h>
44
45#include <rte_debug.h>
46#include <rte_common.h>
47#include <rte_log.h>
48#include <rte_memory.h>
49#include <rte_memzone.h>
50#include <rte_launch.h>
51#include <rte_eal.h>
52#include <rte_per_lcore.h>
53#include <rte_lcore.h>
54#include <rte_atomic.h>
55#include <rte_branch_prediction.h>
56#include <rte_mempool.h>
57#include <rte_mbuf.h>
58#include <rte_string_fns.h>
59#include <rte_hexdump.h>
60#include <rte_errno.h>
61#include <rte_memcpy.h>
62
63/*
64 * ctrlmbuf constructor, given as a callback function to
65 * rte_mempool_create()
66 */
67void
68rte_ctrlmbuf_init(struct rte_mempool *mp,
69		__attribute__((unused)) void *opaque_arg,
70		void *_m,
71		__attribute__((unused)) unsigned i)
72{
73	struct rte_mbuf *m = _m;
74	rte_pktmbuf_init(mp, opaque_arg, _m, i);
75	m->ol_flags |= CTRL_MBUF_FLAG;
76}
77
78/*
79 * pktmbuf pool constructor, given as a callback function to
80 * rte_mempool_create()
81 */
82void
83rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg)
84{
85	struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv;
86	struct rte_pktmbuf_pool_private default_mbp_priv;
87	uint16_t roomsz;
88
89	RTE_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf));
90
91	/* if no structure is provided, assume no mbuf private area */
92	user_mbp_priv = opaque_arg;
93	if (user_mbp_priv == NULL) {
94		default_mbp_priv.mbuf_priv_size = 0;
95		if (mp->elt_size > sizeof(struct rte_mbuf))
96			roomsz = mp->elt_size - sizeof(struct rte_mbuf);
97		else
98			roomsz = 0;
99		default_mbp_priv.mbuf_data_room_size = roomsz;
100		user_mbp_priv = &default_mbp_priv;
101	}
102
103	RTE_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) +
104		user_mbp_priv->mbuf_data_room_size +
105		user_mbp_priv->mbuf_priv_size);
106
107	mbp_priv = rte_mempool_get_priv(mp);
108	memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv));
109}
110
111/*
112 * pktmbuf constructor, given as a callback function to
113 * rte_mempool_create().
114 * Set the fields of a packet mbuf to their default values.
115 */
116void
117rte_pktmbuf_init(struct rte_mempool *mp,
118		 __attribute__((unused)) void *opaque_arg,
119		 void *_m,
120		 __attribute__((unused)) unsigned i)
121{
122	struct rte_mbuf *m = _m;
123	uint32_t mbuf_size, buf_len, priv_size;
124
125	priv_size = rte_pktmbuf_priv_size(mp);
126	mbuf_size = sizeof(struct rte_mbuf) + priv_size;
127	buf_len = rte_pktmbuf_data_room_size(mp);
128
129	RTE_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size);
130	RTE_ASSERT(mp->elt_size >= mbuf_size);
131	RTE_ASSERT(buf_len <= UINT16_MAX);
132
133	memset(m, 0, mp->elt_size);
134
135	/* start of buffer is after mbuf structure and priv data */
136	m->priv_size = priv_size;
137	m->buf_addr = (char *)m + mbuf_size;
138	m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
139	m->buf_len = (uint16_t)buf_len;
140
141	/* keep some headroom between start of buffer and data */
142	m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
143
144	/* init some constant fields */
145	m->pool = mp;
146	m->nb_segs = 1;
147	m->port = 0xff;
148}
149
150/* helper to create a mbuf pool */
151struct rte_mempool *
152rte_pktmbuf_pool_create(const char *name, unsigned n,
153	unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
154	int socket_id)
155{
156	struct rte_mempool *mp;
157	struct rte_pktmbuf_pool_private mbp_priv;
158	unsigned elt_size;
159	int ret;
160
161	if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) {
162		RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n",
163			priv_size);
164		rte_errno = EINVAL;
165		return NULL;
166	}
167	elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size +
168		(unsigned)data_room_size;
169	mbp_priv.mbuf_data_room_size = data_room_size;
170	mbp_priv.mbuf_priv_size = priv_size;
171
172	mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
173		 sizeof(struct rte_pktmbuf_pool_private), socket_id, 0);
174	if (mp == NULL)
175		return NULL;
176
177	ret = rte_mempool_set_ops_byname(mp,
178		RTE_MBUF_DEFAULT_MEMPOOL_OPS, NULL);
179	if (ret != 0) {
180		RTE_LOG(ERR, MBUF, "error setting mempool handler\n");
181		rte_mempool_free(mp);
182		rte_errno = -ret;
183		return NULL;
184	}
185	rte_pktmbuf_pool_init(mp, &mbp_priv);
186
187	ret = rte_mempool_populate_default(mp);
188	if (ret < 0) {
189		rte_mempool_free(mp);
190		rte_errno = -ret;
191		return NULL;
192	}
193
194	rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
195
196	return mp;
197}
198
199/* do some sanity checks on a mbuf: panic if it fails */
200void
201rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header)
202{
203	const struct rte_mbuf *m_seg;
204	unsigned nb_segs;
205
206	if (m == NULL)
207		rte_panic("mbuf is NULL\n");
208
209	/* generic checks */
210	if (m->pool == NULL)
211		rte_panic("bad mbuf pool\n");
212	if (m->buf_physaddr == 0)
213		rte_panic("bad phys addr\n");
214	if (m->buf_addr == NULL)
215		rte_panic("bad virt addr\n");
216
217	uint16_t cnt = rte_mbuf_refcnt_read(m);
218	if ((cnt == 0) || (cnt == UINT16_MAX))
219		rte_panic("bad ref cnt\n");
220
221	/* nothing to check for sub-segments */
222	if (is_header == 0)
223		return;
224
225	nb_segs = m->nb_segs;
226	m_seg = m;
227	while (m_seg && nb_segs != 0) {
228		m_seg = m_seg->next;
229		nb_segs--;
230	}
231	if (nb_segs != 0)
232		rte_panic("bad nb_segs\n");
233}
234
235/* dump a mbuf on console */
236void
237rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
238{
239	unsigned int len;
240	unsigned nb_segs;
241
242	__rte_mbuf_sanity_check(m, 1);
243
244	fprintf(f, "dump mbuf at %p, phys=%"PRIx64", buf_len=%u\n",
245	       m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len);
246	fprintf(f, "  pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, "
247	       "in_port=%u\n", m->pkt_len, m->ol_flags,
248	       (unsigned)m->nb_segs, (unsigned)m->port);
249	nb_segs = m->nb_segs;
250
251	while (m && nb_segs != 0) {
252		__rte_mbuf_sanity_check(m, 0);
253
254		fprintf(f, "  segment at %p, data=%p, data_len=%u\n",
255			m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len);
256		len = dump_len;
257		if (len > m->data_len)
258			len = m->data_len;
259		if (len != 0)
260			rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len);
261		dump_len -= len;
262		m = m->next;
263		nb_segs --;
264	}
265}
266
267/* read len data bytes in a mbuf at specified offset (internal) */
268const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
269	uint32_t len, void *buf)
270{
271	const struct rte_mbuf *seg = m;
272	uint32_t buf_off = 0, copy_len;
273
274	if (off + len > rte_pktmbuf_pkt_len(m))
275		return NULL;
276
277	while (off >= rte_pktmbuf_data_len(seg)) {
278		off -= rte_pktmbuf_data_len(seg);
279		seg = seg->next;
280	}
281
282	if (off + len <= rte_pktmbuf_data_len(seg))
283		return rte_pktmbuf_mtod_offset(seg, char *, off);
284
285	/* rare case: header is split among several segments */
286	while (len > 0) {
287		copy_len = rte_pktmbuf_data_len(seg) - off;
288		if (copy_len > len)
289			copy_len = len;
290		rte_memcpy((char *)buf + buf_off,
291			rte_pktmbuf_mtod_offset(seg, char *, off), copy_len);
292		off = 0;
293		buf_off += copy_len;
294		len -= copy_len;
295		seg = seg->next;
296	}
297
298	return buf;
299}
300
301/*
302 * Get the name of a RX offload flag. Must be kept synchronized with flag
303 * definitions in rte_mbuf.h.
304 */
305const char *rte_get_rx_ol_flag_name(uint64_t mask)
306{
307	switch (mask) {
308	case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT";
309	case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH";
310	case PKT_RX_FDIR: return "PKT_RX_FDIR";
311	case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD";
312	case PKT_RX_L4_CKSUM_GOOD: return "PKT_RX_L4_CKSUM_GOOD";
313	case PKT_RX_L4_CKSUM_NONE: return "PKT_RX_L4_CKSUM_NONE";
314	case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD";
315	case PKT_RX_IP_CKSUM_GOOD: return "PKT_RX_IP_CKSUM_GOOD";
316	case PKT_RX_IP_CKSUM_NONE: return "PKT_RX_IP_CKSUM_NONE";
317	case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD";
318	case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
319	case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
320	case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
321	case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
322	case PKT_RX_LRO: return "PKT_RX_LRO";
323	default: return NULL;
324	}
325}
326
/*
 * One entry of the tables used to turn an ol_flags value into a list of
 * flag names: the entry matches when (ol_flags & mask) == flag.
 */
struct flag_mask {
	uint64_t flag;	/* value expected once ol_flags has been masked */
	uint64_t mask;	/* bits of ol_flags taken into account */
	const char *default_name; /* used when the name lookup returns NULL */
};
332
333/* write the list of rx ol flags in buffer buf */
334int
335rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
336{
337	const struct flag_mask rx_flags[] = {
338		{ PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT, NULL },
339		{ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, NULL },
340		{ PKT_RX_FDIR, PKT_RX_FDIR, NULL },
341		{ PKT_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_MASK, NULL },
342		{ PKT_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_MASK, NULL },
343		{ PKT_RX_L4_CKSUM_NONE, PKT_RX_L4_CKSUM_MASK, NULL },
344		{ PKT_RX_L4_CKSUM_UNKNOWN, PKT_RX_L4_CKSUM_MASK,
345		  "PKT_RX_L4_CKSUM_UNKNOWN" },
346		{ PKT_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK, NULL },
347		{ PKT_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_MASK, NULL },
348		{ PKT_RX_IP_CKSUM_NONE, PKT_RX_IP_CKSUM_MASK, NULL },
349		{ PKT_RX_IP_CKSUM_UNKNOWN, PKT_RX_IP_CKSUM_MASK,
350		  "PKT_RX_IP_CKSUM_UNKNOWN" },
351		{ PKT_RX_EIP_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD, NULL },
352		{ PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL },
353		{ PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL },
354		{ PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
355		{ PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
356		{ PKT_RX_LRO, PKT_RX_LRO, NULL },
357	};
358	const char *name;
359	unsigned int i;
360	int ret;
361
362	if (buflen == 0)
363		return -1;
364
365	buf[0] = '\0';
366	for (i = 0; i < RTE_DIM(rx_flags); i++) {
367		if ((mask & rx_flags[i].mask) != rx_flags[i].flag)
368			continue;
369		name = rte_get_rx_ol_flag_name(rx_flags[i].flag);
370		if (name == NULL)
371			name = rx_flags[i].default_name;
372		ret = snprintf(buf, buflen, "%s ", name);
373		if (ret < 0)
374			return -1;
375		if ((size_t)ret >= buflen)
376			return -1;
377		buf += ret;
378		buflen -= ret;
379	}
380
381	return 0;
382}
383
384/*
385 * Get the name of a TX offload flag. Must be kept synchronized with flag
386 * definitions in rte_mbuf.h.
387 */
388const char *rte_get_tx_ol_flag_name(uint64_t mask)
389{
390	switch (mask) {
391	case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT";
392	case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM";
393	case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM";
394	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
395	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
396	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
397	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
398	case PKT_TX_IPV4: return "PKT_TX_IPV4";
399	case PKT_TX_IPV6: return "PKT_TX_IPV6";
400	case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM";
401	case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4";
402	case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6";
403	case PKT_TX_TUNNEL_VXLAN: return "PKT_TX_TUNNEL_VXLAN";
404	case PKT_TX_TUNNEL_GRE: return "PKT_TX_TUNNEL_GRE";
405	case PKT_TX_TUNNEL_IPIP: return "PKT_TX_TUNNEL_IPIP";
406	case PKT_TX_TUNNEL_GENEVE: return "PKT_TX_TUNNEL_GENEVE";
407	default: return NULL;
408	}
409}
410
411/* write the list of tx ol flags in buffer buf */
412int
413rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
414{
415	const struct flag_mask tx_flags[] = {
416		{ PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL },
417		{ PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL },
418		{ PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL },
419		{ PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL },
420		{ PKT_TX_UDP_CKSUM, PKT_TX_L4_MASK, NULL },
421		{ PKT_TX_L4_NO_CKSUM, PKT_TX_L4_MASK, "PKT_TX_L4_NO_CKSUM" },
422		{ PKT_TX_IEEE1588_TMST, PKT_TX_IEEE1588_TMST, NULL },
423		{ PKT_TX_TCP_SEG, PKT_TX_TCP_SEG, NULL },
424		{ PKT_TX_IPV4, PKT_TX_IPV4, NULL },
425		{ PKT_TX_IPV6, PKT_TX_IPV6, NULL },
426		{ PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL },
427		{ PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL },
428		{ PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL },
429		{ PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK,
430		  "PKT_TX_TUNNEL_NONE" },
431		{ PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK,
432		  "PKT_TX_TUNNEL_NONE" },
433		{ PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK,
434		  "PKT_TX_TUNNEL_NONE" },
435		{ PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
436		  "PKT_TX_TUNNEL_NONE" },
437	};
438	const char *name;
439	unsigned int i;
440	int ret;
441
442	if (buflen == 0)
443		return -1;
444
445	buf[0] = '\0';
446	for (i = 0; i < RTE_DIM(tx_flags); i++) {
447		if ((mask & tx_flags[i].mask) != tx_flags[i].flag)
448			continue;
449		name = rte_get_tx_ol_flag_name(tx_flags[i].flag);
450		if (name == NULL)
451			name = tx_flags[i].default_name;
452		ret = snprintf(buf, buflen, "%s ", name);
453		if (ret < 0)
454			return -1;
455		if ((size_t)ret >= buflen)
456			return -1;
457		buf += ret;
458		buflen -= ret;
459	}
460
461	return 0;
462}
463