rte_ring.h revision 3d9b7210
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Derived from FreeBSD's bufring.h
 *
 **************************************************************************
 *
 * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. The name of Kip Macy nor the names of other
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/

#ifndef _RTE_RING_H_
#define _RTE_RING_H_

/**
 * @file
 * RTE Ring
 *
 * The Ring Manager is a fixed-size queue, implemented as a table of
 * pointers. Head and tail pointers are modified atomically, allowing
 * concurrent access to the ring. It has the following features:
 *
 * - FIFO (First In First Out)
 * - Maximum size is fixed; the pointers are stored in a table.
 * - Lockless implementation.
 * - Multi- or single-consumer dequeue.
 * - Multi- or single-producer enqueue.
 * - Bulk dequeue.
 * - Bulk enqueue.
 *
 * Note: the ring implementation is not preemptible. An lcore must not
 * be interrupted by another task that uses the same ring.
 *
 */

#ifdef __cplusplus
extern "C" {
#endif

#include <stdio.h>
#include <stdint.h>
#include <sys/queue.h>
#include <errno.h>
#include <sched.h> /* for sched_yield(), used by the enqueue/dequeue loops */
#include <rte_common.h>
#include <rte_memory.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_memzone.h>

#define RTE_TAILQ_RING_NAME "RTE_RING"

enum rte_ring_queue_behavior {
	RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */
	RTE_RING_QUEUE_VARIABLE   /* Enq/Deq as many items as possible from a ring */
};

#ifdef RTE_LIBRTE_RING_DEBUG
/**
 * A structure that stores the ring statistics (per-lcore).
 */
struct rte_ring_debug_stats {
	uint64_t enq_success_bulk; /**< Number of successful enqueue calls. */
	uint64_t enq_success_objs; /**< Objects successfully enqueued. */
	uint64_t enq_quota_bulk;   /**< Successful enqueue calls above watermark. */
	uint64_t enq_quota_objs;   /**< Objects enqueued above watermark. */
	uint64_t enq_fail_bulk;    /**< Number of failed enqueue calls. */
	uint64_t enq_fail_objs;    /**< Objects that failed to be enqueued. */
	uint64_t deq_success_bulk; /**< Number of successful dequeue calls. */
	uint64_t deq_success_objs; /**< Objects successfully dequeued. */
	uint64_t deq_fail_bulk;    /**< Number of failed dequeue calls. */
	uint64_t deq_fail_objs;    /**< Objects that failed to be dequeued. */
} __rte_cache_aligned;
#endif

#define RTE_RING_MZ_PREFIX "RG_"
/** The maximum length of a ring name. */
#define RTE_RING_NAMESIZE (RTE_MEMZONE_NAMESIZE - \
			   sizeof(RTE_RING_MZ_PREFIX) + 1)

#ifndef RTE_RING_PAUSE_REP_COUNT
#define RTE_RING_PAUSE_REP_COUNT 0 /**< Yield after pausing this many times;
                                    *   never yield if it is left at 0. */
#endif

struct rte_memzone; /* forward declaration, so as not to require memzone.h */

/**
 * An RTE ring structure.
 *
 * The producer and the consumer each have a head and a tail index. The
 * particularity of these indexes is that they are not constrained to lie
 * between 0 and size(ring)-1: they span the full 32-bit range, and their
 * value is masked when the ring[] field is accessed. Thanks to this
 * property, subtractions between two index values are done modulo 2^32,
 * so index overflow is not a problem.
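 *
 * A worked example (editor's illustration, not values from a running
 * system): with a ring of size 16 (mask 15), suppose prod.head has
 * wrapped to 0x00000002 while cons.tail is still 0xFFFFFFFE, i.e. 4
 * entries are in use. The unsigned computation
 * mask + cons.tail - prod.head = 15 + 0xFFFFFFFE - 0x00000002 = 11
 * still yields the correct number of free entries modulo 2^32, and each
 * index is reduced with (index & mask) before touching ring[].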
 */
struct rte_ring {
	/*
	 * Note: this field kept the RTE_MEMZONE_NAMESIZE size due to ABI
	 * compatibility requirements; it could be changed to RTE_RING_NAMESIZE
	 * next time the ABI changes.
	 */
	char name[RTE_MEMZONE_NAMESIZE];    /**< Name of the ring. */
	int flags;                       /**< Flags supplied at creation. */
	const struct rte_memzone *memzone;
			/**< Memzone, if any, containing the rte_ring */

	/** Ring producer status. */
	struct prod {
		uint32_t watermark;      /**< Maximum items before EDQUOT. */
		uint32_t sp_enqueue;     /**< True, if single producer. */
		uint32_t size;           /**< Size of ring. */
		uint32_t mask;           /**< Mask (size-1) of ring. */
		volatile uint32_t head;  /**< Producer head. */
		volatile uint32_t tail;  /**< Producer tail. */
	} prod __rte_cache_aligned;

	/** Ring consumer status. */
	struct cons {
		uint32_t sc_dequeue;     /**< True, if single consumer. */
		uint32_t size;           /**< Size of the ring. */
		uint32_t mask;           /**< Mask (size-1) of ring. */
		volatile uint32_t head;  /**< Consumer head. */
		volatile uint32_t tail;  /**< Consumer tail. */
#ifdef RTE_RING_SPLIT_PROD_CONS
	} cons __rte_cache_aligned;
#else
	} cons;
#endif

#ifdef RTE_LIBRTE_RING_DEBUG
	struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
#endif

	void *ring[] __rte_cache_aligned;   /**< Memory space of the ring starts
	                                     * here. Not volatile, so be careful
	                                     * about compiler reordering. */
};

#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
#define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */
#define RTE_RING_QUOT_EXCEED (1 << 31)  /**< Quota exceeded for burst ops */
#define RTE_RING_SZ_MASK  (unsigned)(0x0fffffff) /**< Ring size mask */
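
/*
 * Note (editor's illustration): a burst enqueue past the watermark returns
 * the enqueued count OR'ed with RTE_RING_QUOT_EXCEED, so callers can
 * recover the count as (ret & RTE_RING_SZ_MASK) and test the flag with
 * (ret & RTE_RING_QUOT_EXCEED).
 */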

/**
 * @internal When debug is enabled, store ring statistics.
 * @param r
 *   A pointer to the ring.
 * @param name
 *   The name of the statistics field to increment in the ring.
 * @param n
 *   The number of objects to add to the statistics.
 */
#ifdef RTE_LIBRTE_RING_DEBUG
#define __RING_STAT_ADD(r, name, n) do {                        \
		unsigned __lcore_id = rte_lcore_id();           \
		if (__lcore_id < RTE_MAX_LCORE) {               \
			r->stats[__lcore_id].name##_objs += n;  \
			r->stats[__lcore_id].name##_bulk += 1;  \
		}                                               \
	} while(0)
#else
#define __RING_STAT_ADD(r, name, n) do {} while(0)
#endif

/**
 * Calculate the memory size needed for a ring.
 *
 * This function returns the number of bytes needed for a ring, given
 * the number of elements in it. This value is the sum of the size of
 * the structure rte_ring and the size of the memory needed for the
 * object pointers. The value is aligned to a cache line size.
 *
 * @param count
 *   The number of elements in the ring (must be a power of 2).
 * @return
 *   - The memory size needed for the ring on success.
 *   - -EINVAL if count is not a power of 2.
 */
ssize_t rte_ring_get_memsize(unsigned count);

/**
 * Initialize a ring structure.
 *
 * Initialize a ring structure in the memory pointed to by "r". The size
 * of the memory area must be large enough to store the ring structure
 * and the object table. It is advised to use rte_ring_get_memsize() to
 * get the appropriate size.
 *
 * The ring size is set to *count*, which must be a power of two. Water
 * marking is disabled by default. The real usable ring size is
 * *count-1* instead of *count* to differentiate a full ring from an
 * empty ring.
 *
 * The ring is not added to the RTE_TAILQ_RING global list, since the
 * memory given by the caller may not be shareable among DPDK
 * processes.
 *
 * @param r
 *   The pointer to the ring structure followed by the objects table.
 * @param name
 *   The name of the ring.
 * @param count
 *   The number of elements in the ring (must be a power of 2).
 * @param flags
 *   An OR of the following:
 *    - RING_F_SP_ENQ: If this flag is set, the default behavior when
 *      using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
 *      is "single-producer". Otherwise, it is "multi-producer".
 *    - RING_F_SC_DEQ: If this flag is set, the default behavior when
 *      using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
 *      is "single-consumer". Otherwise, it is "multi-consumer".
 * @return
 *   0 on success, or a negative value on error.
 */
int rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
	unsigned flags);
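
/*
 * Minimal usage sketch (editor's illustration, assuming <rte_malloc.h> is
 * available): size a 1024-entry ring, allocate it cache-aligned, and
 * initialize it as single-producer/single-consumer. Only count-1 (1023)
 * slots are usable; "scratch_ring" is a hypothetical name.
 *
 *   ssize_t sz = rte_ring_get_memsize(1024);
 *   struct rte_ring *r = (sz < 0) ? NULL :
 *           rte_malloc(NULL, (size_t)sz, RTE_CACHE_LINE_SIZE);
 *   if (r != NULL &&
 *       rte_ring_init(r, "scratch_ring", 1024,
 *                     RING_F_SP_ENQ | RING_F_SC_DEQ) != 0) {
 *           rte_free(r);
 *           r = NULL;
 *   }
 */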

/**
 * Create a new ring named *name* in memory.
 *
 * This function uses ``rte_memzone_reserve()`` to allocate memory. Then
 * it calls rte_ring_init() to initialize an empty ring.
 *
 * The new ring size is set to *count*, which must be a power of
 * two. Water marking is disabled by default. The real usable ring size
 * is *count-1* instead of *count* to differentiate a full ring from an
 * empty ring.
 *
 * The ring is added to the RTE_TAILQ_RING list.
 *
 * @param name
 *   The name of the ring.
 * @param count
 *   The size of the ring (must be a power of 2).
 * @param socket_id
 *   The *socket_id* argument is the socket identifier in case of
 *   NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
 *   constraint for the reserved zone.
 * @param flags
 *   An OR of the following:
 *    - RING_F_SP_ENQ: If this flag is set, the default behavior when
 *      using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
 *      is "single-producer". Otherwise, it is "multi-producer".
 *    - RING_F_SC_DEQ: If this flag is set, the default behavior when
 *      using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
 *      is "single-consumer". Otherwise, it is "multi-consumer".
 * @return
 *   On success, the pointer to the newly allocated ring. NULL on error with
 *    rte_errno set appropriately. Possible errno values include:
 *    - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
 *    - E_RTE_SECONDARY - function was called from a secondary process instance
 *    - EINVAL - count provided is not a power of 2
 *    - ENOSPC - the maximum number of memzones has already been allocated
 *    - EEXIST - a memzone with the same name already exists
 *    - ENOMEM - no appropriate memory area found in which to create memzone
 */
struct rte_ring *rte_ring_create(const char *name, unsigned count,
				 int socket_id, unsigned flags);
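
/*
 * Example (editor's illustration, assuming <rte_errno.h> and <rte_debug.h>):
 * create a default multi-producer/multi-consumer ring of 4096 slots on any
 * NUMA socket; "rx_ring" is a hypothetical name.
 *
 *   struct rte_ring *r = rte_ring_create("rx_ring", 4096, SOCKET_ID_ANY, 0);
 *   if (r == NULL)
 *           rte_exit(EXIT_FAILURE, "cannot create ring: %d\n", rte_errno);
 */
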
/**
 * De-allocate all memory used by the ring.
 *
 * @param r
 *   Ring to free
 */
void rte_ring_free(struct rte_ring *r);

/**
 * Change the high water mark.
 *
 * If *count* is 0, water marking is disabled. Otherwise, it is set to the
 * *count* value. The *count* value must be greater than 0 and less
 * than the ring size.
 *
 * This function can be called at any time (not necessarily at
 * initialization).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param count
 *   The new water mark value.
 * @return
 *   - 0: Success; water mark changed.
 *   - -EINVAL: Invalid water mark value.
 */
int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
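
/*
 * Example (editor's illustration): cap a 1024-slot ring at 800 entries.
 * Once more than 800 entries are in the ring, fixed enqueues still succeed
 * but return -EDQUOT, and burst enqueues set RTE_RING_QUOT_EXCEED.
 *
 *   if (rte_ring_set_water_mark(r, 800) != 0)
 *           printf("invalid watermark\n");
 */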

/**
 * Dump the status of the ring to a file.
 *
 * @param f
 *   A pointer to a file for output
 * @param r
 *   A pointer to the ring structure.
 */
void rte_ring_dump(FILE *f, const struct rte_ring *r);

/* The actual enqueue of pointers on the ring.
 * Placed here since identical code is needed in both
 * the single and multi producer enqueue functions. */
#define ENQUEUE_PTRS() do { \
	const uint32_t size = r->prod.size; \
	uint32_t idx = prod_head & mask; \
	if (likely(idx + n < size)) { \
		for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \
			r->ring[idx] = obj_table[i]; \
			r->ring[idx+1] = obj_table[i+1]; \
			r->ring[idx+2] = obj_table[i+2]; \
			r->ring[idx+3] = obj_table[i+3]; \
		} \
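		/* the cases below intentionally fall through to copy \
		 * the 0-3 pointers left over from the unrolled loop */ \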
		switch (n & 0x3) { \
			case 3: r->ring[idx++] = obj_table[i++]; \
			case 2: r->ring[idx++] = obj_table[i++]; \
			case 1: r->ring[idx++] = obj_table[i++]; \
		} \
	} else { \
		for (i = 0; idx < size; i++, idx++)\
			r->ring[idx] = obj_table[i]; \
		for (idx = 0; i < n; i++, idx++) \
			r->ring[idx] = obj_table[i]; \
	} \
} while(0)

/* The actual copy of pointers on the ring to obj_table.
 * Placed here since identical code is needed in both
 * the single and multi consumer dequeue functions. */
#define DEQUEUE_PTRS() do { \
	uint32_t idx = cons_head & mask; \
	const uint32_t size = r->cons.size; \
	if (likely(idx + n < size)) { \
		for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\
			obj_table[i] = r->ring[idx]; \
			obj_table[i+1] = r->ring[idx+1]; \
			obj_table[i+2] = r->ring[idx+2]; \
			obj_table[i+3] = r->ring[idx+3]; \
		} \
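		/* the cases below intentionally fall through to copy \
		 * the 0-3 pointers left over from the unrolled loop */ \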
		switch (n & 0x3) { \
			case 3: obj_table[i++] = r->ring[idx++]; \
			case 2: obj_table[i++] = r->ring[idx++]; \
			case 1: obj_table[i++] = r->ring[idx++]; \
		} \
	} else { \
		for (i = 0; idx < size; i++, idx++) \
			obj_table[i] = r->ring[idx]; \
		for (idx = 0; i < n; i++, idx++) \
			obj_table[i] = r->ring[idx]; \
	} \
} while (0)

/**
 * @internal Enqueue several objects on the ring (multi-producers safe).
 *
 * This function uses a "compare and set" instruction to move the
 * producer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items to the ring
 *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible to the ring
 * @return
 *   Depends on the behavior value
 *   if behavior = RTE_RING_QUEUE_FIXED
 *   - 0: Success; objects enqueued.
 *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 *   if behavior = RTE_RING_QUEUE_VARIABLE
 *   - n: Actual number of objects enqueued.
 */
static inline int __attribute__((always_inline))
__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
			 unsigned n, enum rte_ring_queue_behavior behavior)
{
	uint32_t prod_head, prod_next;
	uint32_t cons_tail, free_entries;
	const unsigned max = n;
	int success;
	unsigned i, rep = 0;
	uint32_t mask = r->prod.mask;
	int ret;

	/* Avoid the unnecessary cmpset operation below, which is also
	 * potentially harmful when n equals 0. */
	if (n == 0)
		return 0;

	/* move prod.head atomically */
	do {
		/* Reset n to the initial burst count */
		n = max;

		prod_head = r->prod.head;
		cons_tail = r->cons.tail;
		/* The subtraction is done between two unsigned 32-bit values
		 * (the result is always modulo 32 bits even if we have
		 * prod_head > cons_tail). So 'free_entries' is always between 0
		 * and size(ring)-1. */
		free_entries = (mask + cons_tail - prod_head);

		/* check that we have enough room in ring */
		if (unlikely(n > free_entries)) {
			if (behavior == RTE_RING_QUEUE_FIXED) {
				__RING_STAT_ADD(r, enq_fail, n);
				return -ENOBUFS;
			}
			else {
				/* No free entry available */
				if (unlikely(free_entries == 0)) {
					__RING_STAT_ADD(r, enq_fail, n);
					return 0;
				}

				n = free_entries;
			}
		}

		prod_next = prod_head + n;
		success = rte_atomic32_cmpset(&r->prod.head, prod_head,
					      prod_next);
	} while (unlikely(success == 0));

	/* write entries in ring */
	ENQUEUE_PTRS();
	rte_smp_wmb();

	/* if we exceed the watermark */
	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
				(int)(n | RTE_RING_QUOT_EXCEED);
		__RING_STAT_ADD(r, enq_quota, n);
	}
	else {
		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
		__RING_STAT_ADD(r, enq_success, n);
	}

	/*
	 * If there are other enqueues in progress that preceded us,
	 * we need to wait for them to complete
	 */
	while (unlikely(r->prod.tail != prod_head)) {
		rte_pause();

		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long
		 * waiting for other threads to finish. It gives a preempted
		 * thread a chance to proceed and finish its ring enqueue
		 * operation. */
		if (RTE_RING_PAUSE_REP_COUNT &&
		    ++rep == RTE_RING_PAUSE_REP_COUNT) {
			rep = 0;
			sched_yield();
		}
	}
	r->prod.tail = prod_next;
	return ret;
}

/**
 * @internal Enqueue several objects on a ring (NOT multi-producers safe).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Enqueue a fixed number of items to the ring
 *   RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible to the ring
 * @return
 *   Depends on the behavior value
 *   if behavior = RTE_RING_QUEUE_FIXED
 *   - 0: Success; objects enqueued.
 *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 *   if behavior = RTE_RING_QUEUE_VARIABLE
 *   - n: Actual number of objects enqueued.
 */
static inline int __attribute__((always_inline))
__rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
			 unsigned n, enum rte_ring_queue_behavior behavior)
{
	uint32_t prod_head, cons_tail;
	uint32_t prod_next, free_entries;
	unsigned i;
	uint32_t mask = r->prod.mask;
	int ret;

	prod_head = r->prod.head;
	cons_tail = r->cons.tail;
	/* The subtraction is done between two unsigned 32-bit values
	 * (the result is always modulo 32 bits even if we have
	 * prod_head > cons_tail). So 'free_entries' is always between 0
	 * and size(ring)-1. */
	free_entries = mask + cons_tail - prod_head;

	/* check that we have enough room in ring */
	if (unlikely(n > free_entries)) {
		if (behavior == RTE_RING_QUEUE_FIXED) {
			__RING_STAT_ADD(r, enq_fail, n);
			return -ENOBUFS;
		}
		else {
			/* No free entry available */
			if (unlikely(free_entries == 0)) {
				__RING_STAT_ADD(r, enq_fail, n);
				return 0;
			}

			n = free_entries;
		}
	}

	prod_next = prod_head + n;
	r->prod.head = prod_next;

	/* write entries in ring */
	ENQUEUE_PTRS();
	rte_smp_wmb();

	/* if we exceed the watermark */
	if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
		ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
			(int)(n | RTE_RING_QUOT_EXCEED);
		__RING_STAT_ADD(r, enq_quota, n);
	}
	else {
		ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n;
		__RING_STAT_ADD(r, enq_success, n);
	}

	r->prod.tail = prod_next;
	return ret;
}

/**
 * @internal Dequeue several objects from a ring (multi-consumers safe). When
 * the requested number of objects exceeds the number available, only the
 * available objects are dequeued.
 *
 * This function uses a "compare and set" instruction to move the
 * consumer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from the ring
 *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from the ring
 * @return
 *   Depends on the behavior value
 *   if behavior = RTE_RING_QUEUE_FIXED
 *   - 0: Success; objects dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 *   if behavior = RTE_RING_QUEUE_VARIABLE
 *   - n: Actual number of objects dequeued.
 */

static inline int __attribute__((always_inline))
__rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
		 unsigned n, enum rte_ring_queue_behavior behavior)
{
	uint32_t cons_head, prod_tail;
	uint32_t cons_next, entries;
	const unsigned max = n;
	int success;
	unsigned i, rep = 0;
	uint32_t mask = r->prod.mask;

	/* Avoid the unnecessary cmpset operation below, which is also
	 * potentially harmful when n equals 0. */
	if (n == 0)
		return 0;

	/* move cons.head atomically */
	do {
		/* Restore n as it may change every loop */
		n = max;

		cons_head = r->cons.head;
		prod_tail = r->prod.tail;
		/* The subtraction is done between two unsigned 32-bit values
		 * (the result is always modulo 32 bits even if we have
		 * cons_head > prod_tail). So 'entries' is always between 0
		 * and size(ring)-1. */
		entries = (prod_tail - cons_head);

		/* Set the actual entries for dequeue */
		if (n > entries) {
			if (behavior == RTE_RING_QUEUE_FIXED) {
				__RING_STAT_ADD(r, deq_fail, n);
				return -ENOENT;
			}
			else {
				if (unlikely(entries == 0)) {
					__RING_STAT_ADD(r, deq_fail, n);
					return 0;
				}

				n = entries;
			}
		}

		cons_next = cons_head + n;
		success = rte_atomic32_cmpset(&r->cons.head, cons_head,
					      cons_next);
	} while (unlikely(success == 0));

	/* copy in table */
	DEQUEUE_PTRS();
	rte_smp_rmb();

	/*
	 * If there are other dequeues in progress that preceded us,
	 * we need to wait for them to complete
	 */
	while (unlikely(r->cons.tail != cons_head)) {
		rte_pause();

		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long
		 * waiting for other threads to finish. It gives a preempted
		 * thread a chance to proceed and finish its ring dequeue
		 * operation. */
		if (RTE_RING_PAUSE_REP_COUNT &&
		    ++rep == RTE_RING_PAUSE_REP_COUNT) {
			rep = 0;
			sched_yield();
		}
	}
	__RING_STAT_ADD(r, deq_success, n);
	r->cons.tail = cons_next;

	return behavior == RTE_RING_QUEUE_FIXED ? 0 : n;
}

/**
 * @internal Dequeue several objects from a ring (NOT multi-consumers safe).
 * When the requested number of objects exceeds the number available, only
 * the available objects are dequeued.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @param behavior
 *   RTE_RING_QUEUE_FIXED:    Dequeue a fixed number of items from the ring
 *   RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from the ring
 * @return
 *   Depends on the behavior value
 *   if behavior = RTE_RING_QUEUE_FIXED
 *   - 0: Success; objects dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 *   if behavior = RTE_RING_QUEUE_VARIABLE
 *   - n: Actual number of objects dequeued.
 */
static inline int __attribute__((always_inline))
__rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table,
		 unsigned n, enum rte_ring_queue_behavior behavior)
{
	uint32_t cons_head, prod_tail;
	uint32_t cons_next, entries;
	unsigned i;
	uint32_t mask = r->prod.mask;

	cons_head = r->cons.head;
	prod_tail = r->prod.tail;
	/* The subtraction is done between two unsigned 32-bit values
	 * (the result is always modulo 32 bits even if we have
	 * cons_head > prod_tail). So 'entries' is always between 0
	 * and size(ring)-1. */
	entries = prod_tail - cons_head;

	if (n > entries) {
		if (behavior == RTE_RING_QUEUE_FIXED) {
			__RING_STAT_ADD(r, deq_fail, n);
			return -ENOENT;
		}
		else {
			if (unlikely(entries == 0)) {
				__RING_STAT_ADD(r, deq_fail, n);
				return 0;
			}

			n = entries;
		}
	}

	cons_next = cons_head + n;
	r->cons.head = cons_next;

	/* copy in table */
	DEQUEUE_PTRS();
	rte_smp_rmb();

	__RING_STAT_ADD(r, deq_success, n);
	r->cons.tail = cons_next;
	return behavior == RTE_RING_QUEUE_FIXED ? 0 : n;
}

/**
 * Enqueue several objects on the ring (multi-producers safe).
 *
 * This function uses a "compare and set" instruction to move the
 * producer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @return
 *   - 0: Success; objects enqueued.
 *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 */
static inline int __attribute__((always_inline))
rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
			 unsigned n)
{
	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
}

/**
 * Enqueue several objects on a ring (NOT multi-producers safe).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @return
 *   - 0: Success; objects enqueued.
 *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 */
static inline int __attribute__((always_inline))
rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
			 unsigned n)
{
	return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
}

/**
 * Enqueue several objects on a ring.
 *
 * This function calls the multi-producer or the single-producer
 * version depending on the default behavior that was specified at
 * ring creation time (see flags).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @return
 *   - 0: Success; objects enqueued.
 *   - -EDQUOT: Quota exceeded. The objects have been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 */
static inline int __attribute__((always_inline))
rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
		      unsigned n)
{
	if (r->prod.sp_enqueue)
		return rte_ring_sp_enqueue_bulk(r, obj_table, n);
	else
		return rte_ring_mp_enqueue_bulk(r, obj_table, n);
}
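
/*
 * Example (editor's illustration): all-or-nothing bulk enqueue of 32
 * pointers. With no watermark set, a return of 0 means all 32 were
 * enqueued and -ENOBUFS means none were.
 *
 *   void *objs[32];
 *   ... fill objs[0..31] ...
 *   if (rte_ring_enqueue_bulk(r, objs, 32) == -ENOBUFS)
 *           printf("ring full, nothing enqueued\n");
 */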

/**
 * Enqueue one object on a ring (multi-producers safe).
 *
 * This function uses a "compare and set" instruction to move the
 * producer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj
 *   A pointer to the object to be added.
 * @return
 *   - 0: Success; object enqueued.
 *   - -EDQUOT: Quota exceeded. The object has been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 */
static inline int __attribute__((always_inline))
rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
{
	return rte_ring_mp_enqueue_bulk(r, &obj, 1);
}

/**
 * Enqueue one object on a ring (NOT multi-producers safe).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj
 *   A pointer to the object to be added.
 * @return
 *   - 0: Success; object enqueued.
 *   - -EDQUOT: Quota exceeded. The object has been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 */
static inline int __attribute__((always_inline))
rte_ring_sp_enqueue(struct rte_ring *r, void *obj)
{
	return rte_ring_sp_enqueue_bulk(r, &obj, 1);
}

/**
 * Enqueue one object on a ring.
 *
 * This function calls the multi-producer or the single-producer
 * version, depending on the default behaviour that was specified at
 * ring creation time (see flags).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj
 *   A pointer to the object to be added.
 * @return
 *   - 0: Success; object enqueued.
 *   - -EDQUOT: Quota exceeded. The object has been enqueued, but the
 *     high water mark is exceeded.
 *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued.
 */
static inline int __attribute__((always_inline))
rte_ring_enqueue(struct rte_ring *r, void *obj)
{
	if (r->prod.sp_enqueue)
		return rte_ring_sp_enqueue(r, obj);
	else
		return rte_ring_mp_enqueue(r, obj);
}

/**
 * Dequeue several objects from a ring (multi-consumers safe).
 *
 * This function uses a "compare and set" instruction to move the
 * consumer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @return
 *   - 0: Success; objects dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 */
static inline int __attribute__((always_inline))
rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
{
	return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
}

/**
 * Dequeue several objects from a ring (NOT multi-consumers safe).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table;
 *   must be strictly positive.
 * @return
 *   - 0: Success; objects dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 */
static inline int __attribute__((always_inline))
rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
{
	return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED);
}

/**
 * Dequeue several objects from a ring.
 *
 * This function calls the multi-consumer or the single-consumer
 * version, depending on the default behaviour that was specified at
 * ring creation time (see flags).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @return
 *   - 0: Success; objects dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 */
static inline int __attribute__((always_inline))
rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
{
	if (r->cons.sc_dequeue)
		return rte_ring_sc_dequeue_bulk(r, obj_table, n);
	else
		return rte_ring_mc_dequeue_bulk(r, obj_table, n);
}

/**
 * Dequeue one object from a ring (multi-consumers safe).
 *
 * This function uses a "compare and set" instruction to move the
 * consumer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_p
 *   A pointer to a void * pointer (object) that will be filled.
 * @return
 *   - 0: Success; object dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 */
static inline int __attribute__((always_inline))
rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)
{
	return rte_ring_mc_dequeue_bulk(r, obj_p, 1);
}

/**
 * Dequeue one object from a ring (NOT multi-consumers safe).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_p
 *   A pointer to a void * pointer (object) that will be filled.
 * @return
 *   - 0: Success; object dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 */
static inline int __attribute__((always_inline))
rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)
{
	return rte_ring_sc_dequeue_bulk(r, obj_p, 1);
}

/**
 * Dequeue one object from a ring.
 *
 * This function calls the multi-consumer or the single-consumer
 * version depending on the default behaviour that was specified at
 * ring creation time (see flags).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_p
 *   A pointer to a void * pointer (object) that will be filled.
 * @return
 *   - 0: Success; object dequeued.
 *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
 *     dequeued.
 */
static inline int __attribute__((always_inline))
rte_ring_dequeue(struct rte_ring *r, void **obj_p)
{
	if (r->cons.sc_dequeue)
		return rte_ring_sc_dequeue(r, obj_p);
	else
		return rte_ring_mc_dequeue(r, obj_p);
}
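
/*
 * Example (editor's illustration): round trip of a single pointer, where
 * struct my_msg is a hypothetical application type.
 *
 *   struct my_msg *m = ...;
 *   if (rte_ring_enqueue(r, m) == 0) {
 *           void *out = NULL;
 *           if (rte_ring_dequeue(r, &out) == 0)
 *                   assert(out == (void *)m);   (holds with no other consumer)
 *   }
 */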

/**
 * Test if a ring is full.
 *
 * @param r
 *   A pointer to the ring structure.
 * @return
 *   - 1: The ring is full.
 *   - 0: The ring is not full.
 */
static inline int
rte_ring_full(const struct rte_ring *r)
{
	uint32_t prod_tail = r->prod.tail;
	uint32_t cons_tail = r->cons.tail;
	return ((cons_tail - prod_tail - 1) & r->prod.mask) == 0;
}

/**
 * Test if a ring is empty.
 *
 * @param r
 *   A pointer to the ring structure.
 * @return
 *   - 1: The ring is empty.
 *   - 0: The ring is not empty.
 */
static inline int
rte_ring_empty(const struct rte_ring *r)
{
	uint32_t prod_tail = r->prod.tail;
	uint32_t cons_tail = r->cons.tail;
	return !!(cons_tail == prod_tail);
}

/**
 * Return the number of entries in a ring.
 *
 * @param r
 *   A pointer to the ring structure.
 * @return
 *   The number of entries in the ring.
 */
static inline unsigned
rte_ring_count(const struct rte_ring *r)
{
	uint32_t prod_tail = r->prod.tail;
	uint32_t cons_tail = r->cons.tail;
	return (prod_tail - cons_tail) & r->prod.mask;
}

/**
 * Return the number of free entries in a ring.
 *
 * @param r
 *   A pointer to the ring structure.
 * @return
 *   The number of free entries in the ring.
 */
static inline unsigned
rte_ring_free_count(const struct rte_ring *r)
{
	uint32_t prod_tail = r->prod.tail;
	uint32_t cons_tail = r->cons.tail;
	return (cons_tail - prod_tail - 1) & r->prod.mask;
}
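
/*
 * Note (editor's illustration): one slot is always kept empty to tell a
 * full ring from an empty one, so for any quiescent ring
 * rte_ring_count(r) + rte_ring_free_count(r) == size - 1.
 */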

/**
 * Dump the status of all rings to a file.
 *
 * @param f
 *   A pointer to a file for output
 */
void rte_ring_list_dump(FILE *f);

/**
 * Search for a ring by its name.
 *
 * @param name
 *   The name of the ring.
 * @return
 *   The pointer to the ring matching the name, or NULL if not found,
 *   with rte_errno set appropriately. Possible rte_errno values include:
 *    - ENOENT - required entry not available to return.
 */
struct rte_ring *rte_ring_lookup(const char *name);
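
/*
 * Example (editor's illustration): attach to a ring created elsewhere,
 * e.g. by a primary process; "rx_ring" is a hypothetical name.
 *
 *   struct rte_ring *r = rte_ring_lookup("rx_ring");
 *   if (r == NULL)
 *           printf("no such ring (rte_errno=%d)\n", rte_errno);
 */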

/**
 * Enqueue several objects on the ring (multi-producers safe).
 *
 * This function uses a "compare and set" instruction to move the
 * producer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @return
 *   - n: Actual number of objects enqueued.
 */
static inline unsigned __attribute__((always_inline))
rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
			 unsigned n)
{
	return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
}

/**
 * Enqueue several objects on a ring (NOT multi-producers safe).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @return
 *   - n: Actual number of objects enqueued.
 */
static inline unsigned __attribute__((always_inline))
rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
			 unsigned n)
{
	return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
}

/**
 * Enqueue several objects on a ring.
 *
 * This function calls the multi-producer or the single-producer
 * version depending on the default behavior that was specified at
 * ring creation time (see flags).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects).
 * @param n
 *   The number of objects to add in the ring from the obj_table.
 * @return
 *   - n: Actual number of objects enqueued.
 */
static inline unsigned __attribute__((always_inline))
rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
		      unsigned n)
{
	if (r->prod.sp_enqueue)
		return rte_ring_sp_enqueue_burst(r, obj_table, n);
	else
		return rte_ring_mp_enqueue_burst(r, obj_table, n);
}
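
/*
 * Example (editor's illustration): best-effort burst enqueue, assuming no
 * watermark is set so the return value is simply how many pointers fit;
 * handle_leftovers() is a hypothetical helper.
 *
 *   unsigned sent = rte_ring_enqueue_burst(r, objs, 32);
 *   if (sent < 32)
 *           handle_leftovers(&objs[sent], 32 - sent);
 */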

/**
 * Dequeue several objects from a ring (multi-consumers safe). When the
 * requested number of objects exceeds the number available, only the
 * available objects are dequeued.
 *
 * This function uses a "compare and set" instruction to move the
 * consumer index atomically.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @return
 *   - n: Actual number of objects dequeued, 0 if the ring is empty
 */
static inline unsigned __attribute__((always_inline))
rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
	return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
}

/**
 * Dequeue several objects from a ring (NOT multi-consumers safe). When the
 * requested number of objects exceeds the number available, only the
 * available objects are dequeued.
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @return
 *   - n: Actual number of objects dequeued, 0 if the ring is empty
 */
static inline unsigned __attribute__((always_inline))
rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
	return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
}

/**
 * Dequeue multiple objects from a ring up to a maximum number.
 *
 * This function calls the multi-consumer or the single-consumer
 * version, depending on the default behaviour that was specified at
 * ring creation time (see flags).
 *
 * @param r
 *   A pointer to the ring structure.
 * @param obj_table
 *   A pointer to a table of void * pointers (objects) that will be filled.
 * @param n
 *   The number of objects to dequeue from the ring to the obj_table.
 * @return
 *   - Number of objects dequeued
 */
static inline unsigned __attribute__((always_inline))
rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
	if (r->cons.sc_dequeue)
		return rte_ring_sc_dequeue_burst(r, obj_table, n);
	else
		return rte_ring_mc_dequeue_burst(r, obj_table, n);
}
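
/*
 * Example (editor's illustration): drain loop that processes the ring in
 * bursts of up to 32 objects; process_obj() is a hypothetical callback.
 *
 *   void *burst[32];
 *   unsigned nb, i;
 *   while ((nb = rte_ring_dequeue_burst(r, burst, 32)) > 0)
 *           for (i = 0; i < nb; i++)
 *                   process_obj(burst[i]);
 */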

#ifdef __cplusplus
}
#endif

#endif /* _RTE_RING_H_ */