rte_table_hash_key16.c revision 9ca4a157
1/*-
2 *	 BSD LICENSE
3 *
4 *	 Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 *	 All rights reserved.
6 *
7 *	 Redistribution and use in source and binary forms, with or without
8 *	 modification, are permitted provided that the following conditions
9 *	 are met:
10 *
11 *	* Redistributions of source code must retain the above copyright
12 *		 notice, this list of conditions and the following disclaimer.
13 *	* Redistributions in binary form must reproduce the above copyright
14 *		 notice, this list of conditions and the following disclaimer in
15 *		 the documentation and/or other materials provided with the
16 *		 distribution.
17 *	* Neither the name of Intel Corporation nor the names of its
18 *		 contributors may be used to endorse or promote products derived
19 *		 from this software without specific prior written permission.
20 *
21 *	 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 *	 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 *	 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 *	 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 *	 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 *	 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 *	 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 *	 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 *	 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 *	 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 *	 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33#include <string.h>
34#include <stdio.h>
35
36#include <rte_common.h>
37#include <rte_mbuf.h>
38#include <rte_memory.h>
39#include <rte_malloc.h>
40#include <rte_log.h>
41
42#include "rte_table_hash.h"
43#include "rte_lru.h"
44
/* All keys handled by this table implementation are exactly 16 bytes */
#define RTE_TABLE_HASH_KEY_SIZE						16

/* LSB flag OR-ed into each per-slot signature to mark the slot in use */
#define RTE_BUCKET_ENTRY_VALID						0x1LLU

#ifdef RTE_TABLE_STATS_COLLECT

/* Statistics accounting: these macros update the table counters only
 * when RTE_TABLE_STATS_COLLECT is defined; otherwise they expand to
 * nothing, so the fast path pays no cost.
 */
#define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val) \
	table->stats.n_pkts_in += val
#define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val) \
	table->stats.n_pkts_lookup_miss += val

#else

#define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val)
#define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val)

#endif
62
/* Hash table bucket: 4 slots for 16-byte keys, laid out so that the
 * control words, the keys and the entry data each start on their own
 * 64-byte cache line (creation-time checks verify sizeof(bucket) % 64
 * == 0).
 */
struct rte_bucket_4_16 {
	/* Cache line 0 */
	uint64_t signature[4 + 1];	/* per-slot signature, LSB = valid; the
					 * extra element [4] is a zeroed sentinel
					 * read when lookup_key16_cmp reports a
					 * miss (pos == 4) */
	uint64_t lru_list;		/* packed LRU ordering of the 4 slots */
	struct rte_bucket_4_16 *next;	/* next bucket in the extension chain
					 * (extendible-bucket tables only) */
	uint64_t next_valid;		/* non-zero when next points to a chained
					 * bucket */

	/* Cache line 1 */
	uint64_t key[4][2];		/* the four 16-byte keys */

	/* Cache line 2 */
	uint8_t data[0];		/* per-slot entry data, entry_size bytes
					 * per slot */
};
76
/* Table handle shared by the LRU and extendible-bucket variants. The
 * bucket array (and, for ext tables, the extension buckets and free
 * stack) live in the trailing memory[] region of this allocation.
 */
struct rte_table_hash {
	struct rte_table_stats stats;	/* lookup statistics (optional) */

	/* Input parameters */
	uint32_t n_buckets;		/* number of head buckets (power of 2) */
	uint32_t n_entries_per_bucket;	/* always 4 for this implementation */
	uint32_t key_size;		/* always 16 for this implementation */
	uint32_t entry_size;		/* bytes of user data per key */
	uint32_t bucket_size;		/* bucket footprint, cache-line multiple */
	uint32_t signature_offset;	/* mbuf metadata offset of precomputed sig */
	uint32_t key_offset;		/* mbuf metadata offset of the key */
	uint64_t key_mask[2];		/* 16-byte mask applied before hashing/compare */
	rte_table_hash_op_hash f_hash;	/* user-supplied hash function */
	uint64_t seed;			/* seed passed to f_hash */

	/* Extendible buckets */
	uint32_t n_buckets_ext;		/* number of extension buckets */
	uint32_t stack_pos;		/* number of free extension buckets on stack */
	uint32_t *stack;		/* LIFO of free extension bucket indices */

	/* Lookup table */
	uint8_t memory[0] __rte_cache_aligned;	/* buckets [+ ext buckets + stack] */
};
100
101static int
102check_params_create_lru(struct rte_table_hash_key16_lru_params *params) {
103	/* n_entries */
104	if (params->n_entries == 0) {
105		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
106		return -EINVAL;
107	}
108
109	/* f_hash */
110	if (params->f_hash == NULL) {
111		RTE_LOG(ERR, TABLE,
112			"%s: f_hash function pointer is NULL\n", __func__);
113		return -EINVAL;
114	}
115
116	return 0;
117}
118
/* Create an LRU hash table for 16-byte keys.
 *
 * params is a struct rte_table_hash_key16_lru_params (n_entries,
 * f_hash, seed, signature/key offsets, optional 16-byte key_mask);
 * socket_id selects the NUMA node for the allocation; entry_size is
 * the number of bytes of user data stored per key.
 * Returns the table handle, or NULL on bad parameters / allocation
 * failure.
 */
static void *
rte_table_hash_create_key16_lru(void *params,
		int socket_id,
		uint32_t entry_size)
{
	struct rte_table_hash_key16_lru_params *p =
			(struct rte_table_hash_key16_lru_params *) params;
	struct rte_table_hash *f;
	uint32_t n_buckets, n_entries_per_bucket,
			key_size, bucket_size_cl, total_size, i;

	/* Check input parameters */
	if ((check_params_create_lru(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_16) % 64) != 0))
		return NULL;
	n_entries_per_bucket = 4;
	key_size = 16;

	/* Memory allocation */
	/* Power-of-2 bucket count so the hash can be reduced with a mask */
	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
		n_entries_per_bucket);
	/* Bucket footprint in cache lines: header + 4 entries, rounded up */
	bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
		* entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
	total_size = sizeof(struct rte_table_hash) + n_buckets *
		bucket_size_cl * RTE_CACHE_LINE_SIZE;

	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE,
		"%s: Cannot allocate %u bytes for hash table\n",
		__func__, total_size);
		return NULL;
	}
	RTE_LOG(INFO, TABLE,
		"%s: Hash table memory footprint is %u bytes\n",
		__func__, total_size);

	/* Memory initialization */
	f->n_buckets = n_buckets;
	f->n_entries_per_bucket = n_entries_per_bucket;
	f->key_size = key_size;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
	f->signature_offset = p->signature_offset;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	/* A NULL key_mask means "match all 16 key bytes" */
	if (p->key_mask != NULL) {
		f->key_mask[0] = ((uint64_t *)p->key_mask)[0];
		f->key_mask[1] = ((uint64_t *)p->key_mask)[1];
	} else {
		f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
		f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
	}

	/* Initialize the per-bucket LRU state */
	for (i = 0; i < n_buckets; i++) {
		struct rte_bucket_4_16 *bucket;

		bucket = (struct rte_bucket_4_16 *) &f->memory[i *
			f->bucket_size];
		lru_init(bucket);
	}

	return f;
}
186
187static int
188rte_table_hash_free_key16_lru(void *table)
189{
190	struct rte_table_hash *f = (struct rte_table_hash *) table;
191
192	/* Check input parameters */
193	if (f == NULL) {
194		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
195		return -EINVAL;
196	}
197
198	rte_free(f);
199	return 0;
200}
201
202static int
203rte_table_hash_entry_add_key16_lru(
204	void *table,
205	void *key,
206	void *entry,
207	int *key_found,
208	void **entry_ptr)
209{
210	struct rte_table_hash *f = (struct rte_table_hash *) table;
211	struct rte_bucket_4_16 *bucket;
212	uint64_t signature, pos;
213	uint32_t bucket_index, i;
214
215	signature = f->f_hash(key, f->key_size, f->seed);
216	bucket_index = signature & (f->n_buckets - 1);
217	bucket = (struct rte_bucket_4_16 *)
218			&f->memory[bucket_index * f->bucket_size];
219	signature |= RTE_BUCKET_ENTRY_VALID;
220
221	/* Key is present in the bucket */
222	for (i = 0; i < 4; i++) {
223		uint64_t bucket_signature = bucket->signature[i];
224		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
225
226		if ((bucket_signature == signature) &&
227				(memcmp(key, bucket_key, f->key_size) == 0)) {
228			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
229
230			memcpy(bucket_data, entry, f->entry_size);
231			lru_update(bucket, i);
232			*key_found = 1;
233			*entry_ptr = (void *) bucket_data;
234			return 0;
235		}
236	}
237
238	/* Key is not present in the bucket */
239	for (i = 0; i < 4; i++) {
240		uint64_t bucket_signature = bucket->signature[i];
241		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
242
243		if (bucket_signature == 0) {
244			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
245
246			bucket->signature[i] = signature;
247			memcpy(bucket_key, key, f->key_size);
248			memcpy(bucket_data, entry, f->entry_size);
249			lru_update(bucket, i);
250			*key_found = 0;
251			*entry_ptr = (void *) bucket_data;
252
253			return 0;
254		}
255	}
256
257	/* Bucket full: replace LRU entry */
258	pos = lru_pos(bucket);
259	bucket->signature[pos] = signature;
260	memcpy(bucket->key[pos], key, f->key_size);
261	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
262	lru_update(bucket, pos);
263	*key_found = 0;
264	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];
265
266	return 0;
267}
268
269static int
270rte_table_hash_entry_delete_key16_lru(
271	void *table,
272	void *key,
273	int *key_found,
274	void *entry)
275{
276	struct rte_table_hash *f = (struct rte_table_hash *) table;
277	struct rte_bucket_4_16 *bucket;
278	uint64_t signature;
279	uint32_t bucket_index, i;
280
281	signature = f->f_hash(key, f->key_size, f->seed);
282	bucket_index = signature & (f->n_buckets - 1);
283	bucket = (struct rte_bucket_4_16 *)
284			&f->memory[bucket_index * f->bucket_size];
285	signature |= RTE_BUCKET_ENTRY_VALID;
286
287	/* Key is present in the bucket */
288	for (i = 0; i < 4; i++) {
289		uint64_t bucket_signature = bucket->signature[i];
290		uint8_t *bucket_key = (uint8_t *) bucket->key[i];
291
292		if ((bucket_signature == signature) &&
293				(memcmp(key, bucket_key, f->key_size) == 0)) {
294			uint8_t *bucket_data = &bucket->data[i * f->entry_size];
295
296			bucket->signature[i] = 0;
297			*key_found = 1;
298			if (entry)
299				memcpy(entry, bucket_data, f->entry_size);
300			return 0;
301		}
302	}
303
304	/* Key is not present in the bucket */
305	*key_found = 0;
306	return 0;
307}
308
309static int
310check_params_create_ext(struct rte_table_hash_key16_ext_params *params) {
311	/* n_entries */
312	if (params->n_entries == 0) {
313		RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
314		return -EINVAL;
315	}
316
317	/* n_entries_ext */
318	if (params->n_entries_ext == 0) {
319		RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
320		return -EINVAL;
321	}
322
323	/* f_hash */
324	if (params->f_hash == NULL) {
325		RTE_LOG(ERR, TABLE,
326			"%s: f_hash function pointer is NULL\n", __func__);
327		return -EINVAL;
328	}
329
330	return 0;
331}
332
/* Create an extendible-bucket hash table for 16-byte keys.
 *
 * Like the LRU variant, but full head buckets can grow chains of
 * extension buckets. n_entries_ext sizes the extension pool; free
 * extension bucket indices are kept on a LIFO stack placed after the
 * bucket arrays inside the same allocation.
 * Returns the table handle, or NULL on bad parameters / allocation
 * failure.
 */
static void *
rte_table_hash_create_key16_ext(void *params,
		int socket_id,
		uint32_t entry_size)
{
	struct rte_table_hash_key16_ext_params *p =
			(struct rte_table_hash_key16_ext_params *) params;
	struct rte_table_hash *f;
	uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
			bucket_size_cl, stack_size_cl, total_size, i;

	/* Check input parameters */
	if ((check_params_create_ext(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_16) % 64) != 0))
		return NULL;

	n_entries_per_bucket = 4;
	key_size = 16;

	/* Memory allocation */
	/* Head buckets: power of 2 so the hash can be reduced with a mask;
	 * extension buckets: exact count, they are reached via pointers */
	n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) /
		n_entries_per_bucket);
	n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) /
		n_entries_per_bucket;
	bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket
		* entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
	stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1)
		/ RTE_CACHE_LINE_SIZE;
	total_size = sizeof(struct rte_table_hash) +
		((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) *
		RTE_CACHE_LINE_SIZE;

	f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE,
			"%s: Cannot allocate %u bytes for hash table\n",
			__func__, total_size);
		return NULL;
	}
	RTE_LOG(INFO, TABLE,
		"%s: Hash table memory footprint is %u bytes\n",
		__func__, total_size);

	/* Memory initialization */
	f->n_buckets = n_buckets;
	f->n_entries_per_bucket = n_entries_per_bucket;
	f->key_size = key_size;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE;
	f->signature_offset = p->signature_offset;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	/* Free stack starts full: every extension bucket is available */
	f->n_buckets_ext = n_buckets_ext;
	f->stack_pos = n_buckets_ext;
	f->stack = (uint32_t *)
		&f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];

	for (i = 0; i < n_buckets_ext; i++)
		f->stack[i] = i;

	/* A NULL key_mask means "match all 16 key bytes" */
	if (p->key_mask != NULL) {
		f->key_mask[0] = (((uint64_t *)p->key_mask)[0]);
		f->key_mask[1] = (((uint64_t *)p->key_mask)[1]);
	} else {
		f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
		f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
	}

	return f;
}
406
407static int
408rte_table_hash_free_key16_ext(void *table)
409{
410	struct rte_table_hash *f = (struct rte_table_hash *) table;
411
412	/* Check input parameters */
413	if (f == NULL) {
414		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
415		return -EINVAL;
416	}
417
418	rte_free(f);
419	return 0;
420}
421
/* Add (or update) a key/entry pair in an extendible-bucket key16
 * table.
 *
 * Search order: (1) key present anywhere in the bucket chain ->
 * overwrite its data, (2) first empty slot in the chain -> install the
 * key there, (3) chain full -> pop a fresh extension bucket off the
 * free stack and link it at the tail. Returns 0 on success, -ENOSPC
 * when the free stack is empty.
 */
static int
rte_table_hash_entry_add_key16_ext(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_16 *)
			&f->memory[bucket_index * f->bucket_size];
	signature |= RTE_BUCKET_ENTRY_VALID;

	/* Key is present in the bucket */
	for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
		for (i = 0; i < 4; i++) {
			uint64_t bucket_signature = bucket->signature[i];
			uint8_t *bucket_key = (uint8_t *) bucket->key[i];

			if ((bucket_signature == signature) &&
				(memcmp(key, bucket_key, f->key_size) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 1;
				*entry_ptr = (void *) bucket_data;
				return 0;
			}
		}

	/* Key is not present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
			 bucket_prev = bucket, bucket = bucket->next)
		for (i = 0; i < 4; i++) {
			uint64_t bucket_signature = bucket->signature[i];
			uint8_t *bucket_key = (uint8_t *) bucket->key[i];

			if (bucket_signature == 0) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature[i] = signature;
				memcpy(bucket_key, key, f->key_size);
				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 0;
				*entry_ptr = (void *) bucket_data;

				return 0;
			}
		}

	/* Bucket full: extend bucket */
	/* bucket_prev is the chain tail here: the loop above visited at
	 * least bucket0, so bucket_prev is never NULL at this point */
	if (f->stack_pos > 0) {
		bucket_index = f->stack[--f->stack_pos];

		/* Extension buckets live right after the n_buckets head
		 * buckets inside memory[] */
		bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets +
			bucket_index) * f->bucket_size];
		bucket_prev->next = bucket;
		bucket_prev->next_valid = 1;

		bucket->signature[0] = signature;
		memcpy(bucket->key[0], key, f->key_size);
		memcpy(&bucket->data[0], entry, f->entry_size);
		*key_found = 0;
		*entry_ptr = (void *) &bucket->data[0];
		return 0;
	}

	return -ENOSPC;
}
499
/* Delete a key from an extendible-bucket key16 table.
 *
 * On a match the slot is invalidated and, when entry is not NULL, its
 * data is copied out. If the deletion leaves a chained extension
 * bucket completely empty, that bucket is unlinked, zeroed and its
 * index pushed back on the free stack (the head bucket, which has
 * bucket_prev == NULL, is never recycled). *key_found reports whether
 * the key was present. Always returns 0.
 */
static int
rte_table_hash_entry_delete_key16_ext(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_16 *)
		&f->memory[bucket_index * f->bucket_size];
	signature |= RTE_BUCKET_ENTRY_VALID;

	/* Key is present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
		bucket_prev = bucket, bucket = bucket->next)
		for (i = 0; i < 4; i++) {
			uint64_t bucket_signature = bucket->signature[i];
			uint8_t *bucket_key = (uint8_t *) bucket->key[i];

			if ((bucket_signature == signature) &&
				(memcmp(key, bucket_key, f->key_size) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature[i] = 0;
				*key_found = 1;
				if (entry)
					memcpy(entry, bucket_data,
					f->entry_size);

				/* Recycle the extension bucket if this delete
				 * emptied it */
				if ((bucket->signature[0] == 0) &&
					(bucket->signature[1] == 0) &&
					(bucket->signature[2] == 0) &&
					(bucket->signature[3] == 0) &&
					(bucket_prev != NULL)) {
					bucket_prev->next = bucket->next;
					bucket_prev->next_valid =
						bucket->next_valid;

					memset(bucket, 0,
						sizeof(struct rte_bucket_4_16));
					/* Convert the bucket address back to an
					 * extension-pool index */
					bucket_index = (((uint8_t *)bucket -
						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
					f->stack[f->stack_pos++] = bucket_index;
				}

				return 0;
			}
		}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}
560
/*
 * Compare the (pre-masked) 16-byte key at key_in against all 4 slots
 * of bucket, branch-free: a slot matches when both 64-bit XOR halves
 * are zero and its signature valid bit is set. pos is set to the
 * matching slot (0-3) or to 4 on a miss; signature[4] is a zeroed
 * sentinel so callers may read bucket->signature[pos] unconditionally.
 */
#define lookup_key16_cmp(key_in, bucket, pos)			\
{								\
	uint64_t xor[4][2], or[4], signature[4];		\
								\
	signature[0] = (~bucket->signature[0]) & 1;		\
	signature[1] = (~bucket->signature[1]) & 1;		\
	signature[2] = (~bucket->signature[2]) & 1;		\
	signature[3] = (~bucket->signature[3]) & 1;		\
								\
	xor[0][0] = key_in[0] ^	 bucket->key[0][0];		\
	xor[0][1] = key_in[1] ^	 bucket->key[0][1];		\
								\
	xor[1][0] = key_in[0] ^	 bucket->key[1][0];		\
	xor[1][1] = key_in[1] ^	 bucket->key[1][1];		\
								\
	xor[2][0] = key_in[0] ^	 bucket->key[2][0];		\
	xor[2][1] = key_in[1] ^	 bucket->key[2][1];		\
								\
	xor[3][0] = key_in[0] ^	 bucket->key[3][0];		\
	xor[3][1] = key_in[1] ^	 bucket->key[3][1];		\
								\
	or[0] = xor[0][0] | xor[0][1] | signature[0];		\
	or[1] = xor[1][0] | xor[1][1] | signature[1];		\
	or[2] = xor[2][0] | xor[2][1] | signature[2];		\
	or[3] = xor[3][0] | xor[3][1] | signature[3];		\
								\
	pos = 4;						\
	if (or[0] == 0)						\
		pos = 0;					\
	if (or[1] == 0)						\
		pos = 1;					\
	if (or[2] == 0)						\
		pos = 2;					\
	if (or[3] == 0)						\
		pos = 3;					\
}
597
/*
 * Pipeline stage 0, single packet: pop the lowest set bit of
 * pkts_mask into pkt0_index, fetch that mbuf and prefetch the region
 * of its metadata holding the key.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
{								\
	uint64_t pkt_mask;					\
	uint32_t key_offset = f->key_offset;\
								\
	pkt0_index = __builtin_ctzll(pkts_mask);		\
	pkt_mask = 1LLU << pkt0_index;				\
	pkts_mask &= ~pkt_mask;					\
								\
	mbuf0 = pkts[pkt0_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
}
610
/*
 * Pipeline stage 1, single packet: read the precomputed signature
 * from the mbuf metadata, locate the bucket and prefetch its first
 * two cache lines.
 */
#define lookup1_stage1(mbuf1, bucket1, f)			\
{								\
	uint64_t signature;					\
	uint32_t bucket_index;					\
								\
	signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_16 *)			\
		&f->memory[bucket_index * f->bucket_size];	\
	rte_prefetch0(bucket1);					\
	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
}
623
/*
 * Pipeline stage 1, single packet, "dosig" variant: no precomputed
 * signature in the mbuf, so hash the masked key here, then locate and
 * prefetch the bucket.
 */
#define lookup1_stage1_dosig(mbuf1, bucket1, f)			\
{								\
	uint64_t *key;						\
	uint64_t signature = 0;				\
	uint32_t bucket_index;				\
	uint64_t hash_key_buffer[2];		\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
								\
	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
	signature = f->f_hash(hash_key_buffer,			\
			RTE_TABLE_HASH_KEY_SIZE, f->seed);		\
								\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_16 *)			\
		&f->memory[bucket_index * f->bucket_size];	\
	rte_prefetch0(bucket1);					\
	rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
}
644
/*
 * Pipeline stage 2, single packet, LRU table: compare the masked key
 * against the bucket, set the packet's bit in pkts_mask_out on a hit,
 * record and prefetch the entry pointer, and touch the LRU state.
 * On a miss pos == 4, so the sentinel signature[4] yields a zero bit.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
		pkts_mask_out, entries, f)			\
{								\
	void *a;						\
	uint64_t pkt_mask;					\
	uint64_t *key;						\
	uint64_t hash_key_buffer[2];		\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
								\
	lookup_key16_cmp(hash_key_buffer, bucket2, pos);	\
								\
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
	lru_update(bucket2, pos);				\
}
668
/*
 * Pipeline stage 2, single packet, extendible-bucket table: like the
 * LRU variant but with no LRU touch; on a miss with a chained bucket
 * it records the next bucket and the key in buckets[]/keys[] and sets
 * the packet's bit in buckets_mask so the grinder can continue the
 * chain walk.
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \
	buckets_mask, buckets, keys, f)				\
{								\
	struct rte_bucket_4_16 *bucket_next;			\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint64_t hash_key_buffer[2];		\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
								\
	lookup_key16_cmp(hash_key_buffer, bucket2, pos);	\
								\
	pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket2->next;				\
	buckets[pkt2_index] = bucket_next;			\
	keys[pkt2_index] = key;					\
}
698
/*
 * Walk one step down a bucket extension chain for a packet that
 * missed all buckets visited so far: compare the masked key against
 * the current chain bucket, record a hit or queue the next chained
 * bucket (prefetched) for another grinder pass.
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)					\
{								\
	struct rte_bucket_4_16 *bucket, *bucket_next;		\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint64_t hash_key_buffer[2];		\
	uint32_t pos;						\
								\
	bucket = buckets[pkt_index];				\
	key = keys[pkt_index];					\
	hash_key_buffer[0] = key[0] & f->key_mask[0];	\
	hash_key_buffer[1] = key[1] & f->key_mask[1];	\
								\
	lookup_key16_cmp(hash_key_buffer, bucket, pos);	\
								\
	pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt_index] = a;					\
								\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket->next;				\
	rte_prefetch0(bucket_next);				\
	rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
	buckets[pkt_index] = bucket_next;			\
	keys[pkt_index] = key;					\
}
731
/*
 * Pipeline stage 0, two packets: pop the two lowest set bits of
 * pkts_mask, fetch both mbufs and prefetch their key areas. Requires
 * at least two bits set in pkts_mask.
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
		pkts, pkts_mask, f)				\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;		\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
752
/*
 * Pipeline stage 0, two packets, odd-count tolerant: when only one
 * bit is left in pkts_mask the first packet is processed twice (the
 * second slot repeats pkt00_index), which is harmless because the
 * later stages are idempotent per packet index.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
		mbuf00, mbuf01, pkts, pkts_mask, f)		\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;		\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));	\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	if (pkts_mask == 0)					\
		pkt01_index = pkt00_index;			\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));	\
}
775
/*
 * Pipeline stage 1, two packets: read both precomputed signatures
 * from mbuf metadata, locate both buckets and prefetch their first
 * two cache lines each.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
{								\
	uint64_t signature10, signature11;			\
	uint32_t bucket10_index, bucket11_index;		\
								\
	signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\
	bucket10_index = signature10 & (f->n_buckets - 1);	\
	bucket10 = (struct rte_bucket_4_16 *)			\
		&f->memory[bucket10_index * f->bucket_size];	\
	rte_prefetch0(bucket10);				\
	rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
								\
	signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\
	bucket11_index = signature11 & (f->n_buckets - 1);	\
	bucket11 = (struct rte_bucket_4_16 *)			\
		&f->memory[bucket11_index * f->bucket_size];	\
	rte_prefetch0(bucket11);				\
	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
}
795
/*
 * Pipeline stage 1, two packets, "dosig" variant: hash the masked
 * keys here instead of reading precomputed signatures, then locate
 * and prefetch both buckets.
 */
#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)	\
{								\
	uint64_t *key10, *key11;					\
	uint64_t hash_offset_buffer[2];				\
	uint64_t signature10, signature11;			\
	uint32_t bucket10_index, bucket11_index;	\
								\
	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\
	hash_offset_buffer[0] = key10[0] & f->key_mask[0];	\
	hash_offset_buffer[1] = key10[1] & f->key_mask[1];	\
	signature10 = f->f_hash(hash_offset_buffer,			\
			RTE_TABLE_HASH_KEY_SIZE, f->seed);\
	bucket10_index = signature10 & (f->n_buckets - 1);	\
	bucket10 = (struct rte_bucket_4_16 *)				\
		&f->memory[bucket10_index * f->bucket_size];	\
	rte_prefetch0(bucket10);				\
	rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
								\
	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\
	hash_offset_buffer[0] = key11[0] & f->key_mask[0];	\
	hash_offset_buffer[1] = key11[1] & f->key_mask[1];	\
	signature11 = f->f_hash(hash_offset_buffer,			\
			RTE_TABLE_HASH_KEY_SIZE, f->seed);\
	bucket11_index = signature11 & (f->n_buckets - 1);	\
	bucket11 = (struct rte_bucket_4_16 *)			\
		&f->memory[bucket11_index * f->bucket_size];	\
	rte_prefetch0(bucket11);				\
	rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
}
825
/*
 * Pipeline stage 2, two packets, LRU table: compare both masked keys
 * against their buckets, accumulate hit bits into pkts_mask_out,
 * record and prefetch both entry pointers, and touch both buckets'
 * LRU state.
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
		bucket20, bucket21, pkts_mask_out, entries, f)	\
{								\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask;			\
	uint64_t *key20, *key21;				\
	uint64_t hash_key_buffer20[2];			\
	uint64_t hash_key_buffer21[2];			\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
	hash_key_buffer20[0] = key20[0] & f->key_mask[0];	\
	hash_key_buffer20[1] = key20[1] & f->key_mask[1];	\
	hash_key_buffer21[0] = key21[0] & f->key_mask[0];	\
	hash_key_buffer21[1] = key21[1] & f->key_mask[1];	\
								\
	lookup_key16_cmp(hash_key_buffer20, bucket20, pos20);	\
	lookup_key16_cmp(hash_key_buffer21, bucket21, pos21);	\
								\
	pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
	pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;			\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
	lru_update(bucket20, pos20);				\
	lru_update(bucket21, pos21);				\
}
859
/*
 * Pipeline stage 2, two packets, extendible-bucket table: like the
 * LRU variant but without LRU touches; packets that miss in a bucket
 * with a valid chain are queued in buckets[]/keys[] (with their bit
 * set in buckets_mask) so the grinder can continue the chain walk.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
{								\
	struct rte_bucket_4_16 *bucket20_next, *bucket21_next;	\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
	uint64_t *key20, *key21;				\
	uint64_t hash_key_buffer20[2];			\
	uint64_t hash_key_buffer21[2];			\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
	hash_key_buffer20[0] = key20[0] & f->key_mask[0];	\
	hash_key_buffer20[1] = key20[1] & f->key_mask[1];	\
	hash_key_buffer21[0] = key21[0] & f->key_mask[0];	\
	hash_key_buffer21[1] = key21[1] & f->key_mask[1];	\
								\
	lookup_key16_cmp(hash_key_buffer20, bucket20, pos20);	\
	lookup_key16_cmp(hash_key_buffer21, bucket21, pos21);	\
								\
	pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
	pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
								\
	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
	buckets_mask |= bucket20_mask | bucket21_mask;		\
	bucket20_next = bucket20->next;				\
	bucket21_next = bucket21->next;				\
	buckets[pkt20_index] = bucket20_next;			\
	buckets[pkt21_index] = bucket21_next;			\
	keys[pkt20_index] = key20;				\
	keys[pkt21_index] = key21;				\
}
902
/* Bulk lookup for an LRU key16 table, precomputed-signature variant.
 *
 * Packets selected by pkts_mask are pushed through a 3-stage software
 * pipeline (stage 0: fetch mbuf + prefetch key; stage 1: locate +
 * prefetch bucket; stage 2: compare key, emit entry) two packets per
 * iteration, so that prefetches issued in earlier stages have landed
 * by the time the data is touched. Bursts of fewer than 5 packets use
 * the simple single-packet path instead. On return *lookup_hit_mask
 * has one bit set per matched packet and entries[] holds the matching
 * entry pointers.
 */
static int
rte_table_hash_lookup_key16_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_16 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f,
			n_pkts_in - __builtin_popcountll(pkts_mask_out));
		return 0;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
		__builtin_popcountll(pkts_mask_out));
	return 0;
} /* rte_table_hash_lookup_key16_lru() */
1030
/*
 * Burst lookup in a 16-byte-key hash table with LRU eviction, "dosig"
 * variant: uses the lookup*_stage1_dosig macros, which presumably compute
 * the key signature internally instead of reading a pre-computed one from
 * the mbuf meta-data (NOTE(review): confirm against the macro definitions
 * earlier in this file).
 *
 * The lookup is software-pipelined, 3 stages deep and 2 packets wide.
 * The lookup*_stage*() identifiers are macros that assign to the variable
 * names passed as arguments; that is how locals such as mbuf00/bucket10/
 * pkt00_index receive their values. Variable name suffixes encode
 * (pipeline slot, lane), e.g. mbuf21 = slot 2, lane 1.
 *
 * Parameters:
 *   table           - hash table handle (struct rte_table_hash *)
 *   pkts            - input packet burst
 *   pkts_mask       - bitmask of valid entries in pkts[]
 *   lookup_hit_mask - output: bitmask of packets that hit a table entry
 *   entries         - output: per-packet entry pointers (valid on hit)
 *
 * Always returns 0; hits are reported through *lookup_hit_mask.
 */
static int
rte_table_hash_lookup_key16_lru_dosig(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	/* __rte_unused: only consumed when stats collection is compiled in. */
	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);

	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		/* Scalar fallback: one packet at a time. The stage macros
		 * consume bits of pkts_mask, which terminates the loop. */
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_16 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1_dosig(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
			__builtin_popcountll(pkts_mask_out));
		return 0;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	/* Steady state: each iteration advances two packets through every
	 * stage; the "feed" block shifts state down the pipeline slots. */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Drain the two packet pairs still inside the pipeline: first the
	 * pair needing stages 1+2, then the pair needing only stage 2. */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
		__builtin_popcountll(pkts_mask_out));
	return 0;
} /* rte_table_hash_lookup_key16_lru_dosig() */
1159
/*
 * Burst lookup in a 16-byte-key extendable-bucket hash table.
 *
 * Same 3-stage, 2-packet-wide software pipeline as the LRU variant (the
 * lookup*_stage*() identifiers are macros that assign to the variable names
 * passed as arguments). The difference: a bucket may chain to further
 * buckets. Packets whose key was not resolved in the first bucket are
 * recorded in buckets_mask (with the matching bucket/key pointers saved in
 * buckets[]/keys[]) and are resolved after the pipeline by the grind loop.
 *
 * Parameters:
 *   table           - hash table handle (struct rte_table_hash *)
 *   pkts            - input packet burst
 *   pkts_mask       - bitmask of valid entries in pkts[]
 *   lookup_hit_mask - output: bitmask of packets that hit a table entry
 *   entries         - output: per-packet entry pointers (valid on hit)
 *
 * Always returns 0; hits are reported through *lookup_hit_mask.
 */
static int
rte_table_hash_lookup_key16_ext(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0, buckets_mask = 0;
	/* Per-packet carry-over state for the bucket-chain grind loop. */
	struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];

	/* __rte_unused: only consumed when stats collection is compiled in. */
	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		/* Scalar fallback: one packet at a time. The stage macros
		 * consume bits of pkts_mask, which terminates the loop. */
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_16 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_ext(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, buckets_mask,
				buckets, keys, f);
		}

		goto grind_next_buckets;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	/* Steady state: each iteration advances two packets through every
	 * stage; the "feed" block shifts state down the pipeline slots. */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries,
			buckets_mask, buckets, keys, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Drain the two packet pairs still inside the pipeline: first the
	 * pair needing stages 1+2, then the pair needing only stage 2. */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

grind_next_buckets:
	/* Grind next buckets */
	/* Follow bucket chains breadth-first: each pass resolves one chain
	 * link per pending packet; lookup_grinder() re-arms bits in
	 * buckets_mask_next for packets whose chain continues. */
	for ( ; buckets_mask; ) {
		uint64_t buckets_mask_next = 0;

		for ( ; buckets_mask; ) {
			uint64_t pkt_mask;
			uint32_t pkt_index;

			pkt_index = __builtin_ctzll(buckets_mask);
			pkt_mask = 1LLU << pkt_index;
			buckets_mask &= ~pkt_mask;

			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
				entries, buckets_mask_next, f);
		}

		buckets_mask = buckets_mask_next;
	}

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
		__builtin_popcountll(pkts_mask_out));
	return 0;
} /* rte_table_hash_lookup_key16_ext() */
1310
/*
 * Burst lookup in a 16-byte-key extendable-bucket hash table, "dosig"
 * variant: uses the lookup*_stage1_dosig macros, which presumably compute
 * the key signature internally instead of reading a pre-computed one from
 * the mbuf meta-data (NOTE(review): confirm against the macro definitions
 * earlier in this file).
 *
 * Structure is identical to rte_table_hash_lookup_key16_ext(): a 3-stage,
 * 2-packet-wide software pipeline (the lookup*_stage*() identifiers are
 * macros that assign to the variable names passed as arguments), followed
 * by a grind loop that walks the bucket chains of unresolved packets
 * recorded in buckets_mask/buckets[]/keys[].
 *
 * Parameters:
 *   table           - hash table handle (struct rte_table_hash *)
 *   pkts            - input packet burst
 *   pkts_mask       - bitmask of valid entries in pkts[]
 *   lookup_hit_mask - output: bitmask of packets that hit a table entry
 *   entries         - output: per-packet entry pointers (valid on hit)
 *
 * Always returns 0; hits are reported through *lookup_hit_mask.
 */
static int
rte_table_hash_lookup_key16_ext_dosig(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0, buckets_mask = 0;
	/* Per-packet carry-over state for the bucket-chain grind loop. */
	struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];

	/* __rte_unused: only consumed when stats collection is compiled in. */
	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);

	RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		/* Scalar fallback: one packet at a time. The stage macros
		 * consume bits of pkts_mask, which terminates the loop. */
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_16 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1_dosig(mbuf, bucket, f);
			lookup1_stage2_ext(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, buckets_mask,
				buckets, keys, f);
		}

		goto grind_next_buckets;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	/* Steady state: each iteration advances two packets through every
	 * stage; the "feed" block shifts state down the pipeline slots. */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries,
			buckets_mask, buckets, keys, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Drain the two packet pairs still inside the pipeline: first the
	 * pair needing stages 1+2, then the pair needing only stage 2. */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

grind_next_buckets:
	/* Grind next buckets */
	/* Follow bucket chains breadth-first: each pass resolves one chain
	 * link per pending packet; lookup_grinder() re-arms bits in
	 * buckets_mask_next for packets whose chain continues. */
	for ( ; buckets_mask; ) {
		uint64_t buckets_mask_next = 0;

		for ( ; buckets_mask; ) {
			uint64_t pkt_mask;
			uint32_t pkt_index;

			pkt_index = __builtin_ctzll(buckets_mask);
			pkt_mask = 1LLU << pkt_index;
			buckets_mask &= ~pkt_mask;

			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
				entries, buckets_mask_next, f);
		}

		buckets_mask = buckets_mask_next;
	}

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
		__builtin_popcountll(pkts_mask_out));
	return 0;
} /* rte_table_hash_lookup_key16_ext_dosig() */
1462
1463static int
1464rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear)
1465{
1466	struct rte_table_hash *t = (struct rte_table_hash *) table;
1467
1468	if (stats != NULL)
1469		memcpy(stats, &t->stats, sizeof(t->stats));
1470
1471	if (clear)
1472		memset(&t->stats, 0, sizeof(t->stats));
1473
1474	return 0;
1475}
1476
/* Ops table for the key16 LRU hash table (bulk add/delete not supported). */
struct rte_table_ops rte_table_hash_key16_lru_ops = {
	.f_create = rte_table_hash_create_key16_lru,
	.f_free = rte_table_hash_free_key16_lru,
	.f_add = rte_table_hash_entry_add_key16_lru,
	.f_delete = rte_table_hash_entry_delete_key16_lru,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key16_lru,
	.f_stats = rte_table_hash_key16_stats_read,
};
1487
1488struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = {
1489	.f_create = rte_table_hash_create_key16_lru,
1490	.f_free = rte_table_hash_free_key16_lru,
1491	.f_add = rte_table_hash_entry_add_key16_lru,
1492	.f_delete = rte_table_hash_entry_delete_key16_lru,
1493	.f_lookup = rte_table_hash_lookup_key16_lru_dosig,
1494	.f_stats = rte_table_hash_key16_stats_read,
1495};
1496
/* Ops table for the key16 extendable-bucket hash table
 * (bulk add/delete not supported). */
struct rte_table_ops rte_table_hash_key16_ext_ops = {
	.f_create = rte_table_hash_create_key16_ext,
	.f_free = rte_table_hash_free_key16_ext,
	.f_add = rte_table_hash_entry_add_key16_ext,
	.f_delete = rte_table_hash_entry_delete_key16_ext,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key16_ext,
	.f_stats = rte_table_hash_key16_stats_read,
};
1507
1508struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = {
1509	.f_create = rte_table_hash_create_key16_ext,
1510	.f_free = rte_table_hash_free_key16_ext,
1511	.f_add = rte_table_hash_entry_add_key16_ext,
1512	.f_delete = rte_table_hash_entry_delete_key16_ext,
1513	.f_lookup = rte_table_hash_lookup_key16_ext_dosig,
1514	.f_stats = rte_table_hash_key16_stats_read,
1515};
1516