1/*	$NetBSD: radix.c,v 1.47 2016/12/12 03:55:57 ozaki-r Exp $	*/
2
3/*
4 * Copyright (c) 1988, 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	@(#)radix.c	8.6 (Berkeley) 10/17/95
32 */
33
34/*
35 * Routines to build and maintain radix trees for routing lookups.
36 */
37
38#include <vnet/util/radix.h>
39
40typedef void (*rn_printer_t)(void *, const char *fmt, ...);
41
42static int max_keylen = 33; // me
43struct radix_mask *rn_mkfreelist;
44struct radix_node_head *mask_rnhead;
45static char *addmask_key;
46static const char normal_chars[] =
47    {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
48static char *rn_zeros, *rn_ones;
49
50#define rn_masktop (mask_rnhead->rnh_treetop)
51
52static int rn_satisfies_leaf(const char *, struct radix_node *, int);
53static int rn_lexobetter(const void *, const void *);
54static struct radix_mask *rn_new_radix_mask(struct radix_node *,
55    struct radix_mask *);
56static struct radix_node *rn_walknext(struct radix_node *, rn_printer_t,
57    void *);
58static struct radix_node *rn_walkfirst(struct radix_node *, rn_printer_t,
59    void *);
60static void rn_nodeprint(struct radix_node *, rn_printer_t, void *,
61    const char *);
62
63#define	SUBTREE_OPEN	"[ "
64#define	SUBTREE_CLOSE	" ]"
65
66#ifdef RN_DEBUG
67static void rn_treeprint(struct radix_node_head *, rn_printer_t, void *);
68#endif /* RN_DEBUG */
69
70#define MIN(x,y) (((x)<(y))?(x):(y))
71
72static struct radix_mask*
73rm_alloc (void)
74{
75    struct radix_mask *rm = clib_mem_alloc(sizeof(struct radix_mask));
76
77    clib_memset(rm, 0, sizeof(*rm));
78
79    return (rm);
80}
81
82static void
83rm_free (struct radix_mask *rm)
84{
85    clib_mem_free(rm);
86}
87
88#define R_Malloc(p, t, n)                               \
89{                                                       \
90    p = (t) clib_mem_alloc((unsigned int)(n));          \
91    clib_memset(p, 0, n);                                    \
92}
93#define Free(p) clib_mem_free((p))
94#define log(a,b, c...)
95#define bool i32
96
97/*
98 * The data structure for the keys is a radix tree with one way
99 * branching removed.  The index rn_b at an internal node n represents a bit
100 * position to be tested.  The tree is arranged so that all descendants
101 * of a node n have keys whose bits all agree up to position rn_b - 1.
102 * (We say the index of n is rn_b.)
103 *
104 * There is at least one descendant which has a one bit at position rn_b,
105 * and at least one with a zero there.
106 *
107 * A route is determined by a pair of key and mask.  We require that the
108 * bit-wise logical and of the key and mask to be the key.
109 * We define the index of a route to associated with the mask to be
110 * the first bit number in the mask where 0 occurs (with bit number 0
111 * representing the highest order bit).
112 *
113 * We say a mask is normal if every bit is 0, past the index of the mask.
114 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b,
115 * and m is a normal mask, then the route applies to every descendant of n.
116 * If the index(m) < rn_b, this implies the trailing last few bits of k
117 * before bit b are all 0, (and hence consequently true of every descendant
118 * of n), so the route applies to all descendants of the node as well.
119 *
120 * Similar logic shows that a non-normal mask m such that
121 * index(m) <= index(n) could potentially apply to many children of n.
122 * Thus, for each non-host route, we attach its mask to a list at an internal
123 * node as high in the tree as we can go.
124 *
125 * The present version of the code makes use of normal routes in short-
126 * circuiting an explicit mask and compare operation when testing whether
127 * a key satisfies a normal route, and also in remembering the unique leaf
128 * that governs a subtree.
129 */
130
131struct radix_node *
132rn_search(
133	const void *v_arg,
134	struct radix_node *head)
135{
136	const u8 * const v = v_arg;
137	struct radix_node *x;
138
139	for (x = head; x->rn_b >= 0;) {
140		if (x->rn_bmask & v[x->rn_off])
141			x = x->rn_r;
142		else
143			x = x->rn_l;
144	}
145	return x;
146}
147
148struct radix_node *
149rn_search_m(
150	const void *v_arg,
151	struct radix_node *head,
152	const void *m_arg)
153{
154	struct radix_node *x;
155	const u8 * const v = v_arg;
156	const u8 * const m = m_arg;
157
158	for (x = head; x->rn_b >= 0;) {
159		if ((x->rn_bmask & m[x->rn_off]) &&
160		    (x->rn_bmask & v[x->rn_off]))
161			x = x->rn_r;
162		else
163			x = x->rn_l;
164	}
165	return x;
166}
167
168int
169rn_refines(
170	const void *m_arg,
171	const void *n_arg)
172{
173	const char *m = m_arg;
174	const char *n = n_arg;
175	const char *lim = n + *(const u8 *)n;
176	const char *lim2 = lim;
177	int longer = (*(const u8 *)n++) - (int)(*(const u8 *)m++);
178	int masks_are_equal = 1;
179
180	if (longer > 0)
181		lim -= longer;
182	while (n < lim) {
183		if (*n & ~(*m))
184			return 0;
185		if (*n++ != *m++)
186			masks_are_equal = 0;
187	}
188	while (n < lim2)
189		if (*n++)
190			return 0;
191	if (masks_are_equal && (longer < 0))
192		for (lim2 = m - longer; m < lim2; )
193			if (*m++)
194				return 1;
195	return !masks_are_equal;
196}
197
198struct radix_node *
199rn_lookup(
200	const void *v_arg,
201	const void *m_arg,
202	struct radix_node_head *head)
203{
204	struct radix_node *x;
205	const char *netmask = NULL;
206
207	if (m_arg) {
208		if ((x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off)) == 0)
209			return NULL;
210		netmask = x->rn_key;
211	}
212	x = rn_match(v_arg, head);
213	if (x != NULL && netmask != NULL) {
214		while (x != NULL && x->rn_mask != netmask)
215			x = x->rn_dupedkey;
216	}
217	return x;
218}
219
220static int
221rn_satisfies_leaf(
222	const char *trial,
223	struct radix_node *leaf,
224	int skip)
225{
226	const char *cp = trial;
227	const char *cp2 = leaf->rn_key;
228	const char *cp3 = leaf->rn_mask;
229	const char *cplim;
230	int length = MIN(*(const u8 *)cp, *(const u8 *)cp2);
231
232	if (cp3 == 0)
233		cp3 = rn_ones;
234	else
235		length = MIN(length, *(const u8 *)cp3);
236	cplim = cp + length; cp3 += skip; cp2 += skip;
237	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
238		if ((*cp ^ *cp2) & *cp3)
239			return 0;
240	return 1;
241}
242
243struct radix_node *
244rn_match(
245	const void *v_arg,
246	struct radix_node_head *head)
247{
248	const char * const v = v_arg;
249	struct radix_node *t = head->rnh_treetop;
250	struct radix_node *top = t;
251	struct radix_node *x;
252	struct radix_node *saved_t;
253	const char *cp = v;
254	const char *cp2;
255	const char *cplim;
256	int off = t->rn_off;
257	int vlen = *(const u8 *)cp;
258	int matched_off;
259	int test, b, rn_b;
260
261	/*
262	 * Open code rn_search(v, top) to avoid overhead of extra
263	 * subroutine call.
264	 */
265	for (; t->rn_b >= 0; ) {
266		if (t->rn_bmask & cp[t->rn_off])
267			t = t->rn_r;
268		else
269			t = t->rn_l;
270	}
271	/*
272	 * See if we match exactly as a host destination
273	 * or at least learn how many bits match, for normal mask finesse.
274	 *
275	 * It doesn't hurt us to limit how many bytes to check
276	 * to the length of the mask, since if it matches we had a genuine
277	 * match and the leaf we have is the most specific one anyway;
278	 * if it didn't match with a shorter length it would fail
279	 * with a long one.  This wins big for class B&C netmasks which
280	 * are probably the most common case...
281	 */
282	if (t->rn_mask)
283		vlen = *(const u8 *)t->rn_mask;
284	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
285	for (; cp < cplim; cp++, cp2++)
286		if (*cp != *cp2)
287			goto on1;
288	/*
289	 * This extra grot is in case we are explicitly asked
290	 * to look up the default.  Ugh!
291	 */
292	if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey)
293		t = t->rn_dupedkey;
294	return t;
295on1:
296	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
297	for (b = 7; (test >>= 1) > 0;)
298		b--;
299	matched_off = cp - v;
300	b += matched_off << 3;
301	rn_b = -1 - b;
302	/*
303	 * If there is a host route in a duped-key chain, it will be first.
304	 */
305	if ((saved_t = t)->rn_mask == 0)
306		t = t->rn_dupedkey;
307	for (; t; t = t->rn_dupedkey)
308		/*
309		 * Even if we don't match exactly as a host,
310		 * we may match if the leaf we wound up at is
311		 * a route to a net.
312		 */
313		if (t->rn_flags & RNF_NORMAL) {
314			if (rn_b <= t->rn_b)
315				return t;
316		} else if (rn_satisfies_leaf(v, t, matched_off))
317				return t;
318	t = saved_t;
319	/* start searching up the tree */
320	do {
321		struct radix_mask *m;
322		t = t->rn_p;
323		m = t->rn_mklist;
324		if (m) {
325			/*
326			 * If non-contiguous masks ever become important
327			 * we can restore the masking and open coding of
328			 * the search and satisfaction test and put the
329			 * calculation of "off" back before the "do".
330			 */
331			do {
332				if (m->rm_flags & RNF_NORMAL) {
333					if (rn_b <= m->rm_b)
334						return m->rm_leaf;
335				} else {
336					off = MIN(t->rn_off, matched_off);
337					x = rn_search_m(v, t, m->rm_mask);
338					while (x && x->rn_mask != m->rm_mask)
339						x = x->rn_dupedkey;
340					if (x && rn_satisfies_leaf(v, x, off))
341						return x;
342				}
343				m = m->rm_mklist;
344			} while (m);
345		}
346	} while (t != top);
347	return NULL;
348}
349
350static void
351rn_nodeprint(struct radix_node *rn, rn_printer_t printer, void *arg,
352    const char *delim)
353{
354	(*printer)(arg, "%s(%s%p: p<%p> l<%p> r<%p>)",
355	    delim, ((void *)rn == arg) ? "*" : "", rn, rn->rn_p,
356	    rn->rn_l, rn->rn_r);
357}
358
359#ifdef RN_DEBUG
360int	rn_debug =  1;
361
362static void
363rn_dbg_print(void *arg, const char *fmt, ...)
364{
365	va_list ap;
366
367	va_start(ap, fmt);
368	vlog(LOG_DEBUG, fmt, ap);
369	va_end(ap);
370}
371
372static void
373rn_treeprint(struct radix_node_head *h, rn_printer_t printer, void *arg)
374{
375	struct radix_node *dup, *rn;
376	const char *delim;
377
378	if (printer == NULL)
379		return;
380
381	rn = rn_walkfirst(h->rnh_treetop, printer, arg);
382	for (;;) {
383		/* Process leaves */
384		delim = "";
385		for (dup = rn; dup != NULL; dup = dup->rn_dupedkey) {
386			if ((dup->rn_flags & RNF_ROOT) != 0)
387				continue;
388			rn_nodeprint(dup, printer, arg, delim);
389			delim = ", ";
390		}
391		rn = rn_walknext(rn, printer, arg);
392		if (rn->rn_flags & RNF_ROOT)
393			return;
394	}
395	/* NOTREACHED */
396}
397
398#define	traverse(__head, __rn)	rn_treeprint((__head), rn_dbg_print, (__rn))
399#endif /* RN_DEBUG */
400
401struct radix_node *
402rn_newpair(
403	const void *v,
404	int b,
405	struct radix_node nodes[2])
406{
407	struct radix_node *tt = nodes;
408	struct radix_node *t = tt + 1;
409	t->rn_b = b; t->rn_bmask = 0x80 >> (b & 7);
410	t->rn_l = tt; t->rn_off = b >> 3;
411	tt->rn_b = -1; tt->rn_key = v; tt->rn_p = t;
412	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
413	return t;
414}
415
416struct radix_node *
417rn_insert(
418	const void *v_arg,
419	struct radix_node_head *head,
420	int *dupentry,
421	struct radix_node nodes[2])
422{
423	struct radix_node *top = head->rnh_treetop;
424	struct radix_node *t = rn_search(v_arg, top);
425	struct radix_node *tt;
426	const char *v = v_arg;
427	int head_off = top->rn_off;
428	int vlen = *((const u8 *)v);
429	const char *cp = v + head_off;
430	int b;
431    	/*
432	 * Find first bit at which v and t->rn_key differ
433	 */
434    {
435	const char *cp2 = t->rn_key + head_off;
436	const char *cplim = v + vlen;
437	int cmp_res;
438
439	while (cp < cplim)
440		if (*cp2++ != *cp++)
441			goto on1;
442	*dupentry = 1;
443	return t;
444on1:
445	*dupentry = 0;
446	cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
447	for (b = (cp - v) << 3; cmp_res; b--)
448		cmp_res >>= 1;
449    }
450    {
451	struct radix_node *p, *x = top;
452	cp = v;
453	do {
454		p = x;
455		if (cp[x->rn_off] & x->rn_bmask)
456			x = x->rn_r;
457		else x = x->rn_l;
458	} while (b > (unsigned) x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */
459#ifdef RN_DEBUG
460	if (rn_debug)
461		log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, p);
462#endif
463	t = rn_newpair(v_arg, b, nodes); tt = t->rn_l;
464	if ((cp[p->rn_off] & p->rn_bmask) == 0)
465		p->rn_l = t;
466	else
467		p->rn_r = t;
468	x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */
469	if ((cp[t->rn_off] & t->rn_bmask) == 0) {
470		t->rn_r = x;
471	} else {
472		t->rn_r = tt; t->rn_l = x;
473	}
474#ifdef RN_DEBUG
475	if (rn_debug) {
476		log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
477		    traverse(head, p);
478	}
479#endif /* RN_DEBUG */
480    }
481	return tt;
482}
483
484struct radix_node *
485rn_addmask(
486	const void *n_arg,
487	int search,
488	int skip)
489{
490	const char *netmask = n_arg;
491	const char *cp;
492	const char *cplim;
493	struct radix_node *x;
494	struct radix_node *saved_x;
495	int b = 0, mlen, j;
496	int maskduplicated, m0, isnormal;
497	static int last_zeroed = 0;
498
499	if ((mlen = *(const u8 *)netmask) > max_keylen)
500		mlen = max_keylen;
501	if (skip == 0)
502		skip = 1;
503	if (mlen <= skip)
504		return mask_rnhead->rnh_nodes;
505	if (skip > 1)
506		memmove(addmask_key + 1, rn_ones + 1, skip - 1);
507	if ((m0 = mlen) > skip)
508		memmove(addmask_key + skip, netmask + skip, mlen - skip);
509	/*
510	 * Trim trailing zeroes.
511	 */
512	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
513		cp--;
514	mlen = cp - addmask_key;
515	if (mlen <= skip) {
516		if (m0 >= last_zeroed)
517			last_zeroed = mlen;
518		return mask_rnhead->rnh_nodes;
519	}
520	if (m0 < last_zeroed)
521		clib_memset(addmask_key + m0, 0, last_zeroed - m0);
522	*addmask_key = last_zeroed = mlen;
523	x = rn_search(addmask_key, rn_masktop);
524	if (memcmp(addmask_key, x->rn_key, mlen) != 0)
525		x = 0;
526	if (x || search)
527		return x;
528	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
529	if ((saved_x = x) == NULL)
530		return NULL;
531	clib_memset(x, 0, max_keylen + 2 * sizeof (*x));
532	cp = netmask = (void *)(x + 2);
533	memmove(x + 2, addmask_key, mlen);
534	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
535	if (maskduplicated) {
536                log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n");
537		Free(saved_x);
538		return x;
539	}
540	/*
541	 * Calculate index of mask, and check for normalcy.
542	 */
543	cplim = netmask + mlen; isnormal = 1;
544	for (cp = netmask + skip; (cp < cplim) && *(const u8 *)cp == 0xff;)
545		cp++;
546	if (cp != cplim) {
547		for (j = 0x80; (j & *cp) != 0; j >>= 1)
548			b++;
549		if (*cp != normal_chars[b] || cp != (cplim - 1))
550			isnormal = 0;
551	}
552	b += (cp - netmask) << 3;
553	x->rn_b = -1 - b;
554	if (isnormal)
555		x->rn_flags |= RNF_NORMAL;
556	return x;
557}
558
559static int	/* XXX: arbitrary ordering for non-contiguous masks */
560rn_lexobetter(
561	const void *m_arg,
562	const void *n_arg)
563{
564	const u8 *mp = m_arg;
565	const u8 *np = n_arg;
566	const u8 *lim;
567
568	if (*mp > *np)
569		return 1;  /* not really, but need to check longer one first */
570	if (*mp == *np)
571		for (lim = mp + *mp; mp < lim;)
572			if (*mp++ > *np++)
573				return 1;
574	return 0;
575}
576
577static struct radix_mask *
578rn_new_radix_mask(
579	struct radix_node *tt,
580	struct radix_mask *next)
581{
582	struct radix_mask *m;
583
584	m = rm_alloc();
585	if (m == NULL) {
586		log(LOG_ERR, "Mask for route not entered\n");
587		return NULL;
588	}
589	clib_memset(m, 0, sizeof(*m));
590	m->rm_b = tt->rn_b;
591	m->rm_flags = tt->rn_flags;
592	if (tt->rn_flags & RNF_NORMAL)
593		m->rm_leaf = tt;
594	else
595		m->rm_mask = tt->rn_mask;
596	m->rm_mklist = next;
597	tt->rn_mklist = m;
598	return m;
599}
600
601struct radix_node *
602rn_addroute(
603	const void *v_arg,
604	const void *n_arg,
605	struct radix_node_head *head,
606	struct radix_node treenodes[2])
607{
608	const char *v = v_arg, *netmask = n_arg;
609	struct radix_node *t, *x = NULL, *tt;
610	struct radix_node *saved_tt, *top = head->rnh_treetop;
611	short b = 0, b_leaf = 0;
612	int keyduplicated;
613	const char *mmask;
614	struct radix_mask *m, **mp;
615
616	/*
617	 * In dealing with non-contiguous masks, there may be
618	 * many different routes which have the same mask.
619	 * We will find it useful to have a unique pointer to
620	 * the mask to speed avoiding duplicate references at
621	 * nodes and possibly save time in calculating indices.
622	 */
623	if (netmask != NULL) {
624		if ((x = rn_addmask(netmask, 0, top->rn_off)) == NULL)
625			return NULL;
626		b_leaf = x->rn_b;
627		b = -1 - x->rn_b;
628		netmask = x->rn_key;
629	}
630	/*
631	 * Deal with duplicated keys: attach node to previous instance
632	 */
633	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
634	if (keyduplicated) {
635		for (t = tt; tt != NULL; t = tt, tt = tt->rn_dupedkey) {
636			if (tt->rn_mask == netmask)
637				return NULL;
638			if (netmask == NULL ||
639			    (tt->rn_mask != NULL &&
640			     (b_leaf < tt->rn_b || /* index(netmask) > node */
641			       rn_refines(netmask, tt->rn_mask) ||
642			       rn_lexobetter(netmask, tt->rn_mask))))
643				break;
644		}
645		/*
646		 * If the mask is not duplicated, we wouldn't
647		 * find it among possible duplicate key entries
648		 * anyway, so the above test doesn't hurt.
649		 *
650		 * We sort the masks for a duplicated key the same way as
651		 * in a masklist -- most specific to least specific.
652		 * This may require the unfortunate nuisance of relocating
653		 * the head of the list.
654		 *
655		 * We also reverse, or doubly link the list through the
656		 * parent pointer.
657		 */
658		if (tt == saved_tt) {
659			struct	radix_node *xx = x;
660			/* link in at head of list */
661			(tt = treenodes)->rn_dupedkey = t;
662			tt->rn_flags = t->rn_flags;
663			tt->rn_p = x = t->rn_p;
664			t->rn_p = tt;
665			if (x->rn_l == t)
666				x->rn_l = tt;
667			else
668				x->rn_r = tt;
669			saved_tt = tt;
670			x = xx;
671		} else {
672			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
673			t->rn_dupedkey = tt;
674			tt->rn_p = t;
675			if (tt->rn_dupedkey)
676				tt->rn_dupedkey->rn_p = tt;
677		}
678		tt->rn_key = v;
679		tt->rn_b = -1;
680		tt->rn_flags = RNF_ACTIVE;
681	}
682	/*
683	 * Put mask in tree.
684	 */
685	if (netmask != NULL) {
686		tt->rn_mask = netmask;
687		tt->rn_b = x->rn_b;
688		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
689	}
690	t = saved_tt->rn_p;
691	if (keyduplicated)
692		goto on2;
693	b_leaf = -1 - t->rn_b;
694	if (t->rn_r == saved_tt)
695		x = t->rn_l;
696	else
697		x = t->rn_r;
698	/* Promote general routes from below */
699	if (x->rn_b < 0) {
700		for (mp = &t->rn_mklist; x != NULL; x = x->rn_dupedkey) {
701			if (x->rn_mask != NULL && x->rn_b >= b_leaf &&
702			    x->rn_mklist == NULL) {
703				*mp = m = rn_new_radix_mask(x, NULL);
704				if (m != NULL)
705					mp = &m->rm_mklist;
706			}
707		}
708	} else if (x->rn_mklist != NULL) {
709		/*
710		 * Skip over masks whose index is > that of new node
711		 */
712		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
713			if (m->rm_b >= b_leaf)
714				break;
715		t->rn_mklist = m;
716		*mp = NULL;
717	}
718on2:
719	/* Add new route to highest possible ancestor's list */
720	if (netmask == NULL || b > t->rn_b)
721		return tt; /* can't lift at all */
722	b_leaf = tt->rn_b;
723	do {
724		x = t;
725		t = t->rn_p;
726	} while (b <= t->rn_b && x != top);
727	/*
728	 * Search through routes associated with node to
729	 * insert new route according to index.
730	 * Need same criteria as when sorting dupedkeys to avoid
731	 * double loop on deletion.
732	 */
733	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
734		if (m->rm_b < b_leaf)
735			continue;
736		if (m->rm_b > b_leaf)
737			break;
738		if (m->rm_flags & RNF_NORMAL) {
739			mmask = m->rm_leaf->rn_mask;
740			if (tt->rn_flags & RNF_NORMAL) {
741				log(LOG_ERR, "Non-unique normal route,"
742				    " mask not entered\n");
743				return tt;
744			}
745		} else
746			mmask = m->rm_mask;
747		if (mmask == netmask) {
748			m->rm_refs++;
749			tt->rn_mklist = m;
750			return tt;
751		}
752		if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask))
753			break;
754	}
755	*mp = rn_new_radix_mask(tt, *mp);
756	return tt;
757}
758
759struct radix_node *
760rn_delete1(
761	const void *v_arg,
762	const void *netmask_arg,
763	struct radix_node_head *head,
764	struct radix_node *rn)
765{
766	struct radix_node *t, *p, *x, *tt;
767	struct radix_mask *m, *saved_m, **mp;
768	struct radix_node *dupedkey, *saved_tt, *top;
769	const char *v, *netmask;
770	int b, head_off, vlen;
771
772	v = v_arg;
773	netmask = netmask_arg;
774	x = head->rnh_treetop;
775	tt = rn_search(v, x);
776	head_off = x->rn_off;
777	vlen =  *(const u8 *)v;
778	saved_tt = tt;
779	top = x;
780	if (tt == NULL ||
781	    memcmp(v + head_off, tt->rn_key + head_off, vlen - head_off) != 0)
782		return NULL;
783	/*
784	 * Delete our route from mask lists.
785	 */
786	if (netmask != NULL) {
787		if ((x = rn_addmask(netmask, 1, head_off)) == NULL)
788			return NULL;
789		netmask = x->rn_key;
790		while (tt->rn_mask != netmask)
791			if ((tt = tt->rn_dupedkey) == NULL)
792				return NULL;
793	}
794	if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL)
795		goto on1;
796	if (tt->rn_flags & RNF_NORMAL) {
797		if (m->rm_leaf != tt || m->rm_refs > 0) {
798			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
799			return NULL;  /* dangling ref could cause disaster */
800		}
801	} else {
802		if (m->rm_mask != tt->rn_mask) {
803			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
804			goto on1;
805		}
806		if (--m->rm_refs >= 0)
807			goto on1;
808	}
809	b = -1 - tt->rn_b;
810	t = saved_tt->rn_p;
811	if (b > t->rn_b)
812		goto on1; /* Wasn't lifted at all */
813	do {
814		x = t;
815		t = t->rn_p;
816	} while (b <= t->rn_b && x != top);
817	for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) {
818		if (m == saved_m) {
819			*mp = m->rm_mklist;
820			rm_free(m);
821			break;
822		}
823	}
824	if (m == NULL) {
825		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
826		if (tt->rn_flags & RNF_NORMAL)
827			return NULL; /* Dangling ref to us */
828	}
829on1:
830	/*
831	 * Eliminate us from tree
832	 */
833	if (tt->rn_flags & RNF_ROOT)
834		return NULL;
835#ifdef RN_DEBUG
836	if (rn_debug)
837		log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, tt);
838#endif
839	t = tt->rn_p;
840	dupedkey = saved_tt->rn_dupedkey;
841	if (dupedkey != NULL) {
842		/*
843		 * Here, tt is the deletion target, and
844		 * saved_tt is the head of the dupedkey chain.
845		 */
846		if (tt == saved_tt) {
847			x = dupedkey;
848			x->rn_p = t;
849			if (t->rn_l == tt)
850				t->rn_l = x;
851			else
852				t->rn_r = x;
853		} else {
854			/* find node in front of tt on the chain */
855			for (x = p = saved_tt;
856			     p != NULL && p->rn_dupedkey != tt;)
857				p = p->rn_dupedkey;
858			if (p != NULL) {
859				p->rn_dupedkey = tt->rn_dupedkey;
860				if (tt->rn_dupedkey != NULL)
861					tt->rn_dupedkey->rn_p = p;
862			} else
863				log(LOG_ERR, "rn_delete: couldn't find us\n");
864		}
865		t = tt + 1;
866		if  (t->rn_flags & RNF_ACTIVE) {
867			*++x = *t;
868			p = t->rn_p;
869			if (p->rn_l == t)
870				p->rn_l = x;
871			else
872				p->rn_r = x;
873			x->rn_l->rn_p = x;
874			x->rn_r->rn_p = x;
875		}
876		goto out;
877	}
878	if (t->rn_l == tt)
879		x = t->rn_r;
880	else
881		x = t->rn_l;
882	p = t->rn_p;
883	if (p->rn_r == t)
884		p->rn_r = x;
885	else
886		p->rn_l = x;
887	x->rn_p = p;
888	/*
889	 * Demote routes attached to us.
890	 */
891	if (t->rn_mklist == NULL)
892		;
893	else if (x->rn_b >= 0) {
894		for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist)
895			;
896		*mp = t->rn_mklist;
897	} else {
898		/* If there are any key,mask pairs in a sibling
899		   duped-key chain, some subset will appear sorted
900		   in the same order attached to our mklist */
901		for (m = t->rn_mklist;
902		     m != NULL && x != NULL;
903		     x = x->rn_dupedkey) {
904			if (m == x->rn_mklist) {
905				struct radix_mask *mm = m->rm_mklist;
906				x->rn_mklist = NULL;
907				if (--(m->rm_refs) < 0)
908					rm_free(m);
909				m = mm;
910			}
911		}
912		if (m != NULL) {
913			log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n",
914			    m, x);
915		}
916	}
917	/*
918	 * We may be holding an active internal node in the tree.
919	 */
920	x = tt + 1;
921	if (t != x) {
922		*t = *x;
923		t->rn_l->rn_p = t;
924		t->rn_r->rn_p = t;
925		p = x->rn_p;
926		if (p->rn_l == x)
927			p->rn_l = t;
928		else
929			p->rn_r = t;
930	}
931out:
932#ifdef RN_DEBUG
933	if (rn_debug) {
934		log(LOG_DEBUG, "%s: Coming Out:\n", __func__),
935		    traverse(head, tt);
936	}
937#endif /* RN_DEBUG */
938	tt->rn_flags &= ~RNF_ACTIVE;
939	tt[1].rn_flags &= ~RNF_ACTIVE;
940	return tt;
941}
942
943struct radix_node *
944rn_delete(
945	const void *v_arg,
946	const void *netmask_arg,
947	struct radix_node_head *head)
948{
949	return rn_delete1(v_arg, netmask_arg, head, NULL);
950}
951
952static struct radix_node *
953rn_walknext(struct radix_node *rn, rn_printer_t printer, void *arg)
954{
955	/* If at right child go back up, otherwise, go right */
956	while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) {
957		if (printer != NULL)
958			(*printer)(arg, SUBTREE_CLOSE);
959		rn = rn->rn_p;
960	}
961	if (printer)
962		rn_nodeprint(rn->rn_p, printer, arg, "");
963	/* Find the next *leaf* since next node might vanish, too */
964	for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) {
965		if (printer != NULL)
966			(*printer)(arg, SUBTREE_OPEN);
967		rn = rn->rn_l;
968	}
969	return rn;
970}
971
972static struct radix_node *
973rn_walkfirst(struct radix_node *rn, rn_printer_t printer, void *arg)
974{
975	/* First time through node, go left */
976	while (rn->rn_b >= 0) {
977		if (printer != NULL)
978			(*printer)(arg, SUBTREE_OPEN);
979		rn = rn->rn_l;
980	}
981	return rn;
982}
983
984int
985rn_walktree(
986	struct radix_node_head *h,
987	int (*f)(struct radix_node *, void *),
988	void *w)
989{
990	int error;
991	struct radix_node *base, *next, *rn;
992	/*
993	 * This gets complicated because we may delete the node
994	 * while applying the function f to it, so we need to calculate
995	 * the successor node in advance.
996	 */
997	rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
998	for (;;) {
999		base = rn;
1000		next = rn_walknext(rn, NULL, NULL);
1001		/* Process leaves */
1002		while ((rn = base) != NULL) {
1003			base = rn->rn_dupedkey;
1004			if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
1005				return error;
1006		}
1007		rn = next;
1008		if (rn->rn_flags & RNF_ROOT)
1009			return 0;
1010	}
1011	/* NOTREACHED */
1012}
1013
1014struct radix_node *
1015rn_search_matched(struct radix_node_head *h,
1016    int (*matcher)(struct radix_node *, void *), void *w)
1017{
1018	bool matched;
1019	struct radix_node *base, *next, *rn;
1020	/*
1021	 * This gets complicated because we may delete the node
1022	 * while applying the function f to it, so we need to calculate
1023	 * the successor node in advance.
1024	 */
1025	rn = rn_walkfirst(h->rnh_treetop, NULL, NULL);
1026	for (;;) {
1027		base = rn;
1028		next = rn_walknext(rn, NULL, NULL);
1029		/* Process leaves */
1030		while ((rn = base) != NULL) {
1031			base = rn->rn_dupedkey;
1032			if (!(rn->rn_flags & RNF_ROOT)) {
1033				matched = (*matcher)(rn, w);
1034				if (matched)
1035					return rn;
1036			}
1037		}
1038		rn = next;
1039		if (rn->rn_flags & RNF_ROOT)
1040			return NULL;
1041	}
1042	/* NOTREACHED */
1043}
1044
1045int
1046rn_inithead(void **head, int off)
1047{
1048	struct radix_node_head *rnh;
1049
1050	if (*head != NULL)
1051		return 1;
1052	R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
1053	if (rnh == NULL)
1054		return 0;
1055	*head = rnh;
1056	return rn_inithead0(rnh, off);
1057}
1058
1059int
1060rn_inithead0(struct radix_node_head *rnh, int off)
1061{
1062	struct radix_node *t;
1063	struct radix_node *tt;
1064	struct radix_node *ttt;
1065
1066	clib_memset(rnh, 0, sizeof(*rnh));
1067	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
1068	ttt = rnh->rnh_nodes + 2;
1069	t->rn_r = ttt;
1070	t->rn_p = t;
1071	tt = t->rn_l;
1072	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
1073	tt->rn_b = -1 - off;
1074	*ttt = *tt;
1075	ttt->rn_key = rn_ones;
1076	rnh->rnh_addaddr = rn_addroute;
1077	rnh->rnh_deladdr = rn_delete;
1078	rnh->rnh_matchaddr = rn_match;
1079	rnh->rnh_lookup = rn_lookup;
1080	rnh->rnh_treetop = t;
1081	return 1;
1082}
1083
1084static clib_error_t *
1085rn_module_init (vlib_main_t * vm)
1086{
1087	char *cp, *cplim;
1088
1089	R_Malloc(rn_zeros, char *, 3 * max_keylen);
1090	if (rn_zeros == NULL)
1091            return (clib_error_return (0, "RN Zeros..."));
1092
1093	clib_memset(rn_zeros, 0, 3 * max_keylen);
1094	rn_ones = cp = rn_zeros + max_keylen;
1095	addmask_key = cplim = rn_ones + max_keylen;
1096	while (cp < cplim)
1097		*cp++ = -1;
1098	if (rn_inithead((void *)&mask_rnhead, 0) == 0)
1099            return (clib_error_return (0, "RN Init 2"));
1100
1101        return (NULL);
1102}
1103
1104VLIB_INIT_FUNCTION(rn_module_init);
1105