Annotation of sys/net/radix_mpath.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: radix_mpath.c,v 1.7 2006/06/18 12:03:19 pascoe Exp $ */
2: /* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */
3:
4: /*
5: * Copyright (C) 2001 WIDE Project.
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the project nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
32: * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
33: * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
34: * PROPERTIES.
35: */
36:
37: #include <sys/param.h>
38: #include <sys/systm.h>
39: #include <sys/malloc.h>
40: #include <sys/socket.h>
41: #define M_DONTWAIT M_NOWAIT
42: #include <sys/domain.h>
43: #include <sys/syslog.h>
44: #include <net/radix.h>
45: #include <net/radix_mpath.h>
46: #include <net/route.h>
47: #include <dev/rndvar.h>
48:
49: #include <netinet/in.h>
50:
51: extern int ipmultipath;
52: extern int ip6_multipath;
53:
54: u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
55:
56: /*
57: * give some jitter to hash, to avoid synchronization between routers
58: */
59: static u_int32_t hashjitter;
60:
61: int
62: rn_mpath_capable(struct radix_node_head *rnh)
63: {
64: return rnh->rnh_multipath;
65: }
66:
67: struct radix_node *
68: rn_mpath_next(struct radix_node *rn)
69: {
70: struct radix_node *next;
71:
72: if (!rn->rn_dupedkey)
73: return NULL;
74: next = rn->rn_dupedkey;
75: if (rn->rn_mask == next->rn_mask)
76: return next;
77: else
78: return NULL;
79: }
80:
81: int
82: rn_mpath_count(struct radix_node *rn)
83: {
84: int i;
85:
86: i = 1;
87: while ((rn = rn_mpath_next(rn)) != NULL)
88: i++;
89: return i;
90: }
91:
92: struct rtentry *
93: rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
94: {
95: struct radix_node *rn;
96:
97: if (!rn_mpath_next((struct radix_node *)rt))
98: return rt;
99:
100: if (!gate)
101: return NULL;
102: /* beyond here, we use rn as the master copy */
103: rn = (struct radix_node *)rt;
104: do {
105: rt = (struct rtentry *)rn;
106: if (rt->rt_gateway->sa_len == gate->sa_len &&
107: !memcmp(rt->rt_gateway, gate, gate->sa_len))
108: break;
109: } while ((rn = rn_mpath_next(rn)) != NULL);
110: if (!rn)
111: return NULL;
112:
113: return (struct rtentry *)rn;
114: }
115:
116: /*
117: * check if we have the same key/mask/gateway on the table already.
118: */
119: int
120: rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
121: struct sockaddr *netmask, int mpathok)
122: {
123: struct radix_node *rn, *rn1;
124: struct rtentry *rt1;
125: char *p, *q, *eq;
126: int same, l, skip;
127:
128: rn = (struct radix_node *)rt;
129: rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
130: if (!rn1 || rn1->rn_flags & RNF_ROOT)
131: return 0;
132:
133: /*
134: * unlike other functions we have in this file, we have to check
135: * all key/mask/gateway as rnh_lookup can match less specific entry.
136: */
137: rt1 = (struct rtentry *)rn1;
138:
139: /* compare key. */
140: if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
141: bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
142: goto different;
143:
144: /* key was the same. compare netmask. hairy... */
145: if (rt_mask(rt1) && netmask) {
146: skip = rnh->rnh_treetop->rn_off;
147: if (rt_mask(rt1)->sa_len > netmask->sa_len) {
148: /*
149: * as rt_mask(rt1) is made optimal by radix.c,
150: * there must be some 1-bits on rt_mask(rt1)
151: * after netmask->sa_len. therefore, in
152: * this case, the entries are different.
153: */
154: if (rt_mask(rt1)->sa_len > skip)
155: goto different;
156: else {
157: /* no bits to compare, i.e. same*/
158: goto maskmatched;
159: }
160: }
161:
162: l = rt_mask(rt1)->sa_len;
163: if (skip > l) {
164: /* no bits to compare, i.e. same */
165: goto maskmatched;
166: }
167: p = (char *)rt_mask(rt1);
168: q = (char *)netmask;
169: if (bcmp(p + skip, q + skip, l - skip))
170: goto different;
171: /*
172: * need to go through all the bit, as netmask is not
173: * optimal and can contain trailing 0s
174: */
175: eq = (char *)netmask + netmask->sa_len;
176: q += l;
177: same = 1;
178: while (eq > q)
179: if (*q++) {
180: same = 0;
181: break;
182: }
183: if (!same)
184: goto different;
185: } else if (!rt_mask(rt1) && !netmask)
186: ; /* no mask to compare, i.e. same */
187: else {
188: /* one has mask and the other does not, different */
189: goto different;
190: }
191:
192: maskmatched:
193: if (!mpathok)
194: return EEXIST;
195:
196: /* key/mask were the same. compare gateway for all multipaths */
197: do {
198: rt1 = (struct rtentry *)rn1;
199:
200: /* sanity: no use in comparing the same thing */
201: if (rn1 == rn)
202: continue;
203:
204: if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
205: bcmp(rt1->rt_gateway, rt->rt_gateway,
206: rt1->rt_gateway->sa_len))
207: continue;
208:
209: /* all key/mask/gateway are the same. conflicting entry. */
210: return EEXIST;
211: } while ((rn1 = rn_mpath_next(rn1)) != NULL);
212:
213: different:
214: return 0;
215: }
216:
217: /*
218: * allocate a route, potentially using multipath to select the peer.
219: */
220: void
221: rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp, u_int tableid)
222: {
223: #if defined(INET) || defined(INET6)
224: struct radix_node *rn;
225: int hash, npaths, threshold;
226: #endif
227:
228: /*
229: * return a cached entry if it is still valid, otherwise we increase
230: * the risk of disrupting local flows.
231: */
232: if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
233: return;
234: ro->ro_rt = rtalloc1(&ro->ro_dst, 1, tableid);
235:
236: /* if the route does not exist or it is not multipath, don't care */
237: if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
238: return;
239:
240: /* check if multipath routing is enabled for the specified protocol */
241: if (!(0
242: #ifdef INET
243: || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
244: #endif
245: #ifdef INET6
246: || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
247: #endif
248: ))
249: return;
250:
251: #if defined(INET) || defined(INET6)
252: /* gw selection by Hash-Threshold (RFC 2992) */
253: rn = (struct radix_node *)ro->ro_rt;
254: npaths = rn_mpath_count(rn);
255: hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
256: threshold = 1 + (0xffff / npaths);
257: while (hash > threshold && rn) {
258: /* stay within the multipath routes */
259: if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
260: break;
261: rn = rn->rn_dupedkey;
262: hash -= threshold;
263: }
264:
265: /* XXX try filling rt_gwroute and avoid unreachable gw */
266:
267: /* if gw selection fails, use the first match (default) */
268: if (!rn)
269: return;
270:
271: rtfree(ro->ro_rt);
272: ro->ro_rt = (struct rtentry *)rn;
273: ro->ro_rt->rt_refcnt++;
274: #endif
275: }
276:
277: int
278: rn_mpath_inithead(void **head, int off)
279: {
280: struct radix_node_head *rnh;
281:
282: while (hashjitter == 0)
283: hashjitter = arc4random();
284: if (rn_inithead(head, off) == 1) {
285: rnh = (struct radix_node_head *)*head;
286: rnh->rnh_multipath = 1;
287: return 1;
288: } else
289: return 0;
290: }
291:
292: /*
293: * hash function based on pf_hash in pf.c
294: */
295: #define mix(a,b,c) \
296: do { \
297: a -= b; a -= c; a ^= (c >> 13); \
298: b -= c; b -= a; b ^= (a << 8); \
299: c -= a; c -= b; c ^= (b >> 13); \
300: a -= b; a -= c; a ^= (c >> 12); \
301: b -= c; b -= a; b ^= (a << 16); \
302: c -= a; c -= b; c ^= (b >> 5); \
303: a -= b; a -= c; a ^= (c >> 3); \
304: b -= c; b -= a; b ^= (a << 10); \
305: c -= a; c -= b; c ^= (b >> 15); \
306: } while (0)
307:
308: u_int32_t
309: rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
310: {
311: u_int32_t a, b, c;
312:
313: a = b = 0x9e3779b9;
314: c = hashjitter;
315:
316: switch (ro->ro_dst.sa_family) {
317: #ifdef INET
318: case AF_INET:
319: {
320: struct sockaddr_in *sin_dst;
321:
322: sin_dst = (struct sockaddr_in *)&ro->ro_dst;
323: a += sin_dst->sin_addr.s_addr;
324: b += srcaddrp ? srcaddrp[0] : 0;
325: mix(a, b, c);
326: break;
327: }
328: #endif /* INET */
329: #ifdef INET6
330: case AF_INET6:
331: {
332: struct sockaddr_in6 *sin6_dst;
333:
334: sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
335: a += sin6_dst->sin6_addr.s6_addr32[0];
336: b += sin6_dst->sin6_addr.s6_addr32[2];
337: c += srcaddrp ? srcaddrp[0] : 0;
338: mix(a, b, c);
339: a += sin6_dst->sin6_addr.s6_addr32[1];
340: b += sin6_dst->sin6_addr.s6_addr32[3];
341: c += srcaddrp ? srcaddrp[1] : 0;
342: mix(a, b, c);
343: a += sin6_dst->sin6_addr.s6_addr32[2];
344: b += sin6_dst->sin6_addr.s6_addr32[1];
345: c += srcaddrp ? srcaddrp[2] : 0;
346: mix(a, b, c);
347: a += sin6_dst->sin6_addr.s6_addr32[3];
348: b += sin6_dst->sin6_addr.s6_addr32[0];
349: c += srcaddrp ? srcaddrp[3] : 0;
350: mix(a, b, c);
351: break;
352: }
353: #endif /* INET6 */
354: }
355:
356: return c;
357: }
CVSweb