Annotation of sys/net/radix_mpath.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: radix_mpath.c,v 1.7 2006/06/18 12:03:19 pascoe Exp $ */
! 2: /* $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $ */
! 3:
! 4: /*
! 5: * Copyright (C) 2001 WIDE Project.
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: * 3. Neither the name of the project nor the names of its contributors
! 17: * may be used to endorse or promote products derived from this software
! 18: * without specific prior written permission.
! 19: *
! 20: * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
! 21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 23: * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
! 24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 30: * SUCH DAMAGE.
! 31: * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
! 32: * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
! 33: * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
! 34: * PROPERTIES.
! 35: */
! 36:
! 37: #include <sys/param.h>
! 38: #include <sys/systm.h>
! 39: #include <sys/malloc.h>
! 40: #include <sys/socket.h>
! 41: #define M_DONTWAIT M_NOWAIT
! 42: #include <sys/domain.h>
! 43: #include <sys/syslog.h>
! 44: #include <net/radix.h>
! 45: #include <net/radix_mpath.h>
! 46: #include <net/route.h>
! 47: #include <dev/rndvar.h>
! 48:
! 49: #include <netinet/in.h>
! 50:
! 51: extern int ipmultipath;
! 52: extern int ip6_multipath;
! 53:
! 54: u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
! 55:
! 56: /*
! 57: * give some jitter to hash, to avoid synchronization between routers
! 58: */
! 59: static u_int32_t hashjitter;
! 60:
! 61: int
! 62: rn_mpath_capable(struct radix_node_head *rnh)
! 63: {
! 64: return rnh->rnh_multipath;
! 65: }
! 66:
! 67: struct radix_node *
! 68: rn_mpath_next(struct radix_node *rn)
! 69: {
! 70: struct radix_node *next;
! 71:
! 72: if (!rn->rn_dupedkey)
! 73: return NULL;
! 74: next = rn->rn_dupedkey;
! 75: if (rn->rn_mask == next->rn_mask)
! 76: return next;
! 77: else
! 78: return NULL;
! 79: }
! 80:
! 81: int
! 82: rn_mpath_count(struct radix_node *rn)
! 83: {
! 84: int i;
! 85:
! 86: i = 1;
! 87: while ((rn = rn_mpath_next(rn)) != NULL)
! 88: i++;
! 89: return i;
! 90: }
! 91:
! 92: struct rtentry *
! 93: rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
! 94: {
! 95: struct radix_node *rn;
! 96:
! 97: if (!rn_mpath_next((struct radix_node *)rt))
! 98: return rt;
! 99:
! 100: if (!gate)
! 101: return NULL;
! 102: /* beyond here, we use rn as the master copy */
! 103: rn = (struct radix_node *)rt;
! 104: do {
! 105: rt = (struct rtentry *)rn;
! 106: if (rt->rt_gateway->sa_len == gate->sa_len &&
! 107: !memcmp(rt->rt_gateway, gate, gate->sa_len))
! 108: break;
! 109: } while ((rn = rn_mpath_next(rn)) != NULL);
! 110: if (!rn)
! 111: return NULL;
! 112:
! 113: return (struct rtentry *)rn;
! 114: }
! 115:
! 116: /*
! 117: * check if we have the same key/mask/gateway on the table already.
! 118: */
! 119: int
! 120: rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
! 121: struct sockaddr *netmask, int mpathok)
! 122: {
! 123: struct radix_node *rn, *rn1;
! 124: struct rtentry *rt1;
! 125: char *p, *q, *eq;
! 126: int same, l, skip;
! 127:
! 128: rn = (struct radix_node *)rt;
! 129: rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
! 130: if (!rn1 || rn1->rn_flags & RNF_ROOT)
! 131: return 0;
! 132:
! 133: /*
! 134: * unlike other functions we have in this file, we have to check
! 135: * all key/mask/gateway as rnh_lookup can match less specific entry.
! 136: */
! 137: rt1 = (struct rtentry *)rn1;
! 138:
! 139: /* compare key. */
! 140: if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
! 141: bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
! 142: goto different;
! 143:
! 144: /* key was the same. compare netmask. hairy... */
! 145: if (rt_mask(rt1) && netmask) {
! 146: skip = rnh->rnh_treetop->rn_off;
! 147: if (rt_mask(rt1)->sa_len > netmask->sa_len) {
! 148: /*
! 149: * as rt_mask(rt1) is made optimal by radix.c,
! 150: * there must be some 1-bits on rt_mask(rt1)
! 151: * after netmask->sa_len. therefore, in
! 152: * this case, the entries are different.
! 153: */
! 154: if (rt_mask(rt1)->sa_len > skip)
! 155: goto different;
! 156: else {
! 157: /* no bits to compare, i.e. same*/
! 158: goto maskmatched;
! 159: }
! 160: }
! 161:
! 162: l = rt_mask(rt1)->sa_len;
! 163: if (skip > l) {
! 164: /* no bits to compare, i.e. same */
! 165: goto maskmatched;
! 166: }
! 167: p = (char *)rt_mask(rt1);
! 168: q = (char *)netmask;
! 169: if (bcmp(p + skip, q + skip, l - skip))
! 170: goto different;
! 171: /*
! 172: * need to go through all the bit, as netmask is not
! 173: * optimal and can contain trailing 0s
! 174: */
! 175: eq = (char *)netmask + netmask->sa_len;
! 176: q += l;
! 177: same = 1;
! 178: while (eq > q)
! 179: if (*q++) {
! 180: same = 0;
! 181: break;
! 182: }
! 183: if (!same)
! 184: goto different;
! 185: } else if (!rt_mask(rt1) && !netmask)
! 186: ; /* no mask to compare, i.e. same */
! 187: else {
! 188: /* one has mask and the other does not, different */
! 189: goto different;
! 190: }
! 191:
! 192: maskmatched:
! 193: if (!mpathok)
! 194: return EEXIST;
! 195:
! 196: /* key/mask were the same. compare gateway for all multipaths */
! 197: do {
! 198: rt1 = (struct rtentry *)rn1;
! 199:
! 200: /* sanity: no use in comparing the same thing */
! 201: if (rn1 == rn)
! 202: continue;
! 203:
! 204: if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
! 205: bcmp(rt1->rt_gateway, rt->rt_gateway,
! 206: rt1->rt_gateway->sa_len))
! 207: continue;
! 208:
! 209: /* all key/mask/gateway are the same. conflicting entry. */
! 210: return EEXIST;
! 211: } while ((rn1 = rn_mpath_next(rn1)) != NULL);
! 212:
! 213: different:
! 214: return 0;
! 215: }
! 216:
! 217: /*
! 218: * allocate a route, potentially using multipath to select the peer.
! 219: */
! 220: void
! 221: rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp, u_int tableid)
! 222: {
! 223: #if defined(INET) || defined(INET6)
! 224: struct radix_node *rn;
! 225: int hash, npaths, threshold;
! 226: #endif
! 227:
! 228: /*
! 229: * return a cached entry if it is still valid, otherwise we increase
! 230: * the risk of disrupting local flows.
! 231: */
! 232: if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
! 233: return;
! 234: ro->ro_rt = rtalloc1(&ro->ro_dst, 1, tableid);
! 235:
! 236: /* if the route does not exist or it is not multipath, don't care */
! 237: if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
! 238: return;
! 239:
! 240: /* check if multipath routing is enabled for the specified protocol */
! 241: if (!(0
! 242: #ifdef INET
! 243: || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
! 244: #endif
! 245: #ifdef INET6
! 246: || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
! 247: #endif
! 248: ))
! 249: return;
! 250:
! 251: #if defined(INET) || defined(INET6)
! 252: /* gw selection by Hash-Threshold (RFC 2992) */
! 253: rn = (struct radix_node *)ro->ro_rt;
! 254: npaths = rn_mpath_count(rn);
! 255: hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
! 256: threshold = 1 + (0xffff / npaths);
! 257: while (hash > threshold && rn) {
! 258: /* stay within the multipath routes */
! 259: if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
! 260: break;
! 261: rn = rn->rn_dupedkey;
! 262: hash -= threshold;
! 263: }
! 264:
! 265: /* XXX try filling rt_gwroute and avoid unreachable gw */
! 266:
! 267: /* if gw selection fails, use the first match (default) */
! 268: if (!rn)
! 269: return;
! 270:
! 271: rtfree(ro->ro_rt);
! 272: ro->ro_rt = (struct rtentry *)rn;
! 273: ro->ro_rt->rt_refcnt++;
! 274: #endif
! 275: }
! 276:
! 277: int
! 278: rn_mpath_inithead(void **head, int off)
! 279: {
! 280: struct radix_node_head *rnh;
! 281:
! 282: while (hashjitter == 0)
! 283: hashjitter = arc4random();
! 284: if (rn_inithead(head, off) == 1) {
! 285: rnh = (struct radix_node_head *)*head;
! 286: rnh->rnh_multipath = 1;
! 287: return 1;
! 288: } else
! 289: return 0;
! 290: }
! 291:
! 292: /*
! 293: * hash function based on pf_hash in pf.c
! 294: */
! 295: #define mix(a,b,c) \
! 296: do { \
! 297: a -= b; a -= c; a ^= (c >> 13); \
! 298: b -= c; b -= a; b ^= (a << 8); \
! 299: c -= a; c -= b; c ^= (b >> 13); \
! 300: a -= b; a -= c; a ^= (c >> 12); \
! 301: b -= c; b -= a; b ^= (a << 16); \
! 302: c -= a; c -= b; c ^= (b >> 5); \
! 303: a -= b; a -= c; a ^= (c >> 3); \
! 304: b -= c; b -= a; b ^= (a << 10); \
! 305: c -= a; c -= b; c ^= (b >> 15); \
! 306: } while (0)
! 307:
! 308: u_int32_t
! 309: rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
! 310: {
! 311: u_int32_t a, b, c;
! 312:
! 313: a = b = 0x9e3779b9;
! 314: c = hashjitter;
! 315:
! 316: switch (ro->ro_dst.sa_family) {
! 317: #ifdef INET
! 318: case AF_INET:
! 319: {
! 320: struct sockaddr_in *sin_dst;
! 321:
! 322: sin_dst = (struct sockaddr_in *)&ro->ro_dst;
! 323: a += sin_dst->sin_addr.s_addr;
! 324: b += srcaddrp ? srcaddrp[0] : 0;
! 325: mix(a, b, c);
! 326: break;
! 327: }
! 328: #endif /* INET */
! 329: #ifdef INET6
! 330: case AF_INET6:
! 331: {
! 332: struct sockaddr_in6 *sin6_dst;
! 333:
! 334: sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
! 335: a += sin6_dst->sin6_addr.s6_addr32[0];
! 336: b += sin6_dst->sin6_addr.s6_addr32[2];
! 337: c += srcaddrp ? srcaddrp[0] : 0;
! 338: mix(a, b, c);
! 339: a += sin6_dst->sin6_addr.s6_addr32[1];
! 340: b += sin6_dst->sin6_addr.s6_addr32[3];
! 341: c += srcaddrp ? srcaddrp[1] : 0;
! 342: mix(a, b, c);
! 343: a += sin6_dst->sin6_addr.s6_addr32[2];
! 344: b += sin6_dst->sin6_addr.s6_addr32[1];
! 345: c += srcaddrp ? srcaddrp[2] : 0;
! 346: mix(a, b, c);
! 347: a += sin6_dst->sin6_addr.s6_addr32[3];
! 348: b += sin6_dst->sin6_addr.s6_addr32[0];
! 349: c += srcaddrp ? srcaddrp[3] : 0;
! 350: mix(a, b, c);
! 351: break;
! 352: }
! 353: #endif /* INET6 */
! 354: }
! 355:
! 356: return c;
! 357: }
CVSweb