diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/include/linux/ipv6.h merge-2.5/include/linux/ipv6.h --- linux-2.5/include/linux/ipv6.h Wed May 28 20:07:22 2003 +++ merge-2.5/include/linux/ipv6.h Wed May 28 19:51:18 2003 @@ -150,7 +150,9 @@ struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_addr *daddr_cache; - +#ifdef CONFIG_IPV6_SUBTREES + struct in6_addr *saddr_cache; +#endif __u32 flow_label; __u32 frag_size; int hop_limit; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/include/net/ip6_route.h merge-2.5/include/net/ip6_route.h --- linux-2.5/include/net/ip6_route.h Wed May 28 20:07:29 2003 +++ merge-2.5/include/net/ip6_route.h Wed May 28 19:51:19 2003 @@ -102,7 +102,8 @@ */ static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, + struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; @@ -110,6 +111,9 @@ write_lock(&sk->dst_lock); __sk_dst_set(sk, dst); np->daddr_cache = daddr; +#ifdef CONFIG_IPV6_SUBTREES + np->saddr_cache = saddr; +#endif np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; write_unlock(&sk->dst_lock); } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/Kconfig merge-2.5/net/ipv6/Kconfig --- linux-2.5/net/ipv6/Kconfig Wed May 28 20:07:41 2003 +++ merge-2.5/net/ipv6/Kconfig Wed May 28 19:51:20 2003 @@ -42,4 +42,12 @@ If unsure, say Y. +config IPV6_SUBTREES + bool "IPv6: Source address routing" + depends on IPV6 + ---help--- + Support for advanced routing by both source and destination address. + + If unsure, say N. 
+ source "net/ipv6/netfilter/Kconfig" diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/ip6_fib.c merge-2.5/net/ipv6/ip6_fib.c --- linux-2.5/net/ipv6/ip6_fib.c Wed May 28 20:07:41 2003 +++ merge-2.5/net/ipv6/ip6_fib.c Wed May 28 19:51:22 2003 @@ -18,6 +18,7 @@ * Yuji SEKIYA @USAGI: Support default route on router node; * remove ip6_null_entry from the top of * routing table. + * Ville Nuorvala: Fixes to source address based routing */ #include #include @@ -40,7 +41,6 @@ #include #define RT6_DEBUG 2 -#undef CONFIG_IPV6_SUBTREES #if RT6_DEBUG >= 3 #define RT6_TRACE(x...) printk(KERN_DEBUG x) @@ -84,6 +84,10 @@ static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); +#ifdef CONFIG_IPV6_SUBTREES +static struct in6_addr fib6_addr_any = IN6ADDR_ANY_INIT; +#endif + /* * A routing update causes an increase of the serial number on the * afected subtree. This allows for cached routes to be asynchronously @@ -497,6 +501,8 @@ mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); } +static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); + /* * Add routing information to the routing tree. 
* / @@ -508,14 +514,16 @@ struct fib6_node *fn; int err = -ENOMEM; - fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), - rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt); +#ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; + fn = fib6_add_1(root, &rt->rt6i_src.addr, sizeof(struct in6_addr), + rt->rt6i_src.plen, (u8*) &rt->rt6i_src - (u8*) rt); + if (fn == NULL) goto out; -#ifdef CONFIG_IPV6_SUBTREES - if (rt->rt6i_src.plen) { + if (rt->rt6i_dst.plen) { struct fib6_node *sn; if (fn->subtree == NULL) { @@ -543,9 +551,9 @@ /* Now add the first leaf node to new subtree */ - sn = fib6_add_1(sfn, &rt->rt6i_src.addr, - sizeof(struct in6_addr), rt->rt6i_src.plen, - (u8*) &rt->rt6i_src - (u8*) rt); + sn = fib6_add_1(sfn, &rt->rt6i_dst.addr, + sizeof(struct in6_addr), rt->rt6i_dst.plen, + (u8*) &rt->rt6i_dst - (u8*) rt); if (sn == NULL) { /* If it is failed, discard just allocated @@ -559,21 +567,30 @@ /* Now link new subtree to main tree */ sfn->parent = fn; fn->subtree = sfn; - if (fn->leaf == NULL) { - fn->leaf = rt; - atomic_inc(&rt->rt6i_ref); - } } else { - sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, - sizeof(struct in6_addr), rt->rt6i_src.plen, - (u8*) &rt->rt6i_src - (u8*) rt); + sn = fib6_add_1(fn->subtree, &rt->rt6i_dst.addr, + sizeof(struct in6_addr), rt->rt6i_dst.plen, + (u8*) &rt->rt6i_dst - (u8*) rt); if (sn == NULL) goto st_failure; } + /* fib6_add_1 might have cleared the old leaf pointer */ + if (fn->leaf == NULL) { + fn->leaf = rt; + atomic_inc(&rt->rt6i_ref); + } + + pn = fn; fn = sn; } +#else + fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), + rt->rt6i_dst.plen, (u8*) &rt->rt6i_dst - (u8*) rt); + + if (fn == NULL) + goto out; #endif err = fib6_add_rt2node(fn, rt, nlh); @@ -585,8 +602,25 @@ } out: - if (err) + if (err) { +#ifdef CONFIG_IPV6_SUBTREES + + /* If fib6_add_1 has cleared the old leaf pointer in the + super-tree leaf node we have to find a new one for it. 
*/ + + if (pn && !(pn->fn_flags & RTN_RTINFO)) { + pn->leaf = fib6_find_prefix(pn); +#if RT6_DEBUG >= 2 + if (!pn->leaf) { + BUG_TRAP(pn->leaf); + pn->leaf = &ip6_null_entry; + } +#endif + atomic_inc(&pn->leaf->rt6i_ref); + } +#endif dst_free(&rt->u.dst); + } return err; #ifdef CONFIG_IPV6_SUBTREES @@ -594,8 +628,8 @@ is orphan. If it is, shoot it. */ st_failure: - if (fn && !(fn->fn_flags&RTN_RTINFO|RTN_ROOT)) - fib_repair_tree(fn); + if (fn && !(fn->fn_flags &(RTN_RTINFO|RTN_ROOT))) + fib6_repair_tree(fn); dst_free(&rt->u.dst); return err; #endif @@ -638,22 +672,28 @@ break; } - while ((fn->fn_flags & RTN_ROOT) == 0) { + for (;;) { #ifdef CONFIG_IPV6_SUBTREES if (fn->subtree) { - struct fib6_node *st; - struct lookup_args *narg; - - narg = args + 1; - - if (narg->addr) { - st = fib6_lookup_1(fn->subtree, narg); + struct rt6key *key; - if (st && !(st->fn_flags & RTN_ROOT)) - return st; + key = (struct rt6key *) ((u8 *) fn->leaf + + args->offset); + + if (addr_match(&key->addr, args->addr, key->plen)) { + struct fib6_node *st; + struct lookup_args *narg = args + 1; + if (!ipv6_addr_any(narg->addr)) { + st = fib6_lookup_1(fn->subtree, narg); + + if (st && !(st->fn_flags & RTN_ROOT)) + return st; + } } } #endif + if (fn->fn_flags & RTN_ROOT) + break; if (fn->fn_flags & RTN_RTINFO) { struct rt6key *key; @@ -677,13 +717,17 @@ struct lookup_args args[2]; struct rt6_info *rt = NULL; struct fib6_node *fn; +#ifdef CONFIG_IPV6_SUBTREES + if (saddr == NULL) + saddr = &fib6_addr_any; + args[0].offset = (u8*) &rt->rt6i_src - (u8*) rt; + args[0].addr = saddr; + args[1].offset = (u8*) &rt->rt6i_dst - (u8*) rt; + args[1].addr = daddr; +#else args[0].offset = (u8*) &rt->rt6i_dst - (u8*) rt; args[0].addr = daddr; - -#ifdef CONFIG_IPV6_SUBTREES - args[1].offset = (u8*) &rt->rt6i_src - (u8*) rt; - args[1].addr = saddr; #endif fn = fib6_lookup_1(root, args); @@ -736,19 +780,22 @@ { struct rt6_info *rt = NULL; struct fib6_node *fn; - - fn = fib6_locate_1(root, daddr, dst_len, - (u8*) 
&rt->rt6i_dst - (u8*) rt); - #ifdef CONFIG_IPV6_SUBTREES - if (src_len) { - BUG_TRAP(saddr!=NULL); - if (fn == NULL) - fn = fn->subtree; + if (saddr == NULL) + saddr = &fib6_addr_any; + + fn = fib6_locate_1(root, saddr, src_len, + (u8*) &rt->rt6i_src - (u8*) rt); + if (dst_len) { if (fn) - fn = fib6_locate_1(fn, saddr, src_len, - (u8*) &rt->rt6i_src - (u8*) rt); + fn = fib6_locate_1(fn->subtree, daddr, dst_len, + (u8*) &rt->rt6i_dst - (u8*) rt); + else + return NULL; } +#else + fn = fib6_locate_1(root, daddr, dst_len, + (u8*) &rt->rt6i_dst - (u8*) rt); #endif if (fn && fn->fn_flags&RTN_RTINFO) diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/ip6_output.c merge-2.5/net/ipv6/ip6_output.c --- linux-2.5/net/ipv6/ip6_output.c Wed May 28 20:07:41 2003 +++ merge-2.5/net/ipv6/ip6_output.c Wed May 28 19:51:23 2003 @@ -527,6 +527,7 @@ struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr final_dst_buf, *final_dst = NULL; struct dst_entry *dst; + struct rt6_info *rt; int err = 0; unsigned int pktlength, jumbolen, mtu; @@ -542,11 +543,11 @@ dst = __sk_dst_check(sk, np->dst_cookie); if (dst) { - struct rt6_info *rt = (struct rt6_info*)dst; + rt = (struct rt6_info*)dst; /* Yes, checking route validity in not connected case is not very simple. Take into account, - that we do not support routing by source, TOS, + that we do not support routing by TOS, and MSG_DONTROUTE --ANK (980726) 1. 
If route was host route, check that @@ -566,6 +567,13 @@ ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr)) && (np->daddr_cache == NULL || ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache))) +#ifdef CONFIG_IPV6_SUBTREES + || (!ipv6_addr_any(&fl->fl6_src) + && (rt->rt6i_src.plen != 128 || + ipv6_addr_cmp(&fl->fl6_src, &rt->rt6i_src.addr)) + && (np->saddr_cache == NULL || + ipv6_addr_cmp(&fl->fl6_src, np->saddr_cache))) +#endif || (fl->oif && fl->oif != dst->dev->ifindex)) { dst = NULL; } else @@ -592,6 +600,20 @@ goto out; } } +#ifdef CONFIG_IPV6_SUBTREES + rt = (struct rt6_info*)dst; + if (ipv6_addr_cmp(&fl->fl6_src, &np->saddr) && + (rt->rt6i_src.plen != 128 || + ipv6_addr_cmp(&fl->fl6_src, &rt->rt6i_src.addr))) { + dst_release(dst); + dst = ip6_route_output(sk, fl); + if (dst->error) { + IP6_INC_STATS(Ip6OutNoRoutes); + dst_release(dst); + return -ENETUNREACH; + } + } +#endif pktlength = length; if (dst) { @@ -715,7 +737,9 @@ out: ip6_dst_store(sk, dst, !ipv6_addr_cmp(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, + !ipv6_addr_cmp(&fl->fl6_src, &np->saddr) ? + &np->saddr : NULL); if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; return err; @@ -1138,15 +1162,16 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) { struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt; int err = 0; *dst = __sk_dst_check(sk, np->dst_cookie); if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; + rt = (struct rt6_info*)*dst; /* Yes, checking route validity in not connected case is not very simple. Take into account, - that we do not support routing by source, TOS, + that we do not support routing by TOS, and MSG_DONTROUTE --ANK (980726) 1. 
If route was host route, check that @@ -1166,6 +1191,13 @@ ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr)) && (np->daddr_cache == NULL || ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache))) +#ifdef CONFIG_IPV6_SUBTREES + || (!ipv6_addr_any(&fl->fl6_src) + && (rt->rt6i_src.plen != 128 || + ipv6_addr_cmp(&fl->fl6_src, &rt->rt6i_src.addr)) + && (np->saddr_cache == NULL || + ipv6_addr_cmp(&fl->fl6_src, np->saddr_cache))) +#endif || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { *dst = NULL; } else @@ -1192,7 +1224,20 @@ return err; } } - +#ifdef CONFIG_IPV6_SUBTREES + rt = (struct rt6_info*)*dst; + if (ipv6_addr_cmp(&fl->fl6_src, &np->saddr) && + (rt->rt6i_src.plen != 128 || + ipv6_addr_cmp(&fl->fl6_src, &rt->rt6i_src.addr))) { + dst_release(*dst); + *dst = ip6_route_output(sk, fl); + if ((*dst)->error) { + IP6_INC_STATS(Ip6OutNoRoutes); + dst_release(*dst); + return -ENETUNREACH; + } + } +#endif if (*dst) { if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) { dst_release(*dst); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/raw.c merge-2.5/net/ipv6/raw.c --- linux-2.5/net/ipv6/raw.c Wed May 28 20:07:42 2003 +++ merge-2.5/net/ipv6/raw.c Wed May 28 19:51:24 2003 @@ -700,7 +700,9 @@ done: ip6_dst_store(sk, dst, !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, + !ipv6_addr_cmp(&fl.fl6_src, &np->saddr) ? + &np->saddr : NULL); if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/route.c merge-2.5/net/ipv6/route.c --- linux-2.5/net/ipv6/route.c Wed May 28 20:07:42 2003 +++ merge-2.5/net/ipv6/route.c Wed May 28 19:51:25 2003 @@ -361,12 +361,8 @@ rt->u.dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES - if (rt->rt6i_src.plen && saddr) { - ipv6_addr_copy(&rt->rt6i_src.addr, saddr); - rt->rt6i_src.plen = 128; - } + rt->rt6i_src.plen = ort->rt6i_src.plen; #endif - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); dst_hold(&rt->u.dst); @@ -883,7 +879,7 @@ struct rt6_info *rt, *nrt; /* Locate old route to this destination. */ - rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1); + rt = rt6_lookup(dest, saddr, neigh->dev->ifindex, 1); if (rt == NULL) return; @@ -1050,6 +1046,9 @@ nrt = ip6_rt_copy(rt); if (nrt == NULL) goto out; +#ifdef CONFIG_IPV6_SUBTREES + nrt->rt6i_src.plen = rt->rt6i_src.plen; +#endif ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr); nrt->rt6i_dst.plen = 128; nrt->u.dst.flags |= DST_HOST; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/tcp_ipv6.c merge-2.5/net/ipv6/tcp_ipv6.c --- linux-2.5/net/ipv6/tcp_ipv6.c Wed May 28 20:07:42 2003 +++ merge-2.5/net/ipv6/tcp_ipv6.c Wed May 28 19:51:25 2003 @@ -563,10 +563,10 @@ struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct in6_addr *saddr = NULL; - struct in6_addr saddr_buf; struct flowi fl; struct dst_entry *dst; int addr_type; + int reroute = 0; int err; if (addr_len < SIN6_LEN_RFC2133) @@ -685,24 +685,40 @@ dst = ip6_route_output(sk, &fl); +#ifdef CONFIG_IPV6_SUBTREES + reroute = (saddr == NULL); +#endif if ((err = dst->error) != 0) { dst_release(dst); goto failure; } - - ip6_dst_store(sk, dst, NULL); - sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO); - + if (!reroute) { + ip6_dst_store(sk, dst, NULL, NULL); + sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO); + } 
if (saddr == NULL) { - err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf); + err = ipv6_get_saddr(dst, &np->daddr, &fl.fl6_src); + + if (reroute) + dst_release(dst); if (err) goto failure; - saddr = &saddr_buf; +#ifdef CONFIG_IPV6_SUBTREES + dst = ip6_route_output(sk, &fl); + + if ((err = dst->error) != 0) { + dst_release(dst); + goto failure; + } + ip6_dst_store(sk, dst, NULL, NULL); + sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO); +#endif + saddr = &fl.fl6_src; + ipv6_addr_copy(&np->rcv_saddr, saddr); } /* set the source address */ - ipv6_addr_copy(&np->rcv_saddr, saddr); ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; @@ -1363,7 +1379,7 @@ atomic_inc(&inet6_sock_nr); #endif - ip6_dst_store(newsk, dst, NULL); + ip6_dst_store(newsk, dst, NULL, NULL); sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO); newtcp6sk = (struct tcp6_sock *)newsk; @@ -1754,7 +1770,7 @@ return err; } - ip6_dst_store(sk, dst, NULL); + ip6_dst_store(sk, dst, NULL, NULL); sk->route_caps = dst->dev->features&~(NETIF_F_IP_CSUM|NETIF_F_TSO); } @@ -1795,7 +1811,7 @@ return -sk->err_soft; } - ip6_dst_store(sk, dst, NULL); + ip6_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5/net/ipv6/udp.c merge-2.5/net/ipv6/udp.c --- linux-2.5/net/ipv6/udp.c Wed May 28 20:07:42 2003 +++ merge-2.5/net/ipv6/udp.c Wed May 28 19:54:09 2003 @@ -254,12 +254,12 @@ struct inet_opt *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr; - struct in6_addr saddr; struct dst_entry *dst; struct flowi fl; struct ip6_flowlabel *flowlabel = NULL; int addr_type; int err; + int reroute = 0; if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) @@ -355,7 +355,6 @@ fl.proto = IPPROTO_UDP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &saddr); fl.oif = sk->bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; @@ 
-380,21 +379,37 @@ fl6_sock_release(flowlabel); return err; } - - ip6_dst_store(sk, dst, &fl.fl6_dst); - +#ifdef CONFIG_IPV6_SUBTREES + reroute = 1; +#endif /* get the source address used in the appropriate device */ - err = ipv6_get_saddr(dst, daddr, &saddr); + err = ipv6_get_saddr(dst, daddr, &fl.fl6_src); + + if (reroute || err) /* on failure dst is never stored in sk, so drop our reference here */ + dst_release(dst); if (err == 0) { +#ifdef CONFIG_IPV6_SUBTREES + if (reroute) { + dst = ip6_route_output(sk, &fl); + if ((err = dst->error) != 0) { + dst_release(dst); + fl6_sock_release(flowlabel); + return err; + } + } +#endif if (ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&np->saddr, &saddr); + ipv6_addr_copy(&np->saddr, &fl.fl6_src); if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_copy(&np->rcv_saddr, &saddr); + ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->rcv_saddr = LOOPBACK4_IPV6; } + ip6_dst_store(sk, dst, &np->daddr, + !ipv6_addr_cmp(&fl.fl6_src, &np->saddr) ? + &np->saddr : NULL); sk->state = TCP_ESTABLISHED; } fl6_sock_release(flowlabel); @@ -1003,7 +1018,9 @@ ip6_dst_store(sk, dst, !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, + !ipv6_addr_cmp(&fl.fl6_src, &np->saddr) ? + &np->saddr : NULL); if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk);