# This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1276.28.3 -> 1.1276.28.4 # net/ipv6/ip6_tunnel.c 1.9 -> 1.10 # include/net/ip6_tunnel.h 1.1 -> 1.2 # net/ipv6/ipv6_syms.c 1.17 -> 1.18 # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/09/01 vnuorval@xxxxxxxxxxxxxxxxxxxxxxxxxx 1.1276.28.4 # Since the IPv6-in-IPv6 tunnels don't need sockets anymore, nested # IPv6 encapsulations through different tunnel devices on the same node # are now possible. # -------------------------------------------- # diff -Nru a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h --- a/include/net/ip6_tunnel.h Mon Sep 1 01:58:23 2003 +++ b/include/net/ip6_tunnel.h Mon Sep 1 01:58:23 2003 @@ -25,6 +25,8 @@ int recursion; /* depth of hard_start_xmit recursion */ struct ip6_tnl_parm parms; /* tunnel configuration paramters */ struct flowi fl; /* flowi template for xmit */ + struct dst_entry *dst_cache; /* cached dst */ + u32 dst_cookie; }; /* Tunnel encapsulation limit destination sub-option */ diff -Nru a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c --- a/net/ipv6/ip6_tunnel.c Mon Sep 1 01:58:23 2003 +++ b/net/ipv6/ip6_tunnel.c Mon Sep 1 01:58:23 2003 @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -37,12 +36,12 @@ #include #include #include +#include #include #include #include -#include #include #include #include @@ -63,22 +62,6 @@ #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -/* socket(s) used by ip6ip6_tnl_xmit() for resending packets */ -static struct socket *__ip6_socket[NR_CPUS]; -#define ip6_socket __ip6_socket[smp_processor_id()] - -static void ip6_xmit_lock(void) -{ - local_bh_disable(); - if (unlikely(!spin_trylock(&ip6_socket->sk->sk_lock.slock))) - BUG(); -} - -static void ip6_xmit_unlock(void) -{ - spin_unlock_bh(&ip6_socket->sk->sk_lock.slock); -} - #define HASH_SIZE 32 #define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ @@ -101,6 +84,33 @@ /* lock for the tunnel lists */ static rwlock_t ip6ip6_lock = RW_LOCK_UNLOCKED; +static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +{ + struct dst_entry *dst = t->dst_cache; + + if (dst && dst->obsolete && + dst->ops->check(dst, t->dst_cookie) == NULL) { + t->dst_cache = NULL; + return NULL; + } + + return dst; +} + +static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) +{ + dst_release(t->dst_cache); + t->dst_cache = NULL; +} + +static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +{ + struct rt6_info *rt = (struct rt6_info *) dst; + t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + dst_release(t->dst_cache); + t->dst_cache = dst; +} + /** * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @remote: the address of the tunnel exit-point @@ -294,13 +304,16 @@ static void ip6ip6_tnl_dev_uninit(struct net_device *dev) { + struct ip6_tnl *t = dev->priv; + if (dev == ip6ip6_fb_tnl_dev) { write_lock_bh(&ip6ip6_lock); tnls_wc[0] = NULL; write_unlock_bh(&ip6ip6_lock); } else { - ip6ip6_tnl_unlink((struct ip6_tnl *) dev->priv); + ip6ip6_tnl_unlink(t); } + ip6_tnl_dst_reset(t); dev_put(dev); } @@ -522,112 +535,34 @@ return 0; } -/** - * txopt_len - get necessary size for new &struct ipv6_txoptions - * @orig_opt: old options - * - * Return: - * Size of old one plus size of tunnel encapsulation limit option - **/ - -static inline int -txopt_len(struct ipv6_txoptions *orig_opt) -{ - int len = sizeof (*orig_opt) + 8; - - if (orig_opt && orig_opt->dst0opt) - len += ipv6_optlen(orig_opt->dst0opt); - return len; -} - -/** - * merge_options - add encapsulation limit to original options - * @encap_limit: number of allowed encapsulation limits - * @orig_opt: original options - * - * Return: - * Pointer to new &struct ipv6_txoptions containing the tunnel - * encapsulation limit - **/ - -static struct ipv6_txoptions * -merge_options(struct sock *sk, __u8 encap_limit, - struct ipv6_txoptions *orig_opt) +static inline struct ipv6_txoptions *create_tel(__u8 encap_limit) { struct ipv6_tlv_tnl_enc_lim *tel; struct ipv6_txoptions *opt; __u8 *raw; - __u8 pad_to = 8; - int opt_len = txopt_len(orig_opt); - if (!(opt = sock_kmalloc(sk, opt_len, GFP_ATOMIC))) { + int opt_len = sizeof(*opt) + 8; + + if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) { return NULL; } - memset(opt, 0, opt_len); opt->tot_len = opt_len; opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1); opt->opt_nflen = 8; - raw = (__u8 *) opt->dst0opt; - tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1); tel->type = IPV6_TLV_TNL_ENCAP_LIMIT; tel->length = 1; tel->encap_limit = encap_limit; - if (orig_opt) { - __u8 *orig_raw; - - opt->hopopt = orig_opt->hopopt; - - /* Keep the original destination options properly - aligned and merge possible old paddings to the - new padding option */ - if ((orig_raw = (__u8 *) orig_opt->dst0opt) != NULL) { - __u8 type; - int i = sizeof (struct ipv6_opt_hdr); - pad_to += sizeof (struct ipv6_opt_hdr); - while (i < ipv6_optlen(orig_opt->dst0opt)) { - type = orig_raw[i++]; - if (type == IPV6_TLV_PAD0) - pad_to++; - else if (type == IPV6_TLV_PADN) { - int len = orig_raw[i++]; - i += len; - pad_to += len + 2; - } else { - break; - } - } - opt->dst0opt->hdrlen = orig_opt->dst0opt->hdrlen + 1; - memcpy(raw + pad_to, orig_raw + pad_to - 8, - opt_len - sizeof (*opt) - pad_to); - } - opt->srcrt = orig_opt->srcrt; - opt->opt_nflen += orig_opt->opt_nflen; - - opt->dst1opt = orig_opt->dst1opt; - opt->auth = orig_opt->auth; - opt->opt_flen = orig_opt->opt_flen; - } + raw = (__u8 *) opt->dst0opt; raw[5] = IPV6_TLV_PADN; - - /* subtract lengths of destination suboption header, - tunnel encapsulation limit and pad N header */ - raw[6] = pad_to - 7; + raw[6] = 1; return opt; } -static int -ip6ip6_getfrag(void *from, char *to, int offset, int len, int odd, - struct sk_buff *skb) -{ - memcpy(to, (char *) from + offset, len); - return 0; -} - /** * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device @@ -655,7 +590,7 @@ * * Description: * Build new header and do some sanity checks on the packet before sending - * it to ip6_build_xmit(). + * it. * * Return: * 0 @@ -666,18 +601,17 @@ struct ip6_tnl *t = (struct ip6_tnl *) dev->priv; struct net_device_stats *stats = &t->stat; struct ipv6hdr *ipv6h = skb->nh.ipv6h; - struct ipv6_txoptions *orig_opt = NULL; struct ipv6_txoptions *opt = NULL; int encap_limit = -1; __u16 offset; struct flowi fl; - struct ip6_flowlabel *fl_lbl = NULL; - int err = 0; struct dst_entry *dst; - int link_failure = 0; - struct sock *sk = ip6_socket->sk; - struct ipv6_pinfo *np = inet6_sk(sk); + struct net_device *tdev; int mtu; + int max_headroom = sizeof(struct ipv6hdr); + u8 proto; + int err; + int pkt_len; if (t->recursion++) { stats->collisions++; @@ -700,58 +634,39 @@ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { encap_limit = t->parms.encap_limit; } - ip6_xmit_lock(); - memcpy(&fl, &t->fl, sizeof (fl)); + proto = fl.proto; if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK); - if (fl.fl6_flowlabel) { - fl_lbl = fl6_sock_lookup(sk, fl.fl6_flowlabel); - if (fl_lbl) - orig_opt = fl_lbl->opt; - } - if (encap_limit >= 0) { - if (!(opt = merge_options(sk, encap_limit, orig_opt))) { - goto tx_err_free_fl_lbl; - } - } else { - opt = orig_opt; - } - dst = __sk_dst_check(sk, np->dst_cookie); + if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL) + goto tx_err; - if (dst) { - if (np->daddr_cache == NULL || - ipv6_addr_cmp(&fl.fl6_dst, np->daddr_cache) || - (fl.oif && fl.oif != dst->dev->ifindex)) { - dst = NULL; - } - } - if (dst == NULL) { - dst = ip6_route_output(sk, &fl); - if (dst->error) { - stats->tx_carrier_errors++; - link_failure = 1; - goto tx_err_dst_release; - } - /* local routing loop */ - if (dst->dev == dev) { - stats->collisions++; - if (net_ratelimit()) - printk(KERN_WARNING - "%s: Local routing loop detected!\n", - t->parms.name); - goto tx_err_dst_release; - } - ipv6_addr_copy(&np->daddr, &fl.fl6_dst); - ipv6_addr_copy(&np->saddr, &fl.fl6_src); + if ((dst = ip6_tnl_dst_check(t)) != NULL) + dst_hold(dst); + else + dst = ip6_route_output(NULL, &fl); + + if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0) + goto tx_err_link_failure; + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + if (net_ratelimit()) + printk(KERN_WARNING + "%s: Local routing loop detected!\n", + t->parms.name); + goto tx_err_dst_release; } mtu = dst_pmtu(dst) - sizeof (*ipv6h); if (opt) { - mtu -= (opt->opt_nflen + opt->opt_flen); + max_headroom += 8; + mtu -= 8; } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -764,41 +679,71 @@ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); goto tx_err_dst_release; } - err = ip6_append_data(sk, ip6ip6_getfrag, skb->nh.raw, skb->len, 0, - t->parms.hop_limit, opt, &fl, - (struct rt6_info *)dst, MSG_DONTWAIT); + skb->h.raw = skb->nh.raw; - if (err) { - ip6_flush_pending_frames(sk); - } else { - err = ip6_push_pending_frames(sk); - err = (err < 0 ? err : 0); + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom += LL_RESERVED_SPACE(tdev); + + if (skb_headroom(skb) < max_headroom || + skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb; + + if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; } - if (!err) { - stats->tx_bytes += skb->len; + dst_release(skb->dst); + skb->dst = dst_clone(dst); + + if (opt) + ipv6_push_nfrag_opts(skb, opt, &proto, NULL); + + skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr)); + ipv6h = skb->nh.ipv6h; + *(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000); + ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = proto; + ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src); + ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst); +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif +#endif + pkt_len = skb->len; + err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, + skb->dst->dev, dst_output); + + if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) { + stats->tx_bytes += pkt_len; stats->tx_packets++; } else { stats->tx_errors++; stats->tx_aborted_errors++; } - if (opt && opt != orig_opt) - sock_kfree_s(sk, opt, opt->tot_len); + ip6_tnl_dst_store(t, dst); + + if (opt) + kfree(opt); - fl6_sock_release(fl_lbl); - ip6_dst_store(sk, dst, &np->daddr); - ip6_xmit_unlock(); - kfree_skb(skb); t->recursion--; return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); tx_err_dst_release: dst_release(dst); - if (opt && opt != orig_opt) - sock_kfree_s(sk, opt, opt->tot_len); -tx_err_free_fl_lbl: - fl6_sock_release(fl_lbl); - ip6_xmit_unlock(); - if (link_failure) - dst_link_failure(skb); + if (opt) + kfree(opt); tx_err: stats->tx_errors++; stats->tx_dropped++; @@ -850,13 +795,12 @@ { struct net_device *dev = t->dev; struct ip6_tnl_parm *p = &t->parms; - struct flowi *fl; + struct flowi *fl = &t->fl; memcpy(&dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(&dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ - fl = &t->fl; ipv6_addr_copy(&fl->fl6_src, &p->laddr); ipv6_addr_copy(&fl->fl6_dst, &p->raddr); fl->oif = p->link; @@ -881,10 +825,7 @@ if (rt == NULL) return; - /* as long as tunnels use the same socket for transmission, - locally nested tunnels won't work */ - - if (rt->rt6i_dev && rt->rt6i_dev->type != ARPHRD_TUNNEL6) { + if (rt->rt6i_dev) { dev->iflink = rt->rt6i_dev->ifindex; dev->hard_header_len = rt->rt6i_dev->hard_header_len + @@ -1139,7 +1080,7 @@ int ip6ip6_fb_tnl_dev_init(struct net_device *dev) { struct ip6_tnl *t = dev->priv; - ip6ip6_tnl_dev_init_gen(dev); + ip6ip6_tnl_dev_init_gen(dev); dev_hold(dev); tnls_wc[0] = t; return 0; @@ -1159,61 +1100,28 @@ int __init ip6_tunnel_init(void) { - int i, j, err; - struct sock *sk; - struct ipv6_pinfo *np; - - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; - - err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_IPV6, - &__ip6_socket[i]); - if (err < 0) { - printk(KERN_ERR - "Failed to create the IPv6 tunnel socket " - "(err %d).\n", - err); - goto fail; - } - sk = __ip6_socket[i]->sk; - sk->sk_allocation = GFP_ATOMIC; - - np = inet6_sk(sk); - np->hop_limit = 255; - np->mc_loop = 0; + int err; - sk->sk_prot->unhash(sk); - } if ((err = inet6_add_protocol(&ip6ip6_protocol, IPPROTO_IPV6)) < 0) { printk(KERN_ERR "Failed to register IPv6 protocol\n"); - goto fail; + return err; } - - ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", ip6ip6_tnl_dev_setup); if (!ip6ip6_fb_tnl_dev) { err = -ENOMEM; - goto tnl_fail; + goto fail; } ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init; if ((err = register_netdev(ip6ip6_fb_tnl_dev))) { kfree(ip6ip6_fb_tnl_dev); - goto tnl_fail; + goto fail; } return 0; -tnl_fail: - inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6); fail: - for (j = 0; j < i; j++) { - if (!cpu_possible(j)) - continue; - sock_release(__ip6_socket[j]); - __ip6_socket[j] = NULL; - } + inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6); return err; } @@ -1223,18 +1131,8 @@ void ip6_tunnel_cleanup(void) { - int i; - unregister_netdev(ip6ip6_fb_tnl_dev); - inet6_del_protocol(&ip6ip6_protocol, IPPROTO_IPV6); - - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_possible(i)) - continue; - sock_release(__ip6_socket[i]); - __ip6_socket[i] = NULL; - } } #ifdef MODULE diff -Nru a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c --- a/net/ipv6/ipv6_syms.c Mon Sep 1 01:58:23 2003 +++ b/net/ipv6/ipv6_syms.c Mon Sep 1 01:58:23 2003 @@ -45,3 +45,4 @@ EXPORT_SYMBOL(ip6_append_data); EXPORT_SYMBOL(ip6_flush_pending_frames); EXPORT_SYMBOL(ip6_push_pending_frames); +EXPORT_SYMBOL(ipv6_push_nfrag_opts);