netdev
[Top] [All Lists]

[PATCH] anycast support for IPv6, updated to 2.5.44

To: kuznet@xxxxxxxxxxxxx, davem@xxxxxxxxxx
Subject: [PATCH] anycast support for IPv6, updated to 2.5.44
From: "David Stevens" <dlstevens@xxxxxxxxxx>
Date: Mon, 28 Oct 2002 14:06:00 -0700
Cc: linux-kernel@xxxxxxxxxxxxxxx, netdev@xxxxxxxxxxx
Importance: Normal
Sender: netdev-bounce@xxxxxxxxxxx
Sensitivity:
Below is a patch to add anycast support for IPv6. It's the same patch as
I've posted previously, but updated with comments from Chris Hellwig and
for kernel version 2.5.44.
      My mailer mangles tabs, so I'm including it in-line (for easy
viewing) and as an attachment at the end (for applying to kernels). A
fuller description is available at:
http://marc.theaimsgroup.com/?l=linux-net&m=103057141321548&w=2
      Currently, IPv6 mobile code is the only user of anycasting, but there
won't be other users until it is supported. Please consider for inclusion in 2.5.

                                    +-DLS

diff -ruN linux-2.5.44/include/linux/in6.h linux-2.5.44AC/include/linux/in6.h
--- linux-2.5.44/include/linux/in6.h      Fri Oct 18 21:01:59 2002
+++ linux-2.5.44AC/include/linux/in6.h    Mon Oct 28 12:38:09 2002
@@ -56,6 +56,8 @@
      int         ipv6mr_ifindex;
 };

+#define ipv6mr_acaddr  ipv6mr_multiaddr
+
 struct in6_flowlabel_req
 {
      struct in6_addr   flr_dst;
@@ -156,6 +158,9 @@
 #define IPV6_MTU_DISCOVER    23
 #define IPV6_MTU       24
 #define IPV6_RECVERR         25
+/* 26 is IPV6_V6ONLY in USAGI code */
+#define IPV6_JOIN_ANYCAST    27
+#define IPV6_LEAVE_ANYCAST   28

 /* IPV6_MTU_DISCOVER values */
 #define IPV6_PMTUDISC_DONT         0
diff -ruN linux-2.5.44/include/linux/ipv6.h linux-2.5.44AC/include/linux/ipv6.h
--- linux-2.5.44/include/linux/ipv6.h     Fri Oct 18 21:02:26 2002
+++ linux-2.5.44AC/include/linux/ipv6.h   Mon Oct 28 12:38:09 2002
@@ -155,6 +155,7 @@
                              pmtudisc:2;

      struct ipv6_mc_socklist *ipv6_mc_list;
+     struct ipv6_ac_socklist *ipv6_ac_list;
      struct ipv6_fl_socklist *ipv6_fl_list;
      __u32             dst_cookie;

diff -ruN linux-2.5.44/include/linux/netdevice.h linux-2.5.44AC/include/linux/netdevice.h
--- linux-2.5.44/include/linux/netdevice.h      Fri Oct 18 21:02:31 2002
+++ linux-2.5.44AC/include/linux/netdevice.h    Mon Oct 28 12:38:09 2002
@@ -464,6 +464,10 @@
 extern void            dev_add_pack(struct packet_type *pt);
 extern void            dev_remove_pack(struct packet_type *pt);
 extern int       dev_get(const char *name);
+extern struct net_device     *dev_getany(unsigned short flags,
+                             unsigned short mask);
+extern struct net_device     *__dev_getany(unsigned short flags,
+                             unsigned short mask);
 extern struct net_device     *dev_get_by_name(const char *name);
 extern struct net_device     *__dev_get_by_name(const char *name);
 extern struct net_device     *dev_alloc(const char *name, int *err);
diff -ruN linux-2.5.44/include/net/addrconf.h linux-2.5.44AC/include/net/addrconf.h
--- linux-2.5.44/include/net/addrconf.h   Fri Oct 18 21:01:18 2002
+++ linux-2.5.44AC/include/net/addrconf.h Mon Oct 28 12:38:09 2002
@@ -58,7 +58,15 @@
 extern int             ipv6_get_saddr(struct dst_entry *dst,
                                     struct in6_addr *daddr,
                                     struct in6_addr *saddr);
+extern int             ipv6_dev_get_saddr(struct net_device *dev,
+                                    struct in6_addr *daddr,
+                                    struct in6_addr *saddr,
+                                    int onlink);
 extern int             ipv6_get_lladdr(struct net_device *dev, struct in6_addr *);
+extern void                  addrconf_join_solict(struct net_device *dev,
+                             struct in6_addr *addr);
+extern void                  addrconf_leave_solict(struct net_device *dev,
+                             struct in6_addr *addr);

 /*
  *   multicast prototypes (mcast.c)
@@ -88,6 +96,26 @@
 extern void                  addrconf_prefix_rcv(struct net_device *dev,
                                        u8 *opt, int len);

+/*
+ *   anycast prototypes (anycast.c)
+ */
+extern int             ipv6_sock_ac_join(struct sock *sk,
+                                     int ifindex,
+                                     struct in6_addr *addr);
+extern int             ipv6_sock_ac_drop(struct sock *sk,
+                                     int ifindex,
+                                     struct in6_addr *addr);
+extern void                  ipv6_sock_ac_close(struct sock *sk);
+extern int             inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex);
+
+extern int             ipv6_dev_ac_inc(struct net_device *dev,
+                                   struct in6_addr *addr);
+extern int             ipv6_dev_ac_dec(struct net_device *dev,
+                                   struct in6_addr *addr);
+extern int             ipv6_chk_acast_addr(struct net_device *dev,
+                                   struct in6_addr *addr);
+
+
 /* Device notifier */
 extern int register_inet6addr_notifier(struct notifier_block *nb);
 extern int unregister_inet6addr_notifier(struct notifier_block *nb);
diff -ruN linux-2.5.44/include/net/if_inet6.h linux-2.5.44AC/include/net/if_inet6.h
--- linux-2.5.44/include/net/if_inet6.h   Fri Oct 18 21:02:34 2002
+++ linux-2.5.44AC/include/net/if_inet6.h Mon Oct 28 12:38:09 2002
@@ -69,6 +69,25 @@
      spinlock_t        mca_lock;
 };

+/* Anycast stuff */
+
+/* One anycast membership held by a socket; entries are chained via acl_next. */
+struct ipv6_ac_socklist
+{
+     struct in6_addr         acl_addr;	/* anycast address that was joined */
+     int               acl_ifindex;	/* ifindex of the device it was joined on */
+     struct ipv6_ac_socklist *acl_next;	/* next membership of the same socket */
+};
+
+/* One anycast address configured on a device; linked on inet6_dev->ac_list. */
+struct ifacaddr6
+{
+     struct in6_addr         aca_addr;	/* the anycast address */
+     struct inet6_dev  *aca_idev;	/* owning inet6_dev; reference dropped in aca_put() */
+     struct ifacaddr6  *aca_next;	/* next entry on the device list */
+     int               aca_users;	/* join count: ++ in ipv6_dev_ac_inc, -- in ipv6_dev_ac_dec */
+     atomic_t          aca_refcnt;	/* entry is freed when this reaches zero */
+     spinlock_t        aca_lock;	/* initialised on creation; NOTE(review): no user visible in this patch */
+};
+
 #define    IFA_HOST    IPV6_ADDR_LOOPBACK
 #define    IFA_LINK    IPV6_ADDR_LINKLOCAL
 #define    IFA_SITE    IPV6_ADDR_SITELOCAL
@@ -96,6 +115,7 @@

      struct inet6_ifaddr     *addr_list;
      struct ifmcaddr6  *mc_list;
+     struct ifacaddr6  *ac_list;
      rwlock_t          lock;
      atomic_t          refcnt;
      __u32             if_flags;
diff -ruN linux-2.5.44/net/core/dev.c linux-2.5.44AC/net/core/dev.c
--- linux-2.5.44/net/core/dev.c     Fri Oct 18 21:01:54 2002
+++ linux-2.5.44AC/net/core/dev.c   Mon Oct 28 12:38:09 2002
@@ -541,6 +541,50 @@
 }

 /**
+ *   dev_getany - find any device with given flags
+ *   @if_flags: IFF_* values
+ *   @mask: bitmask of bits in if_flags to check
+ *
+ *   Search for any interface with the given flags. Returns NULL if a device
+ *   is not found or a pointer to the device. The device returned has
+ *   had a reference added and the pointer is safe until the user calls
+ *   dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_getany(unsigned short if_flags, unsigned short mask)
+{
+     struct net_device *found;
+
+     /* the lookup and the reference grab happen under one lock hold */
+     read_lock(&dev_base_lock);
+     found = __dev_getany(if_flags, mask);
+     if (found != NULL)
+           dev_hold(found);
+     read_unlock(&dev_base_lock);
+
+     return found;
+}
+
+/**
+ *   __dev_getany - find any device with given flags
+ *   @if_flags: IFF_* values
+ *   @mask: bitmask of bits in if_flags to check
+ *
+ *   Search for any interface with the given flags. Returns NULL if a device
+ *   is not found or a pointer to the device. The caller must hold either
+ *   the RTNL semaphore or @dev_base_lock.
+ */
+
+struct net_device *__dev_getany(unsigned short if_flags, unsigned short mask)
+{
+     struct net_device *dev = dev_base;
+
+     /* first device whose flags agree with if_flags on every bit in mask */
+     while (dev != NULL) {
+           if ((dev->flags & mask) == (if_flags & mask))
+                 break;
+           dev = dev->next;
+     }
+     return dev;
+}
+
+/**
  *   dev_alloc_name - allocate a name for a device
  *   @dev: device
  *   @name: name format string
diff -ruN linux-2.5.44/net/ipv6/Makefile linux-2.5.44AC/net/ipv6/Makefile
--- linux-2.5.44/net/ipv6/Makefile  Fri Oct 18 21:02:27 2002
+++ linux-2.5.44AC/net/ipv6/Makefile      Mon Oct 28 12:38:09 2002
@@ -6,7 +6,7 @@

 obj-$(CONFIG_IPV6) += ipv6.o

-ipv6-objs :=     af_inet6.o ip6_output.o ip6_input.o addrconf.o sit.o \
+ipv6-objs :=     af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
            route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
            protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
            exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
diff -ruN linux-2.5.44/net/ipv6/addrconf.c linux-2.5.44AC/net/ipv6/addrconf.c
--- linux-2.5.44/net/ipv6/addrconf.c      Fri Oct 18 21:02:31 2002
+++ linux-2.5.44AC/net/ipv6/addrconf.c    Mon Oct 28 12:49:03 2002
@@ -137,21 +137,15 @@

 int ipv6_addr_type(struct in6_addr *addr)
 {
+     int type;
      u32 st;

      st = addr->s6_addr32[0];

-     /* Consider all addresses with the first three bits different of
-        000 and 111 as unicasts.
-      */
-     if ((st & htonl(0xE0000000)) != htonl(0x00000000) &&
-         (st & htonl(0xE0000000)) != htonl(0xE0000000))
-           return IPV6_ADDR_UNICAST;
-
-     if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
-           int type = IPV6_ADDR_MULTICAST;
+     if ((st & __constant_htonl(0xFF000000)) == __constant_htonl(0xFF000000)) {
+           type = IPV6_ADDR_MULTICAST;

-           switch((st & htonl(0x00FF0000))) {
+           switch((st & __constant_htonl(0x00FF0000))) {
                  case __constant_htonl(0x00010000):
                        type |= IPV6_ADDR_LOOPBACK;
                        break;
@@ -166,29 +160,53 @@
            };
            return type;
      }
+     /* check for reserved anycast addresses */
+
+     if ((st & __constant_htonl(0xE0000000)) &&
+         ((addr->s6_addr32[2] == __constant_htonl(0xFDFFFFFF) &&
+         (addr->s6_addr32[3] | __constant_htonl(0x7F)) == (u32)~0) ||
+         (addr->s6_addr32[2] == 0 && addr->s6_addr32[3] == 0)))
+           type = IPV6_ADDR_ANYCAST;
+     else
+           type = IPV6_ADDR_UNICAST;
+
+     /* Addresses whose first three bits are neither 000 nor 111
+        need no further classification: return the type found so far.
+      */
+     if ((st & __constant_htonl(0xE0000000)) != __constant_htonl(0x00000000) &&
+         (st & __constant_htonl(0xE0000000)) != __constant_htonl(0xE0000000))
+           return type;

-     if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
-           return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST);
+     if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFE800000))
+           return (IPV6_ADDR_LINKLOCAL | type);

-     if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
-           return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST);
+     if ((st & __constant_htonl(0xFFC00000)) == __constant_htonl(0xFEC00000))
+           return (IPV6_ADDR_SITELOCAL | type);

      if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
            if (addr->s6_addr32[2] == 0) {
-                 if (addr->s6_addr32[3] == 0)
+                 if (addr->in6_u.u6_addr32[3] == 0)
                        return IPV6_ADDR_ANY;

-                 if (addr->s6_addr32[3] == htonl(0x00000001))
-                       return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST);
+                 if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
+                       return (IPV6_ADDR_LOOPBACK | type);

-                 return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST);
+                 return (IPV6_ADDR_COMPATv4 | type);
            }

-           if (addr->s6_addr32[2] == htonl(0x0000ffff))
+           if (addr->s6_addr32[2] == __constant_htonl(0x0000ffff))
                  return IPV6_ADDR_MAPPED;
      }

-     return IPV6_ADDR_RESERVED;
+     st &= __constant_htonl(0xFF000000);
+     if (st == 0)
+           return IPV6_ADDR_RESERVED;
+     st &= __constant_htonl(0xFE000000);
+     if (st == __constant_htonl(0x02000000))
+           return IPV6_ADDR_RESERVED;    /* for NSAP */
+     if (st == __constant_htonl(0x04000000))
+           return IPV6_ADDR_RESERVED;    /* for IPX */
+     return type;
 }

 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -224,7 +242,6 @@
      add_timer(&ifp->timer);
 }

-
 /* Nobody refers to this device, we may destroy it. */

 void in6_dev_finish_destroy(struct inet6_dev *idev)
@@ -303,24 +320,91 @@
      return idev;
 }

+/*
+ *   Store the first prefix_len bits of addr in prefix and zero the rest.
+ *   A prefix_len <= 0 yields the all-zero address; values above 128 are
+ *   treated as 128.
+ */
+void ipv6_addr_prefix(struct in6_addr *prefix,
+     struct in6_addr *addr, int prefix_len)
+{
+     __u32 mask;
+     int ncopy, nbits;
+
+     memset(prefix, 0, sizeof(*prefix));
+
+     if (prefix_len <= 0)
+           return;
+     if (prefix_len > 128)
+           prefix_len = 128;
+
+     /* copy the 32-bit words that lie entirely inside the prefix */
+     ncopy = prefix_len / 32;
+     switch (ncopy) {
+     case 4:     prefix->s6_addr32[3] = addr->s6_addr32[3];
+           /* fall through */
+     case 3:     prefix->s6_addr32[2] = addr->s6_addr32[2];
+           /* fall through */
+     case 2:     prefix->s6_addr32[1] = addr->s6_addr32[1];
+           /* fall through */
+     case 1:     prefix->s6_addr32[0] = addr->s6_addr32[0];
+           /* fall through */
+     case 0:     break;
+     }
+     nbits = prefix_len % 32;
+     if (nbits == 0)
+           return;
+
+     /*
+      * nbits is 1..31 here.  Build the mask with unsigned arithmetic:
+      * the signed form (1 << 31) - 1 overflows when nbits == 1.
+      */
+     mask = htonl(~0U << (32 - nbits));
+
+     prefix->s6_addr32[ncopy] = addr->s6_addr32[ncopy] & mask;
+}
+
+
+/*
+ *   Bring one interface's group memberships in line with its current
+ *   cnf.forwarding setting: the all-routers multicast group (multicast
+ *   capable devices only) and one anycast address per configured
+ *   address, formed from that address's prefix.  A NULL idev is ignored.
+ */
+static void dev_forward_change(struct inet6_dev *idev)
+{
+     struct net_device *dev;
+     struct inet6_ifaddr *ifa;
+     struct in6_addr addr;
+
+     if (!idev)
+           return;
+     dev = idev->dev;
+     if (dev && (dev->flags & IFF_MULTICAST)) {
+           ipv6_addr_all_routers(&addr);
+
+           if (idev->cnf.forwarding)
+                 ipv6_dev_mc_inc(dev, &addr);
+           else
+                 ipv6_dev_mc_dec(dev, &addr);
+     }
+     /* NOTE(review): addr_list is walked without idev->lock here - confirm callers serialise */
+     for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+           ipv6_addr_prefix(&addr, &ifa->addr, ifa->prefix_len);
+           /* an all-zero prefix is skipped rather than joined as an anycast address */
+           if (addr.s6_addr32[0] == 0 && addr.s6_addr32[1] == 0 &&
+               addr.s6_addr32[2] == 0 && addr.s6_addr32[3] == 0)
+                 continue;
+           if (idev->cnf.forwarding)
+                 ipv6_dev_ac_inc(idev->dev, &addr);
+           else
+                 ipv6_dev_ac_dec(idev->dev, &addr);
+     }
+}
+
+
 static void addrconf_forward_change(struct inet6_dev *idev)
 {
      struct net_device *dev;

-     if (idev)
+     if (idev) {
+           dev_forward_change(idev);
            return;
+     }

      read_lock(&dev_base_lock);
      for (dev=dev_base; dev; dev=dev->next) {
            read_lock(&addrconf_lock);
            idev = __in6_dev_get(dev);
-           if (idev)
+           if (idev) {
                  idev->cnf.forwarding = ipv6_devconf.forwarding;
+                 dev_forward_change(idev);
+           }
            read_unlock(&addrconf_lock);
      }
      read_unlock(&dev_base_lock);
 }

+
 /* Nobody refers to this ifaddr, destroy it */

 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
@@ -458,29 +542,20 @@
  *         an address of the attached interface
  *   iii)  don't use deprecated addresses
  */
-int ipv6_get_saddr(struct dst_entry *dst,
-              struct in6_addr *daddr, struct in6_addr *saddr)
+int ipv6_dev_get_saddr(struct net_device *dev,
+              struct in6_addr *daddr, struct in6_addr *saddr, int onlink)
 {
-     int scope;
      struct inet6_ifaddr *ifp = NULL;
      struct inet6_ifaddr *match = NULL;
-     struct net_device *dev = NULL;
      struct inet6_dev *idev;
-     struct rt6_info *rt;
+     int scope;
      int err;

-     rt = (struct rt6_info *) dst;
-     if (rt)
-           dev = rt->rt6i_dev;

-     scope = ipv6_addr_scope(daddr);
-     if (rt && (rt->rt6i_flags & RTF_ALLONLINK)) {
-           /*
-            *    route for the "all destinations on link" rule
-            *    when no routers are present
-            */
+     if (!onlink)
+           scope = ipv6_addr_scope(daddr);
+     else
            scope = IFA_LINK;
-     }

      /*
       *    known dev
@@ -568,6 +643,24 @@
      return err;
 }

+
+/*
+ *   Route-based wrapper around ipv6_dev_get_saddr(): takes the device
+ *   and the "all destinations on link" flag from the route, if any.
+ */
+int ipv6_get_saddr(struct dst_entry *dst,
+              struct in6_addr *daddr, struct in6_addr *saddr)
+{
+     struct rt6_info *rt = (struct rt6_info *) dst;
+     struct net_device *dev = NULL;
+     int onlink = 0;
+
+     if (rt) {
+           dev = rt->rt6i_dev;
+           if (rt->rt6i_flags & RTF_ALLONLINK)
+                 onlink = 1;
+     }
+
+     return ipv6_dev_get_saddr(dev, daddr, saddr, onlink);
+}
+
+
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 {
      struct inet6_dev *idev;
@@ -660,7 +753,7 @@

 /* Join to solicited addr multicast group. */

-static void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
+void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
 {
      struct in6_addr maddr;

@@ -671,7 +764,7 @@
      ipv6_dev_mc_inc(dev, &maddr);
 }

-static void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr)
+void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr)
 {
      struct in6_addr maddr;

@@ -1554,6 +1647,15 @@
            addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
            spin_unlock_bh(&ifp->lock);
      }
+
+     if (ifp->idev->cnf.forwarding) {
+           struct in6_addr addr;
+
+           ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+           if (addr.s6_addr32[0] || addr.s6_addr32[1] ||
+               addr.s6_addr32[2] || addr.s6_addr32[3])
+                 ipv6_dev_ac_inc(ifp->idev->dev, &addr);
+     }
 }

 #ifdef CONFIG_PROC_FS
@@ -1853,6 +1955,14 @@
            break;
      case RTM_DELADDR:
            addrconf_leave_solict(ifp->idev->dev, &ifp->addr);
+           if (ifp->idev->cnf.forwarding) {
+                 struct in6_addr addr;
+
+                 ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
+                 if (addr.s6_addr32[0] || addr.s6_addr32[1] ||
+                     addr.s6_addr32[2] || addr.s6_addr32[3])
+                       ipv6_dev_ac_dec(ifp->idev->dev, &addr);
+           }
            if (!ipv6_chk_addr(&ifp->addr, NULL))
                  ip6_rt_addr_del(&ifp->addr, ifp->idev->dev);
            break;
@@ -1875,11 +1985,7 @@
            struct inet6_dev *idev = NULL;

            if (valp != &ipv6_devconf.forwarding) {
-                 struct net_device *dev = dev_get_by_index(ctl->ctl_name);
-                 if (dev) {
-                       idev = in6_dev_get(dev);
-                       dev_put(dev);
-                 }
+                 idev = (struct inet6_dev *)ctl->extra1;
                  if (idev == NULL)
                        return ret;
            } else
@@ -1889,8 +1995,6 @@

            if (*valp)
                  rt6_purge_dflt_routers(0);
-           if (idev)
-                 in6_dev_put(idev);
      }

         return ret;
@@ -1967,6 +2071,7 @@
      for (i=0; i<sizeof(t->addrconf_vars)/sizeof(t->addrconf_vars[0])-1; i++) {
            t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
            t->addrconf_vars[i].de = NULL;
+           t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
      }
      if (dev) {
            t->addrconf_dev[0].procname = dev->name;
diff -ruN linux-2.5.44/net/ipv6/af_inet6.c linux-2.5.44AC/net/ipv6/af_inet6.c
--- linux-2.5.44/net/ipv6/af_inet6.c      Fri Oct 18 21:01:57 2002
+++ linux-2.5.44AC/net/ipv6/af_inet6.c    Mon Oct 28 12:38:09 2002
@@ -76,6 +76,7 @@
 /* IPv6 procfs goodies... */

 #ifdef CONFIG_PROC_FS
+extern int anycast6_get_info(char *, char **, off_t, int);
 extern int raw6_get_info(char *, char **, off_t, int);
 extern int tcp6_get_info(char *, char **, off_t, int);
 extern int udp6_get_info(char *, char **, off_t, int);
@@ -380,6 +381,9 @@
      /* Free mc lists */
      ipv6_sock_mc_close(sk);

+     /* Free ac lists */
+     ipv6_sock_ac_close(sk);
+
      return inet_release(sock);
 }

@@ -694,6 +698,8 @@
            goto proc_sockstat6_fail;
      if (!proc_net_create("snmp6", 0, afinet6_get_snmp))
            goto proc_snmp6_fail;
+     if (!proc_net_create("anycast6", 0, anycast6_get_info))
+           goto proc_anycast6_fail;
 #endif
      ipv6_netdev_notif_init();
      ipv6_packet_init();
@@ -709,6 +715,8 @@
      return 0;

 #ifdef CONFIG_PROC_FS
+proc_anycast6_fail:
+     /* creating "anycast6" failed, so undo the entry made just before it */
+     proc_net_remove("snmp6");
 proc_snmp6_fail:
      proc_net_remove("sockstat6");
 proc_sockstat6_fail:
@@ -744,6 +752,7 @@
      proc_net_remove("udp6");
      proc_net_remove("sockstat6");
      proc_net_remove("snmp6");
+     proc_net_remove("anycast6");
 #endif
      /* Cleanup code parts. */
      sit_cleanup();
diff -ruN linux-2.5.44/net/ipv6/anycast.c linux-2.5.44AC/net/ipv6/anycast.c
--- linux-2.5.44/net/ipv6/anycast.c Wed Dec 31 16:00:00 1969
+++ linux-2.5.44AC/net/ipv6/anycast.c     Mon Oct 28 13:28:07 2002
@@ -0,0 +1,489 @@
+/*
+ *   Anycast support for IPv6
+ *   Linux INET6 implementation
+ *
+ *   Authors:
+ *   David L Stevens (dlstevens@xxxxxxxxxx)
+ *
+ *   based heavily on net/ipv6/mcast.c
+ *
+ *   This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/checksum.h>
+
+/* Big ac list lock for all the sockets */
+static rwlock_t ipv6_sk_ac_lock = RW_LOCK_UNLOCKED;
+
+/* XXX ip6_addr_match() and ip6_onlink() really belong in net/core.c */
+
+/*
+ *   Return 1 if the first "prefix" bits of addr1 and addr2 are equal,
+ *   0 otherwise.  A prefix outside 0..128 never matches; a prefix of 0
+ *   always matches.
+ */
+static int
+ip6_addr_match(struct in6_addr *addr1, struct in6_addr *addr2, int prefix)
+{
+     __u32 mask;
+     int   i;
+
+     if (prefix > 128 || prefix < 0)
+           return 0;
+     if (prefix == 0)
+           return 1;
+     for (i=0; i<4; ++i) {
+           if (prefix >= 32)
+                 mask = ~0U;
+           else
+                 /* unsigned: left-shifting a negative int is undefined */
+                 mask = htonl(~0U << (32 - prefix));
+           if ((addr1->s6_addr32[i] ^ addr2->s6_addr32[i]) & mask)
+                 return 0;
+           /* move on to the next 32-bit word of the prefix, if any */
+           prefix -= 32;
+           if (prefix <= 0)
+                 break;
+     }
+     return 1;
+}
+
+/*
+ *   Return non-zero if addr falls inside the prefix of any address
+ *   configured on dev, 0 otherwise (including when dev has no inet6_dev).
+ */
+static int
+ip6_onlink(struct in6_addr *addr, struct net_device *dev)
+{
+     struct inet6_dev  *idev;
+     struct inet6_ifaddr     *ifa;
+     int   matched = 0;
+
+     read_lock(&addrconf_lock);
+     idev = __in6_dev_get(dev);
+     if (idev == NULL)
+           goto out;
+
+     read_lock_bh(&idev->lock);
+     ifa = idev->addr_list;
+     while (ifa != NULL && !matched) {
+           matched = ip6_addr_match(addr, &ifa->addr, ifa->prefix_len);
+           ifa = ifa->if_next;
+     }
+     read_unlock_bh(&idev->lock);
+out:
+     read_unlock(&addrconf_lock);
+     return matched;
+}
+
+
+/*
+ *   socket join an anycast group
+ *
+ *   @sk:      socket that will own the membership
+ *   @ifindex: device to join on, or 0 to pick one from the route to addr
+ *   @addr:    anycast address to join (must not be multicast)
+ *
+ *   Returns 0 on success or a negative errno: -EINVAL (multicast address),
+ *   -ENOMEM, -ENODEV, -EADDRNOTAVAIL, -EPERM, or whatever
+ *   ipv6_dev_ac_inc() reports.  On every failure the membership record
+ *   and the device reference taken here are released.
+ */
+
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+     struct ipv6_pinfo *np = inet6_sk(sk);
+     struct net_device *dev = NULL;
+     struct inet6_dev *idev;
+     struct ipv6_ac_socklist *pac;
+     int   ishost = !ipv6_devconf.forwarding;
+     int   err = 0;
+
+     if (ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)
+           return -EINVAL;
+
+     pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
+     if (pac == NULL)
+           return -ENOMEM;
+     pac->acl_next = NULL;
+     ipv6_addr_copy(&pac->acl_addr, addr);
+
+     if (ifindex == 0) {
+           struct rt6_info *rt;
+
+           rt = rt6_lookup(addr, NULL, 0, 0);
+           if (rt) {
+                 dev = rt->rt6i_dev;
+                 dev_hold(dev);
+                 dst_release(&rt->u.dst);
+           } else if (ishost) {
+                 err = -EADDRNOTAVAIL;
+                 goto out_free_pac;
+           } else {
+                 /* router, no matching interface: just pick one */
+
+                 dev = dev_getany(IFF_UP, IFF_UP|IFF_LOOPBACK);
+           }
+     } else
+           dev = dev_get_by_index(ifindex);
+
+     if (dev == NULL) {
+           err = -ENODEV;
+           goto out_free_pac;
+     }
+
+     idev = in6_dev_get(dev);
+     if (!idev) {
+           err = ifindex ? -ENODEV : -EADDRNOTAVAIL;
+           goto out_dev_put;
+     }
+     /* reset ishost, now that we have a specific device */
+     ishost = !idev->cnf.forwarding;
+     in6_dev_put(idev);
+
+     pac->acl_ifindex = dev->ifindex;
+
+     /* XXX
+      * For hosts, allow link-local or matching prefix anycasts.
+      * This obviates the need for propagating anycast routes while
+      * still allowing some non-router anycast participation.
+      *
+      * allow anyone to join anycasts that don't require a special route
+      * and can't be spoofs of unicast addresses (reserved anycast only)
+      */
+     if (!ip6_onlink(addr, dev)) {
+           if (ishost)
+                 err = -EADDRNOTAVAIL;
+           else if (!capable(CAP_NET_ADMIN))
+                 err = -EPERM;
+     } else if (!(ipv6_addr_type(addr) & IPV6_ADDR_ANYCAST) &&
+              !capable(CAP_NET_ADMIN))
+           err = -EPERM;   /* used to return directly, leaking pac and dev */
+     if (err)
+           goto out_dev_put;
+
+     err = ipv6_dev_ac_inc(dev, addr);
+     if (err)
+           goto out_dev_put;
+
+     write_lock_bh(&ipv6_sk_ac_lock);
+     pac->acl_next = np->ipv6_ac_list;
+     np->ipv6_ac_list = pac;
+     write_unlock_bh(&ipv6_sk_ac_lock);
+
+     dev_put(dev);
+
+     return 0;
+
+out_dev_put:
+     dev_put(dev);
+out_free_pac:
+     sock_kfree_s(sk, pac, sizeof(*pac));
+     return err;
+}
+
+/*
+ *   socket leave an anycast group
+ *
+ *   Removes the first membership of sk that matches addr (and ifindex,
+ *   unless ifindex is 0).  Returns 0, or -ENOENT if there is no match.
+ */
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+     struct ipv6_pinfo *np = inet6_sk(sk);
+     struct net_device *dev;
+     struct ipv6_ac_socklist *pac, **link;
+
+     write_lock_bh(&ipv6_sk_ac_lock);
+     /* link always points at the pointer that refers to pac */
+     for (link = &np->ipv6_ac_list; (pac = *link) != NULL;
+          link = &pac->acl_next) {
+           if (ifindex != 0 && pac->acl_ifindex != ifindex)
+                 continue;
+           if (ipv6_addr_cmp(&pac->acl_addr, addr) == 0)
+                 break;
+     }
+     if (pac == NULL) {
+           write_unlock_bh(&ipv6_sk_ac_lock);
+           return -ENOENT;
+     }
+     *link = pac->acl_next;
+     write_unlock_bh(&ipv6_sk_ac_lock);
+
+     /* the device may be gone by now; then there is nothing to decrement */
+     dev = dev_get_by_index(pac->acl_ifindex);
+     if (dev != NULL) {
+           ipv6_dev_ac_dec(dev, &pac->acl_addr);
+           dev_put(dev);
+     }
+     sock_kfree_s(sk, pac, sizeof(*pac));
+     return 0;
+}
+
+/*
+ *   Drop every anycast membership of sk: detach the whole list under the
+ *   lock, then release the entries one by one without it.
+ */
+void ipv6_sock_ac_close(struct sock *sk)
+{
+     struct ipv6_pinfo *np = inet6_sk(sk);
+     struct net_device *dev = NULL;
+     struct ipv6_ac_socklist *pac, *next;
+     int   cur_index = 0;
+
+     write_lock_bh(&ipv6_sk_ac_lock);
+     pac = np->ipv6_ac_list;
+     np->ipv6_ac_list = NULL;
+     write_unlock_bh(&ipv6_sk_ac_lock);
+
+     for (; pac != NULL; pac = next) {
+           next = pac->acl_next;
+
+           /* reuse the device lookup across consecutive entries on the same ifindex */
+           if (pac->acl_ifindex != cur_index) {
+                 if (dev != NULL)
+                       dev_put(dev);
+                 cur_index = pac->acl_ifindex;
+                 dev = dev_get_by_index(cur_index);
+           }
+           if (dev != NULL)
+                 ipv6_dev_ac_dec(dev, &pac->acl_addr);
+           sock_kfree_s(sk, pac, sizeof(*pac));
+     }
+     if (dev != NULL)
+           dev_put(dev);
+}
+
+/*
+ *   Return 1 if sk has joined anycast address addr (on ifindex, unless
+ *   ifindex is 0), 0 otherwise.
+ */
+int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
+{
+     struct ipv6_pinfo *np = inet6_sk(sk);
+     struct ipv6_ac_socklist *pac;
+     int   found = 0;
+
+     read_lock(&ipv6_sk_ac_lock);
+     for (pac = np->ipv6_ac_list; pac != NULL; pac = pac->acl_next) {
+           if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
+               ipv6_addr_cmp(&pac->acl_addr, addr) == 0) {
+                 found = 1;
+                 break;
+           }
+     }
+     read_unlock(&ipv6_sk_ac_lock);
+
+     return found;
+}
+
+/* Drop one reference; the last one also releases the inet6_dev and frees ac. */
+static void aca_put(struct ifacaddr6 *ac)
+{
+     if (!atomic_dec_and_test(&ac->aca_refcnt))
+           return;
+
+     in6_dev_put(ac->aca_idev);
+     kfree(ac);
+}
+
+/*
+ *   device anycast group inc (add if not found)
+ *
+ *   Returns 0 on success, -EINVAL if dev has no inet6_dev, -ENODEV if
+ *   the inet6_dev is dead, -ENOMEM if a new entry cannot be allocated.
+ */
+int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
+{
+     struct inet6_dev *idev = in6_dev_get(dev);
+     struct ifacaddr6 *aca;
+     int err;
+
+     if (idev == NULL)
+           return -EINVAL;
+
+     write_lock_bh(&idev->lock);
+
+     err = -ENODEV;
+     if (idev->dead)
+           goto out_unlock;
+
+     /* already configured: just count another user */
+     err = 0;
+     for (aca = idev->ac_list; aca != NULL; aca = aca->aca_next) {
+           if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0) {
+                 aca->aca_users++;
+                 goto out_unlock;
+           }
+     }
+
+     /*
+      *    not found: create a new one.
+      */
+     err = -ENOMEM;
+     aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
+     if (aca == NULL)
+           goto out_unlock;
+
+     memset(aca, 0, sizeof(struct ifacaddr6));
+
+     ipv6_addr_copy(&aca->aca_addr, addr);
+     aca->aca_idev = idev;   /* keeps the reference taken above */
+     aca->aca_users = 1;
+     /* one reference for the list, one for our use below */
+     atomic_set(&aca->aca_refcnt, 2);
+     aca->aca_lock = SPIN_LOCK_UNLOCKED;
+
+     aca->aca_next = idev->ac_list;
+     idev->ac_list = aca;
+     write_unlock_bh(&idev->lock);
+
+     ip6_rt_addr_add(&aca->aca_addr, dev);
+
+     addrconf_join_solict(dev, &aca->aca_addr);
+
+     aca_put(aca);
+     return 0;
+
+out_unlock:
+     write_unlock_bh(&idev->lock);
+     in6_dev_put(idev);
+     return err;
+}
+
+/*
+ *   device anycast group decrement
+ *
+ *   Returns 0 on success, -ENODEV if dev has no inet6_dev, -ENOENT if
+ *   addr is not on the device's anycast list.  The entry is unlinked and
+ *   released once its user count reaches zero.
+ */
+int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
+{
+     struct inet6_dev *idev = in6_dev_get(dev);
+     struct ifacaddr6 *aca, **link;
+
+     if (idev == NULL)
+           return -ENODEV;
+
+     write_lock_bh(&idev->lock);
+     /* link always points at the pointer that refers to aca */
+     for (link = &idev->ac_list; (aca = *link) != NULL;
+          link = &aca->aca_next) {
+           if (ipv6_addr_cmp(&aca->aca_addr, addr) == 0)
+                 break;
+     }
+     if (aca == NULL) {
+           write_unlock_bh(&idev->lock);
+           in6_dev_put(idev);
+           return -ENOENT;
+     }
+     if (--aca->aca_users > 0) {
+           /* still in use by someone else */
+           write_unlock_bh(&idev->lock);
+           in6_dev_put(idev);
+           return 0;
+     }
+     *link = aca->aca_next;
+     write_unlock_bh(&idev->lock);
+
+     addrconf_leave_solict(dev, &aca->aca_addr);
+
+     ip6_rt_addr_del(&aca->aca_addr, dev);
+
+     aca_put(aca);
+     in6_dev_put(idev);
+     return 0;
+}
+
+/*
+ *   check if the interface has this anycast address
+ *   (returns 1 if so, 0 if not or if dev has no inet6_dev)
+ */
+static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
+{
+     struct inet6_dev *idev;
+     struct ifacaddr6 *aca;
+
+     idev = in6_dev_get(dev);
+     if (idev == NULL)
+           return 0;
+
+     read_lock_bh(&idev->lock);
+     aca = idev->ac_list;
+     while (aca != NULL && ipv6_addr_cmp(&aca->aca_addr, addr) != 0)
+           aca = aca->aca_next;
+     read_unlock_bh(&idev->lock);
+     in6_dev_put(idev);
+
+     return aca != NULL;
+}
+
+/*
+ *   check if given interface (or any, if dev==0) has this anycast address
+ */
+int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
+{
+     int found = 0;
+
+     if (dev)
+           return ipv6_chk_acast_dev(dev, addr);
+
+     /* no device given: scan all of them, stopping at the first hit */
+     read_lock(&dev_base_lock);
+     for (dev = dev_base; dev != NULL && !found; dev = dev->next)
+           found = ipv6_chk_acast_dev(dev, addr) != 0;
+     read_unlock(&dev_base_lock);
+
+     return found;
+}
+
+
+#ifdef CONFIG_PROC_FS
+/*
+ *   /proc/net/anycast6 read handler.
+ *
+ *   Emits one line per anycast address on every device:
+ *   ifindex, device name, the address as 32 hex digits, user count.
+ *
+ *   @buffer: output buffer
+ *   @start:  set to where the data for the requested offset begins
+ *   @offset: file offset requested by the reader
+ *   @length: number of bytes requested
+ *
+ *   Returns the number of valid bytes at *start (never negative).
+ */
+int anycast6_get_info(char *buffer, char **start, off_t offset, int length)
+{
+     off_t pos=0, begin=0;
+     struct ifacaddr6 *im;
+     int len=0;
+     struct net_device *dev;
+
+     read_lock(&dev_base_lock);
+     for (dev = dev_base; dev; dev = dev->next) {
+           struct inet6_dev *idev;
+
+           if ((idev = in6_dev_get(dev)) == NULL)
+                 continue;
+
+           read_lock_bh(&idev->lock);
+           for (im = idev->ac_list; im; im = im->aca_next) {
+                 int i;
+
+                 len += sprintf(buffer+len,"%-4d %-15s ", dev->ifindex, dev->name);
+
+                 for (i=0; i<16; i++)
+                       len += sprintf(buffer+len, "%02x", im->aca_addr.s6_addr[i]);
+
+                 len += sprintf(buffer+len, " %5d\n", im->aca_users);
+
+                 pos=begin+len;
+                 /* everything so far lies before the requested offset: discard it */
+                 if (pos < offset) {
+                       len=0;
+                       begin=pos;
+                 }
+                 /* enough output for this read: stop early */
+                 if (pos > offset+length) {
+                       read_unlock_bh(&idev->lock);
+                       in6_dev_put(idev);
+                       goto done;
+                 }
+           }
+           read_unlock_bh(&idev->lock);
+           in6_dev_put(idev);
+     }
+
+done:
+     read_unlock(&dev_base_lock);
+
+     *start=buffer+(offset-begin);
+     len-=(offset-begin);
+     if(len>length)
+           len=length;
+     if (len<0)
+           len=0;
+     return len;
+}
+
+#endif
diff -ruN linux-2.5.44/net/ipv6/icmp.c linux-2.5.44AC/net/ipv6/icmp.c
--- linux-2.5.44/net/ipv6/icmp.c    Fri Oct 18 21:01:20 2002
+++ linux-2.5.44AC/net/ipv6/icmp.c  Mon Oct 28 12:38:09 2002
@@ -388,7 +388,8 @@

      saddr = &skb->nh.ipv6h->daddr;

-     if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST)
+     if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST ||
+         ipv6_chk_acast_addr(0, saddr))
            saddr = NULL;

      msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY;
diff -ruN linux-2.5.44/net/ipv6/ipv6_sockglue.c 
linux-2.5.44AC/net/ipv6/ipv6_sockglue.c
--- linux-2.5.44/net/ipv6/ipv6_sockglue.c Fri Oct 18 21:01:09 2002
+++ linux-2.5.44AC/net/ipv6/ipv6_sockglue.c     Mon Oct 28 12:38:09 2002
@@ -351,6 +351,24 @@
                  retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, 
&mreq.ipv6mr_multiaddr);
            break;
      }
+     case IPV6_JOIN_ANYCAST:
+     case IPV6_LEAVE_ANYCAST:
+     {
+           struct ipv6_mreq mreq;
+
+           if (optlen != sizeof(struct ipv6_mreq))
+                 goto e_inval;
+
+           retv = -EFAULT;
+           if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+                 break;
+
+           if (optname == IPV6_JOIN_ANYCAST)
+                 retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, 
&mreq.ipv6mr_acaddr);
+           else
+                 retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, 
&mreq.ipv6mr_acaddr);
+           break;
+     }
      case IPV6_ROUTER_ALERT:
            retv = ip6_ra_control(sk, val, NULL);
            break;
diff -ruN linux-2.5.44/net/ipv6/ndisc.c linux-2.5.44AC/net/ipv6/ndisc.c
--- linux-2.5.44/net/ipv6/ndisc.c   Fri Oct 18 21:01:59 2002
+++ linux-2.5.44AC/net/ipv6/ndisc.c Mon Oct 28 13:12:27 2002
@@ -378,7 +378,10 @@
               struct in6_addr *daddr, struct in6_addr *solicited_addr,
               int router, int solicited, int override, int inc_opt)
 {
+     static struct in6_addr tmpaddr;
+     struct inet6_ifaddr *ifp;
         struct sock *sk = ndisc_socket->sk;
+     struct in6_addr *src_addr;
         struct nd_msg *msg;
         int len;
         struct sk_buff *skb;
@@ -400,13 +403,23 @@
            ND_PRINTK1("send_na: alloc skb failed\n");
            return;
      }
+     /* for anycast or proxy, solicited_addr != src_addr */
+     src_addr = &tmpaddr;
+     ifp = ipv6_get_ifaddr(solicited_addr, dev);
+     if (ifp) {
+           src_addr = solicited_addr;
+           in6_ifa_put(ifp);
+     } else if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr, 0)) {
+           kfree_skb(skb);   /* no source address: free the skb allocated above instead of leaking it */
+           return;
+     }

      if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
            kfree_skb(skb);
            return;
      }

-     ip6_nd_hdr(sk, skb, dev, solicited_addr, daddr, IPPROTO_ICMPV6, len);
+     ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);

      msg = (struct nd_msg *) skb_put(skb, len);

@@ -426,7 +439,7 @@
            ndisc_fill_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, 
dev->addr_len);

      /* checksum */
-     msg->icmph.icmp6_cksum = csum_ipv6_magic(solicited_addr, daddr, len,
+     msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
                                     IPPROTO_ICMPV6,
                                     csum_partial((__u8 *) msg,
                                                len, 0));
@@ -1133,6 +1146,51 @@
                        }
                  }
                  in6_ifa_put(ifp);
+           } else if (ipv6_chk_acast_addr(dev, &msg->target)) {
+                 struct inet6_dev *idev = in6_dev_get(dev);
+                 int addr_type = ipv6_addr_type(saddr);
+
+                 /* anycast */
+
+                 if (!idev) {
+                       /* XXX: count this drop? */
+                       return 0;
+                 }
+
+                 if (addr_type == IPV6_ADDR_ANY) {
+                       struct in6_addr maddr;
+
+                       ipv6_addr_all_nodes(&maddr);
+                       ndisc_send_na(dev, NULL, &maddr, &msg->target,
+                                   idev->cnf.forwarding, 0, 0, 1);
+                       in6_dev_put(idev);
+                       return 0;
+                 }
+
+                 if (addr_type & IPV6_ADDR_UNICAST) {
+                       int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
+                       if (inc)
+                             nd_tbl.stats.rcv_probes_mcast++;
+                       else
+                             nd_tbl.stats.rcv_probes_ucast++;
+
+                       /*
+                        *   update / create cache entry
+                        *   for the source address
+                        */
+
+                       neigh = neigh_event_ns(&nd_tbl, lladdr, saddr, 
skb->dev);
+
+                       if (neigh || !dev->hard_header) {
+                             ndisc_send_na(dev, neigh, saddr,
+                                   &msg->target,
+                                     idev->cnf.forwarding, 1, 0, inc);
+                             if (neigh)
+                                   neigh_release(neigh);
+                       }
+                 }
+                 in6_dev_put(idev);
+
            } else {
                  struct inet6_dev *in6_dev = in6_dev_get(dev);
                  int addr_type = ipv6_addr_type(saddr);
diff -ruN linux-2.5.44/net/netsyms.c linux-2.5.44AC/net/netsyms.c
--- linux-2.5.44/net/netsyms.c      Fri Oct 18 21:01:49 2002
+++ linux-2.5.44AC/net/netsyms.c    Mon Oct 28 12:38:09 2002
@@ -469,6 +469,8 @@
 EXPORT_SYMBOL(unregister_netdevice);
 EXPORT_SYMBOL(netdev_state_change);
 EXPORT_SYMBOL(dev_new_index);
+EXPORT_SYMBOL(dev_getany);
+EXPORT_SYMBOL(__dev_getany);
 EXPORT_SYMBOL(dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(dev_get_by_name);

(See attached file: anycast-2.5.44.patch)

Attachment: anycast-2.5.44.patch
Description: Binary data

<Prev in Thread] Current Thread [Next in Thread>
  • [PATCH] anycast support for IPv6, updated to 2.5.44, David Stevens <=