netdev
[Top] [All Lists]

Re: OOPS: Multipath routing 2.4.17

To: kuznet@xxxxxxxxxxxxx
Subject: Re: OOPS: Multipath routing 2.4.17
From: Julian Anastasov <ja@xxxxxx>
Date: Sun, 3 Mar 2002 02:08:10 +0000 (GMT)
Cc: netdev@xxxxxxxxxxx, Andi Kleen <ak@xxxxxxx>
In-reply-to: <200203021436.RAA20557@xxxxxxxxxxxxx>
Sender: owner-netdev@xxxxxxxxxxx
        Hello,

        Moved from lk to netdev

On Sat, 2 Mar 2002 kuznet@xxxxxxxxxxxxx wrote:

> I remember your approach had some unacceptable issues, but 2.5 is exactly
> the place to resolve them. :-)
>
> But actually I would like to see a fix for 2.4 to begin with.

        OK, I tested the appended patch (2.4.18) on UP, and the scheduler
works correctly. Use it (if needed at all) in kernel 2.4/2.5, where
appropriate. Its features:

- "slow start" (nh_power=0) for each nexthop newly marked alive
(it starts on the next round), as before

- nh_power protected by fib_multipath_lock

- constant distribution

- shorter

- not sure about the speed, though: modern CPUs divide faster
and faster

> Alexey

Regards

--
Julian Anastasov <ja@xxxxxx>


--- net/ipv4/fib_semantics.c.orig       Sun Mar  3 00:24:47 2002
+++ net/ipv4/fib_semantics.c    Sat Mar  2 21:57:27 2002
@@ -48,6 +48,7 @@
 static struct fib_info         *fib_info_list;
 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
 int fib_info_cnt;
+static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;

 #define for_fib_info() { struct fib_info *fi; \
        for (fi = fib_info_list; fi; fi = fi->fib_next)
@@ -868,10 +869,6 @@
                                else if (nh->nh_dev == dev &&
                                         nh->nh_scope != scope) {
                                        nh->nh_flags |= RTNH_F_DEAD;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-                                       fi->fib_power -= nh->nh_power;
-                                       nh->nh_power = 0;
-#endif
                                        dead++;
                                }
                        } endfor_nexthops(fi)
@@ -931,44 +928,38 @@
 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
 {
        struct fib_info *fi = res->fi;
-       int w;
-
-       if (fi->fib_power <= 0) {
-               int power = 0;
-               change_nexthops(fi) {
-                       if (!(nh->nh_flags&RTNH_F_DEAD)) {
-                               power += nh->nh_weight;
-                               nh->nh_power = nh->nh_weight;
-                       }
-               } endfor_nexthops(fi);
-               fi->fib_power = power;
-#if 1
-               if (power <= 0) {
-                       printk(KERN_CRIT "impossible 777\n");
-                       return;
-               }
-#endif
-       }
+       int w = -1, sel = 0;

+       spin_lock_bh(&fib_multipath_lock);

-       /* w should be random number [0..fi->fib_power-1],
-          it is pretty bad approximation.
-        */
-
-       w = jiffies % fi->fib_power;
+       repeat:

        change_nexthops(fi) {
-               if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
-                       if ((w -= nh->nh_power) <= 0) {
-                               nh->nh_power--;
-                               fi->fib_power--;
-                               res->nh_sel = nhsel;
-                               return;
-                       }
+               if (nh->nh_power > w && !(nh->nh_flags&RTNH_F_DEAD)) {
+                       w = nh->nh_power;
+                       sel = nhsel;
                }
        } endfor_nexthops(fi);
+       if (w > 0) {
+               fi->fib_nh[sel].nh_power--;
+               spin_unlock_bh(&fib_multipath_lock);
+               res->nh_sel = sel;
+               return;
+       }
+
+       if (!w) {
+               change_nexthops(fi) {
+                       if (!(nh->nh_flags&RTNH_F_DEAD))
+                               nh->nh_power = nh->nh_weight;
+               } endfor_nexthops(fi);
+               w = -1;
+               goto repeat;
+       }
+
+       spin_unlock_bh(&fib_multipath_lock);

 #if 1
+       /* Probably all nexthops are dead */
        printk(KERN_CRIT "impossible 888\n");
 #endif
        return;


<Prev in Thread] Current Thread [Next in Thread>