netdev
[Top] [All Lists]

[PATCH] Extend lock less TX to real devices

To: davem@xxxxxxxxxx, kuznet@xxxxxxxxxxxxx, netdev@xxxxxxxxxxx, akepner@xxxxxxx
Subject: [PATCH] Extend lock less TX to real devices
From: Andi Kleen <ak@xxxxxx>
Date: Tue, 31 Aug 2004 14:38:20 +0200
Sender: netdev-bounce@xxxxxxxxxxx
User-agent: Gnus/5.110003 (No Gnus v0.3) Emacs/21.2 (gnu/linux)
This patch extends the recently added NETIF_F_LLTX to real devices.
A lot of modern network drivers have a single big lock around their 
hard_start_xmit, and it doesn't make sense to take the xmit lock too.
They also have enough locking to handle setting of multicast lists
properly.

Now they can set NETIF_F_LLTX and get one lock less.  For drivers
that don't set this flag nothing changes.

I added support for trylocking instead of spinning like sch_generic
does - for that the driver has to return -1, then the packet is requeued.
The check for a local device deadlock is lost for this case, 
but that doesn't seem to be a big loss (I've never seen this printk 
ever get triggered)

The patch looks bigger than it really is because I moved some code
around and converted the macros into inlines.

I also did an additional micro optimization: on a preemptive kernel
dev_queue_xmit would change the local atomic count several times
in the hot path. This patch changes it to disable preemption only once,
to hopefully make it a bit faster.

I changed the lltx handling to not change xmit_lock_owner.
Without a lock it is not safe anyway, and it will prevent
another cache line from bouncing.

With only this patch nothing changes because no driver uses
the new flag yet.

-Andi

diff -u linux-2.6.8-work/net/core/dev.c-o linux-2.6.8-work/net/core/dev.c
--- linux-2.6.8-work/net/core/dev.c-o   2004-08-05 04:31:12.000000000 +0200
+++ linux-2.6.8-work/net/core/dev.c     2004-08-31 13:25:35.000000000 +0200
@@ -1255,19 +1255,24 @@
        return 0;
 }
 
-#define HARD_TX_LOCK_BH(dev, cpu) {                    \
-       if ((dev->features & NETIF_F_LLTX) == 0) {      \
-               spin_lock_bh(&dev->xmit_lock);          \
-               dev->xmit_lock_owner = cpu;             \
-       }                                               \
-}
+/* Preemption must be off here. A driver setting LLTX has to 
+   use interrupt safe locking. */
 
-#define HARD_TX_UNLOCK_BH(dev) {                       \
-       if ((dev->features & NETIF_F_LLTX) == 0) {      \
-               dev->xmit_lock_owner = -1;              \
-               spin_unlock_bh(&dev->xmit_lock);        \
-       }                                               \
-}
+/* Take the driver transmit lock unless the driver does its own locking
+ * (NETIF_F_LLTX).  Returns 1 when the caller may transmit, 0 when
+ * recursion onto this CPU was detected (spinning here would deadlock).
+ * Preemption must be off; LLTX drivers must use interrupt safe locking.
+ */
+static inline int hard_tx_lock(struct net_device *dev)
+{
+	if (dev->features & NETIF_F_LLTX)
+		return 1;
+	/* Keep the recursion check the old HARD_TX_LOCK_BH caller did,
+	 * so the "Recursion is detected" path stays reachable and we
+	 * never self-deadlock on xmit_lock. */
+	if (dev->xmit_lock_owner == smp_processor_id())
+		return 0;
+	spin_lock(&dev->xmit_lock);
+	dev->xmit_lock_owner = smp_processor_id();
+	return 1;
+}
+
+/* Release the transmit lock taken by hard_tx_lock().  A no-op for
+ * NETIF_F_LLTX drivers, which never took dev->xmit_lock. */
+static inline void hard_tx_unlock(struct net_device *dev) 
+{ 
+       if (dev->features & NETIF_F_LLTX) 
+               return; 
+       dev->xmit_lock_owner = -1;
+       spin_unlock(&dev->xmit_lock);   
+} 
 
 static inline void qdisc_run(struct net_device *dev)
 {
@@ -1319,7 +1324,15 @@
                if (skb_checksum_help(&skb, 0))
                        goto out_kfree_skb;
 
-       rcu_read_lock();
+       /* 
+        * Various locks need a _bh disable and there are CPU local
+        * data structures too. Instead of changing the preempt count
+        * with each lock just grab it here once for the whole
+        * function. This also gives the required atomicity for the RCU
+        * use below.
+        */ 
+       local_bh_disable();
+
        /* Updates of qdisc are serialized by queue_lock. 
         * The struct Qdisc which is pointed to by qdisc is now a 
         * rcu structure - it may be accessed without acquiring 
@@ -1339,18 +1352,17 @@
 #endif
        if (q->enqueue) {
                /* Grab device queue */
-               spin_lock_bh(&dev->queue_lock);
+               spin_lock(&dev->queue_lock);
 
                rc = q->enqueue(skb, q);
 
                qdisc_run(dev);
 
-               spin_unlock_bh(&dev->queue_lock);
-               rcu_read_unlock();
+               spin_unlock(&dev->queue_lock);
                rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
                goto out;
        }
-       rcu_read_unlock();
+
 
        /* The device has no queue. Common case for software devices:
           loopback, all the sorts of tunnels...
@@ -1363,32 +1375,27 @@
 
           Check this and shot the lock. It is not prone from deadlocks.
           Either shot noqueue qdisc, it is even simpler 8)
+
+          BHs are still disabled here
         */
        if (dev->flags & IFF_UP) {
-               int cpu = get_cpu();
-
-               if (dev->xmit_lock_owner != cpu) {
-
-                       HARD_TX_LOCK_BH(dev, cpu);
-                       put_cpu();
-
+               if (hard_tx_lock(dev)) { 
                        if (!netif_queue_stopped(dev)) {
                                if (netdev_nit)
                                        dev_queue_xmit_nit(skb, dev);
 
                                rc = 0;
                                if (!dev->hard_start_xmit(skb, dev)) {
-                                       HARD_TX_UNLOCK_BH(dev);
+                                       hard_tx_unlock(dev);
                                        goto out;
                                }
                        }
-                       HARD_TX_UNLOCK_BH(dev);
+                       hard_tx_unlock(dev);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
                        goto out_enetdown;
                } else {
-                       put_cpu();
                        /* Recursion is detected! It is possible,
                         * unfortunately */
                        if (net_ratelimit())
@@ -1401,6 +1408,7 @@
 out_kfree_skb:
        kfree_skb(skb);
 out:
+       local_bh_enable();
        return rc;
 }
 
diff -u linux-2.6.8-work/net/core/pktgen.c-o linux-2.6.8-work/net/core/pktgen.c
--- linux-2.6.8-work/net/core/pktgen.c-o        2004-08-05 04:31:12.000000000 +0200
+++ linux-2.6.8-work/net/core/pktgen.c  2004-08-15 17:51:22.000000000 +0200
@@ -629,8 +629,9 @@
                }
 
                nr_frags = skb_shinfo(skb)->nr_frags;
-                  
-               spin_lock_bh(&odev->xmit_lock);
+
+               if (!(odev->features & NETIF_F_LLTX))
+                       spin_lock_bh(&odev->xmit_lock);
                if (!netif_queue_stopped(odev)) {
 
                        atomic_inc(&skb->users);
@@ -655,8 +656,8 @@
                        last_ok = 0;
                }
                
-
-               spin_unlock_bh(&odev->xmit_lock);
+               if (!(odev->features & NETIF_F_LLTX))
+                       spin_unlock_bh(&odev->xmit_lock);
 
                if (info->ipg) {
                        /* Try not to busy-spin if we have larger sleep times.
diff -u linux-2.6.8-work/net/sched/sch_generic.c-o linux-2.6.8-work/net/sched/sch_generic.c
--- linux-2.6.8-work/net/sched/sch_generic.c-o  2004-08-13 03:44:03.000000000 +0200
+++ linux-2.6.8-work/net/sched/sch_generic.c    2004-08-31 13:13:36.000000000 +0200
@@ -68,6 +68,34 @@
        write_unlock_bh(&qdisc_tree_lock);
 }
 
+/* Handle a collision on dev->xmit_lock: if this CPU already owns the
+ * lock, hard_start_xmit() has recursed — drop the packet and return -1.
+ * Otherwise another CPU holds the driver; count the collision and
+ * return 0 so the caller requeues the packet.
+ */
+static int qdisc_coll(struct net_device *dev, struct sk_buff *skb)
+{
+       /* So, someone grabbed the driver. */
+       
+       /* It may be transient configuration error,
+          when hard_start_xmit() recurses. We detect
+          it by checking xmit owner and drop the
+          packet when deadloop is detected.
+       */
+       if (dev->xmit_lock_owner == smp_processor_id()) {
+               kfree_skb(skb);
+               if (net_ratelimit())
+                       printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+               return -1;
+       }
+       __get_cpu_var(netdev_rx_stat).cpu_collision++;
+       return 0;
+}
+
+/* Drop the driver xmit lock (skipped for LLTX drivers, which never
+ * took it) and re-acquire queue_lock for further qdisc work.  Note it
+ * returns with dev->queue_lock held. */
+static inline void qdisc_unlock_driver(struct net_device *dev)
+{
+       if (!(dev->features & NETIF_F_LLTX)) { 
+               dev->xmit_lock_owner = -1;
+               spin_unlock(&dev->xmit_lock);
+       }
+       spin_lock(&dev->queue_lock);
+}
+
 /* 
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
@@ -97,50 +125,49 @@
 
        /* Dequeue packet */
        if ((skb = q->dequeue(q)) != NULL) {
-               if (spin_trylock(&dev->xmit_lock)) {
+               /* 
+                * When the driver has LLTX set it does its own locking 
+                * in start_xmit. No need to add additional overhead by
+                * locking again. These checks are worth it because
+                * even uncongested locks can be quite expensive.
+                */
+               if ((dev->features & NETIF_F_LLTX) == 0) { 
+                       if (!spin_trylock(&dev->xmit_lock)) {
+                               if (qdisc_coll(dev, skb) < 0) 
+                                       return -1;
+                               goto out; 
+                               
+                       }
                        /* Remember that the driver is grabbed by us. */
                        dev->xmit_lock_owner = smp_processor_id();
-
-                       /* And release queue */
-                       spin_unlock(&dev->queue_lock);
-
-                       if (!netif_queue_stopped(dev)) {
-                               if (netdev_nit)
-                                       dev_queue_xmit_nit(skb, dev);
-
-                               if (dev->hard_start_xmit(skb, dev) == 0) {
-                                       dev->xmit_lock_owner = -1;
-                                       spin_unlock(&dev->xmit_lock);
-
-                                       spin_lock(&dev->queue_lock);
-                                       return -1;
+               }
+               
+               /* And release queue */
+               spin_unlock(&dev->queue_lock);
+               
+               if (!netif_queue_stopped(dev)) {
+                       int ret;
+
+                       if (netdev_nit)
+                               dev_queue_xmit_nit(skb, dev);
+                       
+                       ret = dev->hard_start_xmit(skb, dev);
+                       if (ret <= 0) { 
+                               qdisc_unlock_driver(dev);
+                               /* requeue in case of lock collision */
+                               if (ret < 0) {
+                                       __get_cpu_var(netdev_rx_stat).cpu_collision++;
+                                       goto out; 
                                }
-                       }
-
-                       /* Release the driver */
-                       dev->xmit_lock_owner = -1;
-                       spin_unlock(&dev->xmit_lock);
-                       spin_lock(&dev->queue_lock);
-                       q = dev->qdisc;
-               } else {
-                       /* So, someone grabbed the driver. */
-
-                       /* It may be transient configuration error,
-                          when hard_start_xmit() recurses. We detect
-                          it by checking xmit owner and drop the
-                          packet when deadloop is detected.
-                        */
-                       if (dev->xmit_lock_owner == smp_processor_id()) {
-                               kfree_skb(skb);
-                               if (net_ratelimit())
-                                       printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
                                return -1;
                        }
-                       __get_cpu_var(netdev_rx_stat).cpu_collision++;
                }
 
+               qdisc_unlock_driver(dev);
+               q = dev->qdisc;
+
                /* Device kicked us out :(
-                  This is possible in three cases:
+                  This is possible in four cases:
 
                   0. driver is locked
                   1. fastroute is enabled
@@ -149,6 +176,7 @@
                   3. device is buggy (ppp)
                 */
 
+       out:
                q->ops->requeue(skb, q);
                netif_schedule(dev);
                return 1;





<Prev in Thread] Current Thread [Next in Thread>
  • [PATCH] Extend lock less TX to real devices, Andi Kleen <=