
Re: serious netpoll bug w/NAPI

To: "David S. Miller" <davem@xxxxxxxxxxxxx>
Subject: Re: serious netpoll bug w/NAPI
From: Matt Mackall <mpm@xxxxxxxxxxx>
Date: Wed, 9 Feb 2005 17:11:04 -0800
Cc: jmoyer@xxxxxxxxxx, netdev@xxxxxxxxxxx
In-reply-to: <20050209164658.409f8950.davem@xxxxxxxxxxxxx>
References: <20050208201634.03074349.davem@xxxxxxxxxxxxx> <20050209183219.GA2366@xxxxxxxxx> <20050209164658.409f8950.davem@xxxxxxxxxxxxx>
Sender: netdev-bounce@xxxxxxxxxxx
User-agent: Mutt/1.5.6+20040907i
On Wed, Feb 09, 2005 at 04:46:58PM -0800, David S. Miller wrote:
> On Wed, 9 Feb 2005 10:32:19 -0800
> Matt Mackall <mpm@xxxxxxxxxxx> wrote:
> 
> > On closer inspection, there's a couple other related failure cases
> > with the new ->poll logic in netpoll. I'm afraid it looks like
> > CONFIG_NETPOLL will need to guard ->poll() with a per-device spinlock
> > on netpoll-enabled devices.
> > 
> > This will mean putting a pointer to struct netpoll in struct
> > net_device (which I should have done in the first place) and will take
> > a few patches to sort out.
> 
> Will this ->poll() guarding lock be acquired only in the netpoll
> code or system-wide?  If the latter, this introduced an incredible
> performance regression for devices using the LLTX locking scheme
> (ie. the most important high-performance gigabit drivers in the
> tree use this).

The lock is only taken in net_rx_action when netpoll is enabled for the
given device, so lock contention should be basically nil.

Here's my current patch (on top of -mm), for which I'm struggling to
find an appropriate test box (my dual box with NAPI got pressed into
service as a web server a couple of weeks ago). I've attached the other
two patches it relies on as well.

--------------

Introduce a per-client poll lock and flag. The lock ensures we never
have more than one caller in dev->poll(). The flag prevents recursion
on UP, where the lock compiles away.
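
To spell out the UP case: with CONFIG_SMP=n and no spinlock debugging,
the spinlock compiles down to nothing and spin_trylock() always reports
success, so the lock by itself can't catch re-entry on the same CPU. A
hypothetical illustration (not from the patch below):

	spinlock_t lock = SPIN_LOCK_UNLOCKED;

	if (spin_trylock(&lock)) {
		/* On UP this inner trylock also "succeeds"; the lock
		 * alone cannot tell that this CPU is already inside
		 * the critical section. */
		if (spin_trylock(&lock))
			printk("recursed without noticing\n");
	}

Hence poll_napi() below also tests np->poll_flag, which is what
actually detects a nested call when the lock is a no-op.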

Index: mm1npc/include/linux/netpoll.h
===================================================================
--- mm1npc.orig/include/linux/netpoll.h 2005-02-09 14:15:12.471051000 -0800
+++ mm1npc/include/linux/netpoll.h      2005-02-09 14:46:22.374083000 -0800
@@ -21,6 +21,8 @@
        u32 local_ip, remote_ip;
        u16 local_port, remote_port;
        unsigned char local_mac[6], remote_mac[6];
+       spinlock_t poll_lock;
+       int poll_flag;
 };
 
 void netpoll_poll(struct netpoll *np);
@@ -37,8 +39,27 @@
 {
        return skb->dev->np && skb->dev->np->rx_flags && __netpoll_rx(skb);
 }
+
+static inline void netpoll_poll_lock(struct net_device *dev)
+{
+       if (dev->np) {
+               spin_lock(&dev->np->poll_lock);
+               dev->np->poll_flag = 1;
+       }
+}
+
+static inline void netpoll_poll_unlock(struct net_device *dev)
+{
+       if (dev->np) {
+               dev->np->poll_flag = 0;
+               spin_unlock(&dev->np->poll_lock);
+       }
+}
+
 #else
 #define netpoll_rx(a) 0
+#define netpoll_poll_lock(a)
+#define netpoll_poll_unlock(a)
 #endif
 
 #endif
Index: mm1npc/net/core/dev.c
===================================================================
--- mm1npc.orig/net/core/dev.c  2005-02-09 14:15:11.236086000 -0800
+++ mm1npc/net/core/dev.c       2005-02-09 14:15:13.710042000 -0800
@@ -1772,6 +1772,7 @@
 
                dev = list_entry(queue->poll_list.next,
                                 struct net_device, poll_list);
+               netpoll_poll_lock(dev);
 
                if (dev->quota <= 0 || dev->poll(dev, &budget)) {
                        local_irq_disable();
@@ -1782,9 +1783,11 @@
                        else
                                dev->quota = dev->weight;
+                       netpoll_poll_unlock(dev);
                } else {
+                       netpoll_poll_unlock(dev);
                        dev_put(dev);
                        local_irq_disable();
                }
 
 #ifdef CONFIG_KGDBOE
                kgdb_process_breakpoint();
Index: mm1npc/net/core/netpoll.c
===================================================================
--- mm1npc.orig/net/core/netpoll.c      2005-02-09 14:15:12.466070000 -0800
+++ mm1npc/net/core/netpoll.c   2005-02-09 14:48:44.506107000 -0800
@@ -36,7 +36,6 @@
 static struct sk_buff *skbs;
 
 static atomic_t trapped;
-static DEFINE_SPINLOCK(netpoll_poll_lock);
 
 #define NETPOLL_RX_ENABLED  1
 #define NETPOLL_RX_DROP     2
@@ -63,8 +62,15 @@
 }
 
 /*
- * Check whether delayed processing was scheduled for our current CPU,
- * and then manually invoke NAPI polling to pump data off the card.
+ * Check whether delayed processing was scheduled for our NIC. If so,
+ * we attempt to grab the poll lock and use ->poll() to pump the card.
+ * If this fails, either we've recursed in ->poll() or it's already
+ * running on another CPU.
+ *
+ * Note: we don't mask interrupts with this lock because we're using
+ * trylock here and interrupts are already disabled in the softirq
+ * case. Further, we test the poll_flag to avoid recursion on UP
+ * systems where the lock doesn't exist.
  *
  * In cases where there is bi-directional communications, reading only
  * one message at a time can lead to packets being dropped by the
@@ -74,13 +80,9 @@
 static void poll_napi(struct netpoll *np)
 {
        int budget = 16;
-       unsigned long flags;
-       struct softnet_data *queue;
 
-       spin_lock_irqsave(&netpoll_poll_lock, flags);
-       queue = &__get_cpu_var(softnet_data);
        if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
-           !list_empty(&queue->poll_list)) {
+           !np->poll_flag && spin_trylock(&np->poll_lock)) {
                np->rx_flags |= NETPOLL_RX_DROP;
                atomic_inc(&trapped);
 
@@ -88,8 +90,8 @@
 
                atomic_dec(&trapped);
                np->rx_flags &= ~NETPOLL_RX_DROP;
+               spin_unlock(&np->poll_lock);
        }
-       spin_unlock_irqrestore(&netpoll_poll_lock, flags);
 }
 
 void netpoll_poll(struct netpoll *np)
@@ -276,7 +278,6 @@
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
        u32 sip, tip;
        struct sk_buff *send_skb;
-       unsigned long flags;
        struct netpoll *np = skb->dev->np;
 
        if (!np) return;
@@ -360,7 +361,7 @@
        int proto, len, ulen;
        struct iphdr *iph;
        struct udphdr *uh;
-       struct netpoll *np;
+       struct netpoll *np = skb->dev->np;
 
        if (!np->rx_hook)
                goto out;
@@ -618,6 +619,7 @@
 
        if(np->rx_hook)
                np->rx_flags = NETPOLL_RX_ENABLED;
+       np->poll_lock = SPIN_LOCK_UNLOCKED;
        np->dev = ndev;
        ndev->np = np;
 

 
> I know you want to do anything except drop the packet.  What you
> may do instead, therefore, is add the packet to the normal device
> mid-layer queue and kick NET_TX_ACTION if netif_queue_stopped() is
> true.

Actually, I think it's better to keep the midlayer out of it. Netpoll
clients like netdump and kgdb basically stop the machine, so queueing
to avoid deadlock is just not going to work.
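
For context, netpoll's transmit path already copes with a busy device
by polling it and retrying in place rather than queueing anything.
Roughly (a heavily abridged sketch of what net/core/netpoll.c does
around this point in time, not exact tree contents):

	/* netpoll_send_skb(), abridged */
repeat:
	spin_lock(&np->dev->xmit_lock);
	np->dev->xmit_lock_owner = smp_processor_id();

	if (netif_queue_stopped(np->dev)) {
		np->dev->xmit_lock_owner = -1;
		spin_unlock(&np->dev->xmit_lock);
		netpoll_poll(np);	/* pump ->poll() to reap tx skbs */
		goto repeat;
	}

	status = np->dev->hard_start_xmit(skb, np->dev);
	np->dev->xmit_lock_owner = -1;
	spin_unlock(&np->dev->xmit_lock);

	if (status) {			/* transmit busy: poll and retry */
		netpoll_poll(np);
		goto repeat;
	}

With a NAPI driver that cleans its tx ring from ->poll(), that retry is
exactly what drags dev->poll() into netpoll, which is why it wants the
poll locking above rather than help from the midlayer queue.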

> As an aside, ipt_LOG is a great stress test for netpoll, because 4
> incoming packets can generate 8 outgoing packets worth of netconsole
> traffic :-)

-- 
Mathematics is the supreme nostalgia of our time.

Attachment: netpoll-helpers.patch
Description: Text document

Attachment: netpoll-in-dev.patch
Description: Text document
