netdev
[Top] [All Lists]

Re: [patch,rfc] allow registration of multiple netpolls per interface

To: Matt Mackall <mpm@xxxxxxxxxxx>
Subject: Re: [patch,rfc] allow registration of multiple netpolls per interface
From: Jeff Moyer <jmoyer@xxxxxxxxxx>
Date: Wed, 22 Jun 2005 17:05:15 -0400
Cc: netdev@xxxxxxxxxxx, netdev@xxxxxxxxxxxxxxx, linux-kernel@xxxxxxxxxxxxxxx
In-reply-to: <20050622170128.GV27572@xxxxxxxxx>
References: <17080.35214.507402.998984@xxxxxxxxxxxxxxxxxxxxxxxxxx> <20050621225252.GY27572@xxxxxxxxx> <17081.20441.714191.528270@xxxxxxxxxxxxxxxxxxxxxxxxxx> <20050622170128.GV27572@xxxxxxxxx>
Reply-to: jmoyer@xxxxxxxxxx
Sender: netdev-bounce@xxxxxxxxxxx
==> Regarding Re: [patch,rfc] allow registration of multiple netpolls per 
interface; Matt Mackall <mpm@xxxxxxxxxxx> adds:

mpm> On Wed, Jun 22, 2005 at 07:47:37AM -0400, Jeff Moyer wrote:
mpm> Hmm. It's conceivable we'll want netdump and kgdb on the same
mpm> interface so we'll have to address this eventually..
>> 
>> Well, do you want to address it eventually, or now?  As I said, it's never
>> bitten anyone before.

mpm> Later's fine. I just don't want to design it out by accident again.
 
OK.

>> >> struct netpoll_info {
>> >>         spinlock_t poll_lock;
>> >>         int poll_owner;
>> >>         int rx_flags;
>> >>         spinlock_t rx_lock;
>> >>         struct netpoll *rx_np; /* netpoll that registered an rx_hook */
>> >> };

[snip]

mpm> It might be simpler to have a single lock here..?
>> 
>> Maybe.  You can't really have netpoll code running on multiple cpus at the
>> same time, right?  This is the rx path, remember, so the other cpu should
>> be spinning on the poll_lock.
>> 
>> Keeping separate locks would allow you to unregister a struct netpoll
>> associated with another net device without causing lock contention.  This
>> is a very minor win, obviously.
>> 
>> I still feel like this npinfo struct is the right place for this, though.
>> If you're strongly opposed to that, I'll change it.

mpm> No, certainly having it in npinfo makes sense. I just was wondering if
mpm> we really need two locks in there.

Oh, I misunderstood.  Well, one protects recursing into the driver's poll
routine, the other protects access to the np_rx pointer, which may later
become a list.  I don't think we can lump these two together, do you?

>> >> + spin_lock_irqsave(&npinfo->rx_lock, flags);
>> >> + if (npinfo->rx_np->dev == skb->dev)
>> >> +         np = npinfo->rx_np;
>> >> + spin_unlock_irqrestore(&npinfo->rx_lock, flags);
>> 
mpm> And I think that means we don't need the lock here either.  
>> 
>> Sure we do.  We need to protect against rmmod's.

mpm> How can we have an rmmmod when we're trapped?

Looking over the code, I don't see what would prevent this.  Could you
point me the code which prevents this?

It's a good thing we're discussing it, since I found that I didn't take the
lock in netpoll_cleanup.


Okay, so here's the full patch again, with the changes we've discussed.
I've also included an interdiff.  As you can see, the first version I sent
didn't have some basic compile fixes, sorry about that.  Anyway, I have
booted and tested this version with multiple netpoll clients.

Barring any negative feedback, I'll break this up and send it as a patch
series.

Thanks,

Jeff

(Interdiff first)

diff -u linux-2.6.12-rc6/net/core/netpoll.c linux-2.6.12-rc6/net/core/netpoll.c
--- linux-2.6.12-rc6/net/core/netpoll.c 2005-06-21 16:03:22.409620400 -0400
+++ linux-2.6.12-rc6/net/core/netpoll.c 2005-06-22 16:51:24.336062231 -0400
@@ -130,8 +130,8 @@
  */
 static void poll_napi(struct netpoll *np)
 {
-       int budget = 16;
        struct netpoll_info *npinfo = np->dev->npinfo;
+       int budget = 16;
 
        if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
            npinfo->poll_owner != smp_processor_id() &&
@@ -344,22 +344,22 @@
 
 static void arp_reply(struct sk_buff *skb)
 {
+       struct netpoll_info *npinfo = skb->dev->npinfo;
        struct arphdr *arp;
        unsigned char *arp_ptr;
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
        u32 sip, tip;
+       unsigned long flags;
        struct sk_buff *send_skb;
-       struct netpoll *np;
-       struct netpoll_info *npinfo = skb->dev->npinfo;
-
-       if (!npinfo) return;
+       struct netpoll *np = NULL;
 
        spin_lock_irqsave(&npinfo->rx_lock, flags);
-       if (npinfo->rx_np->dev == skb->dev)
+       if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
                np = npinfo->rx_np;
        spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
-       if (!np) return;
+       if (!np)
+               return;
 
        /* No arp on this interface */
        if (skb->dev->flags & IFF_NOARP)
@@ -716,7 +716,7 @@
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                npinfo->rx_flags |= NETPOLL_RX_ENABLED;
                npinfo->rx_np = np;
-               spin_unlock_irqsave(&npinfo->rx_lock, flags);
+               spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }
        /* last thing to do is link it to the net device structure */
        ndev->npinfo = npinfo;
@@ -734,12 +734,15 @@
 void netpoll_cleanup(struct netpoll *np)
 {
        struct netpoll_info *npinfo;
+       unsigned long flags;
 
        if (np->dev) {
                npinfo = np->dev->npinfo;
                if (npinfo && npinfo->rx_np == np) {
+                       spin_lock_irqsave(&npinfo->rx_lock, flags);
                        npinfo->rx_np = NULL;
                        npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+                       spin_unlock_irqrestore(&npinfo->rx_lock, flags);
                }
                dev_put(np->dev);
        }
reverted:
--- linux-2.6.12-rc6/net/core/dev.c     2005-06-21 13:53:51.583407710 -0400
+++ linux-2.6.12-rc6/net/core/dev.c.orig        2005-06-20 19:51:59.000000000 
-0400
@@ -1656,7 +1656,6 @@
        unsigned short type;
 
        /* if we've gotten here through NAPI, check netpoll */
-       /* how else can we get here?  --phro */
        if (skb->dev->poll && netpoll_rx(skb))
                return NET_RX_DROP;
 


And now, the full diff:

--- linux-2.6.12-rc6/net/core/netpoll.c.orig    2005-06-20 19:51:56.000000000 
-0400
+++ linux-2.6.12-rc6/net/core/netpoll.c 2005-06-22 16:51:24.336062231 -0400
@@ -130,19 +130,20 @@ static int checksum_udp(struct sk_buff *
  */
 static void poll_napi(struct netpoll *np)
 {
+       struct netpoll_info *npinfo = np->dev->npinfo;
        int budget = 16;
 
        if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
-           np->poll_owner != smp_processor_id() &&
-           spin_trylock(&np->poll_lock)) {
-               np->rx_flags |= NETPOLL_RX_DROP;
+           npinfo->poll_owner != smp_processor_id() &&
+           spin_trylock(&npinfo->poll_lock)) {
+               npinfo->rx_flags |= NETPOLL_RX_DROP;
                atomic_inc(&trapped);
 
                np->dev->poll(np->dev, &budget);
 
                atomic_dec(&trapped);
-               np->rx_flags &= ~NETPOLL_RX_DROP;
-               spin_unlock(&np->poll_lock);
+               npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+               spin_unlock(&npinfo->poll_lock);
        }
 }
 
@@ -245,6 +246,7 @@ repeat:
 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
        int status;
+       struct netpoll_info *npinfo;
 
 repeat:
        if(!np || !np->dev || !netif_running(np->dev)) {
@@ -253,7 +255,8 @@ repeat:
        }
 
        /* avoid recursion */
-       if(np->poll_owner == smp_processor_id() ||
+       npinfo = np->dev->npinfo;
+       if(npinfo->poll_owner == smp_processor_id() ||
           np->dev->xmit_lock_owner == smp_processor_id()) {
                if (np->drop)
                        np->drop(skb);
@@ -341,14 +344,22 @@ void netpoll_send_udp(struct netpoll *np
 
 static void arp_reply(struct sk_buff *skb)
 {
+       struct netpoll_info *npinfo = skb->dev->npinfo;
        struct arphdr *arp;
        unsigned char *arp_ptr;
        int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
        u32 sip, tip;
+       unsigned long flags;
        struct sk_buff *send_skb;
-       struct netpoll *np = skb->dev->np;
+       struct netpoll *np = NULL;
+
+       spin_lock_irqsave(&npinfo->rx_lock, flags);
+       if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
+               np = npinfo->rx_np;
+       spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
-       if (!np) return;
+       if (!np)
+               return;
 
        /* No arp on this interface */
        if (skb->dev->flags & IFF_NOARP)
@@ -429,9 +440,9 @@ int __netpoll_rx(struct sk_buff *skb)
        int proto, len, ulen;
        struct iphdr *iph;
        struct udphdr *uh;
-       struct netpoll *np = skb->dev->np;
+       struct netpoll *np = skb->dev->npinfo->rx_np;
 
-       if (!np->rx_hook)
+       if (!np)
                goto out;
        if (skb->dev->type != ARPHRD_ETHER)
                goto out;
@@ -611,9 +622,8 @@ int netpoll_setup(struct netpoll *np)
 {
        struct net_device *ndev = NULL;
        struct in_device *in_dev;
-
-       np->poll_lock = SPIN_LOCK_UNLOCKED;
-       np->poll_owner = -1;
+       struct netpoll_info *npinfo;
+       unsigned long flags;
 
        if (np->dev_name)
                ndev = dev_get_by_name(np->dev_name);
@@ -624,7 +634,17 @@ int netpoll_setup(struct netpoll *np)
        }
 
        np->dev = ndev;
-       ndev->np = np;
+       if (!ndev->npinfo) {
+               npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+               if (!npinfo)
+                       goto release;
+
+               npinfo->rx_np = NULL;
+               npinfo->poll_lock = SPIN_LOCK_UNLOCKED;
+               npinfo->poll_owner = -1;
+               npinfo->rx_lock = SPIN_LOCK_UNLOCKED;
+       } else
+               npinfo = ndev->npinfo;
 
        if (!ndev->poll_controller) {
                printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
@@ -692,13 +712,20 @@ int netpoll_setup(struct netpoll *np)
                       np->name, HIPQUAD(np->local_ip));
        }
 
-       if(np->rx_hook)
-               np->rx_flags = NETPOLL_RX_ENABLED;
+       if(np->rx_hook) {
+               spin_lock_irqsave(&npinfo->rx_lock, flags);
+               npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+               npinfo->rx_np = np;
+               spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+       }
+       /* last thing to do is link it to the net device structure */
+       ndev->npinfo = npinfo;
 
        return 0;
 
  release:
-       ndev->np = NULL;
+       if (!ndev->npinfo)
+               kfree(npinfo);
        np->dev = NULL;
        dev_put(ndev);
        return -1;
@@ -706,9 +733,20 @@ int netpoll_setup(struct netpoll *np)
 
 void netpoll_cleanup(struct netpoll *np)
 {
-       if (np->dev)
-               np->dev->np = NULL;
-       dev_put(np->dev);
+       struct netpoll_info *npinfo;
+       unsigned long flags;
+
+       if (np->dev) {
+               npinfo = np->dev->npinfo;
+               if (npinfo && npinfo->rx_np == np) {
+                       spin_lock_irqsave(&npinfo->rx_lock, flags);
+                       npinfo->rx_np = NULL;
+                       npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+                       spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+               }
+               dev_put(np->dev);
+       }
+
        np->dev = NULL;
 }
 
--- linux-2.6.12-rc6/include/linux/netpoll.h.orig       2005-06-20 
19:51:47.000000000 -0400
+++ linux-2.6.12-rc6/include/linux/netpoll.h    2005-06-21 15:29:48.000000000 
-0400
@@ -16,14 +16,19 @@ struct netpoll;
 struct netpoll {
        struct net_device *dev;
        char dev_name[16], *name;
-       int rx_flags;
        void (*rx_hook)(struct netpoll *, int, char *, int);
        void (*drop)(struct sk_buff *skb);
        u32 local_ip, remote_ip;
        u16 local_port, remote_port;
        unsigned char local_mac[6], remote_mac[6];
+};
+
+struct netpoll_info {
        spinlock_t poll_lock;
        int poll_owner;
+       int rx_flags;
+       spinlock_t rx_lock;
+       struct netpoll *rx_np; /* netpoll that registered an rx_hook */
 };
 
 void netpoll_poll(struct netpoll *np);
@@ -39,22 +44,35 @@ void netpoll_queue(struct sk_buff *skb);
 #ifdef CONFIG_NETPOLL
 static inline int netpoll_rx(struct sk_buff *skb)
 {
-       return skb->dev->np && skb->dev->np->rx_flags && __netpoll_rx(skb);
+       struct netpoll_info *npinfo = skb->dev->npinfo;
+       unsigned long flags;
+       int ret = 0;
+
+       if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags))
+               return 0;
+
+       spin_lock_irqsave(&npinfo->rx_lock, flags);
+       /* check rx_flags again with the lock held */
+       if (npinfo->rx_flags && __netpoll_rx(skb))
+               ret = 1;
+       spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+
+       return ret;
 }
 
 static inline void netpoll_poll_lock(struct net_device *dev)
 {
-       if (dev->np) {
-               spin_lock(&dev->np->poll_lock);
-               dev->np->poll_owner = smp_processor_id();
+       if (dev->npinfo) {
+               spin_lock(&dev->npinfo->poll_lock);
+               dev->npinfo->poll_owner = smp_processor_id();
        }
 }
 
 static inline void netpoll_poll_unlock(struct net_device *dev)
 {
-       if (dev->np) {
-               spin_unlock(&dev->np->poll_lock);
-               dev->np->poll_owner = -1;
+       if (dev->npinfo) {
+               dev->npinfo->poll_owner = -1;
+               spin_unlock(&dev->npinfo->poll_lock);
        }
 }
 
--- linux-2.6.12-rc6/include/linux/netdevice.h.orig     2005-06-20 
20:26:21.000000000 -0400
+++ linux-2.6.12-rc6/include/linux/netdevice.h  2005-06-21 14:46:52.000000000 
-0400
@@ -41,7 +41,7 @@
 struct divert_blk;
 struct vlan_group;
 struct ethtool_ops;
-struct netpoll;
+struct netpoll_info;
                                        /* source back-compat hooks */
 #define SET_ETHTOOL_OPS(netdev,ops) \
        ( (netdev)->ethtool_ops = (ops) )
@@ -468,7 +468,7 @@ struct net_device
                                                     unsigned char *haddr);
        int                     (*neigh_setup)(struct net_device *dev, struct 
neigh_parms *);
 #ifdef CONFIG_NETPOLL
-       struct netpoll          *np;
+       struct netpoll_info     *npinfo;
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
        void                    (*poll_controller)(struct net_device *dev);

<Prev in Thread] Current Thread [Next in Thread>