netdev
[Top] [All Lists]

Re: [PATCH 2.5.69] Network packet type using RCU

To: Stephen Hemminger <shemminger@xxxxxxxx>
Subject: Re: [PATCH 2.5.69] Network packet type using RCU
From: Jamal Hadi <hadi@xxxxxxxxxxxxxxxx>
Date: Wed, 14 May 2003 07:23:32 -0400 (EDT)
Cc: "David S. Miller" <davem@xxxxxxxxxx>, netdev@xxxxxxxxxxx
In-reply-to: <20030509142538.548cbd71.shemminger@xxxxxxxx>
References: <20030509142538.548cbd71.shemminger@xxxxxxxx>
Sender: netdev-bounce@xxxxxxxxxxx
Apologies i was offline for a bit so dont know the history here.
I see a lot of move to rcu. There are benefits to using RCU in certain
cases (example occasional list edit with a lot more reads). Why does
there  seem to be a massive migration it seems to RCU? Something wrong
with the linked lists?

cheers,
jamal

On Fri, 9 May 2003, Stephen Hemminger wrote:

> Convert network packet type list from using brlock to RCU based list.
> Some collataral changes to af_packet were necessary since it needs to
> unregister packet types without sleeping.
>
> Summary:
> * packet type converted from linked list to list_macro
> * writer lock replaced with spin lock, readers use RCU
> * add __dev_remove_pack for callers that can't sleep.
> * af_packet changes to handle and sleeping requirements, and possible
>   races that could cause.
>
> Tested on 8-way SMP running 2.5.69 latest.
>
> Dave, please wait till Monday to apply this in case there are any
> comments.
>
> diff -urNp -X dontdiff linux-2.5/include/linux/netdevice.h 
> linux-2.5-nbr/include/linux/netdevice.h
> --- linux-2.5/include/linux/netdevice.h       2003-04-14 13:32:21.000000000 
> -0700
> +++ linux-2.5-nbr/include/linux/netdevice.h   2003-05-02 15:07:06.000000000 
> -0700
> @@ -456,7 +456,7 @@ struct packet_type
>       int                     (*func) (struct sk_buff *, struct net_device *,
>                                        struct packet_type *);
>       void                    *data;  /* Private to the packet type           
> */
> -     struct packet_type      *next;
> +     struct list_head        list;
>  };
>
>
> @@ -472,6 +472,7 @@ extern int                        
> netdev_boot_setup_check(st
>  extern struct net_device    *dev_getbyhwaddr(unsigned short type, char 
> *hwaddr);
>  extern void          dev_add_pack(struct packet_type *pt);
>  extern void          dev_remove_pack(struct packet_type *pt);
> +extern void          __dev_remove_pack(struct packet_type *pt);
>  extern int           dev_get(const char *name);
>  extern struct net_device     *dev_get_by_flags(unsigned short flags,
>                                                 unsigned short mask);
> diff -urNp -X dontdiff linux-2.5/net/core/dev.c linux-2.5-nbr/net/core/dev.c
> --- linux-2.5/net/core/dev.c  2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/core/dev.c      2003-05-05 09:44:36.000000000 -0700
> @@ -90,7 +90,6 @@
>  #include <linux/etherdevice.h>
>  #include <linux/notifier.h>
>  #include <linux/skbuff.h>
> -#include <linux/brlock.h>
>  #include <net/sock.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/proc_fs.h>
> @@ -170,8 +169,9 @@ const char *if_port_text[] = {
>   *           86DD    IPv6
>   */
>
> -static struct packet_type *ptype_base[16];   /* 16 way hashed list */
> -static struct packet_type *ptype_all;                /* Taps */
> +static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
> +static struct list_head ptype_base[16];      /* 16 way hashed list */
> +static struct list_head ptype_all;           /* Taps */
>
>  #ifdef OFFLINE_SAMPLE
>  static void sample_queue(unsigned long dummy);
> @@ -239,14 +239,17 @@ int netdev_nit;
>   *   Add a protocol handler to the networking stack. The passed &packet_type
>   *   is linked into kernel lists and may not be freed until it has been
>   *   removed from the kernel lists.
> + *
> + *   This call does not sleep therefore it can not
> + *   guarantee all CPU's that are in middle of receiving packets
> + *   will see the new packet type (until the next received packet).
>   */
>
>  void dev_add_pack(struct packet_type *pt)
>  {
>       int hash;
>
> -     br_write_lock_bh(BR_NETPROTO_LOCK);
> -
> +     spin_lock_bh(&ptype_lock);
>  #ifdef CONFIG_NET_FASTROUTE
>       /* Hack to detect packet socket */
>       if (pt->data && (long)(pt->data) != 1) {
> @@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt
>  #endif
>       if (pt->type == htons(ETH_P_ALL)) {
>               netdev_nit++;
> -             pt->next  = ptype_all;
> -             ptype_all = pt;
> +             list_add_rcu(&pt->list, &ptype_all);
>       } else {
>               hash = ntohs(pt->type) & 15;
> -             pt->next = ptype_base[hash];
> -             ptype_base[hash] = pt;
> +             list_add_rcu(&pt->list, &ptype_base[hash]);
>       }
> -     br_write_unlock_bh(BR_NETPROTO_LOCK);
> +     spin_unlock_bh(&ptype_lock);
>  }
>
>  extern void linkwatch_run_queue(void);
>
> +
> +
>  /**
> - *   dev_remove_pack  - remove packet handler
> + *   __dev_remove_pack        - remove packet handler
>   *   @pt: packet type declaration
>   *
>   *   Remove a protocol handler that was previously added to the kernel
>   *   protocol handlers by dev_add_pack(). The passed &packet_type is removed
>   *   from the kernel lists and can be freed or reused once this function
> - *   returns.
> + *   returns.
> + *
> + *      The packet type might still be in use by receivers
> + *   and must not be freed until after all the CPU's have gone
> + *   through a quiescent state.
>   */
> -void dev_remove_pack(struct packet_type *pt)
> +void __dev_remove_pack(struct packet_type *pt)
>  {
> -     struct packet_type **pt1;
> +     struct list_head *head;
> +     struct packet_type *pt1;
>
> -     br_write_lock_bh(BR_NETPROTO_LOCK);
> +     spin_lock_bh(&ptype_lock);
>
>       if (pt->type == htons(ETH_P_ALL)) {
>               netdev_nit--;
> -             pt1 = &ptype_all;
> +             head = &ptype_all;
>       } else
> -             pt1 = &ptype_base[ntohs(pt->type) & 15];
> +             head = &ptype_base[ntohs(pt->type) & 15];
>
> -     for (; *pt1; pt1 = &((*pt1)->next)) {
> -             if (pt == *pt1) {
> -                     *pt1 = pt->next;
> +     list_for_each_entry(pt1, head, list) {
> +             if (pt == pt1) {
>  #ifdef CONFIG_NET_FASTROUTE
>                       if (pt->data)
>                               netdev_fastroute_obstacles--;
>  #endif
> +                     list_del_rcu(&pt->list);
>                       goto out;
>               }
>       }
> +
>       printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
>  out:
> -     br_write_unlock_bh(BR_NETPROTO_LOCK);
> +     spin_unlock_bh(&ptype_lock);
> +}
> +/**
> + *   dev_remove_pack  - remove packet handler
> + *   @pt: packet type declaration
> + *
> + *   Remove a protocol handler that was previously added to the kernel
> + *   protocol handlers by dev_add_pack(). The passed &packet_type is removed
> + *   from the kernel lists and can be freed or reused once this function
> + *   returns.
> + *
> + *   This call sleeps to guarantee that no CPU is looking at the packet
> + *   type after return.
> + */
> +void dev_remove_pack(struct packet_type *pt)
> +{
> +     __dev_remove_pack(pt);
> +
> +     synchronize_net();
>  }
>
>  
> /******************************************************************************
> @@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *
>       struct packet_type *ptype;
>       do_gettimeofday(&skb->stamp);
>
> -     br_read_lock(BR_NETPROTO_LOCK);
> -     for (ptype = ptype_all; ptype; ptype = ptype->next) {
> +     rcu_read_lock();
> +     list_for_each_entry_rcu(ptype, &ptype_all, list) {
>               /* Never send packets back to the socket
>                * they originated from - MvS (miquels@xxxxxxxxxxxxxx)
>                */
> @@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *
>                       ptype->func(skb2, skb->dev, ptype);
>               }
>       }
> -     br_read_unlock(BR_NETPROTO_LOCK);
> +     rcu_read_unlock();
>  }
>
>  /* Calculate csum in the case, when packet is misrouted.
> @@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *sk
>       skb->h.raw = skb->nh.raw = skb->data;
>
>       pt_prev = NULL;
> -     for (ptype = ptype_all; ptype; ptype = ptype->next) {
> +     rcu_read_lock();
> +     list_for_each_entry_rcu(ptype, &ptype_all, list) {
>               if (!ptype->dev || ptype->dev == skb->dev) {
>                       if (pt_prev) {
>                               if (!pt_prev->data) {
> @@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *sk
>
>  #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
>       if (skb->dev->br_port) {
> -             int ret;
> -
>               ret = handle_bridge(skb, pt_prev);
>               if (br_handle_frame_hook(skb) == 0)
> -                     return ret;
> +                     goto out;
>
>               pt_prev = NULL;
>       }
>  #endif
>
> -     for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) {
> +     list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
>               if (ptype->type == type &&
>                   (!ptype->dev || ptype->dev == skb->dev)) {
>                       if (pt_prev) {
> @@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *sk
>               ret = NET_RX_DROP;
>       }
>
> + out:
> +     rcu_read_unlock();
>       return ret;
>  }
>
> @@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq
>       unsigned long start_time = jiffies;
>       int budget = netdev_max_backlog;
>
> -     br_read_lock(BR_NETPROTO_LOCK);
> +
> +     preempt_disable();
>       local_irq_disable();
>
>       while (!list_empty(&queue->poll_list)) {
> @@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq
>       }
>  out:
>       local_irq_enable();
> -     br_read_unlock(BR_NETPROTO_LOCK);
> +     preempt_enable();
>       return;
>
>  softnet_break:
> @@ -1995,9 +2024,9 @@ int netdev_set_master(struct net_device
>               dev_hold(master);
>       }
>
> -     br_write_lock_bh(BR_NETPROTO_LOCK);
>       slave->master = master;
> -     br_write_unlock_bh(BR_NETPROTO_LOCK);
> +
> +     synchronize_net();
>
>       if (old)
>               dev_put(old);
> @@ -2661,8 +2690,8 @@ int netdev_finish_unregister(struct net_
>  /* Synchronize with packet receive processing. */
>  void synchronize_net(void)
>  {
> -     br_write_lock_bh(BR_NETPROTO_LOCK);
> -     br_write_unlock_bh(BR_NETPROTO_LOCK);
> +     might_sleep();
> +     synchronize_kernel();
>  }
>
>  /**
> @@ -2846,6 +2875,10 @@ static int __init net_dev_init(void)
>
>       subsystem_register(&net_subsys);
>
> +     INIT_LIST_HEAD(&ptype_all);
> +     for (i = 0; i < 16; i++)
> +             INIT_LIST_HEAD(&ptype_base[i]);
> +
>  #ifdef CONFIG_NET_DIVERT
>       dv_init();
>  #endif /* CONFIG_NET_DIVERT */
> diff -urNp -X dontdiff linux-2.5/net/netsyms.c linux-2.5-nbr/net/netsyms.c
> --- linux-2.5/net/netsyms.c   2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/netsyms.c       2003-05-05 09:44:36.000000000 -0700
> @@ -577,6 +577,7 @@ EXPORT_SYMBOL(netif_rx);
>  EXPORT_SYMBOL(netif_receive_skb);
>  EXPORT_SYMBOL(dev_add_pack);
>  EXPORT_SYMBOL(dev_remove_pack);
> +EXPORT_SYMBOL(__dev_remove_pack);
>  EXPORT_SYMBOL(dev_get);
>  EXPORT_SYMBOL(dev_alloc);
>  EXPORT_SYMBOL(dev_alloc_name);
> diff -urNp -X dontdiff linux-2.5/net/packet/af_packet.c 
> linux-2.5-nbr/net/packet/af_packet.c
> --- linux-2.5/net/packet/af_packet.c  2003-05-05 09:41:03.000000000 -0700
> +++ linux-2.5-nbr/net/packet/af_packet.c      2003-05-05 11:14:52.000000000 
> -0700
> @@ -774,6 +774,7 @@ static int packet_release(struct socket
>                */
>               dev_remove_pack(&po->prot_hook);
>               po->running = 0;
> +             po->num = 0;
>               __sock_put(sk);
>       }
>
> @@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *s
>
>       spin_lock(&po->bind_lock);
>       if (po->running) {
> -             dev_remove_pack(&po->prot_hook);
>               __sock_put(sk);
>               po->running = 0;
> +             po->num = 0;
> +             spin_unlock(&po->bind_lock);
> +             dev_remove_pack(&po->prot_hook);
> +             spin_lock(&po->bind_lock);
>       }
>
>       po->num = protocol;
> @@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifi
>                       if (dev->ifindex == po->ifindex) {
>                               spin_lock(&po->bind_lock);
>                               if (po->running) {
> -                                     dev_remove_pack(&po->prot_hook);
> +                                     __dev_remove_pack(&po->prot_hook);
>                                       __sock_put(sk);
>                                       po->running = 0;
>                                       sk->err = ENETDOWN;
> @@ -1618,9 +1622,14 @@ static int packet_set_ring(struct sock *
>
>       /* Detach socket from network */
>       spin_lock(&po->bind_lock);
> -     if (po->running)
> -             dev_remove_pack(&po->prot_hook);
> +     if (po->running) {
> +             __dev_remove_pack(&po->prot_hook);
> +             po->num = 0;
> +             po->running = 0;
> +     }
>       spin_unlock(&po->bind_lock);
> +
> +     synchronize_net();
>
>       err = -EBUSY;
>       if (closing || atomic_read(&po->mapped) == 0) {
>
>
>

<Prev in Thread] Current Thread [Next in Thread>