--- linux.orig//drivers/net/loopback.c 2004-06-08 15:36:50.000000000 -0700 +++ linux//drivers/net/loopback.c 2004-06-11 14:04:33.000000000 -0700 @@ -56,6 +56,7 @@ #include #include +static struct net_device_stats *loopback_stats; #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16) @@ -117,13 +118,17 @@ dev_kfree_skb(skb); } +#define LOOPBACK_STAT_INC(field) \ + (per_cpu_ptr(loopback_stats, smp_processor_id())->field++) +#define LOOPBACK_STAT_ADD(field, n) \ + (per_cpu_ptr(loopback_stats, smp_processor_id())->field += n) + /* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). */ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = dev->priv; skb_orphan(skb); @@ -142,11 +147,11 @@ } dev->last_rx = jiffies; - if (likely(stats)) { - stats->rx_bytes+=skb->len; - stats->tx_bytes+=skb->len; - stats->rx_packets++; - stats->tx_packets++; + if (likely(loopback_stats)) { + LOOPBACK_STAT_ADD(rx_bytes, skb->len); + LOOPBACK_STAT_ADD(tx_bytes, skb->len); + LOOPBACK_STAT_INC(rx_packets); + LOOPBACK_STAT_INC(tx_packets); } netif_rx(skb); @@ -156,7 +161,28 @@ static struct net_device_stats *get_stats(struct net_device *dev) { - return (struct net_device_stats *)dev->priv; + struct net_device_stats *stats = dev->priv; + int i; + + if (unlikely(!stats)) { + return NULL; + } + + memset(stats, 0, sizeof(struct net_device_stats)); + if (unlikely(!loopback_stats)) { + return stats; + } + + for (i=0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + stats->rx_bytes += per_cpu_ptr(loopback_stats, i)->rx_bytes; + stats->tx_bytes += per_cpu_ptr(loopback_stats, i)->tx_bytes; + stats->rx_packets += per_cpu_ptr(loopback_stats, i)->rx_packets; + stats->tx_packets += per_cpu_ptr(loopback_stats, i)->tx_packets; + } + + return stats; } struct net_device loopback_dev = { @@ -173,7 +199,8 @@ .rebuild_header = eth_rebuild_header, .flags = IFF_LOOPBACK, .features = NETIF_F_SG|NETIF_F_FRAGLIST - |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA, + |NETIF_F_NO_CSUM|NETIF_F_HIGHDMA + |NETIF_F_LLTX, }; /* Setup and register the of the LOOPBACK device. */ @@ -188,6 +215,8 @@ loopback_dev.priv = stats; loopback_dev.get_stats = &get_stats; } + + loopback_stats = alloc_percpu(struct net_device_stats); return register_netdev(&loopback_dev); }; --- linux.orig//include/linux/netdevice.h 2004-06-08 15:36:51.000000000 -0700 +++ linux//include/linux/netdevice.h 2004-06-14 09:20:26.000000000 -0700 @@ -403,6 +403,7 @@ #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ +#define NETIF_F_LLTX 4096 /* LockLess TX */ /* Called after device is detached from network. */ void (*uninit)(struct net_device *dev); --- linux.orig//include/net/pkt_sched.h 2004-06-08 15:36:51.000000000 -0700 +++ linux//include/net/pkt_sched.h 2004-06-11 13:46:25.000000000 -0700 @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_TSC @@ -92,6 +93,7 @@ struct net_device *dev; struct tc_stats stats; + struct rcu_head q_rcu; int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); /* This field is deprecated, but it is still used by CBQ --- linux.orig//net/core/dev.c 2004-06-08 15:36:53.000000000 -0700 +++ linux//net/core/dev.c 2004-06-14 09:26:18.000000000 -0700 @@ -107,6 +107,7 @@ #include #include #include +#include #ifdef CONFIG_NET_RADIO #include /* Note : will define WIRELESS_EXT */ #include @@ -1277,6 +1278,20 @@ return 0; } +#define HARD_TX_LOCK_BH(dev, cpu) { \ + if ( dev->features && NETIF_F_LLTX == 0 ) { \ + spin_lock_bh(&dev->xmit_lock); \ + dev->xmit_lock_owner = cpu; \ + } \ +} + +#define HARD_TX_UNLOCK_BH(dev) { \ + if ( dev->features && NETIF_F_LLTX == 0 ) { \ + dev->xmit_lock_owner = -1; \ + spin_unlock_bh(&dev->xmit_lock); \ + } \ +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1321,18 +1336,34 @@ goto out; } - /* Grab device queue */ - spin_lock_bh(&dev->queue_lock); + rcu_read_lock(); + /* Updates of qdisc are serialized by queue_lock. + * The struct Qdisc which is pointed to by qdisc is now a + * rcu structure - it may be accessed without acquiring + * a lock (but the structure may be stale.) The freeing of the + * qdisc will be deferred until it's known that there are no + * more references to it. + * + * If the qdisc has an enqueue function, we still need to + * hold the queue_lock before calling it, since queue_lock + * also serializes access to the device queue. + */ + q = dev->qdisc; if (q->enqueue) { + /* Grab device queue */ + spin_lock_bh(&dev->queue_lock); + rc = q->enqueue(skb, q); qdisc_run(dev); spin_unlock_bh(&dev->queue_lock); + rcu_read_unlock(); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } + rcu_read_unlock(); /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... @@ -1347,17 +1378,12 @@ Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { + preempt_disable(); int cpu = smp_processor_id(); if (dev->xmit_lock_owner != cpu) { - /* - * The spin_lock effectivly does a preempt lock, but - * we are about to drop that... - */ - preempt_disable(); - spin_unlock(&dev->queue_lock); - spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = cpu; + + HARD_TX_LOCK_BH(dev, cpu); preempt_enable(); if (!netif_queue_stopped(dev)) { @@ -1366,18 +1392,17 @@ rc = 0; if (!dev->hard_start_xmit(skb, dev)) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); + HARD_TX_UNLOCK_BH(dev); goto out; } } - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); + HARD_TX_UNLOCK_BH(dev); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); goto out_enetdown; } else { + preempt_enable(); /* Recursion is detected! It is possible, * unfortunately */ if (net_ratelimit()) @@ -1385,7 +1410,6 @@ "%s, fix it urgently!\n", dev->name); } } - spin_unlock_bh(&dev->queue_lock); out_enetdown: rc = -ENETDOWN; out_kfree_skb: --- linux.orig//net/sched/sch_generic.c 2004-06-08 15:36:53.000000000 -0700 +++ linux//net/sched/sch_generic.c 2004-06-11 13:52:06.000000000 -0700 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -404,18 +405,36 @@ ops->reset(qdisc); } +/* this is the rcu callback function to clean up a qdisc when there + * are no further references to it */ + +static void __qdisc_destroy (void * arg) +{ + struct Qdisc *qdisc = (struct Qdisc *) arg; + struct Qdisc_ops *ops = qdisc->ops; + +#ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&qdisc->stats); +#endif + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + module_put(ops->owner); + + if (!(qdisc->flags&TCQ_F_BUILTIN)) + kfree(qdisc); +} + /* Under dev->queue_lock and BH! */ void qdisc_destroy(struct Qdisc *qdisc) { - struct Qdisc_ops *ops = qdisc->ops; - struct net_device *dev; + struct net_device *dev = qdisc->dev; if (!atomic_dec_and_test(&qdisc->refcnt)) return; - dev = qdisc->dev; - if (dev) { struct Qdisc *q, **qp; for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) { @@ -425,16 +444,9 @@ } } } -#ifdef CONFIG_NET_ESTIMATOR - qdisc_kill_estimator(&qdisc->stats); -#endif - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - module_put(ops->owner); - if (!(qdisc->flags&TCQ_F_BUILTIN)) - kfree(qdisc); + + call_rcu(&qdisc->q_rcu, __qdisc_destroy, qdisc); + }