OK Simon/Robert/Mr. Foo :), give this a try; it's my final installment
for the evening :-)
If this shows improvement, we can make even larger strides
by moving the struct flowi up into struct dst_entry.
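To give you an idea of what I mean (a hypothetical sketch only, field
order illustrative; the real change would have to touch every protocol
that wraps a dst):

	struct dst_entry {
		struct dst_entry	*next;
		atomic_t		__refcnt;	/* client references	*/
		int			__use;
		struct flowi		fl;		/* lookup key, moved up
							 * from rtable et al.	*/
		struct dst_entry	*child;
		struct net_device	*dev;
		/* ... rest unchanged ... */
	};

With the key living in the generic object, the cache lookup could
prefetch one dst_entry and get the flow key for free, instead of each
protocol burying its own flowi the way rth->fl does today.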
--- net/core/dst.c.~1~ Mon Jun 9 01:47:26 2003
+++ net/core/dst.c Mon Jun 9 03:13:56 2003
@@ -122,13 +122,34 @@ void * dst_alloc(struct dst_ops * ops)
dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC);
if (!dst)
return NULL;
- memset(dst, 0, ops->entry_size);
+ dst->next = NULL;
atomic_set(&dst->__refcnt, 0);
- dst->ops = ops;
+ dst->__use = 0;
+ dst->child = NULL;
+ dst->dev = NULL;
+ dst->obsolete = 0;
+ dst->flags = 0;
dst->lastuse = jiffies;
+ dst->expires = 0;
+ dst->header_len = 0;
+ dst->trailer_len = 0;
+ memset(dst->metrics, 0, sizeof(dst->metrics));
dst->path = dst;
+ dst->rate_last = 0;
+ dst->rate_tokens = 0;
+ dst->error = 0;
+ dst->neighbour = NULL;
+ dst->hh = NULL;
+ dst->xfrm = NULL;
dst->input = dst_discard;
dst->output = dst_blackhole;
+#ifdef CONFIG_NET_CLS_ROUTE
+ dst->tclassid = 0;
+#endif
+ dst->ops = ops;
+ INIT_RCU_HEAD(&dst->rcu_head);
+ memset(dst->info, 0,
+ ops->entry_size - offsetof(struct dst_entry, info));
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
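The dst.c hunk above boils down to this pattern (a standalone sketch
with made-up names, not the kernel code): instead of memset()ing the
whole object and then rewriting half the fields anyway, initialize the
hot head by hand and bulk-clear only the cold, protocol-private tail,
using offsetof() to find where it starts:

	#include <stddef.h>
	#include <string.h>

	struct obj {
		/* hot head: written explicitly on every allocation */
		struct obj	*next;
		unsigned long	lastuse;
		/* cold tail: protocol-private area, bulk-cleared */
		char		info[];
	};

	static void obj_init(struct obj *p, size_t entry_size)
	{
		p->next = NULL;		/* hot fields get real values...   */
		p->lastuse = 0;		/* ...not memset()-then-store	   */
		memset(p->info, 0, entry_size - offsetof(struct obj, info));
	}

Every hot field is stored exactly once, and the cache lines holding the
head get dirtied by useful stores rather than by a redundant memset().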
--- net/ipv4/route.c.~1~ Sun Jun 8 23:28:00 2003
+++ net/ipv4/route.c Mon Jun 9 06:49:15 2003
@@ -88,6 +88,7 @@
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/rcupdate.h>
+#include <linux/prefetch.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
@@ -882,6 +883,60 @@ static void rt_del(unsigned hash, struct
spin_unlock_bh(&rt_hash_table[hash].lock);
}
+static void __rt_hash_shrink(unsigned int hash)
+{
+ struct rtable *rth, **rthp;
+ struct rtable *cand, **candp;
+ unsigned int min_use = ~(unsigned int) 0;
+
+ spin_lock_bh(&rt_hash_table[hash].lock);
+ cand = NULL;
+ candp = NULL;
+ rthp = &rt_hash_table[hash].chain;
+ while ((rth = *rthp) != NULL) {
+ if (!atomic_read(&rth->u.dst.__refcnt) &&
+ ((unsigned int) rth->u.dst.__use) < min_use) {
+ cand = rth;
+ candp = rthp;
+ min_use = rth->u.dst.__use;
+ }
+ rthp = &rth->u.rt_next;
+ }
+ if (cand) {
+ *candp = cand->u.rt_next;
+ rt_free(cand);
+ }
+
+ spin_unlock_bh(&rt_hash_table[hash].lock);
+}
+
+static inline struct rtable *ip_rt_dst_alloc(unsigned int hash)
+{
+ if (atomic_read(&ipv4_dst_ops.entries) >
+ ipv4_dst_ops.gc_thresh)
+ __rt_hash_shrink(hash);
+
+ return dst_alloc(&ipv4_dst_ops);
+}
+
+static void ip_rt_copy(struct rtable *rt, struct rtable *old)
+{
+ memcpy(rt, old, sizeof(*rt));
+
+ INIT_RCU_HEAD(&rt->u.dst.rcu_head);
+ rt->u.dst.__use = 1;
+ atomic_set(&rt->u.dst.__refcnt, 1);
+ rt->u.dst.child = NULL;
+ if (rt->u.dst.dev)
+ dev_hold(rt->u.dst.dev);
+ rt->u.dst.obsolete = 0;
+ rt->u.dst.lastuse = jiffies;
+ rt->u.dst.path = &rt->u.dst;
+ rt->u.dst.neighbour = NULL;
+ rt->u.dst.hh = NULL;
+ rt->u.dst.xfrm = NULL;
+}
+
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
u32 saddr, u8 tos, struct net_device *dev)
{
@@ -912,9 +967,10 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
for (i = 0; i < 2; i++) {
for (k = 0; k < 2; k++) {
- unsigned hash = rt_hash_code(daddr,
- skeys[i] ^ (ikeys[k] << 5),
- tos);
+ unsigned int hash = rt_hash_code(daddr,
+ skeys[i] ^
+ (ikeys[k] << 5),
+ tos);
rthp=&rt_hash_table[hash].chain;
@@ -942,7 +998,7 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
dst_hold(&rth->u.dst);
rcu_read_unlock();
- rt = dst_alloc(&ipv4_dst_ops);
+ rt = ip_rt_dst_alloc(hash);
if (rt == NULL) {
ip_rt_put(rth);
in_dev_put(in_dev);
@@ -950,19 +1006,7 @@ void ip_rt_redirect(u32 old_gw, u32 dadd
}
/* Copy all the information. */
- *rt = *rth;
- INIT_RCU_HEAD(&rt->u.dst.rcu_head);
- rt->u.dst.__use = 1;
- atomic_set(&rt->u.dst.__refcnt, 1);
- rt->u.dst.child = NULL;
- if (rt->u.dst.dev)
- dev_hold(rt->u.dst.dev);
- rt->u.dst.obsolete = 0;
- rt->u.dst.lastuse = jiffies;
- rt->u.dst.path = &rt->u.dst;
- rt->u.dst.neighbour = NULL;
- rt->u.dst.hh = NULL;
- rt->u.dst.xfrm = NULL;
+ ip_rt_copy(rt, rth);
rt->rt_flags |= RTCF_REDIRECTED;
@@ -1352,7 +1396,7 @@ static void rt_set_nexthop(struct rtable
static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
u8 tos, struct net_device *dev, int our)
{
- unsigned hash;
+ unsigned int hash;
struct rtable *rth;
u32 spec_dst;
struct in_device *in_dev = in_dev_get(dev);
@@ -1375,7 +1419,9 @@ static int ip_route_input_mc(struct sk_b
dev, &spec_dst, &itag) < 0)
goto e_inval;
- rth = dst_alloc(&ipv4_dst_ops);
+ hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
+
+ rth = ip_rt_dst_alloc(hash);
if (!rth)
goto e_nobufs;
@@ -1421,7 +1467,6 @@ static int ip_route_input_mc(struct sk_b
RT_CACHE_STAT_INC(in_slow_mc);
in_dev_put(in_dev);
- hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
e_nobufs:
@@ -1584,45 +1629,42 @@ int ip_route_input_slow(struct sk_buff *
goto e_inval;
}
- rth = dst_alloc(&ipv4_dst_ops);
+ rth = ip_rt_dst_alloc(hash);
if (!rth)
goto e_nobufs;
atomic_set(&rth->u.dst.__refcnt, 1);
- rth->u.dst.flags= DST_HOST;
- if (in_dev->cnf.no_policy)
- rth->u.dst.flags |= DST_NOPOLICY;
- if (in_dev->cnf.no_xfrm)
- rth->u.dst.flags |= DST_NOXFRM;
- rth->fl.fl4_dst = daddr;
+ rth->u.dst.dev = out_dev->dev;
+ dev_hold(out_dev->dev);
+ rth->u.dst.flags= (DST_HOST |
+ (in_dev->cnf.no_policy ? DST_NOPOLICY : 0) |
+ (in_dev->cnf.no_xfrm ? DST_NOXFRM : 0));
+ rth->u.dst.input = ip_forward;
+ rth->u.dst.output = ip_output;
+
+ rth->rt_flags = flags;
+ rth->rt_src = saddr;
rth->rt_dst = daddr;
- rth->fl.fl4_tos = tos;
+ rth->rt_iif = dev->ifindex;
+ rth->rt_gateway = daddr;
+
+ rth->fl.iif = dev->ifindex;
+ rth->fl.fl4_dst = daddr;
+ rth->fl.fl4_src = saddr;
#ifdef CONFIG_IP_ROUTE_FWMARK
rth->fl.fl4_fwmark= skb->nfmark;
#endif
- rth->fl.fl4_src = saddr;
- rth->rt_src = saddr;
- rth->rt_gateway = daddr;
+ rth->fl.fl4_tos = tos;
+ rth->rt_spec_dst= spec_dst;
#ifdef CONFIG_IP_ROUTE_NAT
rth->rt_src_map = fl.fl4_src;
rth->rt_dst_map = fl.fl4_dst;
- if (flags&RTCF_DNAT)
+ if (flags & RTCF_DNAT)
rth->rt_gateway = fl.fl4_dst;
#endif
- rth->rt_iif =
- rth->fl.iif = dev->ifindex;
- rth->u.dst.dev = out_dev->dev;
- dev_hold(rth->u.dst.dev);
- rth->fl.oif = 0;
- rth->rt_spec_dst= spec_dst;
-
- rth->u.dst.input = ip_forward;
- rth->u.dst.output = ip_output;
rt_set_nexthop(rth, &res, itag);
- rth->rt_flags = flags;
-
#ifdef CONFIG_NET_FASTROUTE
if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) {
struct net_device *odev = rth->u.dst.dev;
@@ -1663,45 +1705,45 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
- rth = dst_alloc(&ipv4_dst_ops);
+ rth = ip_rt_dst_alloc(hash);
if (!rth)
goto e_nobufs;
+ atomic_set(&rth->u.dst.__refcnt, 1);
+ rth->u.dst.dev = &loopback_dev;
+ dev_hold(&loopback_dev);
+ rth->u.dst.flags= (DST_HOST |
+ (in_dev->cnf.no_policy ? DST_NOPOLICY : 0));
+ rth->u.dst.input= ip_local_deliver;
rth->u.dst.output= ip_rt_bug;
+#ifdef CONFIG_NET_CLS_ROUTE
+ rth->u.dst.tclassid = itag;
+#endif
- atomic_set(&rth->u.dst.__refcnt, 1);
- rth->u.dst.flags= DST_HOST;
- if (in_dev->cnf.no_policy)
- rth->u.dst.flags |= DST_NOPOLICY;
- rth->fl.fl4_dst = daddr;
+ rth->rt_flags = flags|RTCF_LOCAL;
+ rth->rt_type = res.type;
+ rth->rt_src = saddr;
rth->rt_dst = daddr;
- rth->fl.fl4_tos = tos;
+ rth->rt_iif = dev->ifindex;
+ rth->rt_gateway = daddr;
+
+ rth->fl.iif = dev->ifindex;
+ rth->fl.fl4_dst = daddr;
+ rth->fl.fl4_src = saddr;
#ifdef CONFIG_IP_ROUTE_FWMARK
rth->fl.fl4_fwmark= skb->nfmark;
#endif
- rth->fl.fl4_src = saddr;
- rth->rt_src = saddr;
+ rth->fl.fl4_tos = tos;
+ rth->rt_spec_dst= spec_dst;
#ifdef CONFIG_IP_ROUTE_NAT
rth->rt_dst_map = fl.fl4_dst;
rth->rt_src_map = fl.fl4_src;
#endif
-#ifdef CONFIG_NET_CLS_ROUTE
- rth->u.dst.tclassid = itag;
-#endif
- rth->rt_iif =
- rth->fl.iif = dev->ifindex;
- rth->u.dst.dev = &loopback_dev;
- dev_hold(rth->u.dst.dev);
- rth->rt_gateway = daddr;
- rth->rt_spec_dst= spec_dst;
- rth->u.dst.input= ip_local_deliver;
- rth->rt_flags = flags|RTCF_LOCAL;
if (res.type == RTN_UNREACHABLE) {
rth->u.dst.input= ip_error;
rth->u.dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
- rth->rt_type = res.type;
goto intern;
no_route:
@@ -1767,6 +1809,8 @@ int ip_route_input(struct sk_buff *skb,
tos &= IPTOS_RT_MASK;
hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
+ prefetch(&rt_hash_table[hash].chain->fl);
+
rcu_read_lock();
for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
smp_read_barrier_depends();
@@ -2048,7 +2092,10 @@ make_route:
}
}
- rth = dst_alloc(&ipv4_dst_ops);
+ hash = rt_hash_code(oldflp->fl4_dst,
+ oldflp->fl4_src ^ (oldflp->oif << 5), tos);
+
+ rth = ip_rt_dst_alloc(hash);
if (!rth)
goto e_nobufs;
@@ -2104,10 +2151,6 @@ make_route:
rt_set_nexthop(rth, &res, 0);
-
- rth->rt_flags = flags;
-
- hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos);
err = rt_intern_hash(hash, rth, rp);
done:
if (free_res)
@@ -2132,6 +2175,8 @@ int __ip_route_output_key(struct rtable
struct rtable *rth;
hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5),
flp->fl4_tos);
+
+ prefetch(&rt_hash_table[hash].chain->fl);
rcu_read_lock();
for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {