netdev
[Top] [All Lists]

Re: [PATCH] skb field reservation v2.3.34

To: netdev@xxxxxxxxxxx
Subject: Re: [PATCH] skb field reservation v2.3.34
From: Rusty Russell <rusty@xxxxxxxxxxxxxxxx>
Date: Thu, 30 Dec 1999 21:11:00 +1100
In-reply-to: Your message of "Tue, 28 Dec 1999 19:04:52 +0300." <199912281604.TAA22785@ms2.inr.ac.ru>
Sender: owner-netdev@xxxxxxxxxxx
In message <199912281604.TAA22785@xxxxxxxxxxxxx> Alexey writes:
> Such objects miss main required property: conservation while
> skb_clone/skb_copy. I see _no_ applications for such feature,

Very good point.

Changes:
1) Region is copied on skb_clone/skb_copy.
2) Callbacks when that is done (just like the destructor).
3) No more dcache lines get hit in __kfree_skb on no-reservations case.
4) Use array for iteration when there are reservations.
5) Everything wrapped in CONFIG_NETFILTER (can be moved later if desired).
6) s/size_t/unsigned int/
7) Against 2.3.35

Almost all changes in first two files of patch... (skbuff.h and
skbuff.c).

> > can also be used for other things where you need to play with skbs.
> 
> Paul. When we want to play, we just add new field to skb and that's all. 8)8)
> If game turns out to be not interesting, we delete it. Did you forget that
> Linux has public sources?

I don't want to see the following inside skbuff.h:

#if defined(CONFIG_NETFILTER_IP_CONNTRACK) || \
    defined(CONFIG_NETFILTER_IP_CONNTRACK_MODULE)
        void    *ip_connection;         /* For connection tracking */
#endif

#if defined(CONFIG_NETFILTER_IP_CONNTRACK_FTP) || \
    defined(CONFIG_NETFILTER_IP_CONNTRACK_FTP_MODULE)
        u_int16_t       ftp_offset, ftp_length; /* For ftp tracking */
#endif

> Also, I remember you liked to talk about coding style. 8)

I only like to talk about other people's coding style 8-).

Happy random holiday,
Rusty.

diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/net/core/skbuff.c linux-2.3/net/core/skbuff.c
--- linux-2.3-official/net/core/skbuff.c        Wed Dec 29 23:19:30 1999
+++ linux-2.3/net/core/skbuff.c Thu Dec 30 18:37:46 1999
@@ -19,6 +19,7 @@
  *             Ray VanTassle   :       Fixed --skb->lock in free
  *             Alan Cox        :       skb_copy copy arp field
  *             Andi Kleen      :       slabified it.
+ *             Rusty Russell   :       field reservation
  *
  *     NOTE:
  *             The __skb_ routines should be called with interrupts 
@@ -77,6 +78,154 @@
 
 static kmem_cache_t *skbuff_head_cache;
 
+#ifdef CONFIG_NETFILTER
+static LIST_HEAD(field_allocs);
+/* These don't need to be atomic_t, but play safe --RR  */
+static atomic_t field_generation = ATOMIC_INIT(0);
+static rwlock_t field_lock = RW_LOCK_UNLOCKED;
+
+/* Cache-friendly form: flat copy of one registration's callback info */
+struct field_funcs
+{
+       u_int16_t offset, size; /* copied from the skb_field's offset/size */
+       int generation;         /* copied from the skb_field's gen */
+       void (*func)(struct sk_buff *skb); /* its destructor or copy hook */
+};
+static atomic_t field_copiers = ATOMIC_INIT(0);
+static struct field_funcs field_copy[SKB_RESERVE_SIZE]; 
+static atomic_t field_destructors = ATOMIC_INIT(0);
+static struct field_funcs field_destroy[SKB_RESERVE_SIZE]; 
+
+int skb_field_reserve(struct skb_field *reg)
+{
+       struct list_head *i;
+       unsigned int align_mask = (reg->alignment - 1);
+       int ret = 0;
+       /* Returns 0 with reg->offset/reg->gen filled in, or -ENOMEM */
+       reg->offset = 0;
+       write_lock_bh(&field_lock);
+       reg->gen = atomic_read(&field_generation) + 1;
+
+       /* Yes it's an ordered list, no we don't do garbage collection */
+       for (i = field_allocs.next; i != &field_allocs; i = i->next) {
+               struct skb_field *f = (struct skb_field *)i;
+
+               if (reg->offset + reg->size <= f->offset)
+                       break;
+
+               /* offset = last aligned possibility */
+               reg->offset = (f->offset + f->size + align_mask) & ~align_mask;
+       }
+       /* A field may end exactly at the end of skb->reserve[], hence <= */
+       if (reg->offset + reg->size <= SKB_RESERVE_SIZE) {
+               list_add_tail(&reg->list, i);
+               if (reg->destructor) {
+                       field_destroy[atomic_read(&field_destructors)]
+                               = ((struct field_funcs){ reg->offset,
+                                                        reg->size,
+                                                        reg->gen,
+                                                        reg->destructor });
+                       atomic_inc(&field_destructors);
+               }
+               if (reg->copy) {
+                       field_copy[atomic_read(&field_copiers)]
+                               = ((struct field_funcs){ reg->offset,
+                                                        reg->size,
+                                                        reg->gen,
+                                                        reg->copy });
+                       atomic_inc(&field_copiers);
+               }
+       } else
+               ret = -ENOMEM; /* -EEDITSKBUFF.H */
+
+       /* Need field_copiers and field_destructors updated first */
+       wmb();
+       atomic_inc(&field_generation);
+       write_unlock_bh(&field_lock);
+       return ret;
+}
+
+static void reserve_eliminate(struct skb_field *unreg,
+                             struct field_funcs *funcs,
+                             atomic_t *num_funcs)
+{
+       int i, num;
+       /* Drop the funcs[] entry whose offset matches unreg->offset */
+       atomic_dec(num_funcs);
+       num = atomic_read(num_funcs); /* count after removal */
+       /* A match in the old last slot (index num) needs no move at all */
+       for (i = 0; i < num; i++) {
+               if (funcs[i].offset == unreg->offset) {
+                       /* Close the gap: shift the (num - i) later entries down */
+                       memmove(funcs + i, funcs + i + 1, 
+                               sizeof(struct field_funcs) * (num - i));
+                       break;
+               }
+       }
+}
+
+void skb_field_unreserve(struct skb_field *unreg)
+{
+       write_lock_bh(&field_lock);
+       atomic_inc(&field_generation); /* bumped before the tables change */
+       wmb(); /* NOTE(review): presumably orders the bump before removals */
+       list_del(&unreg->list); /* unlink the registration from its list */
+       if (unreg->destructor) /* only registered in field_destroy if set */
+               reserve_eliminate(unreg, field_destroy, &field_destructors);
+       if (unreg->copy) /* only registered in field_copy if set */
+               reserve_eliminate(unreg, field_copy, &field_copiers);
+
+       write_unlock_bh(&field_lock);
+}
+
+/* Called very rarely; skb alloc'ed before field registration. */
+void skb_field_update(struct sk_buff *skb)
+{      
+       struct list_head *i;
+       int gen;
+
+       write_lock_bh(&field_lock);
+       gen = atomic_read(&field_generation);
+
+       /* Zero every field registered after this skb's generation */
+       for (i = field_allocs.next; i != &field_allocs; i = i->next) {
+               struct skb_field *f = (struct skb_field *)i;
+
+               if ((int)f->gen - (int)skb->reserve_gen > 0)
+                       memset(skb->reserve+f->offset, 0, f->size);
+       }
+       skb->reserved_copies = atomic_read(&field_copiers);
+       skb->reserved_destructors = atomic_read(&field_destructors);
+       write_unlock_bh(&field_lock);
+
+       skb->reserve_gen = gen; /* skb now matches the current generation */
+}
+
+static inline void reserve_do_funcs(struct sk_buff *skb,
+                                   const struct field_funcs *funcs,
+                                   atomic_t *num_funcs)
+{
+       unsigned int i, num;
+       /* Call funcs[i].func(skb) for each in-use field, under the read lock */
+       read_lock_bh(&field_lock);
+       num = atomic_read(num_funcs);
+       /* Entries newer than the skb's generation are skipped */
+       for (i = 0; i < num; i++) {
+               if ((int)skb->reserve_gen - (int)funcs[i].generation >= 0) {
+                       unsigned int j;
+                       /* Any non-zero byte in the field counts as "in use" */
+                       for (j = 0; j < funcs[i].size; j++) {
+                               if (skb->reserve[j + funcs[i].offset]) {
+                                       funcs[i].func(skb);
+                                       break;
+                               }
+                       }
+               }
+       }
+       read_unlock_bh(&field_lock);
+}
+#endif
+
 /*
  *     Keep out-of-line to prevent kernel bloat.
  *     __builtin_return_address is not used because it is not always
@@ -165,6 +314,15 @@
        skb->is_clone = 0;
        skb->cloned = 0;
 
+#ifdef CONFIG_NETFILTER
+       /* Race here ok: if they ever use a field, and generation
+           changed between these assignment statements, skb will be
+           updated */
+       skb->reserve_gen = atomic_read(&field_generation);
+       skb->reserved_copies = atomic_read(&field_copiers);
+       skb->reserved_destructors = atomic_read(&field_destructors);
+#endif
+
 #ifdef CONFIG_ATM
        ATM_SKB(skb)->iovcnt = 0;
 #endif
@@ -201,7 +359,8 @@
        skb->dst = NULL;
        skb->rx_dev = NULL;
 #ifdef CONFIG_NETFILTER
-       skb->nfmark = skb->nfreason = skb->nfcache = 0;
+       memset(skb->reserve, 0, sizeof(skb->reserve));
+       skb->nfcache = 0;
 #ifdef CONFIG_NETFILTER_DEBUG
        skb->nf_debug = 0;
 #endif
@@ -235,8 +394,13 @@
        }
 
        dst_release(skb->dst);
+#ifdef CONFIG_NETFILTER
+       if (skb->reserved_destructors)
+               reserve_do_funcs(skb, field_destroy, &field_destructors);
+#endif
        if(skb->destructor)
                skb->destructor(skb);
+
 #ifdef CONFIG_NET              
        if(skb->rx_dev)
                dev_put(skb->rx_dev);
@@ -272,6 +436,12 @@
        n->is_clone = 1;
        atomic_set(&n->users, 1);
        n->destructor = NULL;
+
+#ifdef CONFIG_NETFILTER
+       if (n->reserved_copies)
+               reserve_do_funcs(skb, field_copy, &field_copiers);
+#endif
+               
        return n;
 }
 
@@ -301,13 +471,19 @@
        new->destructor = NULL;
        new->security=old->security;
 #ifdef CONFIG_NETFILTER
-       new->nfmark=old->nfmark;
-       new->nfreason=old->nfreason;
-       new->nfcache=old->nfcache;
 #ifdef CONFIG_NETFILTER_DEBUG
        new->nf_debug=old->nf_debug;
 #endif
-#endif
+       new->nfcache=old->nfcache;
+
+       new->reserve_gen = old->reserve_gen;
+       new->reserved_destructors = old->reserved_destructors;
+       new->reserved_copies = old->reserved_copies;
+       memcpy(new->reserve, old->reserve, sizeof(old->reserve));
+
+       if (new->reserved_copies)
+               reserve_do_funcs(new, field_copy, &field_copiers);
+#endif /*CONFIG_NETFILTER*/
 }
 
 /*
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/include/linux/skbuff.h 
linux-2.3/include/linux/skbuff.h
--- linux-2.3-official/include/linux/skbuff.h   Thu Dec 30 18:37:13 1999
+++ linux-2.3/include/linux/skbuff.h    Thu Dec 30 17:25:52 1999
@@ -37,6 +37,8 @@
 #define NET_CALLER(arg) __builtin_return_address(0)
 #endif
 
+#define SKB_RESERVE_SIZE 32
+
 struct sk_buff_head {
        /* These two members must be first. */
        struct sk_buff  * next;
@@ -97,7 +99,9 @@
                        cloned,                 /* head may be cloned (check 
refcnt to be sure). */
                        pkt_type,               /* Packet class                 
                */
                        pkt_bridged,            /* Tracker for bridging         
                */
-                       ip_summed;              /* Driver fed us an IP checksum 
                */
+                       ip_summed,              /* Driver fed us an IP checksum 
                */
+                       reserved_copies,        /* Reserved fields on copy      
                */
+                       reserved_destructors;   /* Reserved fields on kfree     
                */
        __u32           priority;               /* Packet queueing priority     
                */
        atomic_t        users;                  /* User count - see 
datagram.c,tcp.c            */
        unsigned short  protocol;               /* Packet protocol from driver. 
                */
@@ -109,11 +113,12 @@
        unsigned char   *tail;                  /* Tail pointer                 
                */
        unsigned char   *end;                   /* End pointer                  
                */
        void            (*destructor)(struct sk_buff *);        /* Destruct 
function            */
+       
 #ifdef CONFIG_NETFILTER
-       /* Can be used for communication between hooks. */
-        unsigned long  nfmark;
-       /* Reason for doing this to the packet (see netfilter.h) */
-       __u32           nfreason;
+       /* See skb_field_reserve()/skb_field_unreserve() */
+       int             reserve_gen;
+       char            reserve[SKB_RESERVE_SIZE];
+
        /* Cache info */
        __u32           nfcache;
 #ifdef CONFIG_NETFILTER_DEBUG
@@ -183,6 +188,41 @@
 extern void                    skb_trim(struct sk_buff *skb, unsigned int len);
 extern void    skb_over_panic(struct sk_buff *skb, int len, void *here);
 extern void    skb_under_panic(struct sk_buff *skb, int len, void *here);
+
+#ifdef CONFIG_NETFILTER
+/* Selling space in skb's: the VCs will love it... */
+struct skb_field
+{
+       /* Filled in by skb_field_reserve() */
+       struct list_head list;
+       unsigned int offset;
+       int gen;
+
+       /* Filled in by caller. */
+       unsigned int size;
+       unsigned int alignment;  /* Use __alignof__ */
+       /* Don't call skb_field_unreserve from this: deadlock. */
+       void (*destructor)(struct sk_buff *);
+       /* Called after clone or copy. */
+       void (*copy)(struct sk_buff *);
+};
+
+extern int     skb_field_reserve(struct skb_field *reg);
+extern void    skb_field_unreserve(struct skb_field *unreg);
+
+/* Private */
+extern void    skb_field_update(struct sk_buff *skb);
+
+/* Access a field of an skb */
+#define skb_field(skb, reg, type)                                      \
+({                                                                     \
+       if ((skb)->reserve_gen - (reg)->gen < 0) skb_field_update(skb); \
+       &__skb_field(skb, reg, type);                                   \
+})
+
+/* If you reserve at boot, no skb can predate you, so use this. */
+#define __skb_field(skb, reg, type) (*((type *)((skb)->reserve+(reg)->offset)))
+#endif /* CONFIG_NETFILTER */
 
 /* Backwards compatibility */
 #define skb_realloc_headroom(skb, nhr) skb_copy_expand(skb, nhr, 
skb_tailroom(skb), GFP_ATOMIC)
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/include/linux/netfilter.h 
linux-2.3/include/linux/netfilter.h
--- linux-2.3-official/include/linux/netfilter.h        Thu Dec 30 18:43:00 1999
+++ linux-2.3/include/linux/netfilter.h Thu Dec 30 17:29:16 1999
@@ -15,7 +15,7 @@
 #define NF_ACCEPT 1
 #define NF_STOLEN 2
 #define NF_QUEUE 3
-#define NF_MAX_VERDICT NF_QUEUE
+/* >= NF_QUEUE treated the same. */
 
 /* Generic cache responses from hook functions. */
 #define NFC_ALTERED 0x8000
@@ -141,10 +141,8 @@
        int pf;
        /* Bitmask of hook numbers to match (1 << hooknum). */
        unsigned int hookmask;
-       /* If non-zero, only catch packets with this mark. */
-       unsigned int mark;
-       /* If non-zero, only catch packets of this reason. */
-       unsigned int reason;
+       /* If not 0xFFFFFFFF, only catch packets with this queue. */
+       int queuenum;
 
        struct nf_wakeme *wake;
 };
@@ -154,11 +152,8 @@
 extern void nf_unregister_interest(struct nf_interest *interest);
 extern void nf_getinfo(const struct sk_buff *skb,
                       struct net_device **indev,
-                      struct net_device **outdev,
-                      unsigned long *mark);
-extern void nf_reinject(struct sk_buff *skb,
-                       unsigned long mark,
-                       unsigned int verdict);
+                      struct net_device **outdev);
+extern void nf_reinject(struct sk_buff *skb, unsigned int verdict);
 
 #ifdef CONFIG_NETFILTER_DEBUG
 extern void nf_dump_skb(int pf, struct sk_buff *skb);
@@ -192,14 +187,4 @@
 #define SUMAX(a,b) ((size_t)(a)>(size_t)(b) ? (ssize_t)(a) : (ssize_t)(b))
 #define SUMIN(a,b) ((size_t)(a)<(size_t)(b) ? (ssize_t)(a) : (ssize_t)(b))
 #endif /*__KERNEL__*/
-
-enum nf_reason {
-       /* Do not, NOT, reorder these.  Add at end. */
-       NF_REASON_NONE,
-       NF_REASON_SET_BY_IPCHAINS,
-       NF_REASON_FOR_ROUTING,
-       NF_REASON_FOR_CLS_FW,
-       NF_REASON_MIN_RESERVED_FOR_CONNTRACK = 1024,
-};
-
 #endif /*__LINUX_NETFILTER_H*/
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/include/net/route.h 
linux-2.3/include/net/route.h
--- linux-2.3-official/include/net/route.h      Thu Dec 30 18:43:00 1999
+++ linux-2.3/include/net/route.h       Thu Dec 30 17:29:16 1999
@@ -110,7 +110,9 @@
 extern int             ip_rt_ioctl(unsigned int cmd, void *arg);
 extern void            ip_rt_get_source(u8 *src, struct rtable *rt);
 extern int             ip_rt_dump(struct sk_buff *skb,  struct 
netlink_callback *cb);
-
+#ifdef CONFIG_IP_ROUTE_FWMARK
+extern struct skb_field ip_rt_mark_res;
+#endif
 
 extern __inline__ void ip_rt_put(struct rtable * rt)
 {
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/net/core/netfilter.c 
linux-2.3/net/core/netfilter.c
--- linux-2.3-official/net/core/netfilter.c     Tue Dec 21 14:20:03 1999
+++ linux-2.3/net/core/netfilter.c      Wed Dec 29 19:27:45 1999
@@ -22,7 +22,7 @@
 #include <linux/unistd.h>
 
 /* In this code, we can be waiting indefinitely for userspace to
- * service a packet if a hook returns NF_QUEUE.  We could keep a count
+ * service a packet if a hook returns >= NF_QUEUE.  We could keep a count
  * of skbuffs queued for userspace, and not deregister a hook unless
  * this is zero, but that sucks.  Now, we simply check when the
  * packets come back: if the hook is gone, the packet is discarded. */
@@ -40,8 +40,8 @@
 
        /* If we're sent to userspace, this keeps housekeeping info */
        int pf;
-       unsigned long mark;
        unsigned int hook;
+       unsigned int queuenum;
        struct net_device *indev, *outdev;
        int (*okfn)(struct sk_buff *);
 };
@@ -53,6 +53,10 @@
 static LIST_HEAD(nf_sockopts);
 static LIST_HEAD(nf_interested);
 
+static struct skb_field skb_res 
+= { { NULL, NULL }, 0, 0,
+    sizeof(struct nf_info *), __alignof__(struct nf_info *), NULL, NULL };
+
 int nf_register_hook(struct nf_hook_ops *reg)
 {
        struct list_head *i;
@@ -358,11 +362,10 @@
 {
        for (*i = (*i)->next; *i != head; *i = (*i)->next) {
                struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-               switch (elem->hook(hook, skb, indev, outdev, okfn)) {
-               case NF_QUEUE:
-                       NFDEBUG("nf_iterate: NF_QUEUE for %p.\n", *skb);
-                       return NF_QUEUE;
+               unsigned int verdict
+                       = elem->hook(hook, skb, indev, outdev, okfn);
 
+               switch (verdict) {
                case NF_STOLEN:
                        NFDEBUG("nf_iterate: NF_STOLEN for %p.\n", *skb);
                        return NF_STOLEN;
@@ -371,14 +374,12 @@
                        NFDEBUG("nf_iterate: NF_DROP for %p.\n", *skb);
                        return NF_DROP;
 
-#ifdef CONFIG_NETFILTER_DEBUG
                case NF_ACCEPT:
                        break;
 
                default:
-                       NFDEBUG("Evil return from %p(%u).\n", 
-                               elem->hook, hook);
-#endif
+                       NFDEBUG("nf_iterate: %u for %p.\n", verdict, *skb);
+                       return verdict;
                }
        }
        return NF_ACCEPT;
@@ -389,7 +390,8 @@
                     int pf, unsigned int hook,
                     struct net_device *indev,
                     struct net_device *outdev,
-                    int (*okfn)(struct sk_buff *))
+                    int (*okfn)(struct sk_buff *),
+                    unsigned int queuenum)
 {
        struct list_head *i;
 
@@ -402,13 +404,14 @@
 
        /* Can't do struct assignments with arrays in them.  Damn. */
        info->elem = (struct nf_hook_ops *)elem;
-       info->mark = skb->nfmark;
        info->pf = pf;
        info->hook = hook;
        info->okfn = okfn;
        info->indev = indev;
        info->outdev = outdev;
-       skb->nfmark = (unsigned long)info;
+       info->queuenum = queuenum;
+
+       __skb_field(skb, &skb_res, struct nf_info *) = info;
 
        /* Bump dev refs so they don't vanish while packet is out */
        if (indev) dev_hold(indev);
@@ -419,8 +422,8 @@
 
                if ((recip->hookmask & (1 << info->hook))
                    && info->pf == recip->pf
-                   && (!recip->mark || info->mark == recip->mark)
-                   && (!recip->reason || skb->nfreason == recip->reason)) {
+                   && (recip->queuenum == 0xFFFFFFFF
+                       || info->queuenum == recip->queuenum)) {
                        /* FIXME: Andi says: use netlink.  Hmmm... --RR */
                        if (skb_queue_len(&recip->wake->skbq) >= 100) {
                                NFDEBUG("nf_hook: queue to long.\n");
@@ -428,8 +431,8 @@
                        }
                        /* Hand it to userspace for collection */
                        skb_queue_tail(&recip->wake->skbq, skb);
-                       NFDEBUG("Waking up pf=%i hook=%u mark=%lu reason=%u\n",
-                               pf, hook, skb->nfmark, skb->nfreason);
+                       NFDEBUG("Waking up pf=%i hook=%u\n",
+                               pf, hook);
                        wake_up_interruptible(&recip->wake->sleep);
 
                        return;
@@ -473,9 +476,10 @@
        elem = &nf_hooks[pf][hook];
        verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
                             outdev, &elem, okfn);
-       if (verdict == NF_QUEUE) {
+       if (verdict >= NF_QUEUE) {
                NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-               nf_queue(skb, elem, pf, hook, indev, outdev, okfn);
+               nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+                        verdict - NF_QUEUE);
        }
        read_unlock_bh(&nf_lock);
 
@@ -517,24 +521,24 @@
 
        /* Blow away any queued skbs; this is overzealous. */
        while ((skb = skb_dequeue(&interest->wake->skbq)) != NULL)
-               nf_reinject(skb, 0, NF_DROP);
+               nf_reinject(skb, NF_DROP);
 }
 
 void nf_getinfo(const struct sk_buff *skb, 
                struct net_device **indev,
-               struct net_device **outdev,
-               unsigned long *mark)
+               struct net_device **outdev)
 {
-       const struct nf_info *info = (const struct nf_info *)skb->nfmark;
+       const struct nf_info *info = 
+               __skb_field(skb, &skb_res, struct nf_info *);
 
        *indev = info->indev;
        *outdev = info->outdev;
-       *mark = info->mark;
 }
 
-void nf_reinject(struct sk_buff *skb, unsigned long mark, unsigned int verdict)
+void nf_reinject(struct sk_buff *skb, unsigned int verdict)
 {
-       struct nf_info *info = (struct nf_info *)skb->nfmark;
+       const struct nf_info *info = 
+               __skb_field(skb, &skb_res, struct nf_info *);
        struct list_head *elem = &info->elem->list;
        struct list_head *i;
 
@@ -551,16 +555,16 @@
        /* Continue traversal iff userspace said ok, and devices still
            exist... */
        if (verdict == NF_ACCEPT) {
-               skb->nfmark = mark;
                verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
                                     &skb, info->hook, 
                                     info->indev, info->outdev, &elem,
                                     info->okfn);
        }
 
-       if (verdict == NF_QUEUE) {
+       if (verdict >= NF_QUEUE) {
                nf_queue(skb, elem, info->pf, info->hook, 
-                        info->indev, info->outdev, info->okfn);
+                        info->indev, info->outdev, info->okfn,
+                        verdict - NF_QUEUE);
        }
        read_unlock_bh(&nf_lock);
 
@@ -626,4 +630,8 @@
        for (i = 0; i < NPROTO; i++)
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
+
+       if (skb_field_reserve(&skb_res) != 0)
+               panic("Can't reserve a %u byte field in skb\n",
+                     sizeof(struct nf_info *));
 }
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/net/ipv4/route.c linux-2.3/net/ipv4/route.c
--- linux-2.3-official/net/ipv4/route.c Wed Dec 29 23:19:30 1999
+++ linux-2.3/net/ipv4/route.c  Thu Dec 30 15:35:47 1999
@@ -127,6 +127,10 @@
 static struct timer_list rt_periodic_timer =
        { NULL, NULL, 0, 0L, NULL };
 
+#ifdef CONFIG_IP_ROUTE_FWMARK
+struct skb_field ip_rt_mark_res
+= { { NULL, NULL }, 0, 0, sizeof(__u32), __alignof__(__u32), NULL, NULL };
+#endif
 /*
  *     Interface to generic destination cache.
  */
@@ -1107,10 +1111,7 @@
        rth->rt_dst     = daddr;
        rth->key.tos    = tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-       if (skb->nfreason == NF_REASON_FOR_ROUTING)
-               rth->key.fwmark = skb->nfmark;
-       else 
-               rth->key.fwmark = 0;
+       rth->key.fwmark = __skb_field(skb, &ip_rt_mark_res, __u32);
 #endif
        rth->key.src    = saddr;
        rth->rt_src     = saddr;
@@ -1189,10 +1190,7 @@
        key.src = saddr;
        key.tos = tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-       if (skb->nfreason == NF_REASON_FOR_ROUTING)
-               key.fwmark = skb->nfmark;
-       else 
-               key.fwmark = 0;
+       key.fwmark = __skb_field(skb, &ip_rt_mark_res, __u32);
 #endif
        key.iif = dev->ifindex;
        key.oif = 0;
@@ -1314,10 +1312,7 @@
        rth->rt_dst     = daddr;
        rth->key.tos    = tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-       if (skb->nfreason == NF_REASON_FOR_ROUTING)
-               rth->key.fwmark = skb->nfmark;
-       else 
-               rth->key.fwmark = 0;
+       rth->key.fwmark = __skb_field(skb, &ip_rt_mark_res, __u32);
 #endif
        rth->key.src    = saddr;
        rth->rt_src     = saddr;
@@ -1391,10 +1386,7 @@
        rth->rt_dst     = daddr;
        rth->key.tos    = tos;
 #ifdef CONFIG_IP_ROUTE_FWMARK
-       if (skb->nfreason == NF_REASON_FOR_ROUTING)
-               rth->key.fwmark = skb->nfmark;
-       else 
-               rth->key.fwmark = 0;
+       rth->key.fwmark = __skb_field(skb, &ip_rt_mark_res, __u32);
 #endif
        rth->key.src    = saddr;
        rth->rt_src     = saddr;
@@ -1482,8 +1474,7 @@
                    rth->key.oif == 0 &&
 #ifdef CONFIG_IP_ROUTE_FWMARK
                    rth->key.fwmark 
-                   == (skb->nfreason == NF_REASON_FOR_ROUTING 
-                       ? skb->nfmark : 0) &&
+                   == __skb_field(skb, &ip_rt_mark_res, __u32) &&
 #endif
                    rth->key.tos == tos) {
                        rth->u.dst.lastuse = jiffies;
@@ -2166,5 +2157,10 @@
        proc_net_create ("rt_cache", 0, rt_cache_get_info);
 #ifdef CONFIG_NET_CLS_ROUTE
        create_proc_read_entry("net/rt_acct", 0, 0, ip_rt_acct_read, NULL);
+#endif
+
+#ifdef CONFIG_IP_ROUTE_FWMARK
+       if (skb_field_reserve(&ip_rt_mark_res) != 0)
+               panic("ip_rt_init: can't reserve mark in skb");
 #endif
 }
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/net/netsyms.c linux-2.3/net/netsyms.c
--- linux-2.3-official/net/netsyms.c    Wed Dec 29 23:19:30 1999
+++ linux-2.3/net/netsyms.c     Wed Dec 29 19:27:41 1999
@@ -251,6 +251,9 @@
 EXPORT_SYMBOL(inetdev_by_index);
 EXPORT_SYMBOL(in_dev_finish_destroy);
 EXPORT_SYMBOL(ip_defrag);
+#ifdef CONFIG_IP_ROUTE_FWMARK
+EXPORT_SYMBOL(ip_rt_mark_res);
+#endif
 
 /* Route manipulation */
 EXPORT_SYMBOL(ip_rt_ioctl);
@@ -580,7 +583,14 @@
 EXPORT_SYMBOL(nf_register_interest);
 EXPORT_SYMBOL(nf_unregister_interest);
 EXPORT_SYMBOL(nf_hook_slow);
-#endif
+EXPORT_SYMBOL(skb_field_reserve);
+EXPORT_SYMBOL(skb_field_unreserve);
+EXPORT_SYMBOL(skb_field_update);
+#ifdef CONFIG_NET_CLS_FW
+extern struct skb_field cls_fw_res;
+EXPORT_SYMBOL(cls_fw_res);
+#endif /* CONFIG_NET_CLS_FW */
+#endif /* CONFIG_NETFILTER */
 
 EXPORT_SYMBOL(register_gifconf);
 
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/net/sched/cls_api.c 
linux-2.3/net/sched/cls_api.c
--- linux-2.3-official/net/sched/cls_api.c      Fri Oct 15 15:51:35 1999
+++ linux-2.3/net/sched/cls_api.c       Thu Dec 23 20:41:22 1999
@@ -461,6 +461,14 @@
        INIT_TC_FILTER(route4);
 #endif
 #ifdef CONFIG_NET_CLS_FW
+#ifdef CONFIG_NETFILTER
+       {
+               extern struct skb_field cls_fw_res;
+
+               if (skb_field_reserve(&cls_fw_res) != 0)
+                       panic("tc_filter_init: Can't reserve field cls_fw");
+       }
+#endif
        INIT_TC_FILTER(fw);
 #endif
 #ifdef CONFIG_NET_CLS_RSVP
diff -urN --minimal --exclude classlist.h --exclude devlist.h --exclude *.lds 
--exclude autoconf.h --exclude compile.h --exclude version.h --exclude .* 
--exclude *.[oa] --exclude *.orig --exclude config --exclude asm --exclude 
modules --exclude *.[Ss] --exclude System.map --exclude consolemap_deftbl.c 
--exclude *~ --exclude TAGS --exclude tags --exclude modversions.h --exclude 
install-kernel linux-2.3-official/net/sched/cls_fw.c 
linux-2.3/net/sched/cls_fw.c
--- linux-2.3-official/net/sched/cls_fw.c       Fri Oct 15 15:51:29 1999
+++ linux-2.3/net/sched/cls_fw.c        Wed Dec 29 20:16:20 1999
@@ -40,6 +40,9 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
+struct skb_field cls_fw_res 
+= { { NULL, NULL }, 0, 0, sizeof(__u32), __alignof__(u32), NULL, NULL };
+
 struct fw_head
 {
        struct fw_filter *ht[256];
@@ -66,7 +69,7 @@
        struct fw_head *head = (struct fw_head*)tp->root;
        struct fw_filter *f;
 #ifdef CONFIG_NETFILTER
-       u32 id = (skb->nfreason == NF_REASON_FOR_CLS_FW ? skb->nfmark : 0);
+       u32 id = *skb_field(skb, &cls_fw_res, u32);
 #else
        u32 id = 0;
 #endif
@@ -375,11 +378,28 @@
 #ifdef MODULE
 int init_module(void)
 {
-       return register_tcf_proto_ops(&cls_fw_ops);
+       int ret = 0;
+
+#ifdef CONFIG_NETFILTER
+       ret = skb_field_reserve(&cls_fw_res);
+#endif
+
+       if (ret == 0) {
+               ret = register_tcf_proto_ops(&cls_fw_ops);
+#ifdef CONFIG_NETFILTER
+               if (ret != 0)
+                       skb_field_unreserve(&cls_fw_res);
+#endif
+       }
+
+       return ret;
 }
 
 void cleanup_module(void) 
 {
        unregister_tcf_proto_ops(&cls_fw_ops);
+#ifdef CONFIG_NETFILTER
+       skb_field_unreserve(&cls_fw_res);
+#endif
 }
 #endif
--
Hacking time.

<Prev in Thread] Current Thread [Next in Thread>