Linus, please apply; this patch is against v2.3.99-pre6.
We used to simply hit a wall when the firewalling code was tracking
too many connections. This patch applies a number of strategies to
mitigate that:
1) Don't keep track of connections when their packets are dropped.
2) Forget connections which have only seen a RST reply.
3) Do randomish/LRU drop on unreplied connections when we're
under stress.
We still have an issue with being able to chew up serious amounts of
CPU even over 10baseT, and there are more tricks we can do when we
have TCP window tracking.
Also includes some cleanups in the fast `connection already
established' path.
Rusty.
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/Documentation/Configure.help
working/Documentation/Configure.help
--- linux-2.3.99-pre-6-2-rusty/Documentation/Configure.help Fri Apr 14
17:33:30 2000
+++ working/Documentation/Configure.help Mon Apr 24 13:00:30 2000
@@ -1771,7 +1771,7 @@
CONFIG_IP_NF_MATCH_LIMIT
limit matching allows you to control the rate at which a rule can be
matched: mainly useful in combination with the LOG target ("LOG
- target support", below).
+ target support", below) and to avoid some Denial of Service attacks.
If you want to compile it as a module, say M here and read
Documentation/modules.txt. If unsure, say `N'.
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/include/linux/netfilter_ipv4/ip_conntrack.h
working/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.3.99-pre-6-2-rusty/include/linux/netfilter_ipv4/ip_conntrack.h
Mon Apr 17 16:25:08 2000
+++ working/include/linux/netfilter_ipv4/ip_conntrack.h Sun Apr 23 22:41:38 2000
@@ -51,7 +51,10 @@
IPS_EXPECTED = 0x01,
/* We've seen packets both ways: bit 1 set. Can be set, not unset. */
- IPS_SEEN_REPLY = 0x02
+ IPS_SEEN_REPLY = 0x02,
+
+ /* Packet seen leaving box: bit 2 set. Can be set, not unset. */
+ IPS_CONFIRMED = 0x04
};
struct ip_conntrack_expect
@@ -88,7 +91,7 @@
struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
/* Have we seen traffic both ways yet? (bitset) */
- unsigned int status;
+ volatile unsigned int status;
/* Timer function; drops refcnt when it goes off. */
struct timer_list timeout;
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/include/linux/netfilter_ipv4/ip_conntrack_core.h
working/include/linux/netfilter_ipv4/ip_conntrack_core.h
--- linux-2.3.99-pre-6-2-rusty/include/linux/netfilter_ipv4/ip_conntrack_core.h
Mon Apr 17 16:25:08 2000
+++ working/include/linux/netfilter_ipv4/ip_conntrack_core.h Thu Apr 20
13:00:38 2000
@@ -20,8 +20,9 @@
extern struct ip_conntrack_protocol *__find_proto(u_int8_t protocol);
extern struct list_head protocol_list;
-/* Returns TRUE if it dealt with ICMP, and filled in skb->nfct */
-int icmp_error_track(struct sk_buff *skb);
+/* Returns conntrack if it dealt with ICMP, and filled in skb->nfct */
+extern struct ip_conntrack *icmp_error_track(struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo);
extern int get_tuple(const struct iphdr *iph, size_t len,
struct ip_conntrack_tuple *tuple,
struct ip_conntrack_protocol *protocol);
@@ -30,6 +31,9 @@
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack);
+
+/* Confirm a connection */
+void ip_conntrack_confirm(struct ip_conntrack *ct);
extern unsigned int ip_conntrack_htable_size;
extern struct list_head *ip_conntrack_hash;
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_conntrack_core.c
working/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_conntrack_core.c Fri Apr
14 17:41:01 2000
+++ working/net/ipv4/netfilter/ip_conntrack_core.c Sun Apr 23 22:59:02 2000
@@ -157,10 +157,47 @@
}
static void
+clean_from_lists(struct ip_conntrack *ct)
+{
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+ /* Remove from both hash lists */
+ LIST_DELETE(&ip_conntrack_hash
+ [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+ LIST_DELETE(&ip_conntrack_hash
+ [hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
+ &ct->tuplehash[IP_CT_DIR_REPLY]);
+ /* If our expected is in the list, take it out. */
+ if (ct->expected.expectant) {
+ IP_NF_ASSERT(list_inlist(&expect_list, &ct->expected));
+ IP_NF_ASSERT(ct->expected.expectant == ct);
+ LIST_DELETE(&expect_list, &ct->expected);
+ }
+}
+
+static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
+ /* Unconfirmed connections haven't been cleaned up by the
+ timer: hence they cannot be simply deleted here. */
+ if (!(ct->status & IPS_CONFIRMED)) {
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Race check: they can't get a reference if noone has
+ one and we have the write lock. */
+ if (atomic_read(&ct->ct_general.use) == 0) {
+ clean_from_lists(ct);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ } else {
+ /* Either a last-minute confirmation (ie. ct
+ now has timer attached), or a last-minute
+ new skb has reference (still unconfirmed). */
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return;
+ }
+ }
+
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
IP_NF_ASSERT(!timer_pending(&ct->timeout));
@@ -178,19 +215,7 @@
struct ip_conntrack *ct = (void *)ul_conntrack;
WRITE_LOCK(&ip_conntrack_lock);
- /* Remove from both hash lists */
- LIST_DELETE(&ip_conntrack_hash
- [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
- &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
- LIST_DELETE(&ip_conntrack_hash
- [hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
- &ct->tuplehash[IP_CT_DIR_REPLY]);
- /* If our expected is in the list, take it out. */
- if (ct->expected.expectant) {
- IP_NF_ASSERT(list_inlist(&expect_list, &ct->expected));
- IP_NF_ASSERT(ct->expected.expectant == ct);
- LIST_DELETE(&expect_list, &ct->expected);
- }
+ clean_from_lists(ct);
WRITE_UNLOCK(&ip_conntrack_lock);
ip_conntrack_put(ct);
}
@@ -235,6 +260,26 @@
return h;
}
+/* Confirm a connection */
+void
+ip_conntrack_confirm(struct ip_conntrack *ct)
+{
+ DEBUGP("Confirming conntrack %p\n", ct);
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Race check */
+ if (!(ct->status & IPS_CONFIRMED)) {
+ IP_NF_ASSERT(!timer_pending(&ct->timeout));
+ ct->status |= IPS_CONFIRMED;
+ /* Timer relative to confirmation time, not original
+ setting time, otherwise we'd get timer wrap in
+ wierd delay cases. */
+ ct->timeout.expires += jiffies;
+ add_timer(&ct->timeout);
+ atomic_inc(&ct->ct_general.use);
+ }
+ WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
/* Returns true if a connection correspondings to the tuple (required
for NAT). */
int
@@ -250,24 +295,28 @@
return h != NULL;
}
-/* Returns TRUE if it dealt with ICMP, and filled in skb fields */
-int icmp_error_track(struct sk_buff *skb)
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+struct ip_conntrack *
+icmp_error_track(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
- const struct iphdr *iph = skb->nh.iph;
- struct icmphdr *hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
+ const struct iphdr *iph;
+ struct icmphdr *hdr;
struct ip_conntrack_tuple innertuple, origtuple;
- struct iphdr *inner = (struct iphdr *)(hdr + 1);
- size_t datalen = skb->len - iph->ihl*4 - sizeof(*hdr);
+ struct iphdr *inner;
+ size_t datalen;
struct ip_conntrack_protocol *innerproto;
struct ip_conntrack_tuple_hash *h;
- enum ip_conntrack_info ctinfo;
- if (iph->protocol != IPPROTO_ICMP)
- return 0;
+ IP_NF_ASSERT(iph->protocol == IPPROTO_ICMP);
+
+ iph = skb->nh.iph;
+ hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
+ inner = (struct iphdr *)(hdr + 1);
+ datalen = skb->len - iph->ihl*4 - sizeof(*hdr);
if (skb->len < iph->ihl * 4 + sizeof(struct icmphdr)) {
DEBUGP("icmp_error_track: too short\n");
- return 1;
+ return NULL;
}
if (hdr->type != ICMP_DEST_UNREACH
@@ -275,12 +324,12 @@
&& hdr->type != ICMP_TIME_EXCEEDED
&& hdr->type != ICMP_PARAMETERPROB
&& hdr->type != ICMP_REDIRECT)
- return 0;
+ return NULL;
/* Ignore it if the checksum's bogus. */
if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) {
DEBUGP("icmp_error_track: bad csum\n");
- return 1;
+ return NULL;
}
innerproto = find_proto(inner->protocol);
@@ -290,28 +339,68 @@
DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n",
inner->protocol, inner->ihl, 8,
datalen);
- return 1;
+ return NULL;
}
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
DEBUGP("icmp_error_track: Can't invert tuple\n");
- return 1;
+ return NULL;
}
h = ip_conntrack_find_get(&innertuple, NULL);
if (!h) {
DEBUGP("icmp_error_track: no match\n");
- return 1;
+ return NULL;
+ }
+ if (!(h->ctrack->status & IPS_CONFIRMED)) {
+ DEBUGP("icmp_error_track: unconfirmed\n");
+ ip_conntrack_put(h->ctrack);
+ return NULL;
}
- ctinfo = IP_CT_RELATED;
+ *ctinfo = IP_CT_RELATED;
if (DIRECTION(h) == IP_CT_DIR_REPLY)
- ctinfo += IP_CT_IS_REPLY;
+ *ctinfo += IP_CT_IS_REPLY;
/* Update skb to refer to this connection */
- skb->nfct = &h->ctrack->infos[ctinfo];
- return 1;
+ skb->nfct = &h->ctrack->infos[*ctinfo];
+ return h->ctrack;
+}
+
+/* There's a small race here where we may free a just-replied to
+ connection. Too bad: we're in trouble anyway. */
+static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
+{
+ /* Unconfirmed connections either really fresh or transitory
+ anyway */
+ if (!(i->ctrack->status & IPS_SEEN_REPLY)
+ && (i->ctrack->status & IPS_CONFIRMED))
+ return 1;
+ return 0;
+}
+
+static int early_drop(struct list_head *chain)
+{
+ /* Traverse backwards: gives us oldest, which is roughly LRU */
+ struct ip_conntrack_tuple_hash *h;
+ int dropped = 0;
+
+ READ_LOCK(&ip_conntrack_lock);
+ h = LIST_FIND(chain, unreplied, struct ip_conntrack_tuple_hash *);
+ if (h)
+ atomic_inc(&h->ctrack->ct_general.use);
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ if (!h)
+ return dropped;
+
+ if (del_timer(&h->ctrack->timeout)) {
+ death_by_timeout((unsigned long)h->ctrack);
+ dropped = 1;
+ }
+ ip_conntrack_put(h->ctrack);
+ return dropped;
}
static inline int helper_cmp(const struct ip_conntrack_helper *i,
@@ -345,29 +434,38 @@
enum ip_conntrack_info ctinfo;
unsigned long extra_jiffies;
int i;
+ static unsigned int drop_next = 0;
- if (!invert_tuple(&repl_tuple, tuple, protocol)) {
- DEBUGP("Can't invert tuple.\n");
- return 1;
- }
+ hash = hash_conntrack(tuple);
- if(ip_conntrack_max &&
- (atomic_read(&ip_conntrack_count) >= ip_conntrack_max)) {
+ if (ip_conntrack_max &&
+ atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
if (net_ratelimit())
- printk(KERN_WARNING "ip_conntrack: maximum limit of %d
entries exceeded\n", ip_conntrack_max);
+ printk(KERN_WARNING "ip_conntrack: maximum limit of"
+ " %d entries exceeded\n", ip_conntrack_max);
+
+ /* Try dropping from random chain, or else from the
+ chain about to put into (in case they're trying to
+ bomb one hash chain). */
+ if (!early_drop(&ip_conntrack_hash[drop_next++])
+ && !early_drop(&ip_conntrack_hash[hash]))
+ return 1;
+ }
+
+ if (!invert_tuple(&repl_tuple, tuple, protocol)) {
+ DEBUGP("Can't invert tuple.\n");
return 1;
}
+ repl_hash = hash_conntrack(&repl_tuple);
conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) {
DEBUGP("Can't allocate conntrack.\n");
return 1;
}
- hash = hash_conntrack(tuple);
- repl_hash = hash_conntrack(&repl_tuple);
memset(conntrack, 0, sizeof(struct ip_conntrack));
- atomic_set(&conntrack->ct_general.use, 2);
+ atomic_set(&conntrack->ct_general.use, 1);
conntrack->ct_general.destroy = destroy_conntrack;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
@@ -381,17 +479,17 @@
kmem_cache_free(ip_conntrack_cachep, conntrack);
return 1;
}
+ /* Don't set timer yet: wait for confirmation */
+ init_timer(&conntrack->timeout);
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
- conntrack->timeout.expires = jiffies + extra_jiffies;
- add_timer(&conntrack->timeout);
+ conntrack->timeout.expires = extra_jiffies;
/* Sew in at head of hash list. */
WRITE_LOCK(&ip_conntrack_lock);
/* Check noone else beat us in the race... */
if (__ip_conntrack_find(tuple, NULL)) {
WRITE_UNLOCK(&ip_conntrack_lock);
- printk("ip_conntrack: Wow someone raced us!\n");
kmem_cache_free(ip_conntrack_cachep, conntrack);
return 0;
}
@@ -417,70 +515,70 @@
&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]);
list_prepend(&ip_conntrack_hash[repl_hash],
&conntrack->tuplehash[IP_CT_DIR_REPLY]);
+ atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
/* Update skb to refer to this connection */
skb->nfct = &conntrack->infos[ctinfo];
- atomic_inc(&ip_conntrack_count);
return 1;
}
-static void
-resolve_normal_ct(struct sk_buff *skb, int create)
+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
+static inline struct ip_conntrack *
+resolve_normal_ct(struct sk_buff *skb,
+ struct ip_conntrack_protocol *proto,
+ enum ip_conntrack_info *ctinfo)
{
struct ip_conntrack_tuple tuple;
struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_protocol *proto;
- enum ip_conntrack_info ctinfo;
- proto = find_proto(skb->nh.iph->protocol);
if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto))
- return;
+ return NULL;
/* Loop around search/insert race */
do {
/* look for tuple match */
h = ip_conntrack_find_get(&tuple, NULL);
- if (!h && (!create || init_conntrack(&tuple, proto, skb)))
- return;
+ if (!h && init_conntrack(&tuple, proto, skb))
+ return NULL;
} while (!h);
/* It exists; we have (non-exclusive) reference. */
if (DIRECTION(h) == IP_CT_DIR_REPLY) {
- ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
+ /* Reply on unconfirmed connection => unclassifiable */
+ if (!(h->ctrack->status & IPS_CONFIRMED)) {
+ DEBUGP("Reply on unconfirmed connection\n");
+ ip_conntrack_put(h->ctrack);
+ return NULL;
+ }
+
+ *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
h->ctrack->status |= IPS_SEEN_REPLY;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
if (h->ctrack->status & IPS_SEEN_REPLY) {
DEBUGP("ip_conntrack_in: normal packet for %p\n",
h->ctrack);
- ctinfo = IP_CT_ESTABLISHED;
+ *ctinfo = IP_CT_ESTABLISHED;
} else if (h->ctrack->status & IPS_EXPECTED) {
DEBUGP("ip_conntrack_in: related packet for %p\n",
h->ctrack);
- ctinfo = IP_CT_RELATED;
+ *ctinfo = IP_CT_RELATED;
} else {
DEBUGP("ip_conntrack_in: new packet for %p\n",
h->ctrack);
- ctinfo = IP_CT_NEW;
+ *ctinfo = IP_CT_NEW;
}
}
- skb->nfct = &h->ctrack->infos[ctinfo];
+ skb->nfct = &h->ctrack->infos[*ctinfo];
+ return h->ctrack;
}
/* Return conntrack and conntrack_info a given skb */
-static struct ip_conntrack *
-__ip_conntrack_get(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- int create)
+inline struct ip_conntrack *
+ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
- if (!skb->nfct) {
- /* It may be an icmp error... */
- if (!icmp_error_track(skb))
- resolve_normal_ct(skb, create);
- }
-
if (skb->nfct) {
struct ip_conntrack *ct
= (struct ip_conntrack *)skb->nfct->master;
@@ -493,11 +591,6 @@
return NULL;
}
-struct ip_conntrack *
-ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
-{
- return __ip_conntrack_get(skb, ctinfo, 0);
-}
/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
@@ -526,15 +619,19 @@
return NF_STOLEN;
}
- ct = __ip_conntrack_get(*pskb, &ctinfo, 1);
- if (!ct) {
- /* Not valid part of a connection */
- return NF_ACCEPT;
+ proto = find_proto((*pskb)->nh.iph->protocol);
+
+ /* It may be an icmp error... */
+ if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP
+ || !(ct = icmp_error_track(*pskb, &ctinfo))) {
+ if (!(ct = resolve_normal_ct(*pskb, proto, &ctinfo))) {
+ /* Not valid part of a connection */
+ return NF_ACCEPT;
+ }
}
+ IP_NF_ASSERT((*pskb)->nfct);
- proto = find_proto((*pskb)->nh.iph->protocol);
ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo);
-
if (ret == -1) {
/* Invalid */
nf_conntrack_put((*pskb)->nfct);
@@ -665,10 +762,15 @@
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
WRITE_LOCK(&ip_conntrack_lock);
- /* Need del_timer for race avoidance (may already be dying). */
- if (del_timer(&ct->timeout)) {
- ct->timeout.expires = jiffies + extra_jiffies;
- add_timer(&ct->timeout);
+ /* Timer may not be active yet */
+ if (!(ct->status & IPS_CONFIRMED))
+ ct->timeout.expires = extra_jiffies;
+ else {
+ /* Need del_timer for race avoidance (may already be dying). */
+ if (del_timer(&ct->timeout)) {
+ ct->timeout.expires = jiffies + extra_jiffies;
+ add_timer(&ct->timeout);
+ }
}
WRITE_UNLOCK(&ip_conntrack_lock);
}
@@ -740,6 +842,17 @@
/* Time to push up daises... */
if (del_timer(&h->ctrack->timeout))
death_by_timeout((unsigned long)h->ctrack);
+ else if (!(h->ctrack->status & IPS_CONFIRMED)) {
+ /* Unconfirmed connection. Clean from lists,
+ mark confirmed so it gets cleaned as soon
+ as packet comes back. */
+ WRITE_LOCK(&ip_conntrack_lock);
+ if (!(h->ctrack->status & IPS_CONFIRMED)) {
+ clean_from_lists(h->ctrack);
+ h->ctrack->status |= IPS_CONFIRMED;
+ }
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ }
/* ... else the timer will get him soon. */
ip_conntrack_put(h->ctrack);
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
working/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
--- linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
Fri Apr 14 17:41:01 2000
+++ working/net/ipv4/netfilter/ip_conntrack_proto_tcp.c Sat Apr 22 16:38:50 2000
@@ -23,6 +23,10 @@
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
+/* We steal a bit to indicate no reply yet (can't use status, because
+ it's set before we get into packet handling). */
+#define TCP_REPLY_BIT 0x1000
+
/* Actually, I believe that neither ipmasq (where this code is stolen
from) nor ipfilter do it exactly right. A new conntrack machine taking
into account packet loss (which creates uncertainty as to exactly
@@ -141,7 +145,7 @@
enum tcp_conntrack state;
READ_LOCK(&tcp_lock);
- state = conntrack->proto.tcp_state;
+ state = (conntrack->proto.tcp_state & ~TCP_REPLY_BIT);
READ_UNLOCK(&tcp_lock);
return sprintf(buffer, "%s ", tcp_conntrack_names[state]);
@@ -161,7 +165,7 @@
struct iphdr *iph, size_t len,
enum ip_conntrack_info ctinfo)
{
- enum tcp_conntrack newconntrack;
+ enum tcp_conntrack newconntrack, oldtcpstate;
struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl);
/* We're guaranteed to have the base header, but maybe not the
@@ -172,10 +176,11 @@
}
WRITE_LOCK(&tcp_lock);
+ oldtcpstate = conntrack->proto.tcp_state;
newconntrack
= tcp_conntracks
[CTINFO2DIR(ctinfo)]
- [get_conntrack_index(tcph)][conntrack->proto.tcp_state];
+ [get_conntrack_index(tcph)][oldtcpstate & ~TCP_REPLY_BIT];
/* Invalid */
if (newconntrack == TCP_CONNTRACK_MAX) {
@@ -187,9 +192,22 @@
}
conntrack->proto.tcp_state = newconntrack;
+ if ((oldtcpstate & TCP_REPLY_BIT)
+ || ctinfo >= IP_CT_IS_REPLY)
+ conntrack->proto.tcp_state |= TCP_REPLY_BIT;
+
WRITE_UNLOCK(&tcp_lock);
- ip_ct_refresh(conntrack, tcp_timeouts[conntrack->proto.tcp_state]);
+ /* If only reply is a RST, we can consider ourselves not to
+ have an established connection: this is a fairly common
+ problem case, so we can delete the conntrack
+ immediately. --RR */
+ if (!(oldtcpstate & TCP_REPLY_BIT) && tcph->rst) {
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)conntrack);
+ } else
+ ip_ct_refresh(conntrack, tcp_timeouts[newconntrack]);
+
return NF_ACCEPT;
}
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_conntrack_standalone.c
working/net/ipv4/netfilter/ip_conntrack_standalone.c
--- linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_conntrack_standalone.c
Fri Apr 14 17:41:01 2000
+++ working/net/ipv4/netfilter/ip_conntrack_standalone.c Sat Apr 22
16:18:19 2000
@@ -86,6 +86,12 @@
len += print_tuple(buffer + len,
&conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
proto);
+#if 0
+ if (!(conntrack->status & IPS_CONFIRMED))
+ len += sprintf(buffer + len, "[UNCONFIRMED] ");
+ len += sprintf(buffer + len, "use=%u ",
+ atomic_read(&conntrack->ct_general.use));
+#endif
len += sprintf(buffer + len, "\n");
return len;
@@ -157,6 +163,22 @@
return len;
}
+static unsigned int ip_confirm(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* We've seen it coming out the other side: confirm */
+ if ((*pskb)->nfct) {
+ struct ip_conntrack *ct
+ = (struct ip_conntrack *)(*pskb)->nfct->master;
+ if (!(ct->status & IPS_CONFIRMED))
+ ip_conntrack_confirm(ct);
+ }
+ return NF_ACCEPT;
+}
+
static unsigned int ip_refrag(unsigned int hooknum,
struct sk_buff **pskb,
const struct net_device *in,
@@ -165,6 +187,14 @@
{
struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ /* We've seen it coming out the other side: confirm */
+ if ((*pskb)->nfct) {
+ struct ip_conntrack *ct
+ = (struct ip_conntrack *)(*pskb)->nfct->master;
+ if (!(ct->status & IPS_CONFIRMED))
+ ip_conntrack_confirm(ct);
+ }
+
/* Local packets are never produced too large for their
interface. We degfragment them at LOCAL_OUT, however,
so we have to refragment them here. */
@@ -203,6 +233,8 @@
/* Refragmenter; last chance. */
static struct nf_hook_ops ip_conntrack_out_ops
= { { NULL, NULL }, ip_refrag, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_LAST };
+static struct nf_hook_ops ip_conntrack_local_in_ops
+= { { NULL, NULL }, ip_confirm, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_LAST-1 };
static int init_or_cleanup(int init)
{
@@ -230,10 +262,17 @@
printk("ip_conntrack: can't register post-routing hook.\n");
goto cleanup_inandlocalops;
}
+ ret = nf_register_hook(&ip_conntrack_local_in_ops);
+ if (ret < 0) {
+ printk("ip_conntrack: can't register local in hook.\n");
+ goto cleanup_inoutandlocalops;
+ }
return ret;
cleanup:
+ nf_unregister_hook(&ip_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
nf_unregister_hook(&ip_conntrack_out_ops);
cleanup_inandlocalops:
nf_unregister_hook(&ip_conntrack_local_out_ops);
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_fw_compat.c
working/net/ipv4/netfilter/ip_fw_compat.c
--- linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_fw_compat.c Wed Apr
12 17:13:07 2000
+++ working/net/ipv4/netfilter/ip_fw_compat.c Mon Apr 24 00:20:54 2000
@@ -13,6 +13,7 @@
#include <net/route.h>
#include <linux/netfilter_ipv4/compat_firewall.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
static struct firewall_ops *fwops;
@@ -60,6 +61,18 @@
return 0;
}
+static inline void
+confirm_connection(struct sk_buff *skb)
+{
+ if (skb->nfct) {
+ struct ip_conntrack *ct
+ = (struct ip_conntrack *)skb->nfct->master;
+
+ if (!(ct->status & IPS_CONFIRMED))
+ ip_conntrack_confirm(ct);
+ }
+}
+
static unsigned int
fw_in(unsigned int hooknum,
struct sk_buff **pskb,
@@ -105,10 +118,14 @@
ret = fwops->fw_output(fwops, PF_INET,
(struct net_device *)out,
(*pskb)->nh.raw, &redirpt, pskb);
- if (fwops->fw_acct_out && (ret == FW_ACCEPT || ret == FW_SKIP))
- fwops->fw_acct_out(fwops, PF_INET,
- (struct net_device *)in,
- (*pskb)->nh.raw, &redirpt, pskb);
+ if (ret == FW_ACCEPT || ret == FW_SKIP) {
+ if (fwops->fw_acct_out)
+ fwops->fw_acct_out(fwops, PF_INET,
+ (struct net_device *)in,
+ (*pskb)->nh.raw, &redirpt,
+ pskb);
+ confirm_connection(*pskb);
+ }
break;
}
@@ -155,6 +172,16 @@
}
}
+static unsigned int fw_confirm(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ confirm_connection(*pskb);
+ return NF_ACCEPT;
+}
+
extern int ip_fw_ctl(int optval, void *user, unsigned int len);
static int sock_fn(struct sock *sk, int optval, void *user, unsigned int len)
@@ -174,6 +201,9 @@
static struct nf_hook_ops forward_ops
= { { NULL, NULL }, fw_in, PF_INET, NF_IP_FORWARD, NF_IP_PRI_FILTER };
+static struct nf_hook_ops local_in_ops
+= { { NULL, NULL }, fw_confirm, PF_INET, NF_IP_LOCAL_IN, NF_IP_PRI_LAST - 1 };
+
static struct nf_sockopt_ops sock_ops
= { { NULL, NULL }, PF_INET, 64, 64 + 1024 + 1, &sock_fn, 0, 0, NULL,
0, NULL };
@@ -202,6 +232,7 @@
nf_register_hook(&preroute_ops);
nf_register_hook(&postroute_ops);
nf_register_hook(&forward_ops);
+ nf_register_hook(&local_in_ops);
return ret;
@@ -209,6 +240,7 @@
nf_unregister_hook(&preroute_ops);
nf_unregister_hook(&postroute_ops);
nf_unregister_hook(&forward_ops);
+ nf_unregister_hook(&local_in_ops);
masq_cleanup();
diff -urN --minimal --exclude *.lds --exclude *.ps --exclude *.pdf --exclude
*.sgml --exclude *.tex --exclude *.aux --exclude *.log --exclude classlist.h
--exclude devlist.h --exclude autoconf.h --exclude compile.h --exclude
version.h --exclude .* --exclude *.[oa] --exclude *.orig --exclude config
--exclude asm --exclude modules --exclude *.[Ss] --exclude System.map --exclude
consolemap_deftbl.c --exclude *~ --exclude TAGS --exclude tags --exclude
modversions.h --exclude install-kernel
linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_fw_compat_masq.c
working/net/ipv4/netfilter/ip_fw_compat_masq.c
--- linux-2.3.99-pre-6-2-rusty/net/ipv4/netfilter/ip_fw_compat_masq.c Wed Apr
12 17:13:07 2000
+++ working/net/ipv4/netfilter/ip_fw_compat_masq.c Thu Apr 20 13:04:24 2000
@@ -103,6 +103,7 @@
struct ip_conntrack_protocol *protocol;
struct ip_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
+ struct ip_conntrack *ct;
int ret;
protocol = find_proto(iph->protocol);
@@ -113,31 +114,18 @@
switch (iph->protocol) {
case IPPROTO_ICMP:
/* ICMP errors. */
- if (icmp_error_track(*pskb)) {
- /* If it is valid, tranlsate it */
- if ((*pskb)->nfct) {
- struct ip_conntrack *ct
- = (struct ip_conntrack *)
- (*pskb)->nfct->master;
- enum ip_conntrack_dir dir;
-
- if ((*pskb)->nfct-ct->infos >= IP_CT_IS_REPLY)
- dir = IP_CT_DIR_REPLY;
- else
- dir = IP_CT_DIR_ORIGINAL;
-
- icmp_reply_translation(*pskb,
- ct,
- NF_IP_PRE_ROUTING,
- dir);
- }
+ if ((ct = icmp_error_track(*pskb, &ctinfo))) {
+ icmp_reply_translation(*pskb, ct,
+ NF_IP_PRE_ROUTING,
+ CTINFO2DIR(ctinfo));
return NF_ACCEPT;
}
/* Fall thru... */
case IPPROTO_TCP:
case IPPROTO_UDP:
if (!get_tuple(iph, (*pskb)->len, &tuple, protocol)) {
- printk("ip_fw_compat_masq: Couldn't get tuple\n");
+ if (net_ratelimit())
+ printk("ip_fw_compat_masq: Can't get tuple\n");
return NF_ACCEPT;
}
break;
@@ -166,8 +154,9 @@
NF_IP_PRE_ROUTING,
pskb);
} else
- printk("ip_fw_compat_masq: conntrack"
- " didn't like\n");
+ if (net_ratelimit())
+ printk("ip_fw_compat_masq: conntrack"
+ " didn't like\n");
}
} else {
if (h)
--
Hacking time.
|