netdev
[Top] [All Lists]

[PATCH] Make netfilter handle SACK in NAT'ed connections (was Re: Fw: oo

To: kuznet@xxxxxxxxxxxxx
Subject: [PATCH] Make netfilter handle SACK in NAT'ed connections (was Re: Fw: oops/bug in tcp, SACK doesn't work?)
From: Harald Welte <laforge@xxxxxxxxxxxx>
Date: Sun, 27 Jan 2002 09:57:16 +0100
Cc: netdev@xxxxxxxxxxx
In-reply-to: <20010731033801.M1486@obroa-skai.gnumonks.org>; from laforge@gnumonks.org on Tue, Jul 31, 2001 at 03:38:01AM -0300
References: <20010728004447.I1240@obroa-skai.gnumonks.org> <200107291653.UAA18260@ms2.inr.ac.ru> <20010731033801.M1486@obroa-skai.gnumonks.org>
Sender: owner-netdev@xxxxxxxxxxx
User-agent: Mutt/1.3.17i
On Tue, Jul 31, 2001 at 03:38:01AM -0300, Harald Welte wrote:

Hi Alexey & Others.

I'm now following up a very old thread about netfilter deleting SACKPERM
in the case of NAT'ing protocols with helpers (ftp, irc, ...)

> On Sun, Jul 29, 2001 at 08:53:36PM +0400, Alexey Kuznetsov wrote:
> > 
> > It is not a valid justification. It is difficult to rewrite sequence
> > numbers.  As soon as nat does this, rewriting sacks is easy. Even not easy,
> > trivial.
> 
> > Sad and not expected behaviour. I used to ridicule commercial firewall
> > vendors, sometimes doing shit of this kind without any clear reasons. :-)
> 
> Ok, I am willing to extend netfilter conntrack/nat in order to deal with
> SACK.  It is really not about being too lazy to do it.

I've now implemented 'correct' SACK alteration (as correct as we can with
the current codebase).. It has gotten quite some bit of code, and I fear
it adds significant complexity (need to wade through all TCP options of 
every packet, alter all SACK's, ...).

It has been tested for some time in a couple of machines, and is working so
far (I have artificially removed packets from the TCP flow in order to cause
the receiver generate SACK's and they have been altered correctly).

The only question remaining is:  Is it worth the effort?  What do the
core linux developers think? 

We could put this patch into netfilter and remove the old delete-sackperm code.
We could also make it a config option (like the patch below does).
Or we could just keep the current behaviour of deleting sackperm.

Please _don't_ apply it to the kernel yet, this is just a proposal for 
discussion.

--- linuxppc-031201-nfpom/net/ipv4/netfilter/ip_nat_helper.c    Sun Dec  2 
21:13:35 2001
+++ linuxppc-031201-nfpom-sack/net/ipv4/netfilter/ip_nat_helper.c       Mon Jan 
14 21:54:58 2002
@@ -1,8 +1,11 @@
 /* ip_nat_mangle.c - generic support functions for NAT helpers 
  *
- * (C) 2000 by Harald Welte <laforge@xxxxxxxxxxxx>
+ * (C) 2000-2002 by Harald Welte <laforge@xxxxxxxxxxxx>
  *
  * distributed under the terms of GNU GPL
+ *
+ *     14 Jan 2002 Harald Welte <laforge@xxxxxxxxxxxx>:
+ *             - add support for SACK adjustment 
  */
 #include <linux/version.h>
 #include <linux/module.h>
@@ -32,6 +35,9 @@
 #define DEBUGP(format, args...)
 #define DUMP_OFFSET(x)
 #endif
+
+/* FIXME */
+#define CONFIG_IP_NF_NAT_SACK  1
        
 DECLARE_LOCK(ip_nat_seqofs_lock);
                         
@@ -182,6 +188,102 @@
        return 1;
 }
 
+#ifdef CONFIG_IP_NF_NAT_SACK
+/* Adjust one found SACK option including checksum correction */
+static void
+sack_adjust(struct tcphdr *tcph, 
+           unsigned char *ptr, 
+           struct ip_nat_seq *natseq)
+{
+       struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+       int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+       int i;
+
+       for (i = 0; i < num_sacks; i++, sp++) {
+               u_int32_t new_start_seq, new_end_seq;
+
+               if (after(ntohl(sp->start_seq) - natseq->offset_before,
+                         natseq->correction_pos))
+                       new_start_seq = ntohl(sp->start_seq) 
+                                       - natseq->offset_after;
+               else
+                       new_start_seq = ntohl(sp->start_seq) 
+                                       - natseq->offset_before;
+               new_start_seq = htonl(new_start_seq);
+
+               if (after(ntohl(sp->end_seq) - natseq->offset_before,
+                         natseq->correction_pos))
+                       new_end_seq = ntohl(sp->end_seq)
+                                     - natseq->offset_after;
+               else
+                       new_end_seq = ntohl(sp->end_seq)
+                                     - natseq->offset_before;
+               new_end_seq = htonl(new_end_seq);
+
+               DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+                       ntohl(sp->start_seq), new_start_seq,
+                       ntohl(sp->end_seq), new_end_seq);
+
+               tcph->check = 
+                       ip_nat_cheat_check(~sp->start_seq, new_start_seq,
+                                          ip_nat_cheat_check(~sp->end_seq, 
+                                                             new_end_seq,
+                                                             tcph->check));
+
+               sp->start_seq = new_start_seq;
+               sp->end_seq = new_end_seq;
+       }
+}
+                       
+
+/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */
+static int
+ip_nat_sack_adjust(struct sk_buff *skb,
+                       struct ip_conntrack *ct,
+                       enum ip_conntrack_info ctinfo)
+{
+       struct iphdr *iph;
+       struct tcphdr *tcph;
+       unsigned char *ptr;
+       int length, dir, sack_adjusted = 0;
+
+       iph = skb->nh.iph;
+       tcph = (void *)iph + iph->ihl*4;
+       length = (tcph->doff*4)-sizeof(struct tcphdr);
+       ptr = (unsigned char *)(tcph+1);
+
+       dir = CTINFO2DIR(ctinfo);
+
+       while (length > 0) {
+               int opcode = *ptr++;
+               int opsize;
+
+               switch (opcode) {
+                       case TCPOPT_EOL:
+                               return !sack_adjusted;
+                       case TCPOPT_NOP:
+                               length--;
+                               continue;
+                       default:
+                               opsize = *ptr++;
+                               if (opsize > length) /* no partial opts */
+                                       return !sack_adjusted;
+                               if (opcode == TCPOPT_SACK) {
+                                       /* found SACK */
+                                       if((opsize >= (TCPOLEN_SACK_BASE + 
TCPOLEN_SACK_PERBLOCK)) &&
+                                          !((opsize - TCPOLEN_SACK_BASE) % 
TCPOLEN_SACK_PERBLOCK))
+                                               sack_adjust(tcph, ptr-2, 
&ct->nat.info.seq[!dir]);
+                                       
+                                       sack_adjusted = 1;
+                               }
+                               ptr += opsize-2;
+                               length -= opsize;
+               }
+       }
+       return !sack_adjusted;
+}
+#endif /* CONFIG_IP_NF_NAT_SACK */
+
 /* TCP sequence number adjustment */
 int 
 ip_nat_seq_adjust(struct sk_buff *skb, 
@@ -226,9 +328,24 @@
        tcph->seq = newseq;
        tcph->ack_seq = newack;
 
+#ifdef CONFIG_IP_NF_NAT_SACK
+       ip_nat_sack_adjust(skb, ct, ctinfo);
+#endif
+
        return 0;
 }
 
+#ifdef CONFIG_IP_NF_NAT_SACK
+
+/* Well, no need to fuck rusty anymor. We _can_ deal correctly with SACK */
+void 
+ip_nat_delete_sack(struct sk_buff *skb, struct tcphdr *tcph)
+{
+       return;
+}
+
+#else /* CONFIG_IP_NF_NAT_SACK */
+
 /* Grrr... SACK.  Fuck me even harder.  Don't want to fix it on the
    fly, so blow it away. */
 void
@@ -272,6 +389,8 @@
        }
        else DEBUGP("Something wrong with SACK_PERM.\n");
 }
+
+#endif /* CONFIG_IP_NF_NAT_SACK */
 
 static inline int
 helper_cmp(const struct ip_nat_helper *helper,
-- 
Live long and prosper
- Harald Welte / laforge@xxxxxxxxxxxx               http://www.gnumonks.org/
============================================================================
GCS/E/IT d- s-: a-- C+++ UL++++$ P+++ L++++$ E--- W- N++ o? K- w--- O- M- 
V-- PS+ PE-- Y+ PGP++ t++ 5-- !X !R tv-- b+++ DI? !D G+ e* h+ r% y+(*)

<Prev in Thread] Current Thread [Next in Thread>