netdev
[Top] [All Lists]

Re: [patch] TCP throughput after 2.2.17-pre1

To: Alan Cox <alan@xxxxxxxxxx>
Subject: Re: [patch] TCP throughput after 2.2.17-pre1
From: Andrew Morton <andrewm@xxxxxxxxxx>
Date: Mon, 19 Jun 2000 20:58:29 +1000
Cc: "David S. Miller" <davem@xxxxxxxxxx>, netdev@xxxxxxxxxxx, kuznet@xxxxxxxxxxxxx, Andi Kleen <ak@xxxxxx>
References: <200006181426.HAA04840@xxxxxxxxxxxxxxx> from "David S. Miller" at Jun 18, 2000 07:26:13 AM <200006181447.KAA07398@xxxxxxxxxxxxxxxxxxxxxxxx>
Sender: owner-netdev@xxxxxxxxxxx
Andi's patch works for me.  I've attached the 2.2.17-pre4 version here.

Alan Cox wrote:
> 
> and also
> the new code path waking on the socket kfreeing a buffer.

He didn't appear to do that bit.  It just polls.

The sleep_on() would be good to have; wait_for_tcp_memory() will be
called quite often for the non-oom case.
--- linux-2.2.17pre4/include/net/sock.h Tue Aug 10 05:05:13 1999
+++ linux-akpm/include/net/sock.h       Mon Jun 19 19:03:07 2000
@@ -717,6 +717,10 @@
 extern struct sk_buff          *sock_wmalloc(struct sock *sk,
                                              unsigned long size, int force,
                                              int priority);
+
+extern struct sk_buff          *sock_wmalloc_err(struct sock *sk,
+                                             unsigned long size, int force,
+                                             int priority, int *err);
 extern struct sk_buff          *sock_rmalloc(struct sock *sk,
                                              unsigned long size, int force,
                                              int priority);
--- linux-2.2.17pre4/include/net/snmp.h Mon Oct  5 03:19:39 1998
+++ linux-akpm/include/net/snmp.h       Mon Jun 19 19:03:07 2000
@@ -178,6 +178,7 @@
        unsigned long   OfoPruned;
        unsigned long   OutOfWindowIcmps; 
        unsigned long   LockDroppedIcmps; 
+       unsigned long   SockMallocOOM; 
 };
        
 #endif
--- linux-2.2.17pre4/net/core/sock.c    Tue May 11 02:55:25 1999
+++ linux-akpm/net/core/sock.c  Mon Jun 19 19:03:07 2000
@@ -566,6 +566,31 @@
                        skb->sk = sk;
                        return skb;
                }
+               net_statistics.SockMallocOOM++; 
+       }
+       return NULL;
+}
+
+/*
+ * Allocate memory from the sockets send buffer, telling caller about real OOM.
+ * err is only set for oom, not for socket buffer overflow.
+ */ 
+struct sk_buff *sock_wmalloc_err(struct sock *sk, unsigned long size, int force, int priority, int *err)
+{
+       *err = 0; 
+       /* Note: overcommitment possible */ 
+       if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
+               struct sk_buff * skb;
+               *err = -ENOMEM; 
+               skb = alloc_skb(size, priority);
+               if (skb) {
+                       *err = 0;
+                       atomic_add(skb->truesize, &sk->wmem_alloc);
+                       skb->destructor = sock_wfree;
+                       skb->sk = sk;
+                       return skb;
+               }
+               net_statistics.SockMallocOOM++; 
        }
        return NULL;
 }
@@ -583,6 +608,7 @@
                        skb->sk = sk;
                        return skb;
                }
+               net_statistics.SockMallocOOM++; 
        }
        return NULL;
 }
@@ -602,6 +628,7 @@
                if (mem)
                        return mem;
                atomic_sub(size, &sk->omem_alloc);
+               net_statistics.SockMallocOOM++; 
        }
        return NULL;
 }
--- linux-2.2.17pre4/net/ipv4/tcp.c     Sun Jun 18 21:04:07 2000
+++ linux-akpm/net/ipv4/tcp.c   Mon Jun 19 20:40:33 2000
@@ -697,40 +697,38 @@
 }
 
 /*
- *     Wait for more memory for a socket
- *
- *     If we got here an allocation has failed on us. We cannot
- *     spin here or we may block the very code freeing memory
- *     for us.
+ *     Wait for more memory for a socket.
+ *     Special case is err == -ENOMEM, in this case just sleep a bit waiting
+ *     for the system to free up some memory. 
  */
-static void wait_for_tcp_memory(struct sock * sk)
+static void wait_for_tcp_memory(struct sock * sk, int err)
 {
        release_sock(sk);
        if (!tcp_memory_free(sk)) {
                struct wait_queue wait = { current, NULL };
+
                sk->socket->flags &= ~SO_NOSPACE;
                add_wait_queue(sk->sleep, &wait);
                for (;;) {
                        if (signal_pending(current))
                                break;
                        current->state = TASK_INTERRUPTIBLE;
-                       if (tcp_memory_free(sk))
+                       if (tcp_memory_free(sk) && !err)
                                break;
                        if (sk->shutdown & SEND_SHUTDOWN)
                                break;
                        if (sk->err)
                                break;
-                       schedule();
+                       if (!err) 
+                               schedule();
+                       else {
+                               schedule_timeout(1); 
+                               break;
+                       }       
                }
                current->state = TASK_RUNNING;
                remove_wait_queue(sk->sleep, &wait);
        }
-       else
-       {
-               /* Yield time to the memory freeing paths */
-               current->state = TASK_INTERRUPTIBLE;
-               schedule_timeout(1);
-       }
        lock_sock(sk);
 }
 
@@ -924,7 +922,7 @@
                                tmp += copy;
                                queue_it = 0;
                        }
-                       skb = sock_wmalloc(sk, tmp, 0, GFP_KERNEL);
+                       skb = sock_wmalloc_err(sk, tmp, 0, GFP_KERNEL, &err);
 
                        /* If we didn't get any memory, we need to sleep. */
                        if (skb == NULL) {
@@ -937,8 +935,10 @@
                                        err = -ERESTARTSYS;
                                        goto do_interrupted;
                                }
-                               tcp_push_pending_frames(sk, tp);
-                               wait_for_tcp_memory(sk);
+                               /* In OOM that would fail anyways so do not bother. */
+                               if (!err) 
+                                       tcp_push_pending_frames(sk, tp);
+                               wait_for_tcp_memory(sk, err);
 
                                /* If SACK's were formed or PMTU events happened,
                                 * we must find out about it.
--- linux-2.2.17pre4/net/ipv4/proc.c    Fri Jun 16 23:48:00 2000
+++ linux-akpm/net/ipv4/proc.c  Mon Jun 19 19:03:07 2000
@@ -359,8 +359,8 @@
        len = sprintf(buffer,
                      "TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed"
                      " EmbryonicRsts PruneCalled RcvPruned OfoPruned"
-                     " OutOfWindowIcmps LockDroppedIcmps\n"    
-                     "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+                     " OutOfWindowIcmps LockDroppedIcmps SockMallocOOM\n"      
+                     "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
                      net_statistics.SyncookiesSent,
                      net_statistics.SyncookiesRecv,
                      net_statistics.SyncookiesFailed,
@@ -369,7 +369,8 @@
                      net_statistics.RcvPruned,
                      net_statistics.OfoPruned,
                      net_statistics.OutOfWindowIcmps,
-                     net_statistics.LockDroppedIcmps);
+                     net_statistics.LockDroppedIcmps,
+                     net_statistics.SockMallocOOM);
 
        if (offset >= len)
        {
<Prev in Thread] Current Thread [Next in Thread>