netdev
[Top] [All Lists]

Re: [patch] TCP throughput after 2.2.17-pre1

To: Andrew Morton <andrewm@xxxxxxxxxx>
Subject: Re: [patch] TCP throughput after 2.2.17-pre1
From: Andi Kleen <ak@xxxxxx>
Date: Sun, 18 Jun 2000 18:16:37 +0200
Cc: "David S. Miller" <davem@xxxxxxxxxx>, "netdev@xxxxxxxxxxx" <netdev@xxxxxxxxxxx>, alan@xxxxxxxxxxxxxxxxxxx
In-reply-to: <394CD00E.A02FEBBE@uow.edu.au>; from Andrew Morton on Sun, Jun 18, 2000 at 03:41:47PM +0200
References: <394CD00E.A02FEBBE@uow.edu.au>
Sender: owner-netdev@xxxxxxxxxxx
On Sun, Jun 18, 2000 at 03:41:47PM +0200, Andrew Morton wrote:
> With 2.2.17-pre1 on a 400MHz uniprocessor it is possible to saturate a
> 100baseT with netperf (just a TCP transmitter) with 40% CPU left over.
> 
> With -pre3 and -pre4 it maxes out at 38 Mbits/sec. A five times
> reduction.

Your patch is unfortunately not the right solution. It goes back to
the old state and doesn't fix the original bug (tcp spinning in case
of real OOM). Also, you cannot just use tcp_* in generic code, and
the release_sock move is useless because sk->sndbuf management occurs
outside the socket lock anyway.

Here is a better patch. It tries to properly distinguish the two socket
oom cases (out of socket buffer and out of system memory) and sleeps
in the latter case to give the system some time to recover. It was
integrated with the normal sleep loop, because a sndbuf wakeup is 
a strong cue that some memory was freed again. 

I also added a net_statistics counter (SockMallocOOM) to make the problem more transparent.

Based on discussions with Alan. Patch is relative to plain 2.2.16. 

-Andi


--- include/net/sock.h.sockalloc        Fri Jun 16 13:33:37 2000
+++ include/net/sock.h  Sun Jun 18 17:17:46 2000
@@ -717,6 +717,10 @@
 extern struct sk_buff          *sock_wmalloc(struct sock *sk,
                                              unsigned long size, int force,
                                              int priority);
+
+extern struct sk_buff          *sock_wmalloc_err(struct sock *sk,
+                                             unsigned long size, int force,
+                                             int priority, int *err);
 extern struct sk_buff          *sock_rmalloc(struct sock *sk,
                                              unsigned long size, int force,
                                              int priority);
--- include/net/snmp.h.sockalloc        Fri Jun 16 13:33:37 2000
+++ include/net/snmp.h  Sun Jun 18 17:50:29 2000
@@ -178,6 +178,7 @@
        unsigned long   OfoPruned;
        unsigned long   OutOfWindowIcmps; 
        unsigned long   LockDroppedIcmps; 
+       unsigned long   SockMallocOOM; 
 };
        
 #endif
--- net/core/sock.c.sockalloc   Fri Jun 16 13:33:44 2000
+++ net/core/sock.c     Sun Jun 18 18:05:01 2000
@@ -566,6 +565,31 @@
                        skb->sk = sk;
                        return skb;
                }
+               net_statistics.SockMallocOOM++; 
+       }
+       return NULL;
+}
+
+/*
+ * Allocate memory from the sockets send buffer, telling caller about real OOM. 
+ * err is only set for oom, not for socket buffer overflow.
+ */ 
+struct sk_buff *sock_wmalloc_err(struct sock *sk, unsigned long size, int force, int priority, int *err)
+{
+       *err = 0; 
+       /* Note: overcommitment possible */ 
+       if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
+               struct sk_buff * skb;
+               *err = -ENOMEM; 
+               skb = alloc_skb(size, priority);
+               if (skb) {
+                       *err = 0;
+                       atomic_add(skb->truesize, &sk->wmem_alloc);
+                       skb->destructor = sock_wfree;
+                       skb->sk = sk;
+                       return skb;
+               }
+               net_statistics.SockMallocOOM++; 
        }
        return NULL;
 }
@@ -583,6 +607,7 @@
                        skb->sk = sk;
                        return skb;
                }
+               net_statistics.SockMallocOOM++; 
        }
        return NULL;
 }
@@ -602,6 +627,7 @@
                if (mem)
                        return mem;
                atomic_sub(size, &sk->omem_alloc);
+               net_statistics.SockMallocOOM++; 
        }
        return NULL;
 }
--- net/ipv4/tcp.c.sockalloc    Fri Jun 16 13:33:45 2000
+++ net/ipv4/tcp.c      Sun Jun 18 18:02:38 2000
@@ -697,9 +697,11 @@
 }
 
 /*
- *     Wait for more memory for a socket
+ *     Wait for more memory for a socket.
+ *     Special case is err == -ENOMEM, in this case just sleep a bit waiting
+ *     for the system to free up some memory. 
  */
-static void wait_for_tcp_memory(struct sock * sk)
+static void wait_for_tcp_memory(struct sock * sk, int err)
 {
        release_sock(sk);
        if (!tcp_memory_free(sk)) {
@@ -711,13 +713,18 @@
                        if (signal_pending(current))
                                break;
                        current->state = TASK_INTERRUPTIBLE;
-                       if (tcp_memory_free(sk))
+                       if (tcp_memory_free(sk) && !err)
                                break;
                        if (sk->shutdown & SEND_SHUTDOWN)
                                break;
                        if (sk->err)
                                break;
-                       schedule();
+                       if (!err) 
+                               schedule();
+                       else {
+                               schedule_timeout(1); 
+                               break;
+                       }       
                }
                current->state = TASK_RUNNING;
                remove_wait_queue(sk->sleep, &wait);
@@ -915,7 +922,7 @@
                                tmp += copy;
                                queue_it = 0;
                        }
-                       skb = sock_wmalloc(sk, tmp, 0, GFP_KERNEL);
+                       skb = sock_wmalloc_err(sk, tmp, 0, GFP_KERNEL, &err);
 
                        /* If we didn't get any memory, we need to sleep. */
                        if (skb == NULL) {
@@ -928,8 +935,10 @@
                                        err = -ERESTARTSYS;
                                        goto do_interrupted;
                                }
-                               tcp_push_pending_frames(sk, tp);
-                               wait_for_tcp_memory(sk);
+                               /* In OOM that would fail anyways so do not bother. */ 
+                               if (!err) 
+                                       tcp_push_pending_frames(sk, tp);
+                               wait_for_tcp_memory(sk, err);
 
                                /* If SACK's were formed or PMTU events happened,
                                 * we must find out about it.
--- net/ipv4/proc.c.sockalloc   Fri Jun 16 13:33:45 2000
+++ net/ipv4/proc.c     Sun Jun 18 17:50:17 2000
@@ -359,8 +359,8 @@
        len = sprintf(buffer,
                      "TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed"
                      " EmbryonicRsts PruneCalled RcvPruned OfoPruned"
-                     " OutOfWindowIcmps LockDroppedIcmps\n"    
-                     "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+                     " OutOfWindowIcmps LockDroppedIcmps SockMallocOOM\n"      
+                     "TcpExt: %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
                      net_statistics.SyncookiesSent,
                      net_statistics.SyncookiesRecv,
                      net_statistics.SyncookiesFailed,
@@ -369,7 +369,8 @@
                      net_statistics.RcvPruned,
                      net_statistics.OfoPruned,
                      net_statistics.OutOfWindowIcmps,
-                     net_statistics.LockDroppedIcmps);
+                     net_statistics.LockDroppedIcmps,
+                     net_statistics.SockMallocOOM);
 
        if (offset >= len)
        {





<Prev in Thread] Current Thread [Next in Thread>