netdev
[Top] [All Lists]

[patch] TCP throughput after 2.2.17-pre1

To: "David S. Miller" <davem@xxxxxxxxxx>
Subject: [patch] TCP throughput after 2.2.17-pre1
From: Andrew Morton <andrewm@xxxxxxxxxx>
Date: Sun, 18 Jun 2000 23:35:10 +1000
Cc: "netdev@xxxxxxxxxxx" <netdev@xxxxxxxxxxx>
Sender: owner-netdev@xxxxxxxxxxx
With 2.2.17-pre1 on a 400MHz uniprocessor it is possible to saturate a
100baseT with netperf (just a TCP transmitter) with 40% CPU left over.

With -pre3 and -pre4 it maxes out at 38 Mbits/sec. A five times
reduction.



The problem is caused by this patch:

Index: tcp.c
===================================================================
RCS file: /opt/cvs/lk2.2/net/ipv4/tcp.c,v
retrieving revision 1.3
retrieving revision 1.3.2.1
diff -u -r1.3 -r1.3.2.1
--- tcp.c       2000/06/11 13:26:18     1.3
+++ tcp.c       2000/06/17 04:19:01     1.3.2.1
@@ -698,13 +698,16 @@

 /*
  *     Wait for more memory for a socket
+ *
+ *     If we got here an allocation has failed on us. We cannot
+ *     spin here or we may block the very code freeing memory
+ *     for us.
  */
 static void wait_for_tcp_memory(struct sock * sk)
 {
        release_sock(sk);
        if (!tcp_memory_free(sk)) {
                struct wait_queue wait = { current, NULL };
-
                sk->socket->flags &= ~SO_NOSPACE;
                add_wait_queue(sk->sleep, &wait);
                for (;;) {
@@ -721,6 +724,12 @@
                }
                current->state = TASK_RUNNING;
                remove_wait_queue(sk->sleep, &wait);
+       }
+       else
+       {
+               /* Yield time to the memory freeing paths */
+               current->state = TASK_INTERRUPTIBLE;
+               schedule_timeout(1);
        }
        lock_sock(sk);
 }



This code is assuming that the network layer has run out of
memory.  Not true.  Instead it goes to sleep if the socket can still
take more data!  The comment about "Yield time to the memory freeing
paths" is bogus.

I've looked through Alan's changelogs and I follow all the mailing
lists, but I cannot tell where this one came from.

A fix against 2.2.17-pre4 (with some optimisations and
rationalisations) is attached.
--- linux-2.2.17pre4/include/net/sock.h Tue Aug 10 05:05:13 1999
+++ linux-akpm/include/net/sock.h       Sun Jun 18 23:16:09 2000
@@ -922,6 +922,16 @@
        return sock_wspace(sk) >= MIN_WRITE_SPACE;
 }
 
+/*
+ *     Return true if the socket's snd buffer is not fully utilised
+ */
+
+extern __inline__ int tcp_memory_free(struct sock *sk)
+{
+       return atomic_read(&sk->wmem_alloc) < sk->sndbuf;
+}
+
+
 /* 
  *     Declarations from timer.c 
  */
--- linux-2.2.17pre4/net/ipv4/tcp.c     Sun Jun 18 21:04:07 2000
+++ linux-akpm/net/ipv4/tcp.c   Sun Jun 18 23:29:21 2000
@@ -201,7 +201,8 @@
  *                                     tcp_do_sendmsg to avoid burstiness.
  *             Eric Schenk     :       Fix fast close down bug with
  *                                     shutdown() followed by close().
- *             Andi Kleen :    Make poll agree with SIGIO
+ *             Andi Kleen      :       Make poll agree with SIGIO
+ *              Andrew Morton   :      Repair handling of 'wmem_alloc exceeded' state
  *                                     
  *             This program is free software; you can redistribute it and/or
  *             modify it under the terms of the GNU General Public License
@@ -691,23 +692,17 @@
        return 0;
 }
 
-static inline int tcp_memory_free(struct sock *sk)
-{
-       return atomic_read(&sk->wmem_alloc) < sk->sndbuf;
-}
-
 /*
  *     Wait for more memory for a socket
  *
- *     If we got here an allocation has failed on us. We cannot
- *     spin here or we may block the very code freeing memory
- *     for us.
+ *     If we got here the socket has used all its available send buffer space.
+ *     We deschedule until more data can be sent.
  */
 static void wait_for_tcp_memory(struct sock * sk)
 {
-       release_sock(sk);
        if (!tcp_memory_free(sk)) {
                struct wait_queue wait = { current, NULL };
+               release_sock(sk);
                sk->socket->flags &= ~SO_NOSPACE;
                add_wait_queue(sk->sleep, &wait);
                for (;;) {
@@ -724,14 +719,8 @@
                }
                current->state = TASK_RUNNING;
                remove_wait_queue(sk->sleep, &wait);
+               lock_sock(sk);
        }
-       else
-       {
-               /* Yield time to the memory freeing paths */
-               current->state = TASK_INTERRUPTIBLE;
-               schedule_timeout(1);
-       }
-       lock_sock(sk);
 }
 
 /*
--- linux-2.2.17pre4/net/core/sock.c    Tue May 11 02:55:25 1999
+++ linux-akpm/net/core/sock.c  Sun Jun 18 23:24:18 2000
@@ -558,7 +558,7 @@
  */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
 {
-       if (force || atomic_read(&sk->wmem_alloc) < sk->sndbuf) {
+       if (force || tcp_memory_free(sk)) {
                struct sk_buff * skb = alloc_skb(size, priority);
                if (skb) {
                        atomic_add(skb->truesize, &sk->wmem_alloc);
@@ -575,7 +575,7 @@
  */ 
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
 {
-       if (force || atomic_read(&sk->rmem_alloc) < sk->rcvbuf) {
+       if (force || tcp_memory_free(sk)) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        atomic_add(skb->truesize, &sk->rmem_alloc);
@@ -658,7 +658,7 @@
                if (signal_pending(current))
                        break;
                current->state = TASK_INTERRUPTIBLE;
-               if (atomic_read(&sk->wmem_alloc) < sk->sndbuf)
+               if (tcp_memory_free(sk))
                        break;
                if (sk->shutdown & SEND_SHUTDOWN)
                        break;
[Reviewer's note: the sock_rmalloc() hunk above replaces the receive-side
test "atomic_read(&sk->rmem_alloc) < sk->rcvbuf" with tcp_memory_free(),
which compares wmem_alloc against sndbuf -- i.e. the *send* buffer. The
receive path should keep its original rmem_alloc/rcvbuf check.]

[Prev in Thread] Current Thread [Next in Thread]