A couple of problems here:
* tcp_init() wants to set sysctl_tcp_max_tw_buckets to 180,000. This
seems too high (Andi says 22 megs). I think the patch here, which
doubles the current value instead, is more consistent.
* tcp_twkill() can consume a huge amount of time if it has enough
connections to deal with. When running lmbench I have observed it
killing 2,500 connections in a single pass. At ~6 usecs per iteration,
that means we spend 15 milliseconds in the timer handler. This is crazy.
So I just kill a hundred and then reschedule the timer to run in a
couple of jiffies' time. The downside: this limits TIME_WAIT (tw) reaping
to 2,500 connections per second. But I don't see a DoS opportunity here.
--- linux-2.4.0-test9-pre7/net/ipv4/tcp.c Tue Sep 26 21:45:33 2000
+++ linux-akpm/net/ipv4/tcp.c Mon Oct 2 02:44:59 2000
@@ -2438,7 +2438,7 @@
if (order > 4) {
sysctl_local_port_range[0] = 32768;
sysctl_local_port_range[1] = 61000;
- sysctl_tcp_max_tw_buckets = 180000;
+ sysctl_tcp_max_tw_buckets <<= 1;
sysctl_tcp_max_orphans = 4096<<(order-4);
sysctl_max_syn_backlog = 1024;
} else if (order < 3) {
--- linux-2.4.0-test9-pre7/net/ipv4/tcp_minisocks.c Tue Sep 26 21:45:33 2000
+++ linux-akpm/net/ipv4/tcp_minisocks.c Mon Oct 2 02:15:02 2000
@@ -434,6 +434,7 @@
{
struct tcp_tw_bucket *tw;
int killed = 0;
+ int max_killed = 0;
/* NOTE: compare this to previous version where lock
* was released after detaching chain. It was racy,
@@ -447,6 +448,13 @@
goto out;
while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) {
+
+ /* This loop takes ~6 usecs per iteration. */
+ if (killed > 100) {
+ max_killed = 1;
+ break;
+ }
+
tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death;
tw->pprev_death = NULL;
spin_unlock(&tw_death_lock);
@@ -457,12 +465,19 @@
killed++;
spin_lock(&tw_death_lock);
+
+ }
+
+ if (max_killed) { /* More to do: do it soon */
+ mod_timer(&tcp_tw_timer, jiffies+2);
+ } else {
+ tcp_tw_death_row_slot =
+ ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
+
+ if ((tcp_tw_count -= killed) != 0)
+ mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
}
- tcp_tw_death_row_slot =
- ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
- if ((tcp_tw_count -= killed) != 0)
- mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
net_statistics[smp_processor_id()*2].TimeWaited += killed;
out:
spin_unlock(&tw_death_lock);
|