I sent this to the list a month ago, but I think it was overlooked.
I think this (actually, the second patch) should go in as a bugfix
into 2.6.10, so I'm reposting it. If I'm completely wrong about
this, tell me and I'll shut up. Thanks!
----- Forwarded message from Miquel van Smoorenburg <miquels@xxxxxxxxxx> -----
Date: Thu, 14 Oct 2004 20:26:26 +0200
From: Miquel van Smoorenburg <miquels@xxxxxxxxxx>
To: linux-xfs@xxxxxxxxxxx
Subject: shut up fs/xfs/linux-2.6/kmem.c
Message-ID: <20041014182625.GA7535@xxxxxxxxxx>
Hello,
on my machines fs/xfs/linux-2.6/kmem.c is printing a lot
of garbage about failed 4th and 5th order allocations.
I investigated this, and it appears that the vmalloc/kmalloc
wrappers don't set __GFP_REPEAT or __GFP_NOFAIL; instead they loop
endlessly in the wrapper until the allocation succeeds. Because
the loop has no delay built in (kmalloc at least calls
blk_congestion_wait() if it's told to retry), it prints lots of
debug info in a very short time.
This only happens for 4th and 5th order allocations, because kmalloc()
itself behaves as if __GFP_REPEAT or __GFP_NOFAIL was set for
< 4th order allocations if __GFP_WAIT is set. (BTW, note that
mm/page_alloc.c::__alloc_pages() has either the comment or
the logic wrong...)
The first patch just retains the current way of doing things, but
cuts down on the debug output - it also adds blk_congestion_wait()
in the endless loop. With this patch applied, my system is
finally peaceful and quiet again. Which is great if you have
a 9600 baud serial console - without the patch the system just stops.
The second patch does the same, but simplifies things a lot by
using __GFP_NOFAIL. It's cleaner, deletes code instead of adding
to it, but I'm not sure if the printk's were there for some reason.
Mike.
== First patch, quiet down kmem.c and introduce wait:
--- linux-2.6.9-rc4-tw/fs/xfs/linux-2.6/kmem.c.ORIG 2004-08-14
12:55:33.000000000 +0200
+++ linux-2.6.9-rc4-tw/fs/xfs/linux-2.6/kmem.c 2004-10-12 17:47:16.000000000
+0200
@@ -35,6 +35,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/blkdev.h>
#include "time.h"
#include "kmem.h"
@@ -47,18 +48,25 @@
kmem_alloc(size_t size, int flags)
{
int retries = 0, lflags = kmem_flags_convert(flags);
+ int lflagsq, warn;
void *ptr;
+ lflagsq = lflags | (flags & (KM_MAYFAIL|KM_NOSLEEP)) ? 0 : __GFP_NOWARN;
+
do {
+ warn = (++retries % 100) == 0;
if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
- ptr = kmalloc(size, lflags);
+ ptr = kmalloc(size, warn ? lflags : lflagsq);
else
- ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+ ptr = __vmalloc(size, warn ? lflags : lflagsq,
+ PAGE_KERNEL);
if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
return ptr;
- if (!(++retries % 100))
- printk(KERN_ERR "possible deadlock in %s (mode:0x%x)\n",
+ if (warn)
+ printk(KERN_ERR "xfs: possible memory allocation "
+ "deadlock in %s (mode:0x%x)\n",
__FUNCTION__, lflags);
+ blk_congestion_wait(WRITE, HZ/50);
} while (1);
}
@@ -103,15 +111,21 @@
kmem_zone_alloc(kmem_zone_t *zone, int flags)
{
int retries = 0, lflags = kmem_flags_convert(flags);
+ int lflagsq, warn;
void *ptr;
+ lflagsq = lflags | (flags & (KM_MAYFAIL|KM_NOSLEEP)) ? 0 : __GFP_NOWARN;
+
do {
- ptr = kmem_cache_alloc(zone, lflags);
+ warn = (++retries % 100) == 0;
+ ptr = kmem_cache_alloc(zone, warn ? lflags : lflagsq);
if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
return ptr;
- if (!(++retries % 100))
- printk(KERN_ERR "possible deadlock in %s (mode:0x%x)\n",
+ if (warn)
+ printk(KERN_ERR "xfs: possible memory allocation "
+ "deadlock in %s (mode:0x%x)\n",
__FUNCTION__, lflags);
+ blk_congestion_wait(WRITE, HZ/50);
} while (1);
}
== Second (alternative) patch, simplify and use __GFP_NOFAIL:
--- linux-2.6.9-rc4-tw/fs/xfs/linux-2.6/kmem.c.ORIG 2004-08-14
12:55:33.000000000 +0200
+++ linux-2.6.9-rc4-tw/fs/xfs/linux-2.6/kmem.c 2004-10-14 20:17:31.000000000
+0200
@@ -35,6 +35,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/blkdev.h>
#include "time.h"
#include "kmem.h"
@@ -46,20 +47,23 @@
void *
kmem_alloc(size_t size, int flags)
{
- int retries = 0, lflags = kmem_flags_convert(flags);
+ int retries, lflags = kmem_flags_convert(flags);
void *ptr;
- do {
- if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
+ if (flags & (KM_MAYFAIL|KM_NOSLEEP))
+ lflags |= __GFP_NOWARN;
+
+ for (retries = 0; retries < MAX_VMALLOCS; retries++) {
+ if (size < MAX_SLAB_SIZE)
ptr = kmalloc(size, lflags);
else
ptr = __vmalloc(size, lflags, PAGE_KERNEL);
if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
return ptr;
- if (!(++retries % 100))
- printk(KERN_ERR "possible deadlock in %s (mode:0x%x)\n",
- __FUNCTION__, lflags);
- } while (1);
+ blk_congestion_wait(WRITE, HZ/50);
+ }
+
+ return kmalloc(size, lflags | __GFP_NOFAIL);
}
void *
@@ -102,17 +106,12 @@
void *
kmem_zone_alloc(kmem_zone_t *zone, int flags)
{
- int retries = 0, lflags = kmem_flags_convert(flags);
- void *ptr;
+ int lflags = kmem_flags_convert(flags);
- do {
- ptr = kmem_cache_alloc(zone, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
- return ptr;
- if (!(++retries % 100))
- printk(KERN_ERR "possible deadlock in %s (mode:0x%x)\n",
- __FUNCTION__, lflags);
- } while (1);
+ if (!(flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ lflags |= __GFP_NOFAIL;
+
+ return kmem_cache_alloc(zone, lflags);
}
void *
----- End forwarded message -----
|