xfs
[Top] [All Lists]

Re: [PATCH] XFS: _xfs_buf_find ignores XBF_DONT_BLOCK flag

To: Dave Chinner <david@xxxxxxxxxxxxx>
Subject: Re: [PATCH] XFS: _xfs_buf_find ignores XBF_DONT_BLOCK flag
From: Peter Watkins <treestem@xxxxxxxxx>
Date: Thu, 15 Jul 2010 17:33:41 -0400
Cc: xfs@xxxxxxxxxxx
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:mime-version:received:received:in-reply-to :references:date:message-id:subject:from:to:cc:content-type :content-transfer-encoding; bh=dMK3gMg0o8vok5wdhuTsa6LQsjG886Xghf2Um74y2us=; b=mxOg/7xwZEO+pALS0rwngEBgRhCbhw03JgY5bM6CM6lt0HNalNFRoofbjx2vGIum59 4XESpQcorSIgPS/Z49EXdeZirW7wG482oOz2KlkvT3BhEIyebPZqsQNEmh53wZbSm5td RzziwVoSuuKtaw3kqy5je9MHRFHllQhySCLTQ=
Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=mime-version:in-reply-to:references:date:message-id:subject:from:to :cc:content-type:content-transfer-encoding; b=vYGLOejPLbelZcqqzgV6pFliUPuc7sL0OpMl4LpzgmA8PS3xFiLgVTkHkPhYBSsPWg alMrjOK4Metpfd6ytO+hqW1bLzT5KMHM3ulR3+7p+NYIpGDwRlM++d9/BERC6xlv4rtZ LUSl+VegAB7sspB3GZC34eNjvCSnJsbkOZbr0=
In-reply-to: <20100714234857.GD30737@dastard>
References: <1279134201-6890-1-git-send-email-treestem@xxxxxxxxx> <20100714234857.GD30737@dastard>
On Wed, Jul 14, 2010 at 7:48 PM, Dave Chinner <david@xxxxxxxxxxxxx> wrote:

> On Wed, Jul 14, 2010 at 03:03:21PM -0400, Peter Watkins wrote:

[ snip ]

>
> I think you misunderstand what XBF_DONT_BLOCK is for - that's not
> hard because it's not particularly well commented. FYI, it's to
> prevent memory reclaim recursion back into the filesystem during
> memory allocation in the buffer layer while we hold locked items.
> The buffer cache implementation of the flag has changed over time
> (originally an Irix flag, IIRC) so it doesn't quite describe what it
> does on linux anymore.  See xb_to_gfp() and xb_to_km() to find out
> what it triggers.
>
> FWIW, if we wanted to avoid blocking on buffer locks, then we'd be
> passing XBF_TRYLOCK....

Thanks for taking the time to clue me in.


>
> Ok, so the problem here is that we are trying a memory allocation
> with a locked inode buffer, and memory reclaim can trip over that
> locked inode buffer and deadlock. That means we should not allow
> allocation recursion from within xfs_iformat() even though we are
> not in a tranaction context.
>

Which is just what KM_NOFS is for. I see.

> The following patch converts the allocations during inode
> initialisation to use KM_NOFS. It applies to a 2.6.35-rc3 kernel,
> but might apply to a 2.6.27 kernel. Can you test it?

Sure, I'll test it on some of these systems. The bug is not easy to
reproduce, but
I'll try to recreate the load.

Thanks for the help!

>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@xxxxxxxxxxxxx
>
> xfs: fix memory reclaim recursion deadlock on locked inode buffer
>
> From: Dave Chinner <dchinner@xxxxxxxxxx>
>
> Calling into memory reclaim with a locked inode buffer can deadlock
> if memory reclaim tries to lock the inode buffer during inode
> teardown. Convert the relevant memory allocations to use KM_NOFS to
> avoid this deadlock condition.
>
> Reported-by: Peter Watkins <treestem@xxxxxxxxx>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_inode.c |   20 +++++++++++---------
>  1 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 5715a9d..3c206b3 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -422,7 +422,7 @@ xfs_iformat(
>        if (!XFS_DFORK_Q(dip))
>                return 0;
>        ASSERT(ip->i_afp == NULL);
> -       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
> +       ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP|KM_NOFS);
>        ip->i_afp->if_ext_max =
>                XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
>        switch (dip->di_aformat) {
> @@ -505,7 +505,7 @@ xfs_iformat_local(
>                ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
>        else {
>                real_size = roundup(size, 4);
> -               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
> +               ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP|KM_NOFS);
>        }
>        ifp->if_bytes = size;
>        ifp->if_real_bytes = real_size;
> @@ -632,7 +632,7 @@ xfs_iformat_btree(
>        }
>
>        ifp->if_broot_bytes = size;
> -       ifp->if_broot = kmem_alloc(size, KM_SLEEP);
> +       ifp->if_broot = kmem_alloc(size, KM_SLEEP|KM_NOFS);
>        ASSERT(ifp->if_broot != NULL);
>        /*
>         * Copy and convert from the on-disk structure
> @@ -2191,7 +2191,7 @@ xfs_iroot_realloc(
>                 */
>                if (ifp->if_broot_bytes == 0) {
>                        new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
> -                       ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
> +                       ifp->if_broot = kmem_alloc(new_size, 
> KM_SLEEP|KM_NOFS);
>                        ifp->if_broot_bytes = (int)new_size;
>                        return;
>                }
> @@ -2207,7 +2207,7 @@ xfs_iroot_realloc(
>                new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
>                ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
>                                (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* 
> old size */
> -                               KM_SLEEP);
> +                               KM_SLEEP|KM_NOFS);
>                op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
>                                                     ifp->if_broot_bytes);
>                np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
> @@ -2233,7 +2233,7 @@ xfs_iroot_realloc(
>        else
>                new_size = 0;
>        if (new_size > 0) {
> -               new_broot = kmem_alloc(new_size, KM_SLEEP);
> +               new_broot = kmem_alloc(new_size, KM_SLEEP|KM_NOFS);
>                /*
>                 * First copy over the btree block header.
>                 */
> @@ -2337,7 +2337,8 @@ xfs_idata_realloc(
>                real_size = roundup(new_size, 4);
>                if (ifp->if_u1.if_data == NULL) {
>                        ASSERT(ifp->if_real_bytes == 0);
> -                       ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
> +                       ifp->if_u1.if_data = kmem_alloc(real_size,
> +                                                       KM_SLEEP|KM_NOFS);
>                } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
>                        /*
>                         * Only do the realloc if the underlying size
> @@ -2348,11 +2349,12 @@ xfs_idata_realloc(
>                                        kmem_realloc(ifp->if_u1.if_data,
>                                                        real_size,
>                                                        ifp->if_real_bytes,
> -                                                       KM_SLEEP);
> +                                                       KM_SLEEP|KM_NOFS);
>                        }
>                } else {
>                        ASSERT(ifp->if_real_bytes == 0);
> -                       ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP);
> +                       ifp->if_u1.if_data = kmem_alloc(real_size,
> +                                                       KM_SLEEP|KM_NOFS);
>                        memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
>                                ifp->if_bytes);
>                }
>

<Prev in Thread] Current Thread [Next in Thread>