xfs
[Top] [All Lists]

RE: [PATCH] xfs: I/O completion handlers must use NOFS allocations

To: "Christoph Hellwig" <hch@xxxxxxxxxxxxx>
Subject: RE: [PATCH] xfs: I/O completion handlers must use NOFS allocations
From: "Alex Elder" <aelder@xxxxxxx>
Date: Mon, 2 Nov 2009 14:34:01 -0600
Cc: "Thomas Neumann" <tneumann@xxxxxxxxxxxxxxxxxxxxx>, <xfs@xxxxxxxxxxx>
In-reply-to: <20091019040003.GA21115@xxxxxxxxxxxxx>
Thread-index: AcpQchCAk8Ba56CpQRWbxrqJPuWXawLiHbSw
Thread-topic: [PATCH] xfs: I/O completion handlers must use NOFS allocations
Christoph Hellwig wrote:
> When complention I/O requests we must not allow the memory allocator to
> recurse into the filesystem, as we might deadlock on waiting for the
> I/O completion otherwise.  The only thing currently allocting normal
> GFP_KERNEL memory is the allocation of the transaction structure for the
> unwritten extent conversion.  Add a memflags argument to _xfs_trans_alloc
> to allow controlling the allocator behaviour.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> Reported-by: Thomas Neumann <tneumann@xxxxxxxxxxxxxxxxxxxxx>
> Tested-by: Thomas Neumann <tneumann@xxxxxxxxxxxxxxxxxxxxx>

This looks good.  It's kind of too bad the GFP_ flag
argument had to be added, but I can't think of a cleaner
way than the way you did it.

I have one minor suggestion on wording used in a comment,
but the code looks correct to me.  I verified that--as you
say--the only place we're doing an allocation in an I/O
completion handler (i.e., a function called via
io_end->io_work->func) is in xfs_iomap_write_unwritten(),
so this fix should cover the only case.

Sorry it took so long to get to this.

Reviewed-by: Alex Elder <aelder@xxxxxxx>

> Index: linux-2.6/fs/xfs/xfs_fsops.c
> ===================================================================
> --- linux-2.6.orig/fs/xfs/xfs_fsops.c 2009-10-16 23:08:25.170027882 +0200
> +++ linux-2.6/fs/xfs/xfs_fsops.c      2009-10-16 23:09:22.904256700 +0200
> @@ -611,7 +611,7 @@ xfs_fs_log_dummy(
>       xfs_inode_t     *ip;
>       int             error;
> 
> -     tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
> +     tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
>       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
>       if (error) {
>               xfs_trans_cancel(tp, 0);
> Index: linux-2.6/fs/xfs/xfs_iomap.c
> ===================================================================
> --- linux-2.6.orig/fs/xfs/xfs_iomap.c 2009-10-16 23:10:14.342278346 +0200
> +++ linux-2.6/fs/xfs/xfs_iomap.c      2009-10-16 23:12:08.148004392 +0200
> @@ -860,8 +860,15 @@ xfs_iomap_write_unwritten(
>                * set up a transaction to convert the range of extents
>                * from unwritten to real. Do allocations in a loop until
>                * we have covered the range passed in.
> +              *
> +              * Note that we opencoding the transaction allocation here
> +              * to pass KM_NOFS - we can't risk to recurse back into


How about, "we can't risk recursing back into"


> +              * the filesystem here as we might be asked to write out
> +              * the same inode that we complete here and might deadlock
> +              * on the iolock.
>                */
> -             tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
> +             xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
> +             tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
>               tp->t_flags |= XFS_TRANS_RESERVE;
>               error = xfs_trans_reserve(tp, resblks,
>                               XFS_WRITE_LOG_RES(mp), 0,
> Index: linux-2.6/fs/xfs/xfs_mount.c
> ===================================================================
> --- linux-2.6.orig/fs/xfs/xfs_mount.c 2009-10-16 23:08:25.147024815 +0200
> +++ linux-2.6/fs/xfs/xfs_mount.c      2009-10-16 23:09:25.127005437 +0200
> @@ -1471,7 +1471,7 @@ xfs_log_sbcount(
>       if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
>               return 0;
> 
> -     tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
> +     tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
>       error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
>                                       XFS_DEFAULT_LOG_COUNT);
>       if (error) {
> Index: linux-2.6/fs/xfs/xfs_trans.c
> ===================================================================
> --- linux-2.6.orig/fs/xfs/xfs_trans.c 2009-10-16 23:08:25.117003816 +0200
> +++ linux-2.6/fs/xfs/xfs_trans.c      2009-10-16 23:08:58.202005942 +0200
> @@ -236,19 +236,20 @@ xfs_trans_alloc(
>       uint            type)
>  {
>       xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
> -     return _xfs_trans_alloc(mp, type);
> +     return _xfs_trans_alloc(mp, type, KM_SLEEP);
>  }
> 
>  xfs_trans_t *
>  _xfs_trans_alloc(
>       xfs_mount_t     *mp,
> -     uint            type)
> +     uint            type,
> +     uint            memflags)
>  {
>       xfs_trans_t     *tp;
> 
>       atomic_inc(&mp->m_active_trans);
> 
> -     tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
> +     tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
>       tp->t_magic = XFS_TRANS_MAGIC;
>       tp->t_type = type;
>       tp->t_mountp = mp;
> Index: linux-2.6/fs/xfs/xfs_trans.h
> ===================================================================
> --- linux-2.6.orig/fs/xfs/xfs_trans.h 2009-10-16 23:08:25.127003692 +0200
> +++ linux-2.6/fs/xfs/xfs_trans.h      2009-10-16 23:09:04.726256000 +0200
> @@ -924,7 +924,7 @@ typedef struct xfs_trans {
>   * XFS transaction mechanism exported interfaces.
>   */
>  xfs_trans_t  *xfs_trans_alloc(struct xfs_mount *, uint);
> -xfs_trans_t  *_xfs_trans_alloc(struct xfs_mount *, uint);
> +xfs_trans_t  *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
>  xfs_trans_t  *xfs_trans_dup(xfs_trans_t *);
>  int          xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
>                                 uint, uint);
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

<Prev in Thread] Current Thread [Next in Thread>