xfs
[Top] [All Lists]

Re: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free

To: xfs@xxxxxxxxxxx
Subject: Re: [PATCH] xfs: fix mmap_sem vs iolock lock order inversion in xfs_free_eofblocks
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Fri, 30 Oct 2009 04:55:25 -0400
In-reply-to: <20091019040346.GB21115@xxxxxxxxxxxxx>
References: <20091019040346.GB21115@xxxxxxxxxxxxx>
User-agent: Mutt/1.5.19 (2009-01-05)
ping?

On Mon, Oct 19, 2009 at 12:03:46AM -0400, Christoph Hellwig wrote:
> When xfs_free_eofblocks is called from ->release the VM might already
> hold the mmap_sem, but in the write path we take the iolock before
> taking the mmap_sem in the generic write code.
> 
> Switch xfs_free_eofblocks to only trylock the iolock if called from ->release
> and skip trimming the prellocated blocks in that case.  We'll still free
> them later on the final iput.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> 
> Index: xfs/fs/xfs/xfs_rw.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_rw.h  2009-10-14 17:30:02.396028076 +0200
> +++ xfs/fs/xfs/xfs_rw.h       2009-10-14 17:30:20.472287472 +0200
> @@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_
>  }
>  
>  /*
> - * Flags for xfs_free_eofblocks
> - */
> -#define XFS_FREE_EOF_LOCK    (1<<0)
> -#define XFS_FREE_EOF_NOLOCK  (1<<1)
> -
> -
> -/*
>   * helper function to extract extent size hint from inode
>   */
>  STATIC_INLINE xfs_extlen_t
> Index: xfs/fs/xfs/xfs_vnodeops.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_vnodeops.c    2009-10-14 17:30:13.363024201 +0200
> +++ xfs/fs/xfs/xfs_vnodeops.c 2009-10-14 17:35:39.314256388 +0200
> @@ -709,6 +709,11 @@ xfs_fsync(
>  }
>  
>  /*
> + * Flags for xfs_free_eofblocks
> + */
> +#define XFS_FREE_EOF_TRYLOCK (1<<0)
> +
> +/*
>   * This is called by xfs_inactive to free any blocks beyond eof
>   * when the link count isn't zero and by xfs_dm_punch_hole() when
>   * punching a hole to EOF.
> @@ -726,7 +731,6 @@ xfs_free_eofblocks(
>       xfs_filblks_t   map_len;
>       int             nimaps;
>       xfs_bmbt_irec_t imap;
> -     int             use_iolock = (flags & XFS_FREE_EOF_LOCK);
>  
>       /*
>        * Figure out if there are any blocks beyond the end
> @@ -768,14 +772,19 @@ xfs_free_eofblocks(
>                * cache and we can't
>                * do that within a transaction.
>                */
> -             if (use_iolock)
> +             if (flags & XFS_FREE_EOF_TRYLOCK) {
> +                     if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
> +                             xfs_trans_cancel(tp, 0);
> +                             return 0;
> +                     }
> +             } else {
>                       xfs_ilock(ip, XFS_IOLOCK_EXCL);
> +             }
>               error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
>                                   ip->i_size);
>               if (error) {
>                       xfs_trans_cancel(tp, 0);
> -                     if (use_iolock)
> -                             xfs_iunlock(ip, XFS_IOLOCK_EXCL);
> +                     xfs_iunlock(ip, XFS_IOLOCK_EXCL);
>                       return error;
>               }
>  
> @@ -812,8 +821,7 @@ xfs_free_eofblocks(
>                       error = xfs_trans_commit(tp,
>                                               XFS_TRANS_RELEASE_LOG_RES);
>               }
> -             xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
> -                                         : XFS_ILOCK_EXCL));
> +             xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
>       }
>       return error;
>  }
> @@ -1113,7 +1121,17 @@ xfs_release(
>                    (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
>                   (!(ip->i_d.di_flags &
>                               (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
> -                     error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
> +
> +                     /*
> +                      * If we can't get the iolock just skip truncating
> +                      * the blocks past EOF because we could deadlock
> +                      * with the mmap_sem otherwise.  We'll get another
> +                      * chance to drop them once the last reference to
> +                      * the inode is dropped, so we'll never leak blocks
> +                      * permanently.
> +                      */
> +                     error = xfs_free_eofblocks(mp, ip,
> +                                                XFS_FREE_EOF_TRYLOCK);
>                       if (error)
>                               return error;
>               }
> @@ -1184,7 +1202,7 @@ xfs_inactive(
>                    (!(ip->i_d.di_flags &
>                               (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
>                     (ip->i_delayed_blks != 0)))) {
> -                     error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
> +                     error = xfs_free_eofblocks(mp, ip, 0);
>                       if (error)
>                               return VN_INACTIVE_CACHE;
>               }
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
---end quoted text---

<Prev in Thread] Current Thread [Next in Thread>