xfs
[Top] [All Lists]

[PATCH 3/3] xfs: do not wait for the iolock in xfs_free_eofblocks

To: xfs@xxxxxxxxxxx
Subject: [PATCH 3/3] xfs: do not wait for the iolock in xfs_free_eofblocks
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Fri, 31 Jul 2009 17:03:43 -0400
References: <20090731210340.638586000@xxxxxxxxxxxxxxxxxxxxxx>
User-agent: quilt/0.47-1
For both callers of xfs_free_eofblocks taking the iolock in blocking
fashion causes problems:

 - in xfs_release is causes a lock order reversal with the mmap lock
 - due to xfs_inactive beeing called from reclaim context it would
   forbid memory allocation under the iolock.

Just making the lock acquisition non-blocking solves both issues.  In
xfs_release that will leave preallocation on the file, but we'll get
rid of them later, and in xfs_incative the iolock can never be held
by anyone (see rationale in the previous patch).

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: linux-2.6/fs/xfs/xfs_rw.h
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_rw.h      2009-07-31 18:41:28.413339736 +0200
+++ linux-2.6/fs/xfs/xfs_rw.h   2009-07-31 18:45:38.737339768 +0200
@@ -37,13 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_
 }
 
 /*
- * Flags for xfs_free_eofblocks
- */
-#define XFS_FREE_EOF_LOCK      (1<<0)
-#define XFS_FREE_EOF_NOLOCK    (1<<1)
-
-
-/*
  * helper function to extract extent size hint from inode
  */
 STATIC_INLINE xfs_extlen_t
@@ -78,10 +71,4 @@ extern int xfs_read_buf(struct xfs_mount
 extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
                                xfs_buf_t *bp, xfs_daddr_t blkno);
 
-/*
- * Prototypes for functions in xfs_vnodeops.c.
- */
-extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
-                       int flags);
-
 #endif /* __XFS_RW_H__ */
Index: linux-2.6/fs/xfs/xfs_vnodeops.c
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_vnodeops.c        2009-07-31 18:45:37.483080484 
+0200
+++ linux-2.6/fs/xfs/xfs_vnodeops.c     2009-07-31 19:21:33.305080480 +0200
@@ -718,12 +718,11 @@ xfs_fsync(
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
  */
-int
+STATIC int
 xfs_free_eofblocks(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *ip,
-       int             flags)
+       xfs_inode_t     *ip)
 {
+       xfs_mount_t     *mp = ip->i_mount;
        xfs_trans_t     *tp;
        int             error;
        xfs_fileoff_t   end_fsb;
@@ -731,7 +730,6 @@ xfs_free_eofblocks(
        xfs_filblks_t   map_len;
        int             nimaps;
        xfs_bmbt_irec_t imap;
-       int             use_iolock = (flags & XFS_FREE_EOF_LOCK);
 
        /*
         * Figure out if there are any blocks beyond the end
@@ -749,77 +747,78 @@ xfs_free_eofblocks(
                          NULL, 0, &imap, &nimaps, NULL, NULL);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-       if (!error && (nimaps != 0) &&
-           (imap.br_startblock != HOLESTARTBLOCK ||
-            ip->i_delayed_blks)) {
-               /*
-                * Attach the dquots to the inode up front.
-                */
-               error = xfs_qm_dqattach(ip, 0);
-               if (error)
-                       return error;
+       if (error || nimaps == 0 ||
+           (imap.br_startblock == HOLESTARTBLOCK && !ip->i_delayed_blks))
+               return error;
 
-               /*
-                * There are blocks after the end of file.
-                * Free them up now by truncating the file to
-                * its current size.
-                */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+       /*
+        * Attach the dquots to the inode up front.
+        */
+       error = xfs_qm_dqattach(ip, 0);
+       if (error)
+               return error;
 
-               /*
-                * Do the xfs_itruncate_start() call before
-                * reserving any log space because
-                * itruncate_start will call into the buffer
-                * cache and we can't
-                * do that within a transaction.
-                */
-               if (use_iolock)
-                       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-               error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
-                                   ip->i_size);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       if (use_iolock)
-                               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                       return error;
-               }
+       /*
+        * There are blocks after the end of file.
+        *
+        * Free them up now by truncating the file to its current size.
+        */
+       tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
 
-               error = xfs_trans_reserve(tp, 0,
-                                         XFS_ITRUNCATE_LOG_RES(mp),
-                                         0, XFS_TRANS_PERM_LOG_RES,
-                                         XFS_ITRUNCATE_LOG_COUNT);
-               if (error) {
-                       ASSERT(XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp, 0);
-                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-                       return error;
-               }
+       /*
+        * We can't just take the iolock here normally because that may
+        * cause a lock order reversal or deadlocks due to memory allocations.
+        *
+        * But in the worse case we'll leave a superflous allocation on the
+        * inode that will get purged the next time we drop the last reference
+        * to a file pointing to this inode or totally evicting this inode.
+        */
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+               xfs_trans_cancel(tp, 0);
+               return EAGAIN;
+       }
 
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               xfs_trans_ijoin(tp, ip,
-                               XFS_IOLOCK_EXCL |
-                               XFS_ILOCK_EXCL);
-               xfs_trans_ihold(tp, ip);
+       /*
+        * Do the xfs_itruncate_start() call before reserving any log space
+        * because itruncate_start will call into the buffer cache and we
+        * can't do that within a transaction.
+        */
+       error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, ip->i_size);
+       if (error)
+               goto out_cancel;
 
-               error = xfs_itruncate_finish(&tp, ip,
-                                            ip->i_size,
-                                            XFS_DATA_FORK,
-                                            0);
-               /*
-                * If we get an error at this point we
-                * simply don't bother truncating the file.
-                */
-               if (error) {
-                       xfs_trans_cancel(tp,
-                                        (XFS_TRANS_RELEASE_LOG_RES |
-                                         XFS_TRANS_ABORT));
-               } else {
-                       error = xfs_trans_commit(tp,
-                                               XFS_TRANS_RELEASE_LOG_RES);
-               }
-               xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
-                                           : XFS_ILOCK_EXCL));
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp),
+                                 0, XFS_TRANS_PERM_LOG_RES,
+                                 XFS_ITRUNCATE_LOG_COUNT);
+       if (error) {
+               ASSERT(XFS_FORCED_SHUTDOWN(mp));
+               goto out_cancel;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+       xfs_trans_ihold(tp, ip);
+
+       /*
+        * If we get an error at this point we simply don't bother truncating
+        * the file.
+        */
+       error = xfs_itruncate_finish(&tp, ip, ip->i_size, XFS_DATA_FORK, 0);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                    XFS_TRANS_ABORT);
+               goto out_unlock;
        }
+
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
+       return error;
+
+ out_cancel:
+       xfs_trans_cancel(tp, 0);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        return error;
 }
 
@@ -1116,8 +1115,8 @@ xfs_release(
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
                    (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
-                       error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
-                       if (error)
+                       error = xfs_free_eofblocks(ip);
+                       if (error && error != EAGAIN)
                                return error;
                }
        }
@@ -1187,7 +1186,8 @@ xfs_inactive(
                     (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
                      (ip->i_delayed_blks != 0)))) {
-                       error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
+                       error = xfs_free_eofblocks(ip);
+                       ASSERT(error != EAGAIN);
                        if (error)
                                return VN_INACTIVE_CACHE;
                }

<Prev in Thread] Current Thread [Next in Thread>