xfs
[Top] [All Lists]

[RFC PATCH 2/4] xfs: create function to scan and clear EOFBLOCKS inodes

To: xfs@xxxxxxxxxxx
Subject: [RFC PATCH 2/4] xfs: create function to scan and clear EOFBLOCKS inodes
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Mon, 27 Aug 2012 15:51:49 -0400
In-reply-to: <1346097111-4476-1-git-send-email-bfoster@xxxxxxxxxx>
References: <1346097111-4476-1-git-send-email-bfoster@xxxxxxxxxx>
xfs_inodes_free_eofblocks() implements scanning functionality for
EOFBLOCKS inodes. It scans the radix tree and frees post-EOF blocks
for inodes that meet particular criteria. The scan can be filtered
by a particular quota type/id and minimum file size. The scan can
also be invoked in trylock mode or wait (force) mode.

The xfs_free_eofblocks() helper is invoked to clear post-EOF space.
It is slightly modified to support an output parameter that
indicates whether space was freed and helps decide whether the
EOFBLOCKS tag should be cleared in trylock scans.

Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>
---
 fs/xfs/xfs_sync.c     |  168 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_sync.h     |    3 +
 fs/xfs/xfs_vnodeops.c |   17 +++--
 fs/xfs/xfs_vnodeops.h |    2 +
 4 files changed, 184 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 5e14741..27c3c46 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -971,6 +971,174 @@ xfs_reclaim_inodes_count(
        return reclaimable;
 }
 
+/*
+ * Free post-EOF blocks on an EOFBLOCKS tagged inode. For a forced scan
+ * (EOFBLOCKS_WAIT), wait on the iolock here instead of using the trylock in
+ * xfs_free_eofblocks(). Returns 0 or the xfs_free_eofblocks() error.
+ * We rely on the output parameter from xfs_free_eofblocks() to determine
+ * whether we should clear the tag because in the trylock case, it could have
+ * skipped the inode due to lock contention.
+ */
+STATIC int
+xfs_inode_free_eofblocks(
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+       int ret = 0;
+       bool freed = false;
+       bool wait_iolock = (flags & EOFBLOCKS_WAIT) ? true : false;
+
+       if (wait_iolock)
+               xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       if ((S_ISREG(ip->i_d.di_mode) &&
+            (VFS_I(ip)->i_size > 0 ||
+            (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
+            (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
+           (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+               /* !wait_iolock == need_iolock in xfs_free_eofblocks() */
+               ret = xfs_free_eofblocks(ip->i_mount, ip, !wait_iolock, &freed);
+               if (freed)
+                       xfs_inode_clear_eofblocks_tag(ip);
+       } else {
+               /* inode could be preallocated or append-only */
+               xfs_inode_clear_eofblocks_tag(ip);
+       }
+
+       if (wait_iolock)
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+       return ret;
+}
+
+/*
+ * Return nonzero if the inode's uid, gid or project id (per qtype) matches id.
+ */
+STATIC int
+xfs_inode_match_quota_id(
+       struct xfs_inode        *ip,
+       int                     qtype,
+       uint32_t                id)
+{
+       switch (qtype) {
+       case XFS_DQ_USER:
+               return ip->i_d.di_uid == id;
+       case XFS_DQ_GROUP:
+               return ip->i_d.di_gid == id;
+       default:        /* any other qtype is matched against the project id */
+               return xfs_get_projid(ip) == id;
+       }
+
+       return 0;       /* not reached: every switch case returns */
+}
+
+/*
+ * Walk the per-AG inode radix trees for EOFBLOCKS tagged inodes and free the
+ * post-EOF blocks of those passing the qtype/id and min_file_size filters.
+ * Mostly copied from xfs_reclaim_inodes_ag(). Returns the last error seen.
+ * TODO: could ag_iterator be enhanced to support a tag and be used instead?
+ */
+int
+xfs_inodes_free_eofblocks(
+       struct xfs_mount        *mp,
+       int                     qtype,
+       uint32_t                id,
+       uint64_t                min_file_size,
+       int                     flags)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       ag = 0;
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_EOFBLOCKS_TAG))) {
+               unsigned long   first_index = 0;
+               int             nr_found = 0;
+               int             done = 0;
+
+               ag = pag->pag_agno + 1;
+
+               do {
+                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+                       int     i;
+
+                       rcu_read_lock();
+                       nr_found = radix_tree_gang_lookup_tag(
+                                       &pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH,
+                                       XFS_ICI_EOFBLOCKS_TAG);
+                       if (!nr_found) {
+                               rcu_read_unlock();
+                               break;
+                       }
+
+                       /*
+                        * Grab the inodes before we drop the RCU read lock.
+                        * Inodes we skip or fail to grab are NULLed in the batch.
+                        */
+                       for (i = 0; i < nr_found; i++) {
+                               struct xfs_inode *ip = batch[i];
+
+                               if (done || xfs_inode_ag_walk_grab(ip))
+                                       batch[i] = NULL;
+
+                               /*
+                                * Update the index for the next lookup. Catch
+                                * overflows into the next AG range which can occur if
+                                * we have inodes in the last block of the AG and we
+                                * are currently pointing to the last inode.
+                                *
+                                * Because we may see inodes that are from the wrong AG
+                                * due to RCU freeing and reallocation, only update the
+                                * index if it lies in this AG. It was a race that led
+                                * us to see this inode, so another lookup from the
+                                * same index will not find it again.
+                                */
+                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+                                                               pag->pag_agno)
+                                       continue;
+                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 
1);
+                               if (first_index < XFS_INO_TO_AGINO(mp, 
ip->i_ino))
+                                       done = 1;
+                       }
+
+                       /* unlock now we've grabbed the inodes. */
+                       rcu_read_unlock();
+
+                       for (i = 0; i < nr_found; i++) {
+                               if (!batch[i])
+                                       continue;
+
+                               /* default projid represents a full scan */
+                               if ((!(qtype == XFS_DQ_PROJ &&
+                                      id == XFS_PROJID_DEFAULT) &&
+                                    !xfs_inode_match_quota_id(batch[i], qtype,
+                                                              id)) ||
+                                   (min_file_size && XFS_ISIZE(batch[i]) < 
+                                                               min_file_size)
+                                  ) {
+                                       IRELE(batch[i]);
+                                       continue;
+                               }
+
+                               error = xfs_inode_free_eofblocks(batch[i], 
flags);
+                               IRELE(batch[i]);
+                               if (error)
+                                       last_error = error; /* keep scanning */
+                       }
+
+                       cond_resched();
+
+               } while (nr_found && !done);
+
+               xfs_perag_put(pag);
+       }
+
+       return XFS_ERROR(last_error);
+}
+
 STATIC void
 __xfs_inode_set_eofblocks_tag(
        struct xfs_perag        *pag,
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 4486491..78aca41 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -43,8 +43,11 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, 
struct xfs_inode *ip);
 void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
                                struct xfs_inode *ip);
 
+#define EOFBLOCKS_WAIT         0x0001
+
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
 void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
+int xfs_inodes_free_eofblocks(struct xfs_mount *, int, uint32_t, uint64_t, 
int);
 
 int xfs_sync_inode_grab(struct xfs_inode *ip);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 658ee2e..53460f3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -150,11 +150,12 @@ xfs_readlink(
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
  */
-STATIC int
+int
 xfs_free_eofblocks(
        xfs_mount_t     *mp,
        xfs_inode_t     *ip,
-       bool            need_iolock)
+       bool            need_iolock,
+       bool            *blocks_freed)
 {
        xfs_trans_t     *tp;
        int             error;
@@ -237,6 +238,9 @@ xfs_free_eofblocks(
                } else {
                        error = xfs_trans_commit(tp,
                                                XFS_TRANS_RELEASE_LOG_RES);
+                       if (blocks_freed)
+                               *blocks_freed = true;
+
                }
 
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -391,6 +395,7 @@ xfs_release(
 {
        xfs_mount_t     *mp = ip->i_mount;
        int             error;
+       bool            freed = false;
 
        if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
                return 0;
@@ -463,11 +468,11 @@ xfs_release(
                if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
                        return 0;
 
-               error = xfs_free_eofblocks(mp, ip, true);
+               error = xfs_free_eofblocks(mp, ip, true, &freed);
                if (error)
                        return error;
-
-               xfs_inode_clear_eofblocks_tag(ip);
+               if (freed)
+                       xfs_inode_clear_eofblocks_tag(ip);
 
                /* delalloc blocks after truncation means it really is dirty */
                if (ip->i_delayed_blks)
@@ -522,7 +527,7 @@ xfs_inactive(
                    (!(ip->i_d.di_flags &
                                (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
                     ip->i_delayed_blks != 0))) {
-                       error = xfs_free_eofblocks(mp, ip, false);
+                       error = xfs_free_eofblocks(mp, ip, false, NULL);
                        if (error)
                                return VN_INACTIVE_CACHE;
                        xfs_inode_clear_eofblocks_tag(ip);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 447e146..918d24d 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -57,5 +57,7 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
 int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
 
 int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+int xfs_free_eofblocks(struct xfs_mount *, struct xfs_inode *, bool, bool *);
+
 
 #endif /* _XFS_VNODEOPS_H */
-- 
1.7.7.6

<Prev in Thread] Current Thread [Next in Thread>