xfs
[Top] [All Lists]

[PATCH 12/18] xfs: split inode AG walking into separate code for reclaim

To: xfs@xxxxxxxxxxx
Subject: [PATCH 12/18] xfs: split inode AG walking into separate code for reclaim
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Fri, 24 Sep 2010 22:31:10 +1000
In-reply-to: <1285331476-23015-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1285331476-23015-1-git-send-email-david@xxxxxxxxxxxxx>
From: Dave Chinner <dchinner@xxxxxxxxxx>

The reclaim walk requires different locking and has a slightly
different walk algorithm, so separate it out so that it can be
optimised separately.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
Reviewed-by: Christoph Hellwig <hch@xxxxxx>
Reviewed-by: Alex Elder <aelder@xxxxxxx>
---
 fs/xfs/linux-2.6/xfs_sync.c    |  202 ++++++++++++++++++----------------------
 fs/xfs/linux-2.6/xfs_sync.h    |    2 +-
 fs/xfs/linux-2.6/xfs_trace.h   |    2 +-
 fs/xfs/quota/xfs_qm_syscalls.c |    3 +-
 fs/xfs/xfs_mount.c             |   26 +++++
 fs/xfs/xfs_mount.h             |    2 +
 6 files changed, 122 insertions(+), 115 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ddeaff9..359422d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -40,78 +40,46 @@
 #include <linux/freezer.h>
 
 
-STATIC xfs_inode_t *
-xfs_inode_ag_lookup(
-       struct xfs_mount        *mp,
-       struct xfs_perag        *pag,
-       uint32_t                *first_index,
-       int                     tag)
-{
-       int                     nr_found;
-       struct xfs_inode        *ip;
-
-       /*
-        * use a gang lookup to find the next inode in the tree
-        * as the tree is sparse and a gang lookup walks to find
-        * the number of objects requested.
-        */
-       if (tag == XFS_ICI_NO_TAG) {
-               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-                               (void **)&ip, *first_index, 1);
-       } else {
-               nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-                               (void **)&ip, *first_index, 1, tag);
-       }
-       if (!nr_found)
-               return NULL;
-
-       /*
-        * Update the index for the next lookup. Catch overflows
-        * into the next AG range which can occur if we have inodes
-        * in the last block of the AG and we are currently
-        * pointing to the last inode.
-        */
-       *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-       if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-               return NULL;
-       return ip;
-}
-
 STATIC int
 xfs_inode_ag_walk(
        struct xfs_mount        *mp,
        struct xfs_perag        *pag,
        int                     (*execute)(struct xfs_inode *ip,
                                           struct xfs_perag *pag, int flags),
-       int                     flags,
-       int                     tag,
-       int                     exclusive,
-       int                     *nr_to_scan)
+       int                     flags)
 {
        uint32_t                first_index;
        int                     last_error = 0;
        int                     skipped;
+       int                     done;
 
 restart:
+       done = 0;
        skipped = 0;
        first_index = 0;
        do {
                int             error = 0;
+               int             nr_found;
                xfs_inode_t     *ip;
 
-               if (exclusive)
-                       write_lock(&pag->pag_ici_lock);
-               else
-                       read_lock(&pag->pag_ici_lock);
-               ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
-               if (!ip) {
-                       if (exclusive)
-                               write_unlock(&pag->pag_ici_lock);
-                       else
-                               read_unlock(&pag->pag_ici_lock);
+               read_lock(&pag->pag_ici_lock);
+               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                               (void **)&ip, first_index, 1);
+               if (!nr_found) {
+                       read_unlock(&pag->pag_ici_lock);
                        break;
                }
 
+               /*
+                * Update the index for the next lookup. Catch overflows
+                * into the next AG range which can occur if we have inodes
+                * in the last block of the AG and we are currently
+                * pointing to the last inode.
+                */
+               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                       done = 1;
+
                /* execute releases pag->pag_ici_lock */
                error = execute(ip, pag, flags);
                if (error == EAGAIN) {
@@ -125,7 +93,7 @@ restart:
                if (error == EFSCORRUPTED)
                        break;
 
-       } while ((*nr_to_scan)--);
+       } while (!done);
 
        if (skipped) {
                delay(1);
@@ -134,73 +102,29 @@ restart:
        return last_error;
 }
 
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
-       struct xfs_mount        *mp,
-       xfs_agnumber_t          *first,
-       int                     tag)
-{
-       struct xfs_perag        *pag = NULL;
-
-       if (tag == XFS_ICI_RECLAIM_TAG) {
-               int found;
-               int ref;
-
-               rcu_read_lock();
-               found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
-                               (void **)&pag, *first, 1, tag);
-               if (found <= 0) {
-                       rcu_read_unlock();
-                       return NULL;
-               }
-               *first = pag->pag_agno + 1;
-               /* open coded pag reference increment */
-               ref = atomic_inc_return(&pag->pag_ref);
-               rcu_read_unlock();
-               trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
-       } else {
-               pag = xfs_perag_get(mp, *first);
-               (*first)++;
-       }
-       return pag;
-}
-
 int
 xfs_inode_ag_iterator(
        struct xfs_mount        *mp,
        int                     (*execute)(struct xfs_inode *ip,
                                           struct xfs_perag *pag, int flags),
-       int                     flags,
-       int                     tag,
-       int                     exclusive,
-       int                     *nr_to_scan)
+       int                     flags)
 {
        struct xfs_perag        *pag;
        int                     error = 0;
        int                     last_error = 0;
        xfs_agnumber_t          ag;
-       int                     nr;
 
-       nr = nr_to_scan ? *nr_to_scan : INT_MAX;
        ag = 0;
-       while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
-               error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-                                               exclusive, &nr);
+       while ((pag = xfs_perag_get(mp, ag))) {
+               ag = pag->pag_agno + 1;
+               error = xfs_inode_ag_walk(mp, pag, execute, flags);
                xfs_perag_put(pag);
                if (error) {
                        last_error = error;
                        if (error == EFSCORRUPTED)
                                break;
                }
-               if (nr <= 0)
-                       break;
        }
-       if (nr_to_scan)
-               *nr_to_scan = nr;
        return XFS_ERROR(last_error);
 }
 
@@ -318,8 +242,7 @@ xfs_sync_data(
 
        ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
-       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-                                     XFS_ICI_NO_TAG, 0, NULL);
+       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
        if (error)
                return XFS_ERROR(error);
 
@@ -337,8 +260,7 @@ xfs_sync_attr(
 {
        ASSERT((flags & ~SYNC_WAIT) == 0);
 
-       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-                                    XFS_ICI_NO_TAG, 0, NULL);
+       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
 }
 
 STATIC int
@@ -859,13 +781,72 @@ reclaim:
 
 }
 
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk will leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+       struct xfs_mount        *mp,
+       int                     flags,
+       int                     *nr_to_scan)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       ag = 0;
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               unsigned long   first_index = 0;
+               int             done = 0;
+
+               ag = pag->pag_agno + 1;
+
+               do {
+                       struct xfs_inode *ip;
+                       int     nr_found;
+
+                       write_lock(&pag->pag_ici_lock);
+                       nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+                                       (void **)&ip, first_index, 1,
+                                       XFS_ICI_RECLAIM_TAG);
+                       if (!nr_found) {
+                               write_unlock(&pag->pag_ici_lock);
+                               break;
+                       }
+
+                       /*
+                        * Update the index for the next lookup. Catch overflows
+                        * into the next AG range which can occur if we have inodes
+                        * in the last block of the AG and we are currently
+                        * pointing to the last inode.
+                        */
+                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                               done = 1;
+
+                       error = xfs_reclaim_inode(ip, pag, flags);
+                       if (error && last_error != EFSCORRUPTED)
+                               last_error = error;
+
+               } while (!done && (*nr_to_scan)--);
+
+               xfs_perag_put(pag);
+       }
+       return XFS_ERROR(last_error);
+}
+
 int
 xfs_reclaim_inodes(
        xfs_mount_t     *mp,
        int             mode)
 {
-       return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-                                       XFS_ICI_RECLAIM_TAG, 1, NULL);
+       int             nr_to_scan = INT_MAX;
+
+       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
 }
 
 /*
@@ -887,17 +868,16 @@ xfs_reclaim_inode_shrink(
                if (!(gfp_mask & __GFP_FS))
                        return -1;
 
-               xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
-                                       XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
-               /* if we don't exhaust the scan, don't bother coming back */
+               xfs_reclaim_inodes_ag(mp, 0, &nr_to_scan);
+               /* terminate if we don't exhaust the scan */
                if (nr_to_scan > 0)
                        return -1;
        }
 
        reclaimable = 0;
        ag = 0;
-       while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
-                                       XFS_ICI_RECLAIM_TAG))) {
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               ag = pag->pag_agno + 1;
                reclaimable += pag->pag_ici_reclaimable;
                xfs_perag_put(pag);
        }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index fe78726..e8a3528 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -50,7 +50,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-       int flags, int tag, int write_lock, int *nr_to_scan);
+       int flags);
 
 void xfs_inode_shrinker_register(struct xfs_mount *mp);
 void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2a1d4fb..286dc20 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
                 unsigned long caller_ip),                                      \
        TP_ARGS(mp, agno, refcount, caller_ip))
 DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
 DEFINE_PERAG_REF_EVENT(xfs_perag_put);
 DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
 DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 7a71336..ac11fbe 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -918,8 +918,7 @@ xfs_qm_dqrele_all_inodes(
        uint             flags)
 {
        ASSERT(mp->m_quotainfo);
-       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
-                               XFS_ICI_NO_TAG, 0, NULL);
+       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 912101d..d66e87c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -219,6 +219,32 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
        return pag;
 }
 
+/*
+ * search from @first to find the next perag with the given tag set.
+ */
+struct xfs_perag *
+xfs_perag_get_tag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          first,
+       int                     tag)
+{
+       struct xfs_perag        *pag;
+       int                     found;
+       int                     ref;
+
+       rcu_read_lock();
+       found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+                                       (void **)&pag, first, 1, tag);
+       if (found <= 0) {
+               rcu_read_unlock();
+               return NULL;
+       }
+       ref = atomic_inc_return(&pag->pag_ref);
+       rcu_read_unlock();
+       trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
+       return pag;
+}
+
 void
 xfs_perag_put(struct xfs_perag *pag)
 {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 622da21..7ab2409 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -327,6 +327,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
  * perag get/put wrappers for ref counting
  */
 struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
+                                       int tag);
 void   xfs_perag_put(struct xfs_perag *pag);
 
 /*
-- 
1.7.1

<Prev in Thread] Current Thread [Next in Thread>