xfs
[Top] [All Lists]

[PATCH] xfs: stable update for 2.6.32.x and 2.6.33.y

To: stable@xxxxxxxxxx
Subject: [PATCH] xfs: stable update for 2.6.32.x and 2.6.33.y
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Tue, 4 May 2010 12:58:20 +1000
Cc: xfs@xxxxxxxxxxx
User-agent: Mutt/1.5.20 (2009-06-14)
G'day,

The following patch that adds an inode reclaim shrinker needs to be
added to both the 2.6.32.x stable series and the 2.6.33.y stable
series.

The regression this patch addresses was introduced in the previous
round of XFS stable updates that have just been released (2.6.32.12
and 2.6.33.3). However, the fix wasn't upstream until after those
stable kernels were released, so please consider this for the next
stable release.

The same patch applies to the XFS code in both 2.6.32.12 and
2.6.33.3 trees, and has passed a run of xfsqa on both kernels with
limited memory to trigger the OOM conditions that lead to problems.
The upstream commit is 9bf729c0af67897ea8498ce17c29b0683f7f2028.

Cheers,

Dave.
-- 
Dave Chinner
david@xxxxxxxxxxxxx

xfs: add a shrinker to background inode reclaim

From: Dave Chinner <dchinner@xxxxxxxxxx>

>From 9bf729c0af67897ea8498ce17c29b0683f7f2028 Thu, 29 Apr 2010 21:22:13 +0000

On low memory boxes or those with highmem, kernel can OOM before the
background reclaims inodes via xfssyncd. Add a shrinker to run inode
reclaim so that inode reclaim is expedited when memory is low.

This is more complex than it needs to be because the VM folk don't
want a context added to the shrinker infrastructure. Hence we need
to add a global list of XFS mount structures so the shrinker can
traverse them.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
Reviewed-by: Christoph Hellwig <hch@xxxxxx>

---
 fs/xfs/linux-2.6/xfs_super.c   |    5 ++
 fs/xfs/linux-2.6/xfs_sync.c    |  107 +++++++++++++++++++++++++++++++++++++---
 fs/xfs/linux-2.6/xfs_sync.h    |    7 ++-
 fs/xfs/quota/xfs_qm_syscalls.c |    3 +-
 fs/xfs/xfs_ag.h                |    1 +
 fs/xfs/xfs_mount.h             |    1 +
 6 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 77414db..146d491 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1160,6 +1160,7 @@ xfs_fs_put_super(
 
        xfs_unmountfs(mp);
        xfs_freesb(mp);
+       xfs_inode_shrinker_unregister(mp);
        xfs_icsb_destroy_counters(mp);
        xfs_close_devices(mp);
        xfs_dmops_put(mp);
@@ -1523,6 +1524,8 @@ xfs_fs_fill_super(
        if (error)
                goto fail_vnrele;
 
+       xfs_inode_shrinker_register(mp);
+
        kfree(mtpt);
        return 0;
 
@@ -1767,6 +1770,7 @@ init_xfs_fs(void)
                goto out_cleanup_procfs;
 
        vfs_initquota();
+       xfs_inode_shrinker_init();
 
        error = register_filesystem(&xfs_fs_type);
        if (error)
@@ -1794,6 +1798,7 @@ exit_xfs_fs(void)
 {
        vfs_exitquota();
        unregister_filesystem(&xfs_fs_type);
+       xfs_inode_shrinker_destroy();
        xfs_sysctl_unregister();
        xfs_cleanup_procfs();
        xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6b6b394..57adf2d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
                                           struct xfs_perag *pag, int flags),
        int                     flags,
        int                     tag,
-       int                     exclusive)
+       int                     exclusive,
+       int                     *nr_to_scan)
 {
        struct xfs_perag        *pag = &mp->m_perag[ag];
        uint32_t                first_index;
@@ -135,7 +136,7 @@ restart:
                if (error == EFSCORRUPTED)
                        break;
 
-       } while (1);
+       } while ((*nr_to_scan)--);
 
        if (skipped) {
                delay(1);
@@ -153,23 +154,30 @@ xfs_inode_ag_iterator(
                                           struct xfs_perag *pag, int flags),
        int                     flags,
        int                     tag,
-       int                     exclusive)
+       int                     exclusive,
+       int                     *nr_to_scan)
 {
        int                     error = 0;
        int                     last_error = 0;
        xfs_agnumber_t          ag;
+       int                     nr;
 
+       nr = nr_to_scan ? *nr_to_scan : INT_MAX;
        for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
                if (!mp->m_perag[ag].pag_ici_init)
                        continue;
                error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
-                                               exclusive);
+                                               exclusive, &nr);
                if (error) {
                        last_error = error;
                        if (error == EFSCORRUPTED)
                                break;
                }
+               if (nr <= 0)
+                       break;
        }
+       if (nr_to_scan)
+               *nr_to_scan = nr;
        return XFS_ERROR(last_error);
 }
 
@@ -289,7 +297,7 @@ xfs_sync_data(
        ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
        error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-                                     XFS_ICI_NO_TAG, 0);
+                                     XFS_ICI_NO_TAG, 0, NULL);
        if (error)
                return XFS_ERROR(error);
 
@@ -311,7 +319,7 @@ xfs_sync_attr(
        ASSERT((flags & ~SYNC_WAIT) == 0);
 
        return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-                                    XFS_ICI_NO_TAG, 0);
+                                    XFS_ICI_NO_TAG, 0, NULL);
 }
 
 STATIC int
@@ -679,6 +687,7 @@ __xfs_inode_set_reclaim_tag(
        radix_tree_tag_set(&pag->pag_ici_root,
                           XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
                           XFS_ICI_RECLAIM_TAG);
+       pag->pag_ici_reclaimable++;
 }
 
 /*
@@ -710,6 +719,7 @@ __xfs_inode_clear_reclaim_tag(
 {
        radix_tree_tag_clear(&pag->pag_ici_root,
                        XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+       pag->pag_ici_reclaimable--;
 }
 
 STATIC int
@@ -770,5 +780,88 @@ xfs_reclaim_inodes(
        int             mode)
 {
        return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-                                       XFS_ICI_RECLAIM_TAG, 1);
+                                       XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+       int             nr_to_scan,
+       gfp_t           gfp_mask)
+{
+       struct xfs_mount *mp;
+       xfs_agnumber_t  ag;
+       int             reclaimable = 0;
+
+       if (nr_to_scan) {
+               if (!(gfp_mask & __GFP_FS))
+                       return -1;
+
+               down_read(&xfs_mount_list_lock);
+               list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+                       xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+                                       XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+                       if (nr_to_scan <= 0)
+                               break;
+               }
+               up_read(&xfs_mount_list_lock);
+       }
+
+       down_read(&xfs_mount_list_lock);
+       list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+               for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+                       if (!mp->m_perag[ag].pag_ici_init)
+                               continue;
+                       reclaimable += mp->m_perag[ag].pag_ici_reclaimable;
+               }
+       }
+       up_read(&xfs_mount_list_lock);
+       return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+       .shrink = xfs_reclaim_inode_shrink,
+       .seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+       init_rwsem(&xfs_mount_list_lock);
+       register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+       ASSERT(list_empty(&xfs_mount_list));
+       unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+       struct xfs_mount        *mp)
+{
+       down_write(&xfs_mount_list_lock);
+       list_add_tail(&mp->m_mplist, &xfs_mount_list);
+       up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+       struct xfs_mount        *mp)
+{
+       down_write(&xfs_mount_list_lock);
+       list_del(&mp->m_mplist);
+       up_write(&xfs_mount_list_lock);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index ea932b4..0b28c13 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-       int flags, int tag, int write_lock);
+       int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 873e07e..145f596 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
        uint             flags)
 {
        ASSERT(mp->m_quotainfo);
-       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+                               XFS_ICI_NO_TAG, 0, NULL);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6702bd8..1182604 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -229,6 +229,7 @@ typedef struct xfs_perag
        int             pag_ici_init;   /* incore inode cache initialised */
        rwlock_t        pag_ici_lock;   /* incore inode lock */
        struct radix_tree_root pag_ici_root;    /* incore inode cache root */
+       int             pag_ici_reclaimable;    /* reclaimable inodes */
 #endif
 } xfs_perag_t;
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1df7e45..c95f81a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -257,6 +257,7 @@ typedef struct xfs_mount {
        wait_queue_head_t       m_wait_single_sync_task;
        __int64_t               m_update_flags; /* sb flags we need to update
                                                   on the next remount,rw */
+       struct list_head        m_mplist;       /* inode shrinker mount list */
 } xfs_mount_t;
 
 /*

<Prev in Thread] Current Thread [Next in Thread>
  • [PATCH] xfs: stable update for 2.6.32.x and 2.6.33.y, Dave Chinner <=