xfs
[Top] [All Lists]

Re: 2.6.31 xfs_fs_destroy_inode: cannot reclaim

To: Patrick Schreurs <patrick@xxxxxxxxxxxxxxxx>
Subject: Re: 2.6.31 xfs_fs_destroy_inode: cannot reclaim
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Mon, 12 Oct 2009 19:38:54 -0400
Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>, Tommy van Leeuwen <tommy@xxxxxxxxxxxxxxxx>, Bas Couwenberg <bas@xxxxxxxxxxxxxxxx>, XFS List <xfs@xxxxxxxxxxx>
In-reply-to: <4AD18C8D.90808@xxxxxxxxxxxxxxxx>
References: <20090930124104.GA7463@xxxxxxxxxxxxx> <4AC60D27.9060703@xxxxxxxxxxxxxxxx> <20091005214348.GA15448@xxxxxxxxxxxxx> <4ACB080D.3010708@xxxxxxxxxxxxxxxx> <20091007011926.GB32032@xxxxxxxxxxxxx> <4AD18C8D.90808@xxxxxxxxxxxxxxxx>
User-agent: Mutt/1.5.19 (2009-01-05)
On Sun, Oct 11, 2009 at 09:43:09AM +0200, Patrick Schreurs wrote:
> Hello Christoph,
>
> Attached you'll find a screenshot from a 2.6.31.3 server, which includes  
> your patches and has XFS_DEBUG turned on. I truly hope this is useful to  
> you.

Thanks.  The patch below should fix the inode reclaim race that could
lead to the double free you're seeing.  To be applied ontop of all
the other patches I sent you.

Index: xfs/fs/xfs/linux-2.6/xfs_sync.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_sync.c        2009-10-11 19:09:43.828254119 
+0200
+++ xfs/fs/xfs/linux-2.6/xfs_sync.c     2009-10-12 13:48:14.886006087 +0200
@@ -670,22 +670,22 @@ xfs_reclaim_inode(
 {
        xfs_perag_t     *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
 
-       /* The hash lock here protects a thread in xfs_iget_core from
-        * racing with us on linking the inode back with a vnode.
-        * Once we have the XFS_IRECLAIM flag set it will not touch
-        * us.
+       /*
+        * The hash lock here protects a thread in xfs_iget from racing with
+        * us on recycling the inode.  Once we have the XFS_IRECLAIM flag set
+        * it will not touch it.
         */
-       write_lock(&pag->pag_ici_lock);
        spin_lock(&ip->i_flags_lock);
-       if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-           !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+       ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
                spin_unlock(&ip->i_flags_lock);
                write_unlock(&pag->pag_ici_lock);
-               return -EAGAIN;
+               return 0;
        }
        __xfs_iflags_set(ip, XFS_IRECLAIM);
        spin_unlock(&ip->i_flags_lock);
        write_unlock(&pag->pag_ici_lock);
+
        xfs_put_perag(ip->i_mount, pag);
 
        /*
@@ -758,27 +758,107 @@ __xfs_inode_clear_reclaim_tag(
                        XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 }
 
-STATIC int
-xfs_reclaim_inode_now(
-       struct xfs_inode        *ip,
+STATIC xfs_inode_t *
+xfs_reclaim_ag_lookup(
+       struct xfs_mount        *mp,
        struct xfs_perag        *pag,
+       uint32_t                *first_index)
+{
+       int                     nr_found;
+       struct xfs_inode        *ip;
+
+       /*
+        * use a gang lookup to find the next inode in the tree
+        * as the tree is sparse and a gang lookup walks to find
+        * the number of objects requested.
+        */
+       write_lock(&pag->pag_ici_lock);
+       nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+                       (void **)&ip, *first_index, 1, XFS_ICI_RECLAIM_TAG);
+       if (!nr_found)
+               goto unlock;
+
+       /*
+        * Update the index for the next lookup. Catch overflows
+        * into the next AG range which can occur if we have inodes
+        * in the last block of the AG and we are currently
+        * pointing to the last inode.
+        */
+       *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+       if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+               goto unlock;
+
+       return ip;
+
+unlock:
+       write_unlock(&pag->pag_ici_lock);
+       return NULL;
+}
+
+STATIC int
+xfs_reclaim_ag_walk(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          ag,
        int                     flags)
 {
-       /* ignore if already under reclaim */
-       if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-               read_unlock(&pag->pag_ici_lock);
-               return 0;
+       struct xfs_perag        *pag = &mp->m_perag[ag];
+       uint32_t                first_index;
+       int                     last_error = 0;
+       int                     skipped;
+
+restart:
+       skipped = 0;
+       first_index = 0;
+       do {
+               int             error = 0;
+               xfs_inode_t     *ip;
+
+               ip = xfs_reclaim_ag_lookup(mp, pag, &first_index);
+               if (!ip)
+                       break;
+
+               error = xfs_reclaim_inode(ip, flags);
+               if (error == EAGAIN) {
+                       skipped++;
+                       continue;
+               }
+               if (error)
+                       last_error = error;
+               /*
+                * bail out if the filesystem is corrupted.
+                */
+               if (error == EFSCORRUPTED)
+                       break;
+
+       } while (1);
+
+       if (skipped) {
+               delay(1);
+               goto restart;
        }
-       read_unlock(&pag->pag_ici_lock);
 
-       return xfs_reclaim_inode(ip, flags);
+       xfs_put_perag(mp, pag);
+       return last_error;
 }
 
 int
 xfs_reclaim_inodes(
-       xfs_mount_t     *mp,
-       int             mode)
+       xfs_mount_t             *mp,
+       int                     mode)
 {
-       return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
-                                       XFS_ICI_RECLAIM_TAG);
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+               if (!mp->m_perag[ag].pag_ici_init)
+                       continue;
+               error = xfs_reclaim_ag_walk(mp, ag, mode);
+               if (error) {
+                       last_error = error;
+                       if (error == EFSCORRUPTED)
+                               break;
+               }
+       }
+       return XFS_ERROR(last_error);
 }

<Prev in Thread] Current Thread [Next in Thread>