xfs
[Top] [All Lists]

Re: 2.6.31 xfs_fs_destroy_inode: cannot reclaim

To: Dave Chinner <david@xxxxxxxxxxxxx>
Subject: Re: 2.6.31 xfs_fs_destroy_inode: cannot reclaim
From: Tommy van Leeuwen <tommy@xxxxxxxxxxxxxxxx>
Date: Wed, 21 Oct 2009 11:45:11 +0200
Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>, Patrick Schreurs <patrick@xxxxxxxxxxxxxxxx>, Bas Couwenberg <bas@xxxxxxxxxxxxxxxx>, XFS List <xfs@xxxxxxxxxxx>
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:mime-version:sender:received:in-reply-to :references:date:x-google-sender-auth:message-id:subject:from:to:cc :content-type:content-transfer-encoding; bh=DYVChaq1SCHhmf0MNZhV/ijjfj7/queqdSuBD/EfpME=; b=q7qxVkEboUjqM3iGvYCcAlrUjAX1yxabjp9s+dKwwYbUncZ8w2WEUPz1sq3BNTMyvJ gBUQHwwUUX+YHTdgLcEUYbtqOOhzpLbqCfExpbJz0bbFKzXgqAi2lXIfOV5JPYpKjPnE HcFwYB74J6Ng31PZaMh/VlC0n/pUDZ7yKkahI=
Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=mime-version:sender:in-reply-to:references:date :x-google-sender-auth:message-id:subject:from:to:cc:content-type :content-transfer-encoding; b=NSFFjMKkvSG4jNmjKkbF/fWqRaxh+Uwr7AlhukPjsMRLSW0DF9jejYeeF8+kksOo58 aiiG7Tow7ko/9j+62rL9wHXdwGaWf2wY/Ck/LcQwav3+ahXJnFqAFPZtxlnfMJvJq/+U UdweVnbjwuArbMvb6QYb7lxENZMGoCEfXjZow=
In-reply-to: <20091020034048.GA9464@xxxxxxxxxxxxxxxx>
References: <20090930124104.GA7463@xxxxxxxxxxxxx> <4AC60D27.9060703@xxxxxxxxxxxxxxxx> <20091005214348.GA15448@xxxxxxxxxxxxx> <4ACB080D.3010708@xxxxxxxxxxxxxxxx> <20091007011926.GB32032@xxxxxxxxxxxxx> <4AD18C8D.90808@xxxxxxxxxxxxxxxx> <20091012233854.GA29446@xxxxxxxxxxxxx> <20091019011600.GO9464@xxxxxxxxxxxxxxxx> <20091019035426.GB18296@xxxxxxxxxxxxx> <20091020034048.GA9464@xxxxxxxxxxxxxxxx>
Sender: chiparus@xxxxxxxxx
On Tue, Oct 20, 2009 at 5:40 AM, Dave Chinner <david@xxxxxxxxxxxxx> wrote:
> On Sun, Oct 18, 2009 at 11:54:26PM -0400, Christoph Hellwig wrote:
>> On Mon, Oct 19, 2009 at 12:16:00PM +1100, Dave Chinner wrote:
>> > > +  * The hash lock here protects a thread in xfs_iget from racing with
>> > > +  * us on recycling the inode.  Once we have the XFS_IRECLAIM flag set
>> > > +  * it will not touch it.
>> > >    */
>> > > - write_lock(&pag->pag_ici_lock);
>> >
>> > Did you mean to remove this write_lock? The patch does not remove
>> > the unlocks....
>>
>> It's taken by the caller.
>
> Ah, I guess I need to see the whole patch series, then.

This is the full patch we're using now on 2.6.31.4. (Just running btw
so no results yet).

diff -ru linux-2.6.31.4/fs/xfs/linux-2.6/xfs_sync.c
linux-2.6.31.4-xfspatch/fs/xfs/linux-2.6/xfs_sync.c
--- linux-2.6.31.4/fs/xfs/linux-2.6/xfs_sync.c  2009-09-10
00:13:59.000000000 +0200
+++ linux-2.6.31.4-xfspatch/fs/xfs/linux-2.6/xfs_sync.c 2009-10-21
11:24:56.000000000 +0200
@@ -180,6 +180,11 @@
                return EFSCORRUPTED;
        }

+       if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) {
+               read_unlock(&pag->pag_ici_lock);
+               return ENOENT;
+       }
+
        /*
         * If we can't get a reference on the inode, it must be in reclaim.
         * Leave it for the reclaim code to flush. Also avoid inodes that
@@ -191,7 +196,7 @@
        }
        read_unlock(&pag->pag_ici_lock);

-       if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+       if (is_bad_inode(inode)) {
                IRELE(ip);
                return ENOENT;
        }
@@ -655,22 +660,21 @@
 {
        xfs_perag_t     *pag = xfs_get_perag(ip->i_mount, ip->i_ino);

-       /* The hash lock here protects a thread in xfs_iget_core from
-        * racing with us on linking the inode back with a vnode.
-        * Once we have the XFS_IRECLAIM flag set it will not touch
-        * us.
+       /*
+        * The hash lock here protects a thread in xfs_iget from racing with
+        * us on recycling the inode.  Once we have the XFS_IRECLAIM flag set
+        * it will not touch it.
         */
-       write_lock(&pag->pag_ici_lock);
        spin_lock(&ip->i_flags_lock);
-       if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-           !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+       ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
                spin_unlock(&ip->i_flags_lock);
                write_unlock(&pag->pag_ici_lock);
                if (locked) {
                        xfs_ifunlock(ip);
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                }
-               return -EAGAIN;
+               return 0;
        }
        __xfs_iflags_set(ip, XFS_IRECLAIM);
        spin_unlock(&ip->i_flags_lock);
@@ -764,6 +768,88 @@
        xfs_put_perag(mp, pag);
 }

+STATIC xfs_inode_t *
+xfs_reclaim_ag_lookup(
+       struct xfs_mount        *mp,
+       struct xfs_perag        *pag,
+       uint32_t                *first_index)
+{
+       int                     nr_found;
+       struct xfs_inode        *ip;
+
+       /*
+        * use a gang lookup to find the next inode in the tree
+        * as the tree is sparse and a gang lookup walks to find
+        * the number of objects requested.
+        */
+       write_lock(&pag->pag_ici_lock);
+       nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+                       (void **)&ip, *first_index, 1, XFS_ICI_RECLAIM_TAG);
+       if (!nr_found)
+               goto unlock;
+
+       /*
+        * Update the index for the next lookup. Catch overflows
+        * into the next AG range which can occur if we have inodes
+        * in the last block of the AG and we are currently
+        * pointing to the last inode.
+        */
+       *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+       if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+               goto unlock;
+
+       return ip;
+
+unlock:
+       write_unlock(&pag->pag_ici_lock);
+       return NULL;
+}
+
+STATIC int
+xfs_reclaim_ag_walk(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          ag,
+       int                     flags)
+{
+       struct xfs_perag        *pag = &mp->m_perag[ag];
+       uint32_t                first_index;
+       int                     last_error = 0;
+       int                     skipped;
+
+restart:
+       skipped = 0;
+       first_index = 0;
+       do {
+               int             error = 0;
+               xfs_inode_t     *ip;
+
+               ip = xfs_reclaim_ag_lookup(mp, pag, &first_index);
+               if (!ip)
+                       break;
+
+               error = xfs_reclaim_inode(ip, 0, flags);
+               if (error == EAGAIN) {
+                       skipped++;
+                       continue;
+               }
+               if (error)
+                       last_error = error;
+               /*
+                * bail out if the filesystem is corrupted.
+                */
+               if (error == EFSCORRUPTED)
+                       break;
+
+       } while (1);
+
+       if (skipped) {
+               delay(1);
+               goto restart;
+       }
+       xfs_put_perag(mp, pag);
+       return last_error;
+}
+
 STATIC int
 xfs_reclaim_inode_now(
        struct xfs_inode        *ip,
@@ -785,6 +871,19 @@
        xfs_mount_t     *mp,
        int             mode)
 {
-       return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
-                                       XFS_ICI_RECLAIM_TAG);
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+               if (!mp->m_perag[ag].pag_ici_init)
+                       continue;
+               error = xfs_reclaim_ag_walk(mp, ag, mode);
+               if (error) {
+                       last_error = error;
+                       if (error == EFSCORRUPTED)
+                               break;
+               }
+       }
+       return XFS_ERROR(last_error);
 }
diff -ru linux-2.6.31.4/fs/xfs/xfs_iget.c
linux-2.6.31.4-xfspatch/fs/xfs/xfs_iget.c
--- linux-2.6.31.4/fs/xfs/xfs_iget.c    2009-09-10 00:13:59.000000000 +0200
+++ linux-2.6.31.4-xfspatch/fs/xfs/xfs_iget.c   2009-10-14
13:56:33.000000000 +0200
@@ -242,6 +242,8 @@

                error = -inode_init_always(mp->m_super, inode);
                if (error) {
+                       printk("XFS: inode_init_always failed to
re-initialize inode\n");
+
                        /*
                         * Re-initializing the inode failed, and we are in deep
                         * trouble.  Try to re-add it to the reclaim list.
@@ -538,17 +540,21 @@
 {
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_perag        *pag;
+       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);

        XFS_STATS_INC(xs_ig_reclaims);

        /*
-        * Remove the inode from the per-AG radix tree.  It doesn't matter
-        * if it was never added to it because radix_tree_delete can deal
-        * with that case just fine.
+        * Remove the inode from the per-AG radix tree.
+        *
+        * Because radix_tree_delete won't complain even if the item was never
+        * added to the tree assert that it's been there before to catch
+        * problems with the inode life time early on.
         */
        pag = xfs_get_perag(mp, ip->i_ino);
        write_lock(&pag->pag_ici_lock);
-       radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
+       ASSERT(radix_tree_lookup(&pag->pag_ici_root, agino));
+       radix_tree_delete(&pag->pag_ici_root, agino);
        write_unlock(&pag->pag_ici_lock);
        xfs_put_perag(mp, pag);

diff -ru linux-2.6.31.4/fs/xfs/xfs_vnodeops.c
linux-2.6.31.4-xfspatch/fs/xfs/xfs_vnodeops.c
--- linux-2.6.31.4/fs/xfs/xfs_vnodeops.c        2009-09-10
00:13:59.000000000 +0200
+++ linux-2.6.31.4-xfspatch/fs/xfs/xfs_vnodeops.c       2009-10-14
13:56:33.000000000 +0200
@@ -2465,45 +2465,36 @@
 xfs_reclaim(
        xfs_inode_t     *ip)
 {
-
        xfs_itrace_entry(ip);

        ASSERT(!VN_MAPPED(VFS_I(ip)));

        /* bad inode, get out here ASAP */
-       if (is_bad_inode(VFS_I(ip))) {
-               xfs_ireclaim(ip);
-               return 0;
-       }
+       if (is_bad_inode(VFS_I(ip)))
+               goto out_reclaim;

        xfs_ioend_wait(ip);

        ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);

        /*
-        * Make sure the atime in the XFS inode is correct before freeing the
-        * Linux inode.
+        * We should never get here with one of the reclaim flags already set.
         */
-       xfs_synchronize_atime(ip);
+       BUG_ON(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       BUG_ON(xfs_iflags_test(ip, XFS_IRECLAIM));

        /*
         * If we have nothing to flush with this inode then complete the
-        * teardown now, otherwise break the link between the xfs inode and the
-        * linux inode and clean up the xfs inode later. This avoids flushing
-        * the inode to disk during the delete operation itself.
-        *
-        * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
-        * first to ensure that xfs_iunpin() will never see an xfs inode
-        * that has a linux inode being reclaimed. Synchronisation is provided
-        * by the i_flags_lock.
+        * teardown now, otherwise delay the flush operation.
         */
-       if (!ip->i_update_core && (ip->i_itemp == NULL)) {
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               xfs_iflock(ip);
-               xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-               return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
+       if (ip->i_update_core || ip->i_itemp) {
+               xfs_inode_set_reclaim_tag(ip);
+               return 0;
        }
        xfs_inode_set_reclaim_tag(ip);
+
+out_reclaim:
+       xfs_ireclaim(ip);
        return 0;
 }

<Prev in Thread] Current Thread [Next in Thread>