2.6.31 xfs_fs_destroy_inode: cannot reclaim
Tommy van Leeuwen
tommy at news-service.com
Wed Oct 21 04:45:11 CDT 2009
On Tue, Oct 20, 2009 at 5:40 AM, Dave Chinner <david at fromorbit.com> wrote:
> On Sun, Oct 18, 2009 at 11:54:26PM -0400, Christoph Hellwig wrote:
>> On Mon, Oct 19, 2009 at 12:16:00PM +1100, Dave Chinner wrote:
>> > > + * The hash lock here protects a thread in xfs_iget from racing with
>> > > + * us on recycling the inode. Once we have the XFS_IRECLAIM flag set
>> > > + * it will not touch it.
>> > > */
>> > > - write_lock(&pag->pag_ici_lock);
>> >
>> > Did you mean to remove this write_lock? The patch does not remove
>> > the unlocks....
>>
>> It's taken by the caller.
>
> Ah, I guess I need to see the whole patch series, then.
This is the full patch we're using now on 2.6.31.4. (We have only just
started running it, by the way, so there are no results yet.)
diff -ru linux-2.6.31.4/fs/xfs/linux-2.6/xfs_sync.c linux-2.6.31.4-xfspatch/fs/xfs/linux-2.6/xfs_sync.c
--- linux-2.6.31.4/fs/xfs/linux-2.6/xfs_sync.c 2009-09-10 00:13:59.000000000 +0200
+++ linux-2.6.31.4-xfspatch/fs/xfs/linux-2.6/xfs_sync.c 2009-10-21 11:24:56.000000000 +0200
@@ -180,6 +180,11 @@
return EFSCORRUPTED;
}
+ if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) {
+ read_unlock(&pag->pag_ici_lock);
+ return ENOENT;
+ }
+
/*
* If we can't get a reference on the inode, it must be in reclaim.
* Leave it for the reclaim code to flush. Also avoid inodes that
@@ -191,7 +196,7 @@
}
read_unlock(&pag->pag_ici_lock);
- if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+ if (is_bad_inode(inode)) {
IRELE(ip);
return ENOENT;
}
@@ -655,22 +660,21 @@
{
xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
- /* The hash lock here protects a thread in xfs_iget_core from
- * racing with us on linking the inode back with a vnode.
- * Once we have the XFS_IRECLAIM flag set it will not touch
- * us.
+ /*
+ * The hash lock here protects a thread in xfs_iget from racing with
+ * us on recycling the inode. Once we have the XFS_IRECLAIM flag set
+ * it will not touch it.
*/
- write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
- if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
- !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+ ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
if (locked) {
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- return -EAGAIN;
+ return 0;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
@@ -764,6 +768,88 @@
xfs_put_perag(mp, pag);
}
+STATIC xfs_inode_t *
+xfs_reclaim_ag_lookup(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ uint32_t *first_index)
+{
+ int nr_found;
+ struct xfs_inode *ip;
+
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ write_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void **)&ip, *first_index, 1, XFS_ICI_RECLAIM_TAG);
+ if (!nr_found)
+ goto unlock;
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ goto unlock;
+
+ return ip;
+
+unlock:
+ write_unlock(&pag->pag_ici_lock);
+ return NULL;
+}
+
+STATIC int
+xfs_reclaim_ag_walk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t ag,
+ int flags)
+{
+ struct xfs_perag *pag = &mp->m_perag[ag];
+ uint32_t first_index;
+ int last_error = 0;
+ int skipped;
+
+restart:
+ skipped = 0;
+ first_index = 0;
+ do {
+ int error = 0;
+ xfs_inode_t *ip;
+
+ ip = xfs_reclaim_ag_lookup(mp, pag, &first_index);
+ if (!ip)
+ break;
+
+ error = xfs_reclaim_inode(ip, 0, flags);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
+ if (error)
+ last_error = error;
+ /*
+ * bail out if the filesystem is corrupted.
+ */
+ if (error == EFSCORRUPTED)
+ break;
+
+ } while (1);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
+ }
+ xfs_put_perag(mp, pag);
+ return last_error;
+}
+
STATIC int
xfs_reclaim_inode_now(
struct xfs_inode *ip,
@@ -785,6 +871,19 @@
xfs_mount_t *mp,
int mode)
{
- return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
- XFS_ICI_RECLAIM_TAG);
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+ if (!mp->m_perag[ag].pag_ici_init)
+ continue;
+ error = xfs_reclaim_ag_walk(mp, ag, mode);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
}
diff -ru linux-2.6.31.4/fs/xfs/xfs_iget.c linux-2.6.31.4-xfspatch/fs/xfs/xfs_iget.c
--- linux-2.6.31.4/fs/xfs/xfs_iget.c 2009-09-10 00:13:59.000000000 +0200
+++ linux-2.6.31.4-xfspatch/fs/xfs/xfs_iget.c 2009-10-14 13:56:33.000000000 +0200
@@ -242,6 +242,8 @@
error = -inode_init_always(mp->m_super, inode);
if (error) {
+ printk("XFS: inode_init_always failed to re-initialize inode\n");
+
/*
* Re-initializing the inode failed, and we are in deep
* trouble. Try to re-add it to the reclaim list.
@@ -538,17 +540,21 @@
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
XFS_STATS_INC(xs_ig_reclaims);
/*
- * Remove the inode from the per-AG radix tree. It doesn't matter
- * if it was never added to it because radix_tree_delete can deal
- * with that case just fine.
+ * Remove the inode from the per-AG radix tree.
+ *
+ * Because radix_tree_delete won't complain even if the item was never
+ * added to the tree assert that it's been there before to catch
+ * problems with the inode life time early on.
*/
pag = xfs_get_perag(mp, ip->i_ino);
write_lock(&pag->pag_ici_lock);
- radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
+ ASSERT(radix_tree_lookup(&pag->pag_ici_root, agino));
+ radix_tree_delete(&pag->pag_ici_root, agino);
write_unlock(&pag->pag_ici_lock);
xfs_put_perag(mp, pag);
diff -ru linux-2.6.31.4/fs/xfs/xfs_vnodeops.c linux-2.6.31.4-xfspatch/fs/xfs/xfs_vnodeops.c
--- linux-2.6.31.4/fs/xfs/xfs_vnodeops.c 2009-09-10 00:13:59.000000000 +0200
+++ linux-2.6.31.4-xfspatch/fs/xfs/xfs_vnodeops.c 2009-10-14 13:56:33.000000000 +0200
@@ -2465,45 +2465,36 @@
xfs_reclaim(
xfs_inode_t *ip)
{
-
xfs_itrace_entry(ip);
ASSERT(!VN_MAPPED(VFS_I(ip)));
/* bad inode, get out here ASAP */
- if (is_bad_inode(VFS_I(ip))) {
- xfs_ireclaim(ip);
- return 0;
- }
+ if (is_bad_inode(VFS_I(ip)))
+ goto out_reclaim;
xfs_ioend_wait(ip);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
/*
- * Make sure the atime in the XFS inode is correct before freeing the
- * Linux inode.
+ * We should never get here with one of the reclaim flags already set.
*/
- xfs_synchronize_atime(ip);
+ BUG_ON(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ BUG_ON(xfs_iflags_test(ip, XFS_IRECLAIM));
/*
* If we have nothing to flush with this inode then complete the
- * teardown now, otherwise break the link between the xfs inode and the
- * linux inode and clean up the xfs inode later. This avoids flushing
- * the inode to disk during the delete operation itself.
- *
- * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
- * first to ensure that xfs_iunpin() will never see an xfs inode
- * that has a linux inode being reclaimed. Synchronisation is provided
- * by the i_flags_lock.
+ * teardown now, otherwise delay the flush operation.
*/
- if (!ip->i_update_core && (ip->i_itemp == NULL)) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_iflock(ip);
- xfs_iflags_set(ip, XFS_IRECLAIMABLE);
- return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
+ if (ip->i_update_core || ip->i_itemp) {
+ xfs_inode_set_reclaim_tag(ip);
+ return 0;
}
xfs_inode_set_reclaim_tag(ip);
+
+out_reclaim:
+ xfs_ireclaim(ip);
return 0;
}
More information about the xfs
mailing list