On Sun, Oct 11, 2009 at 09:43:09AM +0200, Patrick Schreurs wrote:
> Hello Christoph,
>
> Attached you'll find a screenshot from a 2.6.31.3 server, which includes
> your patches and has XFS_DEBUG turned on. I truly hope this is useful to
> you.
Thanks. The patch below should fix the inode reclaim race that could
lead to the double free you're seeing. To be applied ontop of all
the other patches I sent you.
Index: xfs/fs/xfs/linux-2.6/xfs_sync.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_sync.c 2009-10-11 19:09:43.828254119
+0200
+++ xfs/fs/xfs/linux-2.6/xfs_sync.c 2009-10-12 13:48:14.886006087 +0200
@@ -670,22 +670,22 @@ xfs_reclaim_inode(
{
xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
- /* The hash lock here protects a thread in xfs_iget_core from
- * racing with us on linking the inode back with a vnode.
- * Once we have the XFS_IRECLAIM flag set it will not touch
- * us.
+ /*
+ * The hash lock here protects a thread in xfs_iget from racing with
+ * us on recycling the inode. Once we have the XFS_IRECLAIM flag set
+ * it will not touch it.
*/
- write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
- if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
- !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+ ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
- return -EAGAIN;
+ return 0;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
+
xfs_put_perag(ip->i_mount, pag);
/*
@@ -758,27 +758,107 @@ __xfs_inode_clear_reclaim_tag(
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
}
-STATIC int
-xfs_reclaim_inode_now(
- struct xfs_inode *ip,
+STATIC xfs_inode_t *
+xfs_reclaim_ag_lookup(
+ struct xfs_mount *mp,
struct xfs_perag *pag,
+ uint32_t *first_index)
+{
+ int nr_found;
+ struct xfs_inode *ip;
+
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ write_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void **)&ip, *first_index, 1, XFS_ICI_RECLAIM_TAG);
+ if (!nr_found)
+ goto unlock;
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ goto unlock;
+
+ return ip;
+
+unlock:
+ write_unlock(&pag->pag_ici_lock);
+ return NULL;
+}
+
+STATIC int
+xfs_reclaim_ag_walk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t ag,
int flags)
{
- /* ignore if already under reclaim */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
- return 0;
+ struct xfs_perag *pag = &mp->m_perag[ag];
+ uint32_t first_index;
+ int last_error = 0;
+ int skipped;
+
+restart:
+ skipped = 0;
+ first_index = 0;
+ do {
+ int error = 0;
+ xfs_inode_t *ip;
+
+ ip = xfs_reclaim_ag_lookup(mp, pag, &first_index);
+ if (!ip)
+ break;
+
+ error = xfs_reclaim_inode(ip, flags);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
+ if (error)
+ last_error = error;
+ /*
+ * bail out if the filesystem is corrupted.
+ */
+ if (error == EFSCORRUPTED)
+ break;
+
+ } while (1);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
}
- read_unlock(&pag->pag_ici_lock);
- return xfs_reclaim_inode(ip, flags);
+ xfs_put_perag(mp, pag);
+ return last_error;
}
int
xfs_reclaim_inodes(
- xfs_mount_t *mp,
- int mode)
+ xfs_mount_t *mp,
+ int mode)
{
- return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
- XFS_ICI_RECLAIM_TAG);
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+ if (!mp->m_perag[ag].pag_ici_init)
+ continue;
+ error = xfs_reclaim_ag_walk(mp, ag, mode);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
}
|