2.6.31 xfs_fs_destroy_inode: cannot reclaim

Christoph Hellwig hch at infradead.org
Mon Oct 12 18:38:54 CDT 2009


On Sun, Oct 11, 2009 at 09:43:09AM +0200, Patrick Schreurs wrote:
> Hello Christoph,
>
> Attached you'll find a screenshot from a 2.6.31.3 server, which includes  
> your patches and has XFS_DEBUG turned on. I truly hope this is useful to  
> you.

Thanks.  The patch below should fix the inode reclaim race that could
lead to the double free you're seeing.  To be applied ontop of all
the other patches I sent you.

Index: xfs/fs/xfs/linux-2.6/xfs_sync.c
===================================================================
--- xfs.orig/fs/xfs/linux-2.6/xfs_sync.c	2009-10-11 19:09:43.828254119 +0200
+++ xfs/fs/xfs/linux-2.6/xfs_sync.c	2009-10-12 13:48:14.886006087 +0200
@@ -670,22 +670,22 @@ xfs_reclaim_inode(
 {
 	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
 
-	/* The hash lock here protects a thread in xfs_iget_core from
-	 * racing with us on linking the inode back with a vnode.
-	 * Once we have the XFS_IRECLAIM flag set it will not touch
-	 * us.
+	/*
+	 * The hash lock here protects a thread in xfs_iget from racing with
+	 * us on recycling the inode.  Once we have the XFS_IRECLAIM flag set
+	 * it will not touch it.
 	 */
-	write_lock(&pag->pag_ici_lock);
 	spin_lock(&ip->i_flags_lock);
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-	    !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
 		spin_unlock(&ip->i_flags_lock);
 		write_unlock(&pag->pag_ici_lock);
-		return -EAGAIN;
+		return 0;
 	}
 	__xfs_iflags_set(ip, XFS_IRECLAIM);
 	spin_unlock(&ip->i_flags_lock);
 	write_unlock(&pag->pag_ici_lock);
+
 	xfs_put_perag(ip->i_mount, pag);
 
 	/*
@@ -758,27 +758,107 @@ __xfs_inode_clear_reclaim_tag(
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 }
 
-STATIC int
-xfs_reclaim_inode_now(
-	struct xfs_inode	*ip,
+STATIC xfs_inode_t *
+xfs_reclaim_ag_lookup(
+	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
+	uint32_t		*first_index)
+{
+	int			nr_found;
+	struct xfs_inode	*ip;
+
+	/*
+	 * use a gang lookup to find the next inode in the tree
+	 * as the tree is sparse and a gang lookup walks to find
+	 * the number of objects requested.
+	 */
+	write_lock(&pag->pag_ici_lock);
+	nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+			(void **)&ip, *first_index, 1, XFS_ICI_RECLAIM_TAG);
+	if (!nr_found)
+		goto unlock;
+
+	/*
+	 * Update the index for the next lookup. Catch overflows
+	 * into the next AG range which can occur if we have inodes
+	 * in the last block of the AG and we are currently
+	 * pointing to the last inode.
+	 */
+	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+		goto unlock;
+
+	return ip;
+
+unlock:
+	write_unlock(&pag->pag_ici_lock);
+	return NULL;
+}
+
+STATIC int
+xfs_reclaim_ag_walk(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		ag,
 	int			flags)
 {
-	/* ignore if already under reclaim */
-	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		read_unlock(&pag->pag_ici_lock);
-		return 0;
+	struct xfs_perag	*pag = &mp->m_perag[ag];
+	uint32_t		first_index;
+	int			last_error = 0;
+	int			skipped;
+
+restart:
+	skipped = 0;
+	first_index = 0;
+	do {
+		int		error = 0;
+		xfs_inode_t	*ip;
+
+		ip = xfs_reclaim_ag_lookup(mp, pag, &first_index);
+		if (!ip)
+			break;
+
+		error = xfs_reclaim_inode(ip, flags);
+		if (error == EAGAIN) {
+			skipped++;
+			continue;
+		}
+		if (error)
+			last_error = error;
+		/*
+		 * bail out if the filesystem is corrupted.
+		 */
+		if (error == EFSCORRUPTED)
+			break;
+
+	} while (1);
+
+	if (skipped) {
+		delay(1);
+		goto restart;
 	}
-	read_unlock(&pag->pag_ici_lock);
 
-	return xfs_reclaim_inode(ip, flags);
+	xfs_put_perag(mp, pag);
+	return last_error;
 }
 
 int
 xfs_reclaim_inodes(
-	xfs_mount_t	*mp,
-	int		mode)
+	xfs_mount_t		*mp,
+	int			mode)
 {
-	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
-					XFS_ICI_RECLAIM_TAG);
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+
+	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		if (!mp->m_perag[ag].pag_ici_init)
+			continue;
+		error = xfs_reclaim_ag_walk(mp, ag, mode);
+		if (error) {
+			last_error = error;
+			if (error == EFSCORRUPTED)
+				break;
+		}
+	}
+	return XFS_ERROR(last_error);
 }




More information about the xfs mailing list