2.6.30 panic - xfs_fs_destroy_inode

Patrick Schreurs patrick at news-service.com
Thu Jul 2 12:31:30 CDT 2009


Hi Christoph,

With this patch we see the following:

kernel BUG at fs/inode.c:1288!
invalid opcode: 0000 [#2] SMP
last sysfs file: /sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
CPU 1
Modules linked in: acpi_cpufreq cpufreq_ondemand ipmi_si ipmi_devintf 
ipmi_msghandler bonding mptspi 8250_pnp rng_core scsi_transport_spi 
thermal serio_raw processor 8250 serial_core bnx2 thermal_sys
Pid: 8048, comm: diablo Tainted: G      D    2.6.30xfspatch #1 PowerEdge 
1950
RIP: 0010:[<ffffffff8028aaa3>]  [<ffffffff8028aaa3>] iput+0x13/0x60
RSP: 0018:ffff88007ec6db58  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff88022cbef5c0 RCX: ffff88017d1edd30
RDX: ffff88022cbef5f0 RSI: ffff88017d1edcc8 RDI: ffff88022cbef5c0
RBP: ffff8801383ae788 R08: ffff88007ec6db98 R09: 0000000000000246
R10: ffff88008c2156a0 R11: ffffffff8028b7a8 R12: ffff88022e831c00
R13: ffff88007ec6db98 R14: ffff88022e831d18 R15: ffff88007ec6dc0c
FS:  0000000001495860(0063) GS:ffff88002804d000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00007fa14f9fa000 CR3: 000000007ee5c000 CR4: 00000000000006a0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process diablo (pid: 8048, threadinfo ffff88007ec6c000, task 
ffff8800855926f0)
Stack:
  ffff88017d1edcc0 ffffffff802884f7 ffff88008c2156a0 ffff88017d1edcc0
  ffff88022e831c00 ffffffff80288783 00000000000000c0 0000000000000008
  ffff8800b9ad5a00 ffff880138304ac0 ffff88007ec6dba8 ffff88007ec6dba8
Call Trace:
  [<ffffffff802884f7>] ? d_kill+0x34/0x55
  [<ffffffff80288783>] ? __shrink_dcache_sb+0x26b/0x301
  [<ffffffff802888f8>] ? shrink_dcache_memory+0xdf/0x16e
  [<ffffffff8025e3dd>] ? shrink_slab+0xe0/0x153
  [<ffffffff8025efa6>] ? try_to_free_pages+0x22e/0x31b
  [<ffffffff8025c68a>] ? isolate_pages_global+0x0/0x231
  [<ffffffff80259543>] ? __alloc_pages_internal+0x25f/0x3ff
  [<ffffffff8025b05a>] ? __do_page_cache_readahead+0xab/0x1b1
  [<ffffffff8025b218>] ? force_page_cache_readahead+0x57/0x7e
  [<ffffffff80264164>] ? sys_madvise+0x394/0x4e0
  [<ffffffff8020ae2b>] ? system_call_fastpath+0x16/0x1b
Code: 4b 70 be 01 00 00 00 48 89 df e8 f8 86 00 00 eb db 48 83 c4 28 5b 
5d c3 53 48 85 ff 48 89 fb 74 55 48 83 bf f8 01 00 00 40 75 04 <0f> 0b 
eb fe 48 8d 7f 48 48 c7 c6 f0 aa 5b 80 e8 51 4b 0a 00 85
RIP  [<ffffffff8028aaa3>] iput+0x13/0x60
  RSP <ffff88007ec6db58>
---[ end trace 06a9d5e318d14bf7 ]---

This server also crashed twice. Unfortunately I don't have a complete 
log of this event. See the attachment for a partial log.

Thanks for looking into this.

Patrick Schreurs

Christoph Hellwig wrote:
> Actually you might want to give this patch a try which fixes a race
> affecting the reclaim tag in iget:
> 
> 
> Index: xfs/fs/xfs/xfs_iget.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_iget.c	2009-06-04 13:27:41.901946950 +0200
> +++ xfs/fs/xfs/xfs_iget.c	2009-06-04 14:08:08.837816707 +0200
> @@ -132,80 +132,89 @@ xfs_iget_cache_hit(
>  	int			flags,
>  	int			lock_flags) __releases(pag->pag_ici_lock)
>  {
> +	struct inode		*inode = VFS_I(ip);
>  	struct xfs_mount	*mp = ip->i_mount;
> -	int			error = EAGAIN;
> +	int			error;
> +
> +	spin_lock(&ip->i_flags_lock);
>  
>  	/*
> -	 * If INEW is set this inode is being set up
> -	 * If IRECLAIM is set this inode is being torn down
> -	 * Pause and try again.
> +	 * This inode is being torn down, pause and try again.
>  	 */
> -	if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
> +	if (ip->i_flags & XFS_IRECLAIM) {
>  		XFS_STATS_INC(xs_ig_frecycle);
> +		error = EAGAIN;
>  		goto out_error;
>  	}
>  
> -	/* If IRECLAIMABLE is set, we've torn down the vfs inode part */
> -	if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
> +	/*
> +	 * If we are racing with another cache hit that is currently recycling
> +	 * this inode out of the XFS_IRECLAIMABLE state, wait for the
> +	 * initialisation to complete before continuing.
> +	 */
> +	if (ip->i_flags & XFS_INEW) {
> +		spin_unlock(&ip->i_flags_lock);
> +		read_unlock(&pag->pag_ici_lock);
>  
> -		/*
> -		 * If lookup is racing with unlink, then we should return an
> -		 * error immediately so we don't remove it from the reclaim
> -		 * list and potentially leak the inode.
> -		 */
> -		if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
> -			error = ENOENT;
> -			goto out_error;
> -		}
> +		XFS_STATS_INC(xs_ig_frecycle);
> +		wait_on_inode(inode);
> +		return EAGAIN;
> +	}
>  
> +	/*
> +	 * If lookup is racing with unlink, then we should return an
> +	 * error immediately so we don't remove it from the reclaim
> +	 * list and potentially leak the inode.
> +	 */
> +	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
> +		error = ENOENT;
> +		goto out_error;
> +	}
> +
> +	/*
> +	 * If IRECLAIMABLE is set, we've torn down the vfs inode part already.
> +	 * Need to carefully get it back into useable state.
> +	 */
> +	if (ip->i_flags & XFS_IRECLAIMABLE) {
>  		xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
>  
>  		/*
> -		 * We need to re-initialise the VFS inode as it has been
> -		 * 'freed' by the VFS. Do this here so we can deal with
> -		 * errors cleanly, then tag it so it can be set up correctly
> -		 * later.
> +		 * We need to set XFS_INEW atomically with clearing the
> +		 * reclaimable tag so that we do have an indicator of the
> +		 * inode still being initialized.
>  		 */
> -		if (!inode_init_always(mp->m_super, VFS_I(ip))) {
> +		ip->i_flags |= XFS_INEW;
> +		__xfs_inode_clear_reclaim_tag(pag, ip);
> +
> +		spin_unlock(&ip->i_flags_lock);
> +		read_unlock(&pag->pag_ici_lock);
> +
> +		if (unlikely(!inode_init_always(mp->m_super, inode))) {
> +			printk("node_init_always failed!!\n");
> +
> +			/*
> +			 * Re-initializing the inode failed, and we are in deep
> +			 * trouble.  Try to re-add it to the reclaim list.
> +			 */
> +			read_lock(&pag->pag_ici_lock);
> +			spin_lock(&ip->i_flags_lock);
> +
> +			ip->i_flags &= ~XFS_INEW;
> +			__xfs_inode_set_reclaim_tag(pag, ip);
> +
>  			error = ENOMEM;
>  			goto out_error;
>  		}
> -
> -		/*
> -		 * We must set the XFS_INEW flag before clearing the
> -		 * XFS_IRECLAIMABLE flag so that if a racing lookup does
> -		 * not find the XFS_IRECLAIMABLE above but has the igrab()
> -		 * below succeed we can safely check XFS_INEW to detect
> -		 * that this inode is still being initialised.
> -		 */
> -		xfs_iflags_set(ip, XFS_INEW);
> -		xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
> -
> -		/* clear the radix tree reclaim flag as well. */
> -		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
> -	} else if (!igrab(VFS_I(ip))) {
> +	} else {
>  		/* If the VFS inode is being torn down, pause and try again. */
> -		XFS_STATS_INC(xs_ig_frecycle);
> -		goto out_error;
> -	} else if (xfs_iflags_test(ip, XFS_INEW)) {
> -		/*
> -		 * We are racing with another cache hit that is
> -		 * currently recycling this inode out of the XFS_IRECLAIMABLE
> -		 * state. Wait for the initialisation to complete before
> -		 * continuing.
> -		 */
> -		wait_on_inode(VFS_I(ip));
> -	}
> +		if (!igrab(inode))
> +			goto out_error;
>  
> -	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
> -		error = ENOENT;
> -		iput(VFS_I(ip));
> -		goto out_error;
> +		/* We've got a live one. */
> +		spin_unlock(&ip->i_flags_lock);
> +		read_unlock(&pag->pag_ici_lock);
>  	}
>  
> -	/* We've got a live one. */
> -	read_unlock(&pag->pag_ici_lock);
> -
>  	if (lock_flags != 0)
>  		xfs_ilock(ip, lock_flags);
>  
> @@ -215,6 +224,7 @@ xfs_iget_cache_hit(
>  	return 0;
>  
>  out_error:
> +	spin_unlock(&ip->i_flags_lock);
>  	read_unlock(&pag->pag_ici_lock);
>  	return error;
>  }
> Index: xfs/fs/xfs/linux-2.6/xfs_sync.c
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/xfs_sync.c	2009-06-04 13:40:09.135939715 +0200
> +++ xfs/fs/xfs/linux-2.6/xfs_sync.c	2009-06-04 13:59:17.978816696 +0200
> @@ -607,6 +607,17 @@ xfs_reclaim_inode(
>  	return 0;
>  }
>  
> +void
> +__xfs_inode_set_reclaim_tag(
> +	struct xfs_perag	*pag,
> +	struct xfs_inode	*ip)
> +{
> +	xfs_agino_t	agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
> +
> +	radix_tree_tag_set(&pag->pag_ici_root, agino, XFS_ICI_RECLAIM_TAG);
> +	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
> +}
> +
>  /*
>   * We set the inode flag atomically with the radix tree tag.
>   * Once we get tag lookups on the radix tree, this inode flag
> @@ -621,9 +632,7 @@ xfs_inode_set_reclaim_tag(
>  
>  	read_lock(&pag->pag_ici_lock);
>  	spin_lock(&ip->i_flags_lock);
> -	radix_tree_tag_set(&pag->pag_ici_root,
> -			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
> -	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
> +	__xfs_inode_set_reclaim_tag(pag, ip);
>  	spin_unlock(&ip->i_flags_lock);
>  	read_unlock(&pag->pag_ici_lock);
>  	xfs_put_perag(mp, pag);
> @@ -631,30 +640,15 @@ xfs_inode_set_reclaim_tag(
>  
>  void
>  __xfs_inode_clear_reclaim_tag(
> -	xfs_mount_t	*mp,
> -	xfs_perag_t	*pag,
> -	xfs_inode_t	*ip)
> -{
> -	radix_tree_tag_clear(&pag->pag_ici_root,
> -			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
> -}
> -
> -void
> -xfs_inode_clear_reclaim_tag(
> -	xfs_inode_t	*ip)
> +	struct xfs_perag	*pag,
> +	struct xfs_inode	*ip)
>  {
> -	xfs_mount_t	*mp = ip->i_mount;
> -	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);
> +	xfs_agino_t	agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
>  
> -	read_lock(&pag->pag_ici_lock);
> -	spin_lock(&ip->i_flags_lock);
> -	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
> -	spin_unlock(&ip->i_flags_lock);
> -	read_unlock(&pag->pag_ici_lock);
> -	xfs_put_perag(mp, pag);
> +	ip->i_flags &= ~XFS_IRECLAIMABLE;
> +	radix_tree_tag_clear(&pag->pag_ici_root, agino, XFS_ICI_RECLAIM_TAG);
>  }
>  
> -
>  STATIC void
>  xfs_reclaim_inodes_ag(
>  	xfs_mount_t	*mp,
> Index: xfs/fs/xfs/linux-2.6/xfs_sync.h
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/xfs_sync.h	2009-06-04 13:53:32.994814723 +0200
> +++ xfs/fs/xfs/linux-2.6/xfs_sync.h	2009-06-04 13:58:54.746942001 +0200
> @@ -51,7 +51,6 @@ int xfs_reclaim_inode(struct xfs_inode *
>  int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
>  
>  void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
> -void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
> -void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
> -				struct xfs_inode *ip);
> +void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
> +void __xfs_inode_clear_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
>  #endif
> 
> _______________________________________________
> xfs mailing list
> xfs at oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
-------------- next part --------------
A non-text attachment was scrubbed...
Name: sb02-20090702.jpg
Type: image/jpeg
Size: 80654 bytes
Desc: not available
URL: <http://oss.sgi.com/pipermail/xfs/attachments/20090702/5630123c/attachment.jpg>


More information about the xfs mailing list