Hi,
From: Steve Lord <lord@xxxxxxx>
Subject: Re: deadlocks on ENOSPC
Date: Fri, 15 Jul 2005 07:21:25 -0500
> Hi Masano,
>
> That is definitely a bug, the extent logic is not supposed to lock
> allocation groups out of order. Multiple allocation groups are OK,
> but wrapping past the last allocation group back to the first again
> is not.
The following `break' seems to be the culprit.
fs/xfs/xfs_alloc.c: in xfs_alloc_fix_freelist()
1967 /*
1968 * Stop if we run out. Won't happen if callers are obeying
1969 * the restrictions correctly. Can happen for free calls
1970 * on a completely full ag.
1971 */
1972 if (targs.agbno == NULLAGBLOCK)
1973 break;
I've tested the deadlock situation several times, and I found that the
deadlock occurs immediately after the above `break'. If the kernel quits
at that point, the AGF just stays locked until it is committed. This
may cause an AGF deadlock.
xfs_strategy()
  +-- xfs_bmapi()
      +-- xfs_bmap_alloc()
          |
          +-- xfs_alloc_vextent( XFS_ALLOCTYPE_NEAR_BNO )
          |   |
          |   +-- xfs_alloc_fix_freelist()
          |   |   |
          |   |   +-- xfs_alloc_read_agf( AGm )  <-- Get and lock the AGF
          |   |   +-- xfs_alloc_ag_vextent()     <-- Make the freelist longer
          |   |   +-- NULLAGBLOCK && return 0    <-- Quit because of no space
          |   |                                      (the AGF is locked)
          |   +-- NULLFSBLOCK && return 0
          :
          +-- xfs_alloc_vextent( XFS_ALLOCTYPE_FIRST_AG )
          |   |
          |   +-- xfs_alloc_fix_freelist()
          |       |
          :       +-- xfs_alloc_read_agf( AGn: n<m ) -> breaking order
Here is the patch against 2.6.13-rc5 + TAKE 938502. This patch adds an
XFS_ALLOCTYPE_FIRST_AG_TRYLOCK allocation type to avoid waiting on locks.
8<--------8<--------8<--------8<--------8<--------8<--------
--- linux-2.6.13-rc5/fs/xfs/xfs_alloc.h.orig 2005-08-03 11:13:27.000000000 +0900
+++ linux-2.6.13-rc5/fs/xfs/xfs_alloc.h 2005-08-03 11:14:48.000000000 +0900
@@ -48,7 +48,8 @@ typedef enum xfs_alloctype
XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */
XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */
XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */
- XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */
+ XFS_ALLOCTYPE_THIS_BNO, /* at exactly this block */
+ XFS_ALLOCTYPE_FIRST_AG_TRYLOCK /* ... start at ag 0 with trylock */
} xfs_alloctype_t;
/*
--- linux-2.6.13-rc5/fs/xfs/xfs_alloc.c.orig 2005-08-03 10:30:33.000000000 +0900
+++ linux-2.6.13-rc5/fs/xfs/xfs_alloc.c 2005-08-03 12:06:31.000000000 +0900
@@ -2322,6 +2322,7 @@ xfs_alloc_vextent(
case XFS_ALLOCTYPE_ANY_AG:
case XFS_ALLOCTYPE_START_AG:
case XFS_ALLOCTYPE_FIRST_AG:
+ case XFS_ALLOCTYPE_FIRST_AG_TRYLOCK:
/*
* Rotate through the allocation groups looking for a winner.
*/
@@ -2341,6 +2342,14 @@ xfs_alloc_vextent(
args->type = XFS_ALLOCTYPE_THIS_AG;
sagno = 0;
flags = 0;
+ } else if (type == XFS_ALLOCTYPE_FIRST_AG_TRYLOCK) {
+ /*
+ * Start with allocation group given by bno.
+ */
+ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+ args->type = XFS_ALLOCTYPE_THIS_AG;
+ sagno = 0;
+ flags = XFS_ALLOC_FLAG_TRYLOCK;
} else {
if (type == XFS_ALLOCTYPE_START_AG)
args->type = XFS_ALLOCTYPE_THIS_AG;
--- linux-2.6.13-rc5/fs/xfs/xfs_bmap.c.orig 2005-08-03 11:13:40.000000000 +0900
+++ linux-2.6.13-rc5/fs/xfs/xfs_bmap.c 2005-08-03 11:16:39.000000000 +0900
@@ -2711,7 +2711,7 @@ xfs_bmap_alloc(
}
if (args.fsbno == NULLFSBLOCK && nullfb) {
args.fsbno = 0;
- args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.type = XFS_ALLOCTYPE_FIRST_AG_TRYLOCK;
args.total = ap->minlen;
args.minleft = 0;
if ((error = xfs_alloc_vextent(&args)))
8<--------8<--------8<--------8<--------8<--------8<--------
This patch reduces AGF deadlocks but is not complete yet. To avoid
deadlocks on ENOSPC completely, I also need to change the definition of
XFS_STRAT_WRITE_IMAPS from 2 to 1. This workaround is Steve's
idea.
I don't know whether this patch/approach is right or not, so any
comments are welcome.
--
masano
|