xfs
[Top] [All Lists]

[PATCH] deadlocks on ENOSPC

To: linux-xfs@xxxxxxxxxxx
Subject: [PATCH] deadlocks on ENOSPC
From: Andi Kleen <ak@xxxxxxx>
Date: Sat, 12 Jun 2004 04:08:38 +0200
Sender: linux-xfs-bounce@xxxxxxxxxxx
Hallo,

I've been tracking a deadlock on out of space conditions. It's easy to 
reproduce:

Use a XFS file systems with about 30MB free and run fsstress -d/xfs -p30 -n50000
on it and run in parallel while true ; do cp -a /usr /xfs ; done 

After some time all fsstress processes and up in D and don't make any progress 
anymore. Looking at sysrq output they seem to be mostly deadlocked in grabbing
the semaphore of a pagebuf. Two or three are deep in allocation (usually
beyind xfs_alloc_read_agf()), the others are in sync and wait_on_inode

(e.g. http://www.firstfloor.org/~andi/deadlock3 has a full example) 

I see it with 2.6.7rc3-bk3 and earlier kernels.

My theory is that one of the ENOSPC error paths forgets to brelse a buffer 
which 
causes the semaphore to get lost and causes the deadlocks later. This is likely
a AGF buffer, since xfs_alloc_read_agf() and children are blocking too.

While looking at the code I found some missing brelses in xfs_alloc errors
paths that could explain some of the problems. With this patch applied
I unfortunately get still deadlocks with the same test case eventually 
(lost pagebuf sem again) but they take soemwhat longer.

Most likely more audit of error paths is needed (i wish there was an automatic
tool for that).

-Andi  



diff -u linux/fs/xfs/xfs_alloc.c-o linux/fs/xfs/xfs_alloc.c
--- linux/fs/xfs/xfs_alloc.c-o  1970-01-01 01:12:51.000000000 +0100
+++ linux/fs/xfs/xfs_alloc.c    2004-06-12 03:54:18.000000000 +0200
@@ -1868,6 +1868,7 @@
         * try harder at this point
         */
        if (pag->pagf_metadata && args->userdata && flags) {
+               xfs_trans_brelse(tp, agbp);
                args->agbp = NULL;
                return 0;
        }
@@ -1886,8 +1887,7 @@
             (int)(pag->pagf_freeblks + pag->pagf_flcount -
                   need - args->total) <
             (int)args->minleft)) {
-               if (agbp)
-                       xfs_trans_brelse(tp, agbp);
+               xfs_trans_brelse(tp, agbp);
                args->agbp = NULL;
                return 0;
        }
@@ -1932,10 +1932,16 @@
        while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) {
                xfs_buf_t       *bp;
 
-               if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
+               if ((error = xfs_alloc_get_freelist(tp, agbp, &bno))) {
+                       xfs_trans_brelse(tp, agbp);
+                       args->agbp = NULL;      
                        return error;
-               if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 
1)))
+               }
+               if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 
1))) {
+                       xfs_trans_brelse(tp, agbp);
+                       args->agbp = NULL;
                        return error;
+               }
                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
                xfs_trans_binval(tp, bp);
        }
@@ -1951,8 +1957,11 @@
        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
        targs.type = XFS_ALLOCTYPE_THIS_AG;
        targs.pag = pag;
-       if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
+       if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) { 
+               xfs_trans_brelse(tp, agbp);
+               args->agbp = NULL;      
                return error;
+       }
        /*
         * Make the freelist longer if it's too short.
         */
@@ -1962,8 +1971,12 @@
                /*
                 * Allocate as many blocks as possible at once.
                 */
-               if ((error = xfs_alloc_ag_vextent(&targs)))
+               if ((error = xfs_alloc_ag_vextent(&targs))) {
+                       xfs_trans_brelse(tp, agbp);
+                       xfs_trans_brelse(tp, agflbp);
+                       args->agbp = NULL;      
                        return error;
+               }
                /*
                 * Stop if we run out.  Won't happen if callers are obeying
                 * the restrictions correctly.  Can happen for free calls
@@ -1976,8 +1989,12 @@
                 */
                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
                        if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
-                                       bno)))
+                                                           bno))) { 
+                               xfs_trans_brelse(tp, agbp);
+                               xfs_trans_brelse(tp, agflbp);
+                               args->agbp = NULL;      
                                return error;
+                       }
                }
        }
        args->agbp = agbp;
diff -u linux/fs/xfs/xfs_trans_buf.c-o linux/fs/xfs/xfs_trans_buf.c
--- linux/fs/xfs/xfs_trans_buf.c-o      2004-06-11 16:32:53.000000000 +0200
+++ linux/fs/xfs/xfs_trans_buf.c        2004-06-12 03:45:11.000000000 +0200
@@ -521,6 +521,9 @@
        xfs_log_item_t          *lip;
        xfs_log_item_desc_t     *lidp;
 
+       if (!bp)
+               return;
+
        /*
         * Default to a normal brelse() call if the tp is NULL.
         */


<Prev in Thread] Current Thread [Next in Thread>