xfs
[Top] [All Lists]

[PATCH] repair: avoid ABBA deadlocks on prefetched buffers

To: xfs@xxxxxxxxxxx
Subject: [PATCH] repair: avoid ABBA deadlocks on prefetched buffers
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Tue, 15 Nov 2011 16:09:53 -0500
User-agent: Mutt/1.5.21 (2010-09-15)
Both the prefetch threads and actual repair processing threads can have
multiple buffers at a time locked, but they do no use a common locker
order, which can lead to ABBA deadlocks while trying to lock the buffers.

Switch the prefetch code to do a trylock and skip buffers that have
already been locked to avoid this deadlock.

Reported-by: Arkadiusz Mi??kiewicz <arekm@xxxxxxxx>
Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: xfsprogs-dev/include/libxfs.h
===================================================================
--- xfsprogs-dev.orig/include/libxfs.h  2011-11-15 20:43:02.513069998 +0100
+++ xfsprogs-dev/include/libxfs.h       2011-11-15 20:43:16.669736580 +0100
@@ -279,27 +279,41 @@ enum xfs_buf_flags_t {    /* b_flags bits *
 extern struct cache    *libxfs_bcache;
 extern struct cache_operations libxfs_bcache_operations;
 
+#define LIBXFS_GETBUF_TRYLOCK  (1 << 0)
+
 #ifdef XFS_BUF_TRACING
 
 #define libxfs_readbuf(dev, daddr, len, flags) \
-               libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, (dev), 
(daddr), (len), (flags))
+       libxfs_trace_readbuf(__FUNCTION__, __FILE__, __LINE__, \
+                           (dev), (daddr), (len), (flags))
 #define libxfs_writebuf(buf, flags) \
-               libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, (buf), 
(flags))
+       libxfs_trace_writebuf(__FUNCTION__, __FILE__, __LINE__, \
+                             (buf), (flags))
 #define libxfs_getbuf(dev, daddr, len) \
-               libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, (dev), 
(daddr), (len))
+       libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+                           (dev), (daddr), (len))
+#define libxfs_getbuf_flags(dev, daddr, len, flags) \
+       libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
+                           (dev), (daddr), (len), (flags))
 #define libxfs_putbuf(buf) \
-               libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
+       libxfs_trace_putbuf(__FUNCTION__, __FILE__, __LINE__, (buf))
 
-extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int, dev_t, 
xfs_daddr_t, int, int);
-extern int     libxfs_trace_writebuf(const char *, const char *, int, 
xfs_buf_t *, int);
+extern xfs_buf_t *libxfs_trace_readbuf(const char *, const char *, int,
+                       dev_t, xfs_daddr_t, int, int);
+extern int     libxfs_trace_writebuf(const char *, const char *, int,
+                       xfs_buf_t *, int);
 extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, dev_t, 
xfs_daddr_t, int);
-extern void    libxfs_trace_putbuf (const char *, const char *, int, xfs_buf_t 
*);
+extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
+                       dev_t, xfs_daddr_t, int, unsigned int);
+extern void    libxfs_trace_putbuf (const char *, const char *, int,
+                       xfs_buf_t *);
 
 #else
 
 extern xfs_buf_t *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
 extern int     libxfs_writebuf(xfs_buf_t *, int);
 extern xfs_buf_t *libxfs_getbuf(dev_t, xfs_daddr_t, int);
+extern xfs_buf_t *libxfs_getbuf_flags(dev_t, xfs_daddr_t, int, unsigned int);
 extern void    libxfs_putbuf (xfs_buf_t *);
 
 #endif
Index: xfsprogs-dev/libxfs/rdwr.c
===================================================================
--- xfsprogs-dev.orig/libxfs/rdwr.c     2011-11-15 20:43:02.503069998 +0100
+++ xfsprogs-dev/libxfs/rdwr.c  2011-11-15 20:43:16.669736580 +0100
@@ -195,6 +195,7 @@ libxfs_log_header(
 #undef libxfs_readbuf
 #undef libxfs_writebuf
 #undef libxfs_getbuf
+#undef libxfs_getbuf_flags
 #undef libxfs_putbuf
 
 xfs_buf_t      *libxfs_readbuf(dev_t, xfs_daddr_t, int, int);
@@ -238,6 +239,19 @@ libxfs_trace_getbuf(const char *func, co
        return bp;
 }
 
+xfs_buf_t *
+libxfs_trace_getbuf_flags(const char *func, const char *file, int line,
+               dev_t device, xfs_daddr_t blkno, int len, unsigned long flags)
+{
+       xfs_buf_t       *bp = libxfs_getbuf(device, blkno, len, flags);
+
+       bp->b_func = func;
+       bp->b_file = file;
+       bp->b_line = line;
+
+       return bp;
+}
+
 void
 libxfs_trace_putbuf(const char *func, const char *file, int line, xfs_buf_t 
*bp)
 {
@@ -380,8 +394,8 @@ int                 lock_buf_count = 0;
 
 extern int     use_xfs_buf_lock;
 
-xfs_buf_t *
-libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+struct xfs_buf *
+libxfs_getbuf_flags(dev_t device, xfs_daddr_t blkno, int len, unsigned int 
flags)
 {
        xfs_buf_t       *bp;
        xfs_bufkey_t    key;
@@ -392,28 +406,48 @@ libxfs_getbuf(dev_t device, xfs_daddr_t
        key.bblen = len;
 
        miss = cache_node_get(libxfs_bcache, &key, (struct cache_node **)&bp);
-       if (bp) {
-               if (use_xfs_buf_lock)
+       if (!bp)
+               return NULL;
+
+       if (use_xfs_buf_lock) {
+               if (flags & LIBXFS_GETBUF_TRYLOCK) {
+                       int ret;
+
+                       ret = pthread_mutex_trylock(&bp->b_lock);
+                       if (ret) {
+                               ASSERT(ret == EAGAIN);
+                               cache_node_put(libxfs_bcache, (struct 
cache_node *)bp);
+                               return NULL;
+                       }
+               } else {
                        pthread_mutex_lock(&bp->b_lock);
-               cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
-                       cache_node_get_priority((struct cache_node *)bp) -
+               }
+       }
+
+       cache_node_set_priority(libxfs_bcache, (struct cache_node *)bp,
+               cache_node_get_priority((struct cache_node *)bp) -
                                                CACHE_PREFETCH_PRIORITY);
 #ifdef XFS_BUF_TRACING
-               pthread_mutex_lock(&libxfs_bcache->c_mutex);
-               lock_buf_count++;
-               list_add(&bp->b_lock_list, &lock_buf_list);
-               pthread_mutex_unlock(&libxfs_bcache->c_mutex);
+       pthread_mutex_lock(&libxfs_bcache->c_mutex);
+       lock_buf_count++;
+       list_add(&bp->b_lock_list, &lock_buf_list);
+       pthread_mutex_unlock(&libxfs_bcache->c_mutex);
 #endif
 #ifdef IO_DEBUG
-               printf("%lx %s: %s buffer %p for bno = %llu\n",
-                       pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
-                       bp, (long long)LIBXFS_BBTOOFF64(blkno));
+       printf("%lx %s: %s buffer %p for bno = %llu\n",
+               pthread_self(), __FUNCTION__, miss ? "miss" : "hit",
+               bp, (long long)LIBXFS_BBTOOFF64(blkno));
 #endif
-       }
 
        return bp;
 }
 
+struct xfs_buf *
+libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len)
+{
+       return libxfs_getbuf_flags(device, blkno, len, 0);
+}
+
 void
 libxfs_putbuf(xfs_buf_t *bp)
 {
Index: xfsprogs-dev/repair/prefetch.c
===================================================================
--- xfsprogs-dev.orig/repair/prefetch.c 2011-11-15 20:44:30.903069469 +0100
+++ xfsprogs-dev/repair/prefetch.c      2011-11-15 20:48:23.073068083 +0100
@@ -112,8 +112,17 @@ pf_queue_io(
 {
        xfs_buf_t               *bp;
 
-       bp = libxfs_getbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
-                       XFS_FSB_TO_BB(mp, blen));
+       /*
+        * Never block on a buffer lock here, given that the actual repair
+        * code might lock buffers in a different order from us.  Given that
+        * the lock holder is either reading it from disk himself or
+        * completely overwriting it this behaviour is perfectly fine.
+        */
+       bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+                       XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK);
+       if (!bp)
+               return;
+
        if (bp->b_flags & LIBXFS_B_UPTODATE) {
                if (B_IS_INODE(flag))
                        pf_read_inode_dirs(args, bp);

<Prev in Thread] Current Thread [Next in Thread>