xfs
[Top] [All Lists]

Re: [LOCKDEP] xfs: possible recursive locking detected

To: Nathan Scott <nathans@xxxxxxx>
Subject: Re: [LOCKDEP] xfs: possible recursive locking detected
From: Ingo Molnar <mingo@xxxxxxx>
Date: Tue, 4 Jul 2006 10:41:43 +0200
Cc: Alexey Dobriyan <adobriyan@xxxxxxxxx>, Matthew Wilcox <matthew@xxxxxx>, linux-fsdevel@xxxxxxxxxxxxxxx, xfs@xxxxxxxxxxx, Arjan van de Ven <arjan@xxxxxxxxxxxxx>
In-reply-to: <20060704063225.GA2752@xxxxxxx>
References: <20060704004116.GA7612@xxxxxxxxxxxxxxxxxxxxxx> <20060704011858.GG1605@xxxxxxxxxxxxxxxx> <20060704112503.H1495869@xxxxxxxxxxxxxxxxxxxxxxxx> <20060704063225.GA2752@xxxxxxx>
Sender: xfs-bounce@xxxxxxxxxxx
User-agent: Mutt/1.4.2.1i
* Ingo Molnar <mingo@xxxxxxx> wrote:

> > > > While trying to remove 2 small files, 2 empty dirs and 1 empty dir 
> > > > on xfs partition
> > > 
> > > Probably spurious.  xfs_ilock can be called on both the parent and 
> > > child, which wouldn't be a deadlock.
> > 
> > Hmm... they'd be different inodes though, so different lock addresses 
> > in memory - is lockdep taking that into account?  Would we need to go 
> > annotate xfs_ilock somehow to give better hints to the lockdep code?
> 
> correct, lockdep has to be taught about relations between locks within 
> the same lock-class. (it detects relations between different 
> lock-classes automatically) It's usually a straightforward process.
> 
> In this particular case we probably need to do something similar to 
> the VFS's 'enum inode_i_mutex_lock_class' subclass categorizations: we 
> need xfs_ilock_nested(.., subclass), where in xfs_lock_dir_and_entry() 
> we'd pass in ILOCK_PARENT. [normal locking calls have a default 
> subclass ID of 0]

the patch below should solve the case mentioned above, but i'm sure 
there will be other cases as well.

XFS locking seems quite complex all around. All this dynamic 
flag-passing into an opaque function (such as xfs_ilock), just to have 
them untangled in xfs_ilock():

        if (lock_flags & XFS_IOLOCK_EXCL) {
                mrupdate(&ip->i_iolock);
        } else if (lock_flags & XFS_IOLOCK_SHARED) {
                mraccess(&ip->i_iolock);
        }
        if (lock_flags & XFS_ILOCK_EXCL) {
                mrupdate(&ip->i_lock);
        } else if (lock_flags & XFS_ILOCK_SHARED) {
                mraccess(&ip->i_lock);
        }

is pretty inefficient too - there are 85 calls to xfs_ilock(), and 74 of 
them have static flags. 

Also, mrunlock() does:

 static inline void mrunlock(mrlock_t *mrp)
 {
         if (mrp->mr_writer) {
                 mrp->mr_writer = 0;
                 up_write(&mrp->mr_lock);
         } else {
                 up_read(&mrp->mr_lock);
         }
 }

while xfs_iunlock() knows whether the release is for read or for write! 
So ->mr_writer seems like a totally unnecessary layer. In fact basically 
all codepaths should be well aware of whether they locked the inode for 
read or for write.

i'd really suggest to clean this up and to convert:

        xfs_ilock(ip, XFS_ILOCK_EXCL);

to:

        down_write(&ip->i_lock);

and:

        xfs_iunlock(ip, XFS_ILOCK_EXCL);

to:

        up_write(&ip->i_lock);

and eliminate all those layers of indirection.

        Ingo

---
 fs/xfs/linux-2.6/mrlock.h |   33 +++++++++++++++++++++++++++++++++
 fs/xfs/xfs_iget.c         |   12 ++++++++----
 fs/xfs/xfs_inode.h        |    3 ++-
 fs/xfs/xfs_vnodeops.c     |    8 ++++----
 4 files changed, 47 insertions(+), 9 deletions(-)

Index: linux/fs/xfs/linux-2.6/mrlock.h
===================================================================
--- linux.orig/fs/xfs/linux-2.6/mrlock.h
+++ linux/fs/xfs/linux-2.6/mrlock.h
@@ -27,12 +27,33 @@ typedef struct {
        int                     mr_writer;
 } mrlock_t;
 
+/*
+ * ilock nesting subclasses for the lock validator:
+ *
+ * 0: the object of the current VFS operation
+ * 1: parent
+ * 2: child/target
+ * 3: quota file
+ *
+ * The locking order between these classes is
+ * parent -> child -> normal -> quota
+ */
+enum xfs_ilock_class
+{
+       ILOCK_NORMAL,
+       ILOCK_PARENT,
+       ILOCK_CHILD,
+       ILOCK_QUOTA
+};
+
 #define mrinit(mrp, name)      \
        do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
 #define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
 #define mrfree(mrp)            do { } while (0)
 #define mraccess(mrp)          mraccessf(mrp, 0)
+#define mraccess_nested(mrp, s)        mraccessf_nested(mrp, 0, s)
 #define mrupdate(mrp)          mrupdatef(mrp, 0)
+#define mrupdate_nested(mrp, s)        mrupdatef_nested(mrp, 0, s)
 
 static inline void mraccessf(mrlock_t *mrp, int flags)
 {
@@ -45,6 +66,18 @@ static inline void mrupdatef(mrlock_t *m
        mrp->mr_writer = 1;
 }
 
+static inline void mraccessf_nested(mrlock_t *mrp, int flags, int subclass)
+{
+       down_read_nested(&mrp->mr_lock, subclass);
+}
+
+static inline void mrupdatef_nested(mrlock_t *mrp, int flags, int subclass)
+{
+       down_write_nested(&mrp->mr_lock, subclass);
+       mrp->mr_writer = 1;
+}
+
+
 static inline int mrtryaccess(mrlock_t *mrp)
 {
        return down_read_trylock(&mrp->mr_lock);
Index: linux/fs/xfs/xfs_iget.c
===================================================================
--- linux.orig/fs/xfs/xfs_iget.c
+++ linux/fs/xfs/xfs_iget.c
@@ -859,10 +859,14 @@ xfs_ilock(xfs_inode_t     *ip,
        } else if (lock_flags & XFS_IOLOCK_SHARED) {
                mraccess(&ip->i_iolock);
        }
-       if (lock_flags & XFS_ILOCK_EXCL) {
-               mrupdate(&ip->i_lock);
-       } else if (lock_flags & XFS_ILOCK_SHARED) {
-               mraccess(&ip->i_lock);
+       if (lock_flags & XFS_ILOCK_EXCL_PARENT)
+               mrupdate_nested(&ip->i_lock, ILOCK_PARENT);
+       else {
+               if (lock_flags & XFS_ILOCK_EXCL) {
+                       mrupdate(&ip->i_lock);
+               } else if (lock_flags & XFS_ILOCK_SHARED) {
+                       mraccess(&ip->i_lock);
+               }
        }
        xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
 }
Index: linux/fs/xfs/xfs_inode.h
===================================================================
--- linux.orig/fs/xfs/xfs_inode.h
+++ linux/fs/xfs/xfs_inode.h
@@ -352,11 +352,12 @@ typedef struct xfs_inode {
 #define XFS_SIZE_TOKEN_WR       (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
 #define XFS_EXTSIZE_WR         (XFS_EXTSIZE_RD | XFS_WILLLEND)
 /*     XFS_SIZE_TOKEN_WANT     0x200 */
+#define        XFS_ILOCK_EXCL_PARENT   0x400
 
 #define XFS_LOCK_MASK  \
        (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL | \
         XFS_ILOCK_SHARED | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD | \
-        XFS_WILLLEND)
+        XFS_WILLLEND | XFS_ILOCK_EXCL_PARENT)
 
 /*
  * Flags for xfs_iflush()
Index: linux/fs/xfs/xfs_vnodeops.c
===================================================================
--- linux.orig/fs/xfs/xfs_vnodeops.c
+++ linux/fs/xfs/xfs_vnodeops.c
@@ -1942,7 +1942,7 @@ xfs_create(
                goto error_return;
        }
 
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       xfs_ilock(dp, XFS_ILOCK_EXCL_PARENT);
 
        XFS_BMAP_INIT(&free_list, &first_block);
 
@@ -2137,7 +2137,7 @@ xfs_lock_dir_and_entry(
        attempts = 0;
 
 again:
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       xfs_ilock(dp, XFS_ILOCK_EXCL_PARENT);
 
        e_inum = ip->i_ino;
 
@@ -2836,7 +2836,7 @@ xfs_mkdir(
                goto error_return;
        }
 
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       xfs_ilock(dp, XFS_ILOCK_EXCL_PARENT);
 
        /*
         * Check for directory link count overflow.
@@ -3385,7 +3385,7 @@ xfs_symlink(
                goto error_return;
        }
 
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       xfs_ilock(dp, XFS_ILOCK_EXCL_PARENT);
 
        /*
         * Check whether the directory allows new symlinks or not.


<Prev in Thread] Current Thread [Next in Thread>