xfs
[Top] [All Lists]

[RFC PATCH 1/6] xfs: introduce mmap/truncate lock

To: xfs@xxxxxxxxxxx
Subject: [RFC PATCH 1/6] xfs: introduce mmap/truncate lock
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Thu, 8 Jan 2015 09:25:38 +1100
Cc: linux-fsdevel@xxxxxxxxxxxxxxx, linux-mm@xxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1420669543-8093-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1420669543-8093-1-git-send-email-david@xxxxxxxxxxxxx>
From: Dave Chinner <dchinner@xxxxxxxxxx>

Right now we cannot serialise mmap against truncate or hole punch
sanely. ->page_mkwrite is not able to take locks that the read IO
path normally takes (i.e. the inode iolock) because that could
result in lock inversions (read - iolock - page fault - page_mkwrite
- iolock) and so we cannot use an IO path lock to serialise page
write faults against truncate operations.

Instead, introduce a new lock that is used *only* in the
->page_mkwrite path that is the equivalent of the iolock. The lock
ordering in a page fault is i_mmaplock -> page lock -> i_ilock,
and so in truncate we can i_iolock -> i_mmaplock and so lock out
new write faults during the process of truncation.

Because i_mmap_lock is outside the page lock, we can hold it across
all the same operations we hold the i_iolock for. The only
difference is that we never hold the i_mmaplock in the normal IO
path and so do not ever have the possibility that we can page fault
inside it. Hence there are no recursion issues on the i_mmap_lock
and so we can use it to serialise page fault IO against inode
modification operations that affect the IO path.

This patch introduces the i_mmaplock infrastructure, lockdep
annotations and initialisation/destruction code. Use of the new lock
will be in subsequent patches.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_inode.c | 86 ++++++++++++++++++++++++++++++++++++++++++++----------
 fs/xfs/xfs_inode.h | 29 +++++++++++++-----
 fs/xfs/xfs_super.c |  2 ++
 3 files changed, 95 insertions(+), 22 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 400791a..573b49c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -150,6 +150,8 @@ xfs_ilock(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +161,11 @@ xfs_ilock(
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
 
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrupdate_nested(&ip->i_mmaplock, XFS_IOLOCK_DEP(lock_flags));
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mraccess_nested(&ip->i_mmaplock, XFS_IOLOCK_DEP(lock_flags));
+
        if (lock_flags & XFS_ILOCK_EXCL)
                mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
        else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +198,8 @@ xfs_ilock_nowait(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +211,35 @@ xfs_ilock_nowait(
                if (!mrtryaccess(&ip->i_iolock))
                        goto out;
        }
+
+       if (lock_flags & XFS_MMAPLOCK_EXCL) {
+               if (!mrtryupdate(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+               if (!mrtryaccess(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       }
+
        if (lock_flags & XFS_ILOCK_EXCL) {
                if (!mrtryupdate(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
        } else if (lock_flags & XFS_ILOCK_SHARED) {
                if (!mrtryaccess(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
        }
        return 1;
 
- out_undo_iolock:
+out_undo_mmaplock:
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+out_undo_iolock:
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrunlock_excl(&ip->i_iolock);
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mrunlock_shared(&ip->i_iolock);
- out:
+out:
        return 0;
 }
 
@@ -244,6 +267,8 @@ xfs_iunlock(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +279,11 @@ xfs_iunlock(
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mrunlock_shared(&ip->i_iolock);
 
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+
        if (lock_flags & XFS_ILOCK_EXCL)
                mrunlock_excl(&ip->i_lock);
        else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +301,14 @@ xfs_ilock_demote(
        xfs_inode_t             *ip,
        uint                    lock_flags)
 {
-       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
+       ASSERT((lock_flags &
+               ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
 
        if (lock_flags & XFS_ILOCK_EXCL)
                mrdemote(&ip->i_lock);
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrdemote(&ip->i_mmaplock);
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrdemote(&ip->i_iolock);
 
@@ -294,6 +327,12 @@ xfs_isilocked(
                return rwsem_is_locked(&ip->i_lock.mr_lock);
        }
 
+       if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
+               if (!(lock_flags & XFS_MMAPLOCK_SHARED))
+                       return !!ip->i_mmaplock.mr_writer;
+               return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+       }
+
        if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
                if (!(lock_flags & XFS_IOLOCK_SHARED))
                        return !!ip->i_iolock.mr_writer;
@@ -314,14 +353,27 @@ int xfs_lock_delays;
 #endif
 
 /*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+ * value. This shouldn't be called for page fault locking, but we also need to
+ * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << 
XFS_IOLOCK_SHIFT;
+       }
+
+       if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+               lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+                                                       XFS_MMAPLOCK_SHIFT;
+       }
+
        if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
 
@@ -440,10 +492,10 @@ again:
 }
 
 /*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
+ * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If 
we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_two_inodes(
@@ -455,8 +507,12 @@ xfs_lock_two_inodes(
        int                     attempts = 0;
        xfs_log_item_t          *lp;
 
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-               ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+
        ASSERT(ip0->i_ino != ip1->i_ino);
 
        if (ip0->i_ino > ip1->i_ino) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index de97ccc..8e7a12a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
        struct xfs_inode_log_item *i_itemp;     /* logging information */
        mrlock_t                i_lock;         /* inode lock */
        mrlock_t                i_iolock;       /* inode IO lock */
+       mrlock_t                i_mmaplock;     /* inode mmap IO lock */
        atomic_t                i_pincount;     /* inode pin count */
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
        /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define        XFS_IOLOCK_SHARED       (1<<1)
 #define        XFS_ILOCK_EXCL          (1<<2)
 #define        XFS_ILOCK_SHARED        (1<<3)
+#define        XFS_MMAPLOCK_EXCL       (1<<4)
+#define        XFS_MMAPLOCK_SHARED     (1<<5)
 
 #define XFS_LOCK_MASK          (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
-                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
+                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+                               | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
 
 #define XFS_LOCK_FLAGS \
        { XFS_IOLOCK_EXCL,      "IOLOCK_EXCL" }, \
        { XFS_IOLOCK_SHARED,    "IOLOCK_SHARED" }, \
        { XFS_ILOCK_EXCL,       "ILOCK_EXCL" }, \
-       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }
+       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }, \
+       { XFS_MMAPLOCK_EXCL,    "MMAPLOCK_EXCL" }, \
+       { XFS_MMAPLOCK_SHARED,  "MMAPLOCK_SHARED" }
 
 
 /*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define XFS_IOLOCK_SHIFT       16
 #define        XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
 
+#define XFS_MMAPLOCK_SHIFT     20
+
 #define XFS_ILOCK_SHIFT                24
 #define        XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
 #define        XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
 #define        XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
 
-#define XFS_IOLOCK_DEP_MASK    0x00ff0000
+#define XFS_IOLOCK_DEP_MASK    0x000f0000
+#define XFS_MMAPLOCK_DEP_MASK  0x00f00000
 #define XFS_ILOCK_DEP_MASK     0xff000000
-#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
-
-#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) >> 
XFS_IOLOCK_SHIFT)
-#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) >> 
XFS_ILOCK_SHIFT)
+#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | \
+                                XFS_MMAPLOCK_DEP_MASK | \
+                                XFS_ILOCK_DEP_MASK)
+
+#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) \
+                                       >> XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_DEP(flags)        (((flags) & XFS_MMAPLOCK_DEP_MASK) \
+                                       >> XFS_MMAPLOCK_SHIFT)
+#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) \
+                                       >> XFS_ILOCK_SHIFT)
 
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index afd6bae..40d2ac5 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -986,6 +986,8 @@ xfs_fs_inode_init_once(
        atomic_set(&ip->i_pincount, 0);
        spin_lock_init(&ip->i_flags_lock);
 
+       mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
 }
-- 
2.0.0

<Prev in Thread] Current Thread [Next in Thread>