xfs
[Top] [All Lists]

[PATCH 06/11] xfs: replace i_flock with a sleeping bitlock

To: xfs@xxxxxxxxxxx
Subject: [PATCH 06/11] xfs: replace i_flock with a sleeping bitlock
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Thu, 08 Dec 2011 10:58:01 -0500
References: <20111208155755.323930705@xxxxxxxxxxxxxxxxxxxxxx>
User-agent: quilt/0.48-1
We almost never block on i_flock, the exception is synchronous inode
flushing.  Instead of bloating the inode with a 16/24-byte completion
that we abuse as a semaphore just implement it as a bitlock that uses
a bit waitqueue for the rare sleeping path.  This primarily is a
tradeoff between a much smaller inode and a faster non-blocking
path vs faster wakeups, and we are much better off with the former.

A small downside is that we will lose lockdep checking for i_flock, but
given that it's always taken inside the ilock that should be acceptable.

Note that for example the inode writeback locking is implemented in a
very similar way.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Reviewed-by: Alex Elder <aelder@xxxxxxx>

---
 fs/xfs/xfs_iget.c       |   20 +++++++++++-
 fs/xfs/xfs_inode.c      |    4 +-
 fs/xfs/xfs_inode.h      |   78 ++++++++++++++++++++++++++++++------------------
 fs/xfs/xfs_inode_item.c |    4 +-
 fs/xfs/xfs_super.c      |    7 ----
 fs/xfs/xfs_sync.c       |    9 ++---
 6 files changed, 76 insertions(+), 46 deletions(-)

Index: xfs/fs/xfs/xfs_iget.c
===================================================================
--- xfs.orig/fs/xfs/xfs_iget.c  2011-11-30 12:59:04.989734747 +0100
+++ xfs/fs/xfs/xfs_iget.c       2011-11-30 12:59:08.843047205 +0100
@@ -77,7 +77,7 @@ xfs_inode_alloc(
 
        ASSERT(atomic_read(&ip->i_pincount) == 0);
        ASSERT(!spin_is_locked(&ip->i_flags_lock));
-       ASSERT(completion_done(&ip->i_flush));
+       ASSERT(!xfs_isiflocked(ip));
        ASSERT(ip->i_ino == 0);
 
        mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@@ -151,7 +151,7 @@ xfs_inode_free(
        /* asserts to verify all state is correct here */
        ASSERT(atomic_read(&ip->i_pincount) == 0);
        ASSERT(!spin_is_locked(&ip->i_flags_lock));
-       ASSERT(completion_done(&ip->i_flush));
+       ASSERT(!xfs_isiflocked(ip));
 
        /*
         * Because we use RCU freeing we need to ensure the inode always
@@ -714,3 +714,19 @@ xfs_isilocked(
        return 0;
 }
 #endif
+
+void
+__xfs_iflock(
+       struct xfs_inode        *ip)
+{
+       wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
+       DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
+
+       do {
+               prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+               if (xfs_isiflocked(ip))
+                       io_schedule();
+       } while (!xfs_iflock_nowait(ip));
+
+       finish_wait(wq, &wait.wait);
+}
Index: xfs/fs/xfs/xfs_inode.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode.c 2011-11-30 12:59:05.003068007 +0100
+++ xfs/fs/xfs/xfs_inode.c      2011-11-30 12:59:08.843047205 +0100
@@ -2396,7 +2396,7 @@ xfs_iflush(
        XFS_STATS_INC(xs_iflush_count);
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(!completion_done(&ip->i_flush));
+       ASSERT(xfs_isiflocked(ip));
        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
               ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 
@@ -2512,7 +2512,7 @@ xfs_iflush_int(
 #endif
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(!completion_done(&ip->i_flush));
+       ASSERT(xfs_isiflocked(ip));
        ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
               ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
 
Index: xfs/fs/xfs/xfs_inode.h
===================================================================
--- xfs.orig/fs/xfs/xfs_inode.h 2011-11-30 12:59:05.013067955 +0100
+++ xfs/fs/xfs/xfs_inode.h      2011-11-30 12:59:08.846380520 +0100
@@ -237,7 +237,6 @@ typedef struct xfs_inode {
        struct xfs_inode_log_item *i_itemp;     /* logging information */
        mrlock_t                i_lock;         /* inode lock */
        mrlock_t                i_iolock;       /* inode IO lock */
-       struct completion       i_flush;        /* inode flush completion q */
        atomic_t                i_pincount;     /* inode pin count */
        wait_queue_head_t       i_ipin_wait;    /* inode pinning wait queue */
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
@@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *i
        return ret;
 }
 
+static inline int
+xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
+{
+       int ret;
+
+       spin_lock(&ip->i_flags_lock);
+       ret = ip->i_flags & flags;
+       if (!ret)
+               ip->i_flags |= flags;
+       spin_unlock(&ip->i_flags_lock);
+       return ret;
+}
+
 /*
  * Project quota id helpers (previously projid was 16bit only
  * and using two 16bit values to hold new 32bit projid was chosen
@@ -344,35 +356,17 @@ xfs_set_projid(struct xfs_inode *ip,
 }
 
 /*
- * Manage the i_flush queue embedded in the inode.  This completion
- * queue synchronizes processes attempting to flush the in-core
- * inode back to disk.
- */
-static inline void xfs_iflock(xfs_inode_t *ip)
-{
-       wait_for_completion(&ip->i_flush);
-}
-
-static inline int xfs_iflock_nowait(xfs_inode_t *ip)
-{
-       return try_wait_for_completion(&ip->i_flush);
-}
-
-static inline void xfs_ifunlock(xfs_inode_t *ip)
-{
-       complete(&ip->i_flush);
-}
-
-/*
  * In-core inode flags.
  */
-#define XFS_IRECLAIM           0x0001  /* started reclaiming this inode */
-#define XFS_ISTALE             0x0002  /* inode has been staled */
-#define XFS_IRECLAIMABLE       0x0004  /* inode can be reclaimed */
-#define XFS_INEW               0x0008  /* inode has just been allocated */
-#define XFS_IFILESTREAM                0x0010  /* inode is in a filestream 
directory */
-#define XFS_ITRUNCATED         0x0020  /* truncated down so flush-on-close */
-#define XFS_IDIRTY_RELEASE     0x0040  /* dirty release already seen */
+#define XFS_IRECLAIM           (1 << 0) /* started reclaiming this inode */
+#define XFS_ISTALE             (1 << 1) /* inode has been staled */
+#define XFS_IRECLAIMABLE       (1 << 2) /* inode can be reclaimed */
+#define XFS_INEW               (1 << 3) /* inode has just been allocated */
+#define XFS_IFILESTREAM                (1 << 4) /* inode is in a filestream 
dir. */
+#define XFS_ITRUNCATED         (1 << 5) /* truncated down so flush-on-close */
+#define XFS_IDIRTY_RELEASE     (1 << 6) /* dirty release already seen */
+#define __XFS_IFLOCK_BIT       7        /* inode is being flushed right now */
+#define XFS_IFLOCK             (1 << __XFS_IFLOCK_BIT)
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -385,6 +379,34 @@ static inline void xfs_ifunlock(xfs_inod
         XFS_IFILESTREAM);
 
 /*
+ * Synchronize processes attempting to flush the in-core inode back to disk.
+ */
+
+extern void __xfs_iflock(struct xfs_inode *ip);
+
+static inline int xfs_iflock_nowait(struct xfs_inode *ip)
+{
+       return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
+}
+
+static inline void xfs_iflock(struct xfs_inode *ip)
+{
+       if (!xfs_iflock_nowait(ip))
+               __xfs_iflock(ip);
+}
+
+static inline void xfs_ifunlock(struct xfs_inode *ip)
+{
+       xfs_iflags_clear(ip, XFS_IFLOCK);
+       wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
+}
+
+static inline int xfs_isiflocked(struct xfs_inode *ip)
+{
+       return xfs_iflags_test(ip, XFS_IFLOCK);
+}
+
+/*
  * Flags for inode locking.
  * Bit ranges: 1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
  *             1<<16 - 1<<32-1 -- lockdep annotation (integers)
Index: xfs/fs/xfs/xfs_inode_item.c
===================================================================
--- xfs.orig/fs/xfs/xfs_inode_item.c    2011-11-30 12:59:05.023067900 +0100
+++ xfs/fs/xfs/xfs_inode_item.c 2011-11-30 12:59:08.846380520 +0100
@@ -717,7 +717,7 @@ xfs_inode_item_pushbuf(
         * If a flush is not in progress anymore, chances are that the
         * inode was taken off the AIL. So, just get out.
         */
-       if (completion_done(&ip->i_flush) ||
+       if (!xfs_isiflocked(ip) ||
            !(lip->li_flags & XFS_LI_IN_AIL)) {
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
                return true;
@@ -750,7 +750,7 @@ xfs_inode_item_push(
        struct xfs_inode        *ip = iip->ili_inode;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
-       ASSERT(!completion_done(&ip->i_flush));
+       ASSERT(xfs_isiflocked(ip));
 
        /*
         * Since we were able to lock the inode's flush lock and
Index: xfs/fs/xfs/xfs_super.c
===================================================================
--- xfs.orig/fs/xfs/xfs_super.c 2011-11-30 12:59:05.036401160 +0100
+++ xfs/fs/xfs/xfs_super.c      2011-11-30 12:59:08.846380520 +0100
@@ -829,13 +829,6 @@ xfs_fs_inode_init_once(
        atomic_set(&ip->i_pincount, 0);
        spin_lock_init(&ip->i_flags_lock);
        init_waitqueue_head(&ip->i_ipin_wait);
-       /*
-        * Because we want to use a counting completion, complete
-        * the flush completion once to allow a single access to
-        * the flush completion without blocking.
-        */
-       init_completion(&ip->i_flush);
-       complete(&ip->i_flush);
 
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
Index: xfs/fs/xfs/xfs_sync.c
===================================================================
--- xfs.orig/fs/xfs/xfs_sync.c  2011-11-30 12:59:05.053067738 +0100
+++ xfs/fs/xfs/xfs_sync.c       2011-11-30 12:59:08.849713835 +0100
@@ -671,14 +671,13 @@ xfs_reclaim_inode_grab(
                return 1;
 
        /*
-        * do some unlocked checks first to avoid unnecessary lock traffic.
-        * The first is a flush lock check, the second is a already in reclaim
-        * check. Only do these checks if we are not going to block on locks.
+        * If we are asked for non-blocking operation, do unlocked checks to
+        * see if the inode already is being flushed or in reclaim to avoid
+        * lock traffic.
         */
        if ((flags & SYNC_TRYLOCK) &&
-           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+           __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
                return 1;
-       }
 
        /*
         * The radix tree lock here protects a thread in xfs_iget from racing

<Prev in Thread] Current Thread [Next in Thread>