xfs
[Top] [All Lists]

[PATCH 3/6] xfs: convert ENOSPC inode flushing to use new syncd workqueu

To: xfs@xxxxxxxxxxx
Subject: [PATCH 3/6] xfs: convert ENOSPC inode flushing to use new syncd workqueue
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Thu, 10 Mar 2011 11:05:26 +1100
In-reply-to: <1299715529-11026-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1299715529-11026-1-git-send-email-david@xxxxxxxxxxxxx>
From: Dave Chinner <dchinner@xxxxxxxxxx>

On of the problems with the current inode flush at ENOSPC is that we
queue a flush per ENOSPC event, regardless of how many are already
queued. Thi can result in    hundreds of queued flushes, most of
which simply burn CPU scanned and do no real work. This simply slows
down allocation at ENOSPC.

We really only need one active flush at a time, and we can easily
implement that via the new xfs_syncd_wq. All we need to do is queue
a flush if one is not already active, then block waiting for the
currently active flush to complete. The result is that we only ever
have a single ENOSPC inode flush active at a time and this greatly
reduces the overhead of ENOSPC processing.

On my 2p test machine, this results in tests exercising ENOSPC
conditions running significantly faster - 042 halves execution time,
083 drops from 60s to 5s, etc - while not introducing test
regressions.

This allows us to remove the old xfssyncd threads and infrastructure
as they are no longer used.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/linux-2.6/xfs_super.c |    2 -
 fs/xfs/linux-2.6/xfs_sync.c  |  133 +++++++++++------------------------------
 fs/xfs/xfs_mount.h           |    4 +-
 3 files changed, 37 insertions(+), 102 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 768894b..1cbee9c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1480,8 +1480,6 @@ xfs_fs_fill_super(
        spin_lock_init(&mp->m_sb_lock);
        mutex_init(&mp->m_growlock);
        atomic_set(&mp->m_active_trans, 0);
-       INIT_LIST_HEAD(&mp->m_sync_list);
-       spin_lock_init(&mp->m_sync_lock);
 
        mp->m_super = sb;
        sb->s_fs_info = mp;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 91c513b..62e1f09 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -433,99 +433,6 @@ xfs_quiesce_attr(
        xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-       struct xfs_mount *mp,
-       void            *data,
-       void            (*syncer)(struct xfs_mount *, void *),
-       struct completion *completion)
-{
-       struct xfs_sync_work *work;
-
-       work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-       INIT_LIST_HEAD(&work->w_list);
-       work->w_syncer = syncer;
-       work->w_data = data;
-       work->w_mount = mp;
-       work->w_completion = completion;
-       spin_lock(&mp->m_sync_lock);
-       list_add_tail(&work->w_list, &mp->m_sync_list);
-       spin_unlock(&mp->m_sync_lock);
-       wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-       struct xfs_mount *mp,
-       void            *arg)
-{
-       struct inode    *inode = arg;
-       xfs_sync_data(mp, SYNC_TRYLOCK);
-       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-       iput(inode);
-}
-
-void
-xfs_flush_inodes(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-       DECLARE_COMPLETION_ONSTACK(completion);
-
-       igrab(inode);
-       xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, 
&completion);
-       wait_for_completion(&completion);
-       xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
-}
-
-STATIC int
-xfssyncd(
-       void                    *arg)
-{
-       struct xfs_mount        *mp = arg;
-       long                    timeleft;
-       xfs_sync_work_t         *work, *n;
-       LIST_HEAD               (tmp);
-
-       set_freezable();
-       timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-       for (;;) {
-               if (list_empty(&mp->m_sync_list))
-                       timeleft = schedule_timeout_interruptible(timeleft);
-               /* swsusp */
-               try_to_freeze();
-               if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-                       break;
-
-               spin_lock(&mp->m_sync_lock);
-               list_splice_init(&mp->m_sync_list, &tmp);
-               spin_unlock(&mp->m_sync_lock);
-
-               list_for_each_entry_safe(work, n, &tmp, w_list) {
-                       (*work->w_syncer)(mp, work->w_data);
-                       list_del(&work->w_list);
-                       if (work->w_completion)
-                               complete(work->w_completion);
-                       kmem_free(work);
-               }
-       }
-
-       return 0;
-}
-
 static void
 xfs_syncd_queue_sync(
        struct xfs_mount        *mp)
@@ -562,16 +469,48 @@ xfs_sync_worker(
        xfs_syncd_queue_sync(mp);
 }
 
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       queue_work(xfs_syncd_wq, &mp->m_flush_work);
+       flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(work,
+                                       struct xfs_mount, m_flush_work);
+
+       xfs_sync_data(mp, SYNC_TRYLOCK);
+       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+       xfs_log_force(mp, XFS_LOG_SYNC);
+}
+
 int
 xfs_syncd_init(
        struct xfs_mount        *mp)
 {
+       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
        INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
        xfs_syncd_queue_sync(mp);
 
-       mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", 
mp->m_fsname);
-       if (IS_ERR(mp->m_sync_task))
-               return -PTR_ERR(mp->m_sync_task);
        return 0;
 }
 
@@ -580,7 +519,7 @@ xfs_syncd_stop(
        struct xfs_mount        *mp)
 {
        cancel_delayed_work_sync(&mp->m_sync_work);
-       kthread_stop(mp->m_sync_task);
+       cancel_work_sync(&mp->m_flush_work);
 }
 
 void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 2c11e62..a0ad90e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -204,9 +204,7 @@ typedef struct xfs_mount {
 #endif
        struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
        struct delayed_work     m_sync_work;    /* background sync work */
-       struct task_struct      *m_sync_task;   /* generalised sync thread */
-       struct list_head        m_sync_list;    /* sync thread work item list */
-       spinlock_t              m_sync_lock;    /* work item list lock */
+       struct work_struct      m_flush_work;   /* background inode flush */
        __int64_t               m_update_flags; /* sb flags we need to update
                                                   on the next remount,rw */
        struct shrinker         m_inode_shrink; /* inode reclaim shrinker */
-- 
1.7.2.3

<Prev in Thread] Current Thread [Next in Thread>