xfs
[Top] [All Lists]

[PATCH v2] xfs: split metadata and log buffer completion to separate wor

To: xfs@xxxxxxxxxxx
Subject: [PATCH v2] xfs: split metadata and log buffer completion to separate workqueues
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Wed, 3 Dec 2014 14:24:19 -0500
Delivered-to: xfs@xxxxxxxxxxx
XFS traditionally sends all buffer I/O completion work to a single
workqueue. This includes metadata buffer completion and log buffer
completion. The log buffer completion requires a high priority queue to
prevent stalls due to log forces getting stuck behind other queued work.

Rather than continue to prioritize all buffer I/O completion due to the
needs of log completion, split log buffer completion off to
m_log_workqueue and move the high priority flag from m_buf_workqueue to
m_log_workqueue.

Add a b_ioend_wq wq pointer to xfs_buf to allow completion workqueue
customization on a per-buffer basis. Initialize b_ioend_wq to
m_buf_workqueue by default in the generic buffer I/O submission path.
Finally, override the default wq with the high priority m_log_workqueue
in the log buffer I/O submission path.

Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>
---

v2:
- Add bp->b_ioend_wq pointer to multiplex buffer I/O completion
  workqueues.
v1/rfc: http://oss.sgi.com/archives/xfs/2014-11/msg00240.html

 fs/xfs/xfs_buf.c   | 13 ++++++++++---
 fs/xfs/xfs_buf.h   |  3 ++-
 fs/xfs/xfs_log.c   |  4 ++++
 fs/xfs/xfs_super.c |  5 ++---
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4ce390..bb502a3 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1041,7 +1041,7 @@ xfs_buf_ioend_work(
        struct work_struct      *work)
 {
        struct xfs_buf          *bp =
-               container_of(work, xfs_buf_t, b_iodone_work);
+               container_of(work, xfs_buf_t, b_ioend_work);
 
        xfs_buf_ioend(bp);
 }
@@ -1050,8 +1050,8 @@ void
 xfs_buf_ioend_async(
        struct xfs_buf  *bp)
 {
-       INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
-       queue_work(bp->b_target->bt_mount->m_buf_workqueue, &bp->b_iodone_work);
+       INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
+       queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
 }
 
 void
@@ -1220,6 +1220,13 @@ _xfs_buf_ioapply(
         */
        bp->b_error = 0;
 
+       /*
+        * Initialize the I/O completion workqueue if we haven't yet or the
+        * submitter has not opted to specify a custom one.
+        */
+       if (!bp->b_ioend_wq)
+               bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
+
        if (bp->b_flags & XBF_WRITE) {
                if (bp->b_flags & XBF_SYNCIO)
                        rw = WRITE_SYNC;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 82002c0..75ff5d5 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -164,7 +164,8 @@ typedef struct xfs_buf {
        struct xfs_perag        *b_pag;         /* contains rbtree root */
        xfs_buftarg_t           *b_target;      /* buffer target (device) */
        void                    *b_addr;        /* virtual address of buffer */
-       struct work_struct      b_iodone_work;
+       struct work_struct      b_ioend_work;
+       struct workqueue_struct *b_ioend_wq;    /* I/O completion wq */
        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
        struct completion       b_iowait;       /* queue for I/O waiters */
        void                    *b_fspriv;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f6d6b8b..e408bf5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1806,6 +1806,8 @@ xlog_sync(
        XFS_BUF_ZEROFLAGS(bp);
        XFS_BUF_ASYNC(bp);
        bp->b_flags |= XBF_SYNCIO;
+       /* use high priority completion wq */
+       bp->b_ioend_wq = log->l_mp->m_log_workqueue;
 
        if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
                bp->b_flags |= XBF_FUA;
@@ -1854,6 +1856,8 @@ xlog_sync(
                bp->b_flags |= XBF_SYNCIO;
                if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
                        bp->b_flags |= XBF_FUA;
+               /* use high priority completion wq */
+               bp->b_ioend_wq = log->l_mp->m_log_workqueue;
 
                ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
                ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index fee11c8..7a833c3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -839,8 +839,7 @@ xfs_init_mount_workqueues(
        struct xfs_mount        *mp)
 {
        mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
-                       WQ_MEM_RECLAIM|WQ_HIGHPRI|WQ_FREEZABLE, 1,
-                       mp->m_fsname);
+                       WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
        if (!mp->m_buf_workqueue)
                goto out;
 
@@ -865,7 +864,7 @@ xfs_init_mount_workqueues(
                goto out_destroy_cil;
 
        mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-                       WQ_FREEZABLE, 0, mp->m_fsname);
+                       WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname);
        if (!mp->m_log_workqueue)
                goto out_destroy_reclaim;
 
-- 
1.8.3.1

<Prev in Thread] Current Thread [Next in Thread>