On Thu, Jun 04, 2015 at 12:03:39PM +1000, Dave Chinner wrote:
> Fixing this requires a tweak to the algorithm in
> __xfs_buf_delwri_submit() so that we don't lock an entire list of
> thousands of IOs before starting submission. In the meantime,
> reducing the number of AGs will reduce the impact of this because
> the delayed write submission code will skip buffers that are already
> locked or pinned in memory, and hence an AG under modification at
> the time submission occurs will be skipped by the delwri code.
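
For reference, the skip behaviour described above comes from the head of
the submission loop in __xfs_buf_delwri_submit(), which currently looks
roughly like this (trimmed for illustration; the comments are added here
to match the description above):

	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
		if (!wait) {
			/* pinned buffers need a log force, leave them queued */
			if (xfs_buf_ispinned(bp)) {
				pinned++;
				continue;
			}
			/* locked means the AG is under modification - skip */
			if (!xfs_buf_trylock(bp))
				continue;
		} else {
			xfs_buf_lock(bp);
		}
		...
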
You might like to try the patch below on a test machine to see if
it helps with the problem.
Cheers,
Dave.
--
Dave Chinner
david@xxxxxxxxxxxxx
xfs: reduce lock hold times in buffer writeback
From: Dave Chinner <dchinner@xxxxxxxxxx>
Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
fs/xfs/xfs_buf.c | 80 ++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 61 insertions(+), 19 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index bbe4e9e..8d2cc36 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1768,15 +1768,63 @@ xfs_buf_cmp(
 	return 0;
 }
 
+static void
+xfs_buf_delwri_submit_buffers(
+	struct list_head	*buffer_list,
+	struct list_head	*io_list,
+	bool			wait)
+{
+	struct xfs_buf		*bp, *n;
+	struct blk_plug		plug;
+
+	blk_start_plug(&plug);
+	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
+		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC |
+				 XBF_WRITE_FAIL);
+		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
+
+		/*
+		 * We do all IO submission async. This means if we need
+		 * to wait for IO completion we need to take an extra
+		 * reference so the buffer is still valid on the other
+		 * side. We need to move the buffer onto the io_list
+		 * at this point so the caller can still access it.
+		 */
+		if (wait) {
+			xfs_buf_hold(bp);
+			list_move_tail(&bp->b_list, io_list);
+		} else
+			list_del_init(&bp->b_list);
+
+		xfs_buf_submit(bp);
+	}
+	blk_finish_plug(&plug);
+}
+
+/*
+ * Submit buffers for write.
+ *
+ * When we have a large buffer list, we do not want to hold all the buffers
+ * locked while we block on the request queue waiting for IO dispatch. To avoid
+ * this problem, we lock and submit buffers in groups of 50, thereby minimising
+ * the lock hold times for lists which may contain thousands of objects.
+ *
+ * To do this, we sort the buffer list before we walk the list to lock and
+ * submit buffers, and we plug and unplug around each group of buffers we
+ * submit.
+ */
 static int
 __xfs_buf_delwri_submit(
 	struct list_head	*buffer_list,
 	struct list_head	*io_list,
 	bool			wait)
 {
-	struct blk_plug		plug;
 	struct xfs_buf		*bp, *n;
+	LIST_HEAD		(submit_list);
 	int			pinned = 0;
+	int			count = 0;
+
+	list_sort(NULL, buffer_list, xfs_buf_cmp);
 
 	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
 		if (!wait) {
@@ -1802,30 +1850,24 @@ __xfs_buf_delwri_submit(
 			continue;
 		}
 
-		list_move_tail(&bp->b_list, io_list);
+		list_move_tail(&bp->b_list, &submit_list);
 		trace_xfs_buf_delwri_split(bp, _RET_IP_);
-	}
-
-	list_sort(NULL, io_list, xfs_buf_cmp);
-
-	blk_start_plug(&plug);
-	list_for_each_entry_safe(bp, n, io_list, b_list) {
-		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
-		bp->b_flags |= XBF_WRITE | XBF_ASYNC;
 
 		/*
-		 * we do all Io submission async. This means if we need to wait
-		 * for IO completion we need to take an extra reference so the
-		 * buffer is still valid on the other side.
+		 * We do small batches of IO submission to minimise lock hold
+		 * time and unnecessary writeback of buffers that are hot and
+		 * would otherwise be relogged and hence not require immediate
+		 * writeback.
 		 */
-		if (wait)
-			xfs_buf_hold(bp);
-		else
-			list_del_init(&bp->b_list);
+		if (count++ < 50)
+			continue;
 
-		xfs_buf_submit(bp);
+		xfs_buf_delwri_submit_buffers(&submit_list, io_list, wait);
+		count = 0;
 	}
-	blk_finish_plug(&plug);
+
+	if (!list_empty(&submit_list))
+		xfs_buf_delwri_submit_buffers(&submit_list, io_list, wait);
 
 	return pinned;
 }
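
For completeness: when @wait is set, the buffers moved onto io_list are
waited on by the caller, xfs_buf_delwri_submit(), which runs roughly the
following loop (trimmed for illustration). The extra reference taken in
xfs_buf_delwri_submit_buffers() is what keeps each buffer valid until
this loop gets to it:

	/* Wait for IO to complete. */
	while (!list_empty(&io_list)) {
		bp = list_first_entry(&io_list, struct xfs_buf, b_list);

		list_del_init(&bp->b_list);

		/* locking the buffer will wait for async IO completion */
		xfs_buf_lock(bp);
		error2 = bp->b_error;
		xfs_buf_relse(bp);	/* drops the hold taken at submission */
		if (!error)
			error = error2;
	}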