xfs
[Top] [All Lists]

Re: [PATCH 4/4] xfs: reduce lock hold times in buffer writeback

To: xfs@xxxxxxxxxxx
Subject: Re: [PATCH 4/4] xfs: reduce lock hold times in buffer writeback
From: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
Date: Tue, 5 Apr 2016 09:19:47 -0400
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1459836310-12619-5-git-send-email-david@xxxxxxxxxxxxx>
Mail-followup-to: xfs@xxxxxxxxxxx
References: <1459836310-12619-1-git-send-email-david@xxxxxxxxxxxxx> <1459836310-12619-5-git-send-email-david@xxxxxxxxxxxxx>
User-agent: Mutt/1.5.24 (2015-08-30)
Looks good to me

Reviewed-by: Carlos Maiolino <cmaiolino@xxxxxxxxxx>

On Tue, Apr 05, 2016 at 04:05:10PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> When we have a lot of metadata to flush from the AIL, the buffer
> list can get very long. The current submission code tries to batch
> submission to optimise IO order of the metadata (i.e. ascending
> block order) to maximise block layer merging or IO to adjacent
> metadata blocks.
> 
> Unfortunately, the method used can result in long lock times
> occurring as buffers locked early on in the buffer list might not be
> dispatched until the end of the IO licst processing. This is because
> sorting does not occur util after the buffer list has been processed
> and the buffers that are going to be submitted are locked. Hence
> when the buffer list is several thousand buffers long, the lock hold
> times before IO dispatch can be significant.
> 
> To fix this, sort the buffer list before we start trying to lock and
> submit buffers. This means we can now submit buffers immediately
> after they are locked, allowing merging to occur immediately on the
> plug and dispatch to occur as quickly as possible. This means there
> is minimal delay between locking the buffer and IO submission
> occuring, hence reducing the worst case lock hold times seen during
> delayed write buffer IO submission signficantly.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_buf.c | 60 
> +++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 467a636..0d49e81 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1780,18 +1780,33 @@ xfs_buf_cmp(
>       return 0;
>  }
>  
> +/*
> + * submit buffers for write.
> + *
> + * When we have a large buffer list, we do not want to hold all the buffers
> + * locked while we block on the request queue waiting for IO dispatch. To 
> avoid
> + * this problem, we lock and submit buffers in groups of 50, thereby 
> minimising
> + * the lock hold times for lists which may contain thousands of objects.
> + *
> + * To do this, we sort the buffer list before we walk the list to lock and
> + * submit buffers, and we plug and unplug around each group of buffers we
> + * submit.
> + */
>  static int
> -__xfs_buf_delwri_submit(
> +xfs_buf_delwri_submit_buffers(
>       struct list_head        *buffer_list,
> -     struct list_head        *io_list,
> -     bool                    wait)
> +     struct list_head        *wait_list)
>  {
> -     struct blk_plug         plug;
>       struct xfs_buf          *bp, *n;
> +     LIST_HEAD               (submit_list);
>       int                     pinned = 0;
> +     struct blk_plug         plug;
> +
> +     list_sort(NULL, buffer_list, xfs_buf_cmp);
>  
> +     blk_start_plug(&plug);
>       list_for_each_entry_safe(bp, n, buffer_list, b_list) {
> -             if (!wait) {
> +             if (!wait_list) {
>                       if (xfs_buf_ispinned(bp)) {
>                               pinned++;
>                               continue;
> @@ -1814,25 +1829,21 @@ __xfs_buf_delwri_submit(
>                       continue;
>               }
>  
> -             list_move_tail(&bp->b_list, io_list);
>               trace_xfs_buf_delwri_split(bp, _RET_IP_);
> -     }
> -
> -     list_sort(NULL, io_list, xfs_buf_cmp);
> -
> -     blk_start_plug(&plug);
> -     list_for_each_entry_safe(bp, n, io_list, b_list) {
> -             bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
> -             bp->b_flags |= XBF_WRITE | XBF_ASYNC;
>  
>               /*
> -              * we do all Io submission async. This means if we need to wait
> -              * for IO completion we need to take an extra reference so the
> -              * buffer is still valid on the other side.
> +              * We do all IO submission async. This means if we need
> +              * to wait for IO completion we need to take an extra
> +              * reference so the buffer is still valid on the other
> +              * side. We need to move the buffer onto the io_list
> +              * at this point so the caller can still access it.
>                */
> -             if (wait)
> +             bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
> +             bp->b_flags |= XBF_WRITE | XBF_ASYNC;
> +             if (wait_list) {
>                       xfs_buf_hold(bp);
> -             else
> +                     list_move_tail(&bp->b_list, wait_list);
> +             } else
>                       list_del_init(&bp->b_list);
>  
>               xfs_buf_submit(bp);
> @@ -1855,8 +1866,7 @@ int
>  xfs_buf_delwri_submit_nowait(
>       struct list_head        *buffer_list)
>  {
> -     LIST_HEAD               (io_list);
> -     return __xfs_buf_delwri_submit(buffer_list, &io_list, false);
> +     return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
>  }
>  
>  /*
> @@ -1871,15 +1881,15 @@ int
>  xfs_buf_delwri_submit(
>       struct list_head        *buffer_list)
>  {
> -     LIST_HEAD               (io_list);
> +     LIST_HEAD               (wait_list);
>       int                     error = 0, error2;
>       struct xfs_buf          *bp;
>  
> -     __xfs_buf_delwri_submit(buffer_list, &io_list, true);
> +     xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);
>  
>       /* Wait for IO to complete. */
> -     while (!list_empty(&io_list)) {
> -             bp = list_first_entry(&io_list, struct xfs_buf, b_list);
> +     while (!list_empty(&wait_list)) {
> +             bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
>  
>               list_del_init(&bp->b_list);
>  
> -- 
> 2.7.0
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

-- 
Carlos

<Prev in Thread] Current Thread [Next in Thread>