xfs
[Top] [All Lists]

Re: [PATCH 04/10] xfs: Sort delayed write buffers before dispatch

To: Dave Chinner <david@xxxxxxxxxxxxx>
Subject: Re: [PATCH 04/10] xfs: Sort delayed write buffers before dispatch
From: Alex Elder <aelder@xxxxxxx>
Date: Fri, 05 Feb 2010 17:53:34 -0600
Cc: xfs@xxxxxxxxxxx
In-reply-to: <1265153104-29680-5-git-send-email-david@xxxxxxxxxxxxx>
References: <1265153104-29680-1-git-send-email-david@xxxxxxxxxxxxx> <1265153104-29680-5-git-send-email-david@xxxxxxxxxxxxx>
Reply-to: aelder@xxxxxxx
On Wed, 2010-02-03 at 10:24 +1100, Dave Chinner wrote:
> Currently when the xfsbufd writes delayed write buffers, it pushes
> them to disk in the order they come off the delayed write list. If
> there are lots of buffers spread widely over the disk, this results
> in overwhelming the elevator sort queues in the block layer and we
> end up losing the possibility of merging adjacent buffers to minimise
> the number of IOs.
> 
> Use the new generic list_sort function to sort the delwri dispatch
> queue before issue to ensure that the buffers are pushed in the most
> friendly order possible to the lower layers.

Looks good.

> Signed-off-by: Dave Chinner <david@xxxxxxxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>

Reviewed-by: Alex Elder <aelder@xxxxxxx>

> ---
>  fs/xfs/linux-2.6/xfs_buf.c |   87 ++++++++++++++++++++++++++++++--------------
>  1 files changed, 60 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
> index b306265..4556a4c 100644
> --- a/fs/xfs/linux-2.6/xfs_buf.c
> +++ b/fs/xfs/linux-2.6/xfs_buf.c
> @@ -33,6 +33,7 @@
>  #include <linux/migrate.h>
>  #include <linux/backing-dev.h>
>  #include <linux/freezer.h>
> +#include <linux/list_sort.h>
>  
>  #include "xfs_sb.h"
>  #include "xfs_inum.h"
> @@ -1877,14 +1878,42 @@ xfs_buf_delwri_split(
>  
>  }
>  
> +/*
> + * Compare function is more complex than it needs to be because
> + * the return value is only 32 bits and we are doing comparisons
> + * on 64 bit values
> + */
> +static int
> +xfs_buf_cmp(
> +     void            *priv,
> +     struct list_head *a,
> +     struct list_head *b)
> +{
> +     struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
> +     struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
> +     xfs_daddr_t             diff;
> +
> +     diff = ap->b_bn - bp->b_bn;
> +     if (diff < 0)
> +             return -1;
> +     if (diff > 0)
> +             return 1;
> +     return 0;
> +}
> +
> +void
> +xfs_buf_delwri_sort(
> +     xfs_buftarg_t   *target,
> +     struct list_head *list)
> +{
> +     list_sort(NULL, list, xfs_buf_cmp);
> +}
> +
>  STATIC int
>  xfsbufd(
>       void            *data)
>  {
> -     struct list_head tmp;
> -     xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
> -     int             count;
> -     xfs_buf_t       *bp;
> +     xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
>  
>       current->flags |= PF_MEMALLOC;
>  
> @@ -1893,6 +1922,8 @@ xfsbufd(
>       do {
>               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
>               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
> +             int     count = 0;
> +             struct list_head tmp;
>  
>               if (unlikely(freezing(current))) {
>                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
> @@ -1907,11 +1938,10 @@ xfsbufd(
>               schedule_timeout_interruptible(tout);
>  
>               xfs_buf_delwri_split(target, &tmp, age);
> -             count = 0;
> +             list_sort(NULL, &tmp, xfs_buf_cmp);
>               while (!list_empty(&tmp)) {
> -                     bp = list_entry(tmp.next, xfs_buf_t, b_list);
> -                     ASSERT(target == bp->b_target);
> -
> +                     struct xfs_buf *bp;
> +                     bp = list_first_entry(&tmp, struct xfs_buf, b_list);
>                       list_del_init(&bp->b_list);
>                       xfs_buf_iostrategy(bp);
>                       count++;
> @@ -1937,42 +1967,45 @@ xfs_flush_buftarg(
>       xfs_buftarg_t   *target,
>       int             wait)
>  {
> -     struct list_head tmp;
> -     xfs_buf_t       *bp, *n;
> +     xfs_buf_t       *bp;
>       int             pincount = 0;
> +     LIST_HEAD(tmp_list);
> +     LIST_HEAD(wait_list);
>  
>       xfs_buf_runall_queues(xfsconvertd_workqueue);
>       xfs_buf_runall_queues(xfsdatad_workqueue);
>       xfs_buf_runall_queues(xfslogd_workqueue);
>  
>       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
> -     pincount = xfs_buf_delwri_split(target, &tmp, 0);
> +     pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
>  
>       /*
> -      * Dropped the delayed write list lock, now walk the temporary list
> +      * Dropped the delayed write list lock, now walk the temporary list.
> +      * All I/O is issued async and then if we need to wait for completion
> +      * we do that after issuing all the IO.
>        */
> -     list_for_each_entry_safe(bp, n, &tmp, b_list) {
> +     list_sort(NULL, &tmp_list, xfs_buf_cmp);
> +     while (!list_empty(&tmp_list)) {
> +             bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
>               ASSERT(target == bp->b_target);
> -             if (wait)
> +             list_del_init(&bp->b_list);
> +             if (wait) {
>                       bp->b_flags &= ~XBF_ASYNC;
> -             else
> -                     list_del_init(&bp->b_list);
> -
> +                     list_add(&bp->b_list, &wait_list);
> +             }
>               xfs_buf_iostrategy(bp);
>       }
>  
> -     if (wait)
> +     if (wait) {
> +             /* Expedite and wait for IO to complete. */
>               blk_run_address_space(target->bt_mapping);
> +             while (!list_empty(&wait_list)) {
> +                     bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
>  
> -     /*
> -      * Remaining list items must be flushed before returning
> -      */
> -     while (!list_empty(&tmp)) {
> -             bp = list_entry(tmp.next, xfs_buf_t, b_list);
> -
> -             list_del_init(&bp->b_list);
> -             xfs_iowait(bp);
> -             xfs_buf_relse(bp);
> +                     list_del_init(&bp->b_list);
> +                     xfs_iowait(bp);
> +                     xfs_buf_relse(bp);
> +             }
>       }
>  
>       return pincount;



<Prev in Thread] Current Thread [Next in Thread>