xfs
[Top] [All Lists]

Re: [PATCH 2/3] xfs: don't release bios on completion immediately

To: Christoph Hellwig <hch@xxxxxx>
Subject: Re: [PATCH 2/3] xfs: don't release bios on completion immediately
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Thu, 17 Mar 2016 09:05:23 -0400
Cc: xfs@xxxxxxxxxxx, Dave Chinner <dchinner@xxxxxxxxxx>
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1458128681-10869-3-git-send-email-hch@xxxxxx>
References: <1458128681-10869-1-git-send-email-hch@xxxxxx> <1458128681-10869-3-git-send-email-hch@xxxxxx>
User-agent: Mutt/1.5.24 (2015-08-30)
On Wed, Mar 16, 2016 at 12:44:40PM +0100, Christoph Hellwig wrote:
> Completion of an ioend requires us to walk the bufferhead list to
> end writback on all the bufferheads. This, in turn, is needed so
> that we can end writeback on all the pages we just did IO on.
> 
> To remove our dependency on bufferheads in writeback, we need to
> turn this around the other way - we need to walk the pages we've
> just completed IO on, and then walk the buffers attached to the
> pages and complete their IO. In doing this, we remove the
> requirement for the ioend to track bufferheads directly.
> 
> To enable IO completion to walk all the pages we've submitted IO on,
> we need to keep the bios that we used for IO around until the ioend
> has been completed. We can do this simply by chaining the bios to
> the ioend at completion time, and then walking their pages directly
> just before destroying the ioend.
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> [hch: changed the xfs_finish_page_writeback calling convention]
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---

Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>

>  fs/xfs/xfs_aops.c | 95 +++++++++++++++++++++++++++++++++++++++----------------
>  fs/xfs/xfs_aops.h |  5 +--
>  2 files changed, 71 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 17cc6cc..5928770 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -84,20 +84,64 @@ xfs_find_bdev_for_inode(
>  }
>  
>  /*
> - * We're now finished for good with this ioend structure.
> - * Update the page state via the associated buffer_heads,
> - * release holds on the inode and bio, and finally free
> - * up memory.  Do not use the ioend after this.
> + * We're now finished for good with this page.  Update the page state via the
> + * associated buffer_heads, paying attention to the start and end offsets that
> + * we need to process on the page.
> + */
> +static void
> +xfs_finish_page_writeback(
> +     struct inode            *inode,
> +     struct bio_vec          *bvec,
> +     int                     error)
> +{
> +     unsigned int            blockmask = (1 << inode->i_blkbits) - 1;
> +     unsigned int            end = bvec->bv_offset + bvec->bv_len - 1;
> +     struct buffer_head      *head, *bh;
> +     unsigned int            off = 0;
> +
> +     ASSERT(bvec->bv_offset < PAGE_SIZE);
> +     ASSERT((bvec->bv_offset & blockmask) == 0);
> +     ASSERT(end < PAGE_SIZE);
> +     ASSERT((bvec->bv_len & blockmask) == 0);
> +
> +     bh = head = page_buffers(bvec->bv_page);
> +
> +     do {
> +             if (off < bvec->bv_offset)
> +                     goto next_bh;
> +             if (off > end)
> +                     break;
> +             bh->b_end_io(bh, !error);
> +next_bh:
> +             off += bh->b_size;
> +     } while ((bh = bh->b_this_page) != head);
> +}
> +
> +/*
> + * We're now finished for good with this ioend structure.  Update the page
> + * state, release holds on bios, and finally free up memory.  Do not use the
> + * ioend after this.
>   */
>  STATIC void
>  xfs_destroy_ioend(
> -     xfs_ioend_t             *ioend)
> +     struct xfs_ioend        *ioend)
>  {
> -     struct buffer_head      *bh, *next;
> +     struct inode            *inode = ioend->io_inode;
> +     int                     error = ioend->io_error;
> +     struct bio              *bio, *next;
> +
> +     for (bio = ioend->io_bio_done; bio; bio = next) {
> +             struct bio_vec  *bvec;
> +             int             i;
> +
> +             next = bio->bi_private;
> +             bio->bi_private = NULL;
>  
> -     for (bh = ioend->io_buffer_head; bh; bh = next) {
> -             next = bh->b_private;
> -             bh->b_end_io(bh, !ioend->io_error);
> +             /* walk each page on bio, ending page IO on them */
> +             bio_for_each_segment_all(bvec, bio, i)
> +                     xfs_finish_page_writeback(inode, bvec, error);
> +
> +             bio_put(bio);
>       }
>  
>       mempool_free(ioend, xfs_ioend_pool);
> @@ -286,6 +330,7 @@ xfs_alloc_ioend(
>       ioend->io_type = type;
>       ioend->io_inode = inode;
>       INIT_WORK(&ioend->io_work, xfs_end_io);
> +     spin_lock_init(&ioend->io_lock);
>       return ioend;
>  }
>  
> @@ -365,15 +410,21 @@ STATIC void
>  xfs_end_bio(
>       struct bio              *bio)
>  {
> -     xfs_ioend_t             *ioend = bio->bi_private;
> -
> -     if (!ioend->io_error)
> -             ioend->io_error = bio->bi_error;
> +     struct xfs_ioend        *ioend = bio->bi_private;
> +     unsigned long           flags;
>  
> -     /* Toss bio and pass work off to an xfsdatad thread */
>       bio->bi_private = NULL;
>       bio->bi_end_io = NULL;
> -     bio_put(bio);
> +
> +     spin_lock_irqsave(&ioend->io_lock, flags);
> +     if (!ioend->io_error)
> +            ioend->io_error = bio->bi_error;
> +     if (!ioend->io_bio_done)
> +             ioend->io_bio_done = bio;
> +     else
> +             ioend->io_bio_done_tail->bi_private = bio;
> +     ioend->io_bio_done_tail = bio;
> +     spin_unlock_irqrestore(&ioend->io_lock, flags);
>  
>       xfs_finish_ioend(ioend);
>  }
> @@ -511,21 +562,11 @@ xfs_add_to_ioend(
>       if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
>           bh->b_blocknr != wpc->last_block + 1 ||
>           offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
> -             struct xfs_ioend        *new;
> -
>               if (wpc->ioend)
>                       list_add(&wpc->ioend->io_list, iolist);
> -
> -             new = xfs_alloc_ioend(inode, wpc->io_type);
> -             new->io_offset = offset;
> -             new->io_buffer_head = bh;
> -             new->io_buffer_tail = bh;
> -             wpc->ioend = new;
> -     } else {
> -             wpc->ioend->io_buffer_tail->b_private = bh;
> -             wpc->ioend->io_buffer_tail = bh;
> +             wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type);
> +             wpc->ioend->io_offset = offset;
>       }
> -     bh->b_private = NULL;
>  
>  retry:
>       if (!wpc->ioend->io_bio)
> diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
> index c89c3bd..1c7b041 100644
> --- a/fs/xfs/xfs_aops.h
> +++ b/fs/xfs/xfs_aops.h
> @@ -46,13 +46,14 @@ typedef struct xfs_ioend {
>       int                     io_error;       /* I/O error code */
>       atomic_t                io_remaining;   /* hold count */
>       struct inode            *io_inode;      /* file being written to */
> -     struct buffer_head      *io_buffer_head;/* buffer linked list head */
> -     struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
>       size_t                  io_size;        /* size of the extent */
>       xfs_off_t               io_offset;      /* offset in the file */
>       struct work_struct      io_work;        /* xfsdatad work queue */
>       struct xfs_trans        *io_append_trans;/* xact. for size update */
>       struct bio              *io_bio;        /* bio being built */
> +     struct bio              *io_bio_done;   /* bios completed */
> +     struct bio              *io_bio_done_tail; /* bios completed */
> +     spinlock_t              io_lock;        /* for bio completion list */
>  } xfs_ioend_t;
>  
>  extern const struct address_space_operations xfs_address_space_operations;
> -- 
> 2.1.4
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

[Prev in Thread] Current Thread [Next in Thread]