xfs
[Top] [All Lists]

Re: [PATCH 4/5] xfs: log file size updates at I/O completion time

To: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Subject: Re: [PATCH 4/5] xfs: log file size updates at I/O completion time
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Thu, 16 Feb 2012 18:10:28 +1100
Cc: xfs@xxxxxxxxxxx
In-reply-to: <20120207181155.511135108@xxxxxxxxxxxxxxxxxxxxxx>
References: <20120207181037.745771452@xxxxxxxxxxxxxxxxxxxxxx> <20120207181155.511135108@xxxxxxxxxxxxxxxxxxxxxx>
User-agent: Mutt/1.5.21 (2010-09-15)
On Tue, Feb 07, 2012 at 01:10:41PM -0500, Christoph Hellwig wrote:
> Do not use unlogged metadata updates and the VFS dirty bit for updating
> the file size after writeback.  In addition to causing various problems
> with updates getting delayed for far too log this also drags in the
                                           long

> unscalable VFS dirty tracking, and is one of the few remaining unlogged
> metadata updates.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> 
> ---
....
> @@ -173,18 +214,26 @@ xfs_end_io(
>        * range to normal written extens after the data I/O has finished.
>        */
>       if (ioend->io_type == IO_UNWRITTEN) {
> +             if (ioend->io_append_trans) {
> +                     ASSERT(ioend->io_isdirect);
> +
> +                     current_set_flags_nested(
> +                             &ioend->io_append_trans->t_pflags, PF_FSTRANS);
> +                     xfs_trans_cancel(ioend->io_append_trans, 0);
> +             }
> +
>               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
>                                                ioend->io_size);
>               if (error) {
>                       ioend->io_error = -error;
>                       goto done;
>               }
> +     } else if (ioend->io_append_trans) {
> +             error = xfs_setfilesize(ioend);
> +             if (error)
> +                     ioend->io_error = error;

That looks like it should be:

                        ioend->io_error = -error;

>       } else {
> -             /*
> -              * We might have to update the on-disk file size after
> -              * extending writes.
> -              */
> -             xfs_setfilesize(ioend);
> +             ASSERT(!xfs_ioend_is_append(ioend));
>       }
>  
>  done:
> @@ -224,6 +273,7 @@ xfs_alloc_ioend(
>        */
>       atomic_set(&ioend->io_remaining, 1);
>       ioend->io_isasync = 0;
> +     ioend->io_isdirect = 0;
>       ioend->io_error = 0;
>       ioend->io_list = NULL;
>       ioend->io_type = type;
> @@ -234,6 +284,7 @@ xfs_alloc_ioend(
>       ioend->io_size = 0;
>       ioend->io_iocb = NULL;
>       ioend->io_result = 0;
> +     ioend->io_append_trans = NULL;

This is starting to look like it should memset the ioend to zero
after allocation rather than all these individual "initialise to
zero" lines.

>  
>       INIT_WORK(&ioend->io_work, xfs_end_io);
>       return ioend;
> @@ -341,18 +392,9 @@ xfs_submit_ioend_bio(
>       xfs_ioend_t             *ioend,
>       struct bio              *bio)
>  {
> -     struct xfs_inode        *ip = XFS_I(ioend->io_inode);
>       atomic_inc(&ioend->io_remaining);
>       bio->bi_private = ioend;
>       bio->bi_end_io = xfs_end_bio;
> -
> -     /*
> -      * If the I/O is beyond EOF we mark the inode dirty immediately
> -      * but don't update the inode size until I/O completion.
> -      */
> -     if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
> -             xfs_mark_inode_dirty(ip);
> -
>       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
>  }
>  
> @@ -1014,8 +1056,20 @@ xfs_vm_writepage(
>                                 wbc, end_index);
>       }
>  
> -     if (iohead)
> +     if (iohead) {
> +             /*
> +              * Reserve log space if we might write beyond the on-disk
> +              * inode size.
> +              */
> +             if (ioend->io_type != IO_UNWRITTEN &&
> +                 xfs_ioend_is_append(ioend)) {
> +                     err = xfs_setfilesize_trans_alloc(ioend);
> +                     if (err)
> +                             goto error;
> +             }
> +
>               xfs_submit_ioend(wbc, iohead);
> +     }
>  
>       return 0;
>  
> @@ -1295,17 +1349,26 @@ xfs_vm_direct_IO(
>  {
>       struct inode            *inode = iocb->ki_filp->f_mapping->host;
>       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
> +     struct xfs_ioend        *ioend = NULL;
>       ssize_t                 ret;
>  
>       if (rw & WRITE) {
> -             iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
> +             size_t size = iov_length(iov, nr_segs);
> +
> +             iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
> +             if (offset + size > XFS_I(inode)->i_d.di_size) {
> +                     ret = xfs_setfilesize_trans_alloc(ioend);
> +                     if (ret)
> +                             goto destroy_ioend;
> +                     ioend->io_isdirect = 1;
> +             }

This is a bit messy, but necessary to handle racing IOs and
unwritten extent conversion.. Can you add a comment explining
why it is necessary so we don't forget in future?

-- 
Dave Chinner
david@xxxxxxxxxxxxx

<Prev in Thread] Current Thread [Next in Thread>