On Tue, Feb 07, 2012 at 01:10:41PM -0500, Christoph Hellwig wrote:
> Do not use unlogged metadata updates and the VFS dirty bit for updating
> the file size after writeback. In addition to causing various problems
> with updates getting delayed for far too log this also drags in the
long
> unscalable VFS dirty tracking, and is one of the few remaining unlogged
> metadata updates.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
>
> ---
....
> @@ -173,18 +214,26 @@ xfs_end_io(
> * range to normal written extens after the data I/O has finished.
> */
> if (ioend->io_type == IO_UNWRITTEN) {
> + if (ioend->io_append_trans) {
> + ASSERT(ioend->io_isdirect);
> +
> + current_set_flags_nested(
> + &ioend->io_append_trans->t_pflags, PF_FSTRANS);
> + xfs_trans_cancel(ioend->io_append_trans, 0);
> + }
> +
> error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
> ioend->io_size);
> if (error) {
> ioend->io_error = -error;
> goto done;
> }
> + } else if (ioend->io_append_trans) {
> + error = xfs_setfilesize(ioend);
> + if (error)
> + ioend->io_error = error;
That looks like it should be:
ioend->io_error = -error;
> } else {
> - /*
> - * We might have to update the on-disk file size after
> - * extending writes.
> - */
> - xfs_setfilesize(ioend);
> + ASSERT(!xfs_ioend_is_append(ioend));
> }
>
> done:
> @@ -224,6 +273,7 @@ xfs_alloc_ioend(
> */
> atomic_set(&ioend->io_remaining, 1);
> ioend->io_isasync = 0;
> + ioend->io_isdirect = 0;
> ioend->io_error = 0;
> ioend->io_list = NULL;
> ioend->io_type = type;
> @@ -234,6 +284,7 @@ xfs_alloc_ioend(
> ioend->io_size = 0;
> ioend->io_iocb = NULL;
> ioend->io_result = 0;
> + ioend->io_append_trans = NULL;
This is starting to look like it should memset the ioend to zero
after allocation rather than all these individual "initialise to
zero" lines.
>
> INIT_WORK(&ioend->io_work, xfs_end_io);
> return ioend;
> @@ -341,18 +392,9 @@ xfs_submit_ioend_bio(
> xfs_ioend_t *ioend,
> struct bio *bio)
> {
> - struct xfs_inode *ip = XFS_I(ioend->io_inode);
> atomic_inc(&ioend->io_remaining);
> bio->bi_private = ioend;
> bio->bi_end_io = xfs_end_bio;
> -
> - /*
> - * If the I/O is beyond EOF we mark the inode dirty immediately
> - * but don't update the inode size until I/O completion.
> - */
> - if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
> - xfs_mark_inode_dirty(ip);
> -
> submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
> }
>
> @@ -1014,8 +1056,20 @@ xfs_vm_writepage(
> wbc, end_index);
> }
>
> - if (iohead)
> + if (iohead) {
> + /*
> + * Reserve log space if we might write beyond the on-disk
> + * inode size.
> + */
> + if (ioend->io_type != IO_UNWRITTEN &&
> + xfs_ioend_is_append(ioend)) {
> + err = xfs_setfilesize_trans_alloc(ioend);
> + if (err)
> + goto error;
> + }
> +
> xfs_submit_ioend(wbc, iohead);
> + }
>
> return 0;
>
> @@ -1295,17 +1349,26 @@ xfs_vm_direct_IO(
> {
> struct inode *inode = iocb->ki_filp->f_mapping->host;
> struct block_device *bdev = xfs_find_bdev_for_inode(inode);
> + struct xfs_ioend *ioend = NULL;
> ssize_t ret;
>
> if (rw & WRITE) {
> - iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
> + size_t size = iov_length(iov, nr_segs);
> +
> + iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
> + if (offset + size > XFS_I(inode)->i_d.di_size) {
> + ret = xfs_setfilesize_trans_alloc(ioend);
> + if (ret)
> + goto destroy_ioend;
> + ioend->io_isdirect = 1;
> + }
This is a bit messy, but necessary to handle racing IOs and
unwritten extent conversion.. Can you add a comment explining
why it is necessary so we don't forget in future?
--
Dave Chinner
david@xxxxxxxxxxxxx
|