xfs
[Top] [All Lists]

Re: xfs: properly truncate blocks outsize i_size on write_begin failure

To: xfs@xxxxxxxxxxx
Subject: Re: xfs: properly truncate blocks outsize i_size on write_begin failure
From: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Date: Sun, 29 Mar 2009 03:44:56 -0400
Cc: Nick Piggin <npiggin@xxxxxxx>, linux-fsdevel@xxxxxxxxxxxxxxx
In-reply-to: <20090318063425.GA18386@xxxxxxxxxxxxx>
References: <20090318063425.GA18386@xxxxxxxxxxxxx>
User-agent: Mutt/1.5.18 (2008-05-17)
ping?

On Wed, Mar 18, 2009 at 02:34:25AM -0400, Christoph Hellwig wrote:
> As pointed out by Dave in the thread starting at
> http://oss.sgi.com/archives/xfs/2008-04/msg00542.html the current use
> of vmtruncate in block_write_begin is incorrect for filesystem not
> using ->truncate for doing the actual on-disk truncatation.  Doing
> it in ->truncate for a filesystem like XFS means it would be split
> over multiple transactions leading to violations of the atomicy guarantee.
> Historically (and still documented in Documentation/filesystems/Locking)
> ->truncate is not a method but only a helper for setattr implementations
> so this is correct.
> 
> The correct fix would be to use ->setattr but that needs a new ATTR_NOLOCK
> flag and an audit of all filesystems.  I'm still hoping to do that later
> but for now I really want XFS fixed.
> 
> So just duplicate block_write_begin in XFS to do the proper truncation,
> note that I need to export __block_prepare_write to no duplicate even
> more code than nessecary.
> 
> GFS2 and UFS and possibly others wants the same kind of fix, too.
> 
> And I need to get rid of ->truncate one day to prevent people from using
> it in stupid ways.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> 
> Index: xfs/fs/buffer.c
> ===================================================================
> --- xfs.orig/fs/buffer.c      2009-03-01 04:22:47.091430206 +0100
> +++ xfs/fs/buffer.c   2009-03-01 04:22:59.913336991 +0100
> @@ -1910,7 +1910,7 @@ void page_zero_new_buffers(struct page *
>  }
>  EXPORT_SYMBOL(page_zero_new_buffers);
>  
> -static int __block_prepare_write(struct inode *inode, struct page *page,
> +int __block_prepare_write(struct inode *inode, struct page *page,
>               unsigned from, unsigned to, get_block_t *get_block)
>  {
>       unsigned block_start, block_end;
> @@ -1989,6 +1989,7 @@ static int __block_prepare_write(struct 
>               page_zero_new_buffers(page, from, to);
>       return err;
>  }
> +EXPORT_SYMBOL(__block_prepare_write);
>  
>  static int __block_commit_write(struct inode *inode, struct page *page,
>               unsigned from, unsigned to)
> Index: xfs/fs/xfs/linux-2.6/xfs_aops.c
> ===================================================================
> --- xfs.orig/fs/xfs/linux-2.6/xfs_aops.c      2009-03-01 04:16:12.003305119 
> +0100
> +++ xfs/fs/xfs/linux-2.6/xfs_aops.c   2009-03-01 04:21:55.618306704 +0100
> @@ -1598,6 +1598,71 @@ xfs_vm_direct_IO(
>       return ret;
>  }
>  
> +/*
> + * This is a copy of fs/buffer.c:block_write_begin except for doing
> + * the correct setattr call in the error case instead of the wrong
> + * vmtruncate.
> + */
> +static int
> +xfs_block_write_begin(struct file *file, struct address_space *mapping,
> +                     loff_t pos, unsigned len, unsigned flags,
> +                     struct page **pagep, void **fsdata,
> +                     get_block_t *get_block)
> +{
> +     struct inode *inode = mapping->host;
> +     int status = 0;
> +     struct page *page;
> +     pgoff_t index;
> +     unsigned start, end;
> +     int ownpage = 0;
> +
> +     index = pos >> PAGE_CACHE_SHIFT;
> +     start = pos & (PAGE_CACHE_SIZE - 1);
> +     end = start + len;
> +
> +     page = *pagep;
> +     if (page == NULL) {
> +             ownpage = 1;
> +             page = grab_cache_page_write_begin(mapping, index, flags);
> +             if (!page) {
> +                     status = -ENOMEM;
> +                     goto out;
> +             }
> +             *pagep = page;
> +     } else
> +             BUG_ON(!PageLocked(page));
> +
> +     status = __block_prepare_write(inode, page, start, end, get_block);
> +     if (unlikely(status)) {
> +             ClearPageUptodate(page);
> +
> +             if (ownpage) {
> +                     unlock_page(page);
> +                     page_cache_release(page);
> +                     *pagep = NULL;
> +
> +                     /*
> +                      * prepare_write() may have instantiated a few blocks
> +                      * outside i_size.  Trim these off again. Don't need
> +                      * i_size_read because we hold i_mutex.
> +                      */
> +                     if (pos + len > inode->i_size) {
> +                             struct iattr newattrs;
> +                             int error;
> +
> +                             newattrs.ia_size = inode->i_size;
> +                             newattrs.ia_valid = ATTR_SIZE | ATTR_FORCE;
> +                             error = xfs_setattr(XFS_I(inode), &newattrs,
> +                                                 XFS_ATTR_NOLOCK);
> +                             WARN_ON(error); /* not much we can do.. */
> +                     }
> +             }
> +     }
> +
> +out:
> +     return status;
> +}
> +
>  STATIC int
>  xfs_vm_write_begin(
>       struct file             *file,
> Index: xfs/include/linux/buffer_head.h
> ===================================================================
> --- xfs.orig/include/linux/buffer_head.h      2009-03-01 04:23:14.364305635 
> +0100
> +++ xfs/include/linux/buffer_head.h   2009-03-01 04:23:19.081305583 +0100
> @@ -218,6 +218,8 @@ int generic_write_end(struct file *, str
>                               struct page *, void *);
>  void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
>  int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
> +int __block_prepare_write(struct inode *inode, struct page *page,
> +             unsigned from, unsigned to, get_block_t *get_block);
>  int cont_write_begin(struct file *, struct address_space *, loff_t,
>                       unsigned, unsigned, struct page **, void **,
>                       get_block_t *, loff_t *);
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
---end quoted text---

<Prev in Thread] Current Thread [Next in Thread>