Hey Christoph,
On Sun, Dec 18, 2011 at 03:00:04PM -0500, Christoph Hellwig wrote:
> This wrapper isn't overly useful, not to say rather confusing.
>
> Around the call to xfs_itruncate_extents it does:
>
> - add tracing
> - add a few asserts in debug builds
> - conditionally update the inode size in two places
> - log the inode
>
> Both the tracing and the inode logging can be moved to xfs_itruncate_extents
> as they are useful for the attribute fork as well - in fact the attr code
> already does an equivalent xfs_trans_log_inode call just after calling
> xfs_itruncate_extents.
>
> The conditional size updates are a mess, and there
> was no reason to do them in two places anyway, as the first one was
> conditional on the inode having extents - but without extents we
> xfs_itruncate_extents would be a no-op and the placement wouldn't matter
> anyway.
>
> Instead move the size assignments and the asserts that make sense
> to the callers that want it.
>
> As a side effect of this clean up xfs_setattr_size by introducing variables
> for the old and new inode size, and moving the size updates into a common
> place.
>
> Reviewed-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
Looks good. I had some minor questions below.
Reviewed-by: Ben Myers <bpm@xxxxxxx>
> ---
> fs/xfs/xfs_attr.c | 4 -
> fs/xfs/xfs_inode.c | 124
> +++--------------------------------------------
> fs/xfs/xfs_inode.h | 2
> fs/xfs/xfs_iops.c | 47 +++++++++++------
> fs/xfs/xfs_qm_syscalls.c | 9 +++
> fs/xfs/xfs_trace.h | 4 -
> fs/xfs/xfs_vnodeops.c | 17 +++++-
> 7 files changed, 65 insertions(+), 142 deletions(-)
>
> Index: xfs/fs/xfs/xfs_attr.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_attr.c 2011-11-30 12:58:07.820044461 +0100
> +++ xfs/fs/xfs/xfs_attr.c 2011-11-30 12:58:51.519807719 +0100
> @@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
> if (error)
> goto out;
>
> - /*
> - * Commit the last in the sequence of transactions.
> - */
> - xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
> error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
> xfs_iunlock(dp, XFS_ILOCK_EXCL);
>
> Index: xfs/fs/xfs/xfs_inode.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_inode.c 2011-11-30 12:58:07.830044408 +0100
> +++ xfs/fs/xfs/xfs_inode.c 2011-11-30 12:58:51.519807719 +0100
> @@ -1166,52 +1166,6 @@ xfs_ialloc(
> }
>
> /*
> - * Check to make sure that there are no blocks allocated to the
> - * file beyond the size of the file. We don't check this for
> - * files with fixed size extents or real time extents, but we
> - * at least do it for regular files.
> - */
> -#ifdef DEBUG
> -STATIC void
> -xfs_isize_check(
> - struct xfs_inode *ip,
> - xfs_fsize_t isize)
> -{
> - struct xfs_mount *mp = ip->i_mount;
> - xfs_fileoff_t map_first;
> - int nimaps;
> - xfs_bmbt_irec_t imaps[2];
> - int error;
> -
> - if (!S_ISREG(ip->i_d.di_mode))
> - return;
> -
> - if (XFS_IS_REALTIME_INODE(ip))
> - return;
> -
> - if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
> - return;
> -
> - nimaps = 2;
> - map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
> - /*
> - * The filesystem could be shutting down, so bmapi may return
> - * an error.
> - */
> - error = xfs_bmapi_read(ip, map_first,
> - (XFS_B_TO_FSB(mp,
> - (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
> - imaps, &nimaps, XFS_BMAPI_ENTIRE);
> - if (error)
> - return;
> - ASSERT(nimaps == 1);
> - ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
> -}
> -#else /* DEBUG */
> -#define xfs_isize_check(ip, isize)
> -#endif /* DEBUG */
> -
> -/*
You've tossed xfs_isize_check in the round filer, but you didn't mention
that in your commit message. Isn't this still useful code?
> * Free up the underlying blocks past new_size. The new size must be smaller
> * than the current size. This routine can be used both for the attribute
> and
> * data fork, and does not modify the inode size, which is left to the
> caller.
> @@ -1258,6 +1212,8 @@ xfs_itruncate_extents(
> ASSERT(ip->i_itemp->ili_lock_flags == 0);
> ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
>
> + trace_xfs_itruncate_extents_start(ip, new_size);
> +
> /*
> * Since it is possible for space to become allocated beyond
> * the end of the file (in a crash where the space is allocated
> @@ -1325,6 +1281,14 @@ xfs_itruncate_extents(
> goto out;
> }
>
> + /*
> + * Always re-log the inode so that our permanent transaction can keep
> + * on rolling it forward in the log.
> + */
> + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Oh.. This is why it's ok not to log the inode core in
xfs_attr_inactive: it calls xfs_itruncate_extents()...
> +
> + trace_xfs_itruncate_extents_end(ip, new_size);
> +
> out:
> *tpp = tp;
> return error;
> @@ -1338,74 +1302,6 @@ out_bmap_cancel:
> goto out;
> }
>
> -int
> -xfs_itruncate_data(
> - struct xfs_trans **tpp,
> - struct xfs_inode *ip,
> - xfs_fsize_t new_size)
> -{
> - int error;
> -
> - trace_xfs_itruncate_data_start(ip, new_size);
> -
> - /*
> - * The first thing we do is set the size to new_size permanently on
> - * disk. This way we don't have to worry about anyone ever being able
> - * to look at the data being freed even in the face of a crash.
> - * What we're getting around here is the case where we free a block, it
> - * is allocated to another file, it is written to, and then we crash.
> - * If the new data gets written to the file but the log buffers
> - * containing the free and reallocation don't, then we'd end up with
> - * garbage in the blocks being freed. As long as we make the new_size
> - * permanent before actually freeing any blocks it doesn't matter if
> - * they get written to.
> - */
> - if (ip->i_d.di_nextents > 0) {
> - /*
> - * If we are not changing the file size then do not update
> - * the on-disk file size - we may be called from
> - * xfs_inactive_free_eofblocks(). If we update the on-disk
> - * file size and then the system crashes before the contents
> - * of the file are flushed to disk then the files may be
> - * full of holes (ie NULL files bug).
> - */
> - if (ip->i_size != new_size) {
> - ip->i_d.di_size = new_size;
> - ip->i_size = new_size;
> - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
> - }
> - }
> -
> - error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
> - if (error)
> - return error;
> -
> - /*
> - * If we are not changing the file size then do not update the on-disk
> - * file size - we may be called from xfs_inactive_free_eofblocks().
> - * If we update the on-disk file size and then the system crashes
> - * before the contents of the file are flushed to disk then the files
> - * may be full of holes (ie NULL files bug).
> - */
> - xfs_isize_check(ip, new_size);
> - if (ip->i_size != new_size) {
> - ip->i_d.di_size = new_size;
> - ip->i_size = new_size;
> - }
> -
> - ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
You didn't pull this assert along with
> - ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
this one into xfs_qm_scall_trunc_qfile. Was that intentional?
> -
> - /*
> - * Always re-log the inode so that our permanent transaction can keep
> - * on rolling it forward in the log.
> - */
> - xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
> -
> - trace_xfs_itruncate_data_end(ip, new_size);
> - return 0;
> -}
> -
> /*
> * This is called when the inode's link count goes to 0.
> * We place the on-disk inode on a list in the AGI. It
> Index: xfs/fs/xfs/xfs_inode.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_inode.h 2011-11-30 12:58:08.843372251 +0100
> +++ xfs/fs/xfs/xfs_inode.h 2011-11-30 12:58:51.523141034 +0100
> @@ -491,8 +491,6 @@ int xfs_ifree(struct xfs_trans *, xfs_i
> struct xfs_bmap_free *);
> int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
> int, xfs_fsize_t);
> -int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
> - xfs_fsize_t);
> int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
>
> void xfs_iext_realloc(xfs_inode_t *, int, int);
> Index: xfs/fs/xfs/xfs_iops.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_iops.c 2011-11-30 12:58:07.856710929 +0100
> +++ xfs/fs/xfs/xfs_iops.c 2011-11-30 12:58:51.523141034 +0100
> @@ -750,6 +750,7 @@ xfs_setattr_size(
> struct xfs_mount *mp = ip->i_mount;
> struct inode *inode = VFS_I(ip);
> int mask = iattr->ia_valid;
> + xfs_off_t oldsize, newsize;
> struct xfs_trans *tp;
> int error;
> uint lock_flags;
> @@ -777,11 +778,13 @@ xfs_setattr_size(
> lock_flags |= XFS_IOLOCK_EXCL;
> xfs_ilock(ip, lock_flags);
>
> + oldsize = ip->i_size;
> + newsize = iattr->ia_size;
> +
> /*
> * Short circuit the truncate case for zero length files.
> */
> - if (iattr->ia_size == 0 &&
> - ip->i_size == 0 && ip->i_d.di_nextents == 0) {
> + if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
> if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
> goto out_unlock;
>
> @@ -807,14 +810,14 @@ xfs_setattr_size(
> * the inode to the transaction, because the inode cannot be unlocked
> * once it is a part of the transaction.
> */
> - if (iattr->ia_size > ip->i_size) {
> + if (newsize > oldsize) {
> /*
> * Do the first part of growing a file: zero any data in the
> * last block that is beyond the old EOF. We need to do this
> * before the inode is joined to the transaction to modify
> * i_size.
> */
> - error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
> + error = xfs_zero_eof(ip, newsize, oldsize);
> if (error)
> goto out_unlock;
> }
> @@ -833,8 +836,8 @@ xfs_setattr_size(
> * here and prevents waiting for other data not within the range we
> * care about here.
> */
> - if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
> - error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0,
> + if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
> + error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
> FI_NONE);
> if (error)
> goto out_unlock;
> @@ -845,8 +848,7 @@ xfs_setattr_size(
> */
> inode_dio_wait(inode);
>
> - error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
> - xfs_get_blocks);
> + error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
> if (error)
> goto out_unlock;
>
> @@ -857,7 +859,7 @@ xfs_setattr_size(
> if (error)
> goto out_trans_cancel;
>
> - truncate_setsize(inode, iattr->ia_size);
> + truncate_setsize(inode, newsize);
>
> commit_flags = XFS_TRANS_RELEASE_LOG_RES;
> lock_flags |= XFS_ILOCK_EXCL;
> @@ -876,19 +878,30 @@ xfs_setattr_size(
> * these flags set. For all other operations the VFS set these flags
> * explicitly if it wants a timestamp update.
> */
> - if (iattr->ia_size != ip->i_size &&
> - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
> + if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
> iattr->ia_ctime = iattr->ia_mtime =
> current_fs_time(inode->i_sb);
> mask |= ATTR_CTIME | ATTR_MTIME;
> }
>
> - if (iattr->ia_size > ip->i_size) {
> - ip->i_d.di_size = iattr->ia_size;
> - ip->i_size = iattr->ia_size;
> - } else if (iattr->ia_size <= ip->i_size ||
> - (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
> - error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
> + /*
> + * The first thing we do is set the size to new_size permanently on
> + * disk. This way we don't have to worry about anyone ever being able
> + * to look at the data being freed even in the face of a crash.
> + * What we're getting around here is the case where we free a block, it
> + * is allocated to another file, it is written to, and then we crash.
> + * If the new data gets written to the file but the log buffers
> + * containing the free and reallocation don't, then we'd end up with
> + * garbage in the blocks being freed. As long as we make the new size
> + * permanent before actually freeing any blocks it doesn't matter if
> + * they get written to.
> + */
> + ip->i_d.di_size = newsize;
> + ip->i_size = newsize;
> + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> +
> + if (newsize <= oldsize) {
> + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
> if (error)
> goto out_trans_abort;
This newsize/oldsize section is really a nice cleanup.
>
> Index: xfs/fs/xfs/xfs_qm_syscalls.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_qm_syscalls.c 2011-11-30 12:58:07.866710876 +0100
> +++ xfs/fs/xfs/xfs_qm_syscalls.c 2011-11-30 12:58:51.523141034 +0100
> @@ -31,6 +31,7 @@
> #include "xfs_mount.h"
> #include "xfs_bmap_btree.h"
> #include "xfs_inode.h"
> +#include "xfs_inode_item.h"
> #include "xfs_itable.h"
> #include "xfs_bmap.h"
> #include "xfs_rtalloc.h"
> @@ -263,13 +264,19 @@ xfs_qm_scall_trunc_qfile(
> xfs_ilock(ip, XFS_ILOCK_EXCL);
> xfs_trans_ijoin(tp, ip, 0);
>
> - error = xfs_itruncate_data(&tp, ip, 0);
> + ip->i_d.di_size = 0;
> + ip->i_size = 0;
> + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> +
> + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
It it worth a comment that you log the size here because of the
possibility of another file being allocated these extents in the face of
a crash?
> if (error) {
> xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
> XFS_TRANS_ABORT);
> goto out_unlock;
> }
>
> + ASSERT(ip->i_d.di_nextents == 0);
> +
> xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
> error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
>
> Index: xfs/fs/xfs/xfs_trace.h
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_trace.h 2011-11-30 12:58:07.876710822 +0100
> +++ xfs/fs/xfs/xfs_trace.h 2011-11-30 12:58:51.523141034 +0100
> @@ -1090,8 +1090,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
> DEFINE_EVENT(xfs_itrunc_class, name, \
> TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
> TP_ARGS(ip, new_size))
> -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
> -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
> +DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start);
> +DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end);
>
> TRACE_EVENT(xfs_pagecache_inval,
> TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
> Index: xfs/fs/xfs/xfs_vnodeops.c
> ===================================================================
> --- xfs.orig/fs/xfs/xfs_vnodeops.c 2011-11-30 12:58:07.893377397 +0100
> +++ xfs/fs/xfs/xfs_vnodeops.c 2011-11-30 12:58:51.526474350 +0100
> @@ -226,7 +226,14 @@ xfs_free_eofblocks(
> xfs_ilock(ip, XFS_ILOCK_EXCL);
> xfs_trans_ijoin(tp, ip, 0);
>
> - error = xfs_itruncate_data(&tp, ip, ip->i_size);
> + /*
> + * Do not update the on-disk file size. If we update the
> + * on-disk file size and then the system crashes before the
> + * contents of the file are flushed to disk then the files
> + * may be full of holes (ie NULL files bug).
> + */
> + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
> + ip->i_size);
> if (error) {
> /*
> * If we get an error at this point we simply don't
> @@ -670,13 +677,19 @@ xfs_inactive(
> xfs_ilock(ip, XFS_ILOCK_EXCL);
> xfs_trans_ijoin(tp, ip, 0);
>
> - error = xfs_itruncate_data(&tp, ip, 0);
> + ip->i_d.di_size = 0;
> + ip->i_size = 0;
> + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Maybe a comment here too?
> +
> + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
> if (error) {
> xfs_trans_cancel(tp,
> XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
> xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
> return VN_INACTIVE_CACHE;
> }
> +
> + ASSERT(ip->i_d.di_nextents == 0);
> } else if (S_ISLNK(ip->i_d.di_mode)) {
>
> /*
>
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
|