On Tue, Jan 11, 2011 at 10:37:44AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
>
> We need to obtain the i_mutex, i_iolock and i_ilock during the read
> and write paths. Add a set of wrapper functions to neatly
> encapsulate the lock ordering and shared/exclusive semantics to make
> the locking easier to follow and get right.
>
> Note that this changes some of the exclusive locking serialisation in
> that serialisation will occur against the i_mutex instead of the
> XFS_IOLOCK_EXCL. This does not change any behaviour, and it is
> arguably more efficient to use the mutex for such serialisation than
> the rw_sem.
>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
> fs/xfs/linux-2.6/xfs_file.c | 136
> ++++++++++++++++++++++++++-----------------
> 1 files changed, 82 insertions(+), 54 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
> index c47d7dc0..941d7c2 100644
> --- a/fs/xfs/linux-2.6/xfs_file.c
> +++ b/fs/xfs/linux-2.6/xfs_file.c
> @@ -41,6 +41,40 @@
> static const struct vm_operations_struct xfs_file_vm_ops;
>
> /*
> + * Locking primitives for read and write IO paths to ensure we consistently
> use
> + * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
> + */
> +static inline void
> +xfs_rw_ilock(
> + struct xfs_inode *ip,
> + int type)
> +{
> + if (type & XFS_IOLOCK_EXCL)
> + mutex_lock(&VFS_I(ip)->i_mutex);
> + xfs_ilock(ip, type);
> +}
> +
> +static inline void
> +xfs_rw_iunlock(
> + struct xfs_inode *ip,
> + int type)
> +{
> + xfs_iunlock(ip, type);
> + if (type & XFS_IOLOCK_EXCL)
> + mutex_unlock(&VFS_I(ip)->i_mutex);
> +}
> +
> +static inline void
> +xfs_rw_ilock_demote(
> + struct xfs_inode *ip,
> + int type)
> +{
> + xfs_ilock_demote(ip, type);
> + if (type & XFS_IOLOCK_EXCL)
> + mutex_unlock(&VFS_I(ip)->i_mutex);
> +}
> +
> +/*
> * xfs_iozero
> *
> * xfs_iozero clears the specified range of buffer supplied,
> @@ -262,22 +296,21 @@ xfs_file_aio_read(
> if (XFS_FORCED_SHUTDOWN(mp))
> return -EIO;
>
> - if (unlikely(ioflags & IO_ISDIRECT))
> - mutex_lock(&inode->i_mutex);
> - xfs_ilock(ip, XFS_IOLOCK_SHARED);
> -
> if (unlikely(ioflags & IO_ISDIRECT)) {
> + xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
> +
> if (inode->i_mapping->nrpages) {
> ret = -xfs_flushinval_pages(ip,
> (iocb->ki_pos & PAGE_CACHE_MASK),
> -1, FI_REMAPF_LOCKED);
> + if (ret) {
> + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> + return ret;
> + }
> }
> - mutex_unlock(&inode->i_mutex);
> - if (ret) {
> - xfs_iunlock(ip, XFS_IOLOCK_SHARED);
> - return ret;
> - }
> - }
> + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> + } else
> + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
>
> trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
>
> @@ -285,7 +318,7 @@ xfs_file_aio_read(
> if (ret > 0)
> XFS_STATS_ADD(xs_read_bytes, ret);
>
> - xfs_iunlock(ip, XFS_IOLOCK_SHARED);
> + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
> return ret;
> }
>
> @@ -309,7 +342,7 @@ xfs_file_splice_read(
> if (XFS_FORCED_SHUTDOWN(ip->i_mount))
> return -EIO;
>
> - xfs_ilock(ip, XFS_IOLOCK_SHARED);
> + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
>
> trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
>
> @@ -317,7 +350,7 @@ xfs_file_splice_read(
> if (ret > 0)
> XFS_STATS_ADD(xs_read_bytes, ret);
>
> - xfs_iunlock(ip, XFS_IOLOCK_SHARED);
> + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
> return ret;
> }
>
> @@ -338,10 +371,10 @@ xfs_aio_write_isize_update(
> *ppos = isize;
>
> if (*ppos > ip->i_size) {
> - xfs_ilock(ip, XFS_ILOCK_EXCL);
> + xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
> if (*ppos > ip->i_size)
> ip->i_size = *ppos;
> - xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
> }
> }
>
> @@ -356,14 +389,22 @@ xfs_aio_write_newsize_update(
> struct xfs_inode *ip)
> {
> if (ip->i_new_size) {
> - xfs_ilock(ip, XFS_ILOCK_EXCL);
> + xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
> ip->i_new_size = 0;
> if (ip->i_d.di_size > ip->i_size)
> ip->i_d.di_size = ip->i_size;
> - xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
> }
> }
>
> +/*
> + * xfs_file_splice_write() does not use xfs_rw_ilock() because
> + * generic_file_splice_write() takes the i_mutex itself. This, in theory,
> + could cause lock inversions between the aio_write path and the splice
> path
> + * if someone is doing concurrent splice(2) based writes and write(2) based
> + * writes to the same inode. The only real way to fix this is to re-implement
> + * the generic code here with correct locking orders.
> + */
> STATIC ssize_t
> xfs_file_splice_write(
> struct pipe_inode_info *pipe,
> @@ -386,14 +427,13 @@ xfs_file_splice_write(
> if (XFS_FORCED_SHUTDOWN(ip->i_mount))
> return -EIO;
>
> - xfs_ilock(ip, XFS_IOLOCK_EXCL);
> + xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL);
>
> new_size = *ppos + count;
>
> - xfs_ilock(ip, XFS_ILOCK_EXCL);
> if (new_size > ip->i_size)
> ip->i_new_size = new_size;
> - xfs_iunlock(ip, XFS_ILOCK_EXCL);
> + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
While using the xfs_rw_iunlock here actually is correct I think it's
highly confusing, given that we didn't use it to take the ilock. What
about just leaving all the code in xfs_file_splice_write as-is and not
touching it at all?
|