xfs
[Top] [All Lists]

[PATCH 4/7] xfs: split direct IO write path from xfs_file_aio_write

To: xfs@xxxxxxxxxxx
Subject: [PATCH 4/7] xfs: split direct IO write path from xfs_file_aio_write
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Wed, 15 Dec 2010 12:23:25 +1100
In-reply-to: <1292376208-16282-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1292376208-16282-1-git-send-email-david@xxxxxxxxxxxxx>
From: Dave Chinner <dchinner@xxxxxxxxxx>

The current xfs_file_aio_write code is a mess of locking shenanigans
to handle the different locking requirements of buffered and direct
IO. Start to clean this up by disentangling the direct IO path from
the mess.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/linux-2.6/xfs_file.c |  184 +++++++++++++++++++++++++++---------------
 1 files changed, 118 insertions(+), 66 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 4608aab..d70a249 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -587,6 +587,116 @@ out_lock:
        return error;
 }
 
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping. This also clearly indicates that XFS
+ * does not fall back to buffered IO in the direct IO write path.
+ *
+ * Returns with locks held indicated by @need_i_mutex and @iolock.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *need_i_mutex,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 error = 0;
+       xfs_fsize_t             new_size;
+       size_t                  count = ocount;
+       xfs_buftarg_t           *target = XFS_IS_REALTIME_INODE(ip) ?
+                                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+       *need_i_mutex = 0;
+       *iolock = 0;
+       if ((pos & target->bt_smask) || (count & target->bt_smask))
+               return XFS_ERROR(-EINVAL);
+
+       if (mapping->nrpages || pos > ip->i_size) {
+               *iolock = XFS_IOLOCK_EXCL;
+               *need_i_mutex = 1;
+               mutex_lock(&inode->i_mutex);
+       } else {
+               *iolock = XFS_IOLOCK_SHARED;
+       }
+       xfs_ilock(ip, XFS_ILOCK_EXCL|*iolock);
+
+       error = generic_write_checks(file, &pos, &count,
+                                       S_ISBLK(inode->i_mode));
+       if (error) {
+               xfs_iunlock(ip, XFS_ILOCK_EXCL|*iolock);
+               *iolock = 0;
+               return error;
+       }
+
+       new_size = pos + count;
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+
+       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+               file_update_time(file);
+
+       /*
+        * If the offset is beyond the size of the file, we have a couple of
+        * things to do. First, if there is already space allocated we need to
+        * either create holes or zero the disk or ...
+        *
+        * If there is a page where the previous size lands, we need to zero it
+        * out up to the new size.
+        */
+       if (pos > ip->i_size) {
+               error = -xfs_zero_eof(ip, pos, ip->i_size);
+               if (error) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       return error;
+               }
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * If we're writing the file then make sure to clear the setuid and
+        * setgid bits if the process is not being run by root.  This keeps
+        * people from modifying setuid and setgid binaries.
+        */
+       error = file_remove_suid(file);
+       if (unlikely(error))
+               return error;
+
+       if (mapping->nrpages) {
+               WARN_ON(*need_i_mutex == 0);
+               error = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+                                                       FI_REMAPF_LOCKED);
+               if (error)
+                       return error;
+       }
+
+       if (*need_i_mutex) {
+               /* demote the lock now the cached pages are gone */
+               xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               mutex_unlock(&inode->i_mutex);
+               *iolock = XFS_IOLOCK_SHARED;
+               *need_i_mutex = 0;
+       }
+
+       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+       error = generic_file_direct_write(iocb, iovp,
+                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+       /* No fallback to buffered IO on errors for XFS. */
+       return error;
+}
+
 STATIC ssize_t
 xfs_file_aio_write(
        struct kiocb            *iocb,
@@ -628,19 +738,17 @@ xfs_file_aio_write(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-relock:
        if (ioflags & IO_ISDIRECT) {
-               iolock = XFS_IOLOCK_SHARED;
-               need_i_mutex = 0;
-       } else {
-               iolock = XFS_IOLOCK_EXCL;
-               need_i_mutex = 1;
-               mutex_lock(&inode->i_mutex);
+               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+                                       ocount, &need_i_mutex, &iolock);
+               goto done_io;
        }
 
+       iolock = XFS_IOLOCK_EXCL;
+       need_i_mutex = 1;
+       mutex_lock(&inode->i_mutex);
        xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
 
-start:
        error = -generic_write_checks(file, &pos, &count,
                                        S_ISBLK(inode->i_mode));
        if (error) {
@@ -648,26 +756,6 @@ start:
                goto out_unlock_mutex;
        }
 
-       if (ioflags & IO_ISDIRECT) {
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp;
-
-               if ((pos & target->bt_smask) || (count & target->bt_smask)) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
-                       return XFS_ERROR(-EINVAL);
-               }
-
-               if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
-                       iolock = XFS_IOLOCK_EXCL;
-                       need_i_mutex = 1;
-                       mutex_lock(&inode->i_mutex);
-                       xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
-                       goto start;
-               }
-       }
-
        new_size = pos + count;
        if (new_size > ip->i_size)
                ip->i_new_size = new_size;
@@ -706,44 +794,7 @@ start:
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
 
-       if ((ioflags & IO_ISDIRECT)) {
-               if (mapping->nrpages) {
-                       WARN_ON(need_i_mutex == 0);
-                       error = xfs_flushinval_pages(ip,
-                                       (pos & PAGE_CACHE_MASK),
-                                       -1, FI_REMAPF_LOCKED);
-                       if (error)
-                               goto out_unlock_internal;
-               }
-
-               if (need_i_mutex) {
-                       /* demote the lock now the cached pages are gone */
-                       xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
-                       mutex_unlock(&inode->i_mutex);
-
-                       iolock = XFS_IOLOCK_SHARED;
-                       need_i_mutex = 0;
-               }
-
-               trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
-               ret = generic_file_direct_write(iocb, iovp,
-                               &nr_segs, pos, &iocb->ki_pos, count, ocount);
-
-               /*
-                * direct-io write to a hole: fall through to buffered I/O
-                * for completing the rest of the request.
-                */
-               if (ret >= 0 && ret != count) {
-                       XFS_STATS_ADD(xs_write_bytes, ret);
-
-                       pos += ret;
-                       count -= ret;
-
-                       ioflags &= ~IO_ISDIRECT;
-                       xfs_iunlock(ip, iolock);
-                       goto relock;
-               }
-       } else {
+       if (!(ioflags & IO_ISDIRECT)) {
                int enospc = 0;
                ssize_t ret2 = 0;
 
@@ -767,6 +818,7 @@ write_retry:
 
        current->backing_dev_info = NULL;
 
+done_io:
        xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
 
        error = -ret;
-- 
1.7.2.3

<Prev in Thread] Current Thread [Next in Thread>