xfs
[Top] [All Lists]

Re: XFS performance issues: O_DIRECT and Linux 2.6.6+

To: James Foris <james.foris@xxxxxxxxxx>
Subject: Re: XFS performance issues: O_DIRECT and Linux 2.6.6+
From: Nathan Scott <nathans@xxxxxxx>
Date: Wed, 15 Sep 2004 18:33:07 +1000
Cc: linux-xfs@xxxxxxxxxxx
In-reply-to: <20040915015002.GA12795@frodo>
References: <411A8410.2030000@med.ge.com> <20040910041106.GA14336@frodo> <4144B19A.2020407@med.ge.com> <4145D141.1040907@med.ge.com> <20040914095914.A4118499@wobbly.melbourne.sgi.com> <41472212.1090605@med.ge.com> <20040915015002.GA12795@frodo>
Sender: linux-xfs-bounce@xxxxxxxxxxx
User-agent: Mutt/1.5.3i
Hi there,

On Wed, Sep 15, 2004 at 11:50:02AM +1000, Nathan Scott wrote:
> On Tue, Sep 14, 2004 at 11:53:38AM -0500, James Foris wrote:
> > ...
> > I put a drop-through printk in mm/filemap.c to report when O_DIRECT hits the
> > following:
> > 
> >     /*
> >                  * If we get here for O_DIRECT writes then we must have 
> > fallen 
> >                  through
> >           * to buffered writes (block instantiation inside i_size).  So 
> >           we sync
> >           * the file data here, to try to honour O_DIRECT expectations.
> >           */
> >           if (unlikely(file->f_flags & O_DIRECT) && written)
> >                  status = filemap_write_and_wait(mapping);
> 
> Hmmm... very interesting.
> 

Could you try the patch below James?  It should apply
cleanly to the 2.6.x-xfs cvs tree on oss.sgi.com, or to
Linus' current -bk tree (but that may need a little bit
of tweaking, not sure off the top of my head).

Let me know if the numbers are good/bad/indifferent (or
if you see any hangs etc - I really need to stare at the
locking in here for a whole lot longer).

thanks!

-- 
Nathan


===========================================================================
fs/direct-io.c
===========================================================================

--- a/fs/direct-io.c    Wed Sep 15 17:29:16 2004
+++ b/fs/direct-io.c    Wed Sep 15 17:29:13 2004
@@ -53,9 +53,12 @@
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
  *
- * needs_locking is set for regular files on direct-IO-naive filesystems.  It
+ * needs_locking is one for regular files on direct-IO-naive filesystems.  It
  * determines whether we need to do the fancy locking which prevents direct-IO
- * from being able to read uninitialised disk blocks.
+ * from being able to read uninitialised disk blocks.  When needs_locking iso
+ * zero (blockdev) this locking is not done, and when a value of two is used,
+ * i_sem is not held for the entire direct write (it is held initially only,
+ * during reads).
  */
 
 struct dio {
@@ -212,7 +215,7 @@
 {
        if (dio->end_io && dio->result)
                dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
-       if (dio->needs_locking)
+       if (dio->needs_locking > 0)
                up_read(&dio->inode->i_alloc_sem);
 }
 
@@ -492,7 +495,7 @@
        unsigned long fs_count; /* Number of filesystem-sized blocks */
        unsigned long dio_count;/* Number of dio_block-sized blocks */
        unsigned long blkmask;
-       int beyond_eof = 0;
+       int create;
 
        /*
         * If there was a memory error and we've overwritten all the
@@ -510,10 +513,13 @@
                if (dio_count & blkmask)        
                        fs_count++;
 
-               if (dio->needs_locking) {
-                       if (dio->block_in_file >= (i_size_read(dio->inode) >>
-                                                       dio->blkbits))
-                               beyond_eof = 1;
+               create = dio->rw == WRITE;
+               if (dio->needs_locking == 1) {
+                       if (dio->block_in_file < (i_size_read(dio->inode) >>
+                                                       dio->blkbits) && create)
+                               create = 0;
+               } else if (dio->needs_locking == 0) {
+                       create = 0;
                }
                /*
                 * For writes inside i_size we forbid block creations: only
@@ -522,7 +528,7 @@
                 * writes.
                 */
                ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
-                               map_bh, (dio->rw == WRITE) && beyond_eof);
+                                                       map_bh, create);
        }
        return ret;
 }
@@ -1024,7 +1030,7 @@
         * we can let i_sem go now that its achieved its purpose
         * of protecting us from looking up uninitialized blocks.
         */
-       if ((rw == READ) && dio->needs_locking)
+       if ((rw == READ) && dio->needs_locking == 1)
                up(&dio->inode->i_sem);
 
        /*
@@ -1160,8 +1166,8 @@
         *      writers need to grab i_alloc_sem only (i_sem is already held)
         */
        needs_locking = 0;
-       if (S_ISREG(inode->i_mode) && needs_special_locking) {
-               needs_locking = 1;
+       if (S_ISREG(inode->i_mode) && needs_special_locking > 0) {
+               needs_locking = needs_special_locking;
                if (rw == READ) {
                        struct address_space *mapping;
 
@@ -1173,8 +1179,12 @@
                                kfree(dio);
                                goto out;
                        }
+                       down_read(&inode->i_alloc_sem);
+                       if (needs_special_locking == 2)
+                               up(&inode->i_sem);
+               } else {
+                       down_read(&inode->i_alloc_sem);
                }
-               down_read(&inode->i_alloc_sem);
        }
        dio->needs_locking = needs_locking;
        /*

===========================================================================
fs/xfs/linux-2.6/xfs_aops.c
===========================================================================

--- a/fs/xfs/linux-2.6/xfs_aops.c       Wed Sep 15 17:29:16 2004
+++ b/fs/xfs/linux-2.6/xfs_aops.c       Wed Sep 15 17:26:31 2004
@@ -1024,7 +1024,7 @@
        if (error)
                return -error;
 
-       return blockdev_direct_IO_no_locking(rw, iocb, inode,
+       return blockdev_direct_IO_own_locking(rw, iocb, inode,
                iomap.iomap_target->pbr_bdev,
                iov, offset, nr_segs,
                linvfs_get_blocks_direct,

===========================================================================
include/linux/fs.h
===========================================================================

--- a/include/linux/fs.h        Wed Sep 15 17:29:16 2004
+++ b/include/linux/fs.h        Wed Sep 15 17:18:17 2004
@@ -1476,6 +1476,15 @@
                                nr_segs, get_blocks, end_io, 0);
 }
 
+static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb 
*iocb,
+       struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+       loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
+       dio_iodone_t end_io)
+{
+       return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+                               nr_segs, get_blocks, end_io, 2);
+}
+
 extern struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))


<Prev in Thread] Current Thread [Next in Thread>