Hi there,
On Wed, Sep 15, 2004 at 11:50:02AM +1000, Nathan Scott wrote:
> On Tue, Sep 14, 2004 at 11:53:38AM -0500, James Foris wrote:
> > ...
> > I put a drop-through printk in mm/filemap.c to report when O_DIRECT hits the
> > following:
> >
> > /*
> > * If we get here for O_DIRECT writes then we must have
> > fallen
> > through
> > * to buffered writes (block instantiation inside i_size). So
> > we sync
> > * the file data here, to try to honour O_DIRECT expectations.
> > */
> > if (unlikely(file->f_flags & O_DIRECT) && written)
> > status = filemap_write_and_wait(mapping);
>
> Hmmm... very interesting.
>
Could you try the patch below James? It should apply
cleanly to the 2.6.x-xfs cvs tree on oss.sgi.com, or to
Linus' current -bk tree (but that may need a little bit
of tweaking, not sure off the top of my head).
Let me know if the numbers are good/bad/indifferent (or
if you see any hangs etc - I really need to stare at the
locking in here for a whole lot longer).
thanks!
--
Nathan
===========================================================================
fs/direct-io.c
===========================================================================
--- a/fs/direct-io.c Wed Sep 15 17:29:16 2004
+++ b/fs/direct-io.c Wed Sep 15 17:29:13 2004
@@ -53,9 +53,12 @@
* If blkfactor is zero then the user's request was aligned to the filesystem's
* blocksize.
*
- * needs_locking is set for regular files on direct-IO-naive filesystems. It
+ * needs_locking is one for regular files on direct-IO-naive filesystems. It
* determines whether we need to do the fancy locking which prevents direct-IO
- * from being able to read uninitialised disk blocks.
+ * from being able to read uninitialised disk blocks. When needs_locking iso
+ * zero (blockdev) this locking is not done, and when a value of two is used,
+ * i_sem is not held for the entire direct write (it is held initially only,
+ * during reads).
*/
struct dio {
@@ -212,7 +215,7 @@
{
if (dio->end_io && dio->result)
dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
- if (dio->needs_locking)
+ if (dio->needs_locking > 0)
up_read(&dio->inode->i_alloc_sem);
}
@@ -492,7 +495,7 @@
unsigned long fs_count; /* Number of filesystem-sized blocks */
unsigned long dio_count;/* Number of dio_block-sized blocks */
unsigned long blkmask;
- int beyond_eof = 0;
+ int create;
/*
* If there was a memory error and we've overwritten all the
@@ -510,10 +513,13 @@
if (dio_count & blkmask)
fs_count++;
- if (dio->needs_locking) {
- if (dio->block_in_file >= (i_size_read(dio->inode) >>
- dio->blkbits))
- beyond_eof = 1;
+ create = dio->rw == WRITE;
+ if (dio->needs_locking == 1) {
+ if (dio->block_in_file < (i_size_read(dio->inode) >>
+ dio->blkbits) && create)
+ create = 0;
+ } else if (dio->needs_locking == 0) {
+ create = 0;
}
/*
* For writes inside i_size we forbid block creations: only
@@ -522,7 +528,7 @@
* writes.
*/
ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
- map_bh, (dio->rw == WRITE) && beyond_eof);
+ map_bh, create);
}
return ret;
}
@@ -1024,7 +1030,7 @@
* we can let i_sem go now that its achieved its purpose
* of protecting us from looking up uninitialized blocks.
*/
- if ((rw == READ) && dio->needs_locking)
+ if ((rw == READ) && dio->needs_locking == 1)
up(&dio->inode->i_sem);
/*
@@ -1160,8 +1166,8 @@
* writers need to grab i_alloc_sem only (i_sem is already held)
*/
needs_locking = 0;
- if (S_ISREG(inode->i_mode) && needs_special_locking) {
- needs_locking = 1;
+ if (S_ISREG(inode->i_mode) && needs_special_locking > 0) {
+ needs_locking = needs_special_locking;
if (rw == READ) {
struct address_space *mapping;
@@ -1173,8 +1179,12 @@
kfree(dio);
goto out;
}
+ down_read(&inode->i_alloc_sem);
+ if (needs_special_locking == 2)
+ up(&inode->i_sem);
+ } else {
+ down_read(&inode->i_alloc_sem);
}
- down_read(&inode->i_alloc_sem);
}
dio->needs_locking = needs_locking;
/*
===========================================================================
fs/xfs/linux-2.6/xfs_aops.c
===========================================================================
--- a/fs/xfs/linux-2.6/xfs_aops.c Wed Sep 15 17:29:16 2004
+++ b/fs/xfs/linux-2.6/xfs_aops.c Wed Sep 15 17:26:31 2004
@@ -1024,7 +1024,7 @@
if (error)
return -error;
- return blockdev_direct_IO_no_locking(rw, iocb, inode,
+ return blockdev_direct_IO_own_locking(rw, iocb, inode,
iomap.iomap_target->pbr_bdev,
iov, offset, nr_segs,
linvfs_get_blocks_direct,
===========================================================================
include/linux/fs.h
===========================================================================
--- a/include/linux/fs.h Wed Sep 15 17:29:16 2004
+++ b/include/linux/fs.h Wed Sep 15 17:18:17 2004
@@ -1476,6 +1476,15 @@
nr_segs, get_blocks, end_io, 0);
}
+static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb
*iocb,
+ struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks,
+ dio_iodone_t end_io)
+{
+ return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+ nr_segs, get_blocks, end_io, 2);
+}
+
extern struct file_operations generic_ro_fops;
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
|