xfs
[Top] [All Lists]

[PATCH 3/3] fs: add a flag for per-operation O_DSYNC semantics

To: Milosz Tanski <milosz@xxxxxxxxx>
Subject: [PATCH 3/3] fs: add a flag for per-operation O_DSYNC semantics
From: Christoph Hellwig <hch@xxxxxx>
Date: Wed, 1 Oct 2014 23:04:54 +0200
Cc: linux-fsdevel@xxxxxxxxxxxxxxx, xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1412197494-7655-1-git-send-email-hch@xxxxxx>
References: <1412197494-7655-1-git-send-email-hch@xxxxxx>
With the new read/write with flags syscalls we can support a flag
to enable O_DSYNC semantics on a per-operation basis.  This is
useful to implement protocols like SMB, NFS or SCSI that have such
per-operation flags.

Example program below:

cat > pwritev2.c << EOF

        (off_t) val,                              \
        (off_t) ((((uint64_t) (val)) >> (sizeof (long) * 4)) >> (sizeof (long) 
* 4))

/*
 * Thin userspace wrapper that invokes the pwritev2 syscall directly.
 * The offset is expanded by LO_HI_LONG() into two syscall arguments;
 * flags is passed through unchanged.  Returns whatever syscall() returns.
 */
static ssize_t
pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)
{
        ssize_t nwritten;

        nwritten = syscall(__NR_pwritev2, fd, iov, iovcnt,
                           LO_HI_LONG(offset), flags);
        return nwritten;
}

/*
 * Create/truncate the file named by argv[1], write one 1024-byte buffer
 * filled with 0xfe at offset 0 using pwritev2() with RWF_DSYNC, and print
 * the return value.
 *
 * Returns 0 on success, 1 on a usage error or when open/pwritev2 fails.
 */
int main(int argc, char **argv)
{
        char buf[1024];
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        ssize_t ret;
        int status = 0;
        int fd;

        /* argv[1] is only valid when a file name was actually supplied. */
        if (argc < 2) {
                fprintf(stderr, "usage: %s <file>\n",
                        argc > 0 ? argv[0] : "pwritev2");
                return 1;
        }

        fd = open(argv[1], O_WRONLY|O_CREAT|O_TRUNC, 0666);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        memset(buf, 0xfe, sizeof(buf));

        ret = pwritev2(fd, &iov, 1, 0, RWF_DSYNC);
        if (ret < 0) {
                perror("pwritev2");
                status = 1;
        } else {
                /* ret is ssize_t, so print it with the matching specifier. */
                printf("ret = %zd\n", ret);
        }

        close(fd);
        return status;
}
EOF

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
 fs/ceph/file.c     |  4 +++-
 fs/fuse/file.c     |  2 ++
 fs/nfs/file.c      | 10 ++++++----
 fs/ocfs2/file.c    |  6 ++++--
 fs/read_write.c    | 11 ++++++++---
 include/linux/fs.h |  3 ++-
 mm/filemap.c       |  4 +++-
 7 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 2eb02f8..e59f1f1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -968,7 +968,9 @@ retry_snap:
        ceph_put_cap_refs(ci, got);
 
        if (written >= 0 &&
-           ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
+           ((file->f_flags & O_SYNC) ||
+            IS_SYNC(file->f_mapping->host) ||
+            (iocb->ki_rwflags & RWF_DSYNC) ||
             ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
                err = vfs_fsync_range(file, pos, pos + written - 1, 1);
                if (err < 0)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index caa8d95..bb4fb23 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1248,6 +1248,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
                written += written_buffered;
                iocb->ki_pos = pos + written_buffered;
        } else {
+               if (iocb->ki_rwflags & RWF_DSYNC)
+                       return -EINVAL;
                written = fuse_perform_write(file, mapping, from, pos);
                if (written >= 0)
                        iocb->ki_pos = pos + written;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 524dd80..a9e6cc4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -621,13 +621,15 @@ static const struct vm_operations_struct nfs_file_vm_ops 
= {
        .remap_pages = generic_file_remap_pages,
 };
 
-static int nfs_need_sync_write(struct file *filp, struct inode *inode)
+static int nfs_need_sync_write(struct kiocb *iocb, struct inode *inode)
 {
        struct nfs_open_context *ctx;
 
-       if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
+       if (IS_SYNC(inode) ||
+           (iocb->ki_filp->f_flags & O_DSYNC) ||
+           (iocb->ki_rwflags & RWF_DSYNC))
                return 1;
-       ctx = nfs_file_open_context(filp);
+       ctx = nfs_file_open_context(iocb->ki_filp);
        if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
            nfs_ctx_key_to_expire(ctx))
                return 1;
@@ -674,7 +676,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter 
*from)
                written = result;
 
        /* Return error values for O_DSYNC and IS_SYNC() */
-       if (result >= 0 && nfs_need_sync_write(file, inode)) {
+       if (result >= 0 && nfs_need_sync_write(iocb, inode)) {
                int err = vfs_fsync(file, 0);
                if (err < 0)
                        result = err;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d96f60d..8f29714 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2375,8 +2375,10 @@ out_dio:
        /* buffered aio wouldn't have proper lock coverage today */
        BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
-       if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
-           ((file->f_flags & O_DIRECT) && !direct_io)) {
+       if (((file->f_flags & O_DSYNC) && !direct_io) ||
+           IS_SYNC(inode) ||
+           ((file->f_flags & O_DIRECT) && !direct_io) ||
+           (iocb->ki_rwflags & RWF_DSYNC)) {
                ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
                                               *ppos + count - 1);
                if (ret < 0)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5592a18..8af8925 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -837,8 +837,13 @@ static ssize_t do_readv_writev(int type, struct file *file,
                ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
                                                pos, iter_fn, flags);
        } else {
-               if (type == READ && (flags & RWF_NONBLOCK))
-                       return -EAGAIN;
+               if (type == READ) {
+                       if (flags & RWF_NONBLOCK)
+                               return -EAGAIN;
+               } else {
+                       if (flags & RWF_DSYNC)
+                               return -EINVAL;
+               }
 
                if (fnv)
                        ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
@@ -886,7 +891,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec 
__user *vec,
                return -EBADF;
        if (!(file->f_mode & FMODE_CAN_WRITE))
                return -EINVAL;
-       if (flags & ~0)
+       if (flags & ~RWF_DSYNC)
                return -EINVAL;
 
        return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cf759fa..005e7c8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1478,7 +1478,8 @@ struct block_device_operations;
 #define HAVE_UNLOCKED_IOCTL 1
 
 /* These flags are used for the readv/writev syscalls with flags. */
-#define RWF_NONBLOCK 0x00000001
+#define RWF_NONBLOCK   0x00000001
+#define RWF_DSYNC      0x00000002
 
 struct iov_iter;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index f9ffb6f..40831ae 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2659,7 +2659,9 @@ int generic_write_sync(struct kiocb *iocb, loff_t count)
        struct file *file = iocb->ki_filp;
 
        if (count > 0 &&
-           ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))) {
+           ((file->f_flags & O_DSYNC) ||
+            (iocb->ki_rwflags & RWF_DSYNC) ||
+            IS_SYNC(file->f_mapping->host))) {
                bool fdatasync = !(file->f_flags & __O_SYNC);
                ssize_t ret = 0;
 
-- 
1.9.1

<Prev in Thread] Current Thread [Next in Thread>