xfs
[Top] [All Lists]

[PATCH v3 4/5] mm: Scan for dirty ptes and update cmtime on MS_ASYNC

To: linux-kernel@xxxxxxxxxxxxxxx
Subject: [PATCH v3 4/5] mm: Scan for dirty ptes and update cmtime on MS_ASYNC
From: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Date: Fri, 16 Aug 2013 16:22:11 -0700
Cc: linux-ext4@xxxxxxxxxxxxxxx, Dave Chinner <david@xxxxxxxxxxxxx>, Theodore Ts'o <tytso@xxxxxxx>, Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>, xfs@xxxxxxxxxxx, Jan Kara <jack@xxxxxxx>, Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx>, Christoph Hellwig <hch@xxxxxxxxxxxxx>, Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <cover.1376679411.git.luto@xxxxxxxxxxxxxx>
In-reply-to: <cover.1376679411.git.luto@xxxxxxxxxxxxxx>
References: <cover.1376679411.git.luto@xxxxxxxxxxxxxx>
References: <cover.1376679411.git.luto@xxxxxxxxxxxxxx>
This is probably unimportant but improves POSIX compliance.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
---
 mm/msync.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/mm/msync.c b/mm/msync.c
index 632df45..9e41acd 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -13,13 +13,16 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
 
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
  * MS_ASYNC does not start I/O (it used to, up to 2.5.67).
  * Nor does it marks the relevant pages dirty (it used to up to 2.6.17).
- * Now it doesn't do anything, since dirty pages are properly tracked.
+ * Now all it does is ensure that file timestamps get updated, since POSIX
+ * requires it.  We track dirty pages correctly without MS_ASYNC.
  *
  * The application may now run fsync() to
  * write out the dirty pages and wait on the writeout and check the result.
@@ -28,6 +31,57 @@
  * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
+
+static int msync_async_range(struct vm_area_struct *vma,
+                             unsigned long *start, unsigned long end)
+{
+       struct mm_struct *mm;
+       struct address_space *mapping;
+       int iters = 0;
+
+       while (*start < end && *start < vma->vm_end && iters < 128) {
+               unsigned int page_mask, page_increm;
+
+               /*
+                * Require that the pte be writable (FOLL_WRITE): a pte that
+                * is not writable can't be dirty, so there is nothing to clean.
+                *
+                * In theory we could check the pte dirty bit instead, but
+                * that is awkward and barely worth it.
+                */
+               struct page *page = follow_page_mask(vma, *start,
+                                                    FOLL_GET | FOLL_WRITE,
+                                                    &page_mask);
+
+               if (page && !IS_ERR(page)) {
+                       if (lock_page_killable(page) == 0) {
+                               page_mkclean(page);
+                               unlock_page(page);
+                       }
+                       put_page(page);
+               }
+
+               if (IS_ERR(page))
+                       return PTR_ERR(page);
+
+               page_increm = 1 + (~(*start >> PAGE_SHIFT) & page_mask);
+               *start += page_increm * PAGE_SIZE;
+               cond_resched();
+               iters++;
+       }
+
+       /* XXX: runs on every call; try to do this only once per msync()? */
+       mapping = vma->vm_file->f_mapping;
+       if (mapping->a_ops->flush_cmtime)
+               mapping->a_ops->flush_cmtime(mapping);
+
+       /* Drop and retake mmap_sem so that waiting writers get a chance. */
+       mm = current->mm;
+       up_read(&mm->mmap_sem);
+       down_read(&mm->mmap_sem);
+       return 0;
+}
+
 SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 {
        unsigned long end;
@@ -77,18 +131,25 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
                        goto out_unlock;
                }
                file = vma->vm_file;
-               start = vma->vm_end;
-               if ((flags & MS_SYNC) && file &&
-                               (vma->vm_flags & VM_SHARED)) {
-                       get_file(file);
-                       up_read(&mm->mmap_sem);
-                       error = vfs_fsync(file, 0);
-                       fput(file);
-                       if (error || start >= end)
-                               goto out;
-                       down_read(&mm->mmap_sem);
+               if (file && vma->vm_flags & VM_SHARED) {
+                       if (flags & MS_SYNC) {
+                               start = vma->vm_end;
+                               get_file(file);
+                               up_read(&mm->mmap_sem);
+                               error = vfs_fsync(file, 0);
+                               fput(file);
+                               if (error || start >= end)
+                                       goto out;
+                               down_read(&mm->mmap_sem);
+                       } else if ((vma->vm_flags & VM_WRITE) &&
+                                  file->f_mapping) {
+                               error = msync_async_range(vma, &start, end);
+                       } else {
+                               start = vma->vm_end;
+                       }
                        vma = find_vma(mm, start);
                } else {
+                       start = vma->vm_end;
                        if (start >= end) {
                                error = 0;
                                goto out_unlock;
-- 
1.8.3.1

<Prev in Thread] Current Thread [Next in Thread>