xfs
[Top] [All Lists]

[PATCH v2] xfs: probe data buffer from page cache for unwritten extents

To: xfs@xxxxxxxxxxx
Subject: [PATCH v2] xfs: probe data buffer from page cache for unwritten extents
From: Jeff Liu <jeff.liu@xxxxxxxxxx>
Date: Mon, 25 Jun 2012 20:41:31 +0800
Organization: Oracle
Reply-to: jeff.liu@xxxxxxxxxx
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.28) Gecko/20120313 Thunderbird/3.1.20
Hello,

Using the start offset rather than map->br_startoff to calculate the starting 
page index could
get more accurate data offset in page cache probe routine.
With this refinement, the old max_t() could be able to remove too. 

Thanks Mark for pointing this out!

-Jeff


Cc: Mark Tinguely <tinguely@xxxxxxx>
Signed-off-by: Jie Liu <jeff.liu@xxxxxxxxxx>

---
 fs/xfs/xfs_file.c |  155 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 150 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9f7ec15..113190b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
+#include <linux/pagevec.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
@@ -966,6 +967,108 @@ xfs_vm_page_mkwrite(
        return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
 
+/*
+ * Probe the data buffer offset in page cache for unwritten extents.
+ * Iterate each page to find out if a buffer head state has BH_Unwritten
+ * or BH_Uptodate set.
+ */
+STATIC bool
+xfs_has_unwritten_buffer(
+       struct inode            *inode,
+       struct xfs_bmbt_irec    *map,
+       loff_t                  *offset)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct pagevec          pvec;
+       pgoff_t                 index;
+       pgoff_t                 end;
+       bool                    found = false;
+
+       pagevec_init(&pvec, 0);
+
+       index = XFS_FSB_TO_B(mp, XFS_B_TO_FSBT(mp, *offset))
+                            >> PAGE_CACHE_SHIFT;
+       end = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount)
+                          >> PAGE_CACHE_SHIFT;
+       do {
+               unsigned int    i;
+               unsigned        nr_pages;
+               int             want = min_t(pgoff_t, end - index,
+                                            (pgoff_t)PAGEVEC_SIZE - 1) + 1;
+               nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+                                         want);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page             *page = pvec.pages[i];
+                       struct buffer_head      *bh;
+                       struct buffer_head      *head;
+                       xfs_fileoff_t           last;
+
+                       /*
+                        * There is no need to check the following pages
+                        * if the current page offset is out of range.
+                        */
+                       if (page->index > end)
+                               goto out;
+
+                       if (!trylock_page(page))
+                               goto out;
+
+                       if (!page_has_buffers(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       last = XFS_B_TO_FSBT(mp,
+                                            page->index << PAGE_CACHE_SHIFT);
+                       bh = head = page_buffers(page);
+                       do {
+                               /*
+                                * An extent in XFS_EXT_UNWRITTEN has disk
+                                * blocks already mapped to it, but no data
+                                * has been committed to them yet.  If it has
+                                * dirty data in the page cache it can be
+                                * identified by having BH_Unwritten set in
+                                * each buffers.  Also, the buffer head state
+                                * might be in BH_Uptodate too if the buffer
+                                * writeback procedure was fired, we need to
+                                * examine it as well.
+                                */
+                               if ((buffer_unwritten(bh) ||
+                                    buffer_uptodate(bh)) &&
+                                    *offset <= XFS_FSB_TO_B(mp, last)) {
+                                       found = true;
+                                       *offset = XFS_FSB_TO_B(mp, last);
+                                       unlock_page(page);
+                                       goto out;
+                               }
+                               last++;
+                       } while ((bh = bh->b_this_page) != head);
+                       unlock_page(page);
+               }
+
+               /*
+                * If the number of probed pages less than our desired,
+                * there should no more pages mapped, search done.
+                */
+               if (nr_pages < want)
+                       break;
+
+               index = pvec.pages[i - 1]->index + 1;
+               pagevec_release(&pvec);
+       } while (index < end);
+
+out:
+       pagevec_release(&pvec);
+       if (!found)
+               *offset = 0;
+
+       return found;
+}
+
 STATIC loff_t
 xfs_seek_data(
        struct file             *file,
@@ -1009,19 +1112,61 @@ xfs_seek_data(
         * Treat unwritten extent as data extent since it might
         * contains dirty data in page cache.
         */
-       if (map[0].br_startblock != HOLESTARTBLOCK) {
-               offset = max_t(loff_t, start,
-                              XFS_FSB_TO_B(mp, map[0].br_startoff));
+       if (map[0].br_state == XFS_EXT_NORM &&
+           !isnullstartblock(map[0].br_startblock)) {
+               offset = start;
        } else {
+               /*
+                * Landed in an unwritten extent, try to find out
+                * the data buffer offset from page cache firstly.
+                * Treat it as a hole if nothing was found, and
+                * skip to check the next extent.
+                */
+               if (map[0].br_startblock == DELAYSTARTBLOCK ||
+                   map[0].br_state == XFS_EXT_UNWRITTEN) {
+                       /* Probing page cache start from offset */
+                       offset = start;
+                       if (xfs_has_unwritten_buffer(inode, &map[0],
+                                                    &offset))
+                               goto out;
+               }
+
+               /*
+                * Found a hole in map[0] and nothing in map[1].
+                * Probably means that we are reading after EOF.
+                */
                if (nmap == 1) {
                        error = ENXIO;
                        goto out_unlock;
                }
 
-               offset = max_t(loff_t, start,
-                              XFS_FSB_TO_B(mp, map[1].br_startoff));
+               /*
+                * We have two mappings, and need to check map[1] to
+                * see if there is data or not.
+                */
+               if (map[1].br_state == XFS_EXT_NORM &&
+                   !isnullstartblock(map[1].br_startblock)) {
+                       offset = XFS_FSB_TO_B(mp, map[1].br_startoff);
+               } else {
+                       if (map[1].br_startblock == DELAYSTARTBLOCK ||
+                           map[1].br_state == XFS_EXT_UNWRITTEN) {
+                               /* Probing page cache start from offset */
+                               offset = start;
+                               if (xfs_has_unwritten_buffer(inode, &map[1],
+                                                            &offset))
+                                       goto out;
+                       }
+                       /*
+                        * xfs_bmapi_read() can handle repeated hole regions,
+                        * hence it should not return two extents both are
+                        * holes.  If the 2nd extent is unwritten, there must
+                        * have data buffer resides in page cache.
+                        */
+                       BUG();
+               }
        }
 
+out:
        if (offset != file->f_pos)
                file->f_pos = offset;
 
-- 
1.7.9

<Prev in Thread] Current Thread [Next in Thread>