xfs
[Top] [All Lists]

[PATCH 2/4] [RFC] xfs: limit speculative prealloc size on sparse files

To: xfs@xxxxxxxxxxx
Subject: [PATCH 2/4] [RFC] xfs: limit speculative prealloc size on sparse files
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Mon, 21 Jan 2013 23:53:53 +1100
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1358772835-21436-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1358772835-21436-1-git-send-email-david@xxxxxxxxxxxxx>
From: Dave Chinner <dchinner@xxxxxxxxxx>

This is an RFC that follow sup from a conversion Eric and I had on
IRC.  The idea is to prevent EOF speculative preallocation from
triggering larger allocations on IO patterns of
truncate--to-zero-seek-write-seek-write-....  which results in
non-sparse files for large files. This, unfortunately, is the way cp
behaves when copying sparse files, and it results in sub-optimal
destination file layouts.

What this code does is that it looks at the current extent over the
new EOF location, and if it is a hole it turns off preallocation
altogether. To avoid the next write from doing a large prealloc, it
takes the size of subsequent preallocations from the current size of
the existing EOF extent. IOWs, if you leave a hole in the file, it
resets preallocation behaviour to the same as if it was a zero size
file.

I haven't fully tested this, so I'm not sure if it works exactly
like I think it should, but I wanted to get this out there to get
more eyes on it...

Example new behaviour:

$ xfs_io -f -c "pwrite 0 31m" \
            -c "pwrite 33m 1m" \
            -c "pwrite 128m 1m" \
            -c "fiemap -v" /mnt/scratch/blah
wrote 32505856/32505856 bytes at offset 0
31 MiB, 7936 ops; 0.0000 sec (1.608 GiB/sec and 421432.7439 ops/sec)
wrote 1048576/1048576 bytes at offset 34603008
1 MiB, 256 ops; 0.0000 sec (1.462 GiB/sec and 383233.5329 ops/sec)
wrote 1048576/1048576 bytes at offset 134217728
1 MiB, 256 ops; 0.0000 sec (1.719 GiB/sec and 450704.2254 ops/sec)
/mnt/scratch/blah:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..65535]:      96..65631        65536   0x0
   1: [65536..67583]:  hole              2048
   2: [67584..69631]:  67680..69727      2048   0x0
   3: [69632..262143]: hole             192512
   4: [262144..264191]: 262240..264287    2048   0x1

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_iomap.c |   65 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 56 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4..3587772 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,53 @@ xfs_iomap_eof_want_preallocate(
 }
 
 /*
+ * Determine the initial size of the preallocation. It will be bound by the
+ * current file size, but when we are extending sparse files the current file
+ * size is not a good metric to use. Hence we need to look up the extent that
+ * ends at the current EOF and use the result to determine the preallocation
+ * size.
+ *
+ * If the extent is a hole, then preallocation is essentially disabled.
+ * Otherwise we take the size of the data extent as the basis for the
+ * preallocation size. If the size of the extent is greater than half the
+ * maximum extent length, then use the file size as the basis. This ensures 
that
+ * for large files the preallocation size always extends to MAXEXTLEN rather
+ * than falling short due to things like stripe unit/width alignment of real
+ * extents.
+ */
+STATIC int
+xfs_iomap_eof_prealloc_initial_size(
+       struct xfs_mount        *mp,
+       struct xfs_inode        *ip,
+       xfs_bmbt_irec_t         *imap,
+       int                     nimaps)
+{
+       xfs_fileoff_t   start_fsb;
+       xfs_fsblock_t   firstblock;
+       int             imaps = 1;
+       int             error;
+
+       ASSERT(nimaps >= imaps);
+
+       /* if we are using a specific prealloc size, return now */
+       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+               return 0;
+
+       start_fsb = XFS_B_TO_FSBT(mp, XFS_ISIZE(ip));
+       error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps,
+                              XFS_BMAPI_ENTIRE);
+       if (error)
+               return 0;
+
+       ASSERT(imaps == 1);
+       if (imap[0].br_startblock == HOLESTARTBLOCK)
+               return 0;
+       if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
+               return imap[0].br_blockcount;
+       return XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+}
+
+/*
  * If we don't have a user specified preallocation size, dynamically increase
  * the preallocation size as the size of the file grows. Cap the maximum size
  * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +366,17 @@ xfs_iomap_eof_want_preallocate(
 STATIC xfs_fsblock_t
 xfs_iomap_prealloc_size(
        struct xfs_mount        *mp,
-       struct xfs_inode        *ip)
+       struct xfs_inode        *ip,
+       xfs_bmbt_irec_t         *imap,
+       int                     nimaps)
 {
        xfs_fsblock_t           alloc_blocks = 0;
 
-       if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+       alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, imap, 
nimaps);
+       if (alloc_blocks > 0) {
                int shift = 0;
                int64_t freesp;
 
-               /*
-                * rounddown_pow_of_two() returns an undefined result
-                * if we pass in alloc_blocks = 0. Hence the "+ 1" to
-                * ensure we always pass in a non-zero value.
-                */
-               alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
                alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
                                        rounddown_pow_of_two(alloc_blocks));
 
@@ -398,7 +442,10 @@ xfs_iomap_write_delay(
 
 retry:
        if (prealloc) {
-               xfs_fsblock_t   alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+               xfs_fsblock_t   alloc_blocks;
+
+               alloc_blocks = xfs_iomap_prealloc_size(mp, ip, imap,
+                                                      XFS_WRITE_IMAPS);
 
                aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
                ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-- 
1.7.10

<Prev in Thread] Current Thread [Next in Thread>