[PATCH 2/4] [RFC] xfs: limit speculative prealloc size on sparse files
Dave Chinner
david at fromorbit.com
Mon Jan 21 06:53:53 CST 2013
From: Dave Chinner <dchinner at redhat.com>
This is an RFC that follow sup from a conversion Eric and I had on
IRC. The idea is to prevent EOF speculative preallocation from
triggering larger allocations on IO patterns of
truncate--to-zero-seek-write-seek-write-.... which results in
non-sparse files for large files. This, unfortunately, is the way cp
behaves when copying sparse files, and it results in sub-optimal
destination file layouts.
What this code does is that it looks at the current extent over the
new EOF location, and if it is a hole it turns off preallocation
altogether. To avoid the next write from doing a large prealloc, it
takes the size of subsequent preallocations from the current size of
the existing EOF extent. IOWs, if you leave a hole in the file, it
resets preallocation behaviour to the same as if it was a zero size
file.
I haven't fully tested this, so I'm not sure if it works exactly
like I think it should, but I wanted to get this out there to get
more eyes on it...
Example new behaviour:
$ xfs_io -f -c "pwrite 0 31m" \
-c "pwrite 33m 1m" \
-c "pwrite 128m 1m" \
-c "fiemap -v" /mnt/scratch/blah
wrote 32505856/32505856 bytes at offset 0
31 MiB, 7936 ops; 0.0000 sec (1.608 GiB/sec and 421432.7439 ops/sec)
wrote 1048576/1048576 bytes at offset 34603008
1 MiB, 256 ops; 0.0000 sec (1.462 GiB/sec and 383233.5329 ops/sec)
wrote 1048576/1048576 bytes at offset 134217728
1 MiB, 256 ops; 0.0000 sec (1.719 GiB/sec and 450704.2254 ops/sec)
/mnt/scratch/blah:
EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
0: [0..65535]: 96..65631 65536 0x0
1: [65536..67583]: hole 2048
2: [67584..69631]: 67680..69727 2048 0x0
3: [69632..262143]: hole 192512
4: [262144..264191]: 262240..264287 2048 0x1
Signed-off-by: Dave Chinner <dchinner at redhat.com>
---
fs/xfs/xfs_iomap.c | 65 ++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 56 insertions(+), 9 deletions(-)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4..3587772 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,53 @@ xfs_iomap_eof_want_preallocate(
}
/*
+ * Determine the initial size of the preallocation. It will be bound by the
+ * current file size, but when we are extending sparse files the current file
+ * size is not a good metric to use. Hence we need to look up the extent that
+ * ends at the current EOF and use the result to determine the preallocation
+ * size.
+ *
+ * If the extent is a hole, then preallocation is essentially disabled.
+ * Otherwise we take the size of the data extent as the basis for the
+ * preallocation size. If the size of the extent is greater than half the
+ * maximum extent length, then use the file size as the basis. This ensures that
+ * for large files the preallocation size always extends to MAXEXTLEN rather
+ * than falling short due to things like stripe unit/width alignment of real
+ * extents.
+ */
+STATIC int
+xfs_iomap_eof_prealloc_initial_size(
+ struct xfs_mount *mp,
+ struct xfs_inode *ip,
+ xfs_bmbt_irec_t *imap,
+ int nimaps)
+{
+ xfs_fileoff_t start_fsb;
+ xfs_fsblock_t firstblock;
+ int imaps = 1;
+ int error;
+
+ ASSERT(nimaps >= imaps);
+
+ /* if we are using a specific prealloc size, return now */
+ if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+ return 0;
+
+ start_fsb = XFS_B_TO_FSBT(mp, XFS_ISIZE(ip));
+ error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps,
+ XFS_BMAPI_ENTIRE);
+ if (error)
+ return 0;
+
+ ASSERT(imaps == 1);
+ if (imap[0].br_startblock == HOLESTARTBLOCK)
+ return 0;
+ if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
+ return imap[0].br_blockcount;
+ return XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+}
+
+/*
* If we don't have a user specified preallocation size, dynamically increase
* the preallocation size as the size of the file grows. Cap the maximum size
* at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +366,17 @@ xfs_iomap_eof_want_preallocate(
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
struct xfs_mount *mp,
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ xfs_bmbt_irec_t *imap,
+ int nimaps)
{
xfs_fsblock_t alloc_blocks = 0;
- if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+ alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, imap, nimaps);
+ if (alloc_blocks > 0) {
int shift = 0;
int64_t freesp;
- /*
- * rounddown_pow_of_two() returns an undefined result
- * if we pass in alloc_blocks = 0. Hence the "+ 1" to
- * ensure we always pass in a non-zero value.
- */
- alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
rounddown_pow_of_two(alloc_blocks));
@@ -398,7 +442,10 @@ xfs_iomap_write_delay(
retry:
if (prealloc) {
- xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+ xfs_fsblock_t alloc_blocks;
+
+ alloc_blocks = xfs_iomap_prealloc_size(mp, ip, imap,
+ XFS_WRITE_IMAPS);
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
--
1.7.10
More information about the xfs
mailing list