xfs
[Top] [All Lists]

Re: [PATCH 2/4] [RFC] xfs: limit speculative prealloc size on sparse fil

To: Dave Chinner <david@xxxxxxxxxxxxx>
Subject: Re: [PATCH 2/4] [RFC] xfs: limit speculative prealloc size on sparse files
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Tue, 22 Jan 2013 14:04:12 -0500
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1358772835-21436-3-git-send-email-david@xxxxxxxxxxxxx>
References: <1358772835-21436-1-git-send-email-david@xxxxxxxxxxxxx> <1358772835-21436-3-git-send-email-david@xxxxxxxxxxxxx>
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/17.0 Thunderbird/17.0
On 01/21/2013 07:53 AM, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> This is an RFC that follow sup from a conversion Eric and I had on
> IRC.  The idea is to prevent EOF speculative preallocation from
> triggering larger allocations on IO patterns of
> truncate--to-zero-seek-write-seek-write-....  which results in
> non-sparse files for large files. This, unfortunately, is the way cp
> behaves when copying sparse files, and it results in sub-optimal
> destination file layouts.
> 
> What this code does is that it looks at the current extent over the
> new EOF location, and if it is a hole it turns off preallocation
> altogether. To avoid the next write from doing a large prealloc, it
> takes the size of subsequent preallocations from the current size of
> the existing EOF extent. IOWs, if you leave a hole in the file, it
> resets preallocation behaviour to the same as if it was a zero size
> file.
> 
> I haven't fully tested this, so I'm not sure if it works exactly
> like I think it should, but I wanted to get this out there to get
> more eyes on it...
> 

On a quick test, I didn't quite get the behavior documented below. Is it
possible your test file had the initial extent preallocated from an xfs
module with the current preallocation scheme?

What I see is that sequential writes to a file disable preallocation
completely (so the first extent in the test below is 31m instead of
32m). Digging a bit further, it seemed to be due to start_fsb always
being a hole. I hacked that a bit to read the extent of the block
immediately previous to the write offset (instead of the inode size), e.g.:

        start_fsb = XFS_B_TO_FSBT(mp, offset);
        if (start_fsb)
                start_fsb--;

... and I seem to get expected behavior, at least in the simple xfs_io test.

Brian

> Example new behaviour:
> 
> $ xfs_io -f -c "pwrite 0 31m" \
>             -c "pwrite 33m 1m" \
>             -c "pwrite 128m 1m" \
>             -c "fiemap -v" /mnt/scratch/blah
> wrote 32505856/32505856 bytes at offset 0
> 31 MiB, 7936 ops; 0.0000 sec (1.608 GiB/sec and 421432.7439 ops/sec)
> wrote 1048576/1048576 bytes at offset 34603008
> 1 MiB, 256 ops; 0.0000 sec (1.462 GiB/sec and 383233.5329 ops/sec)
> wrote 1048576/1048576 bytes at offset 134217728
> 1 MiB, 256 ops; 0.0000 sec (1.719 GiB/sec and 450704.2254 ops/sec)
> /mnt/scratch/blah:
>  EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
>    0: [0..65535]:      96..65631        65536   0x0
>    1: [65536..67583]:  hole              2048
>    2: [67584..69631]:  67680..69727      2048   0x0
>    3: [69632..262143]: hole             192512
>    4: [262144..264191]: 262240..264287    2048   0x1
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_iomap.c |   65 
> ++++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 56 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index add06b4..3587772 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -311,6 +311,53 @@ xfs_iomap_eof_want_preallocate(
>  }
>  
>  /*
> + * Determine the initial size of the preallocation. It will be bound by the
> + * current file size, but when we are extending sparse files the current file
> + * size is not a good metric to use. Hence we need to look up the extent that
> + * ends at the current EOF and use the result to determine the preallocation
> + * size.
> + *
> + * If the extent is a hole, then preallocation is essentially disabled.
> + * Otherwise we take the size of the data extent as the basis for the
> + * preallocation size. If the size of the extent is greater than half the
> + * maximum extent length, then use the file size as the basis. This ensures 
> that
> + * for large files the preallocation size always extends to MAXEXTLEN rather
> + * than falling short due to things like stripe unit/width alignment of real
> + * extents.
> + */
> +STATIC int
> +xfs_iomap_eof_prealloc_initial_size(
> +     struct xfs_mount        *mp,
> +     struct xfs_inode        *ip,
> +     xfs_bmbt_irec_t         *imap,
> +     int                     nimaps)
> +{
> +     xfs_fileoff_t   start_fsb;
> +     xfs_fsblock_t   firstblock;
> +     int             imaps = 1;
> +     int             error;
> +
> +     ASSERT(nimaps >= imaps);
> +
> +     /* if we are using a specific prealloc size, return now */
> +     if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
> +             return 0;
> +
> +     start_fsb = XFS_B_TO_FSBT(mp, XFS_ISIZE(ip));
> +     error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps,
> +                            XFS_BMAPI_ENTIRE);
> +     if (error)
> +             return 0;
> +
> +     ASSERT(imaps == 1);
> +     if (imap[0].br_startblock == HOLESTARTBLOCK)
> +             return 0;
> +     if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
> +             return imap[0].br_blockcount;
> +     return XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
> +}
> +
> +/*
>   * If we don't have a user specified preallocation size, dynamically increase
>   * the preallocation size as the size of the file grows. Cap the maximum size
>   * at a single extent or less if the filesystem is near full. The closer the
> @@ -319,20 +366,17 @@ xfs_iomap_eof_want_preallocate(
>  STATIC xfs_fsblock_t
>  xfs_iomap_prealloc_size(
>       struct xfs_mount        *mp,
> -     struct xfs_inode        *ip)
> +     struct xfs_inode        *ip,
> +     xfs_bmbt_irec_t         *imap,
> +     int                     nimaps)
>  {
>       xfs_fsblock_t           alloc_blocks = 0;
>  
> -     if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
> +     alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, imap, 
> nimaps);
> +     if (alloc_blocks > 0) {
>               int shift = 0;
>               int64_t freesp;
>  
> -             /*
> -              * rounddown_pow_of_two() returns an undefined result
> -              * if we pass in alloc_blocks = 0. Hence the "+ 1" to
> -              * ensure we always pass in a non-zero value.
> -              */
> -             alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
>               alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
>                                       rounddown_pow_of_two(alloc_blocks));
>  
> @@ -398,7 +442,10 @@ xfs_iomap_write_delay(
>  
>  retry:
>       if (prealloc) {
> -             xfs_fsblock_t   alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
> +             xfs_fsblock_t   alloc_blocks;
> +
> +             alloc_blocks = xfs_iomap_prealloc_size(mp, ip, imap,
> +                                                    XFS_WRITE_IMAPS);
>  
>               aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
>               ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
> 

<Prev in Thread] Current Thread [Next in Thread>