xfs
[Top] [All Lists]

[PATCH v3 3/7] ext4: add support FALLOC_FL_COLLAPSE_RANGE for fallocate

To: viro@xxxxxxxxxxxxxxxxxx, mtk.manpages@xxxxxxxxx, tytso@xxxxxxx, adilger.kernel@xxxxxxxxx, bpm@xxxxxxx, elder@xxxxxxxxxx, hch@xxxxxxxxxxxxx, david@xxxxxxxxxxxxx
Subject: [PATCH v3 3/7] ext4: add support FALLOC_FL_COLLAPSE_RANGE for fallocate
From: Namjae Jeon <linkinjeon@xxxxxxxxx>
Date: Mon, 2 Sep 2013 23:29:24 +0900
Cc: linux-fsdevel@xxxxxxxxxxxxxxx, linux-kernel@xxxxxxxxxxxxxxx, linux-ext4@xxxxxxxxxxxxxxx, xfs@xxxxxxxxxxx, a.sangwan@xxxxxxxxxxx, Namjae Jeon <linkinjeon@xxxxxxxxx>, Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
Delivered-to: xfs@xxxxxxxxxxx
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id; bh=KxLjUWGzQdb0J75Exlu1H8drkkFh+cfYxDIcWcdcHqc=; b=LMU7UIE8shzQuWMnoUuh/bKirotjBNDrbd9905p6qtdf49vwAkgsFOHiUy28sdFfdi wWnLwu3BUhoijcFVfjLl9RFLPRNA24fKzm4X41+Odg3VHGmfP3bmyaBZe81c9Wstlew7 qlL+arcuB82Ho8U5jKe6jZo9ZJ7yXhdGZN8xqFHwjLYyXu/QViemMosQ7oJ436x4OnMx rGt8Mt8dLAN9T8GRctPUPw8AU1dJlx5tjdXe7xO0j9ObUzn40sWLjBuS7RSr43Pc3Slh kIseUU2OYMidc2prMS7ez1WxFGRyZGMer/xoKBICWlfWsqB5Y6sTE37bD4ppJv5TO/r4 AFHg==
From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>

Add support FALLOC_FL_COLLAPSE_RANGE for fallocate.

Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
Signed-off-by: Ashish Sangwan <a.sangwan@xxxxxxxxxxx>
---
 fs/ext4/ext4.h              |    3 +
 fs/ext4/extents.c           |  286 ++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/move_extent.c       |    2 +-
 include/trace/events/ext4.h |   25 ++++
 4 files changed, 314 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f3c43fc..a13e0f4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2737,6 +2737,7 @@ extern int ext4_find_delalloc_cluster(struct inode 
*inode, ext4_lblk_t lblk);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        __u64 start, __u64 len);
 extern int ext4_ext_precache(struct inode *inode);
+extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
 
 /* move_extent.c */
 extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2748,6 +2749,8 @@ void ext4_inode_double_unlock(struct inode *inode1, 
struct inode *inode2);
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
                             __u64 start_orig, __u64 start_donor,
                             __u64 len, __u64 *moved_len);
+extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+                           struct ext4_extent **extent);
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 54d52af..baf1faf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4580,12 +4580,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t 
offset, loff_t len)
        unsigned int credits, blkbits = inode->i_blkbits;
 
        /* Return error if mode is not supported */
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_COLLAPSE_RANGE))
                return -EOPNOTSUPP;
 
        if (mode & FALLOC_FL_PUNCH_HOLE)
                return ext4_punch_hole(inode, offset, len);
 
+       if (mode & FALLOC_FL_COLLAPSE_RANGE)
+               return ext4_collapse_range(inode, offset, len);
+
        ret = ext4_convert_inline_data(inode);
        if (ret)
                return ret;
@@ -4884,3 +4888,283 @@ int ext4_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
        ext4_es_lru_add(inode);
        return error;
 }
+
+/*
+ * ext4_access_path:
+ * Function to access the path buffer for marking it dirty.
+ * It also checks if there are sufficient credits left in the journal handle
+ * to update path.
+ */
+static int
+ext4_access_path(handle_t *handle, struct inode *inode,
+               struct ext4_ext_path *path)
+{
+       int credits, err;
+
+       /*
+        * Check if need to extend journal credits
+        * 3 for leaf, sb, and inode plus 2 (bmap and group
+        * descriptor) for each block group; assume two block
+        * groups
+        */
+       if (handle->h_buffer_credits < 7) {
+               credits = ext4_writepage_trans_blocks(inode);
+               err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+               /* EAGAIN is success */
+               if (err && err != -EAGAIN)
+                       return err;
+       }
+
+       err = ext4_ext_get_access(handle, inode, path);
+       return err;
+}
+
+/*
+ * ext4_ext_shift_path_extents:
+ * Shift the extents of a path structure lying between path[depth].p_ext
+ * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift
+ * from starting block for each extent.
+ */
+static int
+ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
+                           struct inode *inode, handle_t *handle,
+                           ext4_lblk_t *start)
+{
+       int depth, err = 0;
+       struct ext4_extent *ex_start, *ex_last;
+       bool update = 0;
+       depth = path->p_depth;
+
+       while (depth >= 0) {
+               if (depth == path->p_depth) {
+                       ex_start = path[depth].p_ext;
+                       if (!ex_start)
+                               return -EIO;
+
+                       ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
+                       if (!ex_last)
+                               return -EIO;
+
+                       err = ext4_access_path(handle, inode, path + depth);
+                       if (err)
+                               goto out;
+
+                       if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
+                               update = 1;
+
+                       *start = ex_last->ee_block +
+                               ext4_ext_get_actual_len(ex_last);
+
+                       while (ex_start <= ex_last) {
+                               ex_start->ee_block -= shift;
+                               ext4_ext_try_to_merge(handle, inode,
+                                               path, ex_start);
+                               ex_start++;
+                       }
+                       err = ext4_ext_dirty(handle, inode, path + depth);
+                       if (err)
+                               goto out;
+
+                       if (--depth < 0 || !update)
+                               break;
+               }
+
+               /* Update index too */
+               err = ext4_access_path(handle, inode, path + depth);
+               if (err)
+                       goto out;
+
+               path[depth].p_idx->ei_block -= shift;
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               if (err)
+                       goto out;
+
+               /* we are done if current index is not a starting index */
+               if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
+                       break;
+
+               depth--;
+       }
+
+out:
+       return err;
+}
+
+/*
+ * ext4_ext_shift_extents:
+ * All the extents which lies in the range from start to the last allocated
+ * block for the file are shifted downwards by shift blocks.
+ * On success, 0 is returned, error otherwise.
+ */
+static int
+ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
+                      ext4_lblk_t start, ext4_lblk_t shift)
+{
+       struct ext4_ext_path *path;
+       int ret = 0, depth;
+       struct ext4_extent *extent;
+       ext4_lblk_t stop_block, current_block;
+       ext4_lblk_t ex_start, ex_end;
+
+       /* Let path point to the last extent */
+       path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       depth = path->p_depth;
+       extent = path[depth].p_ext;
+       if (!extent) {
+               ext4_ext_drop_refs(path);
+               kfree(path);
+               return ret;
+       }
+
+       stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
+       ext4_ext_drop_refs(path);
+       kfree(path);
+
+       /* Nothing to shift, if hole is at the end of file */
+       if (start >= stop_block)
+               return ret;
+
+       /*
+        * Don't start shifting extents until we make sure the hole is big
+        * enough to accomodate the shift.
+        */
+       path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+       depth = path->p_depth;
+       extent =  path[depth].p_ext;
+       ex_start = extent->ee_block;
+       ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
+       ext4_ext_drop_refs(path);
+       kfree(path);
+
+       if ((ex_start > start - 1 && shift > ex_start) ||
+           (ex_end > start - shift))
+               return -EIO;
+
+       /* Its safe to start updating extents */
+       while (start < stop_block) {
+               path = ext4_ext_find_extent(inode, start, NULL, 0);
+               if (IS_ERR(path))
+                       return PTR_ERR(path);
+               depth = path->p_depth;
+               extent = path[depth].p_ext;
+               current_block = extent->ee_block;
+               if (start > current_block) {
+                       /* Hole, move to the next extent */
+                       ret = mext_next_extent(inode, path, &extent);
+                       if (ret != 0) {
+                               ext4_ext_drop_refs(path);
+                               kfree(path);
+                               if (ret == 1)
+                                       ret = 0;
+                               break;
+                       }
+               }
+               ret = ext4_ext_shift_path_extents(path, shift, inode,
+                               handle, &start);
+               ext4_ext_drop_refs(path);
+               kfree(path);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * ext4_collapse_range:
+ * This implements the fallocate's collapse range functionality for ext4
+ * Returns: 0 and non-zero on error.
+ */
+int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+{
+       struct super_block *sb = inode->i_sb;
+       ext4_lblk_t punch_start, punch_stop;
+       handle_t *handle;
+       unsigned int credits;
+       unsigned int rounding;
+       loff_t ioffset, new_size;
+       int ret;
+
+       if (!S_ISREG(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       if (EXT4_SB(sb)->s_cluster_ratio > 1)
+               return -EOPNOTSUPP;
+
+       /* Currently just for extent based files */
+       if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+               return -EOPNOTSUPP;
+
+       if (IS_SWAPFILE(inode))
+               return -ETXTBSY;
+
+       trace_ext4_collapse_range(inode, offset, len);
+
+       punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+       punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+       rounding = max_t(uint, 1 << EXT4_BLOCK_SIZE_BITS(sb), PAGE_CACHE_SIZE);
+       ioffset = offset & ~(rounding - 1);
+
+       /* Write out all dirty pages */
+       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, -1);
+       if (ret)
+               return ret;
+
+       /* Take mutex lock */
+       mutex_lock(&inode->i_mutex);
+
+       /* Wait for existing dio to complete */
+       ext4_inode_block_unlocked_dio(inode);
+       inode_dio_wait(inode);
+
+       truncate_pagecache_range(inode, ioffset, -1);
+
+       credits = ext4_writepage_trans_blocks(inode);
+       handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto out_dio;
+       }
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+
+       ext4_discard_preallocations(inode);
+
+       ret = ext4_es_remove_extent(inode, punch_start,
+                                   EXT_MAX_BLOCKS - punch_start - 1);
+       if (ret)
+               goto journal_stop;
+
+       ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
+       if (ret)
+               goto journal_stop;
+
+       ret = ext4_ext_shift_extents(inode, handle, punch_stop,
+                                    punch_stop - punch_start);
+       if (ret)
+               goto journal_stop;
+
+       if ((offset + len) > i_size_read(inode))
+               new_size = offset;
+       else
+               new_size = i_size_read(inode) - len;
+
+       truncate_setsize(inode, new_size);
+       EXT4_I(inode)->i_disksize = new_size;
+
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ext4_mark_inode_dirty(handle, inode);
+
+journal_stop:
+       ext4_journal_stop(handle);
+       up_write(&EXT4_I(inode)->i_data_sem);
+
+out_dio:
+       ext4_inode_resume_unlocked_dio(inode);
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7fa4d85..4cbebb0 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct 
ext4_extent *dest)
  * ext4_ext_path structure refers to the last extent, or a negative error
  * value on failure.
  */
-static int
+int
 mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
                      struct ext4_extent **extent)
 {
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 197d312..90e2f71 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2410,6 +2410,31 @@ TRACE_EVENT(ext4_es_shrink_exit,
                  __entry->shrunk_nr, __entry->cache_cnt)
 );
 
+TRACE_EVENT(ext4_collapse_range,
+       TP_PROTO(struct inode *inode, loff_t offset, loff_t len),
+
+       TP_ARGS(inode, offset, len),
+
+       TP_STRUCT__entry(
+               __field(dev_t,  dev)
+               __field(ino_t,  ino)
+               __field(loff_t, offset)
+               __field(loff_t, len)
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->offset = offset;
+               __entry->len    = len;
+       ),
+
+       TP_printk("dev %d,%d ino %lu offset %lld len %lld",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->offset, __entry->len)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
-- 
1.7.9.5

<Prev in Thread] Current Thread [Next in Thread>