xfs
[Top] [All Lists]

RE: [PATCH 3/3] ext4: Add support IOC_MOV_DATA ioctl

To: 'Lukáš Czerner' <lczerner@xxxxxxxxxx>
Subject: RE: [PATCH 3/3] ext4: Add support IOC_MOV_DATA ioctl
From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
Date: Wed, 09 Jul 2014 15:35:54 +0900
Cc: 'Dave Chinner' <david@xxxxxxxxxxxxx>, 'Theodore Ts'o' <tytso@xxxxxxx>, 'linux-ext4' <linux-ext4@xxxxxxxxxxxxxxx>, linux-fsdevel@xxxxxxxxxxxxxxx, linux-kernel@xxxxxxxxxxxxxxx, 'Brian Foster' <bfoster@xxxxxxxxxx>, 'Christoph Hellwig' <hch@xxxxxxxxxxxxx>, 'Ashish Sangwan' <a.sangwan@xxxxxxxxxxx>, xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
Dlp-filter: Pass
In-reply-to: <alpine.LFD.2.00.1407081558280.2180@xxxxxxxxxxxxxxxxxxxxx>
References: <004001cf9aa4$2670e280$7352a780$@samsung.com> <alpine.LFD.2.00.1407081558280.2180@xxxxxxxxxxxxxxxxxxxxx>
Thread-index: AQGrJOZNAI0ZuL5IevgT+zbvOhNoQAIW0xsrm89/AFA=
> On Tue, 8 Jul 2014, Namjae Jeon wrote:
> 
> > Date: Tue, 08 Jul 2014 21:00:02 +0900
> > From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
> > To: Dave Chinner <david@xxxxxxxxxxxxx>, Theodore Ts'o <tytso@xxxxxxx>
> > Cc: linux-ext4 <linux-ext4@xxxxxxxxxxxxxxx>, linux-fsdevel@xxxxxxxxxxxxxxx,
> >     linux-kernel@xxxxxxxxxxxxxxx, Lukáš Czerner <lczerner@xxxxxxxxxx>,
> >     Brian Foster <bfoster@xxxxxxxxxx>, Christoph Hellwig 
> > <hch@xxxxxxxxxxxxx>,
> >     Ashish Sangwan <a.sangwan@xxxxxxxxxxx>, xfs@xxxxxxxxxxx
> > Subject: [PATCH 3/3] ext4: Add support IOC_MOV_DATA ioctl
> >
> > This patch implements fs ioctl's IOC_MOV_DATA for Ext4.
> 
> Hmm isn't this basically what ext4_move_extents() does ? eg.
> EXT4_IOC_MOVE_EXT ?
> 
> I guess that the intention here is to do the move, without actually
> moving the data right ? 
Yes, Right.


> But nevertheless maybe some code can be
> shared with ext4_move_extents() ?
Yes, I will try to share code between IOC_MOV_DATA and EXT4_IOC_MOVE_EXT, as
I mentioned in my reply to Christoph.

Thanks!
> 
> -Lukas
> 
> >
> > The semantics of this ioctl are:
> > 1) Like collapse range, offsets and length should be file system block size
> >    aligned.
> > > 2) In the receiver file, a hole of at least length bytes should be present
> > >    at receiver_offset.
> > 3) It does not change file size of any of donor or receiver file.
> > > 4) It leaves a hole in the donor file at the place from which the blocks
> > >    are moved out.
> > > 5) Both (donor_offset + length) and (receiver_offset + length) should be
> > >    within the size of the donor file and receiver file respectively.
> > >    Only unwritten extents reside beyond the file size, and it does not make
> > >    sense to transfer unwritten extents, let alone the security issues it
> > >    may raise.
> > > 6) If the range to be transferred from the donor file contains any holes,
> > >    they are replicated as-is in the receiver file. This means holes are
> > >    preserved, and the length of each hole is added to moved_len, signifying
> > >    that the hole range was successfully transferred.
> >
> > Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
> > Signed-off-by: Ashish Sangwan <a.sangwan@xxxxxxxxxxx>
> > ---
> >  fs/ext4/ext4.h    |   2 +
> >  fs/ext4/extents.c | 375 
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/ext4/file.c    |   1 +
> >  3 files changed, 378 insertions(+)
> >
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 6386c5f..26478eb 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -2725,6 +2725,8 @@ extern int ext4_fiemap(struct inode *inode, struct 
> > fiemap_extent_info *fieinfo,
> >  extern int ext4_ext_precache(struct inode *inode);
> >  extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t 
> > len);
> >  extern int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
> > +extern int ext4_mov_data(struct inode *, struct inode *, loff_t, loff_t, 
> > loff_t,
> > +                    loff_t *);
> >
> >  /* move_extent.c */
> >  extern void ext4_double_down_write_data_sem(struct inode *first,
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 0c2432e..511db03 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -5811,3 +5811,378 @@ out_mutex:
> >     mutex_unlock(&inode->i_mutex);
> >     return ret;
> >  }
> > +
> > +/*
> > + * If offset_lblk does not lie on the extent start boundary, split extent
> > + */
> > +int ext4_find_and_split_extent_at(struct inode *inode, ext4_lblk_t 
> > offset_lblk)
> > +{
> > +   struct ext4_ext_path *path;
> > +   handle_t *handle;
> > +   int credits, err = 0, split_flag, ex_len;
> > +   struct ext4_extent *ex;
> > +   int depth = ext_depth(inode);
> > +   ext4_lblk_t ex_start;
> > +
> > +   path = ext4_ext_find_extent(inode, offset_lblk, NULL, 0);
> > +   if (IS_ERR(path))
> > +           return PTR_ERR(path);
> > +
> > +   ex = path[depth].p_ext;
> > +   if (!ex)
> > +           goto free_path;
> > +   ex_start = le32_to_cpu(ex->ee_block);
> > +   ex_len = ext4_ext_get_actual_len(ex);
> > +
> > +   if (offset_lblk > ex_start && offset_lblk < (ex_start + ex_len)) {
> > +           credits = ext4_writepage_trans_blocks(inode);
> > +           handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
> > +           if (IS_ERR(handle)) {
> > +                   err = PTR_ERR(handle);
> > +                   goto free_path;
> > +           }
> > +           if (ext4_ext_is_unwritten(ex))
> > +                   split_flag = EXT4_EXT_MARK_UNWRIT1 |
> > +                                EXT4_EXT_MARK_UNWRIT2;
> > +           else
> > +                   split_flag = 0;
> > +
> > +           err = ext4_split_extent_at(handle, inode, path, offset_lblk,
> > +                                      split_flag, EXT4_EX_NOCACHE |
> > +                                      EXT4_GET_BLOCKS_PRE_IO);
> > +           ext4_journal_stop(handle);
> > +   }
> > +
> > +free_path:
> > +   ext4_ext_drop_refs(path);
> > +   kfree(path);
> > +   return err;
> > +}
> > +
> > +/*
> > > + * Compute the size of hole in terms of filesystem blocks present at offset_lblk
> > > + * until the next extent is found OR till we reach the last block within isize.
> > + * Store the computed value in hole_blkcnt.
> > + * offset_lblk should be within isize of inode.
> > + */
> > +int ext4_compute_hole_size(struct inode *inode, ext4_lblk_t offset_lblk,
> > +                      ext4_lblk_t *hole_blkcnt)
> > +{
> > +   struct ext4_ext_path *path;
> > +   struct ext4_extent *ex;
> > +   ext4_lblk_t ex_start, isize_lblk;
> > +   int ret = 0, depth, ex_len;
> > +
> > +   isize_lblk = (inode->i_size + EXT4_BLOCK_SIZE(inode->i_sb) - 1) >>
> > +                EXT4_BLOCK_SIZE_BITS(inode->i_sb);
> > +
> > +   if (offset_lblk > isize_lblk)
> > +           return -EINVAL;
> > +
> > +   *hole_blkcnt = 0;
> > +   path = ext4_ext_find_extent(inode, offset_lblk, NULL, 0);
> > +   if (IS_ERR(path))
> > +           return PTR_ERR(path);
> > +
> > +   depth = ext_depth(inode);
> > +   ex = path[depth].p_ext;
> > +   if (!ex) {
> > +           /* No blocks allocated in this file */
> > +           *hole_blkcnt = isize_lblk - offset_lblk;
> > +           goto out;
> > +   }
> > +   ex_start = le32_to_cpu(ex->ee_block);
> > +   ex_len = ext4_ext_get_actual_len(ex);
> > +
> > +   /* if offset_lblk lies within extent? */
> > +   if (offset_lblk >= ex_start && offset_lblk < (ex_start + ex_len))
> > +           goto out;
> > +
> > +   if (ex_start < offset_lblk) {
> > +           ret = mext_next_extent(inode, path, &ex);
> > +           if (!ret) {
> > +                   ex_start = le32_to_cpu(ex->ee_block);
> > +           } else {
> > +                   if (ret == 1) {
> > +                           *hole_blkcnt = isize_lblk - offset_lblk;
> > +                           ret = 0;
> > +                   }
> > +                   goto out;
> > +           }
> > +   }
> > +   *hole_blkcnt = (ex_start < isize_lblk) ? (ex_start - offset_lblk) :
> > +                                            (isize_lblk - offset_lblk);
> > +out:
> > +   ext4_ext_drop_refs(path);
> > +   kfree(path);
> > +
> > +   return ret;
> > +}
> > +
> > +/*
> > + * Remove a complete extent from in memory and on-disk extent tree
> > + * without freeing any data blocks covered by the extent. Caller must call
> > + * ext4_mark_inode_dirty() to sync the changes to disk.
> > + */
> > +int ext4_ext_rm_extent(handle_t *handle, struct inode *inode,
> > +                  struct ext4_ext_path *path, struct ext4_extent *ex)
> > +{
> > +   struct ext4_extent_header *eh;
> > +   int depth = ext_depth(inode);
> > +   int credits, err, correct_index = 0;
> > +   int ex_ee_len = ext4_ext_get_actual_len(ex);
> > +
> > +   if (!path[depth].p_hdr)
> > +           path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
> > +   eh = path[depth].p_hdr;
> > +
> > +   credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
> > +   if (ex == EXT_FIRST_EXTENT(eh)) {
> > +           correct_index = 1;
> > +           credits += (ext_depth(inode)) + 1;
> > +   }
> > +   credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
> > +   err = ext4_ext_truncate_extend_restart(handle, inode, credits);
> > +   if (err)
> > +           return err;
> > +
> > +   err = ext4_ext_get_access(handle, inode, path + depth);
> > +   if (err)
> > +           return err;
> > +
> > +   ext4_ext_store_pblock(ex, 0);
> > +   memmove(ex, ex+1,
> > +           (EXT_LAST_EXTENT(eh) - ex) * sizeof(struct ext4_extent));
> > +   memset(EXT_LAST_EXTENT(eh), 0, sizeof(struct ext4_extent));
> > +   le16_add_cpu(&eh->eh_entries, -1);
> > +
> > +   err = ext4_ext_dirty(handle, inode, path + depth);
> > +   if (err)
> > +           return err;
> > +
> > +   if (correct_index && eh->eh_entries)
> > +           err = ext4_ext_correct_indexes(handle, inode, path);
> > +
> > +   if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
> > +           err = ext4_ext_rm_idx(handle, inode, path, depth);
> > +
> > +   return err;
> > +}
> > +
> > +/*
> > + * Move len_lblk amount of blocks from donor inode to receiver inode.
> > + * Blocks are to be moved from doffset_lblk and moved to roffset_lblk.
> > > + * Caller of this function must make sure there is at least len_lblk size
> > + * hole at roffset_lblk. Also doffset_lblk and doffset_lblk + len_lblk
> > + * should fall on extent boundary.
> > + */
> > +int ext4_ext_mov_data(struct inode *donor, struct inode *receiver,
> > +                 ext4_lblk_t doffset_lblk, ext4_lblk_t roffset_lblk,
> > +                 ext4_lblk_t len_lblk, loff_t *bytes_moved)
> > +{
> > +   int error = 0, depth = ext_depth(donor);
> > +   struct ext4_ext_path *path;
> > +   struct ext4_extent *ex;
> > +   loff_t blocks_moved = 0;
> > +   handle_t *handle;
> > +   int credits = ext4_writepage_trans_blocks(donor) +
> > +                 ext4_writepage_trans_blocks(receiver);
> > +
> > +   while (blocks_moved < len_lblk && !error) {
> > +           struct ext4_ext_path *rpath = NULL;
> > +           ext4_lblk_t ex_start;
> > +           int ex_len;
> > +
> > +           path = ext4_ext_find_extent(donor, doffset_lblk, NULL, 0);
> > +           if (IS_ERR(path)) {
> > +                   error = PTR_ERR(path);
> > +                   break;
> > +           }
> > +           ex = path[depth].p_ext;
> > +           /*
> > +            * No allocated blocks? This could only happen during
> > +            * 1st iteration. Otherwise it is en error.
> > +            */
> > +           if (!ex) {
> > +                   if (blocks_moved)
> > +                           error = -EIO;
> > +                   else
> > +                           blocks_moved = len_lblk;
> > +                   goto out;
> > +           }
> > +           ex_start = le32_to_cpu(ex->ee_block);
> > +           ex_len = ext4_ext_get_actual_len(ex);
> > +
> > +           if (doffset_lblk != ex_start) {
> > +                   /* Hole within range, move to the next extent */
> > +                   if (ex_start < doffset_lblk)
> > +                           error = mext_next_extent(donor, path, &ex);
> > +                   /* Below if will also handle ex_start > doffset_lblk */
> > +                   if (error == 0) {
> > +                           ex_start = le32_to_cpu(ex->ee_block);
> > +                           blocks_moved += ex_start - doffset_lblk;
> > +                           roffset_lblk += ex_start - doffset_lblk;
> > +                           doffset_lblk = ex_start;
> > +                   }
> > +                   if (error == 1) {
> > +                           /* doffset_lblk till EOF is hole. Success!! */
> > +                           blocks_moved = len_lblk;
> > +                           error = 0;
> > +                   }
> > +                   goto out;
> > +           }
> > +
> > +           /* Add this extent to receiver */
> > +           handle = ext4_journal_start(donor, EXT4_HT_TRUNCATE, credits);
> > +           if (IS_ERR(handle)) {
> > +                   error = PTR_ERR(handle);
> > +                   goto out;
> > +           }
> > +
> > +           rpath = ext4_ext_find_extent(receiver, roffset_lblk, NULL, 0);
> > +           if (IS_ERR(rpath)) {
> > +                   error = PTR_ERR(rpath);
> > +                   ext4_journal_stop(handle);
> > +                   goto out;
> > +           }
> > +           ex->ee_block = cpu_to_le32(roffset_lblk);
> > +           error = ext4_ext_insert_extent(handle, receiver, rpath, ex, 0);
> > +           if (error)
> > +                   goto hout;
> > +
> > +           /* Remove this extent from donor */
> > +           error = ext4_ext_rm_extent(handle, donor, path, ex);
> > +           if (error)
> > +                   goto hout;
> > +
> > +           /* Extent moved successfully */
> > +           roffset_lblk += ex_len;
> > +           doffset_lblk += ex_len;
> > +           blocks_moved += ex_len;
> > +
> > +           donor->i_blocks -= (ex_len << (donor->i_blkbits - 9));
> > +           receiver->i_blocks += (ex_len << (receiver->i_blkbits - 9));
> > +           donor->i_mtime = donor->i_ctime = ext4_current_time(donor);
> > +           receiver->i_mtime = receiver->i_ctime =
> > +                                           ext4_current_time(receiver);
> > +           ext4_mark_inode_dirty(handle, donor);
> > +           ext4_mark_inode_dirty(handle, receiver);
> > +hout:
> > +           ext4_journal_stop(handle);
> > +           ext4_ext_drop_refs(rpath);
> > +           kfree(rpath);
> > +out:
> > +           ext4_ext_drop_refs(path);
> > +           kfree(path);
> > +   }
> > +
> > +   /* This can happen when (doffset_lblk + len_lblk) is in a hole */
> > +   if (blocks_moved > len_lblk)
> > +           blocks_moved = len_lblk;
> > +
> > +   *bytes_moved = blocks_moved << EXT4_BLOCK_SIZE_BITS(donor->i_sb);
> > +   return error;
> > +}
> > +
> > +int ext4_mov_data(struct inode *donor, struct inode *receiver, loff_t 
> > doffset,
> > +             loff_t roffset, loff_t len, loff_t *moved_len)
> > +{
> > +   struct super_block *sb = donor->i_sb;
> > +   loff_t d_pg_off, r_pg_off, pg_len;
> > +   ext4_lblk_t doffset_lblk, roffset_lblk, len_lblk, hole_size;
> > +   int error;
> > +
> > +   if (doffset & (EXT4_BLOCK_SIZE(sb) - 1) ||
> > +       roffset & (EXT4_BLOCK_SIZE(sb) - 1) ||
> > +       len & (EXT4_BLOCK_SIZE(sb) - 1))
> > +           return -EINVAL;
> > +
> > +   if (EXT4_SB(sb)->s_cluster_ratio > 1)
> > +           return -EOPNOTSUPP;
> > +
> > +   if (!ext4_test_inode_flag(donor, EXT4_INODE_EXTENTS) ||
> > +       !ext4_test_inode_flag(receiver, EXT4_INODE_EXTENTS))
> > +           return -EOPNOTSUPP;
> > +
> > +   doffset_lblk = doffset >> EXT4_BLOCK_SIZE_BITS(sb);
> > +   roffset_lblk = roffset >> EXT4_BLOCK_SIZE_BITS(sb);
> > +   len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
> > +
> > +   d_pg_off = round_down(doffset, PAGE_SIZE);
> > +   r_pg_off = round_down(roffset, PAGE_SIZE);
> > +   pg_len = round_up(len, PAGE_SIZE);
> > +
> > +   if (ext4_should_journal_data(donor)) {
> > +           error = ext4_force_commit(donor->i_sb);
> > +           if (error)
> > +                   return error;
> > +           error = ext4_force_commit(receiver->i_sb);
> > +           if (error)
> > +                   return error;
> > +   }
> > +
> > +   error = filemap_write_and_wait_range(donor->i_mapping,
> > +                                        d_pg_off, d_pg_off + pg_len);
> > +   if (error)
> > +           return error;
> > +   error = filemap_write_and_wait_range(receiver->i_mapping,
> > +                                        r_pg_off, r_pg_off + pg_len);
> > +   if (error)
> > +           return error;
> > +
> > +   lock_two_nondirectories(donor, receiver);
> > +
> > +   /* Check for isize limits for both files */
> > +   if (doffset + len > donor->i_size ||
> > +       roffset + len > receiver->i_size) {
> > +           error = -EINVAL;
> > +           goto out_mutex;
> > +   }
> > +
> > +   truncate_pagecache_range(donor, d_pg_off, d_pg_off + pg_len - 1);
> > +   truncate_pagecache_range(receiver, r_pg_off, r_pg_off + pg_len - 1);
> > +
> > +   ext4_inode_block_unlocked_dio(donor);
> > +   inode_dio_wait(donor);
> > +   ext4_inode_block_unlocked_dio(receiver);
> > +   inode_dio_wait(receiver);
> > +
> > +   ext4_discard_preallocations(donor);
> > +   ext4_discard_preallocations(receiver);
> > +
> > +   error = ext4_es_remove_extent(donor, doffset_lblk, len_lblk);
> > +   if (error)
> > +           goto out_sem;
> > +   error = ext4_es_remove_extent(receiver, roffset_lblk, len_lblk);
> > +   if (error)
> > +           goto out_sem;
> > +
> > +   error = ext4_compute_hole_size(receiver, roffset_lblk, &hole_size);
> > +   if (error)
> > +           goto out_sem;
> > +   if (len_lblk > hole_size) {
> > +           error = -EINVAL;
> > +           goto out_sem;
> > +   }
> > +
> > +   error = ext4_find_and_split_extent_at(donor, doffset_lblk);
> > +   if (error)
> > +           goto out_sem;
> > +
> > +   error = ext4_find_and_split_extent_at(donor, doffset_lblk + len_lblk);
> > +   if (error)
> > +           goto out_sem;
> > +
> > +   error = ext4_ext_mov_data(donor, receiver, doffset_lblk,
> > +                             roffset_lblk, len_lblk, moved_len);
> > +
> > +   ext4_discard_preallocations(donor);
> > +   ext4_discard_preallocations(receiver);
> > +out_sem:
> > +   ext4_inode_resume_unlocked_dio(donor);
> > +   ext4_inode_resume_unlocked_dio(receiver);
> > +
> > +out_mutex:
> > +   unlock_two_nondirectories(donor, receiver);
> > +   return error;
> > +}
> > diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> > index 8695f70..d2feaba 100644
> > --- a/fs/ext4/file.c
> > +++ b/fs/ext4/file.c
> > @@ -614,5 +614,6 @@ const struct inode_operations 
> > ext4_file_inode_operations = {
> >     .get_acl        = ext4_get_acl,
> >     .set_acl        = ext4_set_acl,
> >     .fiemap         = ext4_fiemap,
> > +   .mov_data       = ext4_mov_data,
> >  };
> >
> >

<Prev in Thread] Current Thread [Next in Thread>