xfs
[Top] [All Lists]

Re: [RFC PATCH] xfs: enables file data inlining in inodes

To: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
Subject: Re: [RFC PATCH] xfs: enables file data inlining in inodes
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Fri, 12 Oct 2012 11:31:50 -0400
Cc: xfs@xxxxxxxxxxx
In-reply-to: <1349985158-9952-1-git-send-email-cmaiolino@xxxxxxxxxx>
References: <1349985158-9952-1-git-send-email-cmaiolino@xxxxxxxxxx>
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20120827 Thunderbird/15.0
On 10/11/2012 03:52 PM, Carlos Maiolino wrote:
> Hi,
> 
> this is a first RFC patch of my work on data inlining, i.e. use the xfs 
> inode's
> literal area to store user's data.
> 
> This first patch just cares about write and read new files into inode's 
> literal
> area, it does not make any conversion from inline to extent or vice-versa.
> 
> The idea to send this patch to list is just to get comments about this first
> work and see if anybody has some ideas/suggestions about it, mainly related
> with page cache and journal handling, since it's the first time I deal with
> journal and page cache handling, I'm not pretty sure if I did things right
> or not.
> 
> Every comment is very appreciated.
> 
> Thanks
> ---
>  fs/xfs/xfs_aops.c  | 134 
> ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  fs/xfs/xfs_inode.c |  15 +-----
>  2 files changed, 130 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index e562dd4..3d30528 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -31,6 +31,7 @@
>  #include "xfs_vnodeops.h"
>  #include "xfs_trace.h"
>  #include "xfs_bmap.h"
> +#include "xfs_inode_item.h"
>  #include <linux/gfp.h>
>  #include <linux/mpage.h>
>  #include <linux/pagevec.h>
> @@ -460,6 +461,95 @@ xfs_start_page_writeback(
>               end_page_writeback(page);
>  }
>  
> +/* Write data inlined in inode */
> +
> +STATIC int
> +xfs_inline_write(
> +     struct xfs_inode        *ip,
> +     struct page             *page,
> +     struct buffer_head      *bh,
> +     __uint64_t              end_offset)
> +{
> +     char            *data;
> +     int             err = 0;
> +     xfs_trans_t     *tp;
> +     xfs_mount_t     *mp = ip->i_mount;
> +
> +     printk("%s: INLINING INODE %llu\n", __func__, ip->i_ino);
> +     xfs_start_page_writeback(page, 1, 1);
> +
> +     tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
> +     err = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0); /* Is 
> this the right logspace? */
> +
> +     if (err) {
> +             xfs_trans_cancel(tp, 0);

Do you need to undo xfs_start_page_writeback() here?

> +             return -1;
> +     }
> +
> +     xfs_ilock(ip, XFS_ILOCK_EXCL);
> +
> +     xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> +
> +     if (!(ip->i_d.di_format & XFS_DINODE_FMT_LOCAL)) {
> +             ip->i_df.if_flags &= ~XFS_IFEXTENTS;
> +             ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
> +             ip->i_df.if_flags |= XFS_IFINLINE;
> +     }
> +
> +     ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
> +
> +     if (end_offset > ip->i_df.if_bytes)
> +             xfs_idata_realloc(ip, end_offset - ip->i_df.if_bytes,
> +                               XFS_DATA_FORK);
> +
> +     xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DDATA);
> +     data = kmap(page);
> +     memcpy(ip->i_df.if_u1.if_data, data, end_offset);
> +     kunmap(page);
> +
> +     ip->i_d.di_size = end_offset;
> +
> +     err = xfs_trans_commit(tp, 0);
> +
> +     set_buffer_uptodate(bh);
> +     clear_buffer_dirty(bh);
> +     clear_buffer_delay(bh);
> +     SetPageUptodate(page);
> +
> +     end_page_writeback(page);
> +     return err;
> +}
> +
> +STATIC int
> +xfs_inline_read(
> +     struct xfs_inode        *ip,
> +     struct page             *page,
> +     struct address_space    *mapping)
> +{
> +     char *data;
> +     unsigned long bsize = 1 << ip->i_vnode.i_blkbits;
> +
> +     ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> +     ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
> +
> +     if (!page_has_buffers(page))
> +             create_empty_buffers(page, bsize, 0);
> +
> +     xfs_ilock(ip, XFS_ILOCK_SHARED);
> +
> +     data = kmap(page);
> +     memcpy(data, ip->i_df.if_u1.if_data, (int)ip->i_d.di_size);
> +     kunmap(page);
> +
> +     /* We should not leave garbage after user data into the page */
> +     memset(data + (int)ip->i_d.di_size, 0, PAGE_SIZE - ip->i_d.di_size);
> +
> +     SetPageUptodate(page);
> +     xfs_iunlock(ip, XFS_ILOCK_SHARED);
> +     unlock_page(page);
> +     return 0;
> +}
> +
>  static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
>  {
>       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> @@ -900,6 +990,7 @@ xfs_vm_writepage(
>       struct writeback_control *wbc)
>  {
>       struct inode            *inode = page->mapping->host;
> +     xfs_inode_t             *ip = XFS_I(inode);
>       struct buffer_head      *bh, *head;
>       struct xfs_bmbt_irec    imap;
>       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
> @@ -908,7 +999,7 @@ xfs_vm_writepage(
>       __uint64_t              end_offset;
>       pgoff_t                 end_index, last_index;
>       ssize_t                 len;
> -     int                     err, imap_valid = 0, uptodate = 1;
> +     int                     err = 0, imap_valid = 0, uptodate = 1;
>       int                     count = 0;
>       int                     nonblocking = 0;
>  
> @@ -968,8 +1059,16 @@ xfs_vm_writepage(
>                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
>                       offset);
>       len = 1 << inode->i_blkbits;
> -
>       bh = head = page_buffers(page);
> +
> +     if (end_offset + i_size_read(inode) <= XFS_IFORK_DSIZE(ip)) {

So end_offset looks like it is the size of the file if this page
contains EOF, or the next page-aligned offset of the file otherwise. If
I understand correctly, shouldn't we just check if end_offset falls
within the data fork size here? IOW, I don't understand why you add
end_offset and i_size_read(inode) here.

Brian

> +             err = xfs_inline_write(ip, page, bh, end_offset);
> +
> +             if (err)
> +                     goto error;
> +             return 0;
> +     }
> +
>       offset = page_offset(page);
>       type = XFS_IO_OVERWRITE;
>  
> @@ -1624,20 +1723,43 @@ xfs_vm_bmap(
>  
>  STATIC int
>  xfs_vm_readpage(
> -     struct file             *unused,
> +     struct file             *filp,
>       struct page             *page)
>  {
> -     return mpage_readpage(page, xfs_get_blocks);
> +     struct inode            *inode = filp->f_mapping->host;
> +     struct xfs_inode        *ip = XFS_I(inode);
> +
> +     if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
> +             return xfs_inline_read(ip, page, page->mapping);
> +     else
> +             return mpage_readpage(page, xfs_get_blocks);
>  }
>  
>  STATIC int
>  xfs_vm_readpages(
> -     struct file             *unused,
> +     struct file             *filp,
>       struct address_space    *mapping,
>       struct list_head        *pages,
>       unsigned                nr_pages)
>  {
> -     return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
> +     struct inode            *inode = filp->f_mapping->host;
> +     struct xfs_inode        *ip = XFS_I(inode);
> +
> +     if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> +             struct page     *page = list_first_entry(pages, struct page, 
> lru);
> +
> +             ASSERT(i_size_read(VFS_I(ip) <= PAGE_CACHE_SIZE));
> +
> +             list_del(&page->lru);
> +             if(!(add_to_page_cache_lru(page, mapping,
> +                                         page->index, GFP_KERNEL)))
> +                     return xfs_inline_read(ip, page, page->mapping);
> +
> +             page_cache_release(page);
> +             return 0;
> +     } else {
> +             return mpage_readpages(mapping, pages, nr_pages, 
> xfs_get_blocks);
> +     }
>  }
>  
>  const struct address_space_operations xfs_address_space_operations = {
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 2778258..5e56e5c 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -287,18 +287,6 @@ xfs_iformat(
>       case S_IFDIR:
>               switch (dip->di_format) {
>               case XFS_DINODE_FMT_LOCAL:
> -                     /*
> -                      * no local regular files yet
> -                      */
> -                     if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
> -                             xfs_warn(ip->i_mount,
> -                     "corrupt inode %Lu (local format for regular file).",
> -                                     (unsigned long long) ip->i_ino);
> -                             XFS_CORRUPTION_ERROR("xfs_iformat(4)",
> -                                                  XFS_ERRLEVEL_LOW,
> -                                                  ip->i_mount, dip);
> -                             return XFS_ERROR(EFSCORRUPTED);
> -                     }
>  
>                       di_size = be64_to_cpu(dip->di_size);
>                       if (unlikely(di_size > XFS_DFORK_DSIZE(dip, 
> ip->i_mount))) {
> @@ -2471,7 +2459,8 @@ xfs_iflush_int(
>       if (S_ISREG(ip->i_d.di_mode)) {
>               if (XFS_TEST_ERROR(
>                   (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
> -                 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
> +                 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
> +                 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
>                   mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
>                       xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
>                               "%s: Bad regular inode %Lu, ptr 0x%p",
> 

<Prev in Thread] Current Thread [Next in Thread>