xfs
[Top] [All Lists]

[RFC PATCH] xfs: enables file data inlining in inodes

To: xfs@xxxxxxxxxxx
Subject: [RFC PATCH] xfs: enables file data inlining in inodes
From: Carlos Maiolino <cmaiolino@xxxxxxxxxx>
Date: Thu, 11 Oct 2012 16:52:38 -0300
Hi,

this is a first RFC patch of my work on data inlining, i.e. use the xfs inode's
literal area to store user's data.

This first patch just cares about write and read new files into inode's literal
area, it does not make any conversion from inline to extent or vice-versa.

The idea to send this patch to list is just to get comments about this first
work and see if anybody has some ideas/suggestions about it, mainly related
with page cache and journal handling, since it's the first time I deal with
journal and page cache handling, I'm not pretty sure if I did things right
or not.

Every comment is very appreciated.

Thanks
---
 fs/xfs/xfs_aops.c  | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_inode.c |  15 +-----
 2 files changed, 130 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e562dd4..3d30528 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,6 +31,7 @@
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
+#include "xfs_inode_item.h"
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
@@ -460,6 +461,95 @@ xfs_start_page_writeback(
                end_page_writeback(page);
 }
 
+/* Write data inlined in inode */
+
+STATIC int
+xfs_inline_write(
+       struct xfs_inode        *ip,
+       struct page             *page,
+       struct buffer_head      *bh,
+       __uint64_t              end_offset)
+{
+       char            *data;
+       int             err = 0;
+       xfs_trans_t     *tp;
+       xfs_mount_t     *mp = ip->i_mount;
+
+       printk("%s: INLINING INODE %llu\n", __func__, ip->i_ino);
+       xfs_start_page_writeback(page, 1, 1);
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
+       err = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0); /* Is 
this the right logspace? */
+
+       if (err) {
+               xfs_trans_cancel(tp, 0);
+               return -1;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+       if (!(ip->i_d.di_format & XFS_DINODE_FMT_LOCAL)) {
+               ip->i_df.if_flags &= ~XFS_IFEXTENTS;
+               ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+               ip->i_df.if_flags |= XFS_IFINLINE;
+       }
+
+       ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
+
+       if (end_offset > ip->i_df.if_bytes)
+               xfs_idata_realloc(ip, end_offset - ip->i_df.if_bytes,
+                                 XFS_DATA_FORK);
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+       data = kmap(page);
+       memcpy(ip->i_df.if_u1.if_data, data, end_offset);
+       kunmap(page);
+
+       ip->i_d.di_size = end_offset;
+
+       err = xfs_trans_commit(tp, 0);
+
+       set_buffer_uptodate(bh);
+       clear_buffer_dirty(bh);
+       clear_buffer_delay(bh);
+       SetPageUptodate(page);
+
+       end_page_writeback(page);
+       return err;
+}
+
+STATIC int
+xfs_inline_read(
+       struct xfs_inode        *ip,
+       struct page             *page,
+       struct address_space    *mapping)
+{
+       char *data;
+       unsigned long bsize = 1 << ip->i_vnode.i_blkbits;
+
+       ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+       ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
+
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, bsize, 0);
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+       data = kmap(page);
+       memcpy(data, ip->i_df.if_u1.if_data, (int)ip->i_d.di_size);
+       kunmap(page);
+
+       /* We should not leave garbage after user data into the page */
+       memset(data + (int)ip->i_d.di_size, 0, PAGE_SIZE - ip->i_d.di_size);
+
+       SetPageUptodate(page);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       unlock_page(page);
+       return 0;
+}
+
 static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 {
        return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -900,6 +990,7 @@ xfs_vm_writepage(
        struct writeback_control *wbc)
 {
        struct inode            *inode = page->mapping->host;
+       xfs_inode_t             *ip = XFS_I(inode);
        struct buffer_head      *bh, *head;
        struct xfs_bmbt_irec    imap;
        xfs_ioend_t             *ioend = NULL, *iohead = NULL;
@@ -908,7 +999,7 @@ xfs_vm_writepage(
        __uint64_t              end_offset;
        pgoff_t                 end_index, last_index;
        ssize_t                 len;
-       int                     err, imap_valid = 0, uptodate = 1;
+       int                     err = 0, imap_valid = 0, uptodate = 1;
        int                     count = 0;
        int                     nonblocking = 0;
 
@@ -968,8 +1059,16 @@ xfs_vm_writepage(
                        (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
                        offset);
        len = 1 << inode->i_blkbits;
-
        bh = head = page_buffers(page);
+
+       if (end_offset + i_size_read(inode) <= XFS_IFORK_DSIZE(ip)) {
+               err = xfs_inline_write(ip, page, bh, end_offset);
+
+               if (err)
+                       goto error;
+               return 0;
+       }
+
        offset = page_offset(page);
        type = XFS_IO_OVERWRITE;
 
@@ -1624,20 +1723,43 @@ xfs_vm_bmap(
 
 STATIC int
 xfs_vm_readpage(
-       struct file             *unused,
+       struct file             *filp,
        struct page             *page)
 {
-       return mpage_readpage(page, xfs_get_blocks);
+       struct inode            *inode = filp->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+               return xfs_inline_read(ip, page, page->mapping);
+       else
+               return mpage_readpage(page, xfs_get_blocks);
 }
 
 STATIC int
 xfs_vm_readpages(
-       struct file             *unused,
+       struct file             *filp,
        struct address_space    *mapping,
        struct list_head        *pages,
        unsigned                nr_pages)
 {
-       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+       struct inode            *inode = filp->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+               struct page     *page = list_first_entry(pages, struct page, 
lru);
+
+               ASSERT(i_size_read(VFS_I(ip) <= PAGE_CACHE_SIZE));
+
+               list_del(&page->lru);
+               if(!(add_to_page_cache_lru(page, mapping,
+                                           page->index, GFP_KERNEL)))
+                       return xfs_inline_read(ip, page, page->mapping);
+
+               page_cache_release(page);
+               return 0;
+       } else {
+               return mpage_readpages(mapping, pages, nr_pages, 
xfs_get_blocks);
+       }
 }
 
 const struct address_space_operations xfs_address_space_operations = {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258..5e56e5c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -287,18 +287,6 @@ xfs_iformat(
        case S_IFDIR:
                switch (dip->di_format) {
                case XFS_DINODE_FMT_LOCAL:
-                       /*
-                        * no local regular files yet
-                        */
-                       if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
-                               xfs_warn(ip->i_mount,
-                       "corrupt inode %Lu (local format for regular file).",
-                                       (unsigned long long) ip->i_ino);
-                               XFS_CORRUPTION_ERROR("xfs_iformat(4)",
-                                                    XFS_ERRLEVEL_LOW,
-                                                    ip->i_mount, dip);
-                               return XFS_ERROR(EFSCORRUPTED);
-                       }
 
                        di_size = be64_to_cpu(dip->di_size);
                        if (unlikely(di_size > XFS_DFORK_DSIZE(dip, 
ip->i_mount))) {
@@ -2471,7 +2459,8 @@ xfs_iflush_int(
        if (S_ISREG(ip->i_d.di_mode)) {
                if (XFS_TEST_ERROR(
                    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
-                   (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
+                   (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
+                   (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
                    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
                        xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
                                "%s: Bad regular inode %Lu, ptr 0x%p",
-- 
1.7.11.7

<Prev in Thread] Current Thread [Next in Thread>