On Fri, Oct 12, 2012 at 11:31:50AM -0400, Brian Foster wrote:
> On 10/11/2012 03:52 PM, Carlos Maiolino wrote:
> > Hi,
> >
> > this is a first RFC patch of my work on data inlining, i.e. use the xfs
> > inode's
> > literal area to store user's data.
> >
> > This first patch just cares about write and read new files into inode's
> > literal
> > area, it does not make any conversion from inline to extent or vice-versa.
> >
> > The idea to send this patch to list is just to get comments about this first
> > work and see if anybody has some ideas/suggestions about it, mainly related
> > with page cache and journal handling, since it's the first time I deal with
> > journal and page cache handling, I'm not pretty sure if I did things right
> > or not.
> >
> > Every comment is very appreciated.
> >
> > Thanks
> > ---
> > fs/xfs/xfs_aops.c | 134
> > ++++++++++++++++++++++++++++++++++++++++++++++++++---
> > fs/xfs/xfs_inode.c | 15 +-----
> > 2 files changed, 130 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> > index e562dd4..3d30528 100644
> > --- a/fs/xfs/xfs_aops.c
> > +++ b/fs/xfs/xfs_aops.c
> > @@ -31,6 +31,7 @@
> > #include "xfs_vnodeops.h"
> > #include "xfs_trace.h"
> > #include "xfs_bmap.h"
> > +#include "xfs_inode_item.h"
> > #include <linux/gfp.h>
> > #include <linux/mpage.h>
> > #include <linux/pagevec.h>
> > @@ -460,6 +461,95 @@ xfs_start_page_writeback(
> > end_page_writeback(page);
> > }
> >
> > +/* Write data inlined in inode */
> > +
> > +STATIC int
> > +xfs_inline_write(
> > + struct xfs_inode *ip,
> > + struct page *page,
> > + struct buffer_head *bh,
> > + __uint64_t end_offset)
> > +{
> > + char *data;
> > + int err = 0;
> > + xfs_trans_t *tp;
> > + xfs_mount_t *mp = ip->i_mount;
> > +
> > + printk("%s: INLINING INODE %llu\n", __func__, ip->i_ino);
> > + xfs_start_page_writeback(page, 1, 1);
> > +
> > + tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
> > + err = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0); /* Is
> > this the right logspace? */
> > +
> > + if (err) {
> > + xfs_trans_cancel(tp, 0);
>
> Do you need to undo xfs_start_page_writeback() here?
>
Not sure if I got what you meant
> > + return -1;
> > + }
> > +
> > + xfs_ilock(ip, XFS_ILOCK_EXCL);
> > +
> > + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> > +
> > + if (!(ip->i_d.di_format & XFS_DINODE_FMT_LOCAL)) {
> > + ip->i_df.if_flags &= ~XFS_IFEXTENTS;
> > + ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
> > + ip->i_df.if_flags |= XFS_IFINLINE;
> > + }
> > +
> > + ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
> > +
> > + if (end_offset > ip->i_df.if_bytes)
> > + xfs_idata_realloc(ip, end_offset - ip->i_df.if_bytes,
> > + XFS_DATA_FORK);
> > +
> > + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DDATA);
> > + data = kmap(page);
> > + memcpy(ip->i_df.if_u1.if_data, data, end_offset);
> > + kunmap(page);
> > +
> > + ip->i_d.di_size = end_offset;
> > +
> > + err = xfs_trans_commit(tp, 0);
> > +
> > + set_buffer_uptodate(bh);
> > + clear_buffer_dirty(bh);
> > + clear_buffer_delay(bh);
> > + SetPageUptodate(page);
> > +
> > + end_page_writeback(page);
> > + return err;
> > +}
> > +
> > +STATIC int
> > +xfs_inline_read(
> > + struct xfs_inode *ip,
> > + struct page *page,
> > + struct address_space *mapping)
> > +{
> > + char *data;
> > + unsigned long bsize = 1 << ip->i_vnode.i_blkbits;
> > +
> > + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> > + ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
> > +
> > + if (!page_has_buffers(page))
> > + create_empty_buffers(page, bsize, 0);
> > +
> > + xfs_ilock(ip, XFS_ILOCK_SHARED);
> > +
> > + data = kmap(page);
> > + memcpy(data, ip->i_df.if_u1.if_data, (int)ip->i_d.di_size);
> > + kunmap(page);
> > +
> > + /* We should not leave garbage after user data into the page */
> > + memset(data + (int)ip->i_d.di_size, 0, PAGE_SIZE - ip->i_d.di_size);
> > +
> > + SetPageUptodate(page);
> > + xfs_iunlock(ip, XFS_ILOCK_SHARED);
> > + unlock_page(page);
> > + return 0;
> > +}
> > +
> > static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
> > {
> > return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> > @@ -900,6 +990,7 @@ xfs_vm_writepage(
> > struct writeback_control *wbc)
> > {
> > struct inode *inode = page->mapping->host;
> > + xfs_inode_t *ip = XFS_I(inode);
> > struct buffer_head *bh, *head;
> > struct xfs_bmbt_irec imap;
> > xfs_ioend_t *ioend = NULL, *iohead = NULL;
> > @@ -908,7 +999,7 @@ xfs_vm_writepage(
> > __uint64_t end_offset;
> > pgoff_t end_index, last_index;
> > ssize_t len;
> > - int err, imap_valid = 0, uptodate = 1;
> > + int err = 0, imap_valid = 0, uptodate = 1;
> > int count = 0;
> > int nonblocking = 0;
> >
> > @@ -968,8 +1059,16 @@ xfs_vm_writepage(
> > (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
> > offset);
> > len = 1 << inode->i_blkbits;
> > -
> > bh = head = page_buffers(page);
> > +
> > + if (end_offset + i_size_read(inode) <= XFS_IFORK_DSIZE(ip)) {
>
> So end_offset looks like it is the size of the file if this page
> contains EOF, or the next page-aligned offset of the file otherwise. If
> I understand correctly, shouldn't we just check if end_offset falls
> within the data fork size here? IOW, I don't understand why you add
> end_offset and i_size_read(inode) here.
>
Check by end_offset is not the right thing to do here, but it needs an inode
size check. What I tried to do was catch any file changes that occurs before the
asignment of end_offset and this check condition. Not sure if I'm right though
and should leave only:
if (i_size_read(inode) <= XFS_IFORK_DSIZE(ip)) {
> Brian
>
> > + err = xfs_inline_write(ip, page, bh, end_offset);
> > +
> > + if (err)
> > + goto error;
> > + return 0;
> > + }
> > +
> > offset = page_offset(page);
> > type = XFS_IO_OVERWRITE;
> >
> > @@ -1624,20 +1723,43 @@ xfs_vm_bmap(
> >
> > STATIC int
> > xfs_vm_readpage(
> > - struct file *unused,
> > + struct file *filp,
> > struct page *page)
> > {
> > - return mpage_readpage(page, xfs_get_blocks);
> > + struct inode *inode = filp->f_mapping->host;
> > + struct xfs_inode *ip = XFS_I(inode);
> > +
> > + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
> > + return xfs_inline_read(ip, page, page->mapping);
> > + else
> > + return mpage_readpage(page, xfs_get_blocks);
> > }
> >
> > STATIC int
> > xfs_vm_readpages(
> > - struct file *unused,
> > + struct file *filp,
> > struct address_space *mapping,
> > struct list_head *pages,
> > unsigned nr_pages)
> > {
> > - return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
> > + struct inode *inode = filp->f_mapping->host;
> > + struct xfs_inode *ip = XFS_I(inode);
> > +
> > + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> > + struct page *page = list_first_entry(pages, struct page,
> > lru);
> > +
> > + ASSERT(i_size_read(VFS_I(ip) <= PAGE_CACHE_SIZE));
> > +
> > + list_del(&page->lru);
> > + if(!(add_to_page_cache_lru(page, mapping,
> > + page->index, GFP_KERNEL)))
> > + return xfs_inline_read(ip, page, page->mapping);
> > +
> > + page_cache_release(page);
> > + return 0;
> > + } else {
> > + return mpage_readpages(mapping, pages, nr_pages,
> > xfs_get_blocks);
> > + }
> > }
> >
> > const struct address_space_operations xfs_address_space_operations = {
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 2778258..5e56e5c 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -287,18 +287,6 @@ xfs_iformat(
> > case S_IFDIR:
> > switch (dip->di_format) {
> > case XFS_DINODE_FMT_LOCAL:
> > - /*
> > - * no local regular files yet
> > - */
> > - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
> > - xfs_warn(ip->i_mount,
> > - "corrupt inode %Lu (local format for regular file).",
> > - (unsigned long long) ip->i_ino);
> > - XFS_CORRUPTION_ERROR("xfs_iformat(4)",
> > - XFS_ERRLEVEL_LOW,
> > - ip->i_mount, dip);
> > - return XFS_ERROR(EFSCORRUPTED);
> > - }
> >
> > di_size = be64_to_cpu(dip->di_size);
> > if (unlikely(di_size > XFS_DFORK_DSIZE(dip,
> > ip->i_mount))) {
> > @@ -2471,7 +2459,8 @@ xfs_iflush_int(
> > if (S_ISREG(ip->i_d.di_mode)) {
> > if (XFS_TEST_ERROR(
> > (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
> > - (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
> > + (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
> > + (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
> > mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
> > xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
> > "%s: Bad regular inode %Lu, ptr 0x%p",
> >
>
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
--
--Carlos
|