xfs
[Top] [All Lists]

[PATCH 02/55] xfsprogs: port inode create transaction changes

To: xfs@xxxxxxxxxxx
Subject: [PATCH 02/55] xfsprogs: port inode create transaction changes
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Thu, 5 Sep 2013 08:05:06 +1000
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1378332359-14737-1-git-send-email-david@xxxxxxxxxxxxx>
References: <1378332359-14737-1-git-send-email-david@xxxxxxxxxxxxx>
From: Dave Chinner <dchinner@xxxxxxxxxx>

Bring across the relevant parts of the new inode create transaction
sufficient to keep kernel/user code in sync and implement the
infrastructure needed to make it work in xfsprogs.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 include/libxfs.h           |   1 +
 include/xfs_ialloc.h       |   8 +++
 include/xfs_icreate_item.h |   3 ++
 libxfs/xfs.h               |   5 ++
 libxfs/xfs_ialloc.c        |  87 +++++++++++++++++++++++++--------
 libxfs/xfs_trans.c         | 118 +++++++++++++++++++++++++++++----------------
 6 files changed, 160 insertions(+), 62 deletions(-)

diff --git a/include/libxfs.h b/include/libxfs.h
index f11ad52..bd74ca5 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -57,6 +57,7 @@
 #include <xfs/xfs_bmap.h>
 #include <xfs/xfs_trace.h>
 #include <xfs/xfs_symlink.h>
+#include <xfs/xfs_icreate_item.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/include/xfs_ialloc.h b/include/xfs_ialloc.h
index c8da3df..68c0732 100644
--- a/include/xfs_ialloc.h
+++ b/include/xfs_ialloc.h
@@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, 
xfs_agino_t ino,
 int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
                xfs_inobt_rec_incore_t *rec, int *stat);
 
+/*
+ * Inode chunk initialisation routine
+ */
+int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
+                         struct list_head *buffer_list,
+                         xfs_agnumber_t agno, xfs_agblock_t agbno,
+                         xfs_agblock_t length, unsigned int gen);
+
 extern const struct xfs_buf_ops xfs_agi_buf_ops;
 
 #endif /* __XFS_IALLOC_H__ */
diff --git a/include/xfs_icreate_item.h b/include/xfs_icreate_item.h
index 88ba8aa..70dc03c 100644
--- a/include/xfs_icreate_item.h
+++ b/include/xfs_icreate_item.h
@@ -36,6 +36,8 @@ struct xfs_icreate_log {
        __be32          icl_gen;        /* inode generation number to use */
 };
 
+#ifdef __KERNEL__
+
 /* in memory log item structure */
 struct xfs_icreate_item {
        struct xfs_log_item     ic_item;
@@ -48,5 +50,6 @@ void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t 
agno,
                        xfs_agblock_t agbno, unsigned int count,
                        unsigned int inode_size, xfs_agblock_t length,
                        unsigned int generation);
+#endif /* __KERNEL__ */
 
 #endif /* XFS_ICREATE_ITEM_H */
diff --git a/libxfs/xfs.h b/libxfs/xfs.h
index aa71ecc..15e82d7 100644
--- a/libxfs/xfs.h
+++ b/libxfs/xfs.h
@@ -176,6 +176,7 @@ roundup_pow_of_two(uint v)
 #define XBF_TRYLOCK                    XFS_BUF_TRYLOCK
 #define XBF_DONT_BLOCK                 0
 #define XBF_UNMAPPED                   0
+#define XBF_DONE                       0
 #define XFS_BUF_GETERROR(bp)           0
 #define XFS_BUF_DONE(bp)               ((bp)->b_flags |= LIBXFS_B_UPTODATE)
 #define XFS_BUF_ISDONE(bp)             ((bp)->b_flags & LIBXFS_B_UPTODATE)
@@ -194,6 +195,7 @@ roundup_pow_of_two(uint v)
 #define xfs_buf_relse(bp)              libxfs_putbuf(bp)
 #define xfs_buf_get(devp,blkno,len,f)  (libxfs_getbuf((devp), (blkno), (len)))
 #define xfs_bwrite(bp)                 libxfs_writebuf((bp), 0)
+#define xfs_buf_delwri_queue(bp, bl)   libxfs_writebuf((bp), 0)
 
 #define XBRW_READ                      LIBXFS_BREAD
 #define XBRW_WRITE                     LIBXFS_BWRITE
@@ -252,6 +254,7 @@ roundup_pow_of_two(uint v)
 
 #define xfs_trans_get_block_res(tp)    1
 #define xfs_trans_set_sync(tp)         ((void) 0)
+#define xfs_trans_ordered_buf(tp, bp)  ((void) 0)
 #define        xfs_trans_agblocks_delta(tp, d)
 #define        xfs_trans_agflist_delta(tp, d)
 #define        xfs_trans_agbtree_delta(tp, d)
@@ -325,6 +328,8 @@ do { \
 #define uuid_copy(s,d)         platform_uuid_copy((s),(d))
 #define uuid_equal(s,d)                (platform_uuid_compare((s),(d)) == 0)
 
+#define xfs_icreate_log(tp, agno, agbno, cnt, isize, len, gen) ((void) 0)
+
 /*
  * Prototypes for kernel static functions that are aren't in their
  * associated header files
diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c
index 76fdcea..48916dd 100644
--- a/libxfs/xfs_ialloc.c
+++ b/libxfs/xfs_ialloc.c
@@ -129,12 +129,16 @@ xfs_check_agi_freecount(
 #endif
 
 /*
- * Initialise a new set of inodes.
+ * Initialise a new set of inodes. When called without a transaction context
+ * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
+ * than logging them (which in a transaction context puts them into the AIL
+ * for writeback rather than the xfsbufd queue).
  */
-STATIC int
+int
 xfs_ialloc_inode_init(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
+       struct list_head        *buffer_list,
        xfs_agnumber_t          agno,
        xfs_agblock_t           agbno,
        xfs_agblock_t           length,
@@ -165,22 +169,40 @@ xfs_ialloc_inode_init(
        }
 
        /*
-        * Figure out what version number to use in the inodes we create.
-        * If the superblock version has caught up to the one that supports
-        * the new inode format, then use the new inode version.  Otherwise
-        * use the old version so that old kernels will continue to be
-        * able to use the file system.
+        * Figure out what version number to use in the inodes we create.  If
+        * the superblock version has caught up to the one that supports the new
+        * inode format, then use the new inode version.  Otherwise use the old
+        * version so that old kernels will continue to be able to use the file
+        * system.
         *
         * For v3 inodes, we also need to write the inode number into the inode,
         * so calculate the first inode number of the chunk here as
-        * XFS_OFFBNO_TO_AGINO() only works on filesystem block boundaries, not
-        * cluster boundaries and so cannot be used in the cluster buffer loop
-        * below.
+        * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
+        * across multiple filesystem blocks (such as a cluster) and so cannot
+        * be used in the cluster buffer loop below.
+        *
+        * Further, because we are writing the inode directly into the buffer
+        * and calculating a CRC on the entire inode, we have ot log the entire
+        * inode so that the entire range the CRC covers is present in the log.
+        * That means for v3 inode we log the entire buffer rather than just the
+        * inode cores.
         */
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                version = 3;
                ino = XFS_AGINO_TO_INO(mp, agno,
                                       XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
+
+               /*
+                * log the initialisation that is about to take place as an
+                * logical operation. This means the transaction does not
+                * need to log the physical changes to the inode buffers as log
+                * recovery will know what initialisation is actually needed.
+                * Hence we only need to log the buffers as "ordered" buffers so
+                * they track in the AIL as if they were physically logged.
+                */
+               if (tp)
+                       xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp),
+                                       mp->m_sb.sb_inodesize, length, gen);
        } else if (xfs_sb_version_hasnlink(&mp->m_sb))
                version = 2;
        else
@@ -196,15 +218,10 @@ xfs_ialloc_inode_init(
                                         XBF_UNMAPPED);
                if (!fbuf)
                        return ENOMEM;
-               /*
-                * Initialize all inodes in this buffer and then log them.
-                *
-                * XXX: It would be much better if we had just one transaction
-                *      to log a whole cluster of inodes instead of all the
-                *      individual transactions causing a lot of log traffic.
-                */
+
+               /* Initialize the inode buffers and log them appropriately. */
                fbuf->b_ops = &xfs_inode_buf_ops;
-               xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
+               xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
                for (i = 0; i < ninodes; i++) {
                        int     ioffset = i << mp->m_sb.sb_inodelog;
                        uint    isize = xfs_dinode_size(version);
@@ -220,11 +237,39 @@ xfs_ialloc_inode_init(
                                ino++;
                                uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
                                xfs_dinode_calc_crc(mp, free);
+                       } else if (tp) {
+                               /* just log the inode core */
+                               xfs_trans_log_buf(tp, fbuf, ioffset,
+                                                 ioffset + isize - 1);
                        }
+               }
 
-                       xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 
1);
+               if (tp) {
+                       /*
+                        * Mark the buffer as an inode allocation buffer so it
+                        * sticks in AIL at the point of this allocation
+                        * transaction. This ensures the they are on disk before
+                        * the tail of the log can be moved past this
+                        * transaction (i.e. by preventing relogging from moving
+                        * it forward in the log).
+                        */
+                       xfs_trans_inode_alloc_buf(tp, fbuf);
+                       if (version == 3) {
+                               /*
+                                * Mark the buffer as ordered so that they are
+                                * not physically logged in the transaction but
+                                * still tracked in the AIL as part of the
+                                * transaction and pin the log appropriately.
+                                */
+                               xfs_trans_ordered_buf(tp, fbuf);
+                               xfs_trans_log_buf(tp, fbuf, 0,
+                                                 BBTOB(fbuf->b_length) - 1);
+                       }
+               } else {
+                       fbuf->b_flags |= XBF_DONE;
+                       xfs_buf_delwri_queue(fbuf, buffer_list);
+                       xfs_buf_relse(fbuf);
                }
-               xfs_trans_inode_alloc_buf(tp, fbuf);
        }
        return 0;
 }
@@ -372,7 +417,7 @@ xfs_ialloc_ag_alloc(
         * rather than a linear progression to prevent the next generation
         * number from being easily guessable.
         */
-       error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
+       error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,
                        args.len, prandom_u32());
 
        if (error)
diff --git a/libxfs/xfs_trans.c b/libxfs/xfs_trans.c
index bdd0ebc..95fb630 100644
--- a/libxfs/xfs_trans.c
+++ b/libxfs/xfs_trans.c
@@ -208,71 +208,93 @@ xfs_calc_remove_reservation(
 }
 
 /*
- * For symlink we can modify:
+ * For create, break it in to the two cases that the transaction
+ * covers. We start with the modify case - allocation done by modification
+ * of the state of existing inodes - and the allocation case.
+ */
+
+/*
+ * For create we can modify:
  *    the parent directory inode: inode size
  *    the new inode: inode size
- *    the inode btree entry: 1 block
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
  *    the directory btree: (max depth + v2) * dir block size
  *    the directory inode's bmap btree: (max depth + v2) * block size
- *    the blocks for the symlink: 1 kB
- * Or in the first xact we allocate some inodes giving:
+ */
+STATIC uint
+xfs_calc_create_resv_modify(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
+               xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               (uint)XFS_FSB_TO_B(mp, 1) +
+               xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * For create we can allocate some inodes giving:
  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
  *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
  *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
  */
 STATIC uint
-xfs_calc_symlink_reservation(
+xfs_calc_create_resv_alloc(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               mp->m_sb.sb_sectsize +
+               xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+STATIC uint
+__xfs_calc_create_reservation(
        struct xfs_mount        *mp)
 {
        return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
-                    xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(1, 1024)),
-                   (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-                    xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(mp->m_in_maxlevels,
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                     XFS_FSB_TO_B(mp, 1))));
+               MAX(xfs_calc_create_resv_alloc(mp),
+                   xfs_calc_create_resv_modify(mp));
 }
 
 /*
- * For create we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: block size
- *    the superblock for the nlink flag: sector size
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode's bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
+ * For icreate we can allocate some inodes giving:
  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
  *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
  *    the inode btree: max depth * blocksize
  *    the allocation btrees: 2 trees * (max depth - 1) * block size
  */
 STATIC uint
-xfs_calc_create_reservation(
+xfs_calc_icreate_resv_alloc(
        struct xfs_mount        *mp)
 {
+       return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+               mp->m_sb.sb_sectsize +
+               xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+               xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+                                XFS_FSB_TO_B(mp, 1));
+}
+
+STATIC uint
+xfs_calc_icreate_reservation(xfs_mount_t *mp)
+{
        return XFS_DQUOT_LOGRES(mp) +
-               MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
-                    xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-                    XFS_FSB_TO_B(mp, 1) +
-                    xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
-                                     XFS_FSB_TO_B(mp, 1))),
-                   (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-                    mp->m_sb.sb_sectsize +
-                    xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(mp->m_in_maxlevels,
-                                     XFS_FSB_TO_B(mp, 1)) +
-                    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-                                     XFS_FSB_TO_B(mp, 1))));
+               MAX(xfs_calc_icreate_resv_alloc(mp),
+                   xfs_calc_create_resv_modify(mp));
+}
+
+STATIC uint
+xfs_calc_create_reservation(
+       struct xfs_mount        *mp)
+{
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return xfs_calc_icreate_reservation(mp);
+       return __xfs_calc_create_reservation(mp);
+
 }
 
 /*
@@ -285,6 +307,20 @@ xfs_calc_mkdir_reservation(
        return xfs_calc_create_reservation(mp);
 }
 
+
+/*
+ * Making a new symplink is the same as creating a new file, but
+ * with the added blocks for remote symlink data which can be up to 1kB in
+ * length (MAXPATHLEN).
+ */
+STATIC uint
+xfs_calc_symlink_reservation(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_create_reservation(mp) +
+              xfs_calc_buf_res(1, MAXPATHLEN);
+}
+
 /*
  * In freeing an inode we can modify:
  *    the inode being freed: inode size
-- 
1.8.3.2

<Prev in Thread] Current Thread [Next in Thread>