xfs
[Top] [All Lists]

Re: [PATCH 15/22] xfs: add CRCs to dir2/da node blocks

To: Dave Chinner <davidatfromorbit.com@xxxxxxx>
Subject: Re: [PATCH 15/22] xfs: add CRCs to dir2/da node blocks
From: Ben Myers <bpm@xxxxxxx>
Date: Mon, 22 Apr 2013 13:55:50 -0500
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1364965892-19623-16-git-send-email-david@xxxxxxxxxxxxx>
References: <1364965892-19623-1-git-send-email-david@xxxxxxxxxxxxx> <1364965892-19623-16-git-send-email-david@xxxxxxxxxxxxx>
User-agent: Mutt/1.5.20 (2009-06-14)
On Wed, Apr 03, 2013 at 04:11:25PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner at redhat.com>
> 
> Signed-off-by: Dave Chinner <dchinner at redhat.com>

A few nits below.

> ---
>  fs/xfs/xfs_attr.c      |   33 +-
>  fs/xfs/xfs_attr_leaf.c |   29 +-
>  fs/xfs/xfs_bmap.c      |    1 +
>  fs/xfs/xfs_da_btree.c  | 1395 
> +++++++++++++++++++++++++++++-------------------
>  fs/xfs/xfs_da_btree.h  |  106 +++-
>  fs/xfs/xfs_dir2_node.c |   26 +-
>  fs/xfs/xfs_trace.c     |    2 +-
>  7 files changed, 972 insertions(+), 620 deletions(-)
> 
> diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
> index 8886838..e03128c 100644
> --- a/fs/xfs/xfs_attr.c
> +++ b/fs/xfs/xfs_attr.c
> @@ -15,7 +15,6 @@
>   * along with this program; if not, write the Free Software Foundation,
>   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
>   */
> -
>  #include "xfs.h"
>  #include "xfs_fs.h"
>  #include "xfs_types.h"
> @@ -25,6 +24,7 @@
>  #include "xfs_sb.h"
>  #include "xfs_ag.h"
>  #include "xfs_mount.h"
> +#include "xfs_error.h"
>  #include "xfs_da_btree.h"
>  #include "xfs_bmap_btree.h"
>  #include "xfs_attr_sf.h"
> @@ -35,7 +35,6 @@
>  #include "xfs_bmap.h"
>  #include "xfs_attr.h"
>  #include "xfs_attr_leaf.h"
> -#include "xfs_error.h"

Why did xfs_error.h need to be moved?

> @@ -935,16 +936,16 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
>       /*
>        * Set up the new root node.
>        */
> -     error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
> +     error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
>       if (error)
>               goto out;
>       node = bp1->b_addr;
>       leaf = bp2->b_addr;
>       ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
>       /* both on-disk, don't endian-flip twice */
> -     node->btree[0].hashval =
> -             leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
> -     node->btree[0].before = cpu_to_be32(blkno);
> +     btree = xfs_da3_node_tree_p(node);
> +     btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 
> ].hashval;
                                                                       ^

Extra space.

> +static bool
> +xfs_da3_node_verify(
>       struct xfs_buf          *bp)
>  {
>       struct xfs_mount        *mp = bp->b_target->bt_mount;
> -     struct xfs_da_node_hdr *hdr = bp->b_addr;
> -     int                     block_ok = 0;
> -
> -     block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC);
> -     block_ok = block_ok &&
> -                     be16_to_cpu(hdr->level) > 0 &&
> -                     be16_to_cpu(hdr->count) > 0 ;
> -     if (!block_ok) {
> -             XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
> -             xfs_buf_ioerror(bp, EFSCORRUPTED);
> +     struct xfs_da_intnode   *hdr = bp->b_addr;
> +     struct xfs_da3_icnode_hdr ichdr;
> +
> +     xfs_da3_node_hdr_from_disk(&ichdr, hdr);
> +
> +     if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +             struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
> +
> +             if (ichdr.magic != XFS_DA3_NODE_MAGIC)
> +                     return false;
> +
> +             if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
> +                     return false;
> +             if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
> +                     return false;
> +     } else {
> +             if (ichdr.magic != XFS_DA_NODE_MAGIC)
> +                     return false;
>       }
> +     if (ichdr.level == 0)
> +             return false;
> +     if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
> +             return false;
> +     if (ichdr.count == 0)
> +             return false;
> +
> +     /*
> +      * we don't know if the node is for and attribute or directory tree,
> +      * so only fail if the count is outside both bounds
> +      */
> +     if (ichdr.count > mp->m_dir_node_ents &&
> +         ichdr.count > mp->m_attr_node_ents)
> +             return false;
> +
> +     /* XXX: hash order check? */

Still planning to add that in a subsequent patch?

> @@ -178,33 +314,45 @@ xfs_da_node_read(
>   * Create the initial contents of an intermediate node.
>   */
>  int
> -xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
> -                              struct xfs_buf **bpp, int whichfork)
> +xfs_da3_node_create(
> +     struct xfs_da_args      *args,
> +     xfs_dablk_t             blkno,
> +     int                     level,
> +     struct xfs_buf          **bpp,
> +     int                     whichfork)
>  {
> -     xfs_da_intnode_t *node;
> -     struct xfs_buf *bp;
> -     int error;
> -     xfs_trans_t *tp;
> +     struct xfs_da_intnode   *node;
> +     struct xfs_trans        *tp = args->trans;
> +     struct xfs_mount        *mp = tp->t_mountp;
> +     struct xfs_da3_icnode_hdr ichdr = {0};
> +     struct xfs_buf          *bp;
> +     int                     error;
>  
>       trace_xfs_da_node_create(args);
> +     ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
>  
> -     tp = args->trans;
>       error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);

Looks like this function can return 0 with a null bp.  We should probably check
for that.

> @@ -320,8 +474,10 @@ xfs_da_split(xfs_da_state_t *state)
>        * just got bumped because of the addition of a new root node.
>        * There might be three blocks involved if a double split occurred,
>        * and the original block 0 could be at any position in the list.
> +      *
> +      * Note: the info structures being modified here for both v2 and v3 da
> +      * headers, so we can do this linkage just using the v2 structures.

I had a hard time parsing that the first few times.  How about something like:

Note:  The xfs_da_blkinfo fields being modified here are shared by both
xfs_da_node_hdr and xfs_da3_intnode, and occur before the additional fields
added in xfs_da3_intnode, so this linkage can be done using just the v2
structures.

> @@ -591,12 +801,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, 
> xfs_da_state_blk_t *blk1,
>                * Move the req'd B-tree elements from high in node1 to
>                * low in node2.
>                */
> -             be16_add_cpu(&node2->hdr.count, count);
> +             nodehdr2.count += count;
>               tmp = count * (uint)sizeof(xfs_da_node_entry_t);
> -             btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
> -             btree_d = &node2->btree[0];
> +             btree_s = &btree1[nodehdr1.count- count];
                                                ^ 
Add a space.

> @@ -926,35 +1172,34 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int 
> *action)
>        * We prefer coalescing with the lower numbered sibling so as
>        * to shrink a directory over time.
>        */
> +     count  = state->node_ents;
> +     count -= state->node_ents >> 2;
> +     count -= nodehdr.count;
> +
>       /* start with smaller blk num */
> -     forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
> +     forward = nodehdr.forw < nodehdr.back;
>       for (i = 0; i < 2; forward = !forward, i++) {
>               if (forward)
> -                     blkno = be32_to_cpu(info->forw);
> +                     blkno = nodehdr.forw;
>               else
> -                     blkno = be32_to_cpu(info->back);
> +                     blkno = nodehdr.back;
>               if (blkno == 0)
>                       continue;
> -             error = xfs_da_node_read(state->args->trans, state->args->dp,
> +             error = xfs_da3_node_read(state->args->trans, state->args->dp,
>                                       blkno, -1, &bp, state->args->whichfork);
>               if (error)
>                       return(error);
> -             ASSERT(bp != NULL);
>  
> -             node = (xfs_da_intnode_t *)info;
> -             count  = state->node_ents;
> -             count -= state->node_ents >> 2;
> -             count -= be16_to_cpu(node->hdr.count);

At first initializing count outside the loop looked like a problem to me, but
upon a closer inspection now I believe it's ok.

>  /*
> - * Unbalance the btree elements between two intermediate nodes,
> + * Unbalance the elements between two intermediate nodes,
>   * move all Btree elements from one node into another.
>   */
>  STATIC void
> -xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
> -                                  xfs_da_state_blk_t *save_blk)
> +xfs_da3_node_unbalance(
> +     struct xfs_da_state     *state,
> +     struct xfs_da_state_blk *drop_blk,
> +     struct xfs_da_state_blk *save_blk)
>  {
> -     xfs_da_intnode_t *drop_node, *save_node;
> -     xfs_da_node_entry_t *btree;
> -     int tmp;
> -     xfs_trans_t *tp;
> +     struct xfs_da_intnode   *drop_node;
> +     struct xfs_da_intnode   *save_node;
> +     struct xfs_da_node_entry *dbtree;
> +     struct xfs_da_node_entry *sbtree;
> +     struct xfs_da3_icnode_hdr dhdr;
> +     struct xfs_da3_icnode_hdr shdr;

Would be better as drop and save.

> +     struct xfs_trans        *tp;
> +     int                     sindex;
> +     int                     tmp;
>  
>       trace_xfs_da_node_unbalance(state->args);
>  
>       drop_node = drop_blk->bp->b_addr;
>       save_node = save_blk->bp->b_addr;
> -     ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
> -     ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
> +     xfs_da3_node_hdr_from_disk(&dhdr, drop_node);
> +     xfs_da3_node_hdr_from_disk(&shdr, save_node);
> +     dbtree = xfs_da3_node_tree_p(drop_node);
> +     sbtree = xfs_da3_node_tree_p(save_node);
>       tp = state->args->trans;
>  
>       /*
>        * If the dying block has lower hashvals, then move all the
>        * elements in the remaining block up to make a hole.
>        */
> -     if ((be32_to_cpu(drop_node->btree[0].hashval) < 
> be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
> -         
> (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
> -          
> be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
> -     {
> -             btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
> -             tmp = be16_to_cpu(save_node->hdr.count) * 
> (uint)sizeof(xfs_da_node_entry_t);
> -             memmove(btree, &save_node->btree[0], tmp);
> -             btree = &save_node->btree[0];
> +     if ((be32_to_cpu(dbtree[0].hashval) < be32_to_cpu(sbtree[ 0 ].hashval)) 
> ||
                                                                 ^ ^

Extra spaces.

> +         (be32_to_cpu(dbtree[dhdr.count - 1].hashval) <
> +                             be32_to_cpu(sbtree[shdr.count - 1].hashval))) {
> +             /* XXX: check this - is memmove dst correct? */
> +             tmp = shdr.count * (uint)sizeof(xfs_da_node_entry_t);
> +             memmove(&sbtree[dhdr.count], &sbtree[0], tmp);

I believe the destination in this memmove is correct.  We're moving items from
the beginning of the save_node toward the end of save_node so that later we can
copy from drop_node into the beginning of save_node.  Making room.

> +
> +             sindex = 0;
>               xfs_trans_log_buf(tp, save_blk->bp,
> -                     XFS_DA_LOGRANGE(save_node, btree,
> -                             (be16_to_cpu(save_node->hdr.count) + 
> be16_to_cpu(drop_node->hdr.count)) *
> -                             sizeof(xfs_da_node_entry_t)));
> +                     XFS_DA_LOGRANGE(save_node, &sbtree[0],
> +                             (shdr.count + dhdr.count) *
> +                                             sizeof(xfs_da_node_entry_t)));

Seems like here we are logging more of the node than is strictly necessary.  So
far we have only modified save_node at shdr.count up to shdr.count +
dhdr.count, there is no need to log the beginning of save_node until after
we've copied from drop_node into it.  Not a big deal I guess.

>       } else {
> -             btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
> +             sindex = shdr.count;
>               xfs_trans_log_buf(tp, save_blk->bp,
> -                     XFS_DA_LOGRANGE(save_node, btree,
> -                             be16_to_cpu(drop_node->hdr.count) *
> -                             sizeof(xfs_da_node_entry_t)));
> +                     XFS_DA_LOGRANGE(save_node, &sbtree[sindex],
> +                             dhdr.count * sizeof(xfs_da_node_entry_t)));

And here it seems that we're logging save_node before we've made any
modification to it.  Maybe I'm missing something.

>       }
>  
>       /*
>        * Move all the B-tree elements from drop_blk to save_blk.
>        */
> -     tmp = be16_to_cpu(drop_node->hdr.count) * 
> (uint)sizeof(xfs_da_node_entry_t);
> -     memcpy(btree, &drop_node->btree[0], tmp);
> -     be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
> +     tmp = dhdr.count * (uint)sizeof(xfs_da_node_entry_t);
> +     memcpy(&sbtree[sindex], &dbtree[0], tmp);
> +     shdr.count += dhdr.count;
> +     xfs_da3_node_hdr_to_disk(save_node, &shdr);
>       xfs_trans_log_buf(tp, save_blk->bp,
>               XFS_DA_LOGRANGE(save_node, &save_node->hdr,
> -                     sizeof(save_node->hdr)));
> +                             xfs_da3_node_hdr_size(save_node)));
>  

And then after modifying save_node by copying stuff in from drop_blk we only
log the header, and not the entries.  I guess I'd prefer to see the entries
logged after they've been modified.  It's not a bug, looks like.. just not what
I'm used to seeing.

> @@ -1190,66 +1478,73 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int 
> *result)
>               }
>               curr = blk->bp->b_addr;
>               blk->magic = be16_to_cpu(curr->magic);
> -             ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
> -                    blk->magic == XFS_DIR2_LEAFN_MAGIC ||
> -                    blk->magic == XFS_ATTR_LEAF_MAGIC);
> +
> +             if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
> +                     blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
> +                     break;
> +             }
> +
> +             if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
> +                 blk->magic == XFS_DIR3_LEAFN_MAGIC) {
> +                     blk->magic = XFS_DIR2_LEAFN_MAGIC;
> +                     blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
> +                     break;
> +             }

Something like this would be better for this rearrangement:

        if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
                ...
                break;
        } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
                   blk->magic == XFS_DIR3_LEAFN_MAGIC) {
                ...
                break;
        } else if (blk->magic != XFS_DA_NODE_MAGIC) {
                return XFS_ERROR(EFSCORRUPTED);
        }

So that we're sure what kind of block we have.

> +
> +             blk->magic = XFS_DA_NODE_MAGIC;
> +

Why was that assignment necessary?



> @@ -1316,9 +1647,6 @@ xfs_da_blk_link(xfs_da_state_t *state, 
> xfs_da_state_blk_t *old_blk,
>       ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
>              old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
>              old_blk->magic == XFS_ATTR_LEAF_MAGIC);

Seems like these asserts need to be updated.

> -     ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
> -     ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
> -     ASSERT(old_blk->magic == new_blk->magic);

These seem like good asserts.



> @@ -1449,8 +1738,6 @@ xfs_da_blk_unlink(xfs_da_state_t *state, 
> xfs_da_state_blk_t *drop_blk,
>       ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
>              save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
>              save_blk->magic == XFS_ATTR_LEAF_MAGIC);
                
Seems like these asserts need to be updated.

> -     ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
> -     ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));

These two seem like useful asserts.


> @@ -1856,17 +2176,22 @@ xfs_da_swap_lastblock(
>               dead_level = 0;
>               dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
>       } else {
> -             ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
> +             struct xfs_da3_icnode_hdr deadhdr;
> +
> +             ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
> +                    dead_info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));

I think these are covered by asserts in xfs_da3_node_hdr_from_disk and can be 
removed.

> @@ -1912,31 +2237,31 @@ xfs_da_swap_lastblock(
>        * Walk down the tree looking for the parent of the moved block.
>        */
>       for (;;) {
> -             error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w);
> +             error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
>               if (error)
>                       goto done;
>               par_node = par_buf->b_addr;
> -             if (unlikely(par_node->hdr.info.magic !=
> -                 cpu_to_be16(XFS_DA_NODE_MAGIC) ||
> -                 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 
> 1))) {
> +             xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
> +             if (level >= 0 && level != par_hdr.level + 1) {

Do we need to test par_node magic here like we were before? 

>                       XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
>                                        XFS_ERRLEVEL_LOW, mp);
>                       error = XFS_ERROR(EFSCORRUPTED);
>                       goto done;
>               }

<Prev in Thread] Current Thread [Next in Thread>