xfs
[Top] [All Lists]

Re: [PATCH 4/9] xfs: add CRC checks to the AGFL

To: Dave Chinner <david@xxxxxxxxxxxxx>
Subject: Re: [PATCH 4/9] xfs: add CRC checks to the AGFL
From: Ben Myers <bpm@xxxxxxx>
Date: Wed, 27 Feb 2013 16:37:50 -0600
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1358774760-21841-5-git-send-email-david@xxxxxxxxxxxxx>
References: <1358774760-21841-1-git-send-email-david@xxxxxxxxxxxxx> <1358774760-21841-5-git-send-email-david@xxxxxxxxxxxxx>
User-agent: Mutt/1.5.20 (2009-06-14)
Hi Dave,

On Tue, Jan 22, 2013 at 12:25:55AM +1100, Dave Chinner wrote:
> From: Christoph Hellwig <hch@xxxxxx>
> 
> Add CRC checks, location information and a magic number to the AGFL.
> Previously the AGFL was just a block containing nothing but the
> free block pointers.  The new AGFL has a real header with the usual
> boilerplate instead, so that we can verify it's not corrupted and
> written into the right place.
> 
> [dchinner@xxxxxxxxxx] Added LSN field, reworked significantly to fit
> into new verifier structure and growfs structure, enabled full
> verifier functionality now there is a header to verify and we can
> guarantee an initialised AGFL.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>

I have a couple comments below.

-Ben

> ---
>  fs/xfs/xfs_ag.h          |   25 +++++++++-
>  fs/xfs/xfs_alloc.c       |  119 
> ++++++++++++++++++++++++++++++----------------
>  fs/xfs/xfs_buf_item.h    |    4 +-
>  fs/xfs/xfs_fsops.c       |    5 ++
>  fs/xfs/xfs_log_recover.c |    7 +++
>  5 files changed, 116 insertions(+), 44 deletions(-)
> 
> diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
> index b382703..813caea 100644
> --- a/fs/xfs/xfs_ag.h
> +++ b/fs/xfs/xfs_ag.h
> @@ -30,6 +30,7 @@ struct xfs_trans;
>  
>  #define      XFS_AGF_MAGIC   0x58414746      /* 'XAGF' */
>  #define      XFS_AGI_MAGIC   0x58414749      /* 'XAGI' */
> +#define      XFS_AGFL_MAGIC  0x5841464c      /* 'XAFL' */
>  #define      XFS_AGF_VERSION 1
>  #define      XFS_AGI_VERSION 1
>  
> @@ -190,11 +191,31 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
>   */
>  #define XFS_AGFL_DADDR(mp)   ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
>  #define      XFS_AGFL_BLOCK(mp)      XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
> -#define XFS_AGFL_SIZE(mp)    ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
>  #define      XFS_BUF_TO_AGFL(bp)     ((xfs_agfl_t *)((bp)->b_addr))
>  
> +#define XFS_BUF_TO_AGFL_BNO(mp, bp) \
> +     (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
> +             &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
> +             (__be32 *)(bp)->b_addr)
> +
> +/*
> + * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of
> + * slots in the beginning of the block for a proper header with the
> + * location information and CRC.
> + */
> +#define XFS_AGFL_SIZE(mp) \
> +     (((mp)->m_sb.sb_sectsize - \
> +      (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
> +             sizeof(struct xfs_agfl) : 0)) / \
> +       sizeof(xfs_agblock_t))
> +
>  typedef struct xfs_agfl {
> -     __be32          agfl_bno[1];    /* actually XFS_AGFL_SIZE(mp) */
> +     __be32          agfl_magicnum;
> +     __be32          agfl_seqno;
> +     uuid_t          agfl_uuid;
> +     __be64          agfl_lsn;
> +     __be32          agfl_crc;
> +     __be32          agfl_bno[];     /* actually XFS_AGFL_SIZE(mp) */
>  } xfs_agfl_t;
>  
>  /*
> diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
> index 1d2e9c3..53e699e 100644
> --- a/fs/xfs/xfs_alloc.c
> +++ b/fs/xfs/xfs_alloc.c
> @@ -432,53 +432,84 @@ xfs_alloc_fixup_trees(
>       return 0;
>  }
>  
> -static void
> +static bool
>  xfs_agfl_verify(
>       struct xfs_buf  *bp)
>  {
> -#ifdef WHEN_CRCS_COME_ALONG
> -     /*
> -      * we cannot actually do any verification of the AGFL because mkfs does
> -      * not initialise the AGFL to zero or NULL. Hence the only valid part of
> -      * the AGFL is what the AGF says is active. We can't get to the AGF, so
> -      * we can't verify just those entries are valid.
> -      *
> -      * This problem goes away when the CRC format change comes along as that
> -      * requires the AGFL to be initialised by mkfs. At that point, we can
> -      * verify the blocks in the agfl -active or not- lie within the bounds
> -      * of the AG. Until then, just leave this check ifdef'd out.
> -      */
>       struct xfs_mount *mp = bp->b_target->bt_mount;
>       struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
> -     int             agfl_ok = 1;
> -
>       int             i;
>  
> +     if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
> +             return false;
> +     if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
> +             return false;
> +     /*
> +      * during growfs operations, the perag is not fully initialised,
> +      * so we can't use it for any useful checking. growfs ensures we can't
> +      * use it by using uncached buffers that don't have the perag attached
> +      * so we can detect and avoid this problem.
> +      */
> +     if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
> +             return false;
> +
>       for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
> -             if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK ||
> +             if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
>                   be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
                                                   <

Any non NULLAGBLOCK should be less than m_sb.sb_agblocks, correct?

> -                     agfl_ok = 0;
> +                     return false;
>       }
> +     return true;
> +}
> +
> +static void
> +xfs_agfl_read_verify(
> +     struct xfs_buf  *bp)
> +{
> +     struct xfs_mount *mp = bp->b_target->bt_mount;
> +     int             agfl_ok = 1;
> +
> +     /*
> +      * There is no verification of non-crc AGFLs because mkfs does not
> +      * initialise the AGFL to zero or NULL. Hence the only valid part of the
> +      * AGFL is what the AGF says is active. We can't get to the AGF, so we
> +      * can't verify just those entries are valid.
> +      */
> +     if (!xfs_sb_version_hascrc(&mp->m_sb))
> +             return;
> +
> +     agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
> +                                offsetof(struct xfs_agfl, agfl_crc));
> +
> +     agfl_ok = agfl_ok && xfs_agfl_verify(bp);
>  
>       if (!agfl_ok) {
> -             XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl);
> +             XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 
> bp->b_addr);
>               xfs_buf_ioerror(bp, EFSCORRUPTED);
>       }
> -#endif
>  }
>  
>  static void
>  xfs_agfl_write_verify(
>       struct xfs_buf  *bp)
>  {
> -     xfs_agfl_verify(bp);
> -}
> +     struct xfs_mount *mp = bp->b_target->bt_mount;
> +     struct xfs_buf_log_item *bip = bp->b_fspriv;
>  
> -static void
> -xfs_agfl_read_verify(
> -     struct xfs_buf  *bp)
> -{
> -     xfs_agfl_verify(bp);
> +     /* no verification of non-crc AGFLs */
> +     if (!xfs_sb_version_hascrc(&mp->m_sb))
> +             return;
> +
> +     if (!xfs_agfl_verify(bp)) {
> +             XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 
> bp->b_addr);
> +             xfs_buf_ioerror(bp, EFSCORRUPTED);
> +             return;
> +     }
> +
> +     if (bip)
> +             XFS_BUF_TO_AGFL(bp)->agfl_lsn = 
> cpu_to_be64(bip->bli_item.li_lsn);
> +
> +     xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
> +                      offsetof(struct xfs_agfl, agfl_crc));
>  }
>  
>  const struct xfs_buf_ops xfs_agfl_buf_ops = {
> @@ -1984,18 +2015,18 @@ xfs_alloc_get_freelist(
>       int             btreeblk) /* destination is a AGF btree */
>  {
>       xfs_agf_t       *agf;   /* a.g. freespace structure */
> -     xfs_agfl_t      *agfl;  /* a.g. freelist structure */
>       xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
>       xfs_agblock_t   bno;    /* block number returned */
> +     __be32          *agfl_bno;
>       int             error;
>       int             logflags;
> -     xfs_mount_t     *mp;    /* mount structure */
> +     xfs_mount_t     *mp = tp->t_mountp;
>       xfs_perag_t     *pag;   /* per allocation group data */
>  
> -     agf = XFS_BUF_TO_AGF(agbp);
>       /*
>        * Freelist is empty, give up.
>        */
> +     agf = XFS_BUF_TO_AGF(agbp);
>       if (!agf->agf_flcount) {
>               *bnop = NULLAGBLOCK;
>               return 0;
> @@ -2003,15 +2034,17 @@ xfs_alloc_get_freelist(
>       /*
>        * Read the array of free blocks.
>        */
> -     mp = tp->t_mountp;
> -     if ((error = xfs_alloc_read_agfl(mp, tp,
> -                     be32_to_cpu(agf->agf_seqno), &agflbp)))
> +     error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
> +                                 &agflbp);
> +     if (error)
>               return error;
> -     agfl = XFS_BUF_TO_AGFL(agflbp);
> +
> +
>       /*
>        * Get the block number and update the data structures.
>        */
> -     bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
> +     agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
> +     bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
>       be32_add_cpu(&agf->agf_flfirst, 1);
>       xfs_trans_brelse(tp, agflbp);
>       if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
> @@ -2104,12 +2137,13 @@ xfs_alloc_put_freelist(
>       int                     btreeblk) /* block came from a AGF btree */
>  {
>       xfs_agf_t               *agf;   /* a.g. freespace structure */
> -     xfs_agfl_t              *agfl;  /* a.g. free block array */
>       __be32                  *blockp;/* pointer to array entry */
>       int                     error;
>       int                     logflags;
>       xfs_mount_t             *mp;    /* mount structure */
>       xfs_perag_t             *pag;   /* per allocation group data */
> +     __be32                  *agfl_bno;
> +     int                     startoff;
>  
>       agf = XFS_BUF_TO_AGF(agbp);
>       mp = tp->t_mountp;
> @@ -2117,7 +2151,6 @@ xfs_alloc_put_freelist(
>       if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
>                       be32_to_cpu(agf->agf_seqno), &agflbp)))
>               return error;
> -     agfl = XFS_BUF_TO_AGFL(agflbp);
>       be32_add_cpu(&agf->agf_fllast, 1);
>       if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
>               agf->agf_fllast = 0;
> @@ -2138,13 +2171,17 @@ xfs_alloc_put_freelist(
>       xfs_alloc_log_agf(tp, agbp, logflags);
>  
>       ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
> -     blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
> +
> +     agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
> +     blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
>       *blockp = cpu_to_be32(bno);
> +     startoff = (char *)blockp - (char *)agflbp->b_addr;
> +
>       xfs_alloc_log_agf(tp, agbp, logflags);
> -     xfs_trans_log_buf(tp, agflbp,
> -             (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
> -             (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
> -                     sizeof(xfs_agblock_t) - 1));
> +
> +     xfs_trans_buf_set_type(tp, agflbp, XFS_BLF_AGFL_BUF);
> +     xfs_trans_log_buf(tp, agflbp, startoff,
> +                       startoff + sizeof(xfs_agblock_t) - 1);
>       return 0;
>  }
>  
> diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
> index 76bd7a1..067d5f0 100644
> --- a/fs/xfs/xfs_buf_item.h
> +++ b/fs/xfs/xfs_buf_item.h
> @@ -46,13 +46,15 @@ extern kmem_zone_t        *xfs_buf_item_zone;
>   */
>  #define XFS_BLF_BTREE_BUF    (1<<5)
>  #define XFS_BLF_AGF_BUF              (1<<6)
> +#define XFS_BLF_AGFL_BUF     (1<<7)
>  
>  #define XFS_BLF_TYPE_MASK    \
>               (XFS_BLF_UDQUOT_BUF | \
>                XFS_BLF_PDQUOT_BUF | \
>                XFS_BLF_GDQUOT_BUF | \
>                XFS_BLF_BTREE_BUF | \
> -              XFS_BLF_AGF_BUF)
> +              XFS_BLF_AGF_BUF | \
> +              XFS_BLF_AGFL_BUF)
>  
>  #define      XFS_BLF_CHUNK           128
>  #define      XFS_BLF_SHIFT           7
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index 10ee0b8..1edfdf0 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -268,6 +268,11 @@ xfs_growfs_data_private(
>               }
>  
>               agfl = XFS_BUF_TO_AGFL(bp);
> +             if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +                     agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
> +                     agfl->agfl_seqno = cpu_to_be32(agno);
> +                     uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
> +             }
>               for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
>                       agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
>  
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 65c35d5..81d3cc5a 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -1961,6 +1961,13 @@ xlog_recover_do_reg_buffer(
>               }
>               bp->b_ops = &xfs_agf_buf_ops;
>               break;
> +     case XFS_BLF_AGFL_BUF:
> +             if (*(__be32 *)bp->b_addr != cpu_to_be32(XFS_AGFL_MAGIC)) {
> +                     xfs_warn(mp, "Bad AGFL block magic!");
> +                     ASSERT(0);
> +             }
> +             bp->b_ops = &xfs_agfl_buf_ops;
> +             break;

Your changes for v2 in this section look good.

>       default:
>               break;
>       }
> -- 
> 1.7.10
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

<Prev in Thread] Current Thread [Next in Thread>