[PATCH 1/6] xfs: rework dquot CRCs
Brian Foster
bfoster at redhat.com
Mon Jun 3 13:18:33 CDT 2013
On 06/03/2013 01:28 AM, Dave Chinner wrote:
> From: Dave Chinner <dchinner at redhat.com>
>
> Calculating dquot CRCs when the backing buffer is written back just
> doesn't work reliably. There are several places which manipulate
> dquots directly in the buffers, and they don't calculate CRCs
> appropriately, nor do they always set the buffer up to calculate
> CRCs appropriately.
>
> Firstly, if we log a dquot buffer (e.g. during allocation) it gets
> logged without valid CRC, and so on recovery we end up with a dquot
> that is not valid.
>
> Secondly, if we recover/repair a dquot, we don't have a verifier
> attached to the buffer and hence CRCs arenot calculate don the way
> down to disk.
>
> Thirdly, calculating the CRC after we've changed the contents means
> that if we re-read the dquot from the buffer, we cannot verify the
> contents of the dquot are valid, as the CRC is invalid.
>
> So, to avoid all the dquot CRC errors that are being detected by the
> read verifier, change to using the same model as for inodes. that
> is, dquot CRCs are calculated and written to the backing buffer at
> the time the dquot is flushed to the backing buffer. If we modify
> the dquuot directly in the backing buffer, calculate the CRC
> immediately after the modification is complete. Hence the dquot in
> the on-disk buffer should always have a valid CRC.
>
> Signed-off-by: Dave Chinner <dchinner at redhat.com>
Reviewed-by: Brian Foster <bfoster at redhat.com>
> ---
> fs/xfs/xfs_dquot.c | 37 ++++++++++++++++---------------------
> fs/xfs/xfs_log_recover.c | 10 ++++++++++
> fs/xfs/xfs_qm.c | 40 ++++++++++++++++++++++++++++++----------
> fs/xfs/xfs_quota.h | 2 ++
> 4 files changed, 58 insertions(+), 31 deletions(-)
>
> diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
> index a41f8bf..044e97a 100644
> --- a/fs/xfs/xfs_dquot.c
> +++ b/fs/xfs/xfs_dquot.c
> @@ -249,8 +249,11 @@ xfs_qm_init_dquot_blk(
> d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
> d->dd_diskdq.d_id = cpu_to_be32(curid);
> d->dd_diskdq.d_flags = type;
> - if (xfs_sb_version_hascrc(&mp->m_sb))
> + if (xfs_sb_version_hascrc(&mp->m_sb)) {
> uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
> + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
> + XFS_DQUOT_CRC_OFF);
> + }
> }
>
> xfs_trans_dquot_buf(tp, bp,
> @@ -286,23 +289,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
> dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
> }
>
> -STATIC void
> -xfs_dquot_buf_calc_crc(
> - struct xfs_mount *mp,
> - struct xfs_buf *bp)
> -{
> - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
> - int i;
> -
> - if (!xfs_sb_version_hascrc(&mp->m_sb))
> - return;
> -
> - for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
> - xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
> - offsetof(struct xfs_dqblk, dd_crc));
> - }
> -}
> -
> STATIC bool
> xfs_dquot_buf_verify_crc(
> struct xfs_mount *mp,
> @@ -328,12 +314,11 @@ xfs_dquot_buf_verify_crc(
>
> for (i = 0; i < ndquots; i++, d++) {
> if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
> - offsetof(struct xfs_dqblk, dd_crc)))
> + XFS_DQUOT_CRC_OFF))
> return false;
> if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
> return false;
> }
> -
> return true;
> }
>
> @@ -393,6 +378,11 @@ xfs_dquot_buf_read_verify(
> }
> }
>
> +/*
> + * we don't calculate the CRC here as that is done when the dquot is flushed to
> + * the buffer after the update is done. This ensures that the dquot in the
> + * buffer always has an up-to-date CRC value.
> + */
> void
> xfs_dquot_buf_write_verify(
> struct xfs_buf *bp)
> @@ -404,7 +394,6 @@ xfs_dquot_buf_write_verify(
> xfs_buf_ioerror(bp, EFSCORRUPTED);
> return;
> }
> - xfs_dquot_buf_calc_crc(mp, bp);
> }
>
> const struct xfs_buf_ops xfs_dquot_buf_ops = {
> @@ -1151,11 +1140,17 @@ xfs_qm_dqflush(
> * copy the lsn into the on-disk dquot now while we have the in memory
> * dquot here. This can't be done later in the write verifier as we
> * can't get access to the log item at that point in time.
> + *
> + * We also calculate the CRC here so that the on-disk dquot in the
> + * buffer always has a valid CRC. This ensures there is no possibility
> + * of a dquot without an up-to-date CRC getting to disk.
> */
> if (xfs_sb_version_hascrc(&mp->m_sb)) {
> struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
>
> dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
> + xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
> + XFS_DQUOT_CRC_OFF);
> }
>
> /*
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index d9e4d3c..d6204d1 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2266,6 +2266,12 @@ xfs_qm_dqcheck(
> d->dd_diskdq.d_flags = type;
> d->dd_diskdq.d_id = cpu_to_be32(id);
>
> + if (xfs_sb_version_hascrc(&mp->m_sb)) {
> + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
> + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
> + XFS_DQUOT_CRC_OFF);
> + }
> +
> return errs;
> }
>
> @@ -2793,6 +2799,10 @@ xlog_recover_dquot_pass2(
> }
>
> memcpy(ddq, recddq, item->ri_buf[1].i_len);
> + if (xfs_sb_version_hascrc(&mp->m_sb)) {
> + xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
> + XFS_DQUOT_CRC_OFF);
> + }
>
> ASSERT(dq_f->qlf_size == 2);
> ASSERT(bp->b_target->bt_mount == mp);
> diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
> index f41702b..b75c9bb 100644
> --- a/fs/xfs/xfs_qm.c
> +++ b/fs/xfs/xfs_qm.c
> @@ -41,6 +41,7 @@
> #include "xfs_qm.h"
> #include "xfs_trace.h"
> #include "xfs_icache.h"
> +#include "xfs_cksum.h"
>
> /*
> * The global quota manager. There is only one of these for the entire
> @@ -839,7 +840,7 @@ xfs_qm_reset_dqcounts(
> xfs_dqid_t id,
> uint type)
> {
> - xfs_disk_dquot_t *ddq;
> + struct xfs_dqblk *dqb;
> int j;
>
> trace_xfs_reset_dqcounts(bp, _RET_IP_);
> @@ -853,8 +854,12 @@ xfs_qm_reset_dqcounts(
> do_div(j, sizeof(xfs_dqblk_t));
> ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
> #endif
> - ddq = bp->b_addr;
> + dqb = bp->b_addr;
> for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
> + struct xfs_disk_dquot *ddq;
> +
> + ddq = (struct xfs_disk_dquot *)&dqb[j];
> +
> /*
> * Do a sanity check, and if needed, repair the dqblk. Don't
> * output any warnings because it's perfectly possible to
> @@ -871,7 +876,12 @@ xfs_qm_reset_dqcounts(
> ddq->d_bwarns = 0;
> ddq->d_iwarns = 0;
> ddq->d_rtbwarns = 0;
> - ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
> +
> + if (xfs_sb_version_hascrc(&mp->m_sb)) {
> + xfs_update_cksum((char *)&dqb[j],
> + sizeof(struct xfs_dqblk),
> + XFS_DQUOT_CRC_OFF);
> + }
> }
> }
>
> @@ -907,19 +917,29 @@ xfs_qm_dqiter_bufs(
> XFS_FSB_TO_DADDR(mp, bno),
> mp->m_quotainfo->qi_dqchunklen, 0, &bp,
> &xfs_dquot_buf_ops);
> - if (error)
> - break;
>
> /*
> - * XXX(hch): need to figure out if it makes sense to validate
> - * the CRC here.
> + * CRC and validation errors will return a EFSCORRUPTED here. If
> + * this occurs, re-read without CRC validation so that we can
> + * repair the damage via xfs_qm_reset_dqcounts(). This process
> + * will leave a trace in the log indicating corruption has
> + * been detected.
> */
> + if (error == EFSCORRUPTED) {
> + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
> + XFS_FSB_TO_DADDR(mp, bno),
> + mp->m_quotainfo->qi_dqchunklen, 0, &bp,
> + NULL);
> + }
> +
> + if (error)
> + break;
> +
> xfs_qm_reset_dqcounts(mp, bp, firstid, type);
> xfs_buf_delwri_queue(bp, buffer_list);
> xfs_buf_relse(bp);
> - /*
> - * goto the next block.
> - */
> +
> + /* goto the next block. */
> bno++;
> firstid += mp->m_quotainfo->qi_dqperchunk;
> }
> diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
> index c61e31c..c38068f 100644
> --- a/fs/xfs/xfs_quota.h
> +++ b/fs/xfs/xfs_quota.h
> @@ -87,6 +87,8 @@ typedef struct xfs_dqblk {
> uuid_t dd_uuid; /* location information */
> } xfs_dqblk_t;
>
> +#define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc)
> +
> /*
> * flags for q_flags field in the dquot.
> */
>
More information about the xfs
mailing list