On Mon, Dec 07, 2015 at 09:21:23AM -0500, Brian Foster wrote:
> On Fri, Dec 04, 2015 at 12:26:06PM -0800, Darrick J. Wong wrote:
> > Teach the inobt/finobt scanning functions how to deal with sparse
> > inode chunks well enough that we can pass the spot-check. Should
> > fix the xfs/076 failures.
> >
> > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > ---
>
> Hi Darrick,
>
> Thanks for the patch...
>
> > db/check.c | 90
> > ++++++++++++++++++++++++++++++++++++++++++++++++++----------
> > 1 file changed, 75 insertions(+), 15 deletions(-)
> >
> > diff --git a/db/check.c b/db/check.c
> > index 9c1541d..14c7de5 100644
> > --- a/db/check.c
> > +++ b/db/check.c
> > @@ -4319,6 +4319,30 @@ scanfunc_cnt(
> > scan_sbtree(agf, be32_to_cpu(pp[i]), level, 0, scanfunc_cnt,
> > TYP_CNTBT);
> > }
> >
> > +static bool
> > +ino_issparse(
> > + struct xfs_inobt_rec *rp,
> > + int offset)
> > +{
> > + if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
> > + return false;
> > +
> > + return xfs_inobt_is_sparse_disk(rp, offset);
> > +}
> > +
> > +static int
> > +find_first_zero_bit(
> > + unsigned long mask)
> > +{
> > + int n;
> > + int b = 0;
> > +
> > + for (n = 0; n < sizeof(mask) * NBBY && (mask & 1); n++, mask >>= 1)
> > + b++;
> > +
> > + return b;
> > +}
> > +
> > static void
> > scanfunc_ino(
> > struct xfs_btree_block *block,
> > @@ -4336,6 +4360,10 @@ scanfunc_ino(
> > int off;
> > xfs_inobt_ptr_t *pp;
> > xfs_inobt_rec_t *rp;
> > + bool sparse;
> > + int inodes_per_chunk;
> > + int freecount;
> > + int startidx;
> >
> > if (be32_to_cpu(block->bb_magic) != XFS_IBT_MAGIC &&
> > be32_to_cpu(block->bb_magic) != XFS_IBT_CRC_MAGIC) {
> > @@ -4364,29 +4392,44 @@ scanfunc_ino(
> > }
> > rp = XFS_INOBT_REC_ADDR(mp, block, 1);
> > for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++) {
> > - agino = be32_to_cpu(rp[i].ir_startino);
> > + sparse = xfs_sb_version_hassparseinodes(&mp->m_sb);
> > + if (sparse) {
> > + unsigned long holemask;
> > +
> > + inodes_per_chunk = rp[i].ir_u.sp.ir_count;
> > + freecount = rp[i].ir_u.sp.ir_freecount;
> > + holemask =
> > be16_to_cpu(rp[i].ir_u.sp.ir_holemask);
> > + startidx = find_first_zero_bit(holemask) *
> > XFS_INODES_PER_HOLEMASK_BIT;
> > + } else {
> > + inodes_per_chunk = XFS_INODES_PER_CHUNK;
> > + freecount =
> > be32_to_cpu(rp[i].ir_u.f.ir_freecount);
> > + startidx = 0;
> > + }
>
> This looks Ok...
>
> > + agino = be32_to_cpu(rp[i].ir_startino) + startidx;
> > off = XFS_INO_TO_OFFSET(mp, agino);
> > if (off == 0) {
> > - if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
> > + if (!sparse &&
> > + (sbversion & XFS_SB_VERSION_ALIGNBIT) &&
>
> Here we're not checking the record alignment solely based on whether the
> fs has the sparse inodes feature, which I don't think is what we want.
> The sparse inodes feature tweaks sb_inoalignmt to the record size and
> sets sb_spino_align (to the cluster size) to dictate the sparse chunk
> allocation requirements. IOW, we probably want to check startino
> alignment against sb_inoalignmt even if startino is not actually a
> physical inode, because the record must still be correctly aligned.
<nod>
> > mp->m_sb.sb_inoalignmt &&
> > (XFS_INO_TO_AGBNO(mp, agino) %
> > mp->m_sb.sb_inoalignmt))
> > sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
> > set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, agino),
> > (xfs_extlen_t)MAX(1,
> > - XFS_INODES_PER_CHUNK >>
> > + inodes_per_chunk >>
>
> While I think this might be practically correct in that with the current
> supported alignments, the chunk of the record that is physically
> allocated is typically contiguous, this might not always be the case.
> E.g., if the cluster size is reduced due to some future mkfs change, the
> allocated regions of the sparse record could be discontiguous.
Aha, I hadn't realized that this could be a future possibility. It's not
difficult to make it examine each chunk individually.
>
> > mp->m_sb.sb_inopblog),
> > DBM_INODE, seqno, bno);
> > }
> > - icount += XFS_INODES_PER_CHUNK;
> > - agicount += XFS_INODES_PER_CHUNK;
> > - ifree += be32_to_cpu(rp[i].ir_u.f.ir_freecount);
> > - agifreecount += be32_to_cpu(rp[i].ir_u.f.ir_freecount);
> > + icount += inodes_per_chunk;
> > + agicount += inodes_per_chunk;
> > + ifree += freecount;
> > + agifreecount += freecount;
> > push_cur();
> > set_cur(&typtab[TYP_INODE],
> > XFS_AGB_TO_DADDR(mp, seqno,
> > XFS_AGINO_TO_AGBNO(mp, agino)),
> > - (int)XFS_FSB_TO_BB(mp, mp->m_ialloc_blks),
> > + (int)XFS_FSB_TO_BB(mp, inodes_per_chunk >>
> > + mp->m_sb.sb_inopblog),
>
> The same general contiguity issue applies here (and to the finobt
> equivalent scan function)...
>
> I think what we need to do here is rather than try to tweak the
> set_dbmap()/set_cur() call params to try and cover the allocated range
> in one shot, refactor the part of the code that processes the actual
> inodes to walk the record a cluster buffer at a time. E.g., after
> we've grabbed the record data, checked the startino alignment, etc., add
> a new loop that iterates the associated inode chunk a cluster buffer at
> a time. If the starting inode of that cluster buffer is sparse, just
> skip to the next cluster. Otherwise, carry on with the process_inode(),
> set_dbmap() bits, etc., for each inode in the buffer.
Ok, so it'll be easy to iterate through holemask to figure out where the chunks
live and then do the set_cur/process_inode work on just those chunks.
Now that I think about it, there's no check for sb_spino_align either, so
that'll get added in too.
> FWIW, xfsprogs commit 04b21e41 ("metadump: support sparse inode
> records") should provide a pretty close example of the same change
> required here (db/metadump.c:copy_inode_chunk()).
<nod> Thanks for the input!
--D
>
> Brian
>
> > DB_RING_IGN, NULL);
> > if (iocur_top->data == NULL) {
> > if (!sflag)
> > @@ -4399,20 +4442,22 @@ scanfunc_ino(
> > continue;
> > }
> > for (j = 0, nfree = 0; j < XFS_INODES_PER_CHUNK; j++) {
> > + if (ino_issparse(&rp[i], j))
> > + continue;
> > isfree = XFS_INOBT_IS_FREE_DISK(&rp[i], j);
> > if (isfree)
> > nfree++;
> > - process_inode(agf, agino + j,
> > - (xfs_dinode_t *)((char
> > *)iocur_top->data + ((off + j) << mp->m_sb.sb_inodelog)),
> > + process_inode(agf, agino - startidx + j,
> > + (xfs_dinode_t *)((char
> > *)iocur_top->data + ((off - startidx + j) << mp->m_sb.sb_inodelog)),
> > isfree);
> > }
> > - if (nfree != be32_to_cpu(rp[i].ir_u.f.ir_freecount)) {
> > + if (nfree != freecount) {
> > if (!sflag)
> > dbprintf(_("ir_freecount/free mismatch,
> > "
> > "inode chunk %u/%u, freecount "
> > "%d nfree %d\n"),
> > seqno, agino,
> > -
> > be32_to_cpu(rp[i].ir_u.f.ir_freecount), nfree);
> > + freecount, nfree);
> > error++;
> > }
> > pop_cur();
> > @@ -4447,6 +4492,9 @@ scanfunc_fino(
> > int off;
> > xfs_inobt_ptr_t *pp;
> > struct xfs_inobt_rec *rp;
> > + bool sparse;
> > + int inodes_per_chunk;
> > + int startidx;
> >
> > if (be32_to_cpu(block->bb_magic) != XFS_FIBT_MAGIC &&
> > be32_to_cpu(block->bb_magic) != XFS_FIBT_CRC_MAGIC) {
> > @@ -4475,17 +4523,29 @@ scanfunc_fino(
> > }
> > rp = XFS_INOBT_REC_ADDR(mp, block, 1);
> > for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++) {
> > - agino = be32_to_cpu(rp[i].ir_startino);
> > + sparse = xfs_sb_version_hassparseinodes(&mp->m_sb);
> > + if (sparse) {
> > + unsigned long holemask;
> > +
> > + inodes_per_chunk = rp[i].ir_u.sp.ir_count;
> > + holemask =
> > be16_to_cpu(rp[i].ir_u.sp.ir_holemask);
> > + startidx = find_first_zero_bit(holemask) *
> > XFS_INODES_PER_HOLEMASK_BIT;
> > + } else {
> > + inodes_per_chunk = XFS_INODES_PER_CHUNK;
> > + startidx = 0;
> > + }
> > + agino = be32_to_cpu(rp[i].ir_startino) + startidx;
> > off = XFS_INO_TO_OFFSET(mp, agino);
> > if (off == 0) {
> > - if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
> > + if (!sparse &&
> > + (sbversion & XFS_SB_VERSION_ALIGNBIT) &&
> > mp->m_sb.sb_inoalignmt &&
> > (XFS_INO_TO_AGBNO(mp, agino) %
> > mp->m_sb.sb_inoalignmt))
> > sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
> > check_set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp,
> > agino),
> > (xfs_extlen_t)MAX(1,
> > - XFS_INODES_PER_CHUNK >>
> > + inodes_per_chunk >>
> > mp->m_sb.sb_inopblog),
> > DBM_INODE, DBM_INODE, seqno, bno);
> > }
> >
> > _______________________________________________
> > xfs mailing list
> > xfs@xxxxxxxxxxx
> > http://oss.sgi.com/mailman/listinfo/xfs
|