xfs
[Top] [All Lists]

[PATCH v3 14/20] xfsprogs/repair: phase 2 finobt scan

To: xfs@xxxxxxxxxxx
Subject: [PATCH v3 14/20] xfsprogs/repair: phase 2 finobt scan
From: Brian Foster <bfoster@xxxxxxxxxx>
Date: Thu, 10 Apr 2014 12:11:04 -0400
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <1397146270-42993-1-git-send-email-bfoster@xxxxxxxxxx>
References: <1397146270-42993-1-git-send-email-bfoster@xxxxxxxxxx>
If one exists, scan the free inode btree in phase 2 of xfs_repair.
We use the same general infrastructure as for the inobt scan, but
trigger finobt chunk scan logic in in scan_inobt() via the magic
value.

The new scan_single_finobt_chunk() function is similar to the inobt
equivalent with some finobt specific logic. We can expect that
underlying inode chunk blocks are already marked used due to the
previous inobt scan. We can also expect to find every record
tracked by the finobt already accounted for in the in-core tree
with equivalent (and internally consistent) inobt record data.

Spit out a warning on any divergences from the above and add the
inodes referenced by the current finobt record to the appropriate
in-core tree.

Signed-off-by: Brian Foster <bfoster@xxxxxxxxxx>
---
 repair/scan.c | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 234 insertions(+), 5 deletions(-)

diff --git a/repair/scan.c b/repair/scan.c
index 1744c32..cd4d51b 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -46,6 +46,7 @@ struct aghdr_cnts {
        __uint64_t      fdblocks;
        __uint64_t      icount;
        __uint64_t      ifreecount;
+       __uint32_t      fibtfreecount;
 };
 
 void
@@ -877,6 +878,196 @@ _("inode rec for ino %" PRIu64 " (%d/%d) overlaps 
existing rec (start %d/%d)\n")
        return suspect;
 }
 
+static int
+scan_single_finobt_chunk(
+       xfs_agnumber_t          agno,
+       xfs_inobt_rec_t         *rp,
+       int                     suspect)
+{
+       xfs_ino_t               lino;
+       xfs_agino_t             ino;
+       xfs_agblock_t           agbno;
+       int                     j;
+       int                     nfree;
+       int                     off;
+       int                     state;
+       ino_tree_node_t         *first_rec, *last_rec, *ino_rec;
+
+       ino = be32_to_cpu(rp->ir_startino);
+       off = XFS_AGINO_TO_OFFSET(mp, ino);
+       agbno = XFS_AGINO_TO_AGBNO(mp, ino);
+       lino = XFS_AGINO_TO_INO(mp, agno, ino);
+
+       /*
+        * on multi-block block chunks, all chunks start
+        * at the beginning of the block.  with multi-chunk
+        * blocks, all chunks must start on 64-inode boundaries
+        * since each block can hold N complete chunks. if
+        * fs has aligned inodes, all chunks must start
+        * at a fs_ino_alignment*N'th agbno.  skip recs
+        * with badly aligned starting inodes.
+        */
+       if (ino == 0 ||
+           (inodes_per_block <= XFS_INODES_PER_CHUNK && off !=  0) ||
+           (inodes_per_block > XFS_INODES_PER_CHUNK &&
+            off % XFS_INODES_PER_CHUNK != 0) ||
+           (fs_aligned_inodes && agbno % fs_ino_alignment != 0)) {
+               do_warn(
+       _("badly aligned finobt inode rec (starting inode = %" PRIu64 ")\n"),
+                       lino);
+               suspect++;
+       }
+
+       /*
+        * verify numeric validity of inode chunk first
+        * before inserting into a tree.  don't have to
+        * worry about the overflow case because the
+        * starting ino number of a chunk can only get
+        * within 255 inodes of max (NULLAGINO).  if it
+        * gets closer, the agino number will be illegal
+        * as the agbno will be too large.
+        */
+       if (verify_aginum(mp, agno, ino)) {
+               do_warn(
+_("bad starting inode # (%" PRIu64 " (0x%x 0x%x)) in finobt rec, skipping 
rec\n"),
+                       lino, agno, ino);
+               return ++suspect;
+       }
+
+       if (verify_aginum(mp, agno,
+                       ino + XFS_INODES_PER_CHUNK - 1)) {
+               do_warn(
+_("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in finobt rec, skipping 
rec\n"),
+                       lino + XFS_INODES_PER_CHUNK - 1,
+                       agno,
+                       ino + XFS_INODES_PER_CHUNK - 1);
+               return ++suspect;
+       }
+
+       /*
+        * cross check state of each block containing inodes referenced by the
+        * finobt against what we have already scanned from the alloc inobt.
+        */
+       if (off == 0 && !suspect) {
+               for (j = 0;
+                    j < XFS_INODES_PER_CHUNK;
+                    j += mp->m_sb.sb_inopblock) {
+                       agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
+
+                       state = get_bmap(agno, agbno);
+                       if (state == XR_E_INO) {
+                               continue;
+                       } else if ((state == XR_E_UNKNOWN) ||
+                                  (state == XR_E_INUSE_FS && agno == 0 &&
+                                   ino + j >= first_prealloc_ino &&
+                                   ino + j < last_prealloc_ino)) {
+                               do_warn(
+_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb 
%d\n"),
+                                       agno, agbno, mp->m_sb.sb_inopblock);
+
+                               set_bmap(agno, agbno, XR_E_INO);
+                               suspect++;
+                       } else {
+                               do_warn(
+_("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
+                                       agno, agbno, mp->m_sb.sb_inopblock);
+                               return ++suspect;
+                       }
+               }
+       }
+
+       /*
+        * ensure we have an incore entry for each chunk
+        */
+       find_inode_rec_range(mp, agno, ino, ino + XFS_INODES_PER_CHUNK,
+                            &first_rec, &last_rec);
+
+       if (first_rec) {
+               if (suspect)
+                       return suspect;
+
+               /*
+                * verify consistency between finobt record and incore state
+                */
+               if (first_rec->ino_startnum != ino) {
+                       do_warn(
+_("finobt rec for ino %" PRIu64 " (%d/%u) does not match existing rec 
(%d/%d)\n"),
+                               lino, agno, ino, agno, first_rec->ino_startnum);
+                       return ++suspect;
+               }
+
+               nfree = 0;
+               for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+                       if (XFS_INOBT_IS_FREE_DISK(rp, j)) {
+                               nfree++;
+                               if (!suspect && !is_inode_free(first_rec, j))
+                                       suspect++;
+                       }
+               }
+
+               goto check_freecount;
+       }
+
+       /*
+        * the finobt contains a record that the previous alloc inobt scan never
+        * found. insert the inodes into the appropriate tree.
+        */
+
+       do_warn(
+               _("undiscovered finobt record, ino %" PRIu64 " (%d/%u)\n"),
+               lino, agno, ino);
+
+       if (!suspect) {
+               /*
+                * inodes previously inserted into the uncertain tree should be
+                * superceded by these when the uncertain tree is processed
+                */
+               nfree = 0;
+               if (XFS_INOBT_IS_FREE_DISK(rp, 0)) {
+                       nfree++;
+                       ino_rec = set_inode_free_alloc(mp, agno, ino);
+               } else  {
+                       ino_rec = set_inode_used_alloc(mp, agno, ino);
+               }
+               for (j = 1; j < XFS_INODES_PER_CHUNK; j++) {
+                       if (XFS_INOBT_IS_FREE_DISK(rp, j)) {
+                               nfree++;
+                               set_inode_free(ino_rec, j);
+                       } else  {
+                               set_inode_used(ino_rec, j);
+                       }
+               }
+       } else {
+               /*
+                * this should handle the case where the inobt scan may have
+                * already added uncertain inodes
+                */
+               nfree = 0;
+               for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+                       if (XFS_INOBT_IS_FREE_DISK(rp, j)) {
+                               add_aginode_uncertain(mp, agno, ino + j, 1);
+                               nfree++;
+                       } else {
+                               add_aginode_uncertain(mp, agno, ino + j, 0);
+                       }
+               }
+       }
+
+check_freecount:
+
+       if (nfree != be32_to_cpu(rp->ir_freecount)) {
+               do_warn(
+_("finobt ir_freecount/free mismatch, inode chunk %d/%u, freecount %d nfree 
%d\n"),
+                       agno, ino, be32_to_cpu(rp->ir_freecount), nfree);
+       }
+
+       if (!nfree) {
+               do_warn(
+_("finobt record with no free inodes, inode chunk %d/%u\n"), agno, ino);
+       }
+
+       return suspect;
+}
 
 /*
  * this one walks the inode btrees sucking the info there into
@@ -985,12 +1176,29 @@ _("inode btree block claimed (state %d), agno %d, bno 
%d, suspect %d\n"),
                 * the block.  skip processing of bogus records.
                 */
                for (i = 0; i < numrecs; i++) {
-                       agcnts->agicount += XFS_INODES_PER_CHUNK;
-                       agcnts->icount += XFS_INODES_PER_CHUNK;
-                       agcnts->agifreecount += be32_to_cpu(rp[i].ir_freecount);
-                       agcnts->ifreecount += be32_to_cpu(rp[i].ir_freecount);
+                       if (magic == XFS_IBT_MAGIC ||
+                           magic == XFS_IBT_CRC_MAGIC) {
+                               agcnts->agicount += XFS_INODES_PER_CHUNK;
+                               agcnts->icount += XFS_INODES_PER_CHUNK;
+                               agcnts->agifreecount +=
+                                       be32_to_cpu(rp[i].ir_freecount);
+                               agcnts->ifreecount +=
+                                       be32_to_cpu(rp[i].ir_freecount);
+
+                               suspect = scan_single_ino_chunk(agno, &rp[i],
+                                               suspect);
+                       } else {
+                               /*
+                                * the finobt tracks records with free inodes,
+                                * so only the free inode count is expected to 
be
+                                * consistent with the agi
+                                */
+                               agcnts->fibtfreecount +=
+                                       be32_to_cpu(rp[i].ir_freecount);
 
-                       suspect = scan_single_ino_chunk(agno, &rp[i], suspect);
+                               suspect = scan_single_finobt_chunk(agno, &rp[i],
+                                               suspect);
+                       }
                }
 
                if (suspect)
@@ -1175,6 +1383,20 @@ validate_agi(
                        be32_to_cpu(agi->agi_root), agno);
        }
 
+       if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+               bno = be32_to_cpu(agi->agi_free_root);
+               if (bno != 0 && verify_agbno(mp, agno, bno)) {
+                       magic = xfs_sb_version_hascrc(&mp->m_sb) ?
+                                       XFS_FIBT_CRC_MAGIC : XFS_FIBT_MAGIC;
+                       scan_sbtree(bno, be32_to_cpu(agi->agi_free_level),
+                                   agno, 0, scan_inobt, 1, magic, agcnts,
+                                   &xfs_inobt_buf_ops);
+               } else {
+                       do_warn(_("bad agbno %u for finobt root, agno %d\n"),
+                               be32_to_cpu(agi->agi_free_root), agno);
+               }
+       }
+
        if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
                do_warn(_("agi_count %u, counted %u in ag %u\n"),
                         be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
@@ -1185,6 +1407,13 @@ validate_agi(
                        be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, 
agno);
        }
 
+       if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
+           be32_to_cpu(agi->agi_freecount) != agcnts->fibtfreecount) {
+               do_warn(_("agi_freecount %u, counted %u in ag %u finobt\n"),
+                       be32_to_cpu(agi->agi_freecount), agcnts->fibtfreecount,
+                       agno);
+       }
+
        for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
                xfs_agino_t     agino = be32_to_cpu(agi->agi_unlinked[i]);
 
-- 
1.8.3.1

<Prev in Thread] Current Thread [Next in Thread>