xfs
[Top] [All Lists]

[PATCH] xfs_repair: multithread phase 2

To: xfs@xxxxxxxxxxx
Subject: [PATCH] xfs_repair: multithread phase 2
From: Dave Chinner <david@xxxxxxxxxxxxx>
Date: Tue, 4 Jan 2011 17:13:08 +1100
From: Dave Chinner <dchinner@xxxxxxxxxx>

Running some recent repair tests on broken filesystem meant running
phase 1 and 2 repeatedly to reproduce an issue at the start of phase
3. Phase 2 was taking approximately 10 minutes to run as it
processes each AG serially.

Phase 2 can be trivially parallelised - it is simply scanning the
per AG trees to calculate free block counts and free and used inodes
counts. This can be done safely in parallel by giving each AG it's
own structure to aggregate counts into, then once the AG scan is
complete adding them all together.

This patch uses 32-way threading which results in no noticable
slowdown on single SATA drives with NCQ, but results in ~10x
reduction in runtime on a 12 disk RAID-0 array.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 repair/phase2.c |   16 +---
 repair/scan.c   |  303 +++++++++++++++++++++++++++++++-----------------------
 repair/scan.h   |   37 -------
 3 files changed, 176 insertions(+), 180 deletions(-)

diff --git a/repair/phase2.c b/repair/phase2.c
index e81ebf0..2f22c51 100644
--- a/repair/phase2.c
+++ b/repair/phase2.c
@@ -26,8 +26,7 @@
 #include "progress.h"
 
 void   set_mp(xfs_mount_t *mpp);
-void   scan_ag(xfs_agnumber_t agno);
-void   validate_sb(struct xfs_sb *sb);
+void   scan_ags(struct xfs_mount *mp);
 
 /* workaround craziness in the xlog routines */
 int xlog_recover_do_trans(xlog_t *log, xlog_recover_t *t, int p) { return 0; }
@@ -109,7 +108,6 @@ zero_log(xfs_mount_t *mp)
 void
 phase2(xfs_mount_t *mp)
 {
-       xfs_agnumber_t          i;
        int                     j;
        ino_tree_node_t         *ino_rec;
 
@@ -138,17 +136,7 @@ phase2(xfs_mount_t *mp)
 
        set_progress_msg(PROG_FMT_SCAN_AG, (__uint64_t) glob_agcount);
 
-       for (i = 0; i < mp->m_sb.sb_agcount; i++)  {
-               scan_ag(i);
-#ifdef XR_INODE_TRACE
-               print_inode_list(i);
-#endif
-       }
-
-       /*
-        * Validate that our manual counts match the superblock.
-        */
-       validate_sb(&mp->m_sb);
+       scan_ags(mp);
 
        print_final_rpt();
 
diff --git a/repair/scan.c b/repair/scan.c
index 85017ff..dd62776 100644
--- a/repair/scan.c
+++ b/repair/scan.c
@@ -35,27 +35,32 @@ extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t 
*sbuf, xfs_sb_t *sb,
 static xfs_mount_t     *mp = NULL;
 
 /*
- * Global variables to validate superblock values against the manual count
+ * Variables to validate AG header values against the manual count
  * from the btree traversal.
- *
- * No locking for now as phase2 is not threaded.
  */
-static __uint64_t      fdblocks;
-static __uint64_t      icount;
-static __uint64_t      ifreecount;
+struct aghdr_cnts {
+       xfs_agnumber_t  agno;
+       xfs_extlen_t    agffreeblks;
+       xfs_extlen_t    agflongest;
+       __uint64_t      agfbtreeblks;
+       __uint32_t      agicount;
+       __uint32_t      agifreecount;
+       __uint64_t      fdblocks;
+       __uint64_t      icount;
+       __uint64_t      ifreecount;
+};
+
+static void
+scanfunc_allocbt(
+       struct xfs_btree_block  *block,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot,
+       __uint32_t              magic,
+       struct aghdr_cnts       *agcnts);
 
-/*
- * Global variables to validate AG header values against the manual count
- * from the btree traversal.
- *
- * Note: these values must be reset when processing a new AG, and for now
- * forces the AG scanning in phase2 to not be threaded.
- */
-static xfs_extlen_t    agffreeblks;
-static xfs_extlen_t    agflongest;
-static __uint64_t      agfbtreeblks;
-static __uint32_t      agicount;
-static __uint32_t      agifreecount;
 
 void
 set_mp(xfs_mount_t *mpp)
@@ -75,8 +80,10 @@ scan_sbtree(
                                xfs_agblock_t           bno,
                                xfs_agnumber_t          agno,
                                int                     suspect,
-                               int                     isroot),
-       int             isroot)
+                               int                     isroot,
+                               struct aghdr_cnts       *agcnts),
+       int             isroot,
+       struct aghdr_cnts *agcnts)
 {
        xfs_buf_t       *bp;
 
@@ -86,7 +93,8 @@ scan_sbtree(
                do_error(_("can't read btree block %d/%d\n"), agno, root);
                return;
        }
-       (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect, isroot);
+       (*func)(XFS_BUF_TO_BLOCK(bp), nlevels - 1, root, agno, suspect,
+                                                       isroot, agcnts);
        libxfs_putbuf(bp);
 }
 
@@ -469,6 +477,34 @@ _("out-of-order bmap key (file offset) in inode %llu, %s 
fork, fsbno %llu\n"),
 }
 
 void
+scanfunc_bno(
+       struct xfs_btree_block  *block,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot,
+       struct aghdr_cnts       *agcnts)
+{
+       return scanfunc_allocbt(block, level, bno, agno,
+                               suspect, isroot, XFS_ABTB_MAGIC, agcnts);
+}
+
+void
+scanfunc_cnt(
+       struct xfs_btree_block  *block,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot,
+       struct aghdr_cnts       *agcnts)
+{
+       return scanfunc_allocbt(block, level, bno, agno,
+                               suspect, isroot, XFS_ABTC_MAGIC, agcnts);
+}
+
+void
 scanfunc_allocbt(
        struct xfs_btree_block  *block,
        int                     level,
@@ -476,7 +512,8 @@ scanfunc_allocbt(
        xfs_agnumber_t          agno,
        int                     suspect,
        int                     isroot,
-       __uint32_t              magic)
+       __uint32_t              magic,
+       struct aghdr_cnts       *agcnts)
 {
        const char              *name;
        int                     i;
@@ -506,8 +543,8 @@ scanfunc_allocbt(
         * free data block counter.
         */
        if (!isroot) {
-               agfbtreeblks++;
-               fdblocks++;
+               agcnts->agfbtreeblks++;
+               agcnts->fdblocks++;
        }
 
        if (be16_to_cpu(block->bb_level) != level) {
@@ -583,10 +620,10 @@ _("%s freespace btree block claimed (state %d), agno %d, 
bno %d, suspect %d\n"),
                                        lastblock = b;
                                }
                        } else {
-                               fdblocks += len;
-                               agffreeblks += len;
-                               if (len > agflongest)
-                                       agflongest = len;
+                               agcnts->fdblocks += len;
+                               agcnts->agffreeblks += len;
+                               if (len > agcnts->agflongest)
+                                       agcnts->agflongest = len;
                                if (len < lastcount) {
                                        do_warn(_(
        "out-of-order cnt btree record %d (%u %u) block %u/%u\n"),
@@ -670,38 +707,12 @@ _("%s freespace btree block claimed (state %d), agno %d, 
bno %d, suspect %d\n"),
                if (bno != 0 && verify_agbno(mp, agno, bno)) {
                        scan_sbtree(bno, level, agno, suspect,
                                    (magic == XFS_ABTB_MAGIC) ?
-                                    scanfunc_bno : scanfunc_cnt, 0);
+                                    scanfunc_bno : scanfunc_cnt, 0,
+                                    agcnts);
                }
        }
 }
 
-void
-scanfunc_bno(
-       struct xfs_btree_block  *block,
-       int                     level,
-       xfs_agblock_t           bno,
-       xfs_agnumber_t          agno,
-       int                     suspect,
-       int                     isroot)
-{
-       return scanfunc_allocbt(block, level, bno, agno,
-                               suspect, isroot, XFS_ABTB_MAGIC);
-}
-
-void
-scanfunc_cnt(
-       struct xfs_btree_block  *block,
-       int                     level,
-       xfs_agblock_t           bno,
-       xfs_agnumber_t          agno,
-       int                     suspect,
-       int                     isroot
-       )
-{
-       return scanfunc_allocbt(block, level, bno, agno,
-                               suspect, isroot, XFS_ABTC_MAGIC);
-}
-
 static int
 scan_single_ino_chunk(
        xfs_agnumber_t          agno,
@@ -886,8 +897,8 @@ scanfunc_ino(
        xfs_agblock_t           bno,
        xfs_agnumber_t          agno,
        int                     suspect,
-       int                     isroot
-       )
+       int                     isroot,
+       struct aghdr_cnts       *agcnts)
 {
        int                     i;
        int                     numrecs;
@@ -968,10 +979,10 @@ _("inode btree block claimed (state %d), agno %d, bno %d, 
suspect %d\n"),
                 * the block.  skip processing of bogus records.
                 */
                for (i = 0; i < numrecs; i++) {
-                       agicount += XFS_INODES_PER_CHUNK;
-                       icount += XFS_INODES_PER_CHUNK;
-                       agifreecount += be32_to_cpu(rp[i].ir_freecount);
-                       ifreecount += be32_to_cpu(rp[i].ir_freecount);
+                       agcnts->agicount += XFS_INODES_PER_CHUNK;
+                       agcnts->icount += XFS_INODES_PER_CHUNK;
+                       agcnts->agifreecount += be32_to_cpu(rp[i].ir_freecount);
+                       agcnts->ifreecount += be32_to_cpu(rp[i].ir_freecount);
 
                        suspect = scan_single_ino_chunk(agno, &rp[i], suspect);
                }
@@ -1015,13 +1026,14 @@ _("inode btree block claimed (state %d), agno %d, bno 
%d, suspect %d\n"),
                if (be32_to_cpu(pp[i]) != 0 && verify_agbno(mp, agno,
                                                        be32_to_cpu(pp[i])))
                        scan_sbtree(be32_to_cpu(pp[i]), level, agno,
-                                       suspect, scanfunc_ino, 0);
+                                       suspect, scanfunc_ino, 0, agcnts);
        }
 }
 
 void
 scan_freelist(
-       xfs_agf_t       *agf)
+       xfs_agf_t       *agf,
+       struct aghdr_cnts *agcnts)
 {
        xfs_agfl_t      *agfl;
        xfs_buf_t       *agflbuf;
@@ -1068,7 +1080,7 @@ scan_freelist(
                        be32_to_cpu(agf->agf_flcount), agno);
        }
 
-       fdblocks += count;
+       agcnts->fdblocks += count;
 
        libxfs_putbuf(agflbuf);
 }
@@ -1076,14 +1088,15 @@ scan_freelist(
 static void
 validate_agf(
        struct xfs_agf          *agf,
-       xfs_agnumber_t          agno)
+       xfs_agnumber_t          agno,
+       struct aghdr_cnts       *agcnts)
 {
        xfs_agblock_t           bno;
 
        bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
        if (bno != 0 && verify_agbno(mp, agno, bno)) {
                scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
-                           agno, 0, scanfunc_bno, 1);
+                           agno, 0, scanfunc_bno, 1, agcnts);
        } else {
                do_warn(_("bad agbno %u for btbno root, agno %d\n"),
                        bno, agno);
@@ -1092,33 +1105,34 @@ validate_agf(
        bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
        if (bno != 0 && verify_agbno(mp, agno, bno)) {
                scan_sbtree(bno, be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
-                           agno, 0, scanfunc_cnt, 1);
+                           agno, 0, scanfunc_cnt, 1, agcnts);
        } else  {
                do_warn(_("bad agbno %u for btbcnt root, agno %d\n"),
                        bno, agno);
        }
 
-       if (be32_to_cpu(agf->agf_freeblks) != agffreeblks) {
+       if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
                do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
-                       be32_to_cpu(agf->agf_freeblks), agffreeblks, agno);
+                       be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, 
agno);
        }
 
-       if (be32_to_cpu(agf->agf_longest) != agflongest) {
+       if (be32_to_cpu(agf->agf_longest) != agcnts->agflongest) {
                do_warn(_("agf_longest %u, counted %u in ag %u\n"),
-                       be32_to_cpu(agf->agf_longest), agflongest, agno);
+                       be32_to_cpu(agf->agf_longest), agcnts->agflongest, 
agno);
        }
 
        if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
-           be32_to_cpu(agf->agf_btreeblks) != agfbtreeblks) {
+           be32_to_cpu(agf->agf_btreeblks) != agcnts->agfbtreeblks) {
                do_warn(_("agf_btreeblks %u, counted %u in ag %u\n"),
-                       be32_to_cpu(agf->agf_btreeblks), agfbtreeblks, agno);
+                       be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, 
agno);
        }
 }
 
 static void
 validate_agi(
        struct xfs_agi          *agi,
-       xfs_agnumber_t          agno)
+       xfs_agnumber_t          agno,
+       struct aghdr_cnts       *agcnts)
 {
        xfs_agblock_t           bno;
        int                     i;
@@ -1126,20 +1140,20 @@ validate_agi(
        bno = be32_to_cpu(agi->agi_root);
        if (bno != 0 && verify_agbno(mp, agno, bno)) {
                scan_sbtree(bno, be32_to_cpu(agi->agi_level),
-                           agno, 0, scanfunc_ino, 1);
+                           agno, 0, scanfunc_ino, 1, agcnts);
        } else {
                do_warn(_("bad agbno %u for inobt root, agno %d\n"),
                        be32_to_cpu(agi->agi_root), agno);
        }
 
-       if (be32_to_cpu(agi->agi_count) != agicount) {
+       if (be32_to_cpu(agi->agi_count) != agcnts->agicount) {
                do_warn(_("agi_count %u, counted %u in ag %u\n"),
-                        be32_to_cpu(agi->agi_count), agicount, agno);
+                        be32_to_cpu(agi->agi_count), agcnts->agicount, agno);
        }
 
-       if (be32_to_cpu(agi->agi_freecount) != agifreecount) {
+       if (be32_to_cpu(agi->agi_freecount) != agcnts->agifreecount) {
                do_warn(_("agi_freecount %u, counted %u in ag %u\n"),
-                       be32_to_cpu(agi->agi_freecount), agifreecount, agno);
+                       be32_to_cpu(agi->agi_freecount), agcnts->agifreecount, 
agno);
        }
 
        for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
@@ -1155,42 +1169,15 @@ validate_agi(
 }
 
 /*
- * Validate block/inode counts in the superblock.
- *
- * Note: needs to be called after scan_ag() has been called for all
- * allocation groups.
- */
-void
-validate_sb(
-       struct xfs_sb           *sb)
-{
-       if (sb->sb_icount != icount) {
-               do_warn(_("sb_icount %lld, counted %lld\n"),
-                       sb->sb_icount, icount);
-       }
-
-       if (sb->sb_ifree != ifreecount) {
-               do_warn(_("sb_ifree %lld, counted %lld\n"),
-                       sb->sb_ifree, ifreecount);
-       }
-
-       if (sb->sb_fdblocks != fdblocks) {
-               do_warn(_("sb_fdblocks %lld, counted %lld\n"),
-                       sb->sb_fdblocks, fdblocks);
-       }
-
-       /* XXX: check sb_frextents */
-}
-
-/*
  * Scan an AG for obvious corruption.
  *
  * Note: This code is not reentrant due to the use of global variables.
  */
-void
-scan_ag(
-       xfs_agnumber_t  agno)
+void *
+scan_ag(void *args)
 {
+       struct aghdr_cnts *agcnts = args;
+       xfs_agnumber_t  agno = agcnts->agno;
        xfs_agf_t       *agf;
        xfs_buf_t       *agfbuf;
        int             agf_dirty = 0;
@@ -1202,28 +1189,18 @@ scan_ag(
        int             sb_dirty = 0;
        int             status;
 
-       /*
-        * Reset the global variables to track the AG header validity.
-        *
-        * Because we use global variable but can get called multiple times
-        * we have to make sure to always reset these variables.
-        */
-       agicount = agifreecount = 0;
-       agffreeblks = agfbtreeblks = 0;
-       agflongest = 0;
-
        sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
                                XFS_FSS_TO_BB(mp, 1), 0);
        if (!sbbuf)  {
                do_error(_("can't get root superblock for ag %d\n"), agno);
-               return;
+               return NULL;
        }
 
        sb = (xfs_sb_t *)calloc(BBSIZE, 1);
        if (!sb) {
                do_error(_("can't allocate memory for superblock\n"));
                libxfs_putbuf(sbbuf);
-               return;
+               return NULL;
        }
        libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbbuf));
 
@@ -1234,7 +1211,7 @@ scan_ag(
                do_error(_("can't read agf block for ag %d\n"), agno);
                libxfs_putbuf(sbbuf);
                free(sb);
-               return;
+               return NULL;
        }
        agf = XFS_BUF_TO_AGF(agfbuf);
 
@@ -1246,7 +1223,7 @@ scan_ag(
                libxfs_putbuf(agfbuf);
                libxfs_putbuf(sbbuf);
                free(sb);
-               return;
+               return NULL;
        }
        agi = XFS_BUF_TO_AGI(agibuf);
 
@@ -1298,13 +1275,13 @@ scan_ag(
                do_warn(_("bad uncorrected agheader %d, skipping ag...\n"),
                        agno);
 
-               return;
+               return NULL;
        }
 
-       scan_freelist(agf);
+       scan_freelist(agf, agcnts);
 
-       validate_agf(agf, agno);
-       validate_agi(agi, agno);
+       validate_agf(agf, agno, agcnts);
+       validate_agi(agi, agno, agcnts);
 
        ASSERT(agi_dirty == 0 || (agi_dirty && !no_modify));
 
@@ -1331,4 +1308,72 @@ scan_ag(
                libxfs_putbuf(sbbuf);
        free(sb);
        PROG_RPT_INC(prog_rpt_done[agno], 1);
+
+#ifdef XR_INODE_TRACE
+       print_inode_list(i);
+#endif
+       return NULL;
+}
+
+#define SCAN_THREADS 32
+
+void
+scan_ags(
+       struct xfs_mount        *mp)
+{
+       struct aghdr_cnts agcnts[mp->m_sb.sb_agcount];
+       pthread_t       thr[SCAN_THREADS];
+       __uint64_t      fdblocks = 0;
+       __uint64_t      icount = 0;
+       __uint64_t      ifreecount = 0;
+       int             i, j, err;
+
+       /*
+        * scan a few AGs in parallel. The scan is IO latency bound,
+        * so running a few at a time will speed it up significantly.
+        */
+       for (i = 0; i < mp->m_sb.sb_agcount; i += SCAN_THREADS) {
+               for (j = 0; j < SCAN_THREADS; j++) {
+                       if (i + j >= mp->m_sb.sb_agcount)
+                               break;
+                       memset(&agcnts[i + j], 0, sizeof(agcnts[i]));
+                       agcnts[i + j].agno = i + j;
+                       err = pthread_create(&thr[j], NULL, scan_ag,
+                                                       &agcnts[i + j]);
+                       if (err)
+                               do_abort(_("pthread_create failed in 
scan_ags\n"));
+               }
+               for (j = 0; j < SCAN_THREADS; j++) {
+                       if (i + j >= mp->m_sb.sb_agcount)
+                               break;
+                       pthread_join(thr[j], NULL);
+               }
+       }
+
+       for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+               fdblocks += agcnts[i].fdblocks;
+               icount += agcnts[i].icount;
+               ifreecount += agcnts[i].ifreecount;
+       }
+
+       /*
+        * Validate that our manual counts match the superblock.
+        */
+       if (mp->m_sb.sb_icount != icount) {
+               do_warn(_("sb_icount %lld, counted %lld\n"),
+                       mp->m_sb.sb_icount, icount);
+       }
+
+       if (mp->m_sb.sb_ifree != ifreecount) {
+               do_warn(_("sb_ifree %lld, counted %lld\n"),
+                       mp->m_sb.sb_ifree, ifreecount);
+       }
+
+       if (mp->m_sb.sb_fdblocks != fdblocks) {
+               do_warn(_("sb_fdblocks %lld, counted %lld\n"),
+                       mp->m_sb.sb_fdblocks, fdblocks);
+       }
+
+       /* XXX: check sb_frextents */
 }
+
diff --git a/repair/scan.h b/repair/scan.h
index 20567fb..4de1cef 100644
--- a/repair/scan.h
+++ b/repair/scan.h
@@ -20,19 +20,6 @@
 
 struct blkmap;
 
-void scan_sbtree(
-       xfs_agblock_t   root,
-       int             nlevels,
-       xfs_agnumber_t  agno,
-       int             suspect,
-       void            (*func)(struct xfs_btree_block  *block,
-                               int                     level,
-                               xfs_agblock_t           bno,
-                               xfs_agnumber_t          agno,
-                               int                     suspect,
-                               int                     isroot),
-       int             isroot);
-
 int scan_lbtree(
        xfs_dfsbno_t    root,
        int             nlevels,
@@ -74,29 +61,5 @@ int scanfunc_bmap(
        int                     check_dups,
        int                     *dirty);
 
-void scanfunc_bno(
-       struct xfs_btree_block  *block,
-       int                     level,
-       xfs_agblock_t           bno,
-       xfs_agnumber_t          agno,
-       int                     suspect,
-       int                     isroot);
-
-void scanfunc_cnt(
-       struct xfs_btree_block  *block,
-       int                     level,
-       xfs_agblock_t           bno,
-       xfs_agnumber_t          agno,
-       int                     suspect,
-       int                     isroot);
-
-void
-scanfunc_ino(
-       struct xfs_btree_block  *block,
-       int                     level,
-       xfs_agblock_t           bno,
-       xfs_agnumber_t          agno,
-       int                     suspect,
-       int                     isroot);
 
 #endif /* _XR_SCAN_H */
-- 
1.7.2.3

<Prev in Thread] Current Thread [Next in Thread>