xfs
[Top] [All Lists]

[PATCH 13/15] xfs_repair: process reverse-mapping data into refcount data

To: david@xxxxxxxxxxxxx, darrick.wong@xxxxxxxxxx
Subject: [PATCH 13/15] xfs_repair: process reverse-mapping data into refcount data
From: "Darrick J. Wong" <darrick.wong@xxxxxxxxxx>
Date: Mon, 29 Jun 2015 20:27:02 -0700
Cc: xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <20150630032538.572.20293.stgit@xxxxxxxxxxxxxxxx>
References: <20150630032538.572.20293.stgit@xxxxxxxxxxxxxxxx>
User-agent: StGit/0.17.1-dirty
Take all the reverse-mapping data we've acquired and use it to generate
reference count data.  This data is used in phase 5 to rebuild the
reflink btree.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 repair/phase4.c |   65 +++++++++
 repair/rmap.c   |  414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 repair/rmap.h   |    4 +
 3 files changed, 481 insertions(+), 2 deletions(-)


diff --git a/repair/phase4.c b/repair/phase4.c
index 2c2cccb..64627a5 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -30,6 +30,8 @@
 #include "versions.h"
 #include "dir2.h"
 #include "progress.h"
+#include "slab.h"
+#include "rmap.h"
 
 bool collect_rmaps = false;
 
@@ -154,6 +156,61 @@ process_ags(
        do_inode_prefetch(mp, ag_stride, process_ag_func, true, false);
 }
 
+/*
+ * Work-queue callback: turn the reverse-mapping observations collected for
+ * one AG into refcount/rmap records via rebuild_ag_rlrmap_records().
+ *
+ * wq   - work queue this item was queued on; supplies the mount (wq->mp).
+ * agno - AG to process.
+ * arg  - unused.
+ *
+ * Reports any failure through do_error().
+ */
+static void
+process_ag_rmaps(
+       work_queue_t    *wq,
+       xfs_agnumber_t  agno,
+       void            *arg)
+{
+       int             err;
+
+       do_log(_("        - agno = %d\n"), agno);
+
+       err = rebuild_ag_rlrmap_records(wq->mp, agno);
+       if (!err)
+               return;
+
+       do_error(
+_("%s while processing reverse-mapping records.\n"),
+                strerror(-err));
+}
+
+/*
+ * Work-queue callback: reconcile the reflink flag of every inode in one AG
+ * via reflink_fix_inode_flags().
+ *
+ * wq   - work queue this item was queued on; supplies the mount (wq->mp).
+ * agno - AG to process.
+ * arg  - unused.
+ *
+ * Reports any failure through do_error().
+ */
+static void
+process_inode_reflink_flags(
+       work_queue_t    *wq,
+       xfs_agnumber_t  agno,
+       void            *arg)
+{
+       int             err;
+
+       err = reflink_fix_inode_flags(wq->mp, agno);
+       if (!err)
+               return;
+
+       do_error(
+_("%s while fixing inode reflink flags.\n"),
+                strerror(-err));
+}
+
+/*
+ * Run the per-AG reverse-mapping post-processing passes.
+ *
+ * Returns immediately when needs_rmap_work() says there is nothing to do.
+ * Otherwise every AG is queued for process_ag_rmaps(); once that queue has
+ * been destroyed, and only if the superblock has the reflink feature, every
+ * AG is queued again for process_inode_reflink_flags().
+ */
+static void
+process_rmaps(
+       xfs_mount_t             *mp)
+{
+       struct work_queue       wq;
+       xfs_agnumber_t          agno;
+
+       if (!needs_rmap_work(mp))
+               return;
+
+       do_log(_("        - processing reverse mapping data...\n"));
+
+       /* Pass 1: build refcount/rmap records for each AG. */
+       create_work_queue(&wq, mp, libxfs_nproc());
+       agno = 0;
+       while (agno < mp->m_sb.sb_agcount) {
+               queue_work(&wq, process_ag_rmaps, agno, NULL);
+               agno++;
+       }
+       destroy_work_queue(&wq);
+
+       /* Pass 2: inode reflink flags only matter on reflink filesystems. */
+       if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+               create_work_queue(&wq, mp, libxfs_nproc());
+               agno = 0;
+               while (agno < mp->m_sb.sb_agcount) {
+                       queue_work(&wq, process_inode_reflink_flags, agno,
+                                  NULL);
+                       agno++;
+               }
+               destroy_work_queue(&wq);
+       }
+}
 
 void
 phase4(xfs_mount_t *mp)
@@ -302,6 +359,14 @@ phase4(xfs_mount_t *mp)
         * already in phase 3.
         */
        process_ags(mp);
+
+
+       /*
+        * Rebuild the reverse mapping and reflink records based on the
+        * mappings we observed.
+        */
+       process_rmaps(mp);
+
        print_final_rpt();
 
        /*
diff --git a/repair/rmap.c b/repair/rmap.c
index 2e1829c..cc34570 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -40,7 +40,6 @@ typedef struct xfs_rmap {
        xfs_fileoff_t   rm_startoff;    /* starting file offset */
        xfs_agblock_t   rm_startblock;  /* starting AG block number */
        xfs_extlen_t    rm_blockcount;  /* number of AG blocks */
-       struct xfs_rmap *rm_next;       /* next item in stack */
 } xfs_rmap_t;
 
 /* per-AG rmap object anchor */
@@ -150,7 +149,6 @@ add_rmap(
        rmap.rm_startoff = irec->br_startoff;
        rmap.rm_startblock = agbno;
        rmap.rm_blockcount = irec->br_blockcount;
-       rmap.rm_next = NULL;
        return slab_add(rmaps, &rmap);
 }
 
@@ -174,6 +172,312 @@ dump_rmap(
 #endif
 
 /**
+ * rmap_compare() -- Compare rmap observations for array sorting.
+ *
+ * Orders two xfs_rmap_t records by rm_startblock, then rm_ino, then
+ * rm_startoff, each ascending.
+ *
+ * @a: Pointer to the first xfs_rmap_t.
+ * @b: Pointer to the second xfs_rmap_t.
+ *
+ * Returns -1 if @a sorts before @b, 1 if it sorts after, and 0 if all three
+ * keys are equal.
+ */
+static int
+rmap_compare(
+       const void              *a,
+       const void              *b)
+{
+       const xfs_rmap_t        *pa;
+       const xfs_rmap_t        *pb;
+
+       pa = a; pb = b;
+       if (pa->rm_startblock < pb->rm_startblock)
+               return -1;
+       else if (pa->rm_startblock > pb->rm_startblock)
+               return 1;
+       else if (pa->rm_ino < pb->rm_ino)
+               return -1;
+       else if (pa->rm_ino > pb->rm_ino)
+               return 1;
+       else if (pa->rm_startoff < pb->rm_startoff)
+               return -1;      /* must be negative: "less" and "greater" may not both return 1 */
+       else if (pa->rm_startoff > pb->rm_startoff)
+               return 1;
+       else
+               return 0;
+}
+
+/**
+ * rmap_sb_compare() -- Compare two rmap observations by starting physical
+ *                      block only, so that they come out in pblk order.
+ *
+ * @a: Pointer to the first xfs_rmap_t.
+ * @b: Pointer to the second xfs_rmap_t.
+ *
+ * Returns -1, 0 or 1 as @a's rm_startblock is less than, equal to or
+ * greater than @b's.  Inode and file offset are ignored.
+ */
+static int
+rmap_sb_compare(
+       const void              *a,
+       const void              *b)
+{
+       const xfs_rmap_t        *lhs = a;
+       const xfs_rmap_t        *rhs = b;
+
+       return (lhs->rm_startblock > rhs->rm_startblock) -
+              (lhs->rm_startblock < rhs->rm_startblock);
+}
+
+/**
+ * mark_inode_rl() -- Mark every inode owning an observation in the bag as
+ *                    requiring the reflink inode flag, provided the bag
+ *                    holds at least two observations.
+ *
+ * @mp: XFS mount object.
+ * @rmaps: Bag of rmap observations covering the same blocks.
+ */
+static void
+mark_inode_rl(
+       xfs_mount_t             *mp,
+       xfs_bag_t               *rmaps)
+{
+       xfs_agnumber_t          owner_agno;
+       xfs_agino_t             owner_agino;
+       xfs_rmap_t              *obs;
+       ino_tree_node_t         *ino_rec;
+       int                     ino_off;
+       size_t                  i;
+
+       /* Zero or one owner means nothing is shared. */
+       if (bag_count(rmaps) <= 1)
+               return;
+
+       /* Reflink flag accounting */
+       foreach_bag_ptr(rmaps, i, obs) {
+               owner_agno = XFS_INO_TO_AGNO(mp, obs->rm_ino);
+               owner_agino = XFS_INO_TO_AGINO(mp, obs->rm_ino);
+
+               /*
+                * The owning inode may live outside the AG being processed,
+                * so take that AG's lock around the lookup and update.
+                */
+               pthread_mutex_lock(&ag_locks[owner_agno].lock);
+               ino_rec = find_inode_rec(mp, owner_agno, owner_agino);
+               ino_off = get_inode_offset(mp, obs->rm_ino, ino_rec);
+               set_inode_is_rl(ino_rec, ino_off);
+               pthread_mutex_unlock(&ag_locks[owner_agno].lock);
+       }
+}
+
+/**
+ * rmap_emit() -- Emit reverse-mapping objects for rmapbt reconstruction
+ *                during phase 5.
+ *
+ * As written, this only sanity-checks each observation against the extent
+ * and debug-prints it; nothing is stored.
+ *
+ * @mp: XFS mount object.
+ * @agno: The AG number.
+ * @agbno: AG block number of the reverse mapping extent.
+ * @len: Length of the extent.
+ * @rmaps: Bag of reverse-mapping observations covering the extent; must not
+ *         be empty.
+ */
+static void
+rmap_emit(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno,
+       xfs_extlen_t            len,
+       xfs_bag_t               *rmaps)
+{
+       xfs_rmap_t              *rmap;
+       size_t                  n;
+
+       ASSERT(bag_count(rmaps) > 0);
+
+       foreach_bag_ptr(rmaps, n, rmap) {
+               /* Each observation must start at or before agbno and be at least len long. */
+               ASSERT(rmap->rm_blockcount >= len);
+               ASSERT(rmap->rm_startblock <= agbno);
+               dbg_printf("RMAP(%zu): agno=%lu pblk=%llu, len=%lu -> ino=%llu, lblk=%llu\n",
+                       n, (unsigned long)agno, (unsigned long long)agbno,
+                       (unsigned long)len, (unsigned long long)rmap->rm_ino,
+                       (unsigned long long)rmap->rm_startoff);
+       }
+}
+
+/**
+ * refcount_emit() -- Record one reflink (refcount) extent for rlbt
+ *                    reconstruction during phase 5.
+ *
+ * The record is appended to ag_rmaps[agno].ar_reflink_items, with the
+ * reference count clamped to MAXRLCOUNT.  Calls do_error() if the record
+ * cannot be added to the slab.
+ *
+ * @mp: XFS mount object.
+ * @agno: The AG number.
+ * @agbno: First AG block of the shared extent.
+ * @len: Length of the extent.
+ * @nr_rmaps: Number of owners observed for the extent; must be non-zero.
+ */
+static void
+refcount_emit(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           agbno,
+       xfs_extlen_t            len,
+       size_t                  nr_rmaps)
+{
+       xfs_reflink_rec_incore_t        rec;
+       xfs_slab_t              *slab = ag_rmaps[agno].ar_reflink_items;
+
+       ASSERT(nr_rmaps > 0);
+
+       dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
+               agno, agbno, len, nr_rmaps);
+
+       rec.rr_startblock = agbno;
+       rec.rr_blockcount = len;
+       /* Never record more links than MAXRLCOUNT. */
+       rec.rr_nlinks = (nr_rmaps) > MAXRLCOUNT ? MAXRLCOUNT : (nr_rmaps);
+
+       if (slab_add(slab, &rec))
+               do_error(
+_("Insufficient memory while recreating reflink tree."));
+}
+
+/**
+ * rebuild_ag_rlrmap_records() - transform a pile of physical block mapping
+ *                              observations into reflink and rmap data for
+ *                              eventual rebuilding of the btrees.
+ *
+ * Does nothing and returns 0 unless the superblock has the reflink or the
+ * rmapbt feature.  Otherwise the AG's observations
+ * (ag_rmaps[agno].ar_rmaps) are sorted with rmap_compare() and walked
+ * through a slab cursor.  A bag holds the observations that cover the block
+ * range currently being examined.  Each range is handed to rmap_emit() when
+ * rmapbt is enabled.  Whenever the bag's population changes, the range just
+ * finished is handed to refcount_emit() if reflink is enabled and more than
+ * one observation covered it.  mark_inode_rl() is called on the bag after
+ * every round of pushes.
+ *
+ * The bag, the slab cursor and ag_rmaps[agno].ar_rmaps itself are freed
+ * before returning, except when returning early because neither feature is
+ * enabled or because init_slab_cursor() failed.
+ *
+ * XXX: Should the stack be sorted in order of last pblk?
+ * @mp: XFS mount object.
+ * @agno: AG number.
+ *
+ * Returns 0 on success, otherwise the error reported by init_slab_cursor(),
+ * init_bag(), bag_add() or bag_remove().
+ */
+#define RMAP_END(r)    ((r)->rm_startblock + (r)->rm_blockcount)
+int
+rebuild_ag_rlrmap_records(
+       xfs_mount_t             *mp,
+       xfs_agnumber_t          agno)
+{
+       xfs_bag_t               *stack_top = NULL;
+       xfs_slab_t              *rmaps;
+       xfs_slab_cursor_t       *rmaps_cur;
+       xfs_rmap_t              *array_cur;
+       xfs_rmap_t              *rmap;
+       xfs_agblock_t           sbno;   /* first bno of this rmap set */
+       xfs_agblock_t           cbno;   /* first bno of this refcount set */
+       xfs_agblock_t           nbno;   /* next bno where rmap set changes */
+       size_t                  n, idx;
+       size_t                  old_stack_nr;
+       bool                    is_rmap;
+       bool                    is_reflink;
+       int                     error;
+
+       is_reflink = xfs_sb_version_hasreflink(&mp->m_sb);
+       is_rmap = xfs_sb_version_hasrmapbt(&mp->m_sb);
+       if (!is_reflink && !is_rmap)
+               return 0;
+
+       rmaps = ag_rmaps[agno].ar_rmaps;
+       qsort_slab(rmaps, rmap_compare);
+
+       error = init_slab_cursor(rmaps, rmap_sb_compare, &rmaps_cur);
+       if (error)
+               return error;
+
+       error = init_bag(&stack_top);
+       if (error)
+               goto err;
+
+       /*
+        * While there are rmaps to be processed...
+        * (n counts the observations consumed from the cursor so far.)
+        */
+       n = 0;
+       while (n < slab_count(rmaps)) {
+               array_cur = peek_slab_cursor(rmaps_cur);
+               sbno = cbno = array_cur->rm_startblock;
+               /* Push all rmaps with pblk == sbno onto the stack */
+               for (;
+                    array_cur && array_cur->rm_startblock == sbno;
+                    array_cur = peek_slab_cursor(rmaps_cur)) {
+                       advance_slab_cursor(rmaps_cur); n++;
+                       dump_rmap("push0", agno, array_cur);
+                       error = bag_add(stack_top, array_cur);
+                       if (error)
+                               goto err;
+               }
+               mark_inode_rl(mp, stack_top);
+
+               /*
+                * Set nbno to the bno of the next refcount change: the
+                * smaller of the next unconsumed rmap's start block
+                * (NULLAGBLOCK if none remain) and the earliest end block
+                * of any rmap currently in the bag.
+                */
+               if (n < slab_count(rmaps))
+                       nbno = array_cur->rm_startblock;
+               else
+                       nbno = NULLAGBLOCK;
+               foreach_bag_ptr(stack_top, idx, rmap) {
+                       nbno = min(nbno, RMAP_END(rmap));
+               }
+
+               /* Emit reverse mappings, if needed */
+               ASSERT(nbno > sbno);
+               if (is_rmap) {
+                       rmap_emit(mp, agno, sbno, nbno - sbno, stack_top);
+               }
+               old_stack_nr = bag_count(stack_top);
+
+               /* While stack isn't empty... */
+               while (bag_count(stack_top)) {
+                       /* Pop all rmaps that end at nbno */
+                       foreach_bag_ptr_reverse(stack_top, idx, rmap) {
+                               if (RMAP_END(rmap) != nbno)
+                                       continue;
+                               dump_rmap("pop", agno, rmap);
+                               error = bag_remove(stack_top, idx);
+                               if (error)
+                                       goto err;
+                       }
+
+                       /* Push array items that start at nbno */
+                       for (;
+                            array_cur && array_cur->rm_startblock == nbno;
+                            array_cur = peek_slab_cursor(rmaps_cur)) {
+                               advance_slab_cursor(rmaps_cur); n++;
+                               dump_rmap("push1", agno, array_cur);
+                               error = bag_add(stack_top, array_cur);
+                               if (error)
+                                       goto err;
+                       }
+                       mark_inode_rl(mp, stack_top);
+
+                       /*
+                        * Emit refcount if necessary: only when the bag's
+                        * population changed.  A record for [cbno, nbno) is
+                        * written only if reflink is enabled and more than
+                        * one rmap covered that range; cbno advances to
+                        * nbno either way.
+                        */
+                       ASSERT(nbno > cbno);
+                       if (bag_count(stack_top) != old_stack_nr) {
+                               if (is_reflink && old_stack_nr > 1) {
+                                       refcount_emit(mp, agno, cbno,
+                                                     nbno - cbno,
+                                                     old_stack_nr);
+                               }
+                               cbno = nbno;
+                       }
+
+                       /* Stack empty, go find the next rmap */
+                       if (bag_count(stack_top) == 0)
+                               break;
+                       old_stack_nr = bag_count(stack_top);
+                       sbno = nbno;
+
+                       /*
+                        * Set nbno to the bno of the next refcount change,
+                        * computed the same way as before the inner loop.
+                        */
+                       if (n < slab_count(rmaps))
+                               nbno = array_cur->rm_startblock;
+                       else
+                               nbno = NULLAGBLOCK;
+                       foreach_bag_ptr(stack_top, idx, rmap) {
+                               nbno = min(nbno, RMAP_END(rmap));
+                       }
+
+                       /* Emit reverse mappings, if needed */
+                       ASSERT(nbno > sbno);
+                       if (is_rmap) {
+                               rmap_emit(mp, agno, sbno, nbno - sbno,
+                                         stack_top);
+                       }
+               }
+       }
+err:
+       free_bag(&stack_top);
+       free_slab_cursor(&rmaps_cur);
+       free_slab(&ag_rmaps[agno].ar_rmaps);
+
+       return error;
+}
+#undef RMAP_END
+
+/**
  * reflink_record_inode_flag() -- Record that an inode had the reflink flag
  *                                set when repair started.  The inode reflink
  *                                flag will be adjusted as necessary.
@@ -204,3 +508,109 @@ reflink_record_inode_flag(
        dbg_printf("set was_rl lino=%llu was=0x%llx\n",
                (unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
 }
+
+/**
+ * set_rl() -- Bring one on-disk inode's reflink flag in line with @set.
+ *
+ * In no_modify mode nothing is written: a warning is printed only when the
+ * flag would have been set, and the buffer is released untouched.
+ * Otherwise the flag is set or cleared in di_flags, the inode CRC is
+ * recalculated and the buffer is written out.
+ *
+ * @mp: XFS mount object.
+ * @agno: AG number.
+ * @agino: per-AG inode number.
+ * @set: True if the flag must be set; False if it must be cleared.
+ *
+ * Returns 1 if the inode buffer could not be obtained, 0 otherwise.
+ */
+static int
+set_rl(
+       xfs_mount_t     *mp,
+       xfs_agnumber_t  agno,
+       xfs_agino_t     agino,
+       bool            set)
+{
+       xfs_dinode_t    *dip;
+       xfs_buf_t       *bp;
+
+       bp = get_agino_buf(mp, agno, agino, &dip);
+       if (bp == NULL)
+               return 1;
+       ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dip->di_ino));
+
+       if (no_modify) {
+               /* Dry run: only the "setting" case is reported. */
+               if (set)
+                       do_warn(
+_("setting reflink flag on inode %"PRIu64"\n"),
+                               XFS_AGINO_TO_INO(mp, agno, agino));
+               libxfs_putbuf(bp);
+               return 0;
+       }
+
+       if (set) {
+               do_warn(
+_("setting reflink flag on inode %"PRIu64"\n"),
+                       XFS_AGINO_TO_INO(mp, agno, agino));
+               dip->di_flags |= cpu_to_be16(XFS_DIFLAG_REFLINK);
+       } else {
+               do_warn(
+_("clearing reflink flag on inode %"PRIu64"\n"),
+                       XFS_AGINO_TO_INO(mp, agno, agino));
+               dip->di_flags &= cpu_to_be16(~XFS_DIFLAG_REFLINK);
+       }
+
+       libxfs_dinode_calc_crc(mp, dip);
+       libxfs_writebuf(bp, 0);
+
+       return 0;
+}
+
+/**
+ * reflink_fix_inode_flags() -- Fix discrepancies between the state of the
+ *                              inode reflink flag and our observations as to
+ *                              whether or not the inode really needs it.
+ *
+ * Walks every inode record of the AG and compares the ino_was_rl bitmask
+ * with the ino_is_rl bitmask.  For each bit that differs, set_rl() is
+ * called to set or clear the flag on the corresponding inode.
+ *
+ * @mp: XFS mountpoint.
+ * @agno: AG number.
+ *
+ * Returns the result of the last set_rl() call, or 0 if none was needed.
+ * A set_rl() failure is reported through do_error().
+ */
+int
+reflink_fix_inode_flags(
+       xfs_mount_t     *mp,
+       xfs_agnumber_t  agno)
+{
+       ino_tree_node_t *irec;
+       int             bit;
+       __uint64_t      was;
+       __uint64_t      is;
+       __uint64_t      diff;
+       __uint64_t      mask;
+       int             error = 0;
+       xfs_agino_t     agino;
+
+       /*
+        * Update the reflink flag for any inode where there's a discrepancy
+        * between the inode flag and whether or not we found any reflinked
+        * extents.
+        */
+       for (irec = findfirst_inode_rec(agno);
+            irec != NULL;
+            irec = next_ino_rec(irec)) {
+               /* Free inodes must not carry either reflink bit. */
+               ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
+               ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
+               was = irec->ino_was_rl;
+               is = irec->ino_is_rl;
+               if (was == is)
+                       continue;
+               diff = was ^ is;
+               /* Cast to unsigned long long: %lx is too narrow where long is 32 bits. */
+               dbg_printf("mismatch ino=%llu was=0x%llx is=0x%llx dif=0x%llx\n",
+                       (unsigned long long)XFS_AGINO_TO_INO(mp, agno,
+                                               irec->ino_startnum),
+                       (unsigned long long)was,
+                       (unsigned long long)is,
+                       (unsigned long long)diff);
+
+               /* One mask bit per inode in the record, starting at ino_startnum. */
+               for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
+                       agino = bit + irec->ino_startnum;
+                       if (!(diff & mask))
+                               continue;
+                       else if (was & mask)
+                               error = set_rl(mp, agno, agino, false);
+                       else if (is & mask)
+                               error = set_rl(mp, agno, agino, true);
+                       else
+                               ASSERT(0);      /* a differing bit is set in was or is */
+                       if (error)
+                               do_error(
+_("Unable to fix reflink flag on inode %"PRIu64".\n"),
+                                       XFS_AGINO_TO_INO(mp, agno, agino));
+               }
+       }
+
+       return error;
+}
diff --git a/repair/rmap.h b/repair/rmap.h
index 16ad157..7dc709f 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -31,4 +31,8 @@ extern void reflink_record_inode_flag(xfs_mount_t *mp, xfs_dinode_t *dino,
 
 extern bool needs_rmap_work(xfs_mount_t *mp);
 
+extern int reflink_fix_inode_flags(xfs_mount_t *mp, xfs_agnumber_t agno);
+
+extern int rebuild_ag_rlrmap_records(xfs_mount_t *mp, xfs_agnumber_t agno);
+
 #endif /* RMAP_H_ */

<Prev in Thread] Current Thread [Next in Thread>