Take all the reverse-mapping data we've acquired and use it to generate
reference count data. This data is used in phase 5 to rebuild the
reflink btree.
Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
repair/phase4.c | 65 +++++++++
repair/rmap.c | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 4 +
3 files changed, 481 insertions(+), 2 deletions(-)
diff --git a/repair/phase4.c b/repair/phase4.c
index 2c2cccb..64627a5 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -30,6 +30,8 @@
#include "versions.h"
#include "dir2.h"
#include "progress.h"
+#include "slab.h"
+#include "rmap.h"
bool collect_rmaps = false;
@@ -154,6 +156,61 @@ process_ags(
do_inode_prefetch(mp, ag_stride, process_ag_func, true, false);
}
+/*
+ * Work queue callback for one AG: hand the AG to
+ * rebuild_ag_rlrmap_records() and report a nonzero result via do_error().
+ * The @arg cookie is not used.
+ */
+static void
+process_ag_rmaps(
+	work_queue_t		*wq,
+	xfs_agnumber_t		agno,
+	void			*arg)
+{
+	int			rc;
+
+	do_log(_(" - agno = %d\n"), agno);
+
+	rc = rebuild_ag_rlrmap_records(wq->mp, agno);
+	if (rc == 0)
+		return;
+
+	do_error(
+_("%s while processing reverse-mapping records.\n"),
+		strerror(-rc));
+}
+
+/*
+ * Work queue callback for one AG: run reflink_fix_inode_flags() on the AG
+ * and report a nonzero result via do_error().  The @arg cookie is not used.
+ */
+static void
+process_inode_reflink_flags(
+	work_queue_t		*wq,
+	xfs_agnumber_t		agno,
+	void			*arg)
+{
+	int			rc;
+
+	rc = reflink_fix_inode_flags(wq->mp, agno);
+	if (rc == 0)
+		return;
+
+	do_error(
+_("%s while fixing inode reflink flags.\n"),
+		strerror(-rc));
+}
+
+/*
+ * Run the per-AG rmap post-processing.  Does nothing unless
+ * needs_rmap_work() says there is work to do.  The first work queue runs
+ * process_ag_rmaps() for every AG; it is destroyed before the second queue,
+ * which runs process_inode_reflink_flags() for every AG, is created.  The
+ * second pass only happens when the superblock has the reflink feature.
+ */
+static void
+process_rmaps(
+	xfs_mount_t		*mp)
+{
+	struct work_queue	queue;
+	xfs_agnumber_t		agno;
+
+	if (!needs_rmap_work(mp))
+		return;
+
+	do_log(_(" - processing reverse mapping data...\n"));
+
+	/* Pass 1: one work item per AG to process the rmap records. */
+	create_work_queue(&queue, mp, libxfs_nproc());
+	agno = 0;
+	while (agno < mp->m_sb.sb_agcount) {
+		queue_work(&queue, process_ag_rmaps, agno, NULL);
+		agno++;
+	}
+	destroy_work_queue(&queue);
+
+	/* Pass 2: one work item per AG to fix inode flags, reflink only. */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		create_work_queue(&queue, mp, libxfs_nproc());
+		agno = 0;
+		while (agno < mp->m_sb.sb_agcount) {
+			queue_work(&queue, process_inode_reflink_flags, agno,
+					NULL);
+			agno++;
+		}
+		destroy_work_queue(&queue);
+	}
+}
void
phase4(xfs_mount_t *mp)
@@ -302,6 +359,14 @@ phase4(xfs_mount_t *mp)
* already in phase 3.
*/
process_ags(mp);
+
+
+ /*
+ * Rebuild the reverse mapping and reflink records based on the
+ * mappings we observed.
+ */
+ process_rmaps(mp);
+
print_final_rpt();
/*
diff --git a/repair/rmap.c b/repair/rmap.c
index 2e1829c..cc34570 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -40,7 +40,6 @@ typedef struct xfs_rmap {
xfs_fileoff_t rm_startoff; /* starting file offset */
xfs_agblock_t rm_startblock; /* starting AG block number */
xfs_extlen_t rm_blockcount; /* number of AG blocks */
- struct xfs_rmap *rm_next; /* next item in stack */
} xfs_rmap_t;
/* per-AG rmap object anchor */
@@ -150,7 +149,6 @@ add_rmap(
rmap.rm_startoff = irec->br_startoff;
rmap.rm_startblock = agbno;
rmap.rm_blockcount = irec->br_blockcount;
- rmap.rm_next = NULL;
return slab_add(rmaps, &rmap);
}
@@ -174,6 +172,312 @@ dump_rmap(
#endif
/**
+ * rmap_compare() -- Compare rmap observations for array sorting.
+ */
+static int
+rmap_compare(
+	const void		*a,
+	const void		*b)
+{
+	const xfs_rmap_t	*pa = a;
+	const xfs_rmap_t	*pb = b;
+
+	/*
+	 * Sort key is (rm_startblock, rm_ino, rm_startoff), each ascending.
+	 * Every "less than" branch must return a negative value and every
+	 * "greater than" branch a positive one, otherwise the comparator is
+	 * inconsistent (cmp(a, b) and cmp(b, a) both positive) and the
+	 * result of sorting with it is unspecified.
+	 */
+	if (pa->rm_startblock < pb->rm_startblock)
+		return -1;
+	if (pa->rm_startblock > pb->rm_startblock)
+		return 1;
+
+	if (pa->rm_ino < pb->rm_ino)
+		return -1;
+	if (pa->rm_ino > pb->rm_ino)
+		return 1;
+
+	if (pa->rm_startoff < pb->rm_startoff)
+		return -1;
+	if (pa->rm_startoff > pb->rm_startoff)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * rmap_sb_compare() -- Compare two rmap observations by rm_startblock only,
+ *			so that they come out in pblk order.
+ *
+ * Returns -1, 0 or 1 when @a starts before, at, or after the block where
+ * @b starts.
+ */
+static int
+rmap_sb_compare(
+	const void		*a,
+	const void		*b)
+{
+	const xfs_rmap_t	*lhs = a;
+	const xfs_rmap_t	*rhs = b;
+
+	if (lhs->rm_startblock == rhs->rm_startblock)
+		return 0;
+
+	return (lhs->rm_startblock < rhs->rm_startblock) ? -1 : 1;
+}
+
+/**
+ * mark_inode_rl() -- Mark the owning inode of every reverse-mapping
+ *		      observation in the bag as requiring the reflink inode
+ *		      flag.  Nothing is done when the bag holds fewer than
+ *		      two observations.
+ *
+ * @mp: XFS mount object.
+ * @rmaps: Bag of rmap observations.
+ */
+static void
+mark_inode_rl(
+	xfs_mount_t		*mp,
+	xfs_bag_t		*rmaps)
+{
+	ino_tree_node_t		*ino_rec;
+	xfs_rmap_t		*cur;
+	xfs_agnumber_t		owner_agno;
+	xfs_agino_t		owner_agino;
+	size_t			i;
+	int			ino_off;
+
+	if (bag_count(rmaps) <= 1)
+		return;
+
+	/* Reflink flag accounting */
+	foreach_bag_ptr(rmaps, i, cur) {
+		owner_agno = XFS_INO_TO_AGNO(mp, cur->rm_ino);
+		owner_agino = XFS_INO_TO_AGINO(mp, cur->rm_ino);
+
+		/* lock here because we might go outside this ag */
+		pthread_mutex_lock(&ag_locks[owner_agno].lock);
+		ino_rec = find_inode_rec(mp, owner_agno, owner_agino);
+		ino_off = get_inode_offset(mp, cur->rm_ino, ino_rec);
+		set_inode_is_rl(ino_rec, ino_off);
+		pthread_mutex_unlock(&ag_locks[owner_agno].lock);
+	}
+}
+
+/**
+ * rmap_emit() -- Emit reverse-mapping objects for rmapbt reconstruction
+ *		  during phase 5.
+ *
+ * As written, this only sanity-checks each observation against the extent
+ * being emitted and prints a debug line for it; nothing is stored.
+ *
+ * @mp: XFS mount object.
+ * @agno: The AG number.
+ * @agbno: AG block number of the reverse mapping extent.
+ * @len: Length of the extent.
+ * @rmaps: Bag of reverse-mapping observations; must not be empty.
+ */
+static void
+rmap_emit(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	xfs_bag_t		*rmaps)
+{
+	xfs_rmap_t		*rmap;
+	size_t			n;
+
+	ASSERT(bag_count(rmaps) > 0);
+
+	foreach_bag_ptr(rmaps, n, rmap) {
+		/* Each observation must reach at least @len blocks ... */
+		ASSERT(rmap->rm_blockcount >= len);
+		/* ... and must not start after the emitted extent. */
+		ASSERT(rmap->rm_startblock <= agbno);
+		/*
+		 * The format string has to stay on one source line: a string
+		 * literal cannot contain an unescaped newline.
+		 */
+		dbg_printf(
+"RMAP(%zu): agno=%lu pblk=%llu, len=%lu -> ino=%llu, lblk=%llu\n",
+			n, (unsigned long)agno, (unsigned long long)agbno,
+			(unsigned long)len, (unsigned long long)rmap->rm_ino,
+			(unsigned long long)rmap->rm_startoff);
+	}
+}
+
+/**
+ * refcount_emit() -- Emit a reflink object for rlbt reconstruction
+ *		      during phase 5.
+ *
+ * The record is appended to the AG's ar_reflink_items slab.  A failure to
+ * add the record is reported through do_error().
+ *
+ * @mp: XFS mount object.
+ * @agno: The AG number.
+ * @agbno: AG block number where the shared extent starts.
+ * @len: Length of the extent.
+ * @nr_rmaps: Number of reverse mappings covering the extent; must be
+ *	      nonzero.  Stored as the link count, clamped to MAXRLCOUNT.
+ */
+#define REFCOUNT_CLAMP(nr)	((nr) > MAXRLCOUNT ? MAXRLCOUNT : (nr))
+static void
+refcount_emit(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno,
+	xfs_extlen_t		len,
+	size_t			nr_rmaps)
+{
+	xfs_reflink_rec_incore_t	rlrec;
+	int			error;
+	xfs_slab_t		*rlslab;
+
+	rlslab = ag_rmaps[agno].ar_reflink_items;
+	ASSERT(nr_rmaps > 0);
+
+	dbg_printf("REFL: agno=%u pblk=%u, len=%u -> refcount=%zu\n",
+		agno, agbno, len, nr_rmaps);
+	rlrec.rr_startblock = agbno;
+	rlrec.rr_blockcount = len;
+	rlrec.rr_nlinks = REFCOUNT_CLAMP(nr_rmaps);
+	error = slab_add(rlslab, &rlrec);
+	if (error)
+		/* Newline-terminated like the other do_error() messages. */
+		do_error(
+_("Insufficient memory while recreating reflink tree.\n"));
+}
+#undef REFCOUNT_CLAMP
+
+/**
+ * rebuild_ag_rlrmap_records() - transform a pile of physical block mapping
+ * observations into reflink and rmap data for
+ * eventual rebuilding of the btrees.
+ *
+ * The AG's rmap slab is sorted with rmap_compare() and then walked with a
+ * slab cursor. A bag (stack_top) holds the observations currently being
+ * processed: records are added when the walk reaches their rm_startblock
+ * and removed when it reaches their RMAP_END(). Whenever the number of
+ * records in the bag changes, the range [cbno, nbno) is handed to
+ * refcount_emit() together with the previous count, provided reflink is
+ * enabled and that count was greater than one. When rmapbt is enabled,
+ * rmap_emit() is called for every [sbno, nbno) range.
+ *
+ * Apart from the two early returns (neither feature enabled, cursor
+ * initialisation failure), the bag, the cursor and ag_rmaps[agno].ar_rmaps
+ * are freed before returning, on the success path as well as on error.
+ *
+ * XXX: Should the stack be sorted in order of last pblk?
+ * @mp: XFS mount object.
+ * @agno: AG number.
+ *
+ * Returns 0 if neither reflink nor rmapbt is enabled; otherwise the result
+ * of the last slab/bag helper that was called (nonzero is treated as
+ * failure and aborts the walk).
+ */
+#define RMAP_END(r) ((r)->rm_startblock + (r)->rm_blockcount) /* rm_startblock + rm_blockcount */
+int
+rebuild_ag_rlrmap_records(
+ xfs_mount_t *mp,
+ xfs_agnumber_t agno)
+{
+ xfs_bag_t *stack_top = NULL;
+ xfs_slab_t *rmaps;
+ xfs_slab_cursor_t *rmaps_cur;
+ xfs_rmap_t *array_cur;
+ xfs_rmap_t *rmap;
+ xfs_agblock_t sbno; /* first bno of this rmap set */
+ xfs_agblock_t cbno; /* first bno of this refcount set */
+ xfs_agblock_t nbno; /* next bno where rmap set changes */
+ size_t n, idx; /* n: records consumed from the cursor so far */
+ size_t old_stack_nr; /* bag count before the latest pop/push round */
+ bool is_rmap;
+ bool is_reflink;
+ int error;
+
+ is_reflink = xfs_sb_version_hasreflink(&mp->m_sb);
+ is_rmap = xfs_sb_version_hasrmapbt(&mp->m_sb);
+ if (!is_reflink && !is_rmap)
+ return 0;
+
+ rmaps = ag_rmaps[agno].ar_rmaps;
+ qsort_slab(rmaps, rmap_compare);
+
+ error = init_slab_cursor(rmaps, rmap_sb_compare, &rmaps_cur);
+ if (error)
+ return error;
+
+ error = init_bag(&stack_top);
+ if (error)
+ goto err;
+
+ /*
+ * While there are rmaps to be processed...
+ * Each pass of this outer loop starts with an empty bag and runs
+ * until the bag is empty again.
+ */
+ n = 0;
+ while (n < slab_count(rmaps)) {
+ array_cur = peek_slab_cursor(rmaps_cur);
+ sbno = cbno = array_cur->rm_startblock;
+ /* Push all rmaps with pblk == sbno onto the stack */
+ for (;
+ array_cur && array_cur->rm_startblock == sbno;
+ array_cur = peek_slab_cursor(rmaps_cur)) {
+ advance_slab_cursor(rmaps_cur); n++;
+ dump_rmap("push0", agno, array_cur);
+ error = bag_add(stack_top, array_cur);
+ if (error)
+ goto err;
+ }
+ mark_inode_rl(mp, stack_top);
+
+ /*
+ * Set nbno to the bno of the next refcount change:
+ * the smaller of the next unconsumed record's start block (if
+ * any records remain) and the end of every record in the bag.
+ */
+ if (n < slab_count(rmaps))
+ nbno = array_cur->rm_startblock;
+ else
+ nbno = NULLAGBLOCK;
+ foreach_bag_ptr(stack_top, idx, rmap) {
+ nbno = min(nbno, RMAP_END(rmap));
+ }
+
+ /* Emit reverse mappings, if needed */
+ ASSERT(nbno > sbno);
+ if (is_rmap) {
+ rmap_emit(mp, agno, sbno, nbno - sbno, stack_top);
+ }
+ old_stack_nr = bag_count(stack_top);
+
+ /* While stack isn't empty... */
+ while (bag_count(stack_top)) {
+ /* Pop all rmaps that end at nbno */
+ foreach_bag_ptr_reverse(stack_top, idx, rmap) {
+ if (RMAP_END(rmap) != nbno)
+ continue;
+ dump_rmap("pop", agno, rmap);
+ error = bag_remove(stack_top, idx);
+ if (error)
+ goto err;
+ }
+
+ /* Push array items that start at nbno */
+ for (;
+ array_cur && array_cur->rm_startblock == nbno;
+ array_cur = peek_slab_cursor(rmaps_cur)) {
+ advance_slab_cursor(rmaps_cur); n++;
+ dump_rmap("push1", agno, array_cur);
+ error = bag_add(stack_top, array_cur);
+ if (error)
+ goto err;
+ }
+ mark_inode_rl(mp, stack_top);
+
+ /*
+ * Emit refcount if necessary:
+ * only when the bag count changed, and only for a previous
+ * count above one; cbno then restarts at nbno either way.
+ */
+ ASSERT(nbno > cbno);
+ if (bag_count(stack_top) != old_stack_nr) {
+ if (is_reflink && old_stack_nr > 1) {
+ refcount_emit(mp, agno, cbno,
+ nbno - cbno,
+ old_stack_nr);
+ }
+ cbno = nbno;
+ }
+
+ /* Stack empty, go find the next rmap */
+ if (bag_count(stack_top) == 0)
+ break;
+ old_stack_nr = bag_count(stack_top);
+ sbno = nbno;
+
+ /* Set nbno to the bno of the next refcount change */
+ if (n < slab_count(rmaps))
+ nbno = array_cur->rm_startblock;
+ else
+ nbno = NULLAGBLOCK;
+ foreach_bag_ptr(stack_top, idx, rmap) {
+ nbno = min(nbno, RMAP_END(rmap));
+ }
+
+ /* Emit reverse mappings, if needed */
+ ASSERT(nbno > sbno);
+ if (is_rmap) {
+ rmap_emit(mp, agno, sbno, nbno - sbno,
+ stack_top);
+ }
+ }
+ }
+err:
+ free_bag(&stack_top);
+ free_slab_cursor(&rmaps_cur);
+ free_slab(&ag_rmaps[agno].ar_rmaps);
+
+ return error;
+}
+#undef RMAP_END
+
+/**
* reflink_record_inode_flag() -- Record that an inode had the reflink flag
* set when repair started. The inode reflink
* flag will be adjusted as necessary.
@@ -204,3 +508,109 @@ reflink_record_inode_flag(
dbg_printf("set was_rl lino=%llu was=0x%llx\n",
(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
}
+
+/**
+ * set_rl() -- Fix an inode's reflink flag.
+ *
+ * A warning is printed when the flag is to be set, or when it is to be
+ * cleared and no_modify is off.  With no_modify on, the inode buffer is
+ * released unchanged; otherwise XFS_DIFLAG_REFLINK in di_flags is updated,
+ * the inode CRC is recalculated and the buffer is written.
+ *
+ * @mp: XFS mount object.
+ * @agno: AG number.
+ * @agino: per-AG inode number.
+ * @set: True if the flag must be set; False if it must be cleared.
+ *
+ * Returns 1 if the inode buffer could not be obtained, 0 otherwise.
+ */
+static int
+set_rl(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		agino,
+	bool			set)
+{
+	xfs_buf_t		*bp;
+	xfs_dinode_t		*dip;
+
+	bp = get_agino_buf(mp, agno, agino, &dip);
+	if (bp == NULL)
+		return 1;
+	ASSERT(XFS_AGINO_TO_INO(mp, agno, agino) == be64_to_cpu(dip->di_ino));
+
+	if (set) {
+		do_warn(
+_("setting reflink flag on inode %"PRIu64"\n"),
+			XFS_AGINO_TO_INO(mp, agno, agino));
+	} else if (!no_modify) {
+		do_warn(
+_("clearing reflink flag on inode %"PRIu64"\n"),
+			XFS_AGINO_TO_INO(mp, agno, agino));
+	}
+
+	if (!no_modify) {
+		if (set)
+			dip->di_flags |= cpu_to_be16(XFS_DIFLAG_REFLINK);
+		else
+			dip->di_flags &= cpu_to_be16(~XFS_DIFLAG_REFLINK);
+		libxfs_dinode_calc_crc(mp, dip);
+		libxfs_writebuf(bp, 0);
+	} else {
+		libxfs_putbuf(bp);
+	}
+
+	return 0;
+}
+
+/**
+ * reflink_fix_inode_flags() -- Fix discrepancies between the state of the
+ *				inode reflink flag and our observations as to
+ *				whether or not the inode really needs it.
+ *
+ * Walks every inode record of the AG, compares the ino_was_rl and
+ * ino_is_rl bitmasks and calls set_rl() for each inode whose bit differs.
+ * A set_rl() failure is reported through do_error().
+ *
+ * @mp: XFS mountpoint.
+ * @agno: AG number.
+ *
+ * Returns the result of the last set_rl() call, or 0 if none was made.
+ */
+int
+reflink_fix_inode_flags(
+	xfs_mount_t		*mp,
+	xfs_agnumber_t		agno)
+{
+	ino_tree_node_t		*irec;
+	int			bit;
+	__uint64_t		was;
+	__uint64_t		is;
+	__uint64_t		diff;
+	__uint64_t		mask;
+	int			error = 0;
+	xfs_agino_t		agino;
+
+	/*
+	 * Update the reflink flag for any inode where there's a discrepancy
+	 * between the inode flag and whether or not we found any reflinked
+	 * extents.
+	 */
+	for (irec = findfirst_inode_rec(agno);
+	     irec != NULL;
+	     irec = next_ino_rec(irec)) {
+		ASSERT((irec->ino_was_rl & irec->ir_free) == 0);
+		ASSERT((irec->ino_is_rl & irec->ir_free) == 0);
+		was = irec->ino_was_rl;
+		is = irec->ino_is_rl;
+		if (was == is)
+			continue;
+		diff = was ^ is;
+		/*
+		 * Print the 64-bit masks as unsigned long long so the format
+		 * matches the argument type regardless of the width of long.
+		 */
+		dbg_printf("mismatch ino=%llu was=0x%llx is=0x%llx dif=0x%llx\n",
+			(unsigned long long)XFS_AGINO_TO_INO(mp, agno,
+						irec->ino_startnum),
+			(unsigned long long)was,
+			(unsigned long long)is,
+			(unsigned long long)diff);
+
+		/* One bit per inode of the record, starting at ino_startnum. */
+		for (bit = 0, mask = 1; bit < 64; bit++, mask <<= 1) {
+			agino = bit + irec->ino_startnum;
+			if (!(diff & mask))
+				continue;
+			else if (was & mask)
+				error = set_rl(mp, agno, agino, false);
+			else if (is & mask)
+				error = set_rl(mp, agno, agino, true);
+			else
+				ASSERT(0);
+			if (error)
+				do_error(
+_("Unable to fix reflink flag on inode %"PRIu64".\n"),
+					XFS_AGINO_TO_INO(mp, agno, agino));
+		}
+	}
+
+	return error;
+}
diff --git a/repair/rmap.h b/repair/rmap.h
index 16ad157..7dc709f 100644
--- a/repair/rmap.h
+++ b/repair/rmap.h
@@ -31,4 +31,8 @@ extern void reflink_record_inode_flag(xfs_mount_t *mp,
xfs_dinode_t *dino,
extern bool needs_rmap_work(xfs_mount_t *mp);
+extern int reflink_fix_inode_flags(xfs_mount_t *mp, xfs_agnumber_t agno);
+
+extern int rebuild_ag_rlrmap_records(xfs_mount_t *mp, xfs_agnumber_t agno);
+
#endif /* RMAP_H_ */
|