Collect reverse-mapping data for the entire filesystem so that we can
later check and rebuild the reference count tree and the reverse mapping
tree.
Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
repair/Makefile | 4 -
repair/dinode.c | 42 ++++++++++
repair/incore.h | 38 +++++++++
repair/incore_ino.c | 2
repair/phase4.c | 2
repair/rmap.c | 206 +++++++++++++++++++++++++++++++++++++++++++++++++++
repair/rmap.h | 34 ++++++++
repair/xfs_repair.c | 4 +
8 files changed, 330 insertions(+), 2 deletions(-)
create mode 100644 repair/rmap.c
create mode 100644 repair/rmap.h
diff --git a/repair/Makefile b/repair/Makefile
index 82cba8e..7239a9e 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -11,14 +11,14 @@ LTCOMMAND = xfs_repair
HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
dinode.h dir2.h err_protos.h globals.h incore.h protos.h rt.h \
- progress.h scan.h versions.h prefetch.h threads.h slab.h
+ progress.h scan.h versions.h prefetch.h threads.h slab.h rmap.h
CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
dino_chunks.c dinode.c dir2.c globals.c incore.c \
incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
progress.c prefetch.c rt.c sb.c scan.c threads.c \
- versions.c xfs_repair.c slab.c
+ versions.c xfs_repair.c slab.c rmap.c
LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD)
LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG)
diff --git a/repair/dinode.c b/repair/dinode.c
index fc8bc12..e706998 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -30,6 +30,8 @@
#include "attr_repair.h"
#include "bmap.h"
#include "threads.h"
+#include "slab.h"
+#include "rmap.h"
/*
* gettext lookups for translations of strings use mutexes internally to
@@ -720,6 +722,9 @@ _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n"
* checking each entry without setting the
* block bitmap
*/
+ if (type == XR_INO_DATA &&
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ goto skip_dup;
if (search_dup_extent(agno, agbno, ebno)) {
do_warn(
_("%s fork in ino %" PRIu64 " claims dup extent, "
@@ -729,6 +734,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, "
irec.br_blockcount);
goto done;
}
+skip_dup:
*tot += irec.br_blockcount;
continue;
}
@@ -768,6 +774,9 @@ _("%s fork in inode %" PRIu64 " claims metadata block %" PRIu64 "\n"),
case XR_E_INUSE:
case XR_E_MULT:
set_bmap_ext(agno, agbno, blen, XR_E_MULT);
+ if (type == XR_INO_DATA &&
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ break;
do_warn(
_("%s fork in %s inode %" PRIu64 " claims used block %" PRIu64 "\n"),
forkname, ftype, ino, b);
@@ -779,6 +788,13 @@ _("illegal state %d in block map %" PRIu64 "\n"),
state, b);
}
}
+ if (collect_rmaps) { /* && !check_dups */
+ error = add_rmap(mp, ino, whichfork, &irec);
+ if (error)
+ do_error(
+_("couldn't add reverse mapping\n")
+ );
+ }
*tot += irec.br_blockcount;
}
error = 0;
@@ -2387,6 +2403,26 @@ _("bad (negative) size %" PRId64 " on inode %" PRIu64 "\n"),
flags &= XFS_DIFLAG_ANY;
}
+ if ((flags & XFS_DIFLAG_REFLINK) &&
+ !xfs_sb_version_hasreflink(&mp->m_sb)) {
+ if (!uncertain) {
+ do_warn(
+ _("inode %" PRIu64 " is marked reflinked but file system does not support reflink\n"),
+ lino);
+ }
+ goto clear_bad_out;
+ }
+
+ if ((flags & XFS_DIFLAG_REFLINK) &&
+ (flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT))) {
+ if (!uncertain) {
+ do_warn(
+ _("Cannot have a reflinked realtime inode %" PRIu64 "\n"),
+ lino);
+ }
+ goto clear_bad_out;
+ }
+
if (flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) {
/* need an rt-dev! */
if (!rt_name) {
@@ -2544,6 +2580,12 @@ _("bad non-zero extent size %u for non-realtime/extsize inode %" PRIu64 ", "),
goto clear_bad_out;
/*
+ * record the state of the reflink flag
+ */
+ if (collect_rmaps)
+ reflink_record_inode_flag(mp, dino, agno, ino, lino);
+
+ /*
* check data fork -- if it's bad, clear the inode
*/
if (process_inode_data_fork(mp, agno, ino, dino, type, dirty,
diff --git a/repair/incore.h b/repair/incore.h
index ad19daa..e6c5310 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -282,6 +282,8 @@ typedef struct ino_tree_node {
__uint64_t ir_sparse; /* sparse inode bitmask */
__uint64_t ino_confirmed; /* confirmed bitmask */
__uint64_t ino_isa_dir; /* bit == 1 if a directory */
+ __uint64_t ino_was_rl; /* bit == 1 if reflink flag set */
+ __uint64_t ino_is_rl; /* bit == 1 if reflink flag should be set */
__uint8_t nlink_size;
union ino_nlink disk_nlinks; /* on-disk nlinks, set in P3 */
union {
@@ -493,6 +495,42 @@ static inline bool is_inode_sparse(struct ino_tree_node *irec, int offset)
}
/*
+ * Set, clear, or test the ino_was_rl ("reflink flag was set") bit that IREC_MASK(offset) selects.
+ */
+static inline void set_inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_was_rl |= IREC_MASK(offset);
+}
+
+static inline void clear_inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_was_rl &= ~IREC_MASK(offset);
+}
+
+static inline int inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+ return (irec->ino_was_rl & IREC_MASK(offset)) != 0; /* yields 0 or 1 */
+}
+
+/*
+ * Set, clear, or test the ino_is_rl ("reflink flag should be set") bit that IREC_MASK(offset) selects.
+ */
+static inline void set_inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_is_rl |= IREC_MASK(offset);
+}
+
+static inline void clear_inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+ irec->ino_is_rl &= ~IREC_MASK(offset);
+}
+
+static inline int inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+ return (irec->ino_is_rl & IREC_MASK(offset)) != 0; /* yields 0 or 1 */
+}
+
+/*
* add_inode_reached() is set on inode I only if I has been reached
* by an inode P claiming to be the parent and if I is a directory,
* the .. link in the I says that P is I's parent.
diff --git a/repair/incore_ino.c b/repair/incore_ino.c
index cda6c2b..dd426aa 100644
--- a/repair/incore_ino.c
+++ b/repair/incore_ino.c
@@ -257,6 +257,8 @@ alloc_ino_node(
irec->ino_startnum = starting_ino;
irec->ino_confirmed = 0;
irec->ino_isa_dir = 0;
+ irec->ino_was_rl = 0;
+ irec->ino_is_rl = 0;
irec->ir_free = (xfs_inofree_t) - 1;
irec->ir_sparse = 0;
irec->ino_un.ex_data = NULL;
diff --git a/repair/phase4.c b/repair/phase4.c
index aa79ae0..2c2cccb 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -31,6 +31,7 @@
#include "dir2.h"
#include "progress.h"
+bool collect_rmaps = false; /* set to true at the top of phase4() */
/*
* null out quota inode fields in sb if they point to non-existent inodes.
@@ -169,6 +170,7 @@ phase4(xfs_mount_t *mp)
int ag_hdr_block;
int bstate;
+ collect_rmaps = true;
ag_hdr_block = howmany(ag_hdr_len, mp->m_sb.sb_blocksize);
do_log(_("Phase 4 - check for duplicate blocks...\n"));
diff --git a/repair/rmap.c b/repair/rmap.c
new file mode 100644
index 0000000..2e1829c
--- /dev/null
+++ b/repair/rmap.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+
+#undef RMAP_DEBUG /* replace with "#define RMAP_DEBUG" to enable debug output */
+/* dbg_printf(): printf() + fflush(stdout) under RMAP_DEBUG, otherwise expands to nothing */
+#ifdef RMAP_DEBUG
+# define dbg_printf(f, a...) do {printf(f, ## a); fflush(stdout); } while (0)
+#else
+# define dbg_printf(f, a...)
+#endif
+
+/* One reverse-mapping observation; add_rmap() fills one in per extent mapping */
+typedef struct xfs_rmap {
+ xfs_ino_t rm_ino; /* inode number that owns the mapping */
+ xfs_fileoff_t rm_startoff; /* starting file offset (from br_startoff) */
+ xfs_agblock_t rm_startblock; /* starting block number within the AG */
+ xfs_extlen_t rm_blockcount; /* length of the extent in blocks */
+ struct xfs_rmap *rm_next; /* next item in stack; add_rmap() sets it to NULL */
+} xfs_rmap_t;
+
+/* per-AG rmap object anchor; both slabs are created by init_rmaps() */
+typedef struct xfs_ag_rmap {
+ xfs_slab_t *ar_rmaps; /* rmap observations (xfs_rmap_t items), p4 */
+ xfs_slab_t *ar_reflink_items; /* reflink items (xfs_reflink_rec_incore_t), p4-5 */
+} xfs_ag_rmap_t;
+
+static xfs_ag_rmap_t *ag_rmaps; /* array of sb_agcount anchors, indexed by AG number */
+
+/**
+ * needs_rmap_work() -- Return true if the superblock of @mp has the reflink
+ * or the rmapbt feature, i.e. rmap data is wanted.
+ */
+bool
+needs_rmap_work(
+ xfs_mount_t *mp)
+{
+ return xfs_sb_version_hasreflink(&mp->m_sb) ||
+ xfs_sb_version_hasrmapbt(&mp->m_sb);
+}
+
+/**
+ * init_rmaps() -- Set up one rmap slab and one reflink slab per AG of @mp.
+ */
+void
+init_rmaps(
+ xfs_mount_t *mp)
+{
+ xfs_agnumber_t i;
+ int error;
+
+ if (!needs_rmap_work(mp))
+ return; /* no reflink/rmapbt feature: ag_rmaps stays NULL */
+
+ ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(xfs_ag_rmap_t));
+ if (!ag_rmaps)
+ do_error(_("couldn't allocate per-AG reverse map roots\n"));
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ error = init_slab(&ag_rmaps[i].ar_rmaps, sizeof(xfs_rmap_t));
+ if (error)
+ do_error(
+_("Insufficient memory while allocating reverse mapping slabs.\n"));
+ error = init_slab(&ag_rmaps[i].ar_reflink_items,
+ sizeof(xfs_reflink_rec_incore_t));
+ if (error)
+ do_error(
+_("Insufficient memory while allocating reflink item slabs.\n"));
+ }
+}
+
+/**
+ * free_rmaps() -- Free both slabs of every AG of @mp, then the ag_rmaps array.
+ */
+void
+free_rmaps(
+ xfs_mount_t *mp)
+{
+ xfs_agnumber_t i;
+
+ if (!needs_rmap_work(mp))
+ return; /* same test as init_rmaps(), which allocates nothing in this case */
+
+ for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+ free_slab(&ag_rmaps[i].ar_rmaps);
+ free_slab(&ag_rmaps[i].ar_reflink_items);
+ }
+ free(ag_rmaps);
+ ag_rmaps = NULL; /* do not leave a dangling pointer behind */
+}
+
+/**
+ * add_rmap() -- Append one extent mapping to the rmap slab of the AG that
+ * holds its start block. Returns 0 if no rmap work is needed,
+ * otherwise the result of slab_add().
+ * @mp: XFS mount object.
+ * @ino: The inode number associated with the extent mapping.
+ * @whichfork: XFS_DATA_FORK or XFS_ATTR_FORK; only checked by ASSERT, not stored.
+ * @irec: The extent mapping to record (start block, start offset, block count).
+ */
+int
+add_rmap(
+ xfs_mount_t *mp,
+ xfs_ino_t ino,
+ int whichfork,
+ xfs_bmbt_irec_t *irec)
+{
+ xfs_slab_t *rmaps;
+ xfs_rmap_t rmap;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+
+ if (!needs_rmap_work(mp))
+ return 0; /* fs has neither reflink nor rmapbt */
+
+ agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+ ASSERT(agno != NULLAGNUMBER);
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
+ ASSERT(ino != NULLFSINO);
+ ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+ rmaps = ag_rmaps[agno].ar_rmaps; /* slab of the AG holding the start block */
+ rmap.rm_ino = ino;
+ rmap.rm_startoff = irec->br_startoff;
+ rmap.rm_startblock = agbno; /* AG-relative block, not the full fsblock */
+ rmap.rm_blockcount = irec->br_blockcount;
+ rmap.rm_next = NULL;
+ return slab_add(rmaps, &rmap); /* &rmap is a stack local; slab_add() presumably copies it -- confirm */
+}
+
+#ifdef RMAP_DEBUG /* debug builds: print one rmap observation, tagged with msg, to stdout */
+static void
+dump_rmap(
+ const char *msg,
+ xfs_agnumber_t agno,
+ xfs_rmap_t *rmap)
+{
+ printf("%s: %p agno=%u pblk=%llu ino=%llu lblk=%llu len=%u\n", msg,
+ (void *)rmap, /* %p requires a pointer to void */
+ (unsigned)agno,
+ (unsigned long long)rmap->rm_startblock,
+ (unsigned long long)rmap->rm_ino,
+ (unsigned long long)rmap->rm_startoff,
+ (unsigned)rmap->rm_blockcount);
+}
+#else /* !RMAP_DEBUG: dump_rmap() expands to nothing */
+# define dump_rmap(m, a, r)
+#endif /* RMAP_DEBUG */
+
+/**
+ * reflink_record_inode_flag() -- Set the inode's ino_was_rl bit if the on-disk
+ * inode has XFS_DIFLAG_REFLINK; else do nothing. di_ino is sanity-checked only
+ * after the flag test, as it exists only in v3 inodes (NOTE(review): confirm).
+ * @mp: XFS mount object.
+ * @dino: On-disk inode.
+ * @agno: AG number of the inode.
+ * @ino: AG inode number.
+ * @lino: Full inode number.
+ */
+void
+reflink_record_inode_flag(
+ xfs_mount_t *mp,
+ xfs_dinode_t *dino,
+ xfs_agnumber_t agno,
+ xfs_agino_t ino,
+ xfs_ino_t lino)
+{
+ ino_tree_node_t *irec;
+ int off;
+
+ if (!(be16_to_cpu(dino->di_flags) & XFS_DIFLAG_REFLINK))
+ return;
+ ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
+ irec = find_inode_rec(mp, agno, ino);
+ off = get_inode_offset(mp, lino, irec);
+ ASSERT(!inode_was_rl(irec, off)); /* each inode is expected to be recorded once */
+ set_inode_was_rl(irec, off);
+ dbg_printf("set was_rl lino=%llu was=0x%llx\n",
+ (unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
+}
diff --git a/repair/rmap.h b/repair/rmap.h
new file mode 100644
index 0000000..16ad157
--- /dev/null
+++ b/repair/rmap.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef RMAP_H_
+#define RMAP_H_
+/* Defined in phase4.c; false until phase4() sets it, then gates rmap collection in dinode.c. */
+extern bool collect_rmaps;
+/* Allocate / release the per-AG rmap data; both return early unless needs_rmap_work(). */
+extern void init_rmaps(xfs_mount_t *mp);
+extern void free_rmaps(xfs_mount_t *mp);
+/* Record one extent mapping; returns 0 when no rmap work is needed, else slab_add()'s result. */
+extern int add_rmap(xfs_mount_t *mp, xfs_ino_t ino, int whichfork,
+ xfs_bmbt_irec_t *irec);
+/* Note in the incore inode record that the on-disk inode has the reflink flag set. */
+extern void reflink_record_inode_flag(xfs_mount_t *mp, xfs_dinode_t *dino,
+ xfs_agnumber_t agno, xfs_agino_t ino, xfs_ino_t lino);
+/* True if the superblock has the reflink or the rmapbt feature. */
+extern bool needs_rmap_work(xfs_mount_t *mp);
+
+#endif /* RMAP_H_ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 07ddd00..3cd288a 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -31,6 +31,8 @@
#include "threads.h"
#include "progress.h"
#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
#define rounddown(x, y) (((x)/(y))*(y))
@@ -771,6 +773,7 @@ main(int argc, char **argv)
init_bmaps(mp);
incore_ino_init(mp);
incore_ext_init(mp);
+ init_rmaps(mp);
/* initialize random globals now that we know the fs geometry */
inodes_per_block = mp->m_sb.sb_inopblock;
@@ -804,6 +807,7 @@ main(int argc, char **argv)
/*
* Done with the block usage maps, toss them...
*/
+ free_rmaps(mp);
free_bmaps(mp);
if (!bad_ino_btree) {
|