Create a per-AG btree to track the reference counts of physical blocks
to support reflink.
Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_alloc.c | 19 +
fs/xfs/libxfs/xfs_btree.c | 8 -
fs/xfs/libxfs/xfs_btree.h | 7
fs/xfs/libxfs/xfs_format.h | 59 ++++
fs/xfs/libxfs/xfs_reflink_btree.c | 531 +++++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_reflink_btree.h | 70 +++++
fs/xfs/libxfs/xfs_sb.c | 7
fs/xfs/libxfs/xfs_shared.h | 1
fs/xfs/libxfs/xfs_trans_resv.c | 2
fs/xfs/libxfs/xfs_types.h | 2
fs/xfs/xfs_mount.h | 5
fs/xfs/xfs_stats.c | 1
fs/xfs/xfs_stats.h | 18 +
14 files changed, 722 insertions(+), 9 deletions(-)
create mode 100644 fs/xfs/libxfs/xfs_reflink_btree.c
create mode 100644 fs/xfs/libxfs/xfs_reflink_btree.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index e338595..ba89aee 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_log_rlimit.o \
xfs_rmap.o \
xfs_rmap_btree.o \
+ xfs_reflink_btree.o \
xfs_sb.o \
xfs_symlink_remote.o \
xfs_trans_resv.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c6a1372..fc8a499 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -54,6 +54,8 @@ xfs_extlen_t
xfs_prealloc_blocks(
struct xfs_mount *mp)
{
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ return XFS_RL_BLOCK(mp) + 1;
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
return XFS_RMAP_BLOCK(mp) + 1;
if (xfs_sb_version_hasfinobt(&mp->m_sb))
@@ -91,9 +93,11 @@ xfs_alloc_set_aside(
unsigned int blocks;
blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
- return blocks;
- return blocks + (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels) - 1);
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ blocks += (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels) - 1);
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ blocks += (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels) - 1);
+ return blocks;
}
/*
@@ -123,6 +127,10 @@ xfs_alloc_ag_max_usable(struct xfs_mount *mp)
/* rmap root block + full tree split on full AG */
blocks += 1 + (2 * mp->m_ag_maxlevels) - 1;
}
+ if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+ /* reflink root block + full tree split on full AG */
+ blocks += 1 + (2 * mp->m_ag_maxlevels) - 1;
+ }
return mp->m_sb.sb_agblocks - blocks;
}
@@ -2378,6 +2386,10 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
return false;
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_reflink_level) > XFS_BTREE_MAXLEVELS)
+ return false;
+
return true;;
}
@@ -2497,6 +2509,7 @@ xfs_alloc_read_agf(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
pag->pagf_levels[XFS_BTNUM_RMAPi] =
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+ pag->pagf_reflink_level = be32_to_cpu(agf->agf_reflink_level);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 4c9b9b3..8820aad 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone;
*/
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
- XFS_FIBT_MAGIC },
+ XFS_FIBT_MAGIC, 0 },
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
- XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
+ XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
+ XFS_RLBT_CRC_MAGIC }
};
#define xfs_btree_magic(cur) \
xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -1117,6 +1118,9 @@ xfs_btree_set_refs(
case XFS_BTNUM_RMAP:
xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
break;
+ case XFS_BTNUM_RL:
+ xfs_buf_set_ref(bp, XFS_REFLINK_BTREE_REF);
+ break;
default:
ASSERT(0);
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 48ab2b1..a3f8661 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -43,6 +43,7 @@ union xfs_btree_key {
xfs_alloc_key_t alloc;
struct xfs_inobt_key inobt;
struct xfs_rmap_key rmap;
+ xfs_reflink_key_t reflink;
};
union xfs_btree_rec {
@@ -51,6 +52,7 @@ union xfs_btree_rec {
struct xfs_alloc_rec alloc;
struct xfs_inobt_rec inobt;
struct xfs_rmap_rec rmap;
+ xfs_reflink_rec_t reflink;
};
/*
@@ -67,6 +69,8 @@ union xfs_btree_rec {
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
+#define XFS_BTNUM_RL ((xfs_btnum_t)XFS_BTNUM_RLi)
+
/*
* For logging record fields.
*/
@@ -98,6 +102,7 @@ do { \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(rmap, stat); break; \
+ case XFS_BTNUM_RL: __XFS_BTREE_STATS_INC(rlbt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -113,6 +118,7 @@ do { \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_ADD(rmap, stat, val); break; \
+ case XFS_BTNUM_RL: __XFS_BTREE_STATS_INC(rlbt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \
} while (0)
@@ -205,6 +211,7 @@ typedef struct xfs_btree_cur
xfs_bmbt_irec_t b;
xfs_inobt_rec_incore_t i;
struct xfs_rmap_irec r;
+ xfs_reflink_rec_incore_t rl;
} bc_rec; /* current insert/search record value */
struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */
int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9cff517..e4954ab 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -446,9 +446,11 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode
btree */
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map
btree */
+#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflink
btree */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
- XFS_SB_FEAT_RO_COMPAT_RMAPBT)
+ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
+ XFS_SB_FEAT_RO_COMPAT_REFLINK)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
@@ -522,6 +524,12 @@ static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb
*sbp)
(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
}
+static inline int xfs_sb_version_hasreflink(xfs_sb_t *sbp)
+{
+ return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+ (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
+}
+
/*
* end of superblock version macros
*/
@@ -616,12 +624,15 @@ typedef struct xfs_agf {
__be32 agf_btreeblks; /* # of blocks held in AGF btrees */
uuid_t agf_uuid; /* uuid of filesystem */
+ __be32 agf_reflink_root; /* reflink tree root block */
+ __be32 agf_reflink_level; /* reflink btree levels */
+
/*
* reserve some contiguous space for future logged fields before we add
* the unlogged fields. This makes the range logging via flags and
* structure offsets much simpler.
*/
- __be64 agf_spare64[16];
+ __be64 agf_spare64[15];
/* unlogged fields, written during buffer writeback. */
__be64 agf_lsn; /* last write sequence */
@@ -1338,6 +1349,50 @@ typedef __be32 xfs_rmap_ptr_t;
XFS_IBT_BLOCK(mp) + 1)
/*
+ * reflink Btree format definitions
+ *
+ */
+#define XFS_RLBT_CRC_MAGIC 0x524C4233 /* 'RLB3' */
+
+/*
+ * Data record/key structure
+ */
+typedef struct xfs_reflink_rec {
+ __be32 rr_startblock; /* starting block number */
+ __be32 rr_blockcount; /* count of blocks */
+ __be32 rr_nlinks; /* number of inodes linked here */
+} xfs_reflink_rec_t;
+
+typedef struct xfs_reflink_key {
+ __be32 rr_startblock; /* starting block number */
+} xfs_reflink_key_t;
+
+typedef struct xfs_reflink_rec_incore {
+ xfs_agblock_t rr_startblock; /* starting block number */
+ xfs_extlen_t rr_blockcount; /* count of free blocks */
+ xfs_nlink_t rr_nlinks; /* number of inodes linked here */
+} xfs_reflink_rec_incore_t;
+
+/*
+ * When a block hits MAXRLCOUNT references, it becomes permanently
+ * stuck in CoW mode, because who knows how many times it's really
+ * referenced.
+ */
+#define MAXRLCOUNT ((xfs_nlink_t)~0U)
+#define MAXRLEXTLEN ((xfs_extlen_t)~0U)
+
+/* btree pointer type */
+typedef __be32 xfs_reflink_ptr_t;
+
+#define XFS_RL_BLOCK(mp) \
+ (xfs_sb_version_hasrmapbt(&((mp)->m_sb)) ? \
+ XFS_RMAP_BLOCK(mp) + 1 : \
+ (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
+ XFS_FIBT_BLOCK(mp) + 1 : \
+ XFS_IBT_BLOCK(mp) + 1))
+
+
+/*
* BMAP Btree format definitions
*
* This includes both the root block definition that sits inside an inode fork
diff --git a/fs/xfs/libxfs/xfs_reflink_btree.c
b/fs/xfs/libxfs/xfs_reflink_btree.c
new file mode 100644
index 0000000..8a0fa5d
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_reflink_btree.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_reflink_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+
+#undef REFLINK_DEBUG
+
+#ifdef REFLINK_DEBUG
+# define dbg_printk(f, a...) do {printk(KERN_ERR f, ## a); } while (0)
+#else
+# define dbg_printk(f, a...)
+#endif
+
+#define CHECK_AG_NUMBER(mp, agno) \
+ do { \
+ ASSERT((agno) != NULLAGNUMBER); \
+ ASSERT((agno) < (mp)->m_sb.sb_agcount); \
+ } while(0);
+
+#define CHECK_AG_EXTENT(mp, agbno, len) \
+ do { \
+ ASSERT((agbno) != NULLAGBLOCK); \
+ ASSERT((len) > 0); \
+ ASSERT((unsigned long long)(agbno) + (len) <= \
+ (mp)->m_sb.sb_agblocks); \
+ } while(0);
+
+#define XFS_WANT_CORRUPTED_RLEXT_GOTO(mp, have, agbno, len, nr, label) \
+ do { \
+ XFS_WANT_CORRUPTED_GOTO((mp), (have) == 1, label); \
+ XFS_WANT_CORRUPTED_GOTO((mp), (len) > 0, label); \
+ XFS_WANT_CORRUPTED_GOTO((mp), (nr) >= 2, label); \
+ XFS_WANT_CORRUPTED_GOTO((mp), (unsigned long long)(agbno) + \
+ (len) <= (mp)->m_sb.sb_agblocks, label); \
+ } while(0);
+
+STATIC struct xfs_btree_cur *
+xfs_reflinkbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_reflinkbt_init_cursor(cur->bc_mp, cur->bc_tp,
+ cur->bc_private.a.agbp, cur->bc_private.a.agno);
+}
+
+STATIC void
+xfs_reflinkbt_set_root(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ int inc)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
+ struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
+
+ ASSERT(ptr->s != 0);
+
+ agf->agf_reflink_root = ptr->s;
+ be32_add_cpu(&agf->agf_reflink_level, inc);
+ pag->pagf_reflink_level += inc;
+ xfs_perag_put(pag);
+
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+STATIC int
+xfs_reflinkbt_alloc_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ int error;
+ xfs_agblock_t bno;
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+ /* Allocate the new block from the freelist. If we can't, give up. */
+ error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+ &bno, 1);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
+
+ if (bno == NULLAGBLOCK) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 0;
+ return 0;
+ }
+
+ xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
false);
+
+ xfs_trans_agbtree_delta(cur->bc_tp, 1);
+ new->s = cpu_to_be32(bno);
+
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ *stat = 1;
+ return 0;
+}
+
+STATIC int
+xfs_reflinkbt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf *agbp = cur->bc_private.a.agbp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ xfs_agblock_t bno;
+ int error;
+
+ bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+ error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+ if (error)
+ return error;
+
+ xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+ XFS_EXTENT_BUSY_SKIP_DISCARD);
+ xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+ xfs_trans_binval(cur->bc_tp, bp);
+ return 0;
+}
+
+STATIC int
+xfs_reflinkbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_rlbt_mnr[level != 0];
+}
+
+STATIC int
+xfs_reflinkbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_mp->m_rlbt_mxr[level != 0];
+}
+
+STATIC void
+xfs_reflinkbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(rec->reflink.rr_startblock != 0);
+
+ key->reflink.rr_startblock = rec->reflink.rr_startblock;
+}
+
+STATIC void
+xfs_reflinkbt_init_rec_from_key(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(key->reflink.rr_startblock != 0);
+
+ rec->reflink.rr_startblock = key->reflink.rr_startblock;
+}
+
+STATIC void
+xfs_reflinkbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ ASSERT(cur->bc_rec.rl.rr_startblock != 0);
+
+ rec->reflink.rr_startblock = cpu_to_be32(cur->bc_rec.rl.rr_startblock);
+ rec->reflink.rr_blockcount = cpu_to_be32(cur->bc_rec.rl.rr_blockcount);
+ rec->reflink.rr_nlinks = cpu_to_be32(cur->bc_rec.rl.rr_nlinks);
+}
+
+STATIC void
+xfs_reflinkbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+ ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(agf->agf_reflink_root != 0);
+
+ ptr->s = agf->agf_reflink_root;
+}
+
+STATIC __int64_t
+xfs_reflinkbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ xfs_reflink_rec_incore_t *rec = &cur->bc_rec.rl;
+ xfs_reflink_key_t *kp = &key->reflink;
+
+ return (__int64_t)be32_to_cpu(kp->rr_startblock) - rec->rr_startblock;
+}
+
+static bool
+xfs_reflinkbt_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+ unsigned int level;
+
+ if (block->bb_magic != cpu_to_be32(XFS_RLBT_CRC_MAGIC))
+ return false;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag &&
+ be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+
+ level = be16_to_cpu(block->bb_level);
+ if (pag && pag->pagf_init) {
+ if (level >= pag->pagf_reflink_level)
+ return false;
+ } else if (level >= mp->m_ag_maxlevels)
+ return false;
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > mp->m_rlbt_mxr[level != 0])
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
+
+static void
+xfs_reflinkbt_read_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, -EFSBADCRC);
+ else if (!xfs_reflinkbt_verify(bp))
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
+ }
+}
+
+static void
+xfs_reflinkbt_write_verify(
+ struct xfs_buf *bp)
+{
+ if (!xfs_reflinkbt_verify(bp)) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
+ }
+ xfs_btree_sblock_calc_crc(bp);
+
+}
+
+const struct xfs_buf_ops xfs_reflinkbt_buf_ops = {
+ .verify_read = xfs_reflinkbt_read_verify,
+ .verify_write = xfs_reflinkbt_write_verify,
+};
+
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_reflinkbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->reflink.rr_startblock) <
+ be32_to_cpu(k2->reflink.rr_startblock);
+}
+
+STATIC int
+xfs_reflinkbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return be32_to_cpu(r1->reflink.rr_startblock) +
+ be32_to_cpu(r1->reflink.rr_blockcount) <=
+ be32_to_cpu(r2->reflink.rr_startblock);
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_reflinkbt_ops = {
+ .rec_len = sizeof(xfs_reflink_rec_t),
+ .key_len = sizeof(xfs_reflink_key_t),
+
+ .dup_cursor = xfs_reflinkbt_dup_cursor,
+ .set_root = xfs_reflinkbt_set_root,
+ .alloc_block = xfs_reflinkbt_alloc_block,
+ .free_block = xfs_reflinkbt_free_block,
+ .get_minrecs = xfs_reflinkbt_get_minrecs,
+ .get_maxrecs = xfs_reflinkbt_get_maxrecs,
+ .init_key_from_rec = xfs_reflinkbt_init_key_from_rec,
+ .init_rec_from_key = xfs_reflinkbt_init_rec_from_key,
+ .init_rec_from_cur = xfs_reflinkbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_reflinkbt_init_ptr_from_cur,
+ .key_diff = xfs_reflinkbt_key_diff,
+ .buf_ops = &xfs_reflinkbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_reflinkbt_keys_inorder,
+ .recs_inorder = xfs_reflinkbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate a new reflink btree cursor.
+ */
+struct xfs_btree_cur * /* new reflink btree cursor */
+xfs_reflinkbt_init_cursor(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_buf *agbp, /* buffer for agf structure */
+ xfs_agnumber_t agno) /* allocation group number */
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+ struct xfs_btree_cur *cur;
+
+ CHECK_AG_NUMBER(mp, agno);
+ cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+ cur->bc_tp = tp;
+ cur->bc_mp = mp;
+ cur->bc_btnum = XFS_BTNUM_RL;
+ cur->bc_blocklog = mp->m_sb.sb_blocklog;
+ cur->bc_ops = &xfs_reflinkbt_ops;
+
+ cur->bc_nlevels = be32_to_cpu(agf->agf_reflink_level);
+
+ cur->bc_private.a.agbp = agbp;
+ cur->bc_private.a.agno = agno;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+ return cur;
+}
+
+/*
+ * Calculate number of records in an reflink btree block.
+ */
+int
+xfs_reflinkbt_maxrecs(
+ struct xfs_mount *mp,
+ int blocklen,
+ int leaf)
+{
+ blocklen -= XFS_REFLINK_BLOCK_LEN;
+
+ if (leaf)
+ return blocklen / sizeof(xfs_reflink_rec_t);
+ return blocklen / (sizeof(xfs_reflink_key_t) +
+ sizeof(xfs_reflink_ptr_t));
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int /* error */
+xfs_reflink_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.rl.rr_startblock = bno;
+ cur->bc_rec.rl.rr_blockcount = 0;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int /* error */
+xfs_reflink_lookup_ge(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ int *stat) /* success/failure */
+{
+ cur->bc_rec.rl.rr_startblock = bno;
+ cur->bc_rec.rl.rr_blockcount = 0;
+ return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int /* error */
+xfs_reflink_get_rec(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t *bno, /* output: starting block of extent */
+ xfs_extlen_t *len, /* output: length of extent */
+ xfs_nlink_t *nlink, /* output: number of links */
+ int *stat) /* output: success/failure */
+{
+ union xfs_btree_rec *rec;
+ int error;
+
+ error = xfs_btree_get_rec(cur, &rec, stat);
+ if (!error && *stat == 1) {
+ CHECK_AG_EXTENT(cur->bc_mp,
+ be32_to_cpu(rec->reflink.rr_startblock),
+ be32_to_cpu(rec->reflink.rr_blockcount));
+ *bno = be32_to_cpu(rec->reflink.rr_startblock);
+ *len = be32_to_cpu(rec->reflink.rr_blockcount);
+ *nlink = be32_to_cpu(rec->reflink.rr_nlinks);
+ }
+ return error;
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len, nr].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_reflinkbt_update(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ xfs_nlink_t nr) /* reference count */
+{
+ union xfs_btree_rec rec;
+
+ CHECK_AG_EXTENT(cur->bc_mp, bno, len);
+ ASSERT(nr > 1);
+
+ rec.reflink.rr_startblock = cpu_to_be32(bno);
+ rec.reflink.rr_blockcount = cpu_to_be32(len);
+ rec.reflink.rr_nlinks = cpu_to_be32(nr);
+ return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Insert the record referred to by cur to the value given
+ * by [bno, len, nr].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_reflinkbt_insert(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ xfs_nlink_t nr, /* reference count */
+ int *i) /* success? */
+{
+ CHECK_AG_EXTENT(cur->bc_mp, bno, len);
+ ASSERT(nr > 1);
+
+ cur->bc_rec.rl.rr_startblock = bno;
+ cur->bc_rec.rl.rr_blockcount = len;
+ cur->bc_rec.rl.rr_nlinks = nr;
+ return xfs_btree_insert(cur, i);
+}
+
+/*
+ * Remove the record referred to by cur.
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int /* error */
+xfs_reflinkbt_delete(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int *i) /* success? */
+{
+ xfs_agblock_t bno;
+ xfs_extlen_t len;
+ xfs_nlink_t nr;
+ int x;
+ int error;
+
+ error = xfs_reflink_get_rec(cur, &bno, &len, &nr, &x);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, x == 1, error0);
+ error = xfs_btree_delete(cur, i);
+ if (error)
+ return error;
+ error = xfs_reflink_lookup_ge(cur, bno, &x);
+error0:
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_reflink_btree.h
b/fs/xfs/libxfs/xfs_reflink_btree.h
new file mode 100644
index 0000000..a27588a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_reflink_btree.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_REFLINK_BTREE_H__
+#define __XFS_REFLINK_BTREE_H__
+
+/*
+ * Freespace on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/*
+ * Btree block header size depends on a superblock flag.
+ */
+#define XFS_REFLINK_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_REFLINK_REC_ADDR(block, index) \
+ ((xfs_reflink_rec_t *) \
+ ((char *)(block) + \
+ XFS_REFLINK_BLOCK_LEN + \
+ (((index) - 1) * sizeof(xfs_reflink_rec_t))))
+
+#define XFS_REFLINK_KEY_ADDR(block, index) \
+ ((xfs_reflink_key_t *) \
+ ((char *)(block) + \
+ XFS_REFLINK_BLOCK_LEN + \
+ ((index) - 1) * sizeof(xfs_reflink_key_t)))
+
+#define XFS_REFLINK_PTR_ADDR(block, index, maxrecs) \
+ ((xfs_reflink_ptr_t *) \
+ ((char *)(block) + \
+ XFS_REFLINK_BLOCK_LEN + \
+ (maxrecs) * sizeof(xfs_reflink_key_t) + \
+ ((index) - 1) * sizeof(xfs_reflink_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_reflinkbt_init_cursor(struct xfs_mount *,
+ struct xfs_trans *, struct xfs_buf *,
+ xfs_agnumber_t);
+extern int xfs_reflinkbt_maxrecs(struct xfs_mount *, int, int);
+extern int xfs_reflink_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ int *stat);
+extern int xfs_reflink_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ int *stat);
+extern int xfs_reflink_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
+ xfs_extlen_t *len, xfs_nlink_t *nlink, int *stat);
+
+#endif /* __XFS_REFLINK_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index db5a19d3..5f8f7fd 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -36,6 +36,8 @@
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_reflink_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -717,6 +719,11 @@ xfs_sb_mount_common(
mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+ mp->m_rlbt_mxr[0] = xfs_reflinkbt_maxrecs(mp, sbp->sb_blocksize, 1);
+ mp->m_rlbt_mxr[1] = xfs_reflinkbt_maxrecs(mp, sbp->sb_blocksize, 0);
+ mp->m_rlbt_mnr[0] = mp->m_rlbt_mxr[0] / 2;
+ mp->m_rlbt_mnr[1] = mp->m_rlbt_mxr[1] / 2;
+
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
sbp->sb_inopblock);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 88efbb4..d1de74e 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -216,6 +216,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
#define XFS_INO_REF 2
#define XFS_ATTR_BTREE_REF 1
#define XFS_DQUOT_REF 1
+#define XFS_REFLINK_BTREE_REF 1
/*
* Flags for xfs_trans_ichgtime().
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index d495f82..a6d1d3b 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -81,6 +81,8 @@ xfs_allocfree_log_count(
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
num_trees++;
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ num_trees++;
return num_ops * num_trees * (2 * mp->m_ag_maxlevels - 1);
}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 3d50364..1a93ac9 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -109,7 +109,7 @@ typedef enum {
typedef enum {
XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
- XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+ XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_RLi, XFS_BTNUM_MAX
} xfs_btnum_t;
struct xfs_name {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index cdced0b..69af7f7 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -92,6 +92,8 @@ typedef struct xfs_mount {
uint m_inobt_mnr[2]; /* min inobt btree records */
uint m_rmap_mxr[2]; /* max rmap btree records */
uint m_rmap_mnr[2]; /* min rmap btree records */
+ uint m_rlbt_mxr[2]; /* max rlbt btree records */
+ uint m_rlbt_mnr[2]; /* min rlbt btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
uint m_in_maxlevels; /* max inobt btree levels. */
@@ -315,6 +317,9 @@ typedef struct xfs_perag {
/* for rcu-safe freeing */
struct rcu_head rcu_head;
int pagb_count; /* pagb slots in use */
+
+ /* reflink */
+ __uint8_t pagf_reflink_level;
} xfs_perag_t;
extern int xfs_log_sbcount(xfs_mount_t *);
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 67bbfa2..57449b8 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -61,6 +61,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
{ "ibt2", XFSSTAT_END_IBT_V2 },
{ "fibt2", XFSSTAT_END_FIBT_V2 },
{ "rmapbt", XFSSTAT_END_RMAP_V2 },
+ { "rlbt2", XFSSTAT_END_RLBT_V2 },
/* we print both series of quota information together */
{ "qm", XFSSTAT_END_QM },
};
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 8414db2..d943c04 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -215,7 +215,23 @@ struct xfsstats {
__uint32_t xs_rmap_2_alloc;
__uint32_t xs_rmap_2_free;
__uint32_t xs_rmap_2_moves;
-#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_RMAP_V2+6)
+#define XFSSTAT_END_RLBT_V2 (XFSSTAT_END_RMAP_V2+15)
+ __uint32_t xs_rlbt_2_lookup;
+ __uint32_t xs_rlbt_2_compare;
+ __uint32_t xs_rlbt_2_insrec;
+ __uint32_t xs_rlbt_2_delrec;
+ __uint32_t xs_rlbt_2_newroot;
+ __uint32_t xs_rlbt_2_killroot;
+ __uint32_t xs_rlbt_2_increment;
+ __uint32_t xs_rlbt_2_decrement;
+ __uint32_t xs_rlbt_2_lshift;
+ __uint32_t xs_rlbt_2_rshift;
+ __uint32_t xs_rlbt_2_split;
+ __uint32_t xs_rlbt_2_join;
+ __uint32_t xs_rlbt_2_alloc;
+ __uint32_t xs_rlbt_2_free;
+ __uint32_t xs_rlbt_2_moves;
+#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_RLBT_V2+6)
__uint32_t xs_qm_dqreclaims;
__uint32_t xs_qm_dqreclaim_misses;
__uint32_t xs_qm_dquot_dups;
|