[PATCH 12/14] repair: switch block usage bitmap to a btree
Alex Elder
aelder at sgi.com
Thu Oct 22 11:22:42 CDT 2009
Christoph Hellwig wrote:
> Using a btree representing the extents is much more space efficient than
> using a bitmap tracking every single block. In addition it also allows
> for more optimal algorithms checking range overlaps instead of walking
> every block in various places.
>
> Also move the RT tracking bitmap into incore.c instead of leaving it
> a as macros - this keeps the implementation contained.
This is a really good change, and I'm now seeing the benefit
of the whole series.
I have a few minor things I would like to see changed at some
point, but nothing looks obviously incorrect so I'll wait and
post a patch against this once it's committed.
Here are a couple other general thoughts:
- One minor concern is that the btree code has cases in which
the peek routines don't work (when the keys_valid flag in the
btree root is zero) and this code doesn't check for that.
I'll just assume for now that never happens here.
- The bitfield code used for the real-time volume map is
generally useful and could be separated into its own module.
-Alex
> Signed-off-by: Barry Naujok <bnaujok at sgi.com>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
Reviewed-by: Alex Elder <aelder at sgi.com>
> Index: xfsprogs-dev/repair/dino_chunks.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/dino_chunks.c 2009-09-02 14:51:09.449268859 -0300
> +++ xfsprogs-dev/repair/dino_chunks.c 2009-09-02 14:51:18.593298964 -0300
> @@ -118,6 +118,7 @@ verify_inode_chunk(xfs_mount_t *mp,
> int i;
> int j;
> int state;
> + xfs_extlen_t blen;
>
> agno = XFS_INO_TO_AGNO(mp, ino);
> agino = XFS_INO_TO_AGINO(mp, ino);
> @@ -433,9 +434,10 @@ verify_inode_chunk(xfs_mount_t *mp,
> * entry or an iunlinked pointer
> */
> pthread_mutex_lock(&ag_locks[agno]);
> - for (j = 0, cur_agbno = chunk_start_agbno;
> - cur_agbno < chunk_stop_agbno; cur_agbno++) {
> - state = get_bmap(agno, cur_agbno);
> + for (cur_agbno = chunk_start_agbno;
> + cur_agbno < chunk_stop_agbno;
> + cur_agbno += blen) {
> + state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
> switch (state) {
> case XR_E_MULT:
> case XR_E_INUSE:
> @@ -444,9 +446,9 @@ verify_inode_chunk(xfs_mount_t *mp,
> do_warn(
> _("inode block %d/%d multiply claimed, (state %d)\n"),
> agno, cur_agbno, state);
> - set_bmap(agno, cur_agbno, XR_E_MULT);
> - j = 1;
> - break;
> + set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT);
> + pthread_mutex_unlock(&ag_locks[agno]);
> + return 0;
> case XR_E_INO:
> do_error(
> _("uncertain inode block overlap, agbno = %d, ino = %llu\n"),
> @@ -455,11 +457,6 @@ verify_inode_chunk(xfs_mount_t *mp,
> default:
> break;
> }
> -
> - if (j) {
> - pthread_mutex_unlock(&ag_locks[agno]);
> - return(0);
> - }
> }
> pthread_mutex_unlock(&ag_locks[agno]);
>
> @@ -487,8 +484,9 @@ verify_inode_chunk(xfs_mount_t *mp,
> pthread_mutex_lock(&ag_locks[agno]);
>
> for (cur_agbno = chunk_start_agbno;
> - cur_agbno < chunk_stop_agbno; cur_agbno++) {
> - state = get_bmap(agno, cur_agbno);
> + cur_agbno < chunk_stop_agbno;
> + cur_agbno += blen) {
> + state = get_bmap_ext(agno, cur_agbno, chunk_stop_agbno, &blen);
> switch (state) {
> case XR_E_INO:
> do_error(
> @@ -498,7 +496,7 @@ verify_inode_chunk(xfs_mount_t *mp,
> case XR_E_UNKNOWN:
> case XR_E_FREE1:
> case XR_E_FREE:
> - set_bmap(agno, cur_agbno, XR_E_INO);
> + set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
> break;
> case XR_E_MULT:
> case XR_E_INUSE:
> @@ -512,7 +510,7 @@ verify_inode_chunk(xfs_mount_t *mp,
> do_warn(
> _("inode block %d/%d bad state, (state %d)\n"),
> agno, cur_agbno, state);
> - set_bmap(agno, cur_agbno, XR_E_INO);
> + set_bmap_ext(agno, cur_agbno, blen, XR_E_INO);
> break;
> }
> }
> Index: xfsprogs-dev/repair/dinode.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/dinode.c 2009-09-02 14:51:09.457268829 -0300
> +++ xfsprogs-dev/repair/dinode.c 2009-09-02 14:51:18.593298964 -0300
> @@ -524,6 +524,7 @@ process_rt_rec(
>
> /*
> * set the appropriate number of extents
> + * this iterates block by block, this can be optimised using extents
> */
> for (b = irec->br_startblock; b < irec->br_startblock +
> irec->br_blockcount; b += mp->m_sb.sb_rextsize) {
> @@ -614,9 +615,10 @@ process_bmbt_reclist_int(
> char *forkname;
> int i;
> int state;
> - xfs_dfsbno_t e;
> xfs_agnumber_t agno;
> xfs_agblock_t agbno;
> + xfs_agblock_t ebno;
> + xfs_extlen_t blen;
> xfs_agnumber_t locked_agno = -1;
> int error = 1;
>
> @@ -718,7 +720,7 @@ process_bmbt_reclist_int(
> */
> agno = XFS_FSB_TO_AGNO(mp, irec.br_startblock);
> agbno = XFS_FSB_TO_AGBNO(mp, irec.br_startblock);
> - e = irec.br_startblock + irec.br_blockcount;
> + ebno = agbno + irec.br_blockcount;
> if (agno != locked_agno) {
> if (locked_agno != -1)
> pthread_mutex_unlock(&ag_locks[locked_agno]);
> @@ -733,7 +735,9 @@ process_bmbt_reclist_int(
> * checking each entry without setting the
> * block bitmap
> */
> - for (b = irec.br_startblock; b < e; b++, agbno++) {
> + for (b = irec.br_startblock;
> + agbno < ebno;
> + b++, agbno++) {
> if (search_dup_extent(mp, agno, agbno)) {
> do_warn(_("%s fork in ino %llu claims "
> "dup extent, off - %llu, "
> @@ -748,22 +752,10 @@ process_bmbt_reclist_int(
> continue;
> }
>
> - for (b = irec.br_startblock; b < e; b++, agbno++) {
> - /*
> - * Process in chunks of 16 (XR_BB_UNIT/XR_BB)
> - * for common XR_E_UNKNOWN to XR_E_INUSE transition
> - */
> - if (((agbno & XR_BB_MASK) == 0) && ((irec.br_startblock + irec.br_blockcount - b) >=
> (XR_BB_UNIT/XR_BB))) {
> - if (ba_bmap[agno][agbno>>XR_BB] == XR_E_UNKNOWN_LL) {
> - ba_bmap[agno][agbno>>XR_BB] = XR_E_INUSE_LL;
> - agbno += (XR_BB_UNIT/XR_BB) - 1;
> - b += (XR_BB_UNIT/XR_BB) - 1;
> - continue;
> - }
> -
> - }
> -
> - state = get_bmap(agno, agbno);
> + for (b = irec.br_startblock;
> + agbno < ebno;
> + b += blen, agbno += blen) {
> + state = get_bmap_ext(agno, agbno, ebno, &blen);
> switch (state) {
> case XR_E_FREE:
> case XR_E_FREE1:
> @@ -772,7 +764,7 @@ process_bmbt_reclist_int(
> forkname, ino, (__uint64_t) b);
> /* fall through ... */
> case XR_E_UNKNOWN:
> - set_bmap(agno, agbno, XR_E_INUSE);
> + set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
> break;
>
> case XR_E_BAD_STATE:
> @@ -788,7 +780,7 @@ process_bmbt_reclist_int(
>
> case XR_E_INUSE:
> case XR_E_MULT:
> - set_bmap(agno, agbno, XR_E_MULT);
> + set_bmap_ext(agno, agbno, blen, XR_E_MULT);
> do_warn(_("%s fork in %s inode %llu claims "
> "used block %llu\n"),
> forkname, ftype, ino, (__uint64_t) b);
> Index: xfsprogs-dev/repair/globals.h
> ===================================================================
> --- xfsprogs-dev.orig/repair/globals.h 2009-09-02 14:51:09.461268919 -0300
> +++ xfsprogs-dev/repair/globals.h 2009-09-02 14:51:18.597292070 -0300
> @@ -156,11 +156,6 @@ EXTERN int chunks_pblock; /* # of 64-in
> EXTERN int max_symlink_blocks;
> EXTERN __int64_t fs_max_file_offset;
>
> -/* block allocation bitmaps */
> -
> -EXTERN __uint64_t **ba_bmap; /* see incore.h */
> -EXTERN __uint64_t *rt_ba_bmap; /* see incore.h */
> -
> /* realtime info */
>
> EXTERN xfs_rtword_t *btmcompute;
> Index: xfsprogs-dev/repair/phase2.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/phase2.c 2009-09-02 14:51:09.465298621 -0300
> +++ xfsprogs-dev/repair/phase2.c 2009-09-02 14:51:18.605297206 -0300
> @@ -109,7 +109,6 @@ void
> phase2(xfs_mount_t *mp)
> {
> xfs_agnumber_t i;
> - xfs_agblock_t b;
> int j;
> ino_tree_node_t *ino_rec;
>
> @@ -169,11 +168,8 @@ phase2(xfs_mount_t *mp)
> /*
> * also mark blocks
> */
> - for (b = 0; b < mp->m_ialloc_blks; b++) {
> - set_bmap(0,
> - b + XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
> - XR_E_INO);
> - }
> + set_bmap_ext(0, XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino),
> + mp->m_ialloc_blks, XR_E_INO);
> } else {
> do_log(_(" - found root inode chunk\n"));
>
> Index: xfsprogs-dev/repair/phase4.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/phase4.c 2009-09-02 14:51:09.533268366 -0300
> +++ xfsprogs-dev/repair/phase4.c 2009-09-02 14:51:18.609296598 -0300
> @@ -192,8 +192,7 @@ phase4(xfs_mount_t *mp)
> xfs_agnumber_t i;
> xfs_agblock_t j;
> xfs_agblock_t ag_end;
> - xfs_agblock_t extent_start;
> - xfs_extlen_t extent_len;
> + xfs_extlen_t blen;
> int ag_hdr_len = 4 * mp->m_sb.sb_sectsize;
> int ag_hdr_block;
> int bstate;
> @@ -226,29 +225,13 @@ phase4(xfs_mount_t *mp)
> ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks :
> mp->m_sb.sb_dblocks -
> (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i;
> - extent_start = extent_len = 0;
> +
> /*
> * set up duplicate extent list for this ag
> */
> - for (j = ag_hdr_block; j < ag_end; j++) {
> -
> - /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) */
> - if ((extent_start == 0) && ((j & XR_BB_MASK) == 0)) {
> - switch(ba_bmap[i][j>>XR_BB]) {
> - case XR_E_UNKNOWN_LL:
> - case XR_E_FREE1_LL:
> - case XR_E_FREE_LL:
> - case XR_E_INUSE_LL:
> - case XR_E_INUSE_FS_LL:
> - case XR_E_INO_LL:
> - case XR_E_FS_MAP_LL:
> - j += (XR_BB_UNIT/XR_BB) - 1;
> - continue;
> - }
> - }
> -
> - bstate = get_bmap(i, j);
> - switch (bstate) {
> + for (j = ag_hdr_block; j < ag_end; j += blen) {
> + bstate = get_bmap_ext(i, j, ag_end, &blen);
> + switch (bstate) {
> case XR_E_BAD_STATE:
> default:
> do_warn(
> @@ -262,37 +245,13 @@ phase4(xfs_mount_t *mp)
> case XR_E_INUSE_FS:
> case XR_E_INO:
> case XR_E_FS_MAP:
> - if (extent_start == 0)
> - continue;
> - else {
> - /*
> - * add extent and reset extent state
> - */
> - add_dup_extent(i, extent_start,
> - extent_len);
> - extent_start = 0;
> - extent_len = 0;
> - }
> break;
> case XR_E_MULT:
> - if (extent_start == 0) {
> - extent_start = j;
> - extent_len = 1;
> - } else if (extent_len == MAXEXTLEN) {
> - add_dup_extent(i, extent_start,
> - extent_len);
> - extent_start = j;
> - extent_len = 1;
> - } else
> - extent_len++;
> + add_dup_extent(i, j, blen);
> break;
> }
> }
> - /*
> - * catch tail-case, extent hitting the end of the ag
> - */
> - if (extent_start != 0)
> - add_dup_extent(i, extent_start, extent_len);
> +
> PROG_RPT_INC(prog_rpt_done[i], 1);
> }
> print_final_rpt();
> Index: xfsprogs-dev/repair/phase5.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/phase5.c 2009-09-02 14:51:09.561269620 -0300
> +++ xfsprogs-dev/repair/phase5.c 2009-09-02 14:51:18.613269588 -0300
> @@ -88,10 +88,8 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_ag
> xfs_agblock_t agbno;
> xfs_agblock_t ag_end;
> uint free_blocks;
> -#ifdef XR_BLD_FREE_TRACE
> - int old_state;
> - int state = XR_E_BAD_STATE;
> -#endif
> + xfs_extlen_t blen;
> + int bstate;
>
> /*
> * scan the bitmap for the ag looking for continuous
> @@ -120,30 +118,10 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_ag
> * ok, now find the number of extents, keep track of the
> * largest extent.
> */
> - for (agbno = 0; agbno < ag_end; agbno++) {
> -#if 0
> - old_state = state;
> - state = get_bmap(agno, agbno);
> - if (state != old_state) {
> - fprintf(stderr, "agbno %u - new state is %d\n",
> - agbno, state);
> - }
> -#endif
> - /* Process in chunks of 16 (XR_BB_UNIT/XR_BB) */
> - if ((in_extent == 0) && ((agbno & XR_BB_MASK) == 0)) {
> - /* testing >= XR_E_INUSE */
> - switch (ba_bmap[agno][agbno>>XR_BB]) {
> - case XR_E_INUSE_LL:
> - case XR_E_INUSE_FS_LL:
> - case XR_E_INO_LL:
> - case XR_E_FS_MAP_LL:
> - agbno += (XR_BB_UNIT/XR_BB) - 1;
> - continue;
> - }
> -
> - }
> - if (get_bmap(agno, agbno) < XR_E_INUSE) {
> - free_blocks++;
> + for (agbno = 0; agbno < ag_end; agbno += blen) {
> + bstate = get_bmap_ext(agno, agbno, ag_end, &blen);
> + if (bstate < XR_E_INUSE) {
> + free_blocks += blen;
> if (in_extent == 0) {
> /*
> * found the start of a free extent
> @@ -151,9 +129,9 @@ mk_incore_fstree(xfs_mount_t *mp, xfs_ag
> in_extent = 1;
> num_extents++;
> extent_start = agbno;
> - extent_len = 1;
> + extent_len = blen;
> } else {
> - extent_len++;
> + extent_len += blen;
> }
> } else {
> if (in_extent) {
> Index: xfsprogs-dev/repair/incore.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/incore.c 2009-09-02 14:51:09.565269570 -0300
> +++ xfsprogs-dev/repair/incore.c 2009-09-02 14:51:29.072772399 -0300
> @@ -18,6 +18,7 @@
>
> #include <libxfs.h>
> #include "avl.h"
> +#include "btree.h"
> #include "globals.h"
> #include "incore.h"
> #include "agheader.h"
> @@ -52,14 +53,192 @@ free_allocations(ba_rec_t *list)
> return;
> }
>
> +/*
> + * The following manages the in-core bitmap of the entire filesystem
> + * using extents in a btree.
> + *
> + * The btree items will point to one of the state values below,
> + * rather than storing the value itself in the pointer.
> + */
> +static int states[16] =
> + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
> +
> +static struct btree_root **ag_bmap;
> +
> +static void
> +update_bmap(
> + struct btree_root *bmap,
> + unsigned long offset,
> + xfs_extlen_t blen,
> + void *new_state)
> +{
> + unsigned long end = offset + blen;
> + int *cur_state;
> + unsigned long cur_key;
> + int *next_state;
> + unsigned long next_key;
> + int *prev_state;
> +
> + cur_state = btree_find(bmap, offset, &cur_key);
> + if (!cur_state)
> + return;
> +
> + if (offset == cur_key) {
> + /* if the start is the same as the "item" extent */
> + if (cur_state == new_state)
> + return;
> +
> + /*
> + * Note: this may be NULL if we are updating the map for
> + * the superblock.
> + */
> + prev_state = btree_peek_prev(bmap, NULL);
> +
> + next_state = btree_peek_next(bmap, &next_key);
> + if (next_key > end) {
> + /* different end */
> + if (new_state == prev_state) {
> + /* #1: prev has same state, move offset up */
> + btree_update_key(bmap, offset, end);
> + return;
> + }
> +
> + /* #4: insert new extent after, update current value */
> + btree_update_value(bmap, offset, new_state);
> + btree_insert(bmap, end, cur_state);
> + return;
> + }
> +
> + /* same end (and same start) */
> + if (new_state == next_state) {
> + /* next has same state */
> + if (new_state == prev_state) {
> + /* #3: merge prev & next */
> + btree_delete(bmap, offset);
> + btree_delete(bmap, end);
> + return;
> + }
> +
> + /* #8: merge next */
> + btree_update_value(bmap, offset, new_state);
> + btree_delete(bmap, end);
> + return;
> + }
> +
> + /* same start, same end, next has different state */
> + if (new_state == prev_state) {
> + /* #5: prev has same state */
> + btree_delete(bmap, offset);
> + return;
> + }
> +
> + /* #6: update value only */
> + btree_update_value(bmap, offset, new_state);
> + return;
> + }
> +
> + /* different start, offset is in the middle of "cur" */
> + prev_state = btree_peek_prev(bmap, NULL);
> + ASSERT(prev_state != NULL);
> + if (prev_state == new_state)
> + return;
> +
> + if (end == cur_key) {
> + /* end is at the same point as the current extent */
> + if (new_state == cur_state) {
> + /* #7: move next extent down */
> + btree_update_key(bmap, end, offset);
> + return;
> + }
> +
> + /* #9: different start, same end, add new extent */
> + btree_insert(bmap, offset, new_state);
> + return;
> + }
> +
> + /* #2: insert an extent into the middle of another extent */
> + btree_insert(bmap, offset, new_state);
> + btree_insert(bmap, end, prev_state);
> +}
> +
> +void
> +set_bmap_ext(
> + xfs_agnumber_t agno,
> + xfs_agblock_t agbno,
> + xfs_extlen_t blen,
> + int state)
> +{
> + update_bmap(ag_bmap[agno], agbno, blen, &states[state]);
> +}
> +
> +int
> +get_bmap_ext(
> + xfs_agnumber_t agno,
> + xfs_agblock_t agbno,
> + xfs_agblock_t maxbno,
> + xfs_extlen_t *blen)
> +{
> + int *statep;
> + unsigned long key;
> +
> + statep = btree_find(ag_bmap[agno], agbno, &key);
> + if (!statep)
> + return -1;
> +
> + if (key == agbno) {
> + if (blen) {
> + if (!btree_peek_next(ag_bmap[agno], &key))
> + return -1;
> + *blen = MIN(maxbno, key) - agbno;
> + }
> + return *statep;
> + }
> +
> + statep = btree_peek_prev(ag_bmap[agno], NULL);
> + if (!statep)
> + return -1;
> + if (blen)
> + *blen = MIN(maxbno, key) - agbno;
> +
> + return *statep;
> +}
>
> +static uint64_t *rt_bmap;
> static size_t rt_bmap_size;
>
> +/* block records fit into __uint64_t's units */
> +#define XR_BB_UNIT 64 /* number of bits/unit */
> +#define XR_BB 4 /* bits per block record */
> +#define XR_BB_NUM (XR_BB_UNIT/XR_BB) /* number of records per unit */
> +#define XR_BB_MASK 0xF /* block record mask */
> +
> +/*
> + * these work in real-time extents (e.g. fsbno == rt extent number)
> + */
> +int
> +get_rtbmap(
> + xfs_drtbno_t bno)
> +{
> + return (*(rt_bmap + bno / XR_BB_NUM) >>
> + ((bno % XR_BB_NUM) * XR_BB)) & XR_BB_MASK;
> +}
> +
> +void
> +set_rtbmap(
> + xfs_drtbno_t bno,
> + int state)
> +{
> + *(rt_bmap + bno / XR_BB_NUM) =
> + ((*(rt_bmap + bno / XR_BB_NUM) &
> + (~((__uint64_t) XR_BB_MASK << ((bno % XR_BB_NUM) * XR_BB)))) |
> + (((__uint64_t) state) << ((bno % XR_BB_NUM) * XR_BB)));
> +}
> +
> static void
> reset_rt_bmap(void)
> {
> - if (rt_ba_bmap)
> - memset(rt_ba_bmap, 0x22, rt_bmap_size); /* XR_E_FREE */
> + if (rt_bmap)
> + memset(rt_bmap, 0x22, rt_bmap_size); /* XR_E_FREE */
> }
>
> static void
> @@ -72,8 +251,8 @@ init_rt_bmap(
> rt_bmap_size = roundup(mp->m_sb.sb_rextents / (NBBY / XR_BB),
> sizeof(__uint64_t));
>
> - rt_ba_bmap = memalign(sizeof(__uint64_t), rt_bmap_size);
> - if (!rt_ba_bmap) {
> + rt_bmap = memalign(sizeof(__uint64_t), rt_bmap_size);
> + if (!rt_bmap) {
> do_error(
> _("couldn't allocate realtime block map, size = %llu\n"),
> mp->m_sb.sb_rextents);
> @@ -84,8 +263,8 @@ init_rt_bmap(
> static void
> free_rt_bmap(xfs_mount_t *mp)
> {
> - free(rt_ba_bmap);
> - rt_ba_bmap = NULL;
> + free(rt_bmap);
> + rt_bmap = NULL;
> }
>
>
> @@ -93,28 +272,41 @@ void
> reset_bmaps(xfs_mount_t *mp)
> {
> xfs_agnumber_t agno;
> + xfs_agblock_t ag_size;
> int ag_hdr_block;
> - int i;
>
> ag_hdr_block = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
> + ag_size = mp->m_sb.sb_agblocks;
>
> - for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> - memset(ba_bmap[agno], 0,
> - roundup((mp->m_sb.sb_agblocks + (NBBY / XR_BB) - 1) /
> - (NBBY / XR_BB), sizeof(__uint64_t)));
> - for (i = 0; i < ag_hdr_block; i++)
> - set_bmap(agno, i, XR_E_INUSE_FS);
> + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> + if (agno == mp->m_sb.sb_agcount - 1)
> + ag_size = (xfs_extlen_t)(mp->m_sb.sb_dblocks -
> + (xfs_drfsbno_t)mp->m_sb.sb_agblocks * agno);
> +#ifdef BTREE_STATS
> + if (btree_find(ag_bmap[agno], 0, NULL)) {
> + printf("ag_bmap[%d] btree stats:\n", i);
> + btree_print_stats(ag_bmap[agno], stdout);
> + }
> +#endif
> + /*
> + * We always insert an item for the first block having a
> + * given state. So the code below means:
> + *
> + * block 0..ag_hdr_block-1: XR_E_INUSE_FS
> + * ag_hdr_block..ag_size: XR_E_UNKNOWN
> + * ag_size... XR_E_BAD_STATE
> + */
> + btree_clear(ag_bmap[agno]);
> + btree_insert(ag_bmap[agno], 0, &states[XR_E_INUSE_FS]);
> + btree_insert(ag_bmap[agno],
> + ag_hdr_block, &states[XR_E_UNKNOWN]);
> + btree_insert(ag_bmap[agno], ag_size, &states[XR_E_BAD_STATE]);
> }
>
> if (mp->m_sb.sb_logstart != 0) {
> - xfs_dfsbno_t logend;
> -
> - logend = mp->m_sb.sb_logstart + mp->m_sb.sb_logblocks;
> -
> - for (i = mp->m_sb.sb_logstart; i < logend ; i++) {
> - set_bmap(XFS_FSB_TO_AGNO(mp, i),
> - XFS_FSB_TO_AGBNO(mp, i), XR_E_INUSE_FS);
> - }
> + set_bmap_ext(XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart),
> + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
> + mp->m_sb.sb_logblocks, XR_E_INUSE_FS);
> }
>
> reset_rt_bmap();
> @@ -123,30 +315,18 @@ reset_bmaps(xfs_mount_t *mp)
> void
> init_bmaps(xfs_mount_t *mp)
> {
> - xfs_agblock_t numblocks = mp->m_sb.sb_agblocks;
> - int agcount = mp->m_sb.sb_agcount;
> - int i;
> - size_t size = 0;
> -
> - ba_bmap = calloc(agcount, sizeof(__uint64_t *));
> - if (!ba_bmap)
> - do_error(_("couldn't allocate block map pointers\n"));
> + xfs_agnumber_t i;
>
> - ag_locks = calloc(agcount, sizeof(pthread_mutex_t));
> + ag_bmap = calloc(mp->m_sb.sb_agcount, sizeof(struct btree_root *));
> + if (!ag_bmap)
> + do_error(_("couldn't allocate block map btree roots\n"));
> +
> + ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(pthread_mutex_t));
> if (!ag_locks)
> do_error(_("couldn't allocate block map locks\n"));
>
> - for (i = 0; i < agcount; i++) {
> - size = roundup((numblocks+(NBBY/XR_BB)-1) / (NBBY/XR_BB),
> - sizeof(__uint64_t));
> -
> - ba_bmap[i] = memalign(sizeof(__uint64_t), size);
> - if (!ba_bmap[i]) {
> - do_error(_("couldn't allocate block map, size = %d\n"),
> - numblocks);
> - return;
> - }
> - memset(ba_bmap[i], 0, size);
> + for (i = 0; i < mp->m_sb.sb_agcount; i++) {
> + btree_init(&ag_bmap[i]);
> pthread_mutex_init(&ag_locks[i], NULL);
> }
>
> @@ -160,9 +340,9 @@ free_bmaps(xfs_mount_t *mp)
> xfs_agnumber_t i;
>
> for (i = 0; i < mp->m_sb.sb_agcount; i++)
> - free(ba_bmap[i]);
> - free(ba_bmap);
> - ba_bmap = NULL;
> + btree_destroy(ag_bmap[i]);
> + free(ag_bmap);
> + ag_bmap = NULL;
>
> free_rt_bmap(mp);
> }
> Index: xfsprogs-dev/repair/incore.h
> ===================================================================
> --- xfsprogs-dev.orig/repair/incore.h 2009-09-02 14:51:09.573269190 -0300
> +++ xfsprogs-dev/repair/incore.h 2009-09-02 14:51:18.621298890 -0300
> @@ -37,59 +37,32 @@ void record_allocation(ba_rec_t *addr,
> void free_allocations(ba_rec_t *list);
>
> /*
> - * block bit map defs -- track state of each filesystem block.
> - * ba_bmap is an array of bitstrings declared in the globals.h file.
> - * the bitstrings are broken up into 64-bit chunks. one bitstring per AG.
> - */
> -#define BA_BMAP_SIZE(x) (howmany(x, 4))
> -
> -void init_bmaps(xfs_mount_t *mp);
> -void reset_bmaps(xfs_mount_t *mp);
> -void free_bmaps(xfs_mount_t *mp);
> -
> -
> -/* blocks are numbered from zero */
> -
> -/* block records fit into __uint64_t's units */
> -
> -#define XR_BB_UNIT 64 /* number of bits/unit */
> -#define XR_BB 4 /* bits per block record */
> -#define XR_BB_NUM (XR_BB_UNIT/XR_BB) /* number of records per unit */
> -#define XR_BB_MASK 0xF /* block record mask */
> -
> -/*
> - * bitstring ops -- set/get block states, either in filesystem
> - * bno's or in agbno's. turns out that fsbno addressing is
> - * more convenient when dealing with bmap extracted addresses
> - * and agbno addressing is more convenient when dealing with
> - * meta-data extracted addresses. So the fsbno versions use
> - * mtype (which can be one of the block map types above) to
> - * set the correct block map while the agbno versions assume
> - * you want to use the regular block map.
> - */
> -
> -#define get_bmap(agno, ag_blockno) \
> - ((int) (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) \
> - >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \
> - & XR_BB_MASK)
> -#define set_bmap(agno, ag_blockno, state) \
> - *(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \
> - ((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \
> - (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \
> - (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB)))
> -
> -/*
> - * these work in real-time extents (e.g. fsbno == rt extent number)
> - */
> -#define get_rtbmap(fsbno) \
> - ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \
> - (((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK)
> -#define set_rtbmap(fsbno, state) \
> - *(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \
> - ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \
> - (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \
> - (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB)))
> + * block map -- track state of each filesystem block.
> + */
> +
> +void init_bmaps(xfs_mount_t *mp);
> +void reset_bmaps(xfs_mount_t *mp);
> +void free_bmaps(xfs_mount_t *mp);
> +
> +void set_bmap_ext(xfs_agnumber_t agno, xfs_agblock_t agbno,
> + xfs_extlen_t blen, int state);
> +int get_bmap_ext(xfs_agnumber_t agno, xfs_agblock_t agbno,
> + xfs_agblock_t maxbno, xfs_extlen_t *blen);
>
> +void set_rtbmap(xfs_drtbno_t bno, int state);
> +int get_rtbmap(xfs_drtbno_t bno);
> +
> +static inline void
> +set_bmap(xfs_agnumber_t agno, xfs_agblock_t agbno, int state)
> +{
> + set_bmap_ext(agno, agbno, 1, state);
> +}
> +
> +static inline int
> +get_bmap(xfs_agnumber_t agno, xfs_agblock_t agbno)
> +{
> + return get_bmap_ext(agno, agbno, agbno + 1, NULL);
> +}
>
> /*
> * extent tree definitions
> Index: xfsprogs-dev/repair/scan.c
> ===================================================================
> --- xfsprogs-dev.orig/repair/scan.c 2009-09-02 14:51:09.577269000 -0300
> +++ xfsprogs-dev/repair/scan.c 2009-09-02 14:51:18.629269735 -0300
> @@ -509,7 +509,7 @@ _("%s freespace btree block claimed (sta
> rp = XFS_ALLOC_REC_ADDR(mp, block, 1);
> for (i = 0; i < numrecs; i++) {
> xfs_agblock_t b, end;
> - xfs_extlen_t len;
> + xfs_extlen_t len, blen;
>
> b = be32_to_cpu(rp[i].ar_startblock);
> len = be32_to_cpu(rp[i].ar_blockcount);
> @@ -522,8 +522,8 @@ _("%s freespace btree block claimed (sta
> if (!verify_agbno(mp, agno, end - 1))
> continue;
>
> - for ( ; b < end; b++) {
> - state = get_bmap(agno, b);
> + for ( ; b < end; b += blen) {
> + state = get_bmap_ext(agno, b, end, &blen);
> switch (state) {
> case XR_E_UNKNOWN:
> set_bmap(agno, b, XR_E_FREE1);
> @@ -534,13 +534,15 @@ _("%s freespace btree block claimed (sta
> * FREE1 blocks later
> */
> if (magic != XFS_ABTB_MAGIC) {
> - set_bmap(agno, b, XR_E_FREE);
> + set_bmap_ext(agno, b, blen,
> + XR_E_FREE);
> break;
> }
> default:
> do_warn(
> - _("block (%d,%d) multiply claimed by %s space tree, state - %d\n"),
> - agno, b, name, state);
> + _("block (%d,%d-%d) multiply claimed by %s space tree, state - %d\n"),
> + agno, b, b + blen - 1,
> + name, state);
> break;
> }
> }
>
> _______________________________________________
> xfs mailing list
> xfs at oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
More information about the xfs
mailing list