xfs
[Top] [All Lists]

[PATCH 014/119] xfs: introduce interval queries on btrees

To: david@xxxxxxxxxxxxx, darrick.wong@xxxxxxxxxx
Subject: [PATCH 014/119] xfs: introduce interval queries on btrees
From: "Darrick J. Wong" <darrick.wong@xxxxxxxxxx>
Date: Thu, 16 Jun 2016 18:19:21 -0700
Cc: linux-fsdevel@xxxxxxxxxxxxxxx, vishal.l.verma@xxxxxxxxx, xfs@xxxxxxxxxxx
Delivered-to: xfs@xxxxxxxxxxx
In-reply-to: <146612627129.12839.3827886950949809165.stgit@xxxxxxxxxxxxxxxx>
References: <146612627129.12839.3827886950949809165.stgit@xxxxxxxxxxxxxxxx>
User-agent: StGit/0.17.1-dirty
Create a function to enable querying of btree records mapping to a
range of keys.  This will be used in subsequent patches to allow
querying the reverse mapping btree to find the extents mapped to a
range of physical blocks, though the generic code can be used for
any range query.

v2: add some shortcuts so that we can jump out of processing once
we know there won't be any more records to find.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/libxfs/xfs_btree.c |  249 +++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_btree.h |   22 +++-
 fs/xfs/xfs_trace.h        |    1 
 3 files changed, 267 insertions(+), 5 deletions(-)


diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index afcafd6..5f5cf23 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4509,3 +4509,252 @@ xfs_btree_calc_size(
        }
        return rval;
 }
+
+/* Query a regular btree for all records overlapping a given interval. */
+STATIC int
+xfs_btree_simple_query_range(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_irec            *low_rec,
+       union xfs_btree_irec            *high_rec,
+       xfs_btree_query_range_fn        fn,
+       void                            *priv)
+{
+       union xfs_btree_rec             *recp;
+       union xfs_btree_rec             rec;
+       union xfs_btree_key             low_key;
+       union xfs_btree_key             high_key;
+       union xfs_btree_key             rec_key;
+       __int64_t                       diff;
+       int                             stat;
+       bool                            firstrec = true;
+       int                             error;
+
+       ASSERT(cur->bc_ops->init_high_key_from_rec);
+
+       /* Find the keys of both ends of the interval. */
+       cur->bc_rec = *high_rec;
+       cur->bc_ops->init_rec_from_cur(cur, &rec);
+       cur->bc_ops->init_key_from_rec(&high_key, &rec);
+
+       cur->bc_rec = *low_rec;
+       cur->bc_ops->init_rec_from_cur(cur, &rec);
+       cur->bc_ops->init_key_from_rec(&low_key, &rec);
+
+       /* Find the leftmost record. */
+       stat = 0;
+       error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
+       if (error)
+               goto out;
+
+       while (stat) {
+               /* Find the record. */
+               error = xfs_btree_get_rec(cur, &recp, &stat);
+               if (error || !stat)
+                       break;
+
+               /* Can we tell if this record is too low? */
+               if (firstrec) {
+                       cur->bc_rec = *low_rec;
+                       cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
+                       diff = cur->bc_ops->key_diff(cur, &rec_key);
+                       if (diff < 0)
+                               goto advloop;
+               }
+               firstrec = false;
+
+               /* Have we gone past the end? */
+               cur->bc_rec = *high_rec;
+               cur->bc_ops->init_key_from_rec(&rec_key, recp);
+               diff = cur->bc_ops->key_diff(cur, &rec_key);
+               if (diff > 0)
+                       break;
+
+               /* Callback */
+               error = fn(cur, recp, priv);
+               if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
+                       break;
+
+advloop:
+               /* Move on to the next record. */
+               error = xfs_btree_increment(cur, 0, &stat);
+               if (error)
+                       break;
+       }
+
+out:
+       return error;
+}
+
+/*
+ * Query an overlapped interval btree for all records overlapping a given
+ * interval.
+ */
+STATIC int
+xfs_btree_overlapped_query_range(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_irec            *low_rec,
+       union xfs_btree_irec            *high_rec,
+       xfs_btree_query_range_fn        fn,
+       void                            *priv)
+{
+       union xfs_btree_ptr             ptr;
+       union xfs_btree_ptr             *pp;
+       union xfs_btree_key             rec_key;
+       union xfs_btree_key             low_key;
+       union xfs_btree_key             high_key;
+       union xfs_btree_key             *lkp;
+       union xfs_btree_key             *hkp;
+       union xfs_btree_rec             rec;
+       union xfs_btree_rec             *recp;
+       struct xfs_btree_block          *block;
+       __int64_t                       ldiff;
+       __int64_t                       hdiff;
+       int                             level;
+       struct xfs_buf                  *bp;
+       int                             i;
+       int                             error;
+
+       /* Find the keys of both ends of the interval. */
+       cur->bc_rec = *high_rec;
+       cur->bc_ops->init_rec_from_cur(cur, &rec);
+       cur->bc_ops->init_key_from_rec(&high_key, &rec);
+
+       cur->bc_rec = *low_rec;
+       cur->bc_ops->init_rec_from_cur(cur, &rec);
+       cur->bc_ops->init_key_from_rec(&low_key, &rec);
+
+       /* Load the root of the btree. */
+       level = cur->bc_nlevels - 1;
+       cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+       error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
+       if (error)
+               return error;
+       xfs_btree_get_block(cur, level, &bp);
+       trace_xfs_btree_overlapped_query_range(cur, level, bp);
+#ifdef DEBUG
+       error = xfs_btree_check_block(cur, block, level, bp);
+       if (error)
+               goto out;
+#endif
+       cur->bc_ptrs[level] = 1;
+
+       while (level < cur->bc_nlevels) {
+               block = XFS_BUF_TO_BLOCK(cur->bc_bufs[level]);
+
+               if (level == 0) {
+                       /* End of leaf, pop back towards the root. */
+                       if (cur->bc_ptrs[level] >
+                           be16_to_cpu(block->bb_numrecs)) {
+leaf_pop_up:
+                               if (level < cur->bc_nlevels - 1)
+                                       cur->bc_ptrs[level + 1]++;
+                               level++;
+                               continue;
+                       }
+
+                       recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+
+                       cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
+                       ldiff = cur->bc_ops->diff_two_keys(cur, &low_key,
+                                       &rec_key);
+
+                       cur->bc_ops->init_key_from_rec(&rec_key, recp);
+                       hdiff = cur->bc_ops->diff_two_keys(cur, &rec_key,
+                                       &high_key);
+
+                       /* If the record matches, callback */
+                       if (ldiff >= 0 && hdiff >= 0) {
+                               error = fn(cur, recp, priv);
+                               if (error < 0 ||
+                                   error == XFS_BTREE_QUERY_RANGE_ABORT)
+                                       break;
+                       } else if (hdiff < 0) {
+                               /* Record is larger than high key; pop. */
+                               goto leaf_pop_up;
+                       }
+                       cur->bc_ptrs[level]++;
+                       continue;
+               }
+
+               /* End of node, pop back towards the root. */
+               if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
+node_pop_up:
+                       if (level < cur->bc_nlevels - 1)
+                               cur->bc_ptrs[level + 1]++;
+                       level++;
+                       continue;
+               }
+
+               lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
+               hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
+               pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
+
+               ldiff = cur->bc_ops->diff_two_keys(cur, &low_key, hkp);
+               hdiff = cur->bc_ops->diff_two_keys(cur, lkp, &high_key);
+
+               /* If the key matches, drill another level deeper. */
+               if (ldiff >= 0 && hdiff >= 0) {
+                       level--;
+                       error = xfs_btree_lookup_get_block(cur, level, pp,
+                                       &block);
+                       if (error)
+                               goto out;
+                       xfs_btree_get_block(cur, level, &bp);
+                       trace_xfs_btree_overlapped_query_range(cur, level, bp);
+#ifdef DEBUG
+                       error = xfs_btree_check_block(cur, block, level, bp);
+                       if (error)
+                               goto out;
+#endif
+                       cur->bc_ptrs[level] = 1;
+                       continue;
+               } else if (hdiff < 0) {
+                       /* The low key is larger than the upper range; pop. */
+                       goto node_pop_up;
+               }
+               cur->bc_ptrs[level]++;
+       }
+
+out:
+       /*
+        * If we don't end this function with the cursor pointing at a record
+        * block, a subsequent non-error cursor deletion will not release
+        * node-level buffers, causing a buffer leak.  This is quite possible
+        * with a zero-results range query, so release the buffers if we
+        * failed to return any results.
+        */
+       if (cur->bc_bufs[0] == NULL) {
+               for (i = 0; i < cur->bc_nlevels; i++) {
+                       if (cur->bc_bufs[i]) {
+                               xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
+                               cur->bc_bufs[i] = NULL;
+                               cur->bc_ptrs[i] = 0;
+                               cur->bc_ra[i] = 0;
+                       }
+               }
+       }
+
+       return error;
+}
+
+/*
+ * Query a btree for all records overlapping a given interval of keys.  The
+ * supplied function will be called with each record found; return one of the
+ * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
+ * code.  This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
+ * negative error code.
+ */
+int
+xfs_btree_query_range(
+       struct xfs_btree_cur            *cur,
+       union xfs_btree_irec            *low_rec,
+       union xfs_btree_irec            *high_rec,
+       xfs_btree_query_range_fn        fn,
+       void                            *priv)
+{
+       if (!(cur->bc_ops->flags & XFS_BTREE_OPS_OVERLAPPING))
+               return xfs_btree_simple_query_range(cur, low_rec,
+                               high_rec, fn, priv);
+       return xfs_btree_overlapped_query_range(cur, low_rec, high_rec,
+                       fn, priv);
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index a5ec6c7..898fee5 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -206,6 +206,12 @@ struct xfs_btree_ops {
 #define LASTREC_DELREC 2
 
 
+union xfs_btree_irec {
+       xfs_alloc_rec_incore_t          a;
+       xfs_bmbt_irec_t                 b;
+       xfs_inobt_rec_incore_t          i;
+};
+
 /*
  * Btree cursor structure.
  * This collects all information needed by the btree code in one place.
@@ -216,11 +222,7 @@ typedef struct xfs_btree_cur
        struct xfs_mount        *bc_mp; /* file system mount struct */
        const struct xfs_btree_ops *bc_ops;
        uint                    bc_flags; /* btree features - below */
-       union {
-               xfs_alloc_rec_incore_t  a;
-               xfs_bmbt_irec_t         b;
-               xfs_inobt_rec_incore_t  i;
-       }               bc_rec;         /* current insert/search record value */
+       union xfs_btree_irec    bc_rec; /* current insert/search record value */
        struct xfs_buf  *bc_bufs[XFS_BTREE_MAXLEVELS];  /* buf ptr per level */
        int             bc_ptrs[XFS_BTREE_MAXLEVELS];   /* key/record # */
        __uint8_t       bc_ra[XFS_BTREE_MAXLEVELS];     /* readahead bits */
@@ -494,4 +496,14 @@ xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, 
uint *limits,
 uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
                unsigned long len);
 
+/* return codes */
+#define XFS_BTREE_QUERY_RANGE_CONTINUE 0       /* keep iterating */
+#define XFS_BTREE_QUERY_RANGE_ABORT    1       /* stop iterating */
+typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
+               union xfs_btree_rec *rec, void *priv);
+
+int xfs_btree_query_range(struct xfs_btree_cur *cur,
+               union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
+               xfs_btree_query_range_fn fn, void *priv);
+
 #endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ffea28c..f0ac9c9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2218,6 +2218,7 @@ DEFINE_EVENT(xfs_btree_cur_class, name, \
        TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \
        TP_ARGS(cur, level, bp))
 DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys);
+DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range);
 
 #endif /* _TRACE_XFS_H */
 

<Prev in Thread] Current Thread [Next in Thread>