pctrie: create iterator

Define a pctrie iterator type. A pctrie iterator is a wrapper around a
pctrie that remembers a position in the trie where the last search
left off, and where a new search can resume. When the next search is
for an item very near in the trie to where the last search left off,
iter-based search is faster because instead of starting from the root,
the search usually only has to back up one or two steps up the
root-to-last-search path to find the branch that leads to the new
search target.

Every kind of lookup (plain, lookup_ge, lookup_le) that can begin with
the trie root can begin with an iterator instead. An iterator can also
do a relative search ("look for the item 4 greater than the last item
I found") because it remembers where that last search ended. It can
also search within limits ("look for the item bigger than this one,
but it has to be less than 100"), which can save time when the next
item beyond the limits and that is known before we actually know what
that item it is. An iterator can also be used to remove an item that
has already been found, without having to search for it again.

Iterators are vulnerable to unsynchronized data changes. If the
iterator is created with a lock held, and that lock is released and
acquired again, there's no guarantee that the iterator path remains
valid.

Reviewed by:	markj
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D45627
This commit is contained in:
Doug Moore 2024-09-13 10:36:54 -05:00
parent 40d0f179a8
commit fd1d666289
2 changed files with 493 additions and 50 deletions

View file

@ -62,9 +62,6 @@
#include <ddb/ddb.h>
#endif
#define PCTRIE_MASK (PCTRIE_COUNT - 1)
#define PCTRIE_LIMIT (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
#if PCTRIE_WIDTH == 3
typedef uint8_t pn_popmap_t;
#elif PCTRIE_WIDTH == 4
@ -87,18 +84,13 @@ struct pctrie_node {
smr_pctnode_t pn_child[PCTRIE_COUNT]; /* Child nodes. */
};
enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
enum pctrie_access access);
/*
* Map index to an array position for the children of node,
*/
static __inline int
pctrie_slot(struct pctrie_node *node, uint64_t index)
{
return ((index >> node->pn_clev) & PCTRIE_MASK);
return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
}
/*
@ -137,6 +129,8 @@ pctrie_node_put(struct pctrie_node *node)
#endif
}
enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
/*
* Fetch a node pointer from a slot.
*/
@ -476,6 +470,21 @@ pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
}
/*
* Return the value associated with the node, if the node is a leaf that matches
* the index; otherwise NULL.
*/
static __always_inline uint64_t *
pctrie_match_value(struct pctrie_node *node, uint64_t index)
{
uint64_t *m;
if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
*m != index)
m = NULL;
return (m);
}
/*
* Returns the value stored at the index. If the index is not present,
* NULL is returned.
@ -485,21 +494,13 @@ _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
enum pctrie_access access)
{
struct pctrie_node *node;
uint64_t *m;
int slot;
node = pctrie_root_load(ptree, smr, access);
for (;;) {
if (pctrie_isleaf(node)) {
if ((m = pctrie_toval(node)) != NULL && *m == index)
return (m);
break;
}
if (pctrie_keybarr(node, index, &slot))
break;
/* Seek a node that matches index. */
while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
node = pctrie_node_load(&node->pn_child[slot], smr, access);
}
return (NULL);
return (pctrie_match_value(node, index));
}
/*
@ -530,6 +531,118 @@ pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
return (res);
}
/*
* Returns the last node examined in the search for the index, and updates the
* search path to that node.
*/
static __always_inline struct pctrie_node *
_pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr,
enum pctrie_access access)
{
struct pctrie_node *node;
int slot;
/*
* Climb the search path to find the lowest node from which to start the
* search for a value matching 'index'.
*/
while (it->top != 0) {
node = it->path[it->top - 1];
KASSERT(!powerof2(node->pn_popmap),
("%s: freed node in iter path", __func__));
if (!pctrie_keybarr(node, index, &slot)) {
node = pctrie_node_load(
&node->pn_child[slot], smr, access);
break;
}
--it->top;
}
if (it->top == 0)
node = pctrie_root_load(it->ptree, smr, access);
/* Seek a node that matches index. */
while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
KASSERT(it->top < nitems(it->path),
("%s: path overflow in trie %p", __func__, it->ptree));
it->path[it->top++] = node;
node = pctrie_node_load(&node->pn_child[slot], smr, access);
}
return (node);
}
/*
* Returns the value stored at a given index value, possibly NULL.
*/
static __always_inline uint64_t *
_pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
enum pctrie_access access)
{
struct pctrie_node *node;
it->index = index;
node = _pctrie_iter_lookup_node(it, index, smr, access);
return (pctrie_match_value(node, index));
}
/*
* Returns the value stored at a given index value, possibly NULL.
*/
uint64_t *
pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
{
return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
}
/*
* Returns the value stored at a fixed offset from the current index value,
* possibly NULL.
*/
static __always_inline uint64_t *
_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
enum pctrie_access access)
{
uint64_t index = it->index + stride;
/* Detect stride overflow. */
if ((stride > 0) != (index > it->index))
return (NULL);
/* Detect crossing limit */
if ((index < it->limit) != (it->index < it->limit))
return (NULL);
return (_pctrie_iter_lookup(it, index, smr, access));
}
/*
* Returns the value stored at a fixed offset from the current index value,
* possibly NULL.
*/
uint64_t *
pctrie_iter_stride(struct pctrie_iter *it, int stride)
{
return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
}
/*
* Returns the value stored at one more than the current index value, possibly
* NULL, assuming access is externally synchronized by a lock.
*/
uint64_t *
pctrie_iter_next(struct pctrie_iter *it)
{
return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
}
/*
* Returns the value stored at one less than the current index value, possibly
* NULL, assuming access is externally synchronized by a lock.
*/
uint64_t *
pctrie_iter_prev(struct pctrie_iter *it)
{
return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
}
/*
* Returns the value with the least index that is greater than or equal to the
* specified index, or NULL if there are no such values.
@ -633,6 +746,78 @@ pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
return (pctrie_lookup_ge_node(node, index + 1));
}
/*
* Find first leaf >= index, and fill iter with the path to the parent of that
* leaf. Return NULL if there is no such leaf less than limit.
*/
uint64_t *
pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
{
struct pctrie_node *node;
uint64_t *m;
int slot;
/* Seek a node that matches index. */
node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
/*
* If no such node was found, and instead this path leads only to nodes
* < index, back up to find a subtrie with the least value > index.
*/
if (pctrie_isleaf(node) ?
(m = pctrie_toval(node)) == NULL || *m < index :
node->pn_owner < index) {
/* Climb the path to find a node with a descendant > index. */
while (it->top != 0) {
node = it->path[it->top - 1];
slot = pctrie_slot(node, index) + 1;
if ((node->pn_popmap >> slot) != 0)
break;
--it->top;
}
if (it->top == 0)
return (NULL);
/* Step to the least child with a descendant > index. */
slot += ffs(node->pn_popmap >> slot) - 1;
node = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
/* Descend to the least leaf of the subtrie. */
while (!pctrie_isleaf(node)) {
if (it->limit != 0 && node->pn_owner >= it->limit)
return (NULL);
slot = ffs(node->pn_popmap) - 1;
KASSERT(it->top < nitems(it->path),
("%s: path overflow in trie %p", __func__, it->ptree));
it->path[it->top++] = node;
node = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
m = pctrie_toval(node);
if (it->limit != 0 && *m >= it->limit)
return (NULL);
it->index = *m;
return (m);
}
/*
* Find the first leaf with value at least 'jump' greater than the previous
* leaf. Return NULL if that value is >= limit.
*/
uint64_t *
pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
{
uint64_t index = it->index + jump;
/* Detect jump overflow. */
if ((jump > 0) != (index > it->index))
return (NULL);
if (it->limit != 0 && index >= it->limit)
return (NULL);
return (pctrie_iter_lookup_ge(it, index));
}
#ifdef INVARIANTS
void
pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
@ -720,6 +905,79 @@ pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
return (pctrie_lookup_le_node(node, index - 1));
}
/*
* Find first leaf <= index, and fill iter with the path to the parent of that
* leaf. Return NULL if there is no such leaf greater than limit.
*/
uint64_t *
pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
{
struct pctrie_node *node;
uint64_t *m;
int slot;
/* Seek a node that matches index. */
node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
/*
* If no such node was found, and instead this path leads only to nodes
* > index, back up to find a subtrie with the least value > index.
*/
if (pctrie_isleaf(node) ?
(m = pctrie_toval(node)) == NULL || *m > index :
node->pn_owner > index) {
/* Climb the path to find a node with a descendant < index. */
while (it->top != 0) {
node = it->path[it->top - 1];
slot = pctrie_slot(node, index);
if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
break;
--it->top;
}
if (it->top == 0)
return (NULL);
/* Step to the greatest child with a descendant < index. */
slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
node = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
/* Descend to the greatest leaf of the subtrie. */
while (!pctrie_isleaf(node)) {
if (it->limit != 0 && it->limit >=
node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1)
return (NULL);
slot = ilog2(node->pn_popmap);
KASSERT(it->top < nitems(it->path),
("%s: path overflow in trie %p", __func__, it->ptree));
it->path[it->top++] = node;
node = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
m = pctrie_toval(node);
if (it->limit != 0 && *m <= it->limit)
return (NULL);
it->index = *m;
return (m);
}
/*
* Find the first leaf with value at most 'jump' less than the previous
* leaf. Return NULL if that value is <= limit.
*/
uint64_t *
pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
{
uint64_t index = it->index - jump;
/* Detect jump overflow. */
if ((jump > 0) != (index < it->index))
return (NULL);
if (it->limit != 0 && index <= it->limit)
return (NULL);
return (pctrie_iter_lookup_le(it, index));
}
#ifdef INVARIANTS
void
pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
@ -738,42 +996,25 @@ pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
}
#endif
/*
* Remove the specified index from the tree, and return the value stored at
* that index. If the index is not present, return NULL.
*/
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
struct pctrie_node **freenode)
static void
pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent,
struct pctrie_node *node, struct pctrie_node **freenode)
{
struct pctrie_node *child, *node, *parent;
uint64_t *m;
struct pctrie_node *child;
int slot;
*freenode = node = NULL;
child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
for (;;) {
if (pctrie_isleaf(child))
break;
parent = node;
node = child;
slot = pctrie_slot(node, index);
child = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
if ((m = pctrie_toval(child)) == NULL || *m != index)
return (NULL);
if (node == NULL) {
pctrie_root_store(ptree, PCTRIE_NULL, PCTRIE_LOCKED);
return (m);
return;
}
slot = pctrie_slot(node, index);
KASSERT((node->pn_popmap & (1 << slot)) != 0,
("%s: bad popmap slot %d in node %p",
__func__, slot, node));
node->pn_popmap ^= 1 << slot;
pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
if (!powerof2(node->pn_popmap))
return (m);
return;
KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
slot = ffs(node->pn_popmap) - 1;
child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
@ -795,9 +1036,89 @@ pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
*/
pctrie_node_put(node);
*freenode = node;
}
/*
* Remove the specified index from the tree, and return the value stored at
* that index. If the index is not present, return NULL.
*/
uint64_t *
pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
struct pctrie_node **freenode)
{
struct pctrie_node *child, *node, *parent;
uint64_t *m;
int slot;
DEBUG_POISON_POINTER(parent);
*freenode = node = NULL;
child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
while (!pctrie_isleaf(child)) {
parent = node;
node = child;
slot = pctrie_slot(node, index);
child = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
m = pctrie_match_value(child, index);
if (m != NULL)
pctrie_remove(ptree, index, parent, node, freenode);
return (m);
}
/*
* Remove from the trie the leaf last chosen by the iterator, and
* adjust the path if it's last member is to be freed.
*/
uint64_t *
pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
{
struct pctrie_node *child, *node, *parent;
uint64_t *m;
int slot;
DEBUG_POISON_POINTER(parent);
*freenode = NULL;
if (it->top >= 1) {
parent = (it->top >= 2) ? it->path[it->top - 2] : NULL;
node = it->path[it->top - 1];
slot = pctrie_slot(node, it->index);
child = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
} else {
node = NULL;
child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
}
m = pctrie_match_value(child, it->index);
if (m != NULL)
pctrie_remove(it->ptree, it->index, parent, node, freenode);
if (*freenode != NULL)
--it->top;
return (m);
}
/*
* Return the current leaf, assuming access is externally synchronized by a
* lock.
*/
uint64_t *
pctrie_iter_value(struct pctrie_iter *it)
{
struct pctrie_node *node;
int slot;
if (it->top == 0)
node = pctrie_root_load(it->ptree, NULL,
PCTRIE_LOCKED);
else {
node = it->path[it->top - 1];
slot = pctrie_slot(node, it->index);
node = pctrie_node_load(&node->pn_child[slot], NULL,
PCTRIE_LOCKED);
}
return (pctrie_toval(node));
}
/*
* Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
* using the leftmost child pointer for path reversal, until an interior node

View file

@ -240,6 +240,85 @@ name##_PCTRIE_RECLAIM_CALLBACK(struct pctrie *ptree, \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_LOOKUP(struct pctrie_iter *it, uint64_t index) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup(it, index)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_STRIDE(struct pctrie_iter *it, int stride) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_stride(it, stride)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_NEXT(struct pctrie_iter *it) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_next(it)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_PREV(struct pctrie_iter *it) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_prev(it)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_VALUE(struct pctrie_iter *it) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_value(it)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_LOOKUP_GE(struct pctrie_iter *it, uint64_t index) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup_ge(it, index)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_JUMP_GE(struct pctrie_iter *it, int64_t jump) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_ge(it, jump)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_STEP_GE(struct pctrie_iter *it) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_ge(it, 1)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_LOOKUP_LE(struct pctrie_iter *it, uint64_t index) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_lookup_le(it, index)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_JUMP_LE(struct pctrie_iter *it, int64_t jump) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_le(it, jump)); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_ITER_STEP_LE(struct pctrie_iter *it) \
{ \
return name##_PCTRIE_VAL2PTR(pctrie_iter_jump_le(it, 1)); \
} \
\
static __inline __unused void \
name##_PCTRIE_ITER_REMOVE(struct pctrie_iter *it) \
{ \
uint64_t *val; \
struct pctrie_node *freenode; \
\
val = pctrie_iter_remove(it, &freenode); \
if (val == NULL) \
panic("%s: key not found", __func__); \
if (freenode != NULL) \
freefn(it->ptree, freenode); \
} \
\
static __inline __unused struct type * \
name##_PCTRIE_REPLACE(struct pctrie *ptree, struct type *ptr) \
{ \
\
@ -272,6 +351,7 @@ name##_PCTRIE_REMOVE_LOOKUP(struct pctrie *ptree, uint64_t key) \
return name##_PCTRIE_VAL2PTR(val); \
}
struct pctrie_iter;
void *pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
uint64_t **found_out);
void *pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
@ -283,10 +363,22 @@ void *pctrie_insert_lookup_strict(struct pctrie *ptree,
void pctrie_insert_node(void *parentp,
struct pctrie_node *parent, uint64_t *val);
uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key,
smr_t smr);
uint64_t *pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index);
uint64_t *pctrie_iter_stride(struct pctrie_iter *it, int stride);
uint64_t *pctrie_iter_next(struct pctrie_iter *it);
uint64_t *pctrie_iter_prev(struct pctrie_iter *it);
uint64_t *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_subtree_lookup_gt(struct pctrie_node *node,
uint64_t key);
uint64_t *pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index);
uint64_t *pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump);
uint64_t *pctrie_lookup_le(struct pctrie *ptree, uint64_t key);
uint64_t *pctrie_subtree_lookup_lt(struct pctrie_node *node,
uint64_t key);
uint64_t *pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index);
uint64_t *pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump);
struct pctrie_node *pctrie_reclaim_begin(struct pctrie_node **pnode,
struct pctrie *ptree);
struct pctrie_node *pctrie_reclaim_resume(struct pctrie_node **pnode);
@ -297,11 +389,10 @@ struct pctrie_node *pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
pctrie_cb_t callback, int keyoff, void *arg);
uint64_t *pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
struct pctrie_node **killnode);
uint64_t *pctrie_iter_remove(struct pctrie_iter *it,
struct pctrie_node **freenode);
uint64_t *pctrie_iter_value(struct pctrie_iter *it);
uint64_t *pctrie_replace(struct pctrie *ptree, uint64_t *newval);
uint64_t *pctrie_subtree_lookup_gt(struct pctrie_node *node,
uint64_t key);
uint64_t *pctrie_subtree_lookup_lt(struct pctrie_node *node,
uint64_t key);
size_t pctrie_node_size(void);
int pctrie_zone_init(void *mem, int size, int flags);
@ -342,6 +433,37 @@ pctrie_is_empty(struct pctrie *ptree)
#endif
#define PCTRIE_COUNT (1 << PCTRIE_WIDTH)
#define PCTRIE_LIMIT howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH)
struct pctrie_iter {
struct pctrie *ptree;
struct pctrie_node *path[PCTRIE_LIMIT];
uint64_t index;
uint64_t limit;
int top;
};
static __inline void
pctrie_iter_reset(struct pctrie_iter *it)
{
it->top = 0;
}
static __inline void
pctrie_iter_init(struct pctrie_iter *it, struct pctrie *ptree)
{
it->ptree = ptree;
it->top = 0;
it->limit = 0;
}
static __inline void
pctrie_iter_limit_init(struct pctrie_iter *it, struct pctrie *ptree,
uint64_t limit)
{
pctrie_iter_init(it, ptree);
it->limit = limit;
}
#endif /* _KERNEL */
#endif /* !_SYS_PCTRIE_H_ */