ida: Convert to XArray

Use the XArray's XA_TRACK_FREE ability to track which entries have a free
bit, similarly to how the IDA previously used the radix tree's IDR_FREE tag.
This eliminates the per-cpu ida_bitmap preload, and fixes the memory
consumption regression I introduced when making the IDR able to store any
pointer.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
This commit is contained in:
Matthew Wilcox 2018-07-04 15:42:46 -04:00
parent 371c752dc6
commit f32f004cdd
4 changed files with 218 additions and 268 deletions

View file

@ -214,8 +214,7 @@ static inline void idr_preload_end(void)
++id, (entry) = idr_get_next((idr), &(id)))
/*
* IDA - IDR based id allocator, use when translation from id to
* pointer isn't necessary.
* IDA - ID Allocator, use when translation from id to pointer isn't necessary.
*/
#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */
#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long))
@ -225,14 +224,14 @@ struct ida_bitmap {
unsigned long bitmap[IDA_BITMAP_LONGS];
};
DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
/*
 * IDA handle.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * stripped.  `ida_rt` is presumably the removed radix-tree root and `xa`
 * the XArray that replaces it -- confirm against the real header.
 */
struct ida {
struct radix_tree_root ida_rt;
struct xarray xa;
};
/* XArray flags shared by the static initialiser below and by ida_init(). */
#define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC)
/*
 * Static initialiser for a struct ida called @name.
 *
 * NOTE(review): the `.ida_rt` line is presumably the removed side of the
 * diff and the `.xa` line the added side -- confirm before relying on it.
 */
#define IDA_INIT(name) { \
.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \
.xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \
}
/* Define a struct ida called @name and initialise it with IDA_INIT(). */
#define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
@ -292,7 +291,7 @@ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp)
/*
 * Initialise @ida at run time (the dynamic counterpart of IDA_INIT()).
 */
static inline void ida_init(struct ida *ida)
{
/* NOTE(review): presumably the pre-conversion line removed by this commit. */
INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
/* Set up the embedded XArray with the same flags IDA_INIT() uses. */
xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
}
#define ida_simple_get(ida, start, end, gfp) \
@ -301,9 +300,6 @@ static inline void ida_init(struct ida *ida)
/*
 * Report whether @ida is empty.
 *
 * NOTE(review): two return statements appear because the old and new sides
 * of the diff are interleaved; as literally written the second one is
 * unreachable.  The radix_tree_empty() line is presumably the removed one.
 */
static inline bool ida_is_empty(const struct ida *ida)
{
return radix_tree_empty(&ida->ida_rt);
return xa_empty(&ida->xa);
}
/* in lib/radix-tree.c */
int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
#endif /* __IDR_H__ */

389
lib/idr.c
View file

@ -6,8 +6,6 @@
#include <linux/spinlock.h>
#include <linux/xarray.h>
DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
/**
* idr_alloc_u32() - Allocate an ID.
* @idr: IDR handle.
@ -320,6 +318,9 @@ EXPORT_SYMBOL(idr_replace);
* free the individual IDs in it. You can use ida_is_empty() to find
* out whether the IDA has any IDs currently allocated.
*
* The IDA handles its own locking. It is safe to call any of the IDA
* functions without synchronisation in your code.
*
* IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
* limitation, it should be quite straightforward to raise the maximum.
*/
@ -327,180 +328,38 @@ EXPORT_SYMBOL(idr_replace);
/*
* Developer's notes:
*
* The IDA uses the functionality provided by the IDR & radix tree to store
* bitmaps in each entry. The IDR_FREE tag means there is at least one bit
* free, unlike the IDR where it means at least one entry is free.
* The IDA uses the functionality provided by the XArray to store bitmaps in
* each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap
* have been set.
*
* I considered telling the radix tree that each slot is an order-10 node
* and storing the bit numbers in the radix tree, but the radix tree can't
* allow a single multiorder entry at index 0, which would significantly
* increase memory consumption for the IDA. So instead we divide the index
* by the number of bits in the leaf bitmap before doing a radix tree lookup.
* I considered telling the XArray that each slot is an order-10 node
* and indexing by bit number, but the XArray can't allow a single multi-index
* entry in the head, which would significantly increase memory consumption
* for the IDA. So instead we divide the index by the number of bits in the
* leaf bitmap before doing an XArray lookup.
*
* As an optimisation, if there are only a few low bits set in any given
* leaf, instead of allocating a 128-byte bitmap, we store the bits
* directly in the entry.
* as a value entry. Value entries never have the XA_FREE_MARK cleared
* because we can always convert them into a bitmap entry.
*
* We allow the radix tree 'exceptional' count to get out of date. Nothing
* in the IDA nor the radix tree code checks it. If it becomes important
* to maintain an accurate exceptional count, switch the rcu_assign_pointer()
* calls to radix_tree_iter_replace() which will correct the exceptional
* count.
* It would be possible to optimise further; once we've run out of a
* single 128-byte bitmap, we currently switch to a 576-byte node, put
* the 128-byte bitmap in the first entry and then start allocating extra
* 128-byte entries. We could instead use the 512 bytes of the node's
* data as a bitmap before moving to that scheme. I do not believe this
* is a worthwhile optimisation; Rasmus Villemoes surveyed the current
* users of the IDA and almost none of them use more than 1024 entries.
* Those that do use more than the 8192 IDs that the 512 bytes would
* provide.
*
* The IDA always requires a lock to alloc/free. If we add a 'test_bit'
* The IDA always uses a lock to alloc/free. If we add a 'test_bit'
* equivalent, it will still need locking. Going to RCU lookup would require
* using RCU to free bitmaps, and that's not trivial without embedding an
* RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
* bitmap, which is excessive.
*/
#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
/*
 * Claim a free ID at or above @start in the radix-tree-backed IDA.
 *
 * Each tree slot covers IDA_BITMAP_BITS IDs and holds either nothing, a
 * value entry carrying up to BITS_PER_XA_VALUE bits inline, or a pointer
 * to a struct ida_bitmap.
 *
 * Returns the allocated ID on success, -EAGAIN when idr_get_free() reports
 * -ENOMEM or no per-cpu spare bitmap is available, -ENOSPC when the
 * computed ID goes negative as an int, or any other error that
 * idr_get_free() returned.
 *
 * NOTE(review): the only caller visible here takes the xa lock around this
 * call; presumably that is a requirement -- confirm.
 */
static int ida_get_new_above(struct ida *ida, int start)
{
struct radix_tree_root *root = &ida->ida_rt;
void __rcu **slot;
struct radix_tree_iter iter;
struct ida_bitmap *bitmap;
unsigned long index;
unsigned bit;
int new;
/* Split @start into a slot index and a bit offset inside that slot. */
index = start / IDA_BITMAP_BITS;
bit = start % IDA_BITMAP_BITS;
slot = radix_tree_iter_init(&iter, index);
for (;;) {
/* On later passes, step to the next tagged slot after the current one. */
if (slot)
slot = radix_tree_next_slot(slot, &iter,
RADIX_TREE_ITER_TAGGED);
/* No slot in hand: ask the tree for a slot with free space. */
if (!slot) {
slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
if (IS_ERR(slot)) {
/* Out of memory is reported as -EAGAIN so the caller can preload and retry. */
if (slot == ERR_PTR(-ENOMEM))
return -EAGAIN;
return PTR_ERR(slot);
}
}
/* We moved past the slot @start fell in, so every bit in this slot qualifies. */
if (iter.index > index)
bit = 0;
/* First ID covered by this slot. */
new = iter.index * IDA_BITMAP_BITS;
bitmap = rcu_dereference_raw(*slot);
if (xa_is_value(bitmap)) {
/* The slot stores its bits inline as a value entry. */
unsigned long tmp = xa_to_value(bitmap);
int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE,
bit);
if (vbit < BITS_PER_XA_VALUE) {
tmp |= 1UL << vbit;
rcu_assign_pointer(*slot, xa_mk_value(tmp));
return new + vbit;
}
/* No usable inline bit: convert to a full bitmap taken from the per-cpu spare. */
bitmap = this_cpu_xchg(ida_bitmap, NULL);
if (!bitmap)
return -EAGAIN;
/* Carry the inline bits over into the first word of the bitmap. */
bitmap->bitmap[0] = tmp;
rcu_assign_pointer(*slot, bitmap);
}
if (bitmap) {
bit = find_next_zero_bit(bitmap->bitmap,
IDA_BITMAP_BITS, bit);
new += bit;
/* The int wrapped negative: the ID is out of the supported range. */
if (new < 0)
return -ENOSPC;
/* Nothing free at or after @bit in this bitmap; try the next slot. */
if (bit == IDA_BITMAP_BITS)
continue;
__set_bit(bit, bitmap->bitmap);
/* Once every bit is taken, drop the IDR_FREE tag for this slot. */
if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
radix_tree_iter_tag_clear(root, &iter,
IDR_FREE);
} else {
/* Empty slot: create either an inline value entry or a new bitmap. */
new += bit;
if (new < 0)
return -ENOSPC;
if (bit < BITS_PER_XA_VALUE) {
bitmap = xa_mk_value(1UL << bit);
} else {
bitmap = this_cpu_xchg(ida_bitmap, NULL);
if (!bitmap)
return -EAGAIN;
__set_bit(bit, bitmap->bitmap);
}
radix_tree_iter_replace(root, &iter, slot, bitmap);
}
return new;
}
}
/*
 * Release @id in the radix-tree-backed IDA.
 *
 * Clears the bit for @id, re-tags the slot with IDR_FREE, and deletes the
 * slot (freeing the bitmap when there is one) once no bits remain set.
 * Emits a WARN if @id has no slot, lies outside an inline value entry, or
 * is not currently set.
 */
static void ida_remove(struct ida *ida, int id)
{
unsigned long index = id / IDA_BITMAP_BITS;
unsigned offset = id % IDA_BITMAP_BITS;
struct ida_bitmap *bitmap;
unsigned long *btmp;
struct radix_tree_iter iter;
void __rcu **slot;
slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
if (!slot)
goto err;
bitmap = rcu_dereference_raw(*slot);
if (xa_is_value(bitmap)) {
/* Inline bits: operate directly on the slot word itself. */
btmp = (unsigned long *)slot;
offset += 1; /* Intimate knowledge of the value encoding */
if (offset >= BITS_PER_LONG)
goto err;
} else {
btmp = bitmap->bitmap;
}
/* Freeing an ID that is not set is a caller bug. */
if (!test_bit(offset, btmp))
goto err;
__clear_bit(offset, btmp);
radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
if (xa_is_value(bitmap)) {
/* Re-read the slot: the clear above modified it in place. */
if (xa_to_value(rcu_dereference_raw(*slot)) == 0)
radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
} else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
kfree(bitmap);
radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
}
return;
err:
WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
}
/**
* ida_destroy() - Free all IDs.
* @ida: IDA handle.
*
* Calling this function frees all IDs and releases all resources used
* by an IDA. When this call returns, the IDA is empty and can be reused
* or freed. If the IDA is already empty, there is no need to call this
* function.
*
* Context: Any context.
*/
void ida_destroy(struct ida *ida)
{
	struct radix_tree_iter iter;
	unsigned long irq_flags;
	void __rcu **slot;

	xa_lock_irqsave(&ida->ida_rt, irq_flags);
	radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
		struct ida_bitmap *entry = rcu_dereference_raw(*slot);

		/* Value entries carry their bits inline; only real bitmaps are heap memory. */
		if (xa_is_value(entry))
			entry = NULL;
		kfree(entry);	/* kfree(NULL) is a no-op */

		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
	}
	xa_unlock_irqrestore(&ida->ida_rt, irq_flags);
}
EXPORT_SYMBOL(ida_destroy);
/**
* ida_alloc_range() - Allocate an unused ID.
* @ida: IDA handle.
@ -518,8 +377,10 @@ EXPORT_SYMBOL(ida_destroy);
/*
 * Allocate an unused ID in the range [@min, @max].
 *
 * A negative (int)@min is rejected with -ENOSPC; a negative (int)@max is
 * clamped to INT_MAX.  Returns the allocated ID, -ENOMEM if memory could
 * not be allocated, or -ENOSPC if no ID in the range is free.
 *
 * NOTE(review): this listing is a diff with its +/- markers stripped, so a
 * few pre-conversion lines (the `again:` block, the -EAGAIN/ida_pre_get()
 * retry and `return id;`) and one hunk header are interleaved with the
 * XArray implementation.  They are kept verbatim and flagged below.
 */
int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
gfp_t gfp)
{
int id = 0;
/* Walk state positioned at the slot that contains @min. */
XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
/* Bit offset of the candidate ID inside the current slot. */
unsigned bit = min % IDA_BITMAP_BITS;
unsigned long flags;
/* @alloc is a bitmap preallocated outside the lock for use on a retry. */
struct ida_bitmap *bitmap, *alloc = NULL;
if ((int)min < 0)
return -ENOSPC;
@ -527,22 +388,87 @@ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
if ((int)max < 0)
max = INT_MAX;
/* NOTE(review): `again:` through the unlock is presumably removed pre-conversion code. */
again:
xa_lock_irqsave(&ida->ida_rt, flags);
id = ida_get_new_above(ida, min);
if (id > (int)max) {
ida_remove(ida, id);
id = -ENOSPC;
}
xa_unlock_irqrestore(&ida->ida_rt, flags);
retry:
xas_lock_irqsave(&xas, flags);
next:
/* Find the next slot that still has XA_FREE_MARK set, up to the slot holding @max. */
bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
/* Moved past the slot @min fell in, so any bit in this slot qualifies. */
if (xas.xa_index > min / IDA_BITMAP_BITS)
bit = 0;
if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
goto nospc;
/* NOTE(review): the next four lines are presumably removed pre-conversion code. */
if (unlikely(id == -EAGAIN)) {
if (!ida_pre_get(ida, gfp))
return -ENOMEM;
goto again;
if (xa_is_value(bitmap)) {
/* The slot stores its bits inline as a value entry. */
unsigned long tmp = xa_to_value(bitmap);
if (bit < BITS_PER_XA_VALUE) {
bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
goto nospc;
if (bit < BITS_PER_XA_VALUE) {
tmp |= 1UL << bit;
xas_store(&xas, xa_mk_value(tmp));
goto out;
}
}
/* No usable inline bit: convert the value entry into a full bitmap. */
bitmap = alloc;
if (!bitmap)
bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
if (!bitmap)
goto alloc;
/* Carry the inline bits over into the first word of the bitmap. */
bitmap->bitmap[0] = tmp;
xas_store(&xas, bitmap);
if (xas_error(&xas)) {
/* Store failed: clear the bitmap again before leaving via the error path. */
bitmap->bitmap[0] = 0;
goto out;
}
}
/* NOTE(review): `return id;` is presumably a removed pre-conversion line. */
return id;
if (bitmap) {
bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
goto nospc;
/* Nothing free at or after @bit in this bitmap; look at the next marked slot. */
if (bit == IDA_BITMAP_BITS)
goto next;
__set_bit(bit, bitmap->bitmap);
/* XA_FREE_MARK is only cleared once every bit in the bitmap is set. */
if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
xas_clear_mark(&xas, XA_FREE_MARK);
} else {
/* Empty slot: store either an inline value entry or a new bitmap. */
if (bit < BITS_PER_XA_VALUE) {
bitmap = xa_mk_value(1UL << bit);
} else {
bitmap = alloc;
if (!bitmap)
bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
if (!bitmap)
goto alloc;
__set_bit(bit, bitmap->bitmap);
}
xas_store(&xas, bitmap);
}
out:
xas_unlock_irqrestore(&xas, flags);
/* If the XArray itself ran out of memory, allocate with @gfp and start over from @min. */
if (xas_nomem(&xas, gfp)) {
xas.xa_index = min / IDA_BITMAP_BITS;
bit = min % IDA_BITMAP_BITS;
goto retry;
}
/* The preallocated bitmap was not the one installed, so release it. */
if (bitmap != alloc)
kfree(alloc);
if (xas_error(&xas))
return xas_error(&xas);
return xas.xa_index * IDA_BITMAP_BITS + bit;
alloc:
/* Allocate a bitmap with the caller's @gfp outside the lock, then restart from @min. */
xas_unlock_irqrestore(&xas, flags);
alloc = kzalloc(sizeof(*bitmap), gfp);
if (!alloc)
return -ENOMEM;
xas_set(&xas, min / IDA_BITMAP_BITS);
bit = min % IDA_BITMAP_BITS;
goto retry;
nospc:
xas_unlock_irqrestore(&xas, flags);
/*
 * A bitmap preallocated on an earlier pass through `alloc:` is still owned
 * by us if the retry found the range exhausted; release it so it does not
 * leak.  kfree(NULL) is a no-op, so this is safe on the first pass too.
 */
kfree(alloc);
return -ENOSPC;
}
EXPORT_SYMBOL(ida_alloc_range);
@ -555,11 +481,112 @@ EXPORT_SYMBOL(ida_alloc_range);
*/
/*
 * Release the previously allocated @id.
 *
 * Clears the bit for @id and removes the slot once no bits remain set.
 * Emits a WARN (without modifying the IDA) if @id is not currently
 * allocated, including when the slot covering @id is empty.
 *
 * NOTE(review): this listing is a diff with its +/- markers stripped; the
 * three `ida_rt` / ida_remove() lines are presumably the removed
 * pre-conversion body and are kept verbatim.
 */
void ida_free(struct ida *ida, unsigned int id)
{
/* Walk state positioned at the slot that contains @id. */
XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
unsigned bit = id % IDA_BITMAP_BITS;
struct ida_bitmap *bitmap;
unsigned long flags;
BUG_ON((int)id < 0);
/* NOTE(review): presumably removed pre-conversion code. */
xa_lock_irqsave(&ida->ida_rt, flags);
ida_remove(ida, id);
xa_unlock_irqrestore(&ida->ida_rt, flags);
xas_lock_irqsave(&xas, flags);
bitmap = xas_load(&xas);
if (xa_is_value(bitmap)) {
/* The slot stores its bits inline as a value entry. */
unsigned long v = xa_to_value(bitmap);
if (bit >= BITS_PER_XA_VALUE)
goto err;
if (!(v & (1UL << bit)))
goto err;
v &= ~(1UL << bit);
/* Last inline bit cleared: drop the entry altogether. */
if (!v)
goto delete;
xas_store(&xas, xa_mk_value(v));
} else {
/*
 * The slot may be empty (NULL) when @id was never allocated; treat that
 * as "not allocated" instead of dereferencing a NULL bitmap.
 */
if (!bitmap || !test_bit(bit, bitmap->bitmap))
goto err;
__clear_bit(bit, bitmap->bitmap);
/* The bitmap now has at least one free bit again. */
xas_set_mark(&xas, XA_FREE_MARK);
if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
kfree(bitmap);
delete:
xas_store(&xas, NULL);
}
}
xas_unlock_irqrestore(&xas, flags);
return;
err:
xas_unlock_irqrestore(&xas, flags);
WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
}
EXPORT_SYMBOL(ida_free);
/**
* ida_destroy() - Free all IDs.
* @ida: IDA handle.
*
* Calling this function frees all IDs and releases all resources used
* by an IDA. When this call returns, the IDA is empty and can be reused
* or freed. If the IDA is already empty, there is no need to call this
* function.
*
* Context: Any context.
*/
void ida_destroy(struct ida *ida)
{
XA_STATE(xas, &ida->xa, 0);
struct ida_bitmap *bitmap;
unsigned long flags;
xas_lock_irqsave(&xas, flags);
xas_for_each(&xas, bitmap, ULONG_MAX) {
if (!xa_is_value(bitmap))
kfree(bitmap);
xas_store(&xas, NULL);
}
xas_unlock_irqrestore(&xas, flags);
}
EXPORT_SYMBOL(ida_destroy);
#ifndef __KERNEL__
extern void xa_dump_index(unsigned long index, unsigned int shift);
#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS)
/*
 * Debug helper: print @entry, found at slot @index of the IDA's XArray,
 * descending into child slots when @entry is an internal node.
 */
static void ida_dump_entry(void *entry, unsigned long index)
{
	unsigned long first_id;
	unsigned long n;

	if (!entry)
		return;

	/* First ID covered by this slot. */
	first_id = index * IDA_BITMAP_BITS;

	if (xa_is_node(entry)) {
		struct xa_node *node = xa_to_node(entry);
		unsigned int shift = node->shift + IDA_CHUNK_SHIFT +
			XA_CHUNK_SHIFT;

		xa_dump_index(first_id, shift);
		xa_dump_node(node);
		for (n = 0; n < XA_CHUNK_SIZE; n++)
			ida_dump_entry(node->slots[n],
				       index | (n << node->shift));
		return;
	}

	if (xa_is_value(entry)) {
		xa_dump_index(first_id, ilog2(BITS_PER_LONG));
		pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry);
		return;
	}

	/* Anything else is a pointer to a full bitmap. */
	{
		struct ida_bitmap *bitmap = entry;

		xa_dump_index(first_id, IDA_CHUNK_SHIFT);
		pr_cont("bitmap: %p data", bitmap);
		for (n = 0; n < IDA_BITMAP_LONGS; n++)
			pr_cont(" %lx", bitmap->bitmap[n]);
		pr_cont("\n");
	}
}
/* Debug helper: print a one-line summary of @ida, then dump every entry. */
static void ida_dump(struct ida *ida)
{
	pr_debug("ida: %p node %p free %d\n", ida, ida->xa.xa_head,
		 ida->xa.xa_flags >> ROOT_TAG_SHIFT);
	ida_dump_entry(ida->xa.xa_head, 0);
}
#endif

View file

@ -255,54 +255,6 @@ static unsigned long next_index(unsigned long index,
return (index & ~node_maxindex(node)) + (offset << node->shift);
}
#ifndef __KERNEL__
/*
 * Debug helper: print @entry, found at slot @index of the IDA's radix
 * tree, descending into child slots when @entry is an internal node.
 */
static void dump_ida_node(void *entry, unsigned long index)
{
	unsigned long first_id;
	unsigned long n;

	if (!entry)
		return;

	/* First ID covered by this slot. */
	first_id = index * IDA_BITMAP_BITS;

	if (radix_tree_is_internal_node(entry)) {
		struct radix_tree_node *node = entry_to_node(entry);
		unsigned long last_id = ((index | node_maxindex(node)) + 1) *
			IDA_BITMAP_BITS - 1;

		pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
			node, node->offset, first_id, last_id,
			node->parent, node->tags[0][0], node->shift,
			node->count);
		for (n = 0; n < RADIX_TREE_MAP_SIZE; n++)
			dump_ida_node(node->slots[n],
					index | (n << node->shift));
		return;
	}

	if (xa_is_value(entry)) {
		pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
			entry, (int)(index & RADIX_TREE_MAP_MASK),
			first_id,
			first_id + BITS_PER_XA_VALUE,
			xa_to_value(entry));
		return;
	}

	/* Anything else is a pointer to a full bitmap. */
	{
		struct ida_bitmap *bitmap = entry;

		pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
			(int)(index & RADIX_TREE_MAP_MASK),
			first_id,
			(index + 1) * IDA_BITMAP_BITS - 1);
		for (n = 0; n < IDA_BITMAP_LONGS; n++)
			pr_cont(" %lx", bitmap->bitmap[n]);
		pr_cont("\n");
	}
}
/* Debug helper: print a one-line summary of @ida, then dump every entry. */
static void ida_dump(struct ida *ida)
{
	pr_debug("ida: %p node %p free %d\n", ida, ida->ida_rt.xa_head,
		 ida->ida_rt.xa_flags >> ROOT_TAG_SHIFT);
	dump_ida_node(ida->ida_rt.xa_head, 0);
}
#endif
/*
* This assumes that the caller has performed appropriate preallocation, and
* that the caller has pinned this thread of control to the current CPU.
@ -2039,27 +1991,6 @@ void idr_preload(gfp_t gfp_mask)
}
EXPORT_SYMBOL(idr_preload);
/*
 * Preload what an IDA allocation may need: radix tree nodes and a spare
 * per-cpu bitmap.  Returns 1 when a spare bitmap is in place (or was
 * already there) and 0 when the bitmap allocation failed.
 */
int ida_pre_get(struct ida *ida, gfp_t gfp)
{
	struct ida_bitmap *spare;

	/*
	 * The IDA API has no preload_end() equivalent.  Instead,
	 * ida_get_new() can return -EAGAIN, prompting the caller
	 * to return to the ida_pre_get() step.
	 */
	if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
		preempt_enable();

	/* This CPU already has a spare bitmap; nothing more to do. */
	if (this_cpu_read(ida_bitmap))
		return 1;

	spare = kzalloc(sizeof(*spare), gfp);
	if (!spare)
		return 0;

	/* Someone installed a spare in the meantime: ours is surplus. */
	if (this_cpu_cmpxchg(ida_bitmap, NULL, spare))
		kfree(spare);
	return 1;
}
void __rcu **idr_get_free(struct radix_tree_root *root,
struct radix_tree_iter *iter, gfp_t gfp,
unsigned long max)
@ -2201,8 +2132,6 @@ static int radix_tree_cpu_dead(unsigned int cpu)
kmem_cache_free(radix_tree_node_cachep, node);
rtp->nr--;
}
kfree(per_cpu(ida_bitmap, cpu));
per_cpu(ida_bitmap, cpu) = NULL;
return 0;
}

View file

@ -402,16 +402,15 @@ void ida_check_nomem(void)
*/
void ida_check_conv_user(void)
{
#if 0
DEFINE_IDA(ida);
unsigned long i;
radix_tree_cpu_dead(1);
for (i = 0; i < 1000000; i++) {
int id = ida_alloc(&ida, GFP_NOWAIT);
if (id == -ENOMEM) {
IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
BITS_PER_XA_VALUE);
IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) !=
BITS_PER_XA_VALUE) &&
((i % IDA_BITMAP_BITS) != 0));
id = ida_alloc(&ida, GFP_KERNEL);
} else {
IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
@ -420,7 +419,6 @@ void ida_check_conv_user(void)
IDA_BUG_ON(&ida, id != i);
}
ida_destroy(&ida);
#endif
}
void ida_check_random(void)