From d88252f9bb74d266653542b753f9ab404e8b88db Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:19 +1100
Subject: [PATCH 1/9] rhashtable: Add barrier to ensure we see new tables in walker

The walker is a lockless reader so it too needs an smp_rmb before
reading the future_tbl field in order to see any new tables that
may contain elements that we should have walked over.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 83cfedd6612a..618a3f00d712 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -477,6 +477,9 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
 		iter->skip = 0;
 	}
 
+	/* Ensure we see any new tables. */
+	smp_rmb();
+
 	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (iter->walker->tbl) {
 		iter->slot = 0;
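As background, smp_rmb() only means something in combination with a matching
write-side barrier; here it pairs with the smp_wmb() that the resize path
issues after publishing a new table (see rhashtable_rehash_attach later in
this series). The general shape of that pairing, modelled as a plain C11
program with made-up names (an illustration of the idiom, not the rhashtable
code), is:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static int table_contents;	/* stands in for the new table's data */
	static atomic_int published;	/* stands in for the future_tbl link  */

	static void *resizer(void *arg)
	{
		table_contents = 42;				/* fill in the new table */
		atomic_thread_fence(memory_order_release);	/* analogue of smp_wmb() */
		atomic_store_explicit(&published, 1, memory_order_relaxed);
		return NULL;
	}

	static void *walker(void *arg)
	{
		/* Wait until the new table has been published... */
		while (!atomic_load_explicit(&published, memory_order_relaxed))
			;
		atomic_thread_fence(memory_order_acquire);	/* analogue of smp_rmb() */
		/* ...and only then look inside it. */
		printf("%d\n", table_contents);			/* always prints 42 */
		return NULL;
	}

	int main(void)
	{
		pthread_t w, r;

		pthread_create(&w, NULL, resizer, NULL);
		pthread_create(&r, NULL, walker, NULL);
		pthread_join(w, NULL);
		pthread_join(r, NULL);
		return 0;
	}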
From de91b25c8011089f5dd99b9d24743db1f550ca4b Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:20 +1100
Subject: [PATCH 2/9] rhashtable: Eliminate unnecessary branch in rht_key_hashfn

When rht_key_hashfn is called from rhashtable itself and params is
equal to ht->p, there is no point in checking params.key_len and
falling back to ht->p.key_len.

For some reason gcc couldn't figure out that params is the same as
ht->p.  So let's help it by only checking params.key_len when it's
a constant.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 89d102270570..3851952781d7 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -199,8 +199,12 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	return rht_bucket_index(tbl, params.hashfn(key, params.key_len ?:
-						   ht->p.key_len,
+	/* params must be equal to ht->p if it isn't constant. */
+	unsigned key_len = __builtin_constant_p(params.key_len) ?
+			   (params.key_len ?: ht->p.key_len) :
+			   params.key_len;
+
+	return rht_bucket_index(tbl, params.hashfn(key, key_len,
 						   tbl->hash_rnd));
 }
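The trick relies on __builtin_constant_p() evaluating to 1 once the params
aggregate has been inlined as a compile-time constant, at which point the ?:
fallback folds away and no runtime branch is emitted. A standalone
illustration of the idiom (gcc/clang, made-up names, build with -O2 so the
static inline is actually inlined; this is not the rhashtable code itself):

	#include <stdio.h>

	/* Consult the fallback only when "len" is known at compile time;
	 * a non-constant caller promises len is already correct and pays
	 * no extra branch for the fallback.
	 */
	static inline unsigned int pick_key_len(unsigned int len,
						unsigned int fallback)
	{
		return __builtin_constant_p(len) ? (len ?: fallback) : len;
	}

	int main(void)
	{
		unsigned int runtime_len = 8;

		/* Literal 0 is constant: the fallback is chosen at compile time. */
		printf("%u\n", pick_key_len(0, 16));		/* 16 (with -O2) */
		/* Runtime value: taken as-is, no fallback check emitted. */
		printf("%u\n", pick_key_len(runtime_len, 16));	/* 8 */
		return 0;
	}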
From 31ccde2dacea8375c3a7d6fffbf0060ee0d40214 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:21 +1100
Subject: [PATCH 3/9] rhashtable: Allow hashfn to be unset

Since every current rhashtable user uses jhash as their hash function,
the fact that jhash is an inline function causes each user to generate
a copy of its code.

This patch provides a solution to this problem by allowing hashfn to be
unset, in which case rhashtable will automatically set it to jhash.
Furthermore, if the key length is a multiple of 4, we will switch over
to jhash2.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 35 ++++++++++++++++++++++++++++-------
 lib/rhashtable.c           | 17 ++++++++++++++++-
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 3851952781d7..bc2488b98321 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -19,6 +19,7 @@
 
 #include <linux/compiler.h>
 #include <linux/errno.h>
+#include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
@@ -103,7 +104,7 @@ struct rhashtable;
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
- * @hashfn: Function to hash key
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
 * @obj_hashfn: Function to hash object
 * @obj_cmpfn: Function to compare key with object
 */
@@ -125,6 +126,7 @@ struct rhashtable_params {
 * struct rhashtable - Hash table handle
 * @tbl: Bucket table
 * @nelems: Number of elements in table
+ * @key_len: Key length for hashfn
 * @p: Configuration parameters
 * @run_work: Deferred worker to expand/shrink asynchronously
 * @mutex: Mutex to protect current/future table swapping
@@ -134,6 +136,7 @@ struct rhashtable {
 	struct bucket_table __rcu *tbl;
 	atomic_t nelems;
 	bool being_destroyed;
+	unsigned int key_len;
 	struct rhashtable_params p;
 	struct work_struct run_work;
 	struct mutex mutex;
@@ -199,13 +202,31 @@ static inline unsigned int rht_key_hashfn(
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	/* params must be equal to ht->p if it isn't constant. */
-	unsigned key_len = __builtin_constant_p(params.key_len) ?
-			   (params.key_len ?: ht->p.key_len) :
-			   params.key_len;
+	unsigned hash;
 
-	return rht_bucket_index(tbl, params.hashfn(key, key_len,
-						   tbl->hash_rnd));
+	/* params must be equal to ht->p if it isn't constant. */
+	if (!__builtin_constant_p(params.key_len))
+		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+	else if (params.key_len) {
+		unsigned key_len = params.key_len;
+
+		if (params.hashfn)
+			hash = params.hashfn(key, key_len, tbl->hash_rnd);
+		else if (key_len & (sizeof(u32) - 1))
+			hash = jhash(key, key_len, tbl->hash_rnd);
+		else
+			hash = jhash2(key, key_len / sizeof(u32),
+				      tbl->hash_rnd);
+	} else {
+		unsigned key_len = ht->p.key_len;
+
+		if (params.hashfn)
+			hash = params.hashfn(key, key_len, tbl->hash_rnd);
+		else
+			hash = jhash(key, key_len, tbl->hash_rnd);
+	}
+
+	return rht_bucket_index(tbl, hash);
 }
 
 static inline unsigned int rht_head_hashfn(
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 618a3f00d712..798f01d64ab0 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -532,6 +532,11 @@ static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 		   (unsigned long)params->min_size);
 }
 
+static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
+{
+	return jhash2(key, length, seed);
+}
+
 /**
  * rhashtable_init - initialize a new hash table
  * @ht: hash table to be initialized
@@ -583,7 +588,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	size = HASH_DEFAULT_SIZE;
 
-	if ((!(params->key_len && params->hashfn) && !params->obj_hashfn) ||
+	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
@@ -610,6 +615,16 @@ int rhashtable_init(struct rhashtable *ht,
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
+	ht->key_len = ht->p.key_len;
+	if (!params->hashfn) {
+		ht->p.hashfn = jhash;
+
+		if (!(ht->key_len & (sizeof(u32) - 1))) {
+			ht->key_len /= sizeof(u32);
+			ht->p.hashfn = rhashtable_jhash2;
+		}
+	}
+
 	tbl = bucket_table_alloc(ht, size);
 	if (tbl == NULL)
 		return -ENOMEM;
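With this in place, a user whose key is a simple 4-byte field can leave
.hashfn out entirely and have jhash2 picked at init time. A minimal
hypothetical user would now look roughly like this (struct, names and sizes
are made up; compare the netlink and tipc conversions in the next two
patches):

	struct my_obj {
		u32			key;	/* key_len 4 => jhash2 is chosen */
		struct rhash_head	node;
	};

	static const struct rhashtable_params my_params = {
		.head_offset	= offsetof(struct my_obj, node),
		.key_offset	= offsetof(struct my_obj, key),
		.key_len	= sizeof(u32),
		/* .hashfn left unset: rhashtable_init() fills in the default */
	};

	/* ... later: rhashtable_init(&my_ht, &my_params); */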
From 11b58ba146ccd7b105c4962c75f2e744053c85bc Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:22 +1100
Subject: [PATCH 4/9] netlink: Use default rhashtable hashfn

This patch removes the explicit jhash value for the hashfn parameter
of rhashtable.  As the key length is a multiple of 4, this means that
we will actually end up using jhash2.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 net/netlink/af_netlink.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 651792141f07..e2f7f28148e0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3133,13 +3133,12 @@ static inline u32 netlink_hash(const void *data, u32 seed)
 	struct netlink_compare_arg arg;
 
 	netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
-	return jhash(&arg, netlink_compare_arg_len, seed);
+	return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
 }
 
 static const struct rhashtable_params netlink_rhashtable_params = {
 	.head_offset = offsetof(struct netlink_sock, node),
 	.key_len = netlink_compare_arg_len,
-	.hashfn = jhash,
 	.obj_hashfn = netlink_hash,
 	.obj_cmpfn = netlink_compare,
 	.max_size = 65536,
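Note the unit change that comes with jhash2(): it walks the key as u32
words, so its length argument is in words rather than bytes and the key must
be 4-byte aligned, hence the cast and the division by sizeof(u32) above. The
same conversion for a hypothetical all-u32 key struct (kernel-style sketch,
assumes linux/jhash.h; names are made up):

	struct my_key {
		u32 net_cookie;
		u32 portid;
	};				/* sizeof == 8, a multiple of 4 */

	static u32 my_hash(const struct my_key *key, u32 seed)
	{
		/* length is in 32-bit words: 2, not 8 */
		return jhash2((const u32 *)key, sizeof(*key) / sizeof(u32), seed);
	}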
From 6d022949810b1ea82d46a576d6166035720bbb32 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:24 +1100
Subject: [PATCH 5/9] tipc: Use default rhashtable hashfn

This patch removes the explicit jhash value for the hashfn parameter
of rhashtable.  The default is now jhash so removing the setting makes
no difference apart from making one less copy of jhash in the kernel.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 net/tipc/socket.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 73c2f518a7c0..6dd5bd95236a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,7 +35,6 @@
  */
 
 #include <linux/rhashtable.h>
-#include <linux/jhash.h>
 #include "core.h"
 #include "name_table.h"
 #include "node.h"
@@ -2294,7 +2293,6 @@ static const struct rhashtable_params tsk_rht_params = {
 	.head_offset = offsetof(struct tipc_sock, node),
 	.key_offset = offsetof(struct tipc_sock, portid),
 	.key_len = sizeof(u32), /* portid */
-	.hashfn = jhash,
 	.max_size = 1048576,
 	.min_size = 256,
 };
From 18093d1c0d1e032142ee24825678b0a8977d74ba Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:25 +1100
Subject: [PATCH 6/9] rhashtable: Shrink to fit

This patch changes rhashtable_shrink to shrink to the smallest size
possible rather than halving the table.  This is needed because with
multiple rehashing we will defer shrinking until all other rehashing
is done, meaning that when we do shrink we may be able to shrink a
lot.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 798f01d64ab0..9623be345d9c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht: the hash table to shrink
 *
- * This function may only be called in a context where it is safe to call
- * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
+ * This function shrinks the hash table to fit, i.e., the smallest
+ * size would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
@@ -276,10 +276,17 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2);
+	if (size < ht->p.min_size)
+		size = ht->p.min_size;
+
+	if (old_tbl->size <= size)
+		return 0;
+
+	new_tbl = bucket_table_alloc(ht, size);
 	if (new_tbl == NULL)
 		return -ENOMEM;
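The new target size is nelems * 3 / 2 rounded up to the next power of two
and clamped to min_size, i.e. the smallest table that still keeps
utilisation below the 75% grow threshold. A quick standalone check of that
arithmetic (plain userspace C; the helper names are local to this example,
not the kernel functions):

	#include <stdio.h>

	/* Round v up to the next power of two (v > 0). */
	static unsigned int round_up_pow2(unsigned int v)
	{
		unsigned int r = 1;

		while (r < v)
			r <<= 1;
		return r;
	}

	static unsigned int shrink_target(unsigned int nelems, unsigned int min_size)
	{
		unsigned int size = round_up_pow2(nelems * 3 / 2);

		return size < min_size ? min_size : size;
	}

	int main(void)
	{
		/* 400 elements: 600 rounds up to 1024, filled to about 39%. */
		printf("%u\n", shrink_target(400, 4));	/* 1024 */
		/* 3 elements: clamped to the minimum size. */
		printf("%u\n", shrink_target(3, 4));	/* 4 */
		return 0;
	}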
From b824478b2145be78ac19e1cf44e2b9036c7a9608 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:26 +1100
Subject: [PATCH 7/9] rhashtable: Add multiple rehash support

This patch adds the missing bits to allow multiple rehashes.  The read
side as well as removal already handle this correctly, so it's only the
rehasher and insertion that need modification to handle this.

Note that this patch doesn't actually enable it so for now rehashing
is still only performed by the worker thread.

This patch also disables the explicit expand/shrink interface because
the table is meant to expand and shrink automatically, and continuing
to export these interfaces unnecessarily complicates the life of the
rehasher since the rehash process is now composed of two parts.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 26 ++++++------
 lib/rhashtable.c           | 87 +++++++++++++++++++++++++++++++-------
 lib/test_rhashtable.c      | 24 -----------
 3 files changed, 86 insertions(+), 51 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index bc2488b98321..e8ffcdb5e239 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -308,9 +308,6 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *old_tbl);
 
-int rhashtable_expand(struct rhashtable *ht);
-int rhashtable_shrink(struct rhashtable *ht);
-
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
 int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
@@ -541,17 +538,22 @@ static inline int __rhashtable_insert_fast(
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->tbl, ht);
-	hash = rht_head_hashfn(ht, tbl, obj, params);
-	lock = rht_bucket_lock(tbl, hash);
 
-	spin_lock_bh(lock);
-
-	/* Because we have already taken the bucket lock in tbl,
-	 * if we find that future_tbl is not yet visible then
-	 * that guarantees all other insertions of the same entry
-	 * will also grab the bucket lock in tbl because until
-	 * the rehash completes ht->tbl won't be changed.
+	/* All insertions must grab the oldest table containing
+	 * the hashed bucket that is yet to be rehashed.
 	 */
+	for (;;) {
+		hash = rht_head_hashfn(ht, tbl, obj, params);
+		lock = rht_bucket_lock(tbl, hash);
+
+		spin_lock_bh(lock);
+		if (tbl->rehash <= hash)
+			break;
+
+		spin_unlock_bh(lock);
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	}
+
 	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (unlikely(new_tbl)) {
 		err = rhashtable_insert_slow(ht, key, obj, new_tbl);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9623be345d9c..5e04403e25f5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -136,11 +136,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	return tbl;
 }
 
+static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
+						  struct bucket_table *tbl)
+{
+	struct bucket_table *new_tbl;
+
+	do {
+		new_tbl = tbl;
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	} while (tbl);
+
+	return new_tbl;
+}
+
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	struct bucket_table *new_tbl =
-		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
+	struct bucket_table *new_tbl = rhashtable_last_table(ht,
+		rht_dereference_rcu(old_tbl->future_tbl, ht));
 	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
@@ -196,12 +209,18 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	spin_unlock_bh(old_bucket_lock);
 }
 
-static void rhashtable_rehash(struct rhashtable *ht,
-			      struct bucket_table *new_tbl)
+static int rhashtable_rehash_attach(struct rhashtable *ht,
+				    struct bucket_table *old_tbl,
+				    struct bucket_table *new_tbl)
 {
-	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	struct rhashtable_walker *walker;
-	unsigned old_hash;
+	/* Protect future_tbl using the first bucket lock. */
+	spin_lock_bh(old_tbl->locks);
+
+	/* Did somebody beat us to it? */
+	if (rcu_access_pointer(old_tbl->future_tbl)) {
+		spin_unlock_bh(old_tbl->locks);
+		return -EEXIST;
+	}
 
 	/* Make insertions go into the new, empty table right away. Deletions
 	 * and lookups will be attempted in both tables until we synchronize.
@@ -211,6 +230,22 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	/* Ensure the new table is visible to readers. */
 	smp_wmb();
 
+	spin_unlock_bh(old_tbl->locks);
+
+	return 0;
+}
+
+static int rhashtable_rehash_table(struct rhashtable *ht)
+{
+	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl;
+	struct rhashtable_walker *walker;
+	unsigned old_hash;
+
+	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
+	if (!new_tbl)
+		return 0;
+
 	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
 		rhashtable_rehash_chain(ht, old_hash);
 
@@ -225,6 +260,8 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	 * remain.
 	 */
 	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
+
+	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
 }
 
 /**
@@ -242,20 +279,25 @@ static void rhashtable_rehash(struct rhashtable *ht,
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
-int rhashtable_expand(struct rhashtable *ht)
+static int rhashtable_expand(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
+	old_tbl = rhashtable_last_table(ht, old_tbl);
+
 	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	rhashtable_rehash(ht, new_tbl);
-	return 0;
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
+
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_expand);
 
 /**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
@@ -273,10 +315,11 @@ EXPORT_SYMBOL_GPL(rhashtable_expand);
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
-int rhashtable_shrink(struct rhashtable *ht)
+static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
 	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
@@ -286,19 +329,25 @@ int rhashtable_shrink(struct rhashtable *ht)
 	if (old_tbl->size <= size)
 		return 0;
 
+	if (rht_dereference(old_tbl->future_tbl, ht))
+		return -EEXIST;
+
 	new_tbl = bucket_table_alloc(ht, size);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
-	rhashtable_rehash(ht, new_tbl);
-	return 0;
+	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
+	if (err)
+		bucket_table_free(new_tbl);
+
+	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_shrink);
 
 static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
+	int err = 0;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
@@ -306,13 +355,20 @@ static void rht_deferred_worker(struct work_struct *work)
 		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
+	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
 	else if (rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
+
+	err = rhashtable_rehash_table(ht);
+
 unlock:
 	mutex_unlock(&ht->mutex);
+
+	if (err)
+		schedule_work(&ht->run_work);
 }
 
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
@@ -323,6 +379,7 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 	unsigned hash;
 	int err = -EEXIST;
 
+	tbl = rhashtable_last_table(ht, tbl);
 	hash = head_hashfn(ht, tbl, obj);
 	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index a2ba6adb60a2..a42a0d44e818 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -155,30 +155,6 @@ static int __init test_rhashtable(struct rhashtable *ht)
 		test_rht_lookup(ht);
 	rcu_read_unlock();
 
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table expansion iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_expand(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
-	for (i = 0; i < TEST_NEXPANDS; i++) {
-		pr_info("  Table shrinkage iteration %u...\n", i);
-		mutex_lock(&ht->mutex);
-		rhashtable_shrink(ht);
-		mutex_unlock(&ht->mutex);
-
-		rcu_read_lock();
-		pr_info("  Verifying lookups...\n");
-		test_rht_lookup(ht);
-		rcu_read_unlock();
-	}
-
 	rcu_read_lock();
 	test_bucket_stats(ht, true);
 	rcu_read_unlock();
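With this patch, tables under rehash form a chain through future_tbl, each
table records how far its own rehash has progressed, and an inserter walks
forward past every bucket that has already been emptied (the real code
recomputes the hash for each table in the chain; the toy below skips that
detail). A single-threaded model of the walk, with made-up types:

	#include <stdio.h>

	struct table {
		struct table *future;	/* next table in the rehash chain, or NULL */
		unsigned int rehash;	/* buckets [0, rehash) already moved out   */
		unsigned int size;
	};

	/* Walk to the newest table in the chain (the rehash target). */
	static struct table *last_table(struct table *tbl)
	{
		while (tbl->future)
			tbl = tbl->future;
		return tbl;
	}

	/* Find the oldest table that still owns the given bucket. */
	static struct table *insert_table(struct table *tbl, unsigned int hash)
	{
		while (tbl->rehash > hash)	/* bucket already moved forward */
			tbl = tbl->future;
		return tbl;
	}

	int main(void)
	{
		struct table t2 = { NULL, 0, 8 };
		struct table t1 = { &t2, 3, 4 };	/* buckets 0-2 already rehashed */

		printf("%u\n", insert_table(&t1, 1)->size);	/* 8: bucket 1 moved on */
		printf("%u\n", insert_table(&t1, 3)->size);	/* 4: bucket 3 still in t1 */
		printf("%u\n", last_table(&t1)->size);		/* 8 */
		return 0;
	}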
From b9ecfdaa1090b5988422eaf5348ea1954d2d7219 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:27 +1100
Subject: [PATCH 8/9] rhashtable: Allow GFP_ATOMIC bucket table allocation

This patch adds the ability to allocate bucket tables with GFP_ATOMIC
instead of GFP_KERNEL.  This is needed when we perform an immediate
rehash during insertion.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5e04403e25f5..220a11a13d40 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -58,7 +58,8 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #endif
 
 
-static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
+static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
+			      gfp_t gfp)
 {
 	unsigned int i, size;
 #if defined(CONFIG_PROVE_LOCKING)
@@ -75,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
 
 	if (sizeof(spinlock_t) != 0) {
 #ifdef CONFIG_NUMA
-		if (size * sizeof(spinlock_t) > PAGE_SIZE)
+		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
+		    gfp == GFP_KERNEL)
 			tbl->locks = vmalloc(size * sizeof(spinlock_t));
 		else
 #endif
 			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-						   GFP_KERNEL);
+						   gfp);
 		if (!tbl->locks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
@@ -105,23 +107,25 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 }
 
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
-					       size_t nbuckets)
+					       size_t nbuckets,
+					       gfp_t gfp)
 {
 	struct bucket_table *tbl = NULL;
 	size_t size;
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-		tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-	if (tbl == NULL)
+	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
+	    gfp != GFP_KERNEL)
+		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
+	if (tbl == NULL && gfp == GFP_KERNEL)
 		tbl = vzalloc(size);
 	if (tbl == NULL)
 		return NULL;
 
 	tbl->size = nbuckets;
 
-	if (alloc_bucket_locks(ht, tbl) < 0) {
+	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}
@@ -288,7 +292,7 @@ static int rhashtable_expand(struct rhashtable *ht)
 
 	old_tbl = rhashtable_last_table(ht, old_tbl);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -332,7 +336,7 @@ static int rhashtable_shrink(struct rhashtable *ht)
 	if (rht_dereference(old_tbl->future_tbl, ht))
 		return -EEXIST;
 
-	new_tbl = bucket_table_alloc(ht, size);
+	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -689,7 +693,7 @@ int rhashtable_init(struct rhashtable *ht,
 		}
 	}
 
-	tbl = bucket_table_alloc(ht, size);
+	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
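In short, the vmalloc()/vzalloc() fallbacks are only reachable for
GFP_KERNEL callers because vmalloc may sleep; an atomic caller gets exactly
one kzalloc() attempt (kept cheap and quiet by __GFP_NOWARN | __GFP_NORETRY)
and must cope with failure. Stripped of the rhashtable specifics, the
allocation policy reads like this (a paraphrase of the hunk above, not a new
kernel helper):

	static void *table_mem_alloc(size_t size, gfp_t gfp)
	{
		void *p = NULL;

		/* Small requests, and all atomic requests, go through the slab. */
		if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || gfp != GFP_KERNEL)
			p = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);

		/* Only sleepable callers may fall back to vmalloc-backed memory. */
		if (!p && gfp == GFP_KERNEL)
			p = vzalloc(size);

		return p;	/* NULL on failure, e.g. a large GFP_ATOMIC request */
	}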
From ccd57b1bd32460d27bbb9c599e795628a3c66983 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 24 Mar 2015 00:50:28 +1100
Subject: [PATCH 9/9] rhashtable: Add immediate rehash during insertion

This patch reintroduces immediate rehash during insertion.  If we find
during insertion that the table is full or the chain length exceeds a
set limit (currently 16 but may be disabled with insecure_elasticity)
then we will force an immediate rehash.  The rehash will contain an
expansion if the table utilisation exceeds 75%.

If this rehash fails then the insertion will fail.  Otherwise the
insertion will be reattempted in the new hash table.

Signed-off-by: Herbert Xu
Acked-by: Thomas Graf
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 42 ++++++++++++++++++++++----
 lib/rhashtable.c           | 60 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index e8ffcdb5e239..f9ecf32bce55 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -103,6 +103,7 @@ struct rhashtable;
 * @max_size: Maximum size while expanding
 * @min_size: Minimum size while shrinking
 * @nulls_base: Base value to generate nulls marker
+ * @insecure_elasticity: Set to true to disable chain length checks
 * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
 * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
 * @obj_hashfn: Function to hash object
@@ -116,6 +117,7 @@ struct rhashtable_params {
 	unsigned int		max_size;
 	unsigned int		min_size;
 	u32			nulls_base;
+	bool			insecure_elasticity;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
@@ -127,6 +129,7 @@ struct rhashtable_params {
 * @tbl: Bucket table
 * @nelems: Number of elements in table
 * @key_len: Key length for hashfn
+ * @elasticity: Maximum chain length before rehash
 * @p: Configuration parameters
 * @run_work: Deferred worker to expand/shrink asynchronously
 * @mutex: Mutex to protect current/future table swapping
@@ -137,6 +140,7 @@ struct rhashtable {
 	atomic_t		nelems;
 	bool			being_destroyed;
 	unsigned int		key_len;
+	unsigned int		elasticity;
 	struct rhashtable_params	p;
 	struct work_struct	run_work;
 	struct mutex		mutex;
@@ -266,6 +270,17 @@ static inline bool rht_shrink_below_30(const struct rhashtable *ht,
 	       tbl->size > ht->p.min_size;
 }
 
+/**
+ * rht_grow_above_100 - returns true if nelems > table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_100(const struct rhashtable *ht,
+				      const struct bucket_table *tbl)
+{
+	return atomic_read(&ht->nelems) > tbl->size;
+}
+
 /* The bucket lock is selected based on the hash and protects mutations
 * on a group of hash buckets.
 *
@@ -307,6 +322,7 @@ int rhashtable_init(struct rhashtable *ht,
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *old_tbl);
+int rhashtable_insert_rehash(struct rhashtable *ht);
 
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
@@ -529,12 +545,14 @@ static inline int __rhashtable_insert_fast(
 		.ht = ht,
 		.key = key,
 	};
-	int err = -EEXIST;
 	struct bucket_table *tbl, *new_tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
+	unsigned elasticity;
 	unsigned hash;
+	int err;
 
+restart:
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->tbl, ht);
@@ -557,20 +575,34 @@ static inline int __rhashtable_insert_fast(
 	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (unlikely(new_tbl)) {
 		err = rhashtable_insert_slow(ht, key, obj, new_tbl);
+		if (err == -EAGAIN)
+			goto slow_path;
 		goto out;
 	}
 
-	if (!key)
-		goto skip_lookup;
+	if (unlikely(rht_grow_above_100(ht, tbl))) {
+slow_path:
+		spin_unlock_bh(lock);
+		rcu_read_unlock();
+		err = rhashtable_insert_rehash(ht);
+		if (err)
+			return err;
+
+		goto restart;
+	}
 
+	err = -EEXIST;
+	elasticity = ht->elasticity;
 	rht_for_each(head, tbl, hash) {
-		if (unlikely(!(params.obj_cmpfn ?
+		if (key &&
+		    unlikely(!(params.obj_cmpfn ?
 			       params.obj_cmpfn(&arg, rht_obj(ht, head)) :
 			       rhashtable_compare(&arg, rht_obj(ht, head)))))
 			goto out;
+		if (!--elasticity)
+			goto slow_path;
 	}
 
-skip_lookup:
 	err = 0;
 
 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 220a11a13d40..7686c1e9934a 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -375,21 +375,76 @@ static void rht_deferred_worker(struct work_struct *work)
 		schedule_work(&ht->run_work);
 }
 
+static bool rhashtable_check_elasticity(struct rhashtable *ht,
+					struct bucket_table *tbl,
+					unsigned hash)
+{
+	unsigned elasticity = ht->elasticity;
+	struct rhash_head *head;
+
+	rht_for_each(head, tbl, hash)
+		if (!--elasticity)
+			return true;
+
+	return false;
+}
+
+int rhashtable_insert_rehash(struct rhashtable *ht)
+{
+	struct bucket_table *old_tbl;
+	struct bucket_table *new_tbl;
+	struct bucket_table *tbl;
+	unsigned int size;
+	int err;
+
+	old_tbl = rht_dereference_rcu(ht->tbl, ht);
+	tbl = rhashtable_last_table(ht, old_tbl);
+
+	size = tbl->size;
+
+	if (rht_grow_above_75(ht, tbl))
+		size *= 2;
+	/* More than two rehashes (not resizes) detected. */
+	else if (WARN_ON(old_tbl != tbl && old_tbl->size == size))
+		return -EBUSY;
+
+	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+	if (new_tbl == NULL)
+		return -ENOMEM;
+
+	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
+	if (err) {
+		bucket_table_free(new_tbl);
+		if (err == -EEXIST)
+			err = 0;
+	} else
+		schedule_work(&ht->run_work);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
+
 int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			   struct rhash_head *obj,
 			   struct bucket_table *tbl)
 {
 	struct rhash_head *head;
 	unsigned hash;
-	int err = -EEXIST;
+	int err;
 
 	tbl = rhashtable_last_table(ht, tbl);
 	hash = head_hashfn(ht, tbl, obj);
 	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
+	err = -EEXIST;
 	if (key && rhashtable_lookup_fast(ht, key, ht->p))
 		goto exit;
 
+	err = -EAGAIN;
+	if (rhashtable_check_elasticity(ht, tbl, hash) ||
+	    rht_grow_above_100(ht, tbl))
+		goto exit;
+
 	err = 0;
 
 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
@@ -678,6 +733,9 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
 
+	if (!params->insecure_elasticity)
+		ht->elasticity = 16;
+
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
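Two practical consequences for users of the API: rhashtable_insert_fast()
can now fail with -EBUSY or -ENOMEM when an immediate (atomic) rehash cannot
be performed, so the return value has to be handled, and callers who
genuinely want unbounded chains can opt out of the new 16-entry limit. A
hypothetical sketch, reusing the made-up struct my_obj from the example
after patch 3 (my_ht, obj and the error label are likewise illustrative):

	static const struct rhashtable_params my_params = {
		.head_offset		= offsetof(struct my_obj, node),
		.key_offset		= offsetof(struct my_obj, key),
		.key_len		= sizeof(u32),
		.insecure_elasticity	= true,	/* skip the chain length check */
	};

	/* Insertion may now fail for reasons other than a duplicate key. */
	err = rhashtable_insert_fast(&my_ht, &obj->node, my_params);
	if (err)	/* -EEXIST, -EBUSY or -ENOMEM */
		goto drop;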