linux/lib/percpu_counter.c
Dave Chinner 80188b0d77 percpu_counter: batch size aware __percpu_counter_compare()
XFS uses non-standard batch sizes for avoiding frequent global
counter updates on its allocated inode counters, as they increment
or decrement in batches of 64 inodes. Hence the standard percpu
counter batch of 32 means that the counter is effectively a global
counter. Currently XFS uses a batch size of 128 so that it doesn't
take the global lock on every single modification.

However, XFS also needs to compare accurately against zero, which
means we need to use percpu_counter_compare(), and that has a
hard-coded batch size of 32, and hence will spuriously fail to
detect when it is supposed to use precise comparisons, so
the accounting goes wrong.

Add __percpu_counter_compare() to take a custom batch size so we can
use it sanely in XFS and factor percpu_counter_compare() to use it.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 07:39:34 +10:00

229 lines
5.5 KiB
C

/*
* Fast batching percpu counters.
*/
#include <linux/percpu_counter.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/debugobjects.h>
#ifdef CONFIG_HOTPLUG_CPU
/*
 * List of every initialised counter: __percpu_counter_init() adds to it,
 * percpu_counter_destroy() removes from it, and the CPU hotplug callback
 * walks it. All three take percpu_counters_lock around the list access.
 */
static LIST_HEAD(percpu_counters);
static DEFINE_SPINLOCK(percpu_counters_lock);
#endif
#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
/*
 * Forward declaration: percpu_counter_fixup_free() below refers to the
 * descriptor, and the descriptor's initializer refers back to that function.
 */
static struct debug_obj_descr percpu_counter_debug_descr;
/*
 * debugobjects ->fixup_free hook for percpu counters.
 *
 * @addr:  the struct percpu_counter being tracked
 * @state: tracked state of that object
 *
 * If the object is still in ODEBUG_STATE_ACTIVE, destroy the counter and
 * free its debug object, then return 1. For any other state do nothing
 * and return 0.
 */
static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
{
	struct percpu_counter *counter = addr;

	if (state != ODEBUG_STATE_ACTIVE)
		return 0;

	percpu_counter_destroy(counter);
	debug_object_free(counter, &percpu_counter_debug_descr);
	return 1;
}
/*
 * debugobjects descriptor for percpu counters: gives the object type its
 * name and installs percpu_counter_fixup_free() as the fixup_free handler.
 */
static struct debug_obj_descr percpu_counter_debug_descr = {
.name = "percpu_counter",
.fixup_free = percpu_counter_fixup_free,
};
/*
 * Register @fbc with debugobjects: initialise its tracking entry and then
 * mark it active. Called from __percpu_counter_init() once the per-cpu
 * storage has been allocated.
 */
static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{
	struct debug_obj_descr *descr = &percpu_counter_debug_descr;

	debug_object_init(fbc, descr);
	debug_object_activate(fbc, descr);
}
/*
 * Unregister @fbc from debugobjects: mark it inactive and then free its
 * tracking entry. Called from percpu_counter_destroy().
 */
static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{
	struct debug_obj_descr *descr = &percpu_counter_debug_descr;

	debug_object_deactivate(fbc, descr);
	debug_object_free(fbc, descr);
}
#else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
/* No-op stand-in used when CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER is not set. */
static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{
}
/* No-op stand-in used when CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER is not set. */
static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{
}
#endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
/*
 * Force the counter to @amount.
 *
 * With fbc->lock held and interrupts disabled, every possible CPU's local
 * delta is zeroed and the global count is overwritten with @amount.
 */
void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
	unsigned long irq_flags;
	int i;

	raw_spin_lock_irqsave(&fbc->lock, irq_flags);
	for_each_possible_cpu(i)
		*per_cpu_ptr(fbc->counters, i) = 0;
	fbc->count = amount;
	raw_spin_unlock_irqrestore(&fbc->lock, irq_flags);
}
EXPORT_SYMBOL(percpu_counter_set);
/*
 * Add @amount to the counter, batching updates per CPU.
 *
 * @fbc:    counter to modify
 * @amount: signed delta to apply
 * @batch:  magnitude at which this CPU's local delta is folded into
 *          the global count
 *
 * While the prospective local value stays strictly inside (-batch, batch)
 * only this CPU's slot is touched. Otherwise the whole local value plus
 * @amount is moved into fbc->count under fbc->lock and the local slot is
 * reduced by what it previously held.
 */
void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
{
	unsigned long irq_flags;
	s64 pending;

	preempt_disable();
	pending = __this_cpu_read(*fbc->counters) + amount;
	if (pending < batch && pending > -batch) {
		/* Still within the batch window: stay on the per-cpu slot. */
		this_cpu_add(*fbc->counters, amount);
	} else {
		raw_spin_lock_irqsave(&fbc->lock, irq_flags);
		fbc->count += pending;
		/* pending - amount is the value read from the local slot. */
		__this_cpu_sub(*fbc->counters, pending - amount);
		raw_spin_unlock_irqrestore(&fbc->lock, irq_flags);
	}
	preempt_enable();
}
EXPORT_SYMBOL(__percpu_counter_add);
/*
* Add up all the per-cpu counts, return the result. This is a more accurate
* but much slower version of percpu_counter_read_positive()
*/
s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
s64 ret;
int cpu;
unsigned long flags;
raw_spin_lock_irqsave(&fbc->lock, flags);
ret = fbc->count;
for_each_online_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
}
raw_spin_unlock_irqrestore(&fbc->lock, flags);
return ret;
}
EXPORT_SYMBOL(__percpu_counter_sum);
/*
 * Initialise a percpu counter.
 *
 * @fbc:    counter to set up
 * @amount: initial value of the global count
 * @gfp:    allocation flags for the per-cpu storage
 * @key:    lockdep class assigned to fbc->lock
 *
 * Returns 0 on success, or -ENOMEM if the per-cpu storage could not be
 * allocated. On success the counter is registered with debugobjects and,
 * with CONFIG_HOTPLUG_CPU, linked into the global percpu_counters list.
 */
int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
			  struct lock_class_key *key)
{
	unsigned long irq_flags __maybe_unused;

	raw_spin_lock_init(&fbc->lock);
	lockdep_set_class(&fbc->lock, key);
	fbc->count = amount;

	fbc->counters = alloc_percpu_gfp(s32, gfp);
	if (fbc->counters == NULL)
		return -ENOMEM;

	debug_percpu_counter_activate(fbc);

#ifdef CONFIG_HOTPLUG_CPU
	INIT_LIST_HEAD(&fbc->list);
	spin_lock_irqsave(&percpu_counters_lock, irq_flags);
	list_add(&fbc->list, &percpu_counters);
	spin_unlock_irqrestore(&percpu_counters_lock, irq_flags);
#endif

	return 0;
}
EXPORT_SYMBOL(__percpu_counter_init);
/*
 * Tear down a percpu counter.
 *
 * Returns immediately if fbc->counters is NULL. Otherwise the counter is
 * unregistered from debugobjects, unlinked from the global list (with
 * CONFIG_HOTPLUG_CPU), and its per-cpu storage is freed. fbc->counters is
 * set to NULL afterwards, so a second call on the same counter is a no-op.
 */
void percpu_counter_destroy(struct percpu_counter *fbc)
{
	unsigned long irq_flags __maybe_unused;

	if (fbc->counters == NULL)
		return;

	debug_percpu_counter_deactivate(fbc);

#ifdef CONFIG_HOTPLUG_CPU
	spin_lock_irqsave(&percpu_counters_lock, irq_flags);
	list_del(&fbc->list);
	spin_unlock_irqrestore(&percpu_counters_lock, irq_flags);
#endif

	free_percpu(fbc->counters);
	fbc->counters = NULL;
}
EXPORT_SYMBOL(percpu_counter_destroy);
/*
 * Default batch size, exported for use outside this file. Starts at 32 and
 * is recomputed by compute_batch_value() as max(32, 2 * online CPUs).
 */
int percpu_counter_batch __read_mostly = 32;
EXPORT_SYMBOL(percpu_counter_batch);
/*
 * Recompute percpu_counter_batch from the number of CPUs currently online:
 * twice the online CPU count, but never less than 32.
 */
static void compute_batch_value(void)
{
	int online = num_online_cpus();
	int scaled = online * 2;

	percpu_counter_batch = (scaled > 32) ? scaled : 32;
}
/*
 * CPU hotplug notifier callback.
 *
 * With CONFIG_HOTPLUG_CPU, every notification recomputes the default batch
 * size. For CPU_DEAD / CPU_DEAD_FROZEN the CPU number is taken from @hcpu,
 * and for every registered counter that CPU's local delta is folded into
 * the global count and the local slot is cleared.
 *
 * Always returns NOTIFY_OK.
 */
static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
					unsigned long action, void *hcpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	struct percpu_counter *counter;
	unsigned int dead_cpu;

	compute_batch_value();

	switch (action) {
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		break;
	default:
		return NOTIFY_OK;
	}

	dead_cpu = (unsigned long)hcpu;

	spin_lock_irq(&percpu_counters_lock);
	list_for_each_entry(counter, &percpu_counters, list) {
		unsigned long irq_flags;
		s32 *slot;

		raw_spin_lock_irqsave(&counter->lock, irq_flags);
		slot = per_cpu_ptr(counter->counters, dead_cpu);
		counter->count += *slot;
		*slot = 0;
		raw_spin_unlock_irqrestore(&counter->lock, irq_flags);
	}
	spin_unlock_irq(&percpu_counters_lock);
#endif
	return NOTIFY_OK;
}
/*
 * Compare counter against given value.
 *
 * @fbc:   counter to compare
 * @rhs:   value to compare against
 * @batch: batch size the counter is updated with
 *
 * Return 1 if greater, 0 if equal and -1 if less.
 *
 * The cheap approximate read is trusted only when it differs from @rhs by
 * more than batch * num_online_cpus(); otherwise the exact sum is taken.
 */
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
{
	s64 count;
	s64 distance;
	s64 max_error;

	count = percpu_counter_read(fbc);

	/*
	 * Do the whole check in 64-bit arithmetic. s32 * unsigned int would be
	 * evaluated in 32 bits and can wrap for large custom batch sizes, and
	 * an open-coded magnitude does not depend on how the abs() macro treats
	 * 64-bit operands on 32-bit builds.
	 */
	max_error = (s64)batch * num_online_cpus();
	distance = count - rhs;
	if (distance < 0)
		distance = -distance;

	/* Check to see if rough count will be sufficient for comparison */
	if (distance > max_error) {
		if (count > rhs)
			return 1;
		else
			return -1;
	}

	/* Need to use precise count */
	count = percpu_counter_sum(fbc);
	if (count > rhs)
		return 1;
	else if (count < rhs)
		return -1;
	else
		return 0;
}
EXPORT_SYMBOL(__percpu_counter_compare);
/*
 * Init-time setup: compute the default batch size from the CPUs currently
 * online, then register percpu_counter_hotcpu_callback() as the CPU hotplug
 * notifier. Always returns 0.
 */
static int __init percpu_counter_startup(void)
{
	compute_batch_value();
	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);

	return 0;
}
module_init(percpu_counter_startup);