serenity/AK/Statistics.h
Staubfinger da1023fcc5 AK: Add thresholds to quickselect_inline and Statistics::Median
I did a bit of Profiling and made the quickselect and median algorithms
use the best of option for the respective input size.
2023-02-03 19:04:15 +01:00

118 lines
3.2 KiB
C++

/*
* Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Concepts.h>
#include <AK/Math.h>
#include <AK/QuickSelect.h>
#include <AK/QuickSort.h>
#include <AK/Vector.h>
namespace AK {
static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;
template<Arithmetic T = float>
class Statistics {
public:
Statistics() = default;
~Statistics() = default;
void add(T const& value)
{
// FIXME: Check for an overflow
m_sum += value;
m_values.append(value);
}
T const sum() const { return m_sum; }
// FIXME: Unclear Wording, average can mean a lot of different things
// Median, Arithmetic Mean (which this is), Geometric Mean, Harmonic Mean etc
float average() const
{
// Let's assume the average of an empty dataset is 0
if (size() == 0)
return 0;
// TODO: sum might overflow so maybe do multiple partial sums and intermediate divisions here
return (float)sum() / size();
}
T const min() const
{
// Lets Rather fail than read over the end of a collection
VERIFY(size() != 0);
T minimum = m_values[0];
for (T number : values()) {
if (number < minimum) {
minimum = number;
}
}
return minimum;
}
T const max() const
{
// Lets Rather fail than read over the end of a collection
VERIFY(size() != 0);
T maximum = m_values[0];
for (T number : values()) {
if (number > maximum) {
maximum = number;
}
}
return maximum;
}
T const median()
{
// Let's assume the Median of an empty dataset is 0
if (size() == 0)
return 0;
// If the number of values is even, the median is the arithmetic mean of the two middle values
if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
quick_sort(m_values);
return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
} else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
quick_sort(m_values);
return m_values.at(m_values.size() / 2);
} else if (size() % 2 == 0) {
auto index = size() / 2;
auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
return (median1 + median2) / 2;
}
return m_values.at(AK::quickselect_inplace(m_values, size() / 2));
}
float standard_deviation() const { return sqrt(variance()); }
float variance() const
{
float summation = 0;
float avg = average();
for (T number : values()) {
float difference = (float)number - avg;
summation += (difference * difference);
}
summation = summation / size();
return summation;
}
Vector<T> const& values() const { return m_values; }
size_t size() const { return m_values.size(); }
private:
Vector<T> m_values;
T m_sum {};
};
}