From da1023fcc59a5edc3e47b6f88763e1ded9477564 Mon Sep 17 00:00:00 2001 From: Staubfinger Date: Sun, 8 Jan 2023 14:55:54 +0100 Subject: [PATCH] AK: Add thresholds to `quickselect_inline` and `Statistics::Median` I did a bit of Profiling and made the quickselect and median algorithms use the best of option for the respective input size. --- AK/QuickSelect.h | 13 +++++-- AK/Statistics.h | 12 ++++++- Tests/AK/CMakeLists.txt | 1 + Tests/AK/TestStatistics.cpp | 70 +++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 Tests/AK/TestStatistics.cpp diff --git a/AK/QuickSelect.h b/AK/QuickSelect.h index 81a7973e10..bd11c11ded 100644 --- a/AK/QuickSelect.h +++ b/AK/QuickSelect.h @@ -11,6 +11,9 @@ #include namespace AK { + +static constexpr int MEDIAN_OF_MEDIAN_CUTOFF = 4500; + // FIXME: Stole and adapted these two functions from `Userland/Demos/Tubes/Tubes.cpp` we really need something like this in `AK/Random.h` static inline double random_double() { @@ -161,9 +164,13 @@ size_t quickselect_inplace(Collection& collection, size_t k, PivotFn pivot_fn) template size_t quickselect_inplace(Collection& collection, size_t k) { - // By default, lets use middle_element to match `quicksort` - return quickselect_inplace( - collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::middle_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); + if (collection.size() >= MEDIAN_OF_MEDIAN_CUTOFF) + return quickselect_inplace( + collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::median_of_medians(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); + + else + return quickselect_inplace( + collection, 0, collection.size() - 1, k, [](auto collection, size_t left, size_t right, auto less_than) { return PivotFunctions::random_element(collection, left, right, less_than); }, [](auto& a, auto& b) { return a < b; }); } } diff --git a/AK/Statistics.h b/AK/Statistics.h index 93df7bd110..0c193d7ed4 100644 --- a/AK/Statistics.h +++ b/AK/Statistics.h @@ -9,10 +9,14 @@ #include #include #include +#include #include namespace AK { +static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200; +static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350; + template class Statistics { public: @@ -75,7 +79,13 @@ public: return 0; // If the number of values is even, the median is the arithmetic mean of the two middle values - if (size() % 2 == 0) { + if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) { + quick_sort(m_values); + return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2; + } else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) { + quick_sort(m_values); + return m_values.at(m_values.size() / 2); + } else if (size() % 2 == 0) { auto index = size() / 2; auto median1 = m_values.at(AK::quickselect_inplace(m_values, index)); auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1)); diff --git a/Tests/AK/CMakeLists.txt b/Tests/AK/CMakeLists.txt index 408c4575a3..c59619642e 100644 --- a/Tests/AK/CMakeLists.txt +++ b/Tests/AK/CMakeLists.txt @@ -67,6 +67,7 @@ set(AK_TEST_SOURCES TestSourceLocation.cpp TestSpan.cpp TestStack.cpp + TestStatistics.cpp TestStdLibExtras.cpp TestString.cpp TestStringFloatingPointConversions.cpp diff --git a/Tests/AK/TestStatistics.cpp b/Tests/AK/TestStatistics.cpp new file mode 100644 index 0000000000..2caa61db8d --- /dev/null +++ b/Tests/AK/TestStatistics.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023, the SerenityOS developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +TEST_CASE(Statistics) +{ + // Setup Test Data + AK::Statistics odd_number_elements; + AK::Statistics even_number_elements; + AK::Statistics odd_number_elements_large; + AK::Statistics even_number_elements_large; + + odd_number_elements.add(5.0); + odd_number_elements.add(4.0); + odd_number_elements.add(3.0); + odd_number_elements.add(2.0); + odd_number_elements.add(1.0); + + even_number_elements.add(6.0); + even_number_elements.add(5.0); + even_number_elements.add(4.0); + even_number_elements.add(3.0); + even_number_elements.add(2.0); + even_number_elements.add(1.0); + + for (int i = 201; i > 0; i--) { + odd_number_elements_large.add(i); + } + + for (int i = 360; i > 0; i--) { + even_number_elements_large.add(i); + } + + // Sum + EXPECT_APPROXIMATE(odd_number_elements.sum(), 15.0); + EXPECT_APPROXIMATE(even_number_elements.sum(), 21.0); + + // Average + EXPECT_APPROXIMATE(odd_number_elements.average(), 3.0); + EXPECT_APPROXIMATE(even_number_elements.average(), 3.5); + + // Min + EXPECT_APPROXIMATE(odd_number_elements.min(), 1.0); + EXPECT_APPROXIMATE(even_number_elements.min(), 1.0); + + // Max + EXPECT_APPROXIMATE(odd_number_elements.max(), 5.0); + EXPECT_APPROXIMATE(even_number_elements.max(), 6.0); + + // Median + EXPECT_APPROXIMATE(odd_number_elements.median(), 3.0); + EXPECT_APPROXIMATE(even_number_elements.median(), 3.5); + EXPECT_APPROXIMATE(odd_number_elements_large.median(), 101.0); + EXPECT_APPROXIMATE(even_number_elements_large.median(), 180.5); + + // The expected values for standard deviation and variance were calculated by my school issued scientific calculator + + // Standard Deviation + EXPECT_APPROXIMATE(odd_number_elements.standard_deviation(), 1.4142135623731); + EXPECT_APPROXIMATE(even_number_elements.standard_deviation(), 1.7078251276599); + + // Variance + EXPECT_APPROXIMATE(odd_number_elements.variance(), 2.0); + EXPECT_APPROXIMATE(even_number_elements.variance(), 2.9166666666667); +}