AK: Add a prefix tree implementation

`AK::Trie` can be keyed by any given hashable type, and can store any
metadata (including nothing at all).
Also adds a test.
This commit is contained in:
AnotherTest 2020-12-26 13:52:44 +03:30 committed by Andreas Kling
parent 3dcdee75be
commit cb3348191b
3 changed files with 316 additions and 0 deletions

View file

@ -33,6 +33,7 @@ set(AK_TEST_SOURCES
TestString.cpp
TestStringUtils.cpp
TestStringView.cpp
TestTrie.cpp
TestTypedTransfer.cpp
TestURL.cpp
TestUtf8.cpp

83
AK/Tests/TestTrie.cpp Normal file
View file

@ -0,0 +1,83 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/TestSuite.h>
#include <AK/Trie.h>
TEST_CASE(normal_behaviour)
{
Trie<char, String> dictionary('/', "");
constexpr static const char* data[] { "test", "example", "foo", "foobar" };
constexpr static const size_t total_chars = 18; // root (1), 'test' (4), 'example' (7), 'foo' (3), 'foobar' (3, "foo" already stored).
for (auto& entry : data) {
StringView view { entry };
auto it = view.begin();
dictionary.insert(it, view.end(), view, [](auto& parent, auto& it) -> Optional<String> { return String::formatted("{}{}", parent.metadata_value(), *it); });
}
size_t i = 0;
for ([[maybe_unused]] auto& node : dictionary)
++i;
EXPECT_EQ(i, total_chars);
for (auto& entry : data) {
StringView view { entry };
auto it = view.begin();
auto& node = dictionary.traverse_until_last_accessible_node(it, view.end());
EXPECT(it.is_end());
EXPECT(node.metadata().has_value());
EXPECT_EQ(view, node.metadata_value());
}
constexpr static const char* test_data_with_prefix_in_dict[] { "testx", "exampley", "fooa", "foobarb", "fox", "text" };
for (auto& entry : test_data_with_prefix_in_dict) {
StringView view { entry };
auto it = view.begin();
auto& node = dictionary.traverse_until_last_accessible_node(it, view.end());
EXPECT(!it.is_end());
EXPECT(node.metadata().has_value());
EXPECT(view.starts_with(node.metadata_value()));
}
}
TEST_CASE(iterate)
{
Trie<int> bunch_of_numbers { 0 };
Array<int, 64> input;
for (size_t i = 0; i < input.size(); ++i)
input[i] = i;
// Iteration order is preorder (order between adjacent nodes is not defined, but parents come before children)
// in this case, the tree is linear.
size_t i = 0;
for (auto& node : bunch_of_numbers) {
EXPECT_EQ(input[i], node.value());
++i;
}
}
TEST_MAIN(AllOf)

232
AK/Trie.h Normal file
View file

@ -0,0 +1,232 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/Forward.h>
#include <AK/HashMap.h>
#include <AK/Types.h>
namespace AK {
namespace Detail {
template<typename TypeA, typename Default>
struct SubstituteIfVoid {
using Type = TypeA;
};
template<typename Default>
struct SubstituteIfVoid<void, Default> {
using Type = Default;
};
template<typename DeclaredBaseType, typename DefaultBaseType, typename ValueType, typename MetadataT, typename ValueTraits>
class Trie {
using BaseType = typename SubstituteIfVoid<DeclaredBaseType, DefaultBaseType>::Type;
class ConstIterator {
public:
static ConstIterator end() { return {}; }
bool operator==(const ConstIterator& other) const { return m_current_node == other.m_current_node; }
const BaseType& operator*() const { return static_cast<const BaseType&>(*m_current_node); }
const BaseType* operator->() const { return static_cast<const BaseType*>(m_current_node); }
void operator++() { skip_to_next(); }
explicit ConstIterator(const Trie& node)
{
m_current_node = &node;
m_state.empend(false, node.m_children.begin(), node.m_children.end());
}
private:
void skip_to_next()
{
auto& current_state = m_state.last();
if (current_state.did_generate_root)
++current_state.it;
else
current_state.did_generate_root = true;
if (current_state.it == current_state.end)
return pop_and_get_next();
m_current_node = &*(*current_state.it).value;
m_state.empend(false, m_current_node->m_children.begin(), m_current_node->m_children.end());
}
void pop_and_get_next()
{
m_state.take_last();
if (m_state.is_empty()) {
m_current_node = nullptr;
return;
}
skip_to_next();
}
ConstIterator() = default;
struct State {
bool did_generate_root { false };
typename HashMap<ValueType, NonnullOwnPtr<Trie>, ValueTraits>::ConstIteratorType it;
typename HashMap<ValueType, NonnullOwnPtr<Trie>, ValueTraits>::ConstIteratorType end;
};
Vector<State> m_state;
const Trie* m_current_node { nullptr };
};
public:
using MetadataType = MetadataT;
Trie(ValueType value, Optional<MetadataType> metadata)
: m_value(move(value))
, m_metadata(move(metadata))
{
}
template<typename It>
BaseType& traverse_until_last_accessible_node(It& it, const It& end)
{
Trie* node = this;
for (; it < end; ++it) {
auto next_it = node->m_children.find(*it);
if (next_it == node->m_children.end())
return static_cast<BaseType&>(*node);
node = &*(*next_it).value;
}
return static_cast<BaseType&>(*node);
}
template<typename It>
const BaseType& traverse_until_last_accessible_node(It& it, const It& end) const { return const_cast<Trie*>(this)->traverse_until_last_accessible_node(it, end); }
template<typename It>
BaseType& traverse_until_last_accessible_node(const It& begin, const It& end)
{
auto it = begin;
return const_cast<Trie*>(this)->traverse_until_last_accessible_node(it, end);
}
template<typename It>
const BaseType& traverse_until_last_accessible_node(const It& begin, const It& end) const
{
auto it = begin;
return const_cast<Trie*>(this)->traverse_until_last_accessible_node(it, end);
}
Optional<MetadataType> metadata() const requires(!IsSame<MetadataType, decltype(nullptr)>::value) { return m_metadata; }
void set_metadata(MetadataType metadata) requires(!IsSame<MetadataType, decltype(nullptr)>::value) { m_metadata = move(metadata); }
const MetadataType& metadata_value() const requires(!IsSame<MetadataType, decltype(nullptr)>::value) { return m_metadata.value(); }
const ValueType& value() const { return m_value; }
ValueType& value() { return m_value; }
Trie& ensure_child(ValueType value, Optional<MetadataType> metadata = {})
{
auto it = m_children.find(value);
if (it == m_children.end()) {
auto node = make<Trie>(value, move(metadata));
auto& node_ref = *node;
m_children.set(move(value), move(node));
return static_cast<BaseType&>(node_ref);
}
auto& node_ref = *it->value;
if (metadata.has_value())
node_ref.m_metadata = move(metadata);
return static_cast<BaseType&>(node_ref);
}
template<typename It, typename ProvideMetadataFunction>
BaseType& insert(
It& it, const It& end, MetadataType metadata, ProvideMetadataFunction provide_missing_metadata) requires(!IsSame<MetadataType, decltype(nullptr)>::value)
{
Trie* last_root_node = &traverse_until_last_accessible_node(it, end);
for (; it != end; ++it)
last_root_node = static_cast<Trie*>(&last_root_node->ensure_child(*it, provide_missing_metadata(static_cast<BaseType&>(*last_root_node), it)));
last_root_node->set_metadata(move(metadata));
return static_cast<BaseType&>(*last_root_node);
}
template<typename It>
BaseType& insert(It& it, const It& end) requires(IsSame<MetadataType, decltype(nullptr)>::value)
{
Trie* last_root_node = &traverse_until_last_accessible_node(it, end);
for (; it != end; ++it)
last_root_node = static_cast<Trie*>(&last_root_node->ensure_child(*it, {}));
return static_cast<BaseType&>(*last_root_node);
}
ConstIterator begin() const { return ConstIterator(*this); }
ConstIterator end() const { return ConstIterator::end(); }
bool is_empty() const { return m_children.is_empty(); }
void clear() { m_children.clear(); }
BaseType deep_copy()
{
Trie root(m_value, m_metadata);
for (auto& it : m_children)
root.m_children.set(it.key, make<Trie>(it.value->deep_copy()));
return static_cast<BaseType&&>(move(root));
}
private:
ValueType m_value;
Optional<MetadataType> m_metadata;
HashMap<ValueType, NonnullOwnPtr<Trie>, ValueTraits> m_children;
};
template<typename BaseType, typename DefaultBaseType, typename ValueType, typename ValueTraits>
class Trie<BaseType, DefaultBaseType, ValueType, void, ValueTraits> : public Trie<BaseType, DefaultBaseType, ValueType, decltype(nullptr), ValueTraits> {
using Trie<BaseType, DefaultBaseType, ValueType, decltype(nullptr), ValueTraits>::Trie;
};
}
template<typename ValueType, typename MetadataT = void, typename ValueTraits = Traits<ValueType>, typename BaseT = void>
class Trie : public Detail::Trie<BaseT, Trie<ValueType, MetadataT, ValueTraits>, ValueType, MetadataT, ValueTraits> {
public:
using DetailTrie = Detail::Trie<BaseT, Trie<ValueType, MetadataT, ValueTraits>, ValueType, MetadataT, ValueTraits>;
using MetadataType = typename DetailTrie::MetadataType;
Trie(ValueType value, MetadataType metadata) requires(!IsSame<MetadataType, void>::value && !IsSame<MetadataType, decltype(nullptr)>::value)
: DetailTrie(move(value), move(metadata))
{
}
explicit Trie(ValueType value)
: DetailTrie(move(value), Optional<MetadataType> {})
{
}
};
}
using AK::Trie;