AK: Add FuzzyMatch header

This patch adds a header containing the fuzzy match algorithm
previously used in Assistant. The algorithm was moved to AK
since there are many places where a search may benefit from fuzziness.
This commit is contained in:
faxe1008 2022-04-16 22:01:20 +02:00 committed by Linus Groh
parent 1074c399f3
commit b8bd667782
4 changed files with 22 additions and 36 deletions

View file

@ -4,11 +4,17 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "FuzzyMatch.h"
#include <AK/CharacterTypes.h>
#include <string.h>
#pragma once
namespace Assistant {
#include <AK/CharacterTypes.h>
#include <AK/String.h>
namespace AK {
// Outcome of a fuzzy-match attempt: a match flag paired with a heuristic score.
struct FuzzyMatchResult {
    // Match flag; a default-constructed result reports "not matched".
    bool matched = false;
    // Score associated with the match; defaults to 0.
    int score = 0;
};
// Limits used by the fuzzy matcher (constexpr already implies const).
static constexpr int RECURSION_LIMIT = 10; // recursion bound
static constexpr int MAX_MATCHES = 256;    // size of the matches buffer
@ -118,7 +124,15 @@ static FuzzyMatchResult fuzzy_match_recursive(String const& needle, String const
return { true, out_score };
}
FuzzyMatchResult fuzzy_match(String const& needle, String const& haystack)
// This fuzzy_match algorithm is based on a similar algorithm used by Sublime Text. The key insight is that instead
// of totalling the distance between characters (i.e. Levenshtein distance), we apply some meaningful heuristics
// related to the dataset that we're trying to match in order to build up a score. Scores can then be sorted and
// displayed with the highest at the top.
//
// Scores are not normalized between any values and have no particular meaning. The starting value is 100 and when we
// detect good indicators of a match we add to the score. When we detect bad indicators, we penalize the match and subtract
// from its score. Therefore, the longer the needle/haystack the greater the range of scores could be.
static FuzzyMatchResult fuzzy_match(String const& needle, String const& haystack)
{
int recursion_count = 0;
u8 matches[MAX_MATCHES] {};
@ -126,3 +140,5 @@ FuzzyMatchResult fuzzy_match(String const& needle, String const& haystack)
}
}
using AK::fuzzy_match;
using AK::FuzzyMatchResult;

View file

@ -6,7 +6,6 @@ serenity_component(
set(SOURCES
Providers.cpp
FuzzyMatch.cpp
main.cpp
)

View file

@ -1,29 +0,0 @@
/*
* Copyright (c) 2021, Spencer Dixon <spencercdixon@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <AK/Tuple.h>
namespace Assistant {
// Outcome of a fuzzy_match() call: a match flag paired with a heuristic score.
struct FuzzyMatchResult {
    bool matched = false;
    int score = 0;
};
// fuzzy_match follows an approach similar to the one used by Sublime Text. Rather than totalling a
// distance between characters (i.e. Levenshtein distance), it applies heuristics that are meaningful
// for the dataset being matched and accumulates them into a score. Results can then be sorted by score
// and displayed with the highest at the top.
//
// Scores are not normalized to any range and carry no particular meaning on their own. A score starts
// at 100; good indicators of a match add to it, while bad indicators are penalized by subtracting from
// it. As a consequence, the longer the needle/haystack, the wider the range of possible scores.
FuzzyMatchResult fuzzy_match(String const& needle, String const& haystack);
}

View file

@ -5,7 +5,7 @@
*/
#include "Providers.h"
#include "FuzzyMatch.h"
#include <AK/FuzzyMatch.h>
#include <AK/LexicalPath.h>
#include <AK/URL.h>
#include <LibCore/DirIterator.h>