From f1b79e0cd30b9f55275eeb73ac766433f98ef227 Mon Sep 17 00:00:00 2001
From: Gurkirat Singh
Date: Wed, 27 Sep 2023 22:41:57 +0530
Subject: [PATCH] AK: Implement `slugify` function for URL slug generation

The slugify function converts its input into a URL-friendly slug. It
walks the input code point by code point, keeping ASCII alphanumeric
characters (lowercased) and dropping every other character. Any run of
ASCII whitespace and/or glue characters between two kept characters is
squeezed into a single glue character, and separators at the start or
end of the input are discarded, so the slug never carries a leading or
trailing separator.

It is currently used in LibMarkdown headings generation code.
---
 AK/CMakeLists.txt        |  1 +
 AK/Slugify.cpp           | 38 +++++++++++++++++++++++++++++++
 AK/Slugify.h             | 17 ++++++++++++++
 Tests/AK/CMakeLists.txt  |  1 +
 Tests/AK/TestSlugify.cpp | 48 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 105 insertions(+)
 create mode 100644 AK/Slugify.cpp
 create mode 100644 AK/Slugify.h
 create mode 100644 Tests/AK/TestSlugify.cpp

diff --git a/AK/CMakeLists.txt b/AK/CMakeLists.txt
index c5eb4c37e5..366b1d271f 100644
--- a/AK/CMakeLists.txt
+++ b/AK/CMakeLists.txt
@@ -24,6 +24,7 @@ set(AK_SOURCES
     OptionParser.cpp
     Random.cpp
     SipHash.cpp
+    Slugify.cpp
     StackInfo.cpp
     Stream.cpp
     String.cpp
diff --git a/AK/Slugify.cpp b/AK/Slugify.cpp
new file mode 100644
index 0000000000..51feb5ef78
--- /dev/null
+++ b/AK/Slugify.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023, Gurkirat Singh
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/CharacterTypes.h>
+#include <AK/Slugify.h>
+#include <AK/StringBuilder.h>
+
+namespace AK {
+// Builds a URL-friendly slug from `input`: ASCII alphanumerics are kept (lowercased), any run of
+// ASCII whitespace and/or `glue` between two kept characters becomes a single `glue`, and every
+// other code point is dropped. No separator is emitted before the first or after the last kept character.
+ErrorOr<String> slugify(String const& input, char const glue)
+{
+    StringBuilder builder;
+    bool has_output = false;
+    // A separator is only written once the next alphanumeric character shows up, so leading and
+    // trailing separators never reach the output and nothing has to be trimmed afterwards.
+    bool separator_pending = false;
+
+    for (auto const code_point : input.code_points()) {
+        if (is_ascii_alphanumeric(code_point)) {
+            if (separator_pending) {
+                builder.append_code_point(glue);
+                separator_pending = false;
+            }
+            builder.append_code_point(to_ascii_lowercase(code_point));
+            has_output = true;
+        } else if (code_point == static_cast<u32>(glue) || is_ascii_space(code_point)) {
+            separator_pending = has_output;
+        }
+    }
+
+    return builder.to_string();
+}
+}
diff --git a/AK/Slugify.h b/AK/Slugify.h
new file mode 100644
index 0000000000..477f09a9a1
--- /dev/null
+++ b/AK/Slugify.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2023, Gurkirat Singh
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/String.h>
+
+namespace AK {
+ErrorOr<String> slugify(String const& input, char glue = '-');
+}
+
+#if USING_AK_GLOBALLY
+using AK::slugify;
+#endif
diff --git a/Tests/AK/CMakeLists.txt b/Tests/AK/CMakeLists.txt
index 177fd6cf7b..02dcc4ebb9 100644
--- a/Tests/AK/CMakeLists.txt
+++ b/Tests/AK/CMakeLists.txt
@@ -65,6 +65,7 @@ set(AK_TEST_SOURCES
     TestRefPtr.cpp
     TestSIMD.cpp
     TestSinglyLinkedList.cpp
+    TestSlugify.cpp
     TestSourceGenerator.cpp
     TestSourceLocation.cpp
     TestSpan.cpp
diff --git a/Tests/AK/TestSlugify.cpp b/Tests/AK/TestSlugify.cpp
new file mode 100644
index 0000000000..afa8db73ee
--- /dev/null
+++ b/Tests/AK/TestSlugify.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023, Gurkirat Singh
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Slugify.h>
+#include <LibTest/TestCase.h>
+
+TEST_CASE(ignore_unicode_characters)
+{
+    EXPECT_EQ(MUST(slugify("Hello World!🎉"_string)), "hello-world"_string);
+}
+
+TEST_CASE(all_whitespace_empty_string)
+{
+    EXPECT_EQ(MUST(slugify(" "_string)), ""_string);
+}
+
+TEST_CASE(squeeze_multiple_whitespace)
+{
+    EXPECT_EQ(MUST(slugify("Hello   World"_string)), "hello-world"_string);
+}
+
+TEST_CASE(trim_trailing_whitespace)
+{
+    EXPECT_EQ(MUST(slugify("Hello World "_string)), "hello-world"_string);
+}
+
+TEST_CASE(trim_leading_whitespace)
+{
+    EXPECT_EQ(MUST(slugify(" Hello World"_string)), "hello-world"_string);
+}
+
+TEST_CASE(lowercase_all_result)
+{
+    EXPECT_EQ(MUST(slugify("HelloWorld"_string)), "helloworld"_string);
+}
+
+TEST_CASE(slug_glue_change)
+{
+    EXPECT_EQ(MUST(slugify("Hello World"_string, '|')), "hello|world"_string);
+}
+
+TEST_CASE(multiple_glue_squeeze)
+{
+    EXPECT_EQ(MUST(slugify("Hello_ World"_string, '_')), "hello_world"_string);
+}