LibPublicSuffix: Add Library and Generators

This commit is contained in:
Cameron Youell 2023-05-31 11:38:52 +10:00 committed by Sam Atkins
parent c53d3e7aa4
commit 8fcf42f684
10 changed files with 287 additions and 0 deletions

View file

@ -17,6 +17,7 @@ serenity_option(ENABLE_COMPILETIME_HEADER_CHECK OFF CACHE BOOL "Enable compileti
serenity_option(ENABLE_TIME_ZONE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of the IANA Time Zone Database at build time")
serenity_option(ENABLE_UNICODE_DATABASE_DOWNLOAD ON CACHE BOOL "Enable download of Unicode UCD and CLDR files at build time")
serenity_option(ENABLE_PUBLIC_SUFFIX_DOWNLOAD ON CACHE BOOL "Enable download of the Public Suffix List at build time")
serenity_option(INCLUDE_WASM_SPEC_TESTS OFF CACHE BOOL "Download and include the WebAssembly spec testsuite")
serenity_option(INCLUDE_FLAC_SPEC_TESTS OFF CACHE BOOL "Download and include the FLAC spec testsuite")
serenity_option(ENABLE_CACERT_DOWNLOAD ON CACHE BOOL "Enable download of cacert.pem at build time")

View file

@ -0,0 +1,25 @@
# Build-time wiring for the Public Suffix List: downloads the list and registers the
# generator that produces PublicSuffixData.h / PublicSuffixData.cpp from it.
# When ENABLE_PUBLIC_SUFFIX_DOWNLOAD is off, nothing here runs and this file does not
# set PUBLIC_SUFFIX_SOURCES.
# download_file() and invoke_generator() are not defined in this file; presumably they
# come from utils.cmake included below — confirm.
include(${CMAKE_CURRENT_LIST_DIR}/utils.cmake)
if (ENABLE_PUBLIC_SUFFIX_DOWNLOAD)
# Cache directory for the downloaded list (user-overridable cache entry).
set(PUBLIC_SUFFIX_PATH "${SERENITY_CACHE_DIR}/PublicSuffix" CACHE PATH "Download location for PublicSuffix files")
set(PUBLIC_SUFFIX_DATA_URL "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat")
set(PUBLIC_SUFFIX_DATA_PATH "${PUBLIC_SUFFIX_PATH}/public_suffix_list.dat")
# Names of the two generated files.
set(PUBLIC_SUFFIX_DATA_HEADER PublicSuffixData.h)
set(PUBLIC_SUFFIX_DATA_IMPLEMENTATION PublicSuffixData.cpp)
download_file("${PUBLIC_SUFFIX_DATA_URL}" "${PUBLIC_SUFFIX_DATA_PATH}")
# "-p" is the generator's public-suffix-list-path option; the header/implementation
# output paths are presumably forwarded by invoke_generator itself — confirm.
invoke_generator(
"PublicSuffixData"
Lagom::GeneratePublicSuffixData
"${PUBLIC_SUFFIX_PATH}/"
"${PUBLIC_SUFFIX_DATA_HEADER}"
"${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}"
arguments -p "${PUBLIC_SUFFIX_DATA_PATH}"
)
# Generated files that the including CMakeLists.txt appends to its source list.
set(PUBLIC_SUFFIX_SOURCES
${PUBLIC_SUFFIX_DATA_HEADER}
${PUBLIC_SUFFIX_DATA_IMPLEMENTATION}
)
endif()

View file

@ -404,6 +404,7 @@ if (BUILD_LAGOM)
Markdown
PDF
Protocol
PublicSuffix
Regex
SoftGPU
SQL

View file

@ -3,6 +3,7 @@ add_subdirectory(IPCCompiler)
add_subdirectory(LibEDID)
add_subdirectory(LibGL)
add_subdirectory(LibLocale)
add_subdirectory(LibPublicSuffix)
add_subdirectory(LibTimeZone)
add_subdirectory(LibUnicode)
add_subdirectory(LibWeb)

View file

@ -0,0 +1 @@
# Host-side code generator tool built from GeneratePublicSuffixData.cpp and linked against LibMain.
lagom_tool(GeneratePublicSuffixData SOURCES GeneratePublicSuffixData.cpp LIBS LibMain)

View file

@ -0,0 +1,185 @@
/*
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "../LibUnicode/GeneratorUtil.h"
#include <AK/SourceGenerator.h>
#include <AK/StringBuilder.h>
#include <LibCore/ArgsParser.h>
#include <LibCore/File.h>
#include <LibMain/Main.h>
// Forward declarations: both generators take the opened suffix list and the output file,
// and are defined after serenity_main.
ErrorOr<void> generate_header_file(Core::InputBufferedFile&, Core::File&);
ErrorOr<void> generate_implementation_file(Core::InputBufferedFile&, Core::File&);
// Tool entry point.
// Parses the three path options, opens the suffix list for reading and both outputs for
// writing, then emits the header followed by the implementation. Any failure from opening
// or generating is propagated to the caller via TRY.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView header_path;
    StringView implementation_path;
    StringView suffix_list_path;

    Core::ArgsParser parser;
    parser.add_option(header_path, "Path to the header file to generate", "generated-header-path", 'h', "generated-header-path");
    parser.add_option(implementation_path, "Path to the implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    parser.add_option(suffix_list_path, "Path to the public suffix list", "public-suffix-list-path", 'p', "public-suffix-list-path");
    parser.parse(arguments);

    // Input first, then the two outputs, in the same order they are consumed below.
    auto suffix_list = TRY(open_file(suffix_list_path, Core::File::OpenMode::Read));
    auto header_output = TRY(Core::File::open(header_path, Core::File::OpenMode::Write));
    auto implementation_output = TRY(Core::File::open(implementation_path, Core::File::OpenMode::Write));

    TRY(generate_header_file(*suffix_list, *header_output));
    TRY(generate_implementation_file(*suffix_list, *implementation_output));

    return 0;
}
// Writes the fixed text of PublicSuffixData.h to `file`.
// The suffix-list input is accepted for signature symmetry with
// generate_implementation_file but is not read here: the header content is constant.
ErrorOr<void> generate_header_file(Core::InputBufferedFile&, Core::File& file)
{
    StringBuilder output;
    SourceGenerator header { output };

    // Declares the PublicSuffixData singleton and its lookup entry point.
    header.append(R"~~~(
#pragma once
#include <AK/DeprecatedString.h>
#include <AK/Forward.h>
#include <AK/Trie.h>
namespace PublicSuffix {
class PublicSuffixData {
protected:
PublicSuffixData();
public:
PublicSuffixData(PublicSuffixData const&) = delete;
PublicSuffixData& operator=(PublicSuffixData const&) = delete;
static PublicSuffixData* the()
{
static PublicSuffixData* s_the;
if (!s_the)
s_the = new PublicSuffixData;
return s_the;
}
ErrorOr<Optional<String>> get_public_suffix(StringView string);
private:
Trie<char, DeprecatedString> m_dictionary;
};
} // namespace PublicSuffix
)~~~");

    auto const generated_bytes = header.as_string_view().bytes();
    TRY(file.write_until_depleted(generated_bytes));
    return {};
}
// Writes PublicSuffixData.cpp to `file`.
//
// Reads the suffix list from `input` line by line and emits:
//   1. a static Vector<StringView> holding every rule with its dot-separated labels in
//      reverse order (e.g. "co.uk" is stored as "uk.co"),
//   2. the PublicSuffixData constructor, which inserts every stored rule into a
//      character trie, and
//   3. PublicSuffixData::get_public_suffix(), which walks a host's labels from the last
//      one backwards and extends the match while the trie lookup succeeds (trying the
//      literal label first, then "*").
//
// Lines that are empty, whitespace-only, or start with "//" are skipped. Each line is read
// into a 1024-byte buffer.
ErrorOr<void> generate_implementation_file(Core::InputBufferedFile& input, Core::File& file)
{
    StringBuilder builder;
    SourceGenerator generator { builder };
    generator.append(R"~~~(
#include <LibPublicSuffix/PublicSuffixData.h>
#include <AK/Vector.h>
#include <AK/String.h>
namespace PublicSuffix {
static Vector<StringView> s_public_suffixes {)~~~");

    Array<u8, 1024> buffer {};
    while (TRY(input.can_read_line())) {
        auto raw_line = TRY(input.read_line(buffer));

        // The list format ends a rule at whitespace, so strip surrounding whitespace before
        // classifying the line. Otherwise a whitespace-only line would be emitted as a rule
        // and a padded rule would never match a lookup.
        auto line = raw_line.trim_whitespace();
        if (line.is_empty() || line.starts_with("//"sv))
            continue;

        // Store labels in reverse order so lookups can proceed from the TLD inwards.
        auto labels = line.split_view("."sv);
        labels.reverse();

        // Named distinctly from the outer `builder` to avoid shadowing it.
        StringBuilder reversed_rule;
        reversed_rule.join("."sv, labels);

        generator.set("line", reversed_rule.string_view());
        generator.append(R"~~~(
{"@line@"sv},)~~~");
    }

    // NOTE(review): the generated constructor below attaches metadata to every intermediate
    // trie node, and the generated can_find() only checks that metadata is present. That
    // looks like it accepts any character prefix of a listed rule (not just whole rules).
    // Wildcard rules whose parent label is not itself listed appear to rely on this, so it
    // is left unchanged here — confirm intended behavior before altering.
    generator.append(R"~~~(
};
PublicSuffixData::PublicSuffixData()
: m_dictionary('/', "")
{
// FIXME: Reduce the depth of this trie
for (auto str : s_public_suffixes) {
MUST(m_dictionary.insert(str.begin(), str.end(), str, [](auto& parent, auto& it) -> Optional<DeprecatedString> {
return DeprecatedString::formatted("{}{}", parent.metadata_value(), *it);
}));
}
}
ErrorOr<Optional<String>> PublicSuffixData::get_public_suffix(StringView string)
{
auto input = string.split_view("."sv);
input.reverse();
auto can_find = [&](StringView input) -> bool {
auto it = input.begin();
auto& node = m_dictionary.traverse_until_last_accessible_node(it, input.end());
return it.is_end() && node.metadata().has_value();
};
StringBuilder overall_search_string;
StringBuilder search_string;
for (auto part : input) {
search_string.clear();
TRY(search_string.try_append(TRY(overall_search_string.to_string())));
TRY(search_string.try_append(part));
if (can_find(search_string.string_view())) {
overall_search_string.append(TRY(String::from_utf8(part)));
overall_search_string.append("."sv);
continue;
}
search_string.clear();
TRY(search_string.try_append(TRY(overall_search_string.to_string())));
TRY(search_string.try_append("*"sv));
if (can_find(search_string.string_view())) {
overall_search_string.append(TRY(String::from_utf8(part)));
overall_search_string.append("."sv);
continue;
}
break;
}
auto view = overall_search_string.string_view().split_view("."sv);
view.reverse();
StringBuilder return_string_builder;
return_string_builder.join('.', view);
auto returnString = TRY(return_string_builder.to_string());
if (!returnString.is_empty())
return returnString;
return Optional<String> {};
}
} // namespace PublicSuffix
)~~~");

    TRY(file.write_until_depleted(generator.as_string_view().bytes()));
    return {};
}

View file

@ -44,6 +44,7 @@ add_subdirectory(LibPartition)
add_subdirectory(LibPCIDB)
add_subdirectory(LibPDF)
add_subdirectory(LibProtocol)
add_subdirectory(LibPublicSuffix)
add_subdirectory(LibRegex)
add_subdirectory(LibSanitizer)
add_subdirectory(LibSoftGPU)

View file

@ -0,0 +1,10 @@
# LibPublicSuffix: URL.cpp plus the generated PublicSuffixData sources.
# PUBLIC_SUFFIX_SOURCES is only set by public_suffix.cmake when
# ENABLE_PUBLIC_SUFFIX_DOWNLOAD is on; otherwise it expands to nothing here.
include(${SerenityOS_SOURCE_DIR}/Meta/CMake/public_suffix.cmake)
set(SOURCES
URL.cpp
${PUBLIC_SUFFIX_SOURCES}
)
# NOTE(review): CURRENT_LIB_GENERATED is not set in this file; presumably populated by
# invoke_generator — confirm.
set(GENERATED_SOURCES ${CURRENT_LIB_GENERATED})
serenity_lib(LibPublicSuffix publicsuffix)
# $<BOOL:...> expands to 0 or 1, so the macro is ALWAYS defined for this target.
# C++ code must test its value (#if ENABLE_PUBLIC_SUFFIX_DOWNLOAD), not its definedness.
target_compile_definitions(LibPublicSuffix PRIVATE ENABLE_PUBLIC_SUFFIX_DOWNLOAD=$<BOOL:${ENABLE_PUBLIC_SUFFIX_DOWNLOAD}>)

View file

@ -0,0 +1,47 @@
/*
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/String.h>
#include <AK/URL.h>
#include <LibPublicSuffix/URL.h>
// ENABLE_PUBLIC_SUFFIX_DOWNLOAD is passed by the build as =0 or =1, so it is always
// defined. Test its value: `defined(...)` would be true even when the feature is off and
// PublicSuffixData.h has not been generated.
#if ENABLE_PUBLIC_SUFFIX_DOWNLOAD
#    include <LibPublicSuffix/PublicSuffixData.h>
#endif
namespace PublicSuffix {
// Validates `url` and returns it as a String suitable for navigation.
//
// Without public suffix data (ENABLE_PUBLIC_SUFFIX_DOWNLOAD == 0) the input is returned
// unchanged after UTF-8 validation.
//
// With public suffix data:
//   - "https://" is prepended when the input contains no "://".
//   - An unparsable result yields Error "Invalid URL".
//   - IPv4/IPv6 hosts and non-http(s) schemes are accepted as-is.
//   - A named host is accepted when the suffix list yields a public suffix for it, or when
//     it ends with ".local" or "localhost".
//   - Anything else yields Error "Invalid URL".
//
// Errors from String construction/formatting and from the suffix lookup are propagated.
ErrorOr<String> absolute_url(StringView url)
{
    String out = TRY(String::from_utf8(url));
#if !ENABLE_PUBLIC_SUFFIX_DOWNLOAD
    return out;
#else
    // Treat scheme-less input as an https URL.
    if (!out.contains("://"sv))
        out = TRY(String::formatted("https://{}"sv, out));

    auto final_url = URL::create_with_url_or_path(out.to_deprecated_string());
    if (!final_url.is_valid())
        return Error::from_string_view("Invalid URL"sv);

    // Literal IP addresses have no public suffix to check.
    if (final_url.host().has<URL::IPv4Address>() || final_url.host().has<URL::IPv6Address>())
        return out;

    // Only http(s) hosts are checked against the suffix list.
    if (final_url.scheme() != "http"sv && final_url.scheme() != "https"sv)
        return out;

    if (final_url.host().has<String>()) {
        auto string_host = final_url.host().get<String>();
        auto maybe_public_suffix = TRY(PublicSuffixData::the()->get_public_suffix(string_host));
        if (maybe_public_suffix.has_value())
            return out;

        // Local names are not in the suffix list but are still navigable.
        if (string_host.ends_with_bytes(".local"sv) || string_host.ends_with_bytes("localhost"sv))
            return out;
    }

    return Error::from_string_view("Invalid URL"sv);
#endif
}
}

View file

@ -0,0 +1,15 @@
/*
* Copyright (c) 2023, Cameron Youell <cameronyouell@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
namespace PublicSuffix {
// Validates `url` and returns it as a String, or an Error ("Invalid URL") when it is
// rejected. When built with public suffix data, input without "://" gets "https://"
// prepended and http(s) hosts are checked against the public suffix list; see URL.cpp
// for the exact acceptance rules.
ErrorOr<String> absolute_url(StringView url);
}