From e1099a1757d22c5725ffdc79538318b420668051 Mon Sep 17 00:00:00 2001 From: Tim Ledbetter Date: Sun, 5 Nov 2023 19:24:59 +0000 Subject: [PATCH] Fuzzers: Use a single fuzzer to test all LibTextCodec encodings This commit replaces the 5 fuzzers that previously tested LibTextCodec with a single fuzzer. We now rely on the fuzzer to generate the encoding and separate it from the encoded data with a magic separator. This increases the overall coverage of LibTextCodec and eliminates the possibility of the same error being generated by multiple fuzzers. --- Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp | 18 -------------- Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp | 18 -------------- Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp | 18 -------------- Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp | 18 -------------- Meta/Lagom/Fuzzers/FuzzTextDecoder.cpp | 29 ++++++++++++++++++++++ Meta/Lagom/Fuzzers/FuzzTextDecoder.dict | 15 +++++++++++ Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp | 18 -------------- Meta/Lagom/Fuzzers/fuzzers.cmake | 12 ++------- Userland/Utilities/test-fuzz.cpp | 6 +---- 9 files changed, 47 insertions(+), 105 deletions(-) delete mode 100644 Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp delete mode 100644 Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp delete mode 100644 Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp delete mode 100644 Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp create mode 100644 Meta/Lagom/Fuzzers/FuzzTextDecoder.cpp create mode 100644 Meta/Lagom/Fuzzers/FuzzTextDecoder.dict delete mode 100644 Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp diff --git a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp deleted file mode 100644 index 3095a51958..0000000000 --- a/Meta/Lagom/Fuzzers/FuzzCyrillicDecoder.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2021, the SerenityOS developers. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) -{ - AK::set_debug_enabled(false); - auto decoder = TextCodec::decoder_for("windows-1251"sv); - VERIFY(decoder.has_value()); - (void)decoder->to_utf8({ data, size }); - return 0; -} diff --git a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp deleted file mode 100644 index 2fdcd27559..0000000000 --- a/Meta/Lagom/Fuzzers/FuzzHebrewDecoder.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2021, the SerenityOS developers. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) -{ - AK::set_debug_enabled(false); - auto decoder = TextCodec::decoder_for("windows-1255"sv); - VERIFY(decoder.has_value()); - (void)decoder->to_utf8({ data, size }); - return 0; -} diff --git a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp deleted file mode 100644 index 85472507f5..0000000000 --- a/Meta/Lagom/Fuzzers/FuzzLatin1Decoder.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2021, the SerenityOS developers. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) -{ - AK::set_debug_enabled(false); - auto decoder = TextCodec::decoder_for("windows-1252"sv); - VERIFY(decoder.has_value()); - (void)decoder->to_utf8({ data, size }); - return 0; -} diff --git a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp b/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp deleted file mode 100644 index 0d3394cc79..0000000000 --- a/Meta/Lagom/Fuzzers/FuzzLatin2Decoder.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2021, the SerenityOS developers. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) -{ - AK::set_debug_enabled(false); - auto decoder = TextCodec::decoder_for("iso-8859-2"sv); - VERIFY(decoder.has_value()); - (void)decoder->to_utf8({ data, size }); - return 0; -} diff --git a/Meta/Lagom/Fuzzers/FuzzTextDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzTextDecoder.cpp new file mode 100644 index 0000000000..3125527ac7 --- /dev/null +++ b/Meta/Lagom/Fuzzers/FuzzTextDecoder.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021-2023, the SerenityOS developers. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) +{ + AK::set_debug_enabled(false); + + static constexpr StringView MAGIC_SEPARATOR = "|DATA|"sv; + StringView data_string_view { data, size }; + auto separator_index = data_string_view.find(MAGIC_SEPARATOR); + if (!separator_index.has_value()) + return 0; + + auto encoding = data_string_view.substring_view(0, separator_index.value()); + auto encoded_data = data_string_view.substring_view(separator_index.value() + MAGIC_SEPARATOR.length()); + auto decoder = TextCodec::decoder_for(encoding); + if (!decoder.has_value()) + return 0; + + (void)decoder->to_utf8(encoded_data); + return 0; +} diff --git a/Meta/Lagom/Fuzzers/FuzzTextDecoder.dict b/Meta/Lagom/Fuzzers/FuzzTextDecoder.dict new file mode 100644 index 0000000000..2caa3ca5b3 --- /dev/null +++ b/Meta/Lagom/Fuzzers/FuzzTextDecoder.dict @@ -0,0 +1,15 @@ +magic_separator="|DATA|" + +# encodings +cyrillic_encoding="windows-1251" +hebrew_encoding="windows-1255" +koi8r_encoding="koi8-r" +latin1_encoding="windows-1252" +latin2_encoding="iso-8859-2" +latin9_encoding="iso-8859-15" +mac_roman_encoding="macintosh" +turkish_encoding="windows-1254" +user_defined_encoding="x-user-defined" +utf16be_encoding="utf-16be" +utf16le_encoding="utf-16le" +utf8_encoding="utf-8" diff --git a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp b/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp deleted file mode 100644 index 2541293138..0000000000 --- a/Meta/Lagom/Fuzzers/FuzzUTF16BEDecoder.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2021, the SerenityOS developers. - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include - -extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) -{ - AK::set_debug_enabled(false); - auto decoder = TextCodec::decoder_for("utf-16be"sv); - VERIFY(decoder.has_value()); - (void)decoder->to_utf8({ data, size }); - return 0; -} diff --git a/Meta/Lagom/Fuzzers/fuzzers.cmake b/Meta/Lagom/Fuzzers/fuzzers.cmake index e7fa63826c..1edfd8faa7 100644 --- a/Meta/Lagom/Fuzzers/fuzzers.cmake +++ b/Meta/Lagom/Fuzzers/fuzzers.cmake @@ -3,7 +3,6 @@ set(FUZZER_TARGETS BLAKE2b BMPLoader Brotli - CyrillicDecoder DDSLoader DNSPacket DeflateCompression @@ -14,7 +13,6 @@ set(FUZZER_TARGETS GIFLoader GzipCompression GzipDecompression - HebrewDecoder HttpRequest ICCProfile ICOLoader @@ -23,8 +21,6 @@ set(FUZZER_TARGETS JPEGLoader Js JsonParser - Latin1Decoder - Latin2Decoder LzmaDecompression LzmaRoundtrip Markdown @@ -53,12 +49,12 @@ set(FUZZER_TARGETS ShellPosix SQLParser Tar + TextDecoder TGALoader TIFFLoader TTF TinyVGLoader URL - UTF16BEDecoder VP9Decoder WasmParser WAVLoader @@ -79,7 +75,6 @@ set(FUZZER_DEPENDENCIES_BLAKE2b LibCrypto) set(FUZZER_DEPENDENCIES_BMPLoader LibGfx) set(FUZZER_DEPENDENCIES_Brotli LibCompress) set(FUZZER_DEPENDENCIES_CSSParser LibWeb) -set(FUZZER_DEPENDENCIES_CyrillicDecoder LibTextCodec) set(FUZZER_DEPENDENCIES_DDSLoader LibGfx) set(FUZZER_DEPENDENCIES_DNSPacket LibDNS) set(FUZZER_DEPENDENCIES_DeflateCompression LibCompress) @@ -90,7 +85,6 @@ set(FUZZER_DEPENDENCIES_Gemini LibGemini) set(FUZZER_DEPENDENCIES_GIFLoader LibGfx) set(FUZZER_DEPENDENCIES_GzipCompression LibCompress) set(FUZZER_DEPENDENCIES_GzipDecompression LibCompress) -set(FUZZER_DEPENDENCIES_HebrewDecoder LibTextCodec) set(FUZZER_DEPENDENCIES_HttpRequest LibHTTP) set(FUZZER_DEPENDENCIES_ICCProfile LibGfx) set(FUZZER_DEPENDENCIES_ICOLoader LibGfx) @@ -98,8 +92,6 @@ set(FUZZER_DEPENDENCIES_ILBMLoader LibGfx) set(FUZZER_DEPENDENCIES_IMAPParser LibIMAP) set(FUZZER_DEPENDENCIES_JPEGLoader LibGfx) set(FUZZER_DEPENDENCIES_Js LibJS) -set(FUZZER_DEPENDENCIES_Latin1Decoder LibTextCodec) -set(FUZZER_DEPENDENCIES_Latin2Decoder LibTextCodec) set(FUZZER_DEPENDENCIES_LzmaDecompression LibArchive LibCompress) set(FUZZER_DEPENDENCIES_LzmaRoundtrip LibCompress) set(FUZZER_DEPENDENCIES_Markdown LibMarkdown) @@ -128,11 +120,11 @@ set(FUZZER_DEPENDENCIES_Shell LibShell) set(FUZZER_DEPENDENCIES_ShellPosix LibShell) set(FUZZER_DEPENDENCIES_SQLParser LibSQL) set(FUZZER_DEPENDENCIES_Tar LibArchive) +set(FUZZER_DEPENDENCIES_TextDecoder LibTextCodec) set(FUZZER_DEPENDENCIES_TGALoader LibGfx) set(FUZZER_DEPENDENCIES_TIFFLoader LibGfx) set(FUZZER_DEPENDENCIES_TTF LibGfx) set(FUZZER_DEPENDENCIES_TinyVGLoader LibGfx) -set(FUZZER_DEPENDENCIES_UTF16BEDecoder LibTextCodec) set(FUZZER_DEPENDENCIES_VP9Decoder LibVideo) set(FUZZER_DEPENDENCIES_WasmParser LibWasm) set(FUZZER_DEPENDENCIES_WAVLoader LibAudio) diff --git a/Userland/Utilities/test-fuzz.cpp b/Userland/Utilities/test-fuzz.cpp index 6fc60c7dd6..c5b00839b8 100644 --- a/Userland/Utilities/test-fuzz.cpp +++ b/Userland/Utilities/test-fuzz.cpp @@ -17,7 +17,6 @@ T(BMPLoader) \ T(Brotli) \ T(CSSParser) \ - T(CyrillicDecoder) \ T(DDSLoader) \ T(DNSPacket) \ T(DeflateCompression) \ @@ -28,7 +27,6 @@ T(GIFLoader) \ T(GzipCompression) \ T(GzipDecompression) \ - T(HebrewDecoder) \ T(HttpRequest) \ T(ICCProfile) \ T(ICOLoader) \ @@ -37,8 +35,6 @@ T(JPEGLoader) \ T(Js) \ T(JsonParser) \ - T(Latin1Decoder) \ - T(Latin2Decoder) \ T(LzmaDecompression) \ T(LzmaRoundtrip) \ T(Markdown) \ @@ -67,12 +63,12 @@ T(ShellPosix) \ T(SQLParser) \ T(Tar) \ + T(TextDecoder) \ T(TGALoader) \ T(TIFFLoader) \ T(TTF) \ T(TinyVGLoader) \ T(URL) \ - T(UTF16BEDecoder) \ T(VP9Decoder) \ T(WasmParser) \ T(WAVLoader) \