From 707a36dd793af00679328846ca913c824927c9ae Mon Sep 17 00:00:00 2001 From: Tim Schumacher Date: Mon, 1 Jan 2024 13:46:50 +0100 Subject: [PATCH] LibCompress/Brotli: Update the lookback buffer with uncompressed data We previously skipped updating the lookback buffer when copying uncompressed data, which resulted in a wrong total byte count. With a wrong total byte count, our decompressor implementation ended up choosing a wrong offset into the dictionary. --- Tests/LibCompress/TestBrotli.cpp | 57 +++++++++++++++++++++++ Userland/Libraries/LibCompress/Brotli.cpp | 4 ++ 2 files changed, 61 insertions(+) diff --git a/Tests/LibCompress/TestBrotli.cpp b/Tests/LibCompress/TestBrotli.cpp index e31a1bcb26..7102f9f8ce 100644 --- a/Tests/LibCompress/TestBrotli.cpp +++ b/Tests/LibCompress/TestBrotli.cpp @@ -6,9 +6,66 @@ #include +#include +#include +#include #include #include +TEST_CASE(dictionary_use_after_uncompressed_block) +{ + // This input file contains one block of uncompressed data ("WHF") and then invokes + // a copy command that, together with the default distance, results in a dictionary + // lookup-and-copy ("categories"). + // That in particular isn't a special combination, but dictionary indices depend on + // the count of bytes that have been decompressed so far, and we previously had + // a bug where uncompressed data was unaccounted for. + + auto stream = make(); + + // Brotli operates on bits instead of bytes, so we can't easily use a well-documented byte array. + // Instead, assemble the test case on-the-fly via a bit stream. + auto stream_in = LittleEndianOutputBitStream { MaybeOwned(*stream) }; + MUST(stream_in.write_bits(0b0u, 1u)); // WBITS = 16 + + MUST(stream_in.write_bits(0b0u, 1u)); // ISLAST = false + MUST(stream_in.write_bits(0b00u, 2u)); // MNIBBLES = 4 + MUST(stream_in.write_bits(2u, 16u)); // MLEN - 1 = 2 + MUST(stream_in.write_bits(0b1u, 1u)); // ISUNCOMPRESSED = true + MUST(stream_in.align_to_byte_boundary()); + MUST(stream_in.write_until_depleted("WHF"sv.bytes())); // Literal uncompressed data + + MUST(stream_in.write_bits(0b1u, 1u)); // ISLAST = true + MUST(stream_in.write_bits(0b0u, 1u)); // ISLASTEMPTY = false + MUST(stream_in.write_bits(0b00u, 2u)); // MNIBBLES = 4 + MUST(stream_in.write_bits(9u, 16u)); // MLEN - 1 = 9 + MUST(stream_in.write_bits(0b0u, 1u)); // NBLTYPESL = 1 + MUST(stream_in.write_bits(0b0u, 1u)); // NBLTYPESI = 1 + MUST(stream_in.write_bits(0b0u, 1u)); // NBLTYPESD = 1 + MUST(stream_in.write_bits(0b00u, 2u)); // NPOSTFIX = 0 + MUST(stream_in.write_bits(0b0000u, 4u)); // NDIRECT = 0 + MUST(stream_in.write_bits(0b10u, 2u)); // CMODE[0] = 2 + MUST(stream_in.write_bits(0b0u, 1u)); // NTREESL = 1 + MUST(stream_in.write_bits(0b0u, 1u)); // NTREESD = 1 + MUST(stream_in.write_bits(0b01u, 2u)); // literal_codes[0] hskip = 1 + MUST(stream_in.write_bits(0b00u, 2u)); // literal_codes[0] number of symbols - 1 = 0 + MUST(stream_in.write_bits(0u, 8u)); // literal_codes[0] symbols[0] = 0 (unused) + MUST(stream_in.write_bits(0b01u, 2u)); // iac_codes[0] hskip = 1 + MUST(stream_in.write_bits(0b00u, 2u)); // iac_codes[0] number of symbols - 1 = 0 + MUST(stream_in.write_bits(64u, 10u)); // iac_codes[0] symbols[0] = 64 (index = 1, insert_offset = 0, copy_offset = 0) + MUST(stream_in.write_bits(0b01u, 2u)); // distance_codes[0] hskip = 1 + MUST(stream_in.write_bits(0b00u, 2u)); // distance_codes[0] number of symbols - 1 = 0 + MUST(stream_in.write_bits(0u, 6u)); // distance_codes[0] symbols[0] = 0 (unused) + + MUST(stream_in.align_to_byte_boundary()); + MUST(stream_in.flush_buffer_to_stream()); + + auto decompressor = Compress::BrotliDecompressionStream { MaybeOwned(*stream) }; + auto buffer = TRY_OR_FAIL(decompressor.read_until_eof()); + + EXPECT_EQ(buffer.span(), "WHFcategories"sv.bytes()); +} + static void run_test(StringView const file_name) { // This makes sure that the tests will run both on target and in Lagom. diff --git a/Userland/Libraries/LibCompress/Brotli.cpp b/Userland/Libraries/LibCompress/Brotli.cpp index 9b7b4c4cba..27489ff770 100644 --- a/Userland/Libraries/LibCompress/Brotli.cpp +++ b/Userland/Libraries/LibCompress/Brotli.cpp @@ -735,6 +735,10 @@ ErrorOr BrotliDecompressionStream::read_some(Bytes output_buffer) if (uncompressed_bytes.is_empty()) return Error::from_string_literal("eof"); + // TODO: Replace the home-grown LookbackBuffer with AK::CircularBuffer. + for (auto c : uncompressed_bytes) + m_lookback_buffer.value().write(c); + m_bytes_left -= uncompressed_bytes.size(); bytes_read += uncompressed_bytes.size();