LibGfx/WebP: Add CanonicalCode::write_symbol(), use it in writer

We still construct the code length codes manually, and now we also construct a PrefixCodeGroup manually that assigns 8 bits to all symbols (except for fully-opaque alpha channels, and for the unused distance codes, like before). But now we use the CanonicalCodes from that PrefixCodeGroup for writing. No behavior change at all, the output is bit-for-bit identical to before. But this is a step towards actually huffman-coding symbols. This is however a pretty big perf regression. For `image -o test.webp test.bmp` (where test.bmp is retro-sunset.png re-encoded as bmp), time goes from 23.7 ms to 33.2 ms. `animation -o wow.webp giphy.gif` goes from 85.5 ms to 127.7 ms. `animation -o wow.webp 7z7c.gif` goes from 12.6 ms to 16.5 ms.
2024-10-14 20:03:29 +00:00 · 2024-05-08 08:57:53 -04:00 · 2024-05-08 08:57:53 -04:00 · 7aa61ca49b
parent 1bd1b6e5e9
commit 7aa61ca49b
3 changed files with 31 additions and 11 deletions
--- a/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.cpp
+++ b/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.cpp
@ -32,4 +32,12 @@ ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& bit_stream)
        [&bit_stream](Compress::CanonicalCode const& code) { return code.read_symbol(bit_stream); }));
 }

+ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& bit_stream, u32 symbol) const
+{
+    TRY(m_code.visit(
+        [&](u32 single_code) -> ErrorOr<void> { VERIFY(symbol == single_code); return {};},
+        [&](Compress::CanonicalCode const& code) { return code.write_symbol(bit_stream, symbol); }));
+    return {};
+}
+
 }
--- a/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h
+++ b/Userland/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h
@ -23,6 +23,7 @@ public:

    static ErrorOr<CanonicalCode> from_bytes(ReadonlyBytes);
    ErrorOr<u32> read_symbol(LittleEndianInputBitStream&) const;
+    ErrorOr<void> write_symbol(LittleEndianOutputBitStream&, u32) const;

 private:
    explicit CanonicalCode(u32 single_symbol)
--- a/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp
+++ b/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp
@ -12,6 +12,7 @@
 #include <AK/MemoryStream.h>
 #include <LibCompress/DeflateTables.h>
 #include <LibGfx/Bitmap.h>
+#include <LibGfx/ImageFormats/WebPSharedLossless.h>
 #include <LibGfx/ImageFormats/WebPWriterLossless.h>

 namespace Gfx {
@ -25,7 +26,7 @@ static bool are_all_pixels_opaque(Bitmap const& bitmap)
    return true;
 }

-NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream& bit_stream, Bitmap const& bitmap, bool all_pixels_are_opaque)
+NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream& bit_stream, Bitmap const& bitmap, PrefixCodeGroup const& prefix_code_group)
 {
    // This is currently the hot loop. Keep performance in mind when you change it.
    for (ARGB32 pixel : bitmap) {
@ -34,15 +35,10 @@ NEVER_INLINE static ErrorOr<void> write_image_data(LittleEndianOutputBitStream&
        u8 g = pixel >> 8;
        u8 b = pixel;

-        // We wrote a huffman table that gives every symbol 8 bits. That means we can write the image data
-        // out uncompressed –- but we do need to reverse the bit order of the bytes.
-        TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[g], 8u));
-        TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[r], 8u));
-        TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[b], 8u));
-
-        // If all pixels are opaque, we wrote a one-element huffman table for alpha, which needs 0 bits per element.
-        if (!all_pixels_are_opaque)
-            TRY(bit_stream.write_bits(Compress::reverse8_lookup_table[a], 8u));
+        TRY(prefix_code_group[0].write_symbol(bit_stream, g));
+        TRY(prefix_code_group[1].write_symbol(bit_stream, r));
+        TRY(prefix_code_group[2].write_symbol(bit_stream, b));
+        TRY(prefix_code_group[3].write_symbol(bit_stream, a));
    }
    return {};
 }
@ -92,6 +88,7 @@ static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)

    bool all_pixels_are_opaque = are_all_pixels_opaque(bitmap);

+    PrefixCodeGroup prefix_code_group;
    int number_of_full_channels = all_pixels_are_opaque ? 3 : 4;
    for (int i = 0; i < number_of_full_channels; ++i) {
        TRY(bit_stream.write_bits(0u, 1u)); // Normal code length code.
@ -119,6 +116,9 @@ static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)
            TRY(bit_stream.write_bits(254u, 8u)); // max_symbol = 2 + 254
        }

+        auto bits_per_symbol = Array<u8, 256>::from_repeated_value(8);
+        prefix_code_group[i] = TRY(CanonicalCode::from_bytes(bits_per_symbol));
+
        // The code length codes only contain a single entry for '8'. WebP streams with a single element store 0 bits per element.
        // (This is different from deflate, which needs 1 bit per element.)
    }
@ -129,16 +129,27 @@ static ErrorOr<void> write_VP8L_image_data(Stream& stream, Bitmap const& bitmap)
        TRY(bit_stream.write_bits(0u, 1u));   // num_symbols - 1
        TRY(bit_stream.write_bits(1u, 1u));   // is_first_8bits
        TRY(bit_stream.write_bits(255u, 8u)); // symbol0
+        Array<u8, 256> bits_per_symbol {};
+        // "When coding a single leaf node [...], all but one code length are zeros, and the single leaf node value
+        //  is marked with the length of 1 -- even when no bits are consumed when that single leaf node tree is used."
+        // CanonicalCode follows that convention too, even when describing simple code lengths.
+        bits_per_symbol[255] = 1;
+        prefix_code_group[3] = TRY(CanonicalCode::from_bytes(bits_per_symbol));
    }

    // For code #5, use a simple empty code, since we don't use this yet.
+    // "Note: Another special case is when all prefix code lengths are zeros (an empty prefix code). [...]
+    //  empty prefix codes can be coded as those containing a single symbol 0."
    TRY(bit_stream.write_bits(1u, 1u)); // Simple code length code.
    TRY(bit_stream.write_bits(0u, 1u)); // num_symbols - 1
    TRY(bit_stream.write_bits(0u, 1u)); // is_first_8bits
    TRY(bit_stream.write_bits(0u, 1u)); // symbol0
+    Array<u8, 256> bits_per_symbol {};
+    bits_per_symbol[0] = 1; // See comment in `if (all_pixels_are_opaque)` block above.
+    prefix_code_group[4] = TRY(CanonicalCode::from_bytes(bits_per_symbol));

    // Image data.
-    TRY(write_image_data(bit_stream, bitmap, all_pixels_are_opaque));
+    TRY(write_image_data(bit_stream, bitmap, prefix_code_group));

    // FIXME: Make ~LittleEndianOutputBitStream do this, or make it VERIFY() that it has happened at least.
    TRY(bit_stream.align_to_byte_boundary());