From 873b0e9470691401630e39840d42ab0424634aff Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Fri, 2 Jun 2023 13:52:40 -0500 Subject: [PATCH] LibGfx/LibVideo: Read batches of multiple bytes in VPX BooleanDecoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does a few things: - The decoder uses a 32- or 64-bit integer as a reservoir of the data being decoded, rather than a single byte as it did previously. - `read_bool()` only refills the reservoir (value) when the number of bits left in it drops to one byte or less. Previously, it would read out a bit-sized range from the data to completely refill the 8-bit value, doing much more work than necessary for each individual read. - VP9-specific code for reading the marker bit was moved to its own function in Context.h. - A debug flag `VPX_DEBUG` was added to optionally enable checking of the final bits in a VPX ranged arithmetic decode and ensure that they contain all zeroes. These zeroes are a bitstream requirement for VP9, and are also present for all our lossy WebP test inputs currently. This can be useful to test whether all the data present in the range has been consumed. A lot of the size of this diff comes from the removal of error handling from all the range decoder reads in LibVideo/VP9 and LibGfx/WebP (VP8), since it is now checked only at the end of the range. In a benchmark decoding `Tests/LibGfx/test-inputs/4.webp`, decode times are improved by about 22.8%, reducing average runtime from 35.5ms±1.1ms down to 27.4ms±1.1ms. This should cause no behavioral changes. 
--- AK/Debug.h.in | 4 + Meta/CMake/all_the_debug_macros.cmake | 1 + .../LibGfx/ImageFormats/BooleanDecoder.cpp | 114 ++++++++++++------ .../LibGfx/ImageFormats/BooleanDecoder.h | 34 ++++-- .../LibGfx/ImageFormats/WebPLoaderLossy.cpp | 12 +- Userland/Libraries/LibVideo/VP9/Context.h | 20 ++- Userland/Libraries/LibVideo/VP9/Parser.cpp | 8 +- 7 files changed, 131 insertions(+), 62 deletions(-) diff --git a/AK/Debug.h.in b/AK/Debug.h.in index 087aa34a35..3e9bbb5ce5 100644 --- a/AK/Debug.h.in +++ b/AK/Debug.h.in @@ -470,6 +470,10 @@ # cmakedefine01 UTF8_DEBUG #endif +#ifndef VPX_DEBUG +# cmakedefine01 VPX_DEBUG +#endif + #ifndef WASI_DEBUG # cmakedefine01 WASI_DEBUG #endif diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake index 2a2b9a2611..ee82e3a84d 100644 --- a/Meta/CMake/all_the_debug_macros.cmake +++ b/Meta/CMake/all_the_debug_macros.cmake @@ -193,6 +193,7 @@ set(VFS_DEBUG ON) set(VIRTIO_DEBUG ON) set(VIRTUAL_CONSOLE_DEBUG ON) set(VMWARE_BACKDOOR_DEBUG ON) +set(VPX_DEBUG ON) set(WAITBLOCK_DEBUG ON) set(WAITQUEUE_DEBUG ON) set(WASI_DEBUG ON) diff --git a/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.cpp b/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.cpp index 7b51b419e4..5ecac9229f 100644 --- a/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.cpp +++ b/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.cpp @@ -6,58 +6,88 @@ */ #include +#include +#include #include "BooleanDecoder.h" namespace Gfx { -ErrorOr BooleanDecoder::initialize(MaybeOwned bit_stream, size_t size_in_bytes) +// 9.2.1 Initialization process for Boolean decoder +ErrorOr BooleanDecoder::initialize(ReadonlyBytes data) { - VERIFY(bit_stream->is_aligned_to_byte_boundary()); - auto value = TRY(bit_stream->read_value()); - u8 range = 255; - u64 bits_left = (8 * size_in_bytes) - 8; - return BooleanDecoder { move(bit_stream), value, range, bits_left }; -} + if (data.size() == 0) + return Error::from_string_literal("Size of decoder range 
cannot be zero"); -/* 9.2.1 */ -ErrorOr BooleanDecoder::initialize_vp9(MaybeOwned bit_stream, size_t size_in_bytes) -{ - BooleanDecoder decoder = TRY(initialize(move(bit_stream), size_in_bytes)); - if (TRY(decoder.read_bool(128))) - return Error::from_string_literal("Range decoder marker was non-zero"); + // NOTE: This implementation is shared between VP8 and VP9. Therefore, we do not check the + // marker bit at the start of the range decode that is required in the VP9 specification. + // This is instead handled by the function that instantiates all range decoders for the + // VP9 decoder. + + // NOTE: As noted below in fill_reservoir(), we read in multi-byte-sized chunks, + // so here we will deviate from the standard to count in bytes rather than bits. + auto decoder = BooleanDecoder { data.data(), data.size() }; + TRY(decoder.fill_reservoir()); return decoder; } -/* 9.2.2 */ +// Instead of filling the value field one bit at a time as the spec suggests, we store the +// data to be read in a reservoir of greater than one byte. This allows us to read out data +// for the entire reservoir at once, avoiding a lot of branch misses in read_bool(). +ErrorOr BooleanDecoder::fill_reservoir() +{ + if (m_value_bits_left > 8) + return {}; + + if (m_bytes_left == 0) + return Error::from_string_literal("Range decoder is out of data"); + + // Read the data into the most significant bits of a variable. + auto read_size = min(reserve_bytes, m_bytes_left); + ValueType read_value = 0; + memcpy(&read_value, m_data, read_size); + read_value = AK::convert_between_host_and_big_endian(read_value); + + // Skip the number of bytes read in the data. + m_data += read_size; + m_bytes_left -= read_size; + + // Shift the value that was read to be less significant than the least significant bit available in the reservoir. 
+ read_value >>= m_value_bits_left; + m_value |= read_value; + m_value_bits_left += read_size * 8; + return {}; +} + +// 9.2.2 Boolean decoding process ErrorOr BooleanDecoder::read_bool(u8 probability) { auto split = 1u + (((m_range - 1u) * probability) >> 8u); + // The actual value being read resides in the most significant 8 bits + // of the value field, so we shift the split into that range for comparison. + auto split_shifted = static_cast(split) << reserve_bits; bool return_bool; - if (m_value < split) { + if (m_value < split_shifted) { m_range = split; return_bool = false; } else { m_range -= split; - m_value -= split; + m_value -= split_shifted; return_bool = true; } - if (m_range < 128) { - u8 bits_to_shift_into_range = count_leading_zeroes(m_range); + u8 bits_to_shift_into_range = count_leading_zeroes(m_range) - ((sizeof(m_range) - 1) * 8); + m_range <<= bits_to_shift_into_range; + m_value <<= bits_to_shift_into_range; + m_value_bits_left -= bits_to_shift_into_range; - if (bits_to_shift_into_range > m_bits_left) - return Error::from_string_literal("Range decoder is out of data"); - - m_range <<= bits_to_shift_into_range; - m_value = (m_value << bits_to_shift_into_range) | TRY(m_bit_stream->read_bits(bits_to_shift_into_range)); - m_bits_left -= bits_to_shift_into_range; - } + TRY(fill_reservoir()); return return_bool; } +// 9.2.4 Parsing process for read_literal ErrorOr BooleanDecoder::read_literal(u8 bits) { u8 return_value = 0; @@ -67,20 +97,34 @@ ErrorOr BooleanDecoder::read_literal(u8 bits) return return_value; } -/* 9.2.3 */ -ErrorOr BooleanDecoder::finish_decode_vp9() +ErrorOr BooleanDecoder::finish_decode() { - while (m_bits_left > 0) { - auto padding_read_size = min(m_bits_left, 64); - auto padding_bits = TRY(m_bit_stream->read_bits(padding_read_size)); - m_bits_left -= padding_read_size; +#if VPX_DEBUG + // 9.2.3 Exit process for Boolean decoder + // + // This process is invoked when the function exit_bool( ) is called from the syntax structure. 
+ // + // The padding syntax element is read using the f(BoolMaxBits) parsing process. + // + // It is a requirement of bitstream conformance that padding is equal to 0. + // + // NOTE: This requirement holds up for all of our WebP lossy test inputs, as well. + bool padding_good = true; - if (padding_bits != 0) - return Error::from_string_literal("Range decoder has non-zero padding element"); + if (m_value != 0) + padding_good = false; + + while (m_bytes_left > 0) { + if (*m_data != 0) + padding_good = false; + m_data++; + m_bytes_left--; } - // FIXME: It is a requirement of bitstream conformance that enough padding bits are inserted to ensure that the final coded byte of a frame is not equal to a superframe marker. - // A byte b is equal to a superframe marker if and only if (b & 0xe0)is equal to 0xc0, i.e. if the most significant 3 bits are equal to 0b110. + if (!padding_good) + return Error::from_string_literal("Range decoder padding was non-zero"); +#endif + return {}; } diff --git a/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.h b/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.h index e56e165329..3e64a8c91d 100644 --- a/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.h +++ b/Userland/Libraries/LibGfx/ImageFormats/BooleanDecoder.h @@ -17,29 +17,37 @@ namespace Gfx { // Can decode bitstreams encoded with VP8's and VP9's arithmetic boolean encoder. 
class BooleanDecoder { public: - static ErrorOr initialize(MaybeOwned bit_stream, size_t size_in_bytes); + static ErrorOr initialize(ReadonlyBytes data); /* (9.2) */ - static ErrorOr initialize_vp9(MaybeOwned bit_stream, size_t size_in_bytes); - ErrorOr read_bool(u8 probability); ErrorOr read_literal(u8 bits); - ErrorOr finish_decode_vp9(); + ErrorOr finish_decode(); private: - BooleanDecoder(MaybeOwned&& bit_stream, u8 value, u8 range, u64 bits_left) - : m_bit_stream(move(bit_stream)) - , m_value(value) - , m_range(range) - , m_bits_left(bits_left) + using ValueType = size_t; + static constexpr u8 reserve_bytes = sizeof(ValueType) - 1; + static constexpr u8 reserve_bits = reserve_bytes * 8; + + BooleanDecoder(u8 const* data, u64 bytes_left) + : m_data(data + 1) + , m_bytes_left(bytes_left - 1) + , m_range(255) + , m_value(static_cast(*data) << reserve_bits) + , m_value_bits_left(8) { } - MaybeOwned m_bit_stream; - u8 m_value { 0 }; - u8 m_range { 0 }; - u64 m_bits_left { 0 }; + ErrorOr fill_reservoir(); + + u8 const* m_data; + size_t m_bytes_left { 0 }; + // This value will never exceed 255. If this is a u8, the compiler will generate a truncation in read_bool(). + u32 m_range { 0 }; + ValueType m_value { 0 }; + // Like above, this will never exceed reserve_bits, but will truncate if it is a u8. 
+ u32 m_value_bits_left { 0 }; }; } diff --git a/Userland/Libraries/LibGfx/ImageFormats/WebPLoaderLossy.cpp b/Userland/Libraries/LibGfx/ImageFormats/WebPLoaderLossy.cpp index ec05396cb3..52fa91d180 100644 --- a/Userland/Libraries/LibGfx/ImageFormats/WebPLoaderLossy.cpp +++ b/Userland/Libraries/LibGfx/ImageFormats/WebPLoaderLossy.cpp @@ -1141,9 +1141,7 @@ ErrorOr decode_VP8_image_data(Gfx::Bitmap& bitmap, FrameHeader const& head Vector streams; for (auto data : data_partitions) { - auto memory_stream = make(data); - auto bit_stream = make(move(memory_stream)); - auto decoder = TRY(BooleanDecoder::initialize(move(bit_stream), data.size() * 8)); + auto decoder = TRY(BooleanDecoder::initialize(data)); TRY(streams.try_append(move(decoder))); } @@ -1221,6 +1219,9 @@ ErrorOr decode_VP8_image_data(Gfx::Bitmap& bitmap, FrameHeader const& head } } + for (auto& decoder : streams) + TRY(decoder.finish_decode()); + return {}; } @@ -1263,9 +1264,7 @@ static ErrorOr> split_data_partitions(ReadonlyBytes second ErrorOr> decode_webp_chunk_VP8_contents(VP8Header const& vp8_header, bool include_alpha_channel) { // The first partition stores header, per-segment state, and macroblock metadata. - FixedMemoryStream memory_stream { vp8_header.first_partition }; - BigEndianInputBitStream bit_stream { MaybeOwned(memory_stream) }; - auto decoder = TRY(BooleanDecoder::initialize(MaybeOwned { bit_stream }, vp8_header.first_partition.size() * 8)); + auto decoder = TRY(BooleanDecoder::initialize(vp8_header.first_partition)); auto header = TRY(decode_VP8_frame_header(decoder)); @@ -1278,6 +1277,7 @@ ErrorOr> decode_webp_chunk_VP8_contents(VP8Header const& v auto macroblock_metadata = TRY(decode_VP8_macroblock_metadata(decoder, header, macroblock_width, macroblock_height)); + TRY(decoder.finish_decode()); // Done with the first partition! auto bitmap_format = include_alpha_channel ? 
BitmapFormat::BGRA8888 : BitmapFormat::BGRx8888; diff --git a/Userland/Libraries/LibVideo/VP9/Context.h b/Userland/Libraries/LibVideo/VP9/Context.h index 24c2d7a427..fedd96b769 100644 --- a/Userland/Libraries/LibVideo/VP9/Context.h +++ b/Userland/Libraries/LibVideo/VP9/Context.h @@ -57,6 +57,21 @@ public: NonnullOwnPtr stream; BigEndianInputBitStream bit_stream; + DecoderErrorOr create_range_decoder(size_t size) + { + ReadonlyBytes stream_data = static_cast(*stream).bytes(); + auto compressed_header_data = ReadonlyBytes(stream_data.data() + stream->offset(), size); + + // 9.2.1: The Boolean decoding process specified in section 9.2.2 is invoked to read a marker syntax element from the + // bitstream. It is a requirement of bitstream conformance that the value read is equal to 0. + auto decoder = DECODER_TRY(DecoderErrorCategory::Corrupted, BooleanDecoder::initialize(compressed_header_data)); + if (DECODER_TRY(DecoderErrorCategory::Corrupted, decoder.read_bool(128))) + return DecoderError::corrupted("Range decoder marker was non-zero"sv); + + DECODER_TRY(DecoderErrorCategory::Corrupted, bit_stream.discard(size)); + return decoder; + } + NonnullOwnPtr counter; u8 profile { 0 }; @@ -228,12 +243,9 @@ public: auto height = rows_end - rows_start; auto context_view = frame_context.m_block_contexts.view(rows_start, columns_start, height, width); - auto bit_stream = DECODER_TRY_ALLOC(try_make(DECODER_TRY_ALLOC(try_make(*frame_context.stream)))); - auto decoder = DECODER_TRY(DecoderErrorCategory::Corrupted, BooleanDecoder::initialize_vp9(move(bit_stream), tile_size)); - return TileContext { frame_context, - move(decoder), + TRY(frame_context.create_range_decoder(tile_size)), DECODER_TRY_ALLOC(try_make()), rows_start, rows_end, diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp index a0453da52b..ff02c7f1a9 100644 --- a/Userland/Libraries/LibVideo/VP9/Parser.cpp +++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp @@ -593,7 +593,8 
@@ void Parser::setup_past_independence() DecoderErrorOr Parser::compressed_header(FrameContext& frame_context) { - auto decoder = TRY_READ(BooleanDecoder::initialize_vp9(MaybeOwned(frame_context.bit_stream), frame_context.header_size_in_bytes)); + auto decoder = TRY(frame_context.create_range_decoder(frame_context.header_size_in_bytes)); + frame_context.transform_mode = TRY(read_tx_mode(decoder, frame_context)); if (frame_context.transform_mode == TransformMode::Select) TRY(tx_mode_probs(decoder)); @@ -610,7 +611,7 @@ DecoderErrorOr Parser::compressed_header(FrameContext& frame_context) TRY(read_partition_probs(decoder)); TRY(mv_probs(decoder, frame_context)); } - TRY_READ(decoder.finish_decode_vp9()); + TRY_READ(decoder.finish_decode()); return {}; } @@ -938,7 +939,6 @@ DecoderErrorOr Parser::decode_tiles(FrameContext& frame_context) auto above_segmentation_ids_for_tile = safe_slice(above_segmentation_ids.span(), columns_start, columns_end - columns_start); tile_workloads[tile_col].append(TRY(TileContext::try_create(frame_context, tile_size, rows_start, rows_end, columns_start, columns_end, above_partition_context_for_tile, above_non_zero_tokens_view, above_segmentation_ids_for_tile))); - TRY_READ(frame_context.bit_stream.discard(tile_size)); } } @@ -1002,7 +1002,7 @@ DecoderErrorOr Parser::decode_tile(TileContext& tile_context) TRY(decode_partition(tile_context, row, col, Block_64x64)); } } - TRY_READ(tile_context.decoder.finish_decode_vp9()); + TRY_READ(tile_context.decoder.finish_decode()); return {}; }