LibGfx/LibVideo: Read batches of multiple bytes in VPX BooleanDecoder

This does a few things:

- The decoder uses a 32- or 64-bit integer as a reservoir of the data
  being decoded, rather than one single byte as it was previously.
- `read_bool()` only refills the reservoir (value) when the number of
  buffered bits drops to one byte or less. Previously, it would read a
  bit-granular range from the data to completely refill the 8-bit value,
  doing much more work than necessary for each individual read.
- VP9-specific code for reading the marker bit was moved to its own
  function in Context.h.
- A debug flag `VPX_DEBUG` was added to optionally enable checking of
  the final bits in a VPX ranged arithmetic decode to ensure that they
  are all zeroes. These zeroes are a bitstream requirement for
  VP9, and are also present for all our lossy WebP test inputs
  currently. This can be useful to test whether all the data present in
  the range has been consumed.

A lot of the size of this diff comes from the removal of error handling
from all the range decoder reads in LibVideo/VP9 and LibGfx/WebP (VP8),
since it is now checked only at the end of the range.

In a benchmark decoding `Tests/LibGfx/test-inputs/4.webp`, decode times
are improved by about 22.8%, reducing average runtime from 35.5ms±1.1ms
down to 27.4ms±1.1ms.

This should cause no behavioral changes.
This commit is contained in:
Zaggy1024 2023-06-02 13:52:40 -05:00 committed by Andreas Kling
parent edd847798a
commit 873b0e9470
7 changed files with 131 additions and 62 deletions

View file

@ -470,6 +470,10 @@
# cmakedefine01 UTF8_DEBUG
#endif
#ifndef VPX_DEBUG
# cmakedefine01 VPX_DEBUG
#endif
#ifndef WASI_DEBUG
# cmakedefine01 WASI_DEBUG
#endif

View file

@ -193,6 +193,7 @@ set(VFS_DEBUG ON)
set(VIRTIO_DEBUG ON)
set(VIRTUAL_CONSOLE_DEBUG ON)
set(VMWARE_BACKDOOR_DEBUG ON)
set(VPX_DEBUG ON)
set(WAITBLOCK_DEBUG ON)
set(WAITQUEUE_DEBUG ON)
set(WASI_DEBUG ON)

View file

@ -6,58 +6,88 @@
*/
#include <AK/BuiltinWrappers.h>
#include <AK/Debug.h>
#include <AK/Endian.h>
#include "BooleanDecoder.h"
namespace Gfx {
ErrorOr<BooleanDecoder> BooleanDecoder::initialize(MaybeOwned<BigEndianInputBitStream> bit_stream, size_t size_in_bytes)
// 9.2.1 Initialization process for Boolean decoder
ErrorOr<BooleanDecoder> BooleanDecoder::initialize(ReadonlyBytes data)
{
VERIFY(bit_stream->is_aligned_to_byte_boundary());
auto value = TRY(bit_stream->read_value<u8>());
u8 range = 255;
u64 bits_left = (8 * size_in_bytes) - 8;
return BooleanDecoder { move(bit_stream), value, range, bits_left };
}
if (data.size() == 0)
return Error::from_string_literal("Size of decoder range cannot be zero");
/* 9.2.1 */
ErrorOr<BooleanDecoder> BooleanDecoder::initialize_vp9(MaybeOwned<BigEndianInputBitStream> bit_stream, size_t size_in_bytes)
{
BooleanDecoder decoder = TRY(initialize(move(bit_stream), size_in_bytes));
if (TRY(decoder.read_bool(128)))
return Error::from_string_literal("Range decoder marker was non-zero");
// NOTE: This implementation is shared between VP8 and VP9. Therefore, we do not check the
// marker bit at the start of the range decode that is required in the VP9 specification.
// This is instead handled by the function that instantiates all range decoders for the
// VP9 decoder.
// NOTE: As noted below in fill_reservoir(), we read in multi-byte-sized chunks,
// so here we will deviate from the standard to count in bytes rather than bits.
auto decoder = BooleanDecoder { data.data(), data.size() };
TRY(decoder.fill_reservoir());
return decoder;
}
/* 9.2.2 */
// Tops up the reservoir (m_value) with up to reserve_bytes bytes of input once it holds one byte
// or less.
//
// Instead of filling the value field one bit at a time as the spec suggests, we store the
// data to be read in a reservoir of greater than one byte. This allows us to read out data
// for the entire reservoir at once, avoiding a lot of branch misses in read_bool().
//
// Returns an error only when the input is exhausted AND fewer than 8 bits remain buffered.
ErrorOr<void> BooleanDecoder::fill_reservoir()
{
    // More than one byte is still buffered, so the next read_bool() cannot run dry.
    if (m_value_bits_left > 8)
        return {};

    if (m_bytes_left == 0) {
        // read_bool() compares against the most significant 8 bits of m_value. With exactly 8 bits
        // buffered that window is still fully backed by input, so the data has merely been consumed
        // to its end. Only report exhaustion once bits have been shifted out that could not be
        // replaced. This also lets a range that is a single byte long initialize successfully.
        if (m_value_bits_left < 8)
            return Error::from_string_literal("Range decoder is out of data");
        return {};
    }

    // Read the data into the most significant bits of a variable. A short read at the end of the
    // input leaves the untouched low-order bytes zeroed.
    auto read_size = min<size_t>(reserve_bytes, m_bytes_left);
    ValueType read_value = 0;
    memcpy(&read_value, m_data, read_size);
    read_value = AK::convert_between_host_and_big_endian(read_value);

    // Skip the number of bytes read in the data.
    m_data += read_size;
    m_bytes_left -= read_size;

    // Shift the value that was read to be less significant than the least significant bit available in the reservoir.
    read_value >>= m_value_bits_left;
    m_value |= read_value;
    m_value_bits_left += read_size * 8;
    return {};
}
// 9.2.2 Boolean decoding process
ErrorOr<bool> BooleanDecoder::read_bool(u8 probability)
{
auto split = 1u + (((m_range - 1u) * probability) >> 8u);
// The actual value being read resides in the most significant 8 bits
// of the value field, so we shift the split into that range for comparison.
auto split_shifted = static_cast<ValueType>(split) << reserve_bits;
bool return_bool;
if (m_value < split) {
if (m_value < split_shifted) {
m_range = split;
return_bool = false;
} else {
m_range -= split;
m_value -= split;
m_value -= split_shifted;
return_bool = true;
}
if (m_range < 128) {
u8 bits_to_shift_into_range = count_leading_zeroes(m_range);
u8 bits_to_shift_into_range = count_leading_zeroes(m_range) - ((sizeof(m_range) - 1) * 8);
m_range <<= bits_to_shift_into_range;
m_value <<= bits_to_shift_into_range;
m_value_bits_left -= bits_to_shift_into_range;
if (bits_to_shift_into_range > m_bits_left)
return Error::from_string_literal("Range decoder is out of data");
m_range <<= bits_to_shift_into_range;
m_value = (m_value << bits_to_shift_into_range) | TRY(m_bit_stream->read_bits<u8>(bits_to_shift_into_range));
m_bits_left -= bits_to_shift_into_range;
}
TRY(fill_reservoir());
return return_bool;
}
// 9.2.4 Parsing process for read_literal
ErrorOr<u8> BooleanDecoder::read_literal(u8 bits)
{
u8 return_value = 0;
@ -67,20 +97,34 @@ ErrorOr<u8> BooleanDecoder::read_literal(u8 bits)
return return_value;
}
/* 9.2.3 */
ErrorOr<void> BooleanDecoder::finish_decode_vp9()
ErrorOr<void> BooleanDecoder::finish_decode()
{
while (m_bits_left > 0) {
auto padding_read_size = min(m_bits_left, 64);
auto padding_bits = TRY(m_bit_stream->read_bits(padding_read_size));
m_bits_left -= padding_read_size;
#if VPX_DEBUG
// 9.2.3 Exit process for Boolean decoder
//
// This process is invoked when the function exit_bool( ) is called from the syntax structure.
//
// The padding syntax element is read using the f(BoolMaxBits) parsing process.
//
// It is a requirement of bitstream conformance that padding is equal to 0.
//
// NOTE: This requirement holds up for all of our WebP lossy test inputs, as well.
bool padding_good = true;
if (padding_bits != 0)
return Error::from_string_literal("Range decoder has non-zero padding element");
if (m_value != 0)
padding_good = false;
while (m_bytes_left > 0) {
if (*m_data != 0)
padding_good = false;
m_data++;
m_bytes_left--;
}
// FIXME: It is a requirement of bitstream conformance that enough padding bits are inserted to ensure that the final coded byte of a frame is not equal to a superframe marker.
// A byte b is equal to a superframe marker if and only if (b & 0xe0)is equal to 0xc0, i.e. if the most significant 3 bits are equal to 0b110.
if (!padding_good)
return Error::from_string_literal("Range decoder padding was non-zero");
#endif
return {};
}

View file

@ -17,29 +17,37 @@ namespace Gfx {
// Can decode bitstreams encoded with VP8's and VP9's arithmetic boolean encoder.
class BooleanDecoder {
public:
static ErrorOr<BooleanDecoder> initialize(MaybeOwned<BigEndianInputBitStream> bit_stream, size_t size_in_bytes);
static ErrorOr<BooleanDecoder> initialize(ReadonlyBytes data);
/* (9.2) */
static ErrorOr<BooleanDecoder> initialize_vp9(MaybeOwned<BigEndianInputBitStream> bit_stream, size_t size_in_bytes);
ErrorOr<bool> read_bool(u8 probability);
ErrorOr<u8> read_literal(u8 bits);
ErrorOr<void> finish_decode_vp9();
ErrorOr<void> finish_decode();
private:
BooleanDecoder(MaybeOwned<BigEndianInputBitStream>&& bit_stream, u8 value, u8 range, u64 bits_left)
: m_bit_stream(move(bit_stream))
, m_value(value)
, m_range(range)
, m_bits_left(bits_left)
// Integer type backing the reservoir of not-yet-decoded input (m_value).
using ValueType = size_t;
// Number of reservoir bytes below its most significant byte; this is the most that a single
// fill_reservoir() call reads from the input.
static constexpr u8 reserve_bytes = sizeof(ValueType) - 1;
// reserve_bytes expressed in bits, i.e. the shift that moves an 8-bit quantity into the most
// significant byte of a ValueType.
static constexpr u8 reserve_bits = reserve_bytes * 8;
// Primes the decoder from a raw byte range: the first byte is consumed immediately and stored in
// the most significant 8 bits of m_value, and the range starts out at 255.
// NOTE: `data` is dereferenced and `bytes_left - 1` is computed unconditionally, so callers must
// pass at least one byte.
BooleanDecoder(u8 const* data, u64 bytes_left)
: m_data(data + 1)
, m_bytes_left(bytes_left - 1)
, m_range(255)
, m_value(static_cast<ValueType>(*data) << reserve_bits)
// Only the byte loaded above is buffered so far.
, m_value_bits_left(8)
{
}
MaybeOwned<BigEndianInputBitStream> m_bit_stream;
u8 m_value { 0 };
u8 m_range { 0 };
u64 m_bits_left { 0 };
ErrorOr<void> fill_reservoir();
u8 const* m_data;
size_t m_bytes_left { 0 };
// This value will never exceed 255. If this is a u8, the compiler will generate a truncation in read_bool().
u32 m_range { 0 };
ValueType m_value { 0 };
// Like above, this will never exceed reserve_bits, but will truncate if it is a u8.
u32 m_value_bits_left { 0 };
};
}

View file

@ -1141,9 +1141,7 @@ ErrorOr<void> decode_VP8_image_data(Gfx::Bitmap& bitmap, FrameHeader const& head
Vector<BooleanDecoder> streams;
for (auto data : data_partitions) {
auto memory_stream = make<FixedMemoryStream>(data);
auto bit_stream = make<BigEndianInputBitStream>(move(memory_stream));
auto decoder = TRY(BooleanDecoder::initialize(move(bit_stream), data.size() * 8));
auto decoder = TRY(BooleanDecoder::initialize(data));
TRY(streams.try_append(move(decoder)));
}
@ -1221,6 +1219,9 @@ ErrorOr<void> decode_VP8_image_data(Gfx::Bitmap& bitmap, FrameHeader const& head
}
}
for (auto& decoder : streams)
TRY(decoder.finish_decode());
return {};
}
@ -1263,9 +1264,7 @@ static ErrorOr<Vector<ReadonlyBytes>> split_data_partitions(ReadonlyBytes second
ErrorOr<NonnullRefPtr<Bitmap>> decode_webp_chunk_VP8_contents(VP8Header const& vp8_header, bool include_alpha_channel)
{
// The first partition stores header, per-segment state, and macroblock metadata.
FixedMemoryStream memory_stream { vp8_header.first_partition };
BigEndianInputBitStream bit_stream { MaybeOwned<Stream>(memory_stream) };
auto decoder = TRY(BooleanDecoder::initialize(MaybeOwned { bit_stream }, vp8_header.first_partition.size() * 8));
auto decoder = TRY(BooleanDecoder::initialize(vp8_header.first_partition));
auto header = TRY(decode_VP8_frame_header(decoder));
@ -1278,6 +1277,7 @@ ErrorOr<NonnullRefPtr<Bitmap>> decode_webp_chunk_VP8_contents(VP8Header const& v
auto macroblock_metadata = TRY(decode_VP8_macroblock_metadata(decoder, header, macroblock_width, macroblock_height));
TRY(decoder.finish_decode());
// Done with the first partition!
auto bitmap_format = include_alpha_channel ? BitmapFormat::BGRA8888 : BitmapFormat::BGRx8888;

View file

@ -57,6 +57,21 @@ public:
NonnullOwnPtr<FixedMemoryStream> stream;
BigEndianInputBitStream bit_stream;
// Creates a boolean (range) decoder over the next `size` bytes of the frame data, starting at the
// current offset of `stream`, checks the leading marker bit, and then skips those `size` bytes in
// `bit_stream`.
//
// Returns a corrupted-data error if `size` does not fit in the remaining data, if the decoder
// cannot be initialized, if the marker bit is set, or if the bytes cannot be discarded.
DecoderErrorOr<BooleanDecoder> create_range_decoder(size_t size)
{
    ReadonlyBytes stream_data = static_cast<FixedMemoryStream const&>(*stream).bytes();

    // The decoder reads straight from this memory during initialization, which happens before the
    // discard() below gets a chance to reject an oversized range. Validate the size up front so a
    // corrupt size field cannot cause a read past the end of the buffer.
    auto const start_offset = stream->offset();
    if (start_offset > stream_data.size() || size > stream_data.size() - start_offset)
        return DecoderError::corrupted("Range decoder size exceeds the remaining frame data"sv);
    auto range_data = ReadonlyBytes(stream_data.data() + start_offset, size);

    // 9.2.1: The Boolean decoding process specified in section 9.2.2 is invoked to read a marker syntax element from the
    // bitstream. It is a requirement of bitstream conformance that the value read is equal to 0.
    auto decoder = DECODER_TRY(DecoderErrorCategory::Corrupted, BooleanDecoder::initialize(range_data));
    if (DECODER_TRY(DecoderErrorCategory::Corrupted, decoder.read_bool(128)))
        return DecoderError::corrupted("Range decoder marker was non-zero"sv);

    // Advance the frame's bit stream past the bytes now owned by the range decoder.
    DECODER_TRY(DecoderErrorCategory::Corrupted, bit_stream.discard(size));
    return decoder;
}
NonnullOwnPtr<SyntaxElementCounter> counter;
u8 profile { 0 };
@ -228,12 +243,9 @@ public:
auto height = rows_end - rows_start;
auto context_view = frame_context.m_block_contexts.view(rows_start, columns_start, height, width);
auto bit_stream = DECODER_TRY_ALLOC(try_make<BigEndianInputBitStream>(DECODER_TRY_ALLOC(try_make<FixedMemoryStream>(*frame_context.stream))));
auto decoder = DECODER_TRY(DecoderErrorCategory::Corrupted, BooleanDecoder::initialize_vp9(move(bit_stream), tile_size));
return TileContext {
frame_context,
move(decoder),
TRY(frame_context.create_range_decoder(tile_size)),
DECODER_TRY_ALLOC(try_make<SyntaxElementCounter>()),
rows_start,
rows_end,

View file

@ -593,7 +593,8 @@ void Parser::setup_past_independence()
DecoderErrorOr<void> Parser::compressed_header(FrameContext& frame_context)
{
auto decoder = TRY_READ(BooleanDecoder::initialize_vp9(MaybeOwned(frame_context.bit_stream), frame_context.header_size_in_bytes));
auto decoder = TRY(frame_context.create_range_decoder(frame_context.header_size_in_bytes));
frame_context.transform_mode = TRY(read_tx_mode(decoder, frame_context));
if (frame_context.transform_mode == TransformMode::Select)
TRY(tx_mode_probs(decoder));
@ -610,7 +611,7 @@ DecoderErrorOr<void> Parser::compressed_header(FrameContext& frame_context)
TRY(read_partition_probs(decoder));
TRY(mv_probs(decoder, frame_context));
}
TRY_READ(decoder.finish_decode_vp9());
TRY_READ(decoder.finish_decode());
return {};
}
@ -938,7 +939,6 @@ DecoderErrorOr<void> Parser::decode_tiles(FrameContext& frame_context)
auto above_segmentation_ids_for_tile = safe_slice(above_segmentation_ids.span(), columns_start, columns_end - columns_start);
tile_workloads[tile_col].append(TRY(TileContext::try_create(frame_context, tile_size, rows_start, rows_end, columns_start, columns_end, above_partition_context_for_tile, above_non_zero_tokens_view, above_segmentation_ids_for_tile)));
TRY_READ(frame_context.bit_stream.discard(tile_size));
}
}
@ -1002,7 +1002,7 @@ DecoderErrorOr<void> Parser::decode_tile(TileContext& tile_context)
TRY(decode_partition(tile_context, row, col, Block_64x64));
}
}
TRY_READ(tile_context.decoder.finish_decode_vp9());
TRY_READ(tile_context.decoder.finish_decode());
return {};
}