AK: Discard bits from LittleEndianInputBitStream as they are read

Rather than tracking our position in the bit buffer, we can simply shift
away the bits that we read. This is mostly for simplicity, but it also
helps performance a bit.

Using the "enwik8" file as a test (100MB uncompressed, commonly used in
benchmarks: https://www.mattmahoney.net/dc/enwik8.zip), compression time
decreases from:

    3.96s to 3.79s on Serenity (cold)
    1.08s to 1.04s on Serenity (warm)
    0.83s to 0.82s on Linux
This commit is contained in:
Timothy Flynn 2023-05-18 07:23:49 -04:00 committed by Tim Flynn
parent 0c53b02149
commit 70c977aa56

View file

@ -138,17 +138,11 @@ protected:
return bits == 0 ? 0 : max >> (digits - bits);
}
// Returns the contents of m_bit_buffer shifted right by m_bit_offset, so that
// the bit at position m_bit_offset ends up in the least significant position.
// When m_bit_offset equals bit_buffer_size the result is 0 instead of a shift.
// NOTE(review): presumably this guards against shifting by the full width of
// BufferType, which is undefined behaviour in C++ -- this assumes
// bit_buffer_size is the width of BufferType in bits; confirm at its definition.
ALWAYS_INLINE BufferType lsb_aligned_buffer() const
{
return m_bit_offset == bit_buffer_size ? 0 : m_bit_buffer >> m_bit_offset;
}
// True when m_bit_count is an exact multiple of bits_per_byte (including zero).
ALWAYS_INLINE bool is_aligned_to_byte_boundary() const { return m_bit_count % bits_per_byte == 0; }
MaybeOwned<Stream> m_stream;
BufferType m_bit_buffer { 0 };
u8 m_bit_offset { 0 };
u8 m_bit_count { 0 };
};
@ -217,7 +211,7 @@ public:
if (count > m_bit_count)
TRY(refill_buffer_from_stream());
return lsb_aligned_buffer() & lsb_mask<T>(min(count, m_bit_count));
return m_bit_buffer & lsb_mask<T>(min(count, m_bit_count));
}
ALWAYS_INLINE void discard_previously_peeked_bits(u8 count)
@ -226,7 +220,7 @@ public:
if (count > m_bit_count)
count = m_bit_count;
m_bit_offset += count;
m_bit_buffer >>= count;
m_bit_count -= count;
}
@ -236,9 +230,6 @@ public:
{
u8 remaining_bits = 0;
m_bit_buffer = lsb_aligned_buffer();
m_bit_offset = 0;
if (auto offset = m_bit_count % bits_per_byte; offset != 0) {
remaining_bits = m_bit_buffer & lsb_mask<u8>(offset);
discard_previously_peeked_bits(offset);
@ -256,9 +247,8 @@ private:
BufferType buffer = 0;
auto bytes = TRY(m_stream->read_some({ &buffer, bytes_to_read }));
m_bit_buffer = (buffer << m_bit_count) | lsb_aligned_buffer();
m_bit_buffer |= (buffer << m_bit_count);
m_bit_count += bytes.size() * bits_per_byte;
m_bit_offset = 0;
return {};
}