LibCompress: Port GzipDecompressor to Core::Stream

This commit is contained in:
Tim Schumacher 2022-11-29 15:43:31 +01:00 committed by Linus Groh
parent 4e7da96d58
commit f93c7fbb5e
9 changed files with 80 additions and 145 deletions

View file

@ -10,5 +10,5 @@
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size) extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
{ {
auto result = Compress::GzipDecompressor::decompress_all(ReadonlyBytes { data, size }); auto result = Compress::GzipDecompressor::decompress_all(ReadonlyBytes { data, size });
return result.has_value(); return 0;
} }

View file

@ -92,6 +92,6 @@ TEST_CASE(gzip_round_trip)
auto compressed = Compress::GzipCompressor::compress_all(original); auto compressed = Compress::GzipCompressor::compress_all(original);
EXPECT(compressed.has_value()); EXPECT(compressed.has_value());
auto uncompressed = Compress::GzipDecompressor::decompress_all(compressed.value()); auto uncompressed = Compress::GzipDecompressor::decompress_all(compressed.value());
EXPECT(uncompressed.has_value()); EXPECT(!uncompressed.is_error());
EXPECT(uncompressed.value() == original); EXPECT(uncompressed.value() == original);
} }

View file

@ -10,6 +10,7 @@
#include <AK/DeprecatedString.h> #include <AK/DeprecatedString.h>
#include <AK/MemoryStream.h> #include <AK/MemoryStream.h>
#include <LibCore/DateTime.h> #include <LibCore/DateTime.h>
#include <LibCore/MemoryStream.h>
namespace Compress { namespace Compress {
@ -38,8 +39,8 @@ bool BlockHeader::supported_by_implementation() const
return true; return true;
} }
GzipDecompressor::GzipDecompressor(InputStream& stream) GzipDecompressor::GzipDecompressor(NonnullOwnPtr<Core::Stream::Stream> stream)
: m_input_stream(stream) : m_input_stream(move(stream))
{ {
} }
@ -48,12 +49,11 @@ GzipDecompressor::~GzipDecompressor()
m_current_member.clear(); m_current_member.clear();
} }
// FIXME: Again, there are surely a ton of bugs because the code doesn't check for read errors. ErrorOr<Bytes> GzipDecompressor::read(Bytes bytes)
size_t GzipDecompressor::read(Bytes bytes)
{ {
size_t total_read = 0; size_t total_read = 0;
while (total_read < bytes.size()) { while (total_read < bytes.size()) {
if (has_any_error() || m_eof) if (is_eof())
break; break;
auto slice = bytes.slice(total_read); auto slice = bytes.slice(total_read);
@ -63,26 +63,19 @@ size_t GzipDecompressor::read(Bytes bytes)
current_member().m_checksum.update(slice.trim(nread)); current_member().m_checksum.update(slice.trim(nread));
current_member().m_nread += nread; current_member().m_nread += nread;
if (current_member().m_stream.handle_any_error()) { if (current_member().m_stream.handle_any_error())
set_fatal_error(); return Error::from_string_literal("Underlying DeflateDecompressor indicated an error");
break;
}
if (nread < slice.size()) { if (nread < slice.size()) {
LittleEndian<u32> crc32, input_size; LittleEndian<u32> crc32, input_size;
m_input_stream >> crc32 >> input_size; TRY(m_input_stream->read(crc32.bytes()));
TRY(m_input_stream->read(input_size.bytes()));
if (crc32 != current_member().m_checksum.digest()) { if (crc32 != current_member().m_checksum.digest())
// FIXME: Somehow the checksum is incorrect? return Error::from_string_literal("Stored CRC32 does not match the calculated CRC32 of the current member");
set_fatal_error(); if (input_size != current_member().m_nread)
break; return Error::from_string_literal("Input size does not match the number of read bytes");
}
if (input_size != current_member().m_nread) {
set_fatal_error();
break;
}
m_current_member.clear(); m_current_member.clear();
@ -93,12 +86,12 @@ size_t GzipDecompressor::read(Bytes bytes)
total_read += nread; total_read += nread;
continue; continue;
} else { } else {
m_partial_header_offset += m_input_stream.read(Bytes { m_partial_header, sizeof(BlockHeader) }.slice(m_partial_header_offset)); auto current_partial_header_slice = Bytes { m_partial_header, sizeof(BlockHeader) }.slice(m_partial_header_offset);
auto current_partial_header_data = TRY(m_input_stream->read(current_partial_header_slice));
m_partial_header_offset += current_partial_header_data.size();
if (m_input_stream.handle_any_error() || m_input_stream.unreliable_eof()) { if (is_eof())
m_eof = true;
break; break;
}
if (m_partial_header_offset < sizeof(BlockHeader)) { if (m_partial_header_offset < sizeof(BlockHeader)) {
break; // partial header read break; // partial header read
@ -107,51 +100,45 @@ size_t GzipDecompressor::read(Bytes bytes)
BlockHeader header = *(reinterpret_cast<BlockHeader*>(m_partial_header)); BlockHeader header = *(reinterpret_cast<BlockHeader*>(m_partial_header));
if (!header.valid_magic_number() || !header.supported_by_implementation()) { if (!header.valid_magic_number())
set_fatal_error(); return Error::from_string_literal("Header does not have a valid magic number");
break;
} if (!header.supported_by_implementation())
return Error::from_string_literal("Header is not supported by implementation");
if (header.flags & Flags::FEXTRA) { if (header.flags & Flags::FEXTRA) {
LittleEndian<u16> subfield_id, length; LittleEndian<u16> subfield_id, length;
m_input_stream >> subfield_id >> length; TRY(m_input_stream->read(subfield_id.bytes()));
m_input_stream.discard_or_error(length); TRY(m_input_stream->read(length.bytes()));
TRY(m_input_stream->discard(length));
} }
auto discard_string = [&]() { auto discard_string = [&]() -> ErrorOr<void> {
char next_char; char next_char;
do { do {
m_input_stream >> next_char; TRY(m_input_stream->read({ &next_char, sizeof(next_char) }));
if (m_input_stream.has_any_error()) {
set_fatal_error();
break;
}
} while (next_char); } while (next_char);
return {};
}; };
if (header.flags & Flags::FNAME) { if (header.flags & Flags::FNAME)
discard_string(); TRY(discard_string());
if (has_any_error())
break;
}
if (header.flags & Flags::FCOMMENT) { if (header.flags & Flags::FCOMMENT)
discard_string(); TRY(discard_string());
if (has_any_error())
break;
}
if (header.flags & Flags::FHCRC) { if (header.flags & Flags::FHCRC) {
LittleEndian<u16> crc16; LittleEndian<u16> crc16;
m_input_stream >> crc16; TRY(m_input_stream->read(crc16.bytes()));
// FIXME: we should probably verify this instead of just assuming it matches // FIXME: we should probably verify this instead of just assuming it matches
} }
m_current_member.emplace(header, m_input_stream); m_current_member.emplace(header, *m_input_stream);
continue; continue;
} }
} }
return total_read; return bytes.slice(0, total_read);
} }
Optional<DeprecatedString> GzipDecompressor::describe_header(ReadonlyBytes bytes) Optional<DeprecatedString> GzipDecompressor::describe_header(ReadonlyBytes bytes)
@ -167,57 +154,26 @@ Optional<DeprecatedString> GzipDecompressor::describe_header(ReadonlyBytes bytes
return DeprecatedString::formatted("last modified: {}, original size {}", Core::DateTime::from_timestamp(header.modification_time).to_deprecated_string(), (u32)original_size); return DeprecatedString::formatted("last modified: {}, original size {}", Core::DateTime::from_timestamp(header.modification_time).to_deprecated_string(), (u32)original_size);
} }
bool GzipDecompressor::read_or_error(Bytes bytes) ErrorOr<ByteBuffer> GzipDecompressor::decompress_all(ReadonlyBytes bytes)
{ {
if (read(bytes) < bytes.size()) { auto memory_stream = TRY(Core::Stream::MemoryStream::construct(bytes));
set_fatal_error(); auto gzip_stream = make<GzipDecompressor>(move(memory_stream));
return false;
}
return true;
}
bool GzipDecompressor::discard_or_error(size_t count)
{
u8 buffer[4096];
size_t ndiscarded = 0;
while (ndiscarded < count) {
if (unreliable_eof()) {
set_fatal_error();
return false;
}
ndiscarded += read({ buffer, min<size_t>(count - ndiscarded, sizeof(buffer)) });
}
return true;
}
Optional<ByteBuffer> GzipDecompressor::decompress_all(ReadonlyBytes bytes)
{
InputMemoryStream memory_stream { bytes };
GzipDecompressor gzip_stream { memory_stream };
DuplexMemoryStream output_stream; DuplexMemoryStream output_stream;
u8 buffer[4096]; auto buffer = TRY(ByteBuffer::create_uninitialized(4096));
while (!gzip_stream.has_any_error() && !gzip_stream.unreliable_eof()) { while (!gzip_stream->is_eof()) {
auto const nread = gzip_stream.read({ buffer, sizeof(buffer) }); auto const data = TRY(gzip_stream->read(buffer));
output_stream.write_or_error({ buffer, nread }); output_stream.write_or_error(data);
} }
if (gzip_stream.handle_any_error())
return {};
return output_stream.copy_into_contiguous_buffer(); return output_stream.copy_into_contiguous_buffer();
} }
bool GzipDecompressor::unreliable_eof() const { return m_eof; } bool GzipDecompressor::is_eof() const { return m_input_stream->is_eof(); }
bool GzipDecompressor::handle_any_error() ErrorOr<size_t> GzipDecompressor::write(ReadonlyBytes)
{ {
bool handled_errors = m_input_stream.handle_any_error(); VERIFY_NOT_REACHED();
return Stream::handle_any_error() || handled_errors;
} }
GzipCompressor::GzipCompressor(OutputStream& stream) GzipCompressor::GzipCompressor(OutputStream& stream)

View file

@ -8,6 +8,7 @@
#pragma once #pragma once
#include <LibCompress/Deflate.h> #include <LibCompress/Deflate.h>
#include <LibCore/Stream.h>
#include <LibCrypto/Checksum/CRC32.h> #include <LibCrypto/Checksum/CRC32.h>
namespace Compress { namespace Compress {
@ -37,32 +38,33 @@ struct Flags {
static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT; static constexpr u8 MAX = FTEXT | FHCRC | FEXTRA | FNAME | FCOMMENT;
}; };
class GzipDecompressor final : public InputStream { class GzipDecompressor final : public Core::Stream::Stream {
public: public:
GzipDecompressor(InputStream&); GzipDecompressor(NonnullOwnPtr<Core::Stream::Stream>);
~GzipDecompressor(); ~GzipDecompressor();
size_t read(Bytes) override; virtual ErrorOr<Bytes> read(Bytes) override;
bool read_or_error(Bytes) override; virtual ErrorOr<size_t> write(ReadonlyBytes) override;
bool discard_or_error(size_t) override; virtual bool is_eof() const override;
virtual bool is_open() const override { return true; }
virtual void close() override {};
bool unreliable_eof() const override; static ErrorOr<ByteBuffer> decompress_all(ReadonlyBytes);
bool handle_any_error() override;
static Optional<ByteBuffer> decompress_all(ReadonlyBytes);
static Optional<DeprecatedString> describe_header(ReadonlyBytes); static Optional<DeprecatedString> describe_header(ReadonlyBytes);
static bool is_likely_compressed(ReadonlyBytes bytes); static bool is_likely_compressed(ReadonlyBytes bytes);
private: private:
class Member { class Member {
public: public:
Member(BlockHeader header, InputStream& stream) Member(BlockHeader header, Core::Stream::Stream& stream)
: m_header(header) : m_header(header)
, m_stream(stream) , m_adapted_ak_stream(make<Core::Stream::WrapInAKInputStream>(stream))
, m_stream(*m_adapted_ak_stream)
{ {
} }
BlockHeader m_header; BlockHeader m_header;
NonnullOwnPtr<InputStream> m_adapted_ak_stream;
DeflateDecompressor m_stream; DeflateDecompressor m_stream;
Crypto::Checksum::CRC32 m_checksum; Crypto::Checksum::CRC32 m_checksum;
size_t m_nread { 0 }; size_t m_nread { 0 };
@ -71,7 +73,7 @@ private:
Member const& current_member() const { return m_current_member.value(); } Member const& current_member() const { return m_current_member.value(); }
Member& current_member() { return m_current_member.value(); } Member& current_member() { return m_current_member.value(); }
InputStream& m_input_stream; NonnullOwnPtr<Core::Stream::Stream> m_input_stream;
u8 m_partial_header[sizeof(BlockHeader)]; u8 m_partial_header[sizeof(BlockHeader)];
size_t m_partial_header_offset { 0 }; size_t m_partial_header_offset { 0 };
Optional<Member> m_current_member; Optional<Member> m_current_member;

View file

@ -67,8 +67,8 @@ Reader::Reader(ReadonlyBytes coredump_bytes)
Optional<ByteBuffer> Reader::decompress_coredump(ReadonlyBytes raw_coredump) Optional<ByteBuffer> Reader::decompress_coredump(ReadonlyBytes raw_coredump)
{ {
auto decompressed_coredump = Compress::GzipDecompressor::decompress_all(raw_coredump); auto decompressed_coredump = Compress::GzipDecompressor::decompress_all(raw_coredump);
if (decompressed_coredump.has_value()) if (!decompressed_coredump.is_error())
return decompressed_coredump; return decompressed_coredump.release_value();
// If we didn't manage to decompress it, try and parse it as decompressed coredump // If we didn't manage to decompress it, try and parse it as decompressed coredump
auto bytebuffer = ByteBuffer::copy(raw_coredump); auto bytebuffer = ByteBuffer::copy(raw_coredump);

View file

@ -37,8 +37,8 @@ static Optional<ByteBuffer> handle_content_encoding(ByteBuffer const& buf, Depre
dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is gzip compressed!"); dbgln_if(JOB_DEBUG, "Job::handle_content_encoding: buf is gzip compressed!");
auto uncompressed = Compress::GzipDecompressor::decompress_all(buf); auto uncompressed = Compress::GzipDecompressor::decompress_all(buf);
if (!uncompressed.has_value()) { if (uncompressed.is_error()) {
dbgln("Job::handle_content_encoding: Gzip::decompress() failed."); dbgln("Job::handle_content_encoding: Gzip::decompress() failed: {}", uncompressed.error());
return {}; return {};
} }

View file

@ -11,18 +11,17 @@
#include <LibMain/Main.h> #include <LibMain/Main.h>
#include <unistd.h> #include <unistd.h>
static bool decompress_file(Buffered<Core::InputFileStream>& input_stream, Buffered<Core::OutputFileStream>& output_stream) static ErrorOr<void> decompress_file(NonnullOwnPtr<Core::Stream::File> input_stream, Buffered<Core::OutputFileStream>& output_stream)
{ {
auto gzip_stream = Compress::GzipDecompressor { input_stream }; auto gzip_stream = Compress::GzipDecompressor { move(input_stream) };
u8 buffer[4096]; auto buffer = TRY(ByteBuffer::create_uninitialized(4096));
while (!gzip_stream.is_eof()) {
while (!gzip_stream.has_any_error() && !gzip_stream.unreliable_eof()) { auto span = TRY(gzip_stream.read(buffer));
auto const nread = gzip_stream.read({ buffer, sizeof(buffer) }); output_stream.write_or_error(span);
output_stream.write_or_error({ buffer, nread });
} }
return !gzip_stream.handle_any_error(); return {};
} }
ErrorOr<int> serenity_main(Main::Arguments args) ErrorOr<int> serenity_main(Main::Arguments args)
@ -52,20 +51,15 @@ ErrorOr<int> serenity_main(Main::Arguments args)
output_filename = filename; output_filename = filename;
} }
auto input_stream_result = TRY(Core::InputFileStream::open_buffered(input_filename)); auto input_stream_result = TRY(Core::Stream::File::open(input_filename, Core::Stream::OpenMode::Read));
auto success = false;
if (write_to_stdout) { if (write_to_stdout) {
auto stdout = Core::OutputFileStream::stdout_buffered(); auto stdout = Core::OutputFileStream::stdout_buffered();
success = decompress_file(input_stream_result, stdout); TRY(decompress_file(move(input_stream_result), stdout));
} else { } else {
auto output_stream_result = TRY(Core::OutputFileStream::open_buffered(output_filename)); auto output_stream_result = TRY(Core::OutputFileStream::open_buffered(output_filename));
success = decompress_file(input_stream_result, output_stream_result); TRY(decompress_file(move(input_stream_result), output_stream_result));
}
if (!success) {
warnln("Failed gzip decompressing input file");
return 1;
} }
if (!keep_input_files) if (!keep_input_files)

View file

@ -52,7 +52,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
AK::Optional<ByteBuffer> output_bytes; AK::Optional<ByteBuffer> output_bytes;
if (decompress) if (decompress)
output_bytes = Compress::GzipDecompressor::decompress_all(input_bytes); output_bytes = TRY(Compress::GzipDecompressor::decompress_all(input_bytes));
else else
output_bytes = Compress::GzipCompressor::compress_all(input_bytes); output_bytes = Compress::GzipCompressor::compress_all(input_bytes);

View file

@ -65,27 +65,10 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
if (!directory.is_empty()) if (!directory.is_empty())
TRY(Core::System::chdir(directory)); TRY(Core::System::chdir(directory));
// FIXME: Remove these once we have smart pointers everywhere in LibArchive and LibCompress (or just ported the whole stack to Core::Stream). NonnullOwnPtr<Core::Stream::Stream> input_stream = TRY(Core::Stream::File::open_file_or_standard_stream(archive_file, Core::Stream::OpenMode::Read));
// Until then, we have to hold on to _some_ instance of the file AK::Stream.
// Note that this is only in use together with gzip.
OwnPtr<Core::InputFileStream> file_stream;
auto input_stream = TRY([&]() -> ErrorOr<NonnullOwnPtr<Core::Stream::Stream>> { if (gzip)
if (gzip) { input_stream = make<Compress::GzipDecompressor>(move(input_stream));
// FIXME: Port gzip to Core::Stream.
auto file = Core::File::standard_input();
if (!archive_file.is_empty())
file = TRY(Core::File::open(archive_file, Core::OpenMode::ReadOnly));
file_stream = adopt_own(*new Core::InputFileStream(file));
NonnullOwnPtr<InputStream> gzip_stream = make<Compress::GzipDecompressor>(*file_stream);
return make<Core::Stream::WrappedAKInputStream>(move(gzip_stream));
} else {
return TRY(Core::Stream::File::open_file_or_standard_stream(archive_file, Core::Stream::OpenMode::Read));
}
}());
auto tar_stream = TRY(Archive::TarInputStream::construct(move(input_stream))); auto tar_stream = TRY(Archive::TarInputStream::construct(move(input_stream)));