From 0374c1eb3b5a4f69c05c9debe494b3770b398c3c Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 19 Mar 2024 08:40:38 -0400 Subject: [PATCH] LibPDF: Handle indirect reference resolving during parsing more robustly If `Document::resolve()` was called during parsing, it'd change the reader's current position, so the parsing code that called it would then end up at an unexpected position in the file. Parser.cpp already had special-case recovery when a stream's length was stored in an indirect reference. Commit ead02da98ac70c ("/JBIG2Globals") in #23503 added another case where we could resolve indirect reference during parsing, but wasn't aware of having to save and restore the reader position for that. Put the save/restore code in `DocumentParser::parse_object_with_index` instead, right before the place that ultimately changes the reader's position during `Document::resolve`. This fixes `/JBIG2Globals` and lets us remove the special-case code for `/Length` handling. Since this is kind of subtle, include a test. --- Tests/LibPDF/CMakeLists.txt | 1 + Tests/LibPDF/TestPDF.cpp | 12 ++++++++++++ Userland/Libraries/LibPDF/DocumentParser.cpp | 20 ++++++++++++++++++++ Userland/Libraries/LibPDF/Parser.cpp | 2 -- 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/Tests/LibPDF/CMakeLists.txt b/Tests/LibPDF/CMakeLists.txt index fe57676933..3f4970bdb2 100644 --- a/Tests/LibPDF/CMakeLists.txt +++ b/Tests/LibPDF/CMakeLists.txt @@ -12,6 +12,7 @@ set(TEST_FILES complex.pdf encoding.pdf encryption_nocopy.pdf + jbig2-globals.pdf linearized.pdf non-linearized.pdf oss-fuzz-testcase-62065.pdf diff --git a/Tests/LibPDF/TestPDF.cpp b/Tests/LibPDF/TestPDF.cpp index 0e019ed255..8f3d7289b6 100644 --- a/Tests/LibPDF/TestPDF.cpp +++ b/Tests/LibPDF/TestPDF.cpp @@ -121,6 +121,18 @@ TEST_CASE(encrypted_object_stream) EXPECT_EQ(MUST(info_dict.creator()).value(), "Acrobat PDFMaker 9.1 voor Word"); } +TEST_CASE(resolve_indirect_reference_during_parsing) +{ + auto file = MUST(Core::MappedFile::map("jbig2-globals.pdf"sv)); + auto document = MUST(PDF::Document::create(file->bytes())); + MUST(document->initialize()); + EXPECT_EQ(document->get_page_count(), 1U); + + auto jbig2_stream_value = MUST(document->get_or_load_value(5)); + auto jbig2_stream = MUST(document->resolve_to(jbig2_stream_value)); + EXPECT_EQ(jbig2_stream->bytes().size(), 20'000U); +} + TEST_CASE(malformed_pdf_document) { Array test_inputs = { diff --git a/Userland/Libraries/LibPDF/DocumentParser.cpp b/Userland/Libraries/LibPDF/DocumentParser.cpp index 8d7e6dc6a2..d9924ae8ff 100644 --- a/Userland/Libraries/LibPDF/DocumentParser.cpp +++ b/Userland/Libraries/LibPDF/DocumentParser.cpp @@ -70,11 +70,31 @@ PDFErrorOr DocumentParser::parse_object_with_index(u32 index) if (!m_xref_table->is_object_in_use(index)) return nullptr; + // If this is called to resolve an indirect object reference while parsing another object, + // make sure to restore the current position after parsing the indirect object, so that the + // parser can keep parsing the original object stream afterwards. + // parse_compressed_object_with_index() also moves the reader's position, so this needs + // to be before the potential call to parse_compressed_object_with_index(). + class SavePoint { + public: + SavePoint(Reader& reader) + : m_reader(reader) + { + m_reader.save(); + } + ~SavePoint() { m_reader.load(); } + + private: + Reader& m_reader; + }; + SavePoint restore_current_position { m_reader }; + if (m_xref_table->is_object_compressed(index)) // The object can be found in a object stream return parse_compressed_object_with_index(index); auto byte_offset = m_xref_table->byte_offset_for_object(index); + m_reader.move_to(byte_offset); auto indirect_value = TRY(parse_indirect_value()); VERIFY(indirect_value->index() == index); diff --git a/Userland/Libraries/LibPDF/Parser.cpp b/Userland/Libraries/LibPDF/Parser.cpp index c7842e88f0..2fa72ca87e 100644 --- a/Userland/Libraries/LibPDF/Parser.cpp +++ b/Userland/Libraries/LibPDF/Parser.cpp @@ -483,9 +483,7 @@ PDFErrorOr> Parser::parse_stream(NonnullRefPtrget(CommonNames::Length); if (maybe_length.has_value() && m_document->can_resolve_references()) { // The PDF writer has kindly provided us with the direct length of the stream - m_reader.save(); auto length = TRY(m_document->resolve_to(maybe_length.value())); - m_reader.load(); bytes = m_reader.bytes().slice(m_reader.offset(), length); m_reader.move_by(length); m_reader.consume_whitespace();