LibPDF: Cache fonts per page

Previously, every time a page switched fonts, we'd completely
re-parse the font.

Now, we cache fonts in Renderer, effectively caching them per page.

It'd be nice to have an LRU cache across pages too, but that's a
bigger change, and this already helps a lot.

Font size is part of the cache key, which means we re-parse the same
font at different font sizes. That could be better too, but again,
it's a big help as-is already.

Takes rendering the 1310 pages of the PDF 1.7 reference with

    Build/lagom/bin/pdf --debugging-stats \
        ~/Downloads/pdf_reference_1-7.pdf

from 71s to 11s :^)

Going through pages especially in the index is noticeably snappier.

(On the PDF 2.0 spec, ISO_32000-2-2020_sponsored.pdf, it's less
dramatic: from 19s to 16s.)
This commit is contained in:
Nico Weber 2023-10-10 13:55:35 -04:00 committed by Andreas Kling
parent e6d9bb0774
commit c8510b58a3
2 changed files with 37 additions and 5 deletions

View file

@ -402,19 +402,32 @@ RENDERER_HANDLER(text_set_leading)
return {};
}
// Returns the font described by `key`, creating it with PDFFont::create() if needed.
//
// `key.font_dictionary_key` is looked up in the /Font dictionary of `extra_resources` when given,
// otherwise in the page's resources. Errors from the dictionary lookups or from font creation are
// returned to the caller.
PDFErrorOr<NonnullRefPtr<PDFFont>> Renderer::get_font(FontCacheKey const& key, Optional<NonnullRefPtr<DictObject>> extra_resources)
{
    // The cache key holds only a resource name and a size, and a resource name is only meaningful
    // relative to the dictionary it is looked up in. Restrict the cache to the page's own resources
    // so that a lookup made with caller-supplied resources can neither be answered with, nor store,
    // a font that belongs to a different dictionary under the same name.
    bool const use_cache = !extra_resources.has_value();
    if (use_cache) {
        auto cached = m_font_cache.find(key);
        if (cached != m_font_cache.end())
            return cached->value;
    }

    auto resources = extra_resources.value_or(m_page.resources);

    // Use TRY rather than MUST: the dictionaries come from the document being rendered, so a
    // failed lookup is reported through the PDFErrorOr return value instead of asserting.
    auto fonts_dictionary = TRY(resources->get_dict(m_document, CommonNames::Font));
    auto font_dictionary = TRY(fonts_dictionary->get_dict(m_document, key.font_dictionary_key));

    auto font = TRY(PDFFont::create(m_document, font_dictionary, key.font_size));
    if (use_cache)
        m_font_cache.set(key, font);
    return font;
}
RENDERER_HANDLER(text_set_font)
{
auto resources = extra_resources.value_or(m_page.resources);
auto target_font_name = MUST(m_document->resolve_to<NameObject>(args[0]))->name();
auto fonts_dictionary = MUST(resources->get_dict(m_document, CommonNames::Font));
auto font_dictionary = MUST(fonts_dictionary->get_dict(m_document, target_font_name));
text_state().font_size = args[1].to_float();
auto& text_rendering_matrix = calculate_text_rendering_matrix();
auto font_size = text_rendering_matrix.x_scale() * text_state().font_size;
auto font = TRY(PDFFont::create(m_document, font_dictionary, font_size));
text_state().font = font;
FontCacheKey cache_key { target_font_name, font_size };
text_state().font = TRY(get_font(cache_key, extra_resources));
m_text_rendering_matrix_is_dirty = true;
return {};

View file

@ -99,6 +99,13 @@ class Renderer {
public:
static PDFErrorsOr<void> render(Document&, Page const&, RefPtr<Gfx::Bitmap>, RenderingPreferences preferences);
// Key type for the renderer's font cache (m_font_cache).
// Member order matters: keys are built with aggregate initialization as { name, size }.
struct FontCacheKey {
// Name under which the font is looked up in a /Font resource dictionary.
DeprecatedString font_dictionary_key;
// Size handed to PDFFont::create(); the same font at another size is a distinct key.
float font_size;
// Memberwise equality; note that font_size is compared as a float.
bool operator==(FontCacheKey const&) const = default;
};
private:
Renderer(RefPtr<Document>, Page const&, RefPtr<Gfx::Bitmap>, RenderingPreferences);
@ -139,6 +146,8 @@ private:
Gfx::AffineTransform const& calculate_text_rendering_matrix();
Gfx::AffineTransform calculate_image_space_transformation(int width, int height);
// Returns the font for the given key, using m_font_cache to avoid re-creating fonts.
// When extra_resources is set, the font is looked up there instead of in the page's resources.
PDFErrorOr<NonnullRefPtr<PDFFont>> get_font(FontCacheKey const&, Optional<NonnullRefPtr<DictObject>> extra_resources);
RefPtr<Document> m_document;
RefPtr<Gfx::Bitmap> m_bitmap;
Page const& m_page;
@ -153,12 +162,22 @@ private:
bool m_text_rendering_matrix_is_dirty { true };
Gfx::AffineTransform m_text_rendering_matrix;
// Fonts created so far by this Renderer, keyed by resource name and font size (see get_font()).
HashMap<FontCacheKey, NonnullRefPtr<PDFFont>> m_font_cache;
};
}
namespace AK {
// Hash support for PDF::Renderer::FontCacheKey so it can be used as a HashMap key.
template<>
struct Traits<PDF::Renderer::FontCacheKey> : public GenericTraits<PDF::Renderer::FontCacheKey> {
    static unsigned hash(PDF::Renderer::FontCacheKey const& key)
    {
        // FontCacheKey::operator== compares font_size as a float, so +0.0f and -0.0f are equal
        // keys even though their bit patterns differ. Hash every zero as all-zero bits so that
        // keys which compare equal also hash equally.
        u32 const size_bits = key.font_size == 0.0f ? 0u : bit_cast<u32>(key.font_size);
        return pair_int_hash(key.font_dictionary_key.hash(), int_hash(size_bits));
    }
};
template<>
struct Formatter<PDF::LineCapStyle> : Formatter<StringView> {
ErrorOr<void> format(FormatBuilder& builder, PDF::LineCapStyle const& style)