From b5420b8a9add6559d812971a70d6eaec04bb8dd4 Mon Sep 17 00:00:00 2001 From: Karol Kosek Date: Sun, 10 Jul 2022 00:18:18 +0200 Subject: [PATCH] LibGfx: Implement PNG filtering on write Is it another great upgrade to our PNG encoder like in 9aafaec259? Well, not really - it's not a 2x or 55x improvement like you saw there, but still it saves something: - a screenshot of a blank Serenity desktop dropped from about 45 KiB to 40 KiB. - re-encoding NASA photo of the Earth to PNG again saves about 25% (16.5 MiB -> 12.3 MiB), compared to not using filters. [1]: https://commons.wikimedia.org/wiki/File:The_Blue_Marble_(remastered).jpg --- AK/SIMDExtras.h | 12 +++ Userland/Libraries/LibGfx/PNGShared.h | 10 +++ Userland/Libraries/LibGfx/PNGWriter.cpp | 108 ++++++++++++++++++++++-- 3 files changed, 124 insertions(+), 6 deletions(-) diff --git a/AK/SIMDExtras.h b/AK/SIMDExtras.h index dac25d456d..eba20f2b97 100644 --- a/AK/SIMDExtras.h +++ b/AK/SIMDExtras.h @@ -36,6 +36,18 @@ ALWAYS_INLINE static constexpr u32x4 expand4(u32 u) // Casting +template +ALWAYS_INLINE static u8x4 to_u8x4(TSrc v) +{ + return __builtin_convertvector(v, u8x4); +} + +template +ALWAYS_INLINE static u16x4 to_u16x4(TSrc v) +{ + return __builtin_convertvector(v, u16x4); +} + template ALWAYS_INLINE static u32x4 to_u32x4(TSrc v) { diff --git a/Userland/Libraries/LibGfx/PNGShared.h b/Userland/Libraries/LibGfx/PNGShared.h index 6f60d848ae..b1fecd12c1 100644 --- a/Userland/Libraries/LibGfx/PNGShared.h +++ b/Userland/Libraries/LibGfx/PNGShared.h @@ -43,4 +43,14 @@ ALWAYS_INLINE u8 paeth_predictor(u8 a, u8 b, u8 c) return c; } +ALWAYS_INLINE AK::SIMD::u8x4 paeth_predictor(AK::SIMD::u8x4 a, AK::SIMD::u8x4 b, AK::SIMD::u8x4 c) +{ + return AK::SIMD::u8x4 { + paeth_predictor(a[0], b[0], c[0]), + paeth_predictor(a[1], b[1], c[1]), + paeth_predictor(a[2], b[2], c[2]), + paeth_predictor(a[3], b[3], c[3]), + }; +} + }; diff --git a/Userland/Libraries/LibGfx/PNGWriter.cpp b/Userland/Libraries/LibGfx/PNGWriter.cpp index 950815cfb6..04541c6856 100644 --- a/Userland/Libraries/LibGfx/PNGWriter.cpp +++ b/Userland/Libraries/LibGfx/PNGWriter.cpp @@ -7,12 +7,15 @@ */ #include +#include #include #include #include #include #include +#pragma GCC diagnostic ignored "-Wpsabi" + namespace Gfx { class PNGChunk { @@ -134,6 +137,24 @@ void PNGWriter::add_IEND_chunk() add_chunk(png_chunk); } +union [[gnu::packed]] Pixel { + ARGB32 rgba { 0 }; + struct { + u8 red; + u8 green; + u8 blue; + u8 alpha; + }; + AK::SIMD::u8x4 simd; + + ALWAYS_INLINE static AK::SIMD::u8x4 gfx_to_png(Pixel pixel) + { + swap(pixel.red, pixel.blue); + return pixel.simd; + } +}; +static_assert(AssertSize()); + void PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap) { PNGChunk png_chunk { "IDAT" }; @@ -142,16 +163,91 @@ void PNGWriter::add_IDAT_chunk(Gfx::Bitmap const& bitmap) ByteBuffer uncompressed_block_data; uncompressed_block_data.ensure_capacity(bitmap.size_in_bytes() + bitmap.height()); + Pixel const dummy_scanline[bitmap.width()] {}; + auto* scanline_minus_1 = dummy_scanline; + for (int y = 0; y < bitmap.height(); ++y) { - uncompressed_block_data.append(0); + auto* scanline = reinterpret_cast(bitmap.scanline(y)); + + struct Filter { + PNG::FilterType type; + ByteBuffer buffer {}; + int sum = 0; + + void append(u8 byte) + { + buffer.append(byte); + sum += static_cast(byte); + } + + void append(AK::SIMD::u8x4 simd) + { + append(simd[0]); + append(simd[1]); + append(simd[2]); + append(simd[3]); + } + }; + + Filter none_filter { .type = PNG::FilterType::None }; + none_filter.buffer.ensure_capacity(sizeof(Pixel) * bitmap.height()); + + Filter sub_filter { .type = PNG::FilterType::Sub }; + sub_filter.buffer.ensure_capacity(sizeof(Pixel) * bitmap.height()); + + Filter up_filter { .type = PNG::FilterType::Up }; + up_filter.buffer.ensure_capacity(sizeof(Pixel) * bitmap.height()); + + Filter average_filter { .type = PNG::FilterType::Average }; + average_filter.buffer.ensure_capacity(sizeof(ARGB32) * bitmap.height()); + + Filter paeth_filter { .type = PNG::FilterType::Paeth }; + paeth_filter.buffer.ensure_capacity(sizeof(ARGB32) * bitmap.height()); + + auto pixel_x_minus_1 = Pixel::gfx_to_png(*dummy_scanline); + auto pixel_xy_minus_1 = Pixel::gfx_to_png(*dummy_scanline); for (int x = 0; x < bitmap.width(); ++x) { - auto pixel = bitmap.get_pixel(x, y); - uncompressed_block_data.append(pixel.red()); - uncompressed_block_data.append(pixel.green()); - uncompressed_block_data.append(pixel.blue()); - uncompressed_block_data.append(pixel.alpha()); + auto pixel = Pixel::gfx_to_png(scanline[x]); + auto pixel_y_minus_1 = Pixel::gfx_to_png(scanline_minus_1[x]); + + none_filter.append(pixel); + + sub_filter.append(pixel - pixel_x_minus_1); + + up_filter.append(pixel - pixel_y_minus_1); + + // The sum Orig(a) + Orig(b) shall be performed without overflow (using at least nine-bit arithmetic). + auto sum = AK::SIMD::to_u16x4(pixel_x_minus_1) + AK::SIMD::to_u16x4(pixel_y_minus_1); + auto average = AK::SIMD::to_u8x4(sum / 2); + average_filter.append(pixel - average); + + paeth_filter.append(pixel - PNG::paeth_predictor(pixel_x_minus_1, pixel_y_minus_1, pixel_xy_minus_1)); + + pixel_x_minus_1 = pixel; + pixel_xy_minus_1 = pixel_y_minus_1; } + + scanline_minus_1 = scanline; + + // 12.8 Filter selection: https://www.w3.org/TR/PNG/#12Filter-selection + // For best compression of truecolour and greyscale images, the recommended approach + // is adaptive filtering in which a filter is chosen for each scanline. + // The following simple heuristic has performed well in early tests: + // compute the output scanline using all five filters, and select the filter that gives the smallest sum of absolute values of outputs. + // (Consider the output bytes as signed differences for this test.) + Filter& best_filter = none_filter; + if (abs(best_filter.sum) > abs(sub_filter.sum)) + best_filter = sub_filter; + if (abs(best_filter.sum) > abs(up_filter.sum)) + best_filter = up_filter; + if (abs(best_filter.sum) > abs(average_filter.sum)) + best_filter = average_filter; + if (abs(best_filter.sum) > abs(paeth_filter.sum)) + best_filter = paeth_filter; + + uncompressed_block_data.append(to_underlying(best_filter.type)); + uncompressed_block_data.append(best_filter.buffer); } auto maybe_zlib_buffer = Compress::ZlibCompressor::compress_all(uncompressed_block_data);