LibCompress: Implement Brotli decompressor

This implements the BrotliDecompressionStream, which is a Core::Stream
that can decompress another Core::Stream.
This commit is contained in:
Michiel Visser 2022-03-31 21:07:45 +02:00 committed by Andreas Kling
parent 68772463cb
commit d6a5b11f04
28 changed files with 2725 additions and 0 deletions

View file

@ -569,6 +569,7 @@ if (BUILD_LAGOM)
endforeach()
# Compress
file(COPY "${SERENITY_PROJECT_ROOT}/Tests/LibCompress/brotli-test-files" DESTINATION "./")
file(GLOB LIBCOMPRESS_TESTS CONFIGURE_DEPENDS "../../Tests/LibCompress/*.cpp")
foreach(source ${LIBCOMPRESS_TESTS})
lagom_test(${source} LIBS LagomCompress)

View file

@ -1,4 +1,5 @@
set(TEST_SOURCES
TestBrotli.cpp
TestDeflate.cpp
TestGzip.cpp
TestZlib.cpp
@ -7,3 +8,5 @@ set(TEST_SOURCES
# Register every source listed in TEST_SOURCES through serenity_test().
foreach(source IN LISTS TEST_SOURCES)
serenity_test("${source}" LibCompress LIBS LibCompress)
endforeach()
# Install the Brotli fixture directory alongside the tests. TestBrotli.cpp opens
# the fixtures from /usr/Tests/LibCompress/brotli-test-files when built for
# SerenityOS (presumably this relative destination resolves to that path in the
# image - confirm against the install prefix).
install(DIRECTORY brotli-test-files DESTINATION usr/Tests/LibCompress)

View file

@ -0,0 +1,112 @@
/*
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibCompress/Brotli.h>
#include <LibCore/Stream.h>
// Decompresses the fixture "<file_name>.br" and expects the result to equal the
// contents of the reference fixture "<file_name>".
//
// Any failure to open or read either file aborts the test through MUST().
static void run_test(StringView const file_name)
{
    // Fixtures live in a different place on target than in a Lagom build, so
    // pick the matching location at compile time.
#ifdef __serenity__
    String path = String::formatted("/usr/Tests/LibCompress/brotli-test-files/{}", file_name);
#else
    String path = String::formatted("brotli-test-files/{}", file_name);
#endif

    // Reference side: the expected bytes, read in one go.
    auto expected_file = MUST(Core::Stream::File::open(path, Core::Stream::OpenMode::Read));
    auto expected_bytes = MUST(expected_file->read_all());

    // Compressed side: same path with a ".br" suffix, wrapped in the decompressor.
    String compressed_path = String::formatted("{}.br", path);
    auto compressed_file = MUST(Core::Stream::File::open(compressed_path, Core::Stream::OpenMode::Read));
    Compress::BrotliDecompressionStream decompressor { *compressed_file };
    auto actual_bytes = MUST(decompressor.read_all());

    EXPECT_EQ(actual_bytes, expected_bytes);
}
// Fixture-driven cases: each one passes a fixture name to run_test(), which
// decompresses "<name>.br" and compares the output with the contents of "<name>".
// NOTE(review): the case names hint at what each fixture is meant to exercise
// (uncompressed data, dictionary transforms, larger real-world documents), but
// that depends on how the binary .br files were produced - confirm before
// relying on it.
TEST_CASE(brotli_decompress_uncompressed)
{
run_test("wellhello.txt");
}
TEST_CASE(brotli_decompress_simple)
{
run_test("hello.txt");
}
TEST_CASE(brotli_decompress_simple2)
{
run_test("wellhello2.txt");
}
TEST_CASE(brotli_decompress_lorem)
{
run_test("lorem.txt");
}
TEST_CASE(brotli_decompress_lorem2)
{
run_test("lorem2.txt");
}
TEST_CASE(brotli_decompress_transform)
{
run_test("transform.txt");
}
TEST_CASE(brotli_decompress_serenityos_html)
{
run_test("serenityos.html");
}
TEST_CASE(brotli_decompress_happy3rd_html)
{
run_test("happy3rd.html");
}
TEST_CASE(brotli_decompress_katica_regular_10_font)
{
run_test("KaticaRegular10.font");
}
// Decompresses "zero-one.bin.br" incrementally through a small fixed-size
// buffer (rather than read_all()) and validates the output on the fly:
// the first 16 MiB must be 0x00 bytes, everything after that must be 0x01
// bytes, and the stream must end after exactly 32 MiB.
TEST_CASE(brotli_decompress_zero_one_bin)
{
    // This makes sure that the tests will run both on target and in Lagom.
#ifdef __serenity__
    String path = "/usr/Tests/LibCompress/brotli-test-files/zero-one.bin";
#else
    String path = "brotli-test-files/zero-one.bin";
#endif

    String path_compressed = String::formatted("{}.br", path);

    auto file = MUST(Core::Stream::File::open(path_compressed, Core::Stream::OpenMode::Read));
    auto brotli_stream = Compress::BrotliDecompressionStream { *file };

    u8 buffer_raw[4096];
    // Derive the span length from the array so the two cannot drift apart.
    Bytes buffer { buffer_raw, sizeof(buffer_raw) };

    size_t bytes_read = 0;
    while (true) {
        // A read of zero bytes is treated as the end of the decompressed data.
        size_t nread = MUST(brotli_stream.read(buffer)).size();
        if (nread == 0)
            break;

        for (size_t i = 0; i < nread; i++) {
            // Classify each byte by its absolute offset in the output. Using
            // only the chunk's starting offset would mis-check the tail of a
            // chunk that straddles the 16 MiB boundary, which can happen as
            // soon as the stream returns a short read.
            if (bytes_read + i < 16 * MiB)
                EXPECT(buffer[i] == 0);
            else
                EXPECT(buffer[i] == 1);
        }

        bytes_read += nread;
    }

    EXPECT(bytes_read == 32 * MiB);
    EXPECT(brotli_stream.is_eof());
}

View file

@ -0,0 +1,628 @@
<!DOCTYPE html>
<html>
<head>
<title>SerenityOS: Year 3 in review</title>
<style>
body {
margin-left: auto;
margin-right: auto;
width: 600px;
font-size: 12pt;
font-family: sans-serif;
}
@media screen and (max-width: 610px) {
header h1 {
margin: 0;
}
body {
margin-top: none;
width: 100%;
}
#intro, footer {
margin-left: 1em;
margin-right: 1em;
}
}
@media screen and (min-width: 610px) {
article, h1, h2 {
border-radius: 10px;
}
}
@media only screen and (min-device-width: 375px) and (max-device-width: 667px) and (-webkit-min-device-pixel-ratio: 2) {
body {
width: 90%;
font-size: 1.4em;
}
}
h1, h2 {
padding: 12px;
background: #000;
color: white;
}
article h1 {
font-size: 1.1em;
vertical-align: middle;
margin: 0;
}
article h1 :link,
article h1 :visited {
color: white;
}
article img,
article iframe {
max-width: 100%;
border: 1px solid black;
}
article img.avatar {
width: 64px;
float: right;
border: none;
margin-bottom: 8px;
}
article {
padding: 20px;
margin-bottom: 20px;
background: #ddd;
}
article.developer {
background: #ddf;
font-style: italic;
}
article iframe {
border: 1px solid black;
}
article.hax0r {
background: black;
font-family: monaco;
}
article.hax0r,
article.hax0r h1,
article.hax0r :link,
article.hax0r :visited {
color: lime;
}
article.hax0r h1 {
background: #040;
}
.yakstack {
height: 96px;
margin-left: 32px;
float: right;
}
</style>
</head>
<body>
<header>
<h1>SerenityOS: Year 3 in review</h1>
</header>
<main>
<div id="intro">
<img class="yakstack" src="yakstack.png">
<p><b>Hello friends! :^)</b>
<p>Today we celebrate the third birthday of SerenityOS, counting from the first commit in the
<a href="https://github.com/SerenityOS/serenity/">git repository</a>, on October 10, 2018.
<p>Previous birthdays: <a href="https://serenityos.org/happy/1st">1st</a>, <a href="https://serenityos.org/happy/2nd">2nd</a>.
<p>What follows is a list of interesting events from the past year, mixed with random development
screenshots and also reflections from other developers in the SerenityOS community.
</div>
<article>
<h1>Introduction to SerenityOS</h1>
<p>SerenityOS is a from-scratch desktop operating system that combines a Unix-like core
with the look&amp;feel of 1990s productivity software. It's written in modern C++ and
goes all the way from kernel to web browser. The project aims to build everything in-house
instead of relying on third-party libraries.
<p>I started building this system after
<a href="https://www.youtube.com/watch?v=j3JkNGKZtqM">finishing a 3-month rehabilitation program for drug addiction</a>
in 2018. I found myself with a lot of time and nothing to spend it on. So I began
building something I'd always wanted to build: my very own dream OS.
<p>Parts of my development work is presented in screencast format on
<a href="https://youtube.com/andreaskling">my YouTube channel</a>.
I also post monthly update videos showcasing new features there.
</article>
<article>
<h1>2020-12-06: Working on Reddit support in LibWeb</h1>
<p>Building a browser takes time, and there's a lot of unglamorous
work like figuring out why things don't align right. Fortunately it's
also really fun!
<p><img src="2020-12-06.png">
</article>
<article>
<h1>2020-12-20: Interview on CppCast</h1>
<p>I went on the <a href="https://cppcast.com">CppCast</a> podcast with <a href="https://twitter.com/lefticus">Jason Turner</a>
and <a href="https://twitter.com/robwirving">Rob Irving</a> to talk about SerenityOS.
<p>It was my first time doing an interview and I was really nervous about it,
but it turned out very okay!
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/SRq9HSGn2qE" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article class="hax0r">
<h1>2020-12-20: The 2020 HXP CTF</h1>
<p>
SerenityOS was once again featured in the <a href="https://ctf.link/">HXP CTF</a>.
After being in their 2019 CTF, we spent a whole bunch of time beefing up system security,
and it definitely helped: This time, only 1 team was able to find an exploit,
compared to 6 teams in the previous CTF!
<p>
Write-ups &amp; exploits from the event:
<ul>
<li><a href="https://hxp.io/blog/79/hxp-CTF-2020-wisdom2/"><b>yyyyyyy</b> found a kernel LPE due to a race condition between execve() and ptrace()</a></li>
<li><a href="https://github.com/allesctf/writeups/blob/master/2020/hxpctf/wisdom2/writeup.md"><b>ALLES! CTF</b> found a kernel LPE due to missing EFLAGS validation in ptrace().</a></li>
</ul>
</article>
<article>
<h1>2021-01-06: Reading "Hackles" on SerenityOS</h1>
<p>I was very happy to get the classic Unix geek webcomic
<a href="http://hackles.org">Hackles</a> working in Browser.
<p><img src="2021-01-06.png">
</article>
<article>
<h1>2021-01-10: LiveOverflow videos about SerenityOS</h1>
<p>At the start of 2021, hacking YouTuber LiveOverflow published
a series of videos about SerenityOS, looking into exploits against
the system.
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/qUh507Na9nk" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
<p>All SerenityOS related videos from LiveOverflow:
<ul>
<li><a href="https://youtube.com/watch?v=qUh507Na9nk">Kernel Root Exploit via a ptrace() and execve() Race Condition</a></li>
<li><a href="https://youtube.com/watch?v=oIAP1_NrSbY">Reading Kernel Source Code - Analysis of an Exploit</a></li>
<li><a href="https://youtube.com/watch?v=1hpqiWKFGQs">How CPUs Access Hardware - Another SerenityOS Exploit</a></li>
</ul>
</article>
<article class="hax0r">
<h1>2021-02-11: vakzz's full chain exploit</h1>
<p><a href="https://twitter.com/wcbowling">William Bowling (vakzz)</a> released
the first ever full chain exploit for SerenityOS, combining a browser bug and
a kernel bug to get remote root access via opening a web page!
<p>Check out vakzz's <a href="https://devcraft.io/2021/02/11/serenityos-writing-a-full-chain-exploit.html">excellent write-up</a>
for a step-by-step walthrough.
</article>
<article>
<h1>2021-02-13: SerenityOS developer interview: Linus Groh</h1>
<p>I wanted to introduce my YouTube audience to more of the SerenityOS
developer community, and Linus became the first guest in my developer
interview series!
<p>It was really nice to shine a light on someone else doing great work on the project.
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/oG8RSX1hyCg" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article class="developer">
<h1>
Developer reflections: <a href="https://twitter.com/linusgroh">Linus Groh</a>
<img class="avatar nolinkify" src="linusg.png">
</h1>
<p>One of my favorite aspects of the past year of SerenityOS development
is the overall progress on the browser! There's still a ton of work to
do, but we're starting to get more and more websites into a recognizable
shape - compared to a year ago, the number of blank pages and crashes
on load is reduced considerably.
<p>It's also one of the most collaborative subsystems: everything from
improving spec compliance in our JavaScript engine and adding some
basic optimizations to implementing countless Web APIs, and continuous
work on CSS and DOM has been a team effort. It's great to see everyone
get comfortable, explore, and eventually become experts in their
favorite topics of browser and JS engine development!
<p>It's been so much fun building all these things together, and I'm
excited to see how far we can get in another year :^)
</article>
<article>
<h1>2021-03-06: Classic game "port": Diablo</h1>
<p>DevilutionX is a reverse engineered "port" of the classic game Diablo.
I ported it to SerenityOS and captured the process in a video.
To date, this is my most viewed video and thousands of people discovered
the project through this video.
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/ZOzZ8R4gphE" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
<p>I also finally beat the game!
<p><img src="2021-03-06.png">
</article>
<article>
<h1>2021-04-01: A new direction for the project</h1>
<p>On April 1st, I posted a video announcing a new visual and spiritual direction
for the SerenityOS project. Most people got the joke :^)
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/a-WXzLKv_rc" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article>
<h1>
2021-04-10: Opening a SerenityOS Discord server
<img class="avatar nolinkify" src="yakbait.png">
</h1>
<p>We decided to try out Discord after seeing how it was used to great effect
in the <a href="https://ziglang.org">Zig language</a> community.
<p>It's been a huge success! While our IRC channel peaked at about 170 users,
we've got well over 4000 members on Discord, and it's helped us reach new
levels of collaboration that were simply not possible with IRC.
<p>It has also spawned an extremely nerdy culture of <a href="https://github.com/kleinesfilmroellchen/yaksplained">yak-related memes</a>.
<p><img src="2021-04-10.png">
</article>
<article>
<h1>2021-04-18: Interviewed on "Systems with JT"</h1>
<p>Programming language wizard <a href="https://twitter.com/jntrnr">JT</a> invited me for an live interview
about SerenityOS and everything around it. It was my first live interview, and I was kinda nervous
but I think it went well!
<p>JT also did a <a href="https://www.youtube.com/watch?v=TtV86uL5oD4">heartwarming video review</a> of SerenityOS back around Christmas.
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/5h8bo9OxCwI" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article>
<h1>2021-04-26: More project maintainers</h1>
<p>In the interview with JT, one of the things that came up was my own
scalability as a project maintainer. Up until this point I had been doing
all the PR review and merging myself.
<p>After talking about it with JT, I realized that I needed to ask for
some help from a handful of trusted contributors. It was scary to give up
a bit of control, but in retrospect it's one of the best decisions I've made. :^)
<p>At the time of writing, we now have five maintainers in addition to myself (in alphabetical order):
<ul>
<li><a href="https://twitter.com/the_semicolon_">Ali Mohammadpur</a></li>
<li><a href="https://twitter.com/bgianf">Brian Gianforcaro</a></li>
<li><a href="https://twitter.com/gunnarbeutner">Gunnar Beutner</a></li>
<li><a href="https://twitter.com/horowitz_idan">Idan Horowitz</a></li>
<li><a href="https://twitter.com/linusgroh">Linus Groh</a></li>
</ul>
<p>They each bring their own expertise and passion to the project, and they've been doing a great job
at keeping the project moving forward while growing.
</article>
<article>
<h1>2021-05-16: Some GUI face-lifts</h1>
<p>Sometimes I like to pick out a part of the GUI that is particularly weak
and spend some time on improving it. Here I was working on the PixelPaint
application, and also the system shutdown dialog.
<p><img src="2021-05-16.png">
<p><img src="2021-05-16-2.png">
</article>
<article>
<h1>2021-05-27: Linus gets on GitHub Sponsors</h1>
<p>Linus becomes the second person to accept <a href="https://github.com/sponsors/linusg">sponsorships</a>
for his SerenityOS work. More people getting sponsored to work on SerenityOS is super cool!
</article>
<article>
<h1>2021-05-28: I quit my job to work on SerenityOS full time!</h1>
<p>As of May of 2021, I'm receiving enough in donations to be able to support
myself while working full-time on SerenityOS!
I wrote a <a href="https://awesomekling.github.io/I-quit-my-job-to-focus-on-SerenityOS-full-time/">blog post about it here</a> and people were very
<a href="https://www.osnews.com/story/133492/serenityos-founder-and-main-developer-goes-full-time-for-serenityos/">supportive</a>
<a href="https://news.ycombinator.com/item?id=27317655">around</a>
<a href="https://www.reddit.com/r/SerenityOS/comments/nn1id7/i_quit_my_job_to_focus_on_serenityos_full_time/">the</a>
<a href="https://lobste.rs/s/lsumm4/i_quit_my_job_focus_on_serenityos_full_time">web</a>.
<p>I'm extremely grateful for all the support, and it's super exciting to be
able to focus on this full time! Massive thanks to everyone who has supported
me over the years! If you would like to help me out as well, check out
the links at the bottom of this page.
</article>
<article>
<h1>2021-06-12: Interview on Zig SHOWTIME!</h1>
<p>I was a guest on the <a href="https://zig.show/">Zig SHOWTIME</a> variety show
from the <a href="https://ziglang.org">Zig language</a> community. The theme was
"tech, taste and soul" and the interview lasted almost 3 hours. Exhausting but fun!
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/e_hCJI__q_4" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article>
<h1>2021-06-30: 64-bit mode activated!</h1>
<p>Up until this point, SerenityOS was a 32-bit x86-only system. Then came x86_64,
much thanks to the hard work of <a href="https://twitter.com/gunnarbeutner">Gunnar Beutner</a>
who decided that the port was <i>going to happen</i>, and then didn't stop until it was up and running!
<p><img src="x86_64.png">
</article>
<article class="developer">
<h1>
Developer reflections: <a href="https://twitter.com/bgianf">Brian Gianforcaro</a>
<img class="avatar nolinkify" src="bgianf.jpg">
</h1>
<p>The past year of Serenity development has been super exciting! One of my favorite things
to happen was the bring up of the x86_64 Kernel. Andreas started making baby steps in Feb 2021,
followed by others contributing additional fixes, until around Jun 2021 when
<a href="https://twitter.com/gunnarbeutner">Gunnar Beutner</a> started contributing tons
of patches and with the help of many others got the system booting and running on x86_64.
In my mind this was a significant symbolic step for the project and the community, onboarding
another architecture makes the system a bit more real in my mind.
<p>From the community perspective I found it very inspiring how Gunnar just took the lead and
started fixing issues left and right. The community saw the momentum and started working
on fixes as well, and everyone together got the system running.
<p>I wish Andreas, the SerenityOS project and community, continued success and here's hoping
for another fruitful year of fun and progress. With the
<a href="https://github.com/SerenityOS/serenity/pull/10276">nascent aarch64 port</a> under way by
<a href="https://twitter.com/thakis">Nico Weber</a>, and the countless other exciting things
folks are working on, I'm excited to see what the next year has in store! :^)
</article>
<article>
<h1>2021-07-08: SerenityOS Office Hours</h1>
<p>After an interesting back &amp; forth "discussion" with my YouTube audience
that started with the question "Am I losing touch with the audience?",
I decided to put some serious effort into connecting with the audience.
<p>After some experimentation, I finally arrived at the <b>SerenityOS Office Hours</b>
format. This is a weekly Q&amp;A livestream that I do every Friday at 4pm Swedish Time.
People are invited to ask any technical or non-technical question about SerenityOS
and we dig into whatever topics come up. It has been well-received and I've really
enjoyed being able to answer questions interactively!
<p>Check out my <a href="https://www.youtube.com/playlist?list=PLMOpZvQB55bf4FjluKyo01ZnXq75SaU5L">stream archive</a>
on YouTube. (And come say hi when I'm live some time!)
</article>
<article>
<h1>2021-07-08: A world map of SerenityOS hackers</h1>
<p>Linus created a <a href="https://usermap.serenityos.org/">collaborative map</a>
of SerenityOS developers &amp; users around the world.
<p><a href="https://usermap.serenityos.org"><img src="usermap.png"></a>
</article>
<article>
<h1>2021-07-20: TrueType renderer improvements</h1>
<p>While I'm a big fan of bitmap fonts personally, I did spend some time working
on our TrueType renderer, fixing up things like vertical alignment and glyph sizes.
<p>I also did some work to support the <b style="font-family: Tahoma, sans-serif">Microsoft Tahoma</b>
and <b style="font-family: 'JetBrains Mono', sans-serif">JetBrains Mono</b> typefaces,
seen in this screenshot!
<p><img src="2021-07-20.png">
</article>
<article>
<h1>2021-07-26: Building a "Settings" app</h1>
<p>Until this point, all the various settings dialogs were scattered
around the system menu. I decided it was time to collect them in a
simple Settings application instead. I think it turned out quite nice!
<p><img src="2021-07-26.png">
</article>
<article>
<h1>2021-07-26: SerenityOS developer interview: Ali Mohammadpur</h1>
<p>I did another developer interview video! This time with Ali,
who is behind many of the subsystems in Serenity (including TLS,
line editing, the spreadsheet, and more!)
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/BL5h6XEIusQ" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article>
<h1>2021-08-10: Working on multi-core stability</h1>
<p>Multi-core support is still immature in SerenityOS, but we have been making some
strides forward in this area. In this screenshot, I'm successfully running <b>Quake II</b>
using 2 CPU's simultaneously.
<p><img src="2021-08-10.png">
</article>
<article>
<h1>2021-08-18: ArsTechnica reviews SerenityOS</h1>
<p>In mid-August, ArsTechnica ran a <a href="https://arstechnica.com/gadgets/2021/08/not-a-linux-distro-review-serenityos-is-a-unix-y-love-letter-to-the-90s/">feature article on SerenityOS</a>.
This came out of nowhere and was a lot of fun!
<p><a href="https://arstechnica.com/gadgets/2021/08/not-a-linux-distro-review-serenityos-is-a-unix-y-love-letter-to-the-90s/"><img class="nolinkify" src="arstechnica.png"></a>
</article>
<article>
<h1>2021-08-29: Showing SerenityOS to my nephew</h1>
<p>My nephew called me on Skype while I was hacking on something, and I asked
if he wanted a tour of the operating system. He said yes, and I got this sweet
screenshot of him excitedly seeing me beat our Breakout game!
<p><img src="2021-08-29.png">
</article>
<article>
<h1>2021-09-12: 500 contributors on GitHub!</h1>
<p>It's wild how many people have <a href="https://github.com/SerenityOS/serenity/graphs/contributors">contributed</a>
to the project at this point!
<p><img src="2021-09-12.png">
</article>
<article>
<h1>2021-09-18: Linus Groh interviewed on CppCast</h1>
<p>It's been so cool to see <a href="https://linus.dev/posts/my-journey-with-serenityos/">Linus's journey with SerenityOS</a>,
from not knowing C++ at all 18 months ago, to being interviewed on a major C++ podcast.
<center><iframe width="560" height="315" data-src="https://www.youtube.com/embed/YLN0A9hziKQ" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></center>
</article>
<article>
<h1>2021-09-19: Reading the HTML spec</h1>
<p>It's a pretty cool milestone when your browser engine is strong enough
to download and display the HTML spec itself.
<p><img src="2021-09-19.png">
</article>
<article class="developer">
<h1>
Developer reflections: <a href="https://twitter.com/horowitz_idan">Idan Horowitz</a>
<img class="avatar nolinkify" src="idanho.jpg">
</h1>
<p>One of the main subprojects in LibJS that was being worked on in 2021 was support for
the stage 3 <a href="https://github.com/tc39/proposal-temporal">Temporal proposal</a>,
which aims to replace the old and awkward <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date">Date API</a>
with a more modern, unified and fully-featured interface.
<p>As a result of the efforts of many contributors (with some of the most notable ones
being <a href="https://twitter.com/linusgroh">Linus Groh</a>
and <a href="https://github.com/Lubrsi">Luke Wilde</a>) Serenity's
LibJS contains the most fleshed out Temporal implementation out of all the popular Javascript engines.
</article>
<article>
<h1>2021-10-02: Browser performance work</h1>
<p>Lately I've been doing a ton of work on browser performance, trying to
bring it to a point where it can display complex pages in a somewhat reasonable
time.
<p>Here I am using Profiler to examine what appears to be memory allocation
performance in our regular expression engine.
<p>The profiling system has matured quite a bit during the last year. It now
has the ability to capture full-system profiles, and we've got more visualizations
to aid in performance analysis. :^)
<p><img src="2021-10-02.png">
</article>
<article>
<h1>Monthly update videos</h1>
<p>The tradition of the monthly SerenityOS update video is alive and well,
ever since my first-ever update video in March 2019.
<p>Something new this year is that for the last couple of videos, I've been
joined by Linus in the videos. The sheer amount of things happening month-to-month
was getting hard to cover by myself, and it's great to share the stage with
someone else who cares deeply about the project as well.
<p><ul>
<li><a href="https://www.youtube.com/watch?v=L-IFGxw-kV4">SerenityOS update (October 2020)</a></li>
<li><a href="https://www.youtube.com/watch?v=AYZ1Wqb9p2w">SerenityOS update (November 2020)</a></li>
<li><a href="https://www.youtube.com/watch?v=7aof37-uCRE">SerenityOS update (December 2020)</a></li>
<li><a href="https://www.youtube.com/watch?v=Arfy5iX0wgI">SerenityOS update (January 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=M81Hy5UP2nA">SerenityOS update (February 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=2OdYWoXIVd0">SerenityOS update (March 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=KehSJ_fdTxU">SerenityOS update (April 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=O3MtPgTUOC8">SerenityOS update (May 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=QI3o2G8MPbQ">SerenityOS update (June 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=nUCpt6F5q-s">SerenityOS update (July 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=GT2SO-X2Wik">SerenityOS update (August 2021)</a></li>
<li><a href="https://www.youtube.com/watch?v=y4bsO4E0G38">SerenityOS update (September 2021)</a></li>
</ul>
<p>Check out the <a href="https://www.youtube.com/playlist?list=PLMOpZvQB55bfp6ykOLayLqLrjcpv_Sw3P">playlist on YouTube</a>
for the full archive!
</article>
</main>
<footer>
<h2>Thanks</h2>
<p>To all the awesome people who have particpated in the last year, writing code,
bug reports, documentation, commenting/liking/sharing my videos, sending letters,
chilling on Discord, coming to the Office Hours livestreams, telling your friends,
etc, thank you all!
<p>I'm unbelievably grateful for all the love and support this project receives!
<p>And also, a huge <b>thank you!</b> to everyone who has supported me via
<a href="https://github.com/sponsors/awesomekling">GitHub Sponsors</a>,
<a href="https://patreon.com/serenityos">Patreon</a>,
and <a href="https://paypal.me/awesomekling">PayPal</a>. Thanks to you, I'm able
to do this full time and I'm excited to see where we can push this project!
<p>All right, let's keep moving forward into year number 4!
<p><i>Andreas Kling, 2021-10-10</i>
<br><a href="https://github.com/awesomekling">GitHub</a> |
<a href="https://youtube.com/c/AndreasKling">YouTube</a> |
<a href="https://twitter.com/awesomekling">Twitter</a> |
<a href="https://patreon.com/serenityos">Patreon</a> |
<a href="https://paypal.me/awesomekling">PayPal</a> |
<a href="https://store.serenityos.org">Store</a>
<br><br>
</footer>
<script>
// Don't insert YouTube iframes on serenity, since we can't play the videos yet anyway.
if (navigator.platform != "SerenityOS") {
for (let iframe of document.getElementsByTagName("iframe")) {
iframe.setAttribute("src", iframe.getAttribute("data-src"));
}
}
// Linkify <img> elements without the 'nolinkify' class.
for (let img of document.querySelectorAll("article img:not(.nolinkify)")) {
let a = document.createElement("a");
a.href = img.src;
img.parentNode.replaceChild(a, img);
a.appendChild(img);
}
let stack = document.getElementsByClassName("yakstack")[0];
stack.onmousedown = function() { stack.src = "yakoverflow.png"; }
</script>
</body>
</html>

Binary file not shown.

View file

@ -0,0 +1 @@
Hello hello hello hello hello hello hello hello hello

Binary file not shown.

View file

@ -0,0 +1 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Pharetra vel turpis nunc eget lorem. Gravida dictum fusce ut placerat orci nulla pellentesque. Potenti nullam ac tortor vitae purus faucibus ornare suspendisse. A lacus vestibulum sed arcu non odio. Ac odio tempor orci dapibus ultrices in iaculis nunc sed. In arcu cursus euismod quis. Pretium lectus quam id leo in. Ac ut consequat semper viverra nam libero justo laoreet sit. Ut porttitor leo a diam sollicitudin tempor. Libero volutpat sed cras ornare arcu dui vivamus. Eu scelerisque felis imperdiet proin fermentum leo. Ut pharetra sit amet aliquam id diam. Diam quis enim lobortis scelerisque fermentum dui. Pellentesque eu tincidunt tortor aliquam nulla facilisi cras. Rhoncus urna neque viverra justo nec ultrices dui.

Binary file not shown.

View file

@ -0,0 +1 @@
nibh praesent tristique magna sit amet purus gravida quis blandit turpis cursus in hac habitasse platea dictumst quisque sagittis purus sit amet volutpat consequat mauris nunc congue nisi vitae suscipit tellus mauris a diam maecenas sed enim ut sem viverra aliquet eget sit amet tellus cras adipiscing enim eu turpis

Binary file not shown.

View file

@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>
<head>
<title>SerenityOS</title>
<style>
body { font-family: sans-serif; }
</style>
</head>
<body>
<img src="banner2.png" alt="SerenityOS">
<h1>SerenityOS</h1>
<b>A graphical Unix-like operating system for desktop computers!</b>
<p>SerenityOS is a love letter to '90s user interfaces with a custom Unix-like core. It flatters with sincerity by stealing beautiful ideas from various other systems.</p>
<p>Roughly speaking, the goal is a marriage between the aesthetic of late-1990s productivity software and the power-user accessibility of late-2000s *nix.</p>
<p>This is a system by us, for us, based on the things we like.</p>
<p><b>Project:</b></p>
<ul>
<li><a href="https://github.com/SerenityOS/serenity">SerenityOS on GitHub</a></li>
<li><a href="https://discord.gg/serenityos">SerenityOS Discord Server</a> <font color=red>(join here to chat!)</font></li>
<li><a href="faq/">Frequently asked questions</a></li>
<li><a href="bounty/">Bug bounty program</a></li>
</ul>
<p><b>Sponsoring developers:</b></p>
<ul>
<li>
<b>Andreas Kling (<a href="https://twitter.com/awesomekling">@awesomekling</a>):</b>
<ul>
<li><a href="https://github.com/sponsors/awesomekling">GitHub Sponsors</a></li>
<li><a href="https://www.patreon.com/serenityos">Patreon</a></li>
</ul>
</li>
<br>
<li>
<b>Linus Groh (<a href="https://twitter.com/linusgroh">@linusgroh</a>):</b>
<ul>
<li><a href="https://github.com/sponsors/linusg">GitHub Sponsors</a></li>
<li><a href="https://liberapay.com/linusg">Liberapay</a></li>
</ul>
</li>
<br>
<li>
<b>Sam Atkins (<a href="https://twitter.com/atkinssj">@AtkinsSJ</a>):</b>
<ul>
<li><a href="https://github.com/sponsors/AtkinsSJ">GitHub Sponsors</a></li>
</ul>
</li>
</ul>
<p><b>Other links:</b></p>
<ul>
<li><a href="https://youtube.com/c/andreaskling">Andreas Kling on YouTube</a></li>
<li><a href="https://youtube.com/c/linusgroh">Linus Groh on YouTube</a></li>
<li><a href="happy/3rd/">Happy 3rd birthday! SerenityOS: Year 3 in review</a></li>
<li><a href="happy/2nd/">Happy 2nd birthday! SerenityOS: The second year</a></li>
<li><a href="happy/1st/">Happy 1st birthday! SerenityOS: From zero to HTML in a year</a></li>
<li><a href="https://happy-serenityos.linus.dev/">Linus's ":^)" tracker</a></li>
<li><a href="https://changelog.serenityos.org/">Lubrsi's commit overview, grouped by month and category</a></li>
<li><a href="https://github.com/SerenityOS/yaksplained">Yaksplained: detailed explanation of yak-related emojis on our Discord server <img src="https://camo.githubusercontent.com/eec2b668c9d82d25aaf61d9afec1af3923f2d9e21bddc83a9ac621254af00ee6/68747470733a2f2f63646e2e646973636f72646170702e636f6d2f656d6f6a69732f3837333637323530353330393637393735382e706e67" height="16" alt=":yakbait:"></a></li>
</ul>
<p><b>Screenshot:</b></p>
<img src="screenshot-b36968c.png">
</body>
</html>

Binary file not shown.

View file

@ -0,0 +1,121 @@
// 0 "" Identity ""
// 1 "" Identity " "
// 2 " " Identity " "
// 3 "" OmitFirst1 ""
// 4 "" FermentFirst " "
// 5 "" Identity " the "
// 6 " " Identity ""
// 7 "s " Identity " "
// 8 "" Identity " of "
// 9 "" FermentFirst ""
// 10 "" Identity " and "
// 11 "" OmitFirst2 ""
// 12 "" OmitLast1 ""
// 13 ", " Identity " "
// 14 "" Identity ", "
// 15 " " FermentFirst " "
// 16 "" Identity " in "
// 17 "" Identity " to "
// 18 "e " Identity " "
// 19 "" Identity "\""
// 20 "" Identity "."
// 21 "" Identity "\">"
// 22 "" Identity "\n"
// 23 "" OmitLast3 ""
// 24 "" Identity "]"
// 25 "" Identity " for "
// 26 "" OmitFirst3 ""
// 27 "" OmitLast2 ""
// 28 "" Identity " a "
// 29 "" Identity " that "
// 30 " " FermentFirst ""
// 31 "" Identity ". "
// 32 "." Identity ""
// 33 " " Identity ", "
// 34 "" OmitFirst4 ""
// 35 "" Identity " with "
// 36 "" Identity "'"
// 37 "" Identity " from "
// 38 "" Identity " by "
// 39 "" OmitFirst5 ""
// 40 "" OmitFirst6 ""
// 41 " the " Identity ""
// 42 "" OmitLast4 ""
// 43 "" Identity ". The "
// 44 "" FermentAll ""
// 45 "" Identity " on "
// 46 "" Identity " as "
// 47 "" Identity " is "
// 48 "" OmitLast7 ""
// 49 "" OmitLast1 "ing "
// 50 "" Identity "\n\t"
// 51 "" Identity ":"
// 52 " " Identity ". "
// 53 "" Identity "ed "
// 54 "" OmitFirst9 ""
// 55 "" OmitFirst7 ""
// 56 "" OmitLast6 ""
// 57 "" Identity "("
// 58 "" FermentFirst ", "
// 59 "" OmitLast8 ""
// 60 "" Identity " at "
// 61 "" Identity "ly "
// 62 " the " Identity " of "
// 63 "" OmitLast5 ""
// 64 "" OmitLast9 ""
// 65 " " FermentFirst ", "
// 66 "" FermentFirst "\""
// 67 "." Identity "("
// 68 "" FermentAll " "
// 69 "" FermentFirst "\">"
// 70 "" Identity "=\""
// 71 " " Identity "."
// 72 ".com/" Identity ""
// 73 " the " Identity " of the "
// 74 "" FermentFirst "'"
// 75 "" Identity ". This "
// 76 "" Identity ","
// 77 "." Identity " "
// 78 "" FermentFirst "("
// 79 "" FermentFirst "."
// 80 "" Identity " not "
// 81 " " Identity "=\""
// 82 "" Identity "er "
// 83 " " FermentAll " "
// 84 "" Identity "al "
// 85 " " FermentAll ""
// 86 "" Identity "='"
// 87 "" FermentAll "\""
// 88 "" FermentFirst ". "
// 89 " " Identity "("
// 90 "" Identity "ful "
// 91 " " FermentFirst ". "
// 92 "" Identity "ive "
// 93 "" Identity "less "
// 94 "" FermentAll "'"
// 95 "" Identity "est "
// 96 " " FermentFirst "."
// 97 "" FermentAll "\">"
// 98 " " Identity "='"
// 99 "" FermentFirst ","
// 100 "" Identity "ize "
// 101 "" FermentAll "."
// 102 "\xc2\xa0" Identity ""
// 103 " " Identity ","
// 104 "" FermentFirst "=\""
// 105 "" FermentAll "=\""
// 106 "" Identity "ous "
// 107 "" FermentAll ", "
// 108 "" FermentFirst "='"
// 109 " " FermentFirst ","
// 110 " " FermentAll "=\""
// 111 " " FermentAll ", "
// 112 "" FermentAll ","
// 113 "" FermentAll "("
// 114 "" FermentAll ". "
// 115 " " FermentAll "."
// 116 "" FermentAll "='"
// 117 " " FermentAll ". "
// 118 " " FermentFirst "=\""
// 119 " " FermentAll "='"
// 120 " " FermentFirst "='"

Binary file not shown.

View file

@ -0,0 +1 @@
Well hello friends!

Binary file not shown.

View file

@ -0,0 +1,2 @@
Well hello friends!
Well hello friends!

View file

@ -0,0 +1 @@
،8ہo¤T]«V¹beYب(منA¬SےS#

Binary file not shown.

View file

@ -0,0 +1,906 @@
/*
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/BinarySearch.h>
#include <AK/QuickSort.h>
#include <LibCompress/Brotli.h>
#include <LibCompress/BrotliDictionary.h>
namespace Compress {
// Decodes a single symbol by consuming bits from input_stream until the bits gathered
// so far equal one of the stored codes. Both the stored codes and the accumulator carry
// a leading 1 marker bit, so bit patterns of different lengths never compare equal.
// Fails with "no matching code found" once the accumulator reaches 1 << 16.
ErrorOr<size_t> BrotliDecompressionStream::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream)
{
    // FIXME: This is very inefficient and could greatly be improved by implementing this
    //        algorithm: https://www.hanshq.net/zip.html#huffdec
    for (size_t candidate = 1; candidate < (1 << 16);) {
        size_t match_position;
        if (binary_search(m_symbol_codes.span(), candidate, &match_position))
            return m_symbol_values[match_position];

        // No code matches yet: shift in one more bit and try again.
        candidate <<= 1;
        candidate |= TRY(input_stream.read_bit());
    }

    return Error::from_string_literal("no matching code found");
}
// Wraps the given stream in the bit-level reader used by the decoder.
// Nothing is read from the stream here; all decoding happens in read().
BrotliDecompressionStream::BrotliDecompressionStream(Stream& stream)
: m_input_stream(stream)
{
}
// Reads the variable-length window size field from the start of the stream and returns
// the window size exponent (10..24). One bit pattern is rejected as invalid.
ErrorOr<size_t> BrotliDecompressionStream::read_window_length()
{
    // A lone 0 bit selects the value 16.
    bool has_more_bits = TRY(m_input_stream.read_bit());
    if (!has_more_bits)
        return 16;

    // The next three bits, when non-zero (1..7), select 18..24.
    size_t high_selector = TRY(m_input_stream.read_bits(3));
    VERIFY(high_selector <= 7);
    if (high_selector != 0)
        return 17 + high_selector;

    // Otherwise three more bits follow: 0 selects 17, 1 is invalid, 2..7 select 10..15.
    size_t low_selector = TRY(m_input_stream.read_bits(3));
    VERIFY(low_selector <= 7);
    if (low_selector == 0)
        return 17;
    if (low_selector == 1)
        return Error::from_string_literal("invalid window length");
    return 8 + low_selector;
}
// Reads the 2-bit selector for the meta-block length field and returns the number of
// nibbles (4, 5 or 6) used to store it. The selector value 3 yields 0, which read()
// treats as "this meta-block only contains metadata".
ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles()
{
    static constexpr size_t nibble_count_by_selector[4] { 4, 5, 6, 0 };

    size_t selector = TRY(m_input_stream.read_bits(2));
    VERIFY(selector < 4);
    return nibble_count_by_selector[selector];
}
// Reads a variable-length encoded count in the range 1..256.
ErrorOr<size_t> BrotliDecompressionStream::read_variable_length()
{
    // Value    Bit Pattern
    // -----    -----------
    // 1                  0
    // 2               0001
    // 3..4           x0011
    // 5..8          xx0101
    // 9..16        xxx0111
    // 17..32      xxxx1001
    // 33..64     xxxxx1011
    // 65..128   xxxxxx1101
    // 129..256 xxxxxxx1111
    bool is_larger_than_one = TRY(m_input_stream.read_bit());
    if (!is_larger_than_one)
        return 1;

    // The next three bits tell how many extra bits follow.
    size_t extra_bit_count = TRY(m_input_stream.read_bits(3));
    VERIFY(extra_bit_count <= 7);
    if (extra_bit_count == 0)
        return 2;

    // With n extra bits the encoded range starts at 2^n + 1 (3, 5, 9, 17, 33, 65, 129).
    size_t range_start = (1u << extra_bit_count) + 1;
    return range_start + TRY(m_input_stream.read_bits(extra_bit_count));
}
// Reads one code length (0..5) of the code-length alphabet using the fixed
// variable-length code shown below.
ErrorOr<size_t> BrotliDecompressionStream::read_complex_prefix_code_length()
{
    // Symbol   Code
    // ------   ----
    // 0          00
    // 1        0111
    // 2         011
    // 3          10
    // 4          01
    // 5        1111
    size_t first_two_bits = TRY(m_input_stream.read_bits(2));
    if (first_two_bits == 0)
        return 0;
    if (first_two_bits == 1)
        return 4;
    if (first_two_bits == 2)
        return 3;
    VERIFY(first_two_bits == 3);

    // Every longer code starts with two set bits; up to two more bits disambiguate.
    bool third_bit = TRY(m_input_stream.read_bit());
    if (!third_bit)
        return 2;

    bool fourth_bit = TRY(m_input_stream.read_bit());
    return fourth_bit ? 5 : 1;
}
// Reads a prefix code description into `code`. The leading 2-bit field selects the
// representation: the value 1 means a simple prefix code, any other value is passed
// on to the complex prefix code reader as `hskip`.
ErrorOr<void> BrotliDecompressionStream::read_prefix_code(CanonicalCode& code, size_t alphabet_size)
{
    size_t hskip = TRY(m_input_stream.read_bits(2));

    bool is_simple_code = (hskip == 1);
    if (is_simple_code)
        return read_simple_prefix_code(code, alphabet_size);

    return read_complex_prefix_code(code, alphabet_size, hskip);
}
// Reads a "simple" prefix code: one to four explicitly listed symbols whose code shapes
// are fixed by the symbol count (plus one tree-select bit when there are four symbols).
//
// `code` must be empty on entry. Codes are stored with a leading 1 marker bit, in the
// same ascending order that CanonicalCode::read_symbol() binary-searches.
//
// Fails if a symbol lies outside the alphabet or if the same symbol is listed twice.
ErrorOr<void> BrotliDecompressionStream::read_simple_prefix_code(CanonicalCode& code, size_t alphabet_size)
{
    VERIFY(code.m_symbol_codes.is_empty());
    VERIFY(code.m_symbol_values.is_empty());

    size_t number_of_symbols = 1 + TRY(m_input_stream.read_bits(2));

    // Each symbol is stored using the smallest bit count that can represent the whole alphabet.
    size_t symbol_size = 0;
    while ((1u << symbol_size) < alphabet_size)
        symbol_size++;

    Vector<size_t> symbols;
    for (size_t i = 0; i < number_of_symbols; i++) {
        size_t symbol = TRY(m_input_stream.read_bits(symbol_size));
        if (symbol >= alphabet_size)
            return Error::from_string_literal("symbol larger than alphabet");
        // RFC 7932 section 3.4: a value identical to a previous value makes the stream invalid.
        if (symbols.contains_slow(symbol))
            return Error::from_string_literal("duplicate symbol in simple prefix code");
        symbols.append(symbol);
    }

    if (number_of_symbols == 1) {
        // A single symbol has a zero-length code, i.e. only the marker bit.
        code.m_symbol_codes.append(0b1);
        code.m_symbol_values = move(symbols);
    } else if (number_of_symbols == 2) {
        // Code lengths 1, 1: symbols are assigned in sorted order.
        code.m_symbol_codes.extend({ 0b10, 0b11 });
        if (symbols[0] > symbols[1])
            swap(symbols[0], symbols[1]);
        code.m_symbol_values = move(symbols);
    } else if (number_of_symbols == 3) {
        // Code lengths 1, 2, 2: only the two longer codes are sorted by symbol.
        code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 });
        if (symbols[1] > symbols[2])
            swap(symbols[1], symbols[2]);
        code.m_symbol_values = move(symbols);
    } else if (number_of_symbols == 4) {
        bool tree_select = TRY(m_input_stream.read_bit());
        if (tree_select) {
            // Code lengths 1, 2, 3, 3: only the two longest codes are sorted by symbol.
            code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 });
            if (symbols[2] > symbols[3])
                swap(symbols[2], symbols[3]);
            code.m_symbol_values = move(symbols);
        } else {
            // Code lengths 2, 2, 2, 2: all symbols are sorted.
            code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 });
            quick_sort(symbols);
            code.m_symbol_values = move(symbols);
        }
    }

    return {};
}
// Reads a "complex" prefix code into `code` in two stages:
//   1. Read up to 18 code lengths (0..5) describing a temporary prefix code over the
//      code-length alphabet (symbols 0..17). The first `hskip` entries are skipped and
//      stay at length 0.
//   2. Use that temporary code to read the code length of every symbol of the real
//      alphabet (symbols 0..15 are literal lengths, 16 and 17 are repeat codes), then
//      assign canonical codes to all symbols with a non-zero length.
// Codes are appended with a leading 1 marker bit, in ascending order, which is what
// CanonicalCode::read_symbol() binary-searches.
ErrorOr<void> BrotliDecompressionStream::read_complex_prefix_code(CanonicalCode& code, size_t alphabet_size, size_t hskip)
{
// hskip should only be 0, 2 or 3
VERIFY(hskip != 1);
VERIFY(hskip <= 3);
// Read the prefix code that is used to encode the code lengths of the actual prefix code.
// symbol_mapping[i] is the code-length symbol whose length is stored at position i in the stream.
size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
size_t code_length[18] { 0 };
size_t code_length_counts[6] { 0 };
// Each non-zero length contributes 32 >> len; reading stops as soon as the total reaches 32.
size_t sum = 0;
size_t number_of_non_zero_symbols = 0;
for (size_t i = hskip; i < 18; i++) {
size_t len = TRY(read_complex_prefix_code_length());
code_length[symbol_mapping[i]] = len;
if (len != 0) {
code_length_counts[len]++;
sum += (32 >> len);
number_of_non_zero_symbols++;
}
if (sum == 32)
break;
else if (sum > 32)
return Error::from_string_literal("invalid prefix code");
}
// NOTE(review): when more than one length is non-zero but the loop ends with sum < 32,
// no error is raised here — confirm against RFC 7932 section 3.5 whether that should be rejected.
BrotliDecompressionStream::CanonicalCode temp_code;
if (number_of_non_zero_symbols > 1) {
// Canonical code assignment: shorter codes first, and within one length in symbol order.
size_t code_value = 0;
for (size_t bits = 1; bits <= 5; bits++) {
code_value = (code_value + code_length_counts[bits - 1]) << 1;
size_t current_code_value = code_value;
for (size_t i = 0; i < 18; i++) {
size_t len = code_length[i];
if (len == bits) {
temp_code.m_symbol_codes.append((1 << bits) | current_code_value);
temp_code.m_symbol_values.append(i);
current_code_value++;
}
}
}
} else {
// At most one symbol has a non-zero length: store it with only the marker bit, so
// read_symbol() returns it without consuming any input bits.
for (size_t i = 0; i < 18; i++) {
size_t len = code_length[i];
if (len != 0) {
temp_code.m_symbol_codes.append(1);
temp_code.m_symbol_values.append(i);
break;
}
}
}
// Read the code lengths of the actual prefix code.
// Each non-zero length contributes 32768 >> len; reading stops once the total reaches 32768.
sum = 0;
size_t i = 0;
// Length that symbol 16 repeats; starts at 8 until a non-zero length has been read.
size_t previous_non_zero_code_length = 8;
// last_symbol / last_repeat let consecutive repeat codes of the same kind extend the previous run.
size_t last_symbol = 0;
size_t last_repeat = 0;
Vector<size_t> result_symbols;
Vector<size_t> result_lengths;
size_t result_lengths_count[16] { 0 };
while (i < alphabet_size) {
auto symbol = TRY(temp_code.read_symbol(m_input_stream));
if (symbol < 16) {
// Literal code length for symbol i.
result_symbols.append(i);
result_lengths.append(symbol);
result_lengths_count[symbol]++;
if (symbol != 0) {
previous_non_zero_code_length = symbol;
sum += (32768 >> symbol);
if (sum == 32768)
break;
else if (sum > 32768)
return Error::from_string_literal("invalid prefix code");
}
last_repeat = 0;
i++;
} else if (symbol == 16) {
// Repeat the previous non-zero code length 3..6 times (2 extra bits). If the previous
// symbol was also 16, the total becomes 4 * (old total - 2) + 3..6 and only the
// difference to the old total is emitted.
size_t repeat_count = 0;
if (last_symbol == 16 && last_repeat != 0) {
repeat_count = (4 * (last_repeat - 2));
} else {
last_repeat = 0;
}
repeat_count += 3 + TRY(m_input_stream.read_bits(2));
for (size_t rep = 0; rep < (repeat_count - last_repeat); rep++) {
result_symbols.append(i);
result_lengths.append(previous_non_zero_code_length);
result_lengths_count[previous_non_zero_code_length]++;
if (previous_non_zero_code_length != 0) {
sum += (32768 >> previous_non_zero_code_length);
if (sum == 32768)
break;
else if (sum > 32768)
return Error::from_string_literal("invalid prefix code");
}
i++;
// NOTE(review): a run that would pass the end of the alphabet is silently truncated here — confirm intended.
if (i >= alphabet_size)
break;
}
if (sum == 32768)
break;
VERIFY(sum < 32768);
last_repeat = repeat_count;
} else if (symbol == 17) {
// Repeat a zero length 3..10 times (3 extra bits). If the previous symbol was also 17,
// the total becomes 8 * (old total - 2) + 3..10. Zero-length symbols are not recorded;
// i is simply advanced past them.
size_t repeat_count = 0;
if (last_symbol == 17 && last_repeat != 0) {
repeat_count = (8 * (last_repeat - 2));
} else {
last_repeat = 0;
}
repeat_count += 3 + TRY(m_input_stream.read_bits(3));
i += (repeat_count - last_repeat);
last_repeat = repeat_count;
}
last_symbol = symbol;
}
// NOTE(review): there is no check after the loop that sum reached 32768 — confirm whether
// incomplete codes should be rejected.
// Zero-length entries must not influence the code values computed below.
result_lengths_count[0] = 0;
// Canonical code assignment for the real alphabet, identical in structure to the one above.
size_t code_value = 0;
for (size_t bits = 1; bits < 16; bits++) {
code_value = (code_value + result_lengths_count[bits - 1]) << 1;
size_t current_code_value = code_value;
for (size_t n = 0; n < result_symbols.size(); n++) {
size_t len = result_lengths[n];
if (len == bits) {
code.m_symbol_codes.append((1 << bits) | current_code_value);
code.m_symbol_values.append(result_symbols[n]);
current_code_value++;
}
}
}
return {};
}
// Undoes a move-to-front transform in place (RFC 7932 section 7.3): every entry of `v`
// is an index into a list that starts out as 0..255; the entry is replaced by the value
// found at that index, and that value is then moved to the front of the list.
static void inverse_move_to_front_transform(Span<u8> v)
{
    u8 mtf[256];
    for (size_t slot = 0; slot < 256; ++slot)
        mtf[slot] = static_cast<u8>(slot);

    for (size_t position = 0; position < v.size(); ++position) {
        size_t rank = v[position];
        u8 decoded = mtf[rank];
        v[position] = decoded;

        // Shift everything in front of `rank` back by one slot to make room at the front.
        while (rank > 0) {
            mtf[rank] = mtf[rank - 1];
            --rank;
        }
        mtf[0] = decoded;
    }
}
// Reads a context map with `context_map_size` entries and appends them to `context_map`.
//
// The map is prefix coded over an alphabet of `number_of_codes` plus the optional
// run-length symbols: symbols 0..RLEMAX encode runs of zeros (symbol s stands for
// (1 << s) plus s extra bits worth of zeros, so symbol 0 is a single zero), larger
// symbols encode the value `symbol - RLEMAX`. A trailing bit selects whether the
// inverse move-to-front transform is applied to the whole map.
//
// Fails if a run of zeros would extend past `context_map_size`.
ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size)
{
    bool use_run_length_encoding = TRY(m_input_stream.read_bit());
    size_t run_length_encoding_max = 0;
    if (use_run_length_encoding)
        run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4));

    BrotliDecompressionStream::CanonicalCode code;
    TRY(read_prefix_code(code, number_of_codes + run_length_encoding_max));

    size_t i = 0;
    while (i < context_map_size) {
        size_t symbol = TRY(code.read_symbol(m_input_stream));
        if (symbol <= run_length_encoding_max) {
            size_t repeat_base = static_cast<size_t>(1) << symbol;
            size_t repeat_additional = TRY(m_input_stream.read_bits(symbol));
            size_t repeat_count = repeat_base + repeat_additional;

            // RFC 7932 section 7.3: a run that produces more entries than the context map
            // holds makes the stream invalid. (i < context_map_size holds here.)
            if (repeat_count > context_map_size - i)
                return Error::from_string_literal("context map run length exceeds context map size");

            while (repeat_count--) {
                context_map.append(0);
                i++;
            }
        } else {
            size_t value = symbol - run_length_encoding_max;
            context_map.append(value);
            i++;
        }
    }

    bool inverse_move_to_front = TRY(m_input_stream.read_bit());
    if (inverse_move_to_front)
        inverse_move_to_front_transform(context_map.span());

    return {};
}
// Reads the block-switching setup of one block category at the start of a meta-block
// and resets `block` accordingly. With a single block type no prefix codes are present
// and the block length is set to 16 MiB; otherwise the block type code, the block
// length code and the first block length are read.
ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block)
{
    size_t type_count = TRY(read_variable_length());

    block.number_of_types = type_count;
    block.type_previous = 1;
    block.type = 0;
    block.length_code.clear();
    block.type_code.clear();

    if (type_count == 1) {
        block.length = 16 * MiB;
        return {};
    }

    // The type alphabet has two extra symbols, see block_read_new_state().
    TRY(read_prefix_code(block.type_code, 2 + type_count));
    TRY(read_prefix_code(block.length_code, 26));
    return block_update_length(block);
}
// Reads the next block length for `block`: a symbol from the block's length code picks
// a base value and a number of extra bits, whose value is added to the base.
ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block)
{
    static constexpr size_t base_by_symbol[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 };
    static constexpr size_t extra_bits_by_symbol[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 };

    size_t length_symbol = TRY(block.length_code.read_symbol(m_input_stream));
    size_t extra_value = TRY(m_input_stream.read_bits(extra_bits_by_symbol[length_symbol]));

    block.length = base_by_symbol[length_symbol] + extra_value;
    return {};
}
// Performs a block switch: reads the block type symbol followed by the new block
// length, then updates the current and previous block type of `block`.
ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block)
{
    size_t type_symbol = TRY(block.type_code.read_symbol(m_input_stream));
    TRY(block_update_length(block));

    switch (type_symbol) {
    case 0:
        // Go back to the previous block type.
        swap(block.type, block.type_previous);
        break;
    case 1: {
        // Advance to the next block type, wrapping around.
        size_t following_type = (block.type + 1) % block.number_of_types;
        block.type_previous = block.type;
        block.type = following_type;
        break;
    }
    default:
        // Explicit block type, stored with an offset of two.
        block.type_previous = block.type;
        block.type = type_symbol - 2;
        break;
    }

    return {};
}
// Returns the index of the literal prefix code to use for the next literal. The index
// is looked up in the literal context map using the current literal block type and a
// 6-bit context id computed from the last one or two output bytes (0 is used for bytes
// that have not been written yet). How the context id is computed depends on the
// context mode of the current literal block type.
size_t BrotliDecompressionStream::literal_code_index_from_context()
{
    static constexpr size_t context_id_lut0[256] {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
        44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
        12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
        52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
        12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
        60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
        2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
    };
    static constexpr size_t context_id_lut1[256] {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
        1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
        1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
    };
    static constexpr size_t context_id_lut2[256] {
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
    };

    auto const& history = m_lookback_buffer.value();
    size_t const mode = m_literal_context_modes[m_literal_block.type];

    size_t context_id = 0;
    if (mode == 0) {
        // Low six bits of the last byte.
        context_id = history.lookback(1, 0) & 0x3f;
    } else if (mode == 1) {
        // High six bits of the last byte.
        context_id = history.lookback(1, 0) >> 2;
    } else if (mode == 2) {
        // Table-driven combination of the last two bytes.
        size_t const from_last = context_id_lut0[history.lookback(1, 0)];
        size_t const from_second_last = context_id_lut1[history.lookback(2, 0)];
        context_id = from_last | from_second_last;
    } else if (mode == 3) {
        // Both of the last two bytes go through the same table, three bits each.
        size_t const from_last = context_id_lut2[history.lookback(1, 0)];
        size_t const from_second_last = context_id_lut2[history.lookback(2, 0)];
        context_id = (from_last << 3) | from_second_last;
    } else {
        VERIFY_NOT_REACHED();
    }

    // The context map holds 64 entries per literal block type.
    return m_context_mapping_literal[64 * m_literal_block.type + context_id];
}
// Decompresses data into `output_buffer` and returns the prefix of it that was filled.
//
// The decoder is a state machine (see State) so that it can stop whenever the output
// buffer is full and resume on the next call:
//   WindowSize           -> reads the stream header and allocates the lookback buffer
//   Idle                 -> reads a meta-block header (or skips a metadata block)
//   UncompressedData     -> copies raw bytes of an uncompressed meta-block
//   CompressedCommand    -> reads an insert-and-copy command
//   CompressedLiteral    -> emits one literal per iteration
//   CompressedDistance   -> decodes the copy distance of the current command
//   CompressedCopy       -> emits one byte of a back-reference per iteration
//   CompressedDictionary -> emits one byte of a static dictionary word per iteration
//
// Fewer bytes than requested are returned only once the final meta-block is complete.
// Any stream error is propagated; bytes already placed in `output_buffer` during the
// failing call are not reported.
ErrorOr<Bytes> BrotliDecompressionStream::read(Bytes output_buffer)
{
    size_t bytes_read = 0;
    while (bytes_read < output_buffer.size()) {
        if (m_current_state == State::WindowSize) {
            size_t window_bits = TRY(read_window_length());
            m_window_size = (1 << window_bits) - 16;

            m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size));

            m_current_state = State::Idle;
        } else if (m_current_state == State::Idle) {
            // If the final block was read, we are done decompressing
            if (m_read_final_block)
                break;

            m_read_final_block = TRY(m_input_stream.read_bit());
            if (m_read_final_block) {
                bool is_last_block_empty = TRY(m_input_stream.read_bit());
                // If the last block is empty we are done decompressing
                if (is_last_block_empty)
                    break;
            }

            size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles());
            if (size_number_of_nibbles == 0) {
                // This block only contains meta-data
                bool reserved = TRY(m_input_stream.read_bit());
                if (reserved)
                    return Error::from_string_literal("invalid reserved bit");

                // The metadata length is stored as MSKIPLEN - 1 in MSKIPBYTES bytes and is
                // only present when MSKIPBYTES is non-zero; otherwise the length is 0.
                size_t skip_bytes = TRY(m_input_stream.read_bits(2));
                size_t skip_length = 0;
                if (skip_bytes > 0)
                    skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes));

                u8 remainder = m_input_stream.align_to_byte_boundary();
                if (remainder != 0)
                    return Error::from_string_literal("remainder bits are non-zero");

                // Discard meta-data bytes
                u8 temp_buffer[4096];
                Bytes temp_bytes { temp_buffer, 4096 };
                while (skip_length > 0) {
                    Bytes temp_bytes_slice = temp_bytes.slice(0, min(sizeof(temp_buffer), skip_length));
                    auto metadata_bytes = TRY(m_input_stream.read(temp_bytes_slice));
                    if (metadata_bytes.is_empty())
                        return Error::from_string_literal("eof");
                    skip_length -= metadata_bytes.size();
                }

                continue;
            }

            size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles));

            // Only non-final meta-blocks carry the "is uncompressed" flag.
            bool is_uncompressed = false;
            if (!m_read_final_block)
                is_uncompressed = TRY(m_input_stream.read_bit());

            m_bytes_left = uncompressed_size;
            if (is_uncompressed) {
                u8 remainder = m_input_stream.align_to_byte_boundary();
                if (remainder != 0)
                    return Error::from_string_literal("remainder is non-zero");
                m_current_state = State::UncompressedData;
            } else {
                TRY(read_block_configuration(m_literal_block));
                TRY(read_block_configuration(m_insert_and_copy_block));
                TRY(read_block_configuration(m_distance_block));

                m_postfix_bits = TRY(m_input_stream.read_bits(2));
                m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits;

                // One 2-bit context mode per literal block type.
                m_literal_context_modes.clear();
                for (size_t i = 0; i < m_literal_block.number_of_types; i++) {
                    size_t context_mode = TRY(m_input_stream.read_bits(2));
                    m_literal_context_modes.append(context_mode);
                }

                // Literal context map: 64 contexts per literal block type.
                m_context_mapping_literal.clear();
                size_t number_of_literal_codes = TRY(read_variable_length());
                if (number_of_literal_codes == 1) {
                    for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++)
                        m_context_mapping_literal.append(0);
                } else {
                    TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types));
                }

                // Distance context map: 4 contexts per distance block type.
                m_context_mapping_distance.clear();
                size_t number_of_distance_codes = TRY(read_variable_length());
                if (number_of_distance_codes == 1) {
                    for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++)
                        m_context_mapping_distance.append(0);
                } else {
                    TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types));
                }

                m_literal_codes.clear();
                for (size_t i = 0; i < number_of_literal_codes; i++) {
                    CanonicalCode code;
                    TRY(read_prefix_code(code, 256));
                    m_literal_codes.append(move(code));
                }

                m_insert_and_copy_codes.clear();
                for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) {
                    CanonicalCode code;
                    TRY(read_prefix_code(code, 704));
                    m_insert_and_copy_codes.append(move(code));
                }

                m_distance_codes.clear();
                for (size_t i = 0; i < number_of_distance_codes; i++) {
                    CanonicalCode code;
                    TRY(read_prefix_code(code, 16 + m_direct_distances + (48 << m_postfix_bits)));
                    m_distance_codes.append(move(code));
                }

                m_current_state = State::CompressedCommand;
            }
        } else if (m_current_state == State::UncompressedData) {
            size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left);
            VERIFY(number_of_fitting_bytes > 0);

            auto uncompressed_bytes = TRY(m_input_stream.read(output_buffer.slice(bytes_read, number_of_fitting_bytes)));
            if (uncompressed_bytes.is_empty())
                return Error::from_string_literal("eof");

            // Uncompressed bytes are part of the output history too: later meta-blocks may
            // refer back to them, and the dictionary threshold depends on the amount written.
            for (size_t i = 0; i < uncompressed_bytes.size(); i++)
                m_lookback_buffer.value().write(uncompressed_bytes[i]);

            m_bytes_left -= uncompressed_bytes.size();
            bytes_read += uncompressed_bytes.size();

            // If all bytes were read, return to the idle state
            if (m_bytes_left == 0)
                m_current_state = State::Idle;
        } else if (m_current_state == State::CompressedCommand) {
            if (m_insert_and_copy_block.length == 0) {
                TRY(block_read_new_state(m_insert_and_copy_block));
            }
            m_insert_and_copy_block.length--;

            size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream));

            // The symbol is split into a 64-symbol cell (bits 6 and up) plus a 3-bit insert
            // and a 3-bit copy offset. The cell selects the base of both length codes and
            // whether the distance is implicitly "reuse the last distance".
            static constexpr size_t insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 };
            static constexpr size_t copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 };
            static constexpr bool implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false };

            size_t insert_and_copy_index = insert_and_copy_symbol >> 6;
            size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111;
            size_t copy_length_code_offset = insert_and_copy_symbol & 0b111;

            size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset;
            size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset;

            m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index];

            static constexpr size_t insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
            static constexpr size_t insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
            static constexpr size_t copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
            static constexpr size_t copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };

            m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code]));
            m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code]));

            if (m_insert_length > 0) {
                m_current_state = State::CompressedLiteral;
            } else {
                m_current_state = State::CompressedDistance;
            }
        } else if (m_current_state == State::CompressedLiteral) {
            if (m_literal_block.length == 0) {
                TRY(block_read_new_state(m_literal_block));
            }
            m_literal_block.length--;

            size_t literal_code_index = literal_code_index_from_context();
            size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream));

            output_buffer[bytes_read] = literal_value;
            m_lookback_buffer.value().write(literal_value);
            bytes_read++;
            m_insert_length--;
            m_bytes_left--;

            // The meta-block may end after the literals, in which case there is no copy.
            if (m_bytes_left == 0)
                m_current_state = State::Idle;
            else if (m_insert_length == 0)
                m_current_state = State::CompressedDistance;
        } else if (m_current_state == State::CompressedDistance) {
            size_t distance_symbol;
            if (m_implicit_zero_distance) {
                distance_symbol = 0;
            } else {
                if (m_distance_block.length == 0) {
                    TRY(block_read_new_state(m_distance_block));
                }
                m_distance_block.length--;

                // Copy lengths 2, 3, 4 and >= 5 map to the distance contexts 0, 1, 2 and 3.
                size_t context_id = clamp(m_copy_length - 2, 0, 3);
                size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id];

                distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream));
            }

            size_t distance;
            bool reuse_previous_distance = false;
            if (distance_symbol < 16) {
                // Symbols 0..3 reuse one of the last four distances unchanged, symbols 4..9
                // adjust the last distance and symbols 10..15 the second-to-last distance
                // by -1, +1, -2, +2, -3, +3.
                static constexpr size_t ring_slot[16] { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
                static constexpr int adjustment[16] { 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 };

                size_t base_distance = m_distances[ring_slot[distance_symbol]];
                int delta = adjustment[distance_symbol];
                if (delta < 0) {
                    // A resulting distance of zero or less makes the stream invalid. Without
                    // this check the unsigned subtraction would wrap around, and a result of
                    // exactly zero would copy a stale byte from the lookback buffer.
                    size_t decrement = static_cast<size_t>(-delta);
                    if (base_distance <= decrement)
                        return Error::from_string_literal("invalid distance");
                    distance = base_distance - decrement;
                } else {
                    distance = base_distance + static_cast<size_t>(delta);
                }

                // Only symbol 0 leaves the ring of previous distances untouched.
                reuse_previous_distance = (distance_symbol == 0);
            } else if (distance_symbol < 16 + m_direct_distances) {
                distance = distance_symbol - 15;
            } else {
                size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1;

                size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1));
                size_t dextra = TRY(m_input_stream.read_bits(ndistbits));

                size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits;
                size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK;
                size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4;
                distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1;
            }
            m_distance = distance;

            size_t total_written = m_lookback_buffer.value().total_written();
            size_t max_lookback = min(total_written, m_window_size);

            if (distance > max_lookback) {
                // Distances beyond the available history refer to the static dictionary.
                size_t word_index = distance - (max_lookback + 1);
                m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length));
                m_copy_length = m_dictionary_data.size();

                if (m_copy_length == 0)
                    m_current_state = State::CompressedCommand;
                else
                    m_current_state = State::CompressedDictionary;
            } else {
                // Only real back-references are remembered in the ring of previous distances.
                if (!reuse_previous_distance) {
                    m_distances[3] = m_distances[2];
                    m_distances[2] = m_distances[1];
                    m_distances[1] = m_distances[0];
                    m_distances[0] = distance;
                }

                m_current_state = State::CompressedCopy;
            }
        } else if (m_current_state == State::CompressedCopy) {
            // Byte-by-byte so that overlapping copies (distance < copy length) work.
            u8 copy_value = m_lookback_buffer.value().lookback(m_distance);

            output_buffer[bytes_read] = copy_value;
            m_lookback_buffer.value().write(copy_value);
            bytes_read++;
            m_copy_length--;
            m_bytes_left--;

            if (m_bytes_left == 0)
                m_current_state = State::Idle;
            else if (m_copy_length == 0)
                m_current_state = State::CompressedCommand;
        } else if (m_current_state == State::CompressedDictionary) {
            // m_copy_length counts down, so this walks the word front to back.
            size_t offset = m_dictionary_data.size() - m_copy_length;
            u8 dictionary_value = m_dictionary_data[offset];

            output_buffer[bytes_read] = dictionary_value;
            m_lookback_buffer.value().write(dictionary_value);
            bytes_read++;
            m_copy_length--;
            m_bytes_left--;

            if (m_bytes_left == 0)
                m_current_state = State::Idle;
            else if (m_copy_length == 0)
                m_current_state = State::CompressedCommand;
        }
    }

    return output_buffer.slice(0, bytes_read);
}
// The stream is exhausted once the final meta-block has been seen and the decoder has
// returned to the idle state, i.e. nothing of that meta-block is left to emit.
bool BrotliDecompressionStream::is_eof() const
{
    if (!m_read_final_block)
        return false;
    return m_current_state == State::Idle;
}
}

View file

@ -0,0 +1,165 @@
/*
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/CircularQueue.h>
#include <AK/FixedArray.h>
#include <LibCore/InputBitStream.h>
#include <LibCore/Stream.h>
namespace Compress {
using Core::Stream::LittleEndianInputBitStream;
using Core::Stream::Stream;
class BrotliDecompressionStream : public Stream {
public:
enum class State {
WindowSize,
Idle,
UncompressedData,
CompressedCommand,
CompressedLiteral,
CompressedDistance,
CompressedCopy,
CompressedDictionary,
};
class CanonicalCode {
friend class BrotliDecompressionStream;
public:
CanonicalCode() = default;
ErrorOr<size_t> read_symbol(LittleEndianInputBitStream&);
void clear()
{
m_symbol_codes.clear();
m_symbol_values.clear();
}
private:
Vector<size_t> m_symbol_codes;
Vector<size_t> m_symbol_values;
};
struct Block {
size_t type;
size_t type_previous;
size_t number_of_types;
size_t length;
CanonicalCode type_code;
CanonicalCode length_code;
};
class LookbackBuffer {
private:
LookbackBuffer(FixedArray<u8>& buffer)
: m_buffer(move(buffer))
{
}
public:
static ErrorOr<LookbackBuffer> try_create(size_t size)
{
auto buffer = TRY(FixedArray<u8>::try_create(size));
return LookbackBuffer { buffer };
}
void write(u8 value)
{
m_buffer[m_offset] = value;
m_offset = (m_offset + 1) % m_buffer.size();
m_total_written++;
}
u8 lookback(size_t offset) const
{
VERIFY(offset <= m_total_written);
VERIFY(offset <= m_buffer.size());
size_t index = (m_offset + m_buffer.size() - offset) % m_buffer.size();
return m_buffer[index];
}
u8 lookback(size_t offset, u8 fallback) const
{
if (offset > m_total_written || offset > m_buffer.size())
return fallback;
VERIFY(offset <= m_total_written);
VERIFY(offset <= m_buffer.size());
size_t index = (m_offset + m_buffer.size() - offset) % m_buffer.size();
return m_buffer[index];
}
size_t total_written() { return m_total_written; }
private:
FixedArray<u8> m_buffer;
size_t m_offset { 0 };
size_t m_total_written { 0 };
};
public:
BrotliDecompressionStream(Stream&);
bool is_readable() const override { return m_input_stream.is_readable(); }
ErrorOr<Bytes> read(Bytes output_buffer) override;
bool is_writable() const override { return m_input_stream.is_writable(); }
ErrorOr<size_t> write(ReadonlyBytes bytes) override { return m_input_stream.write(bytes); }
bool is_eof() const override;
bool is_open() const override { return m_input_stream.is_open(); }
void close() override { m_input_stream.close(); }
private:
// Parsing helpers, all defined out of line. Each returns ErrorOr so failures propagate to the caller.
// NOTE(review): the per-function semantics are not visible from this header; see the definitions.
ErrorOr<size_t> read_window_length();
ErrorOr<size_t> read_size_number_of_nibbles();
ErrorOr<size_t> read_variable_length();
ErrorOr<size_t> read_complex_prefix_code_length();
// Prefix-code readers fill the CanonicalCode passed by reference.
ErrorOr<void> read_prefix_code(CanonicalCode&, size_t alphabet_size);
ErrorOr<void> read_simple_prefix_code(CanonicalCode&, size_t alphabet_size);
ErrorOr<void> read_complex_prefix_code(CanonicalCode&, size_t alphabet_size, size_t hskip);
// Fills `context_map`, which is passed by reference.
ErrorOr<void> read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size);
// Block helpers operate on the Block passed by reference.
ErrorOr<void> read_block_configuration(Block&);
ErrorOr<void> block_update_length(Block&);
ErrorOr<void> block_read_new_state(Block&);
size_t literal_code_index_from_context();
// Bit-level reader; the forwarding Stream overrides of this class delegate to it.
LittleEndianInputBitStream m_input_stream;
// Decoder state; a fresh decoder starts in State::WindowSize.
State m_current_state { State::WindowSize };
// Empty until a LookbackBuffer is created.
// NOTE(review): presumably created once the window size is known - confirm.
Optional<LookbackBuffer> m_lookback_buffer;
size_t m_window_size { 0 };
bool m_read_final_block { false };
size_t m_postfix_bits { 0 };
size_t m_direct_distances { 0 };
// NOTE(review): the initial values look like Brotli's initial "recent distances" ring - confirm against RFC 7932.
size_t m_distances[4] { 4, 11, 15, 16 };
size_t m_bytes_left { 0 };
size_t m_insert_length { 0 };
size_t m_copy_length { 0 };
bool m_implicit_zero_distance { false };
size_t m_distance { 0 };
ByteBuffer m_dictionary_data;
// One Block each for the literal, insert-and-copy and distance categories.
Block m_literal_block;
Vector<u8> m_literal_context_modes;
Block m_insert_and_copy_block;
Block m_distance_block;
Vector<u8> m_context_mapping_literal;
Vector<u8> m_context_mapping_distance;
// Prefix codes, one vector per category.
Vector<CanonicalCode> m_literal_codes;
Vector<CanonicalCode> m_insert_and_copy_codes;
Vector<CanonicalCode> m_distance_codes;
};
}

View file

@ -0,0 +1,244 @@
/*
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Types.h>
#include <LibCompress/BrotliDictionary.h>
// Include the 119.9 KiB of dictionary data from a binary file
// The array is only declared in C++; the assembler statements below define the symbol and
// place the file's bytes directly behind its label.
extern u8 const brotli_dictionary_data[];
#if defined(__APPLE__)
// Apple toolchains: use the constant-data section and an underscore-prefixed symbol name.
asm(".const_data\n"
".globl _brotli_dictionary_data\n"
"_brotli_dictionary_data:\n");
#else
// Other toolchains: place the label in .rodata under the plain symbol name.
asm(".section .rodata\n"
".global brotli_dictionary_data\n"
"brotli_dictionary_data:\n");
#endif
// Emit the raw contents of the binary file at the label, then switch back to the
// section that was active before.
asm(".incbin \"LibCompress/BrotliDictionaryData.bin\"\n"
".previous\n");
namespace Compress {
// Both tables are indexed by word length (0..24). lookup_word() rejects lengths below 4,
// so the first four entries of each table are never consulted by it.

// Number of low bits of a dictionary index that select the word; a word length with
// N bits therefore has 1 << N words.
static constexpr size_t bits_by_length[25] = {
    0, 0, 0, 0,         // lengths  0..3
    10, 10, 11, 11,     // lengths  4..7
    10, 10, 10, 10, 10, // lengths  8..12
    9, 9, 8,            // lengths 13..15
    7, 7, 8, 7, 7,      // lengths 16..20
    6, 6, 5, 5          // lengths 21..24
};

// Byte offset into brotli_dictionary_data at which the words of a given length start.
// Each entry equals the previous one plus (1 << bits_by_length[len - 1]) * (len - 1).
static constexpr size_t offset_by_length[25] = {
    0, 0, 0, 0,                                 // lengths  0..3
    0, 4096, 9216, 21504,                       // lengths  4..7
    35840, 44032, 53248, 63488, 74752,          // lengths  8..12
    87040, 93696, 100864,                       // lengths 13..15
    104704, 106752, 108928, 113536, 115968,     // lengths 16..20
    118528, 119872, 121280, 122016              // lengths 21..24
};
// Applies the "ferment" flip to the character that starts at `pos` and returns how many
// bytes that character is taken to occupy, judged from its lead byte:
//   lead < 192         -> 1 byte;  XORed with 32 only when it lies in 97..122 ('a'..'z')
//   192 <= lead < 224  -> 2 bytes; the second byte is XORed with 32 if it lies inside `word`
//   lead >= 224        -> 3 bytes; the third byte is XORed with 5 if it lies inside `word`
// The returned width is not clamped to the end of `word`; callers bound their own iteration.
static int ferment(Bytes word, size_t pos)
{
    auto const lead = word[pos];

    if (lead >= 224) {
        if (pos + 2 < word.size())
            word[pos + 2] ^= 5;
        return 3;
    }

    if (lead >= 192) {
        if (pos + 1 < word.size())
            word[pos + 1] ^= 32;
        return 2;
    }

    if (lead >= 97 && lead <= 122)
        word[pos] = lead ^ 32;
    return 1;
}
// Ferments only the character at the start of `word`; an empty word is left untouched.
static void ferment_first(Bytes word)
{
    if (word.size() == 0)
        return;
    ferment(word, 0);
}
// Ferments every character of `word` from front to back, advancing by the width that
// ferment() reports for each one. The loop condition keeps the start position inside the
// word even when the last reported width overshoots its end.
// (No [[maybe_unused]]: the function is called by BrotliDictionary::lookup_word() for the
// FermentAll transformation.)
static void ferment_all(Bytes word)
{
    for (size_t pos = 0; pos < word.size();)
        pos += ferment(word, pos);
}
// Pull the enumerators into this scope so the table rows below stay short.
using BrotliDictionary::TransformationOperation::FermentAll;
using BrotliDictionary::TransformationOperation::FermentFirst;
using BrotliDictionary::TransformationOperation::Identity;
using BrotliDictionary::TransformationOperation::OmitFirst;
using BrotliDictionary::TransformationOperation::OmitLast;
// Word transformations, indexed by the transform id that lookup_word() derives from the
// dictionary index. Each row is { prefix, operation, operation_data, suffix }.
// operation_data is the byte count for OmitFirst / OmitLast and 0 for the other operations.
// The trailing comment on every row repeats the id and the row in tabular form.
constexpr static BrotliDictionary::Transformation transformations[121] {
// ID Prefix Transform Suffix
// -- ------ --------- ------
{ ""sv, Identity, 0, ""sv }, // 0 "" Identity ""
{ ""sv, Identity, 0, " "sv }, // 1 "" Identity " "
{ " "sv, Identity, 0, " "sv }, // 2 " " Identity " "
{ ""sv, OmitFirst, 1, ""sv }, // 3 "" OmitFirst1 ""
{ ""sv, FermentFirst, 0, " "sv }, // 4 "" FermentFirst " "
{ ""sv, Identity, 0, " the "sv }, // 5 "" Identity " the "
{ " "sv, Identity, 0, ""sv }, // 6 " " Identity ""
{ "s "sv, Identity, 0, " "sv }, // 7 "s " Identity " "
{ ""sv, Identity, 0, " of "sv }, // 8 "" Identity " of "
{ ""sv, FermentFirst, 0, ""sv }, // 9 "" FermentFirst ""
{ ""sv, Identity, 0, " and "sv }, // 10 "" Identity " and "
{ ""sv, OmitFirst, 2, ""sv }, // 11 "" OmitFirst2 ""
{ ""sv, OmitLast, 1, ""sv }, // 12 "" OmitLast1 ""
{ ", "sv, Identity, 0, " "sv }, // 13 ", " Identity " "
{ ""sv, Identity, 0, ", "sv }, // 14 "" Identity ", "
{ " "sv, FermentFirst, 0, " "sv }, // 15 " " FermentFirst " "
{ ""sv, Identity, 0, " in "sv }, // 16 "" Identity " in "
{ ""sv, Identity, 0, " to "sv }, // 17 "" Identity " to "
{ "e "sv, Identity, 0, " "sv }, // 18 "e " Identity " "
{ ""sv, Identity, 0, "\""sv }, // 19 "" Identity "\""
{ ""sv, Identity, 0, "."sv }, // 20 "" Identity "."
{ ""sv, Identity, 0, "\">"sv }, // 21 "" Identity "\">"
{ ""sv, Identity, 0, "\n"sv }, // 22 "" Identity "\n"
{ ""sv, OmitLast, 3, ""sv }, // 23 "" OmitLast3 ""
{ ""sv, Identity, 0, "]"sv }, // 24 "" Identity "]"
{ ""sv, Identity, 0, " for "sv }, // 25 "" Identity " for "
{ ""sv, OmitFirst, 3, ""sv }, // 26 "" OmitFirst3 ""
{ ""sv, OmitLast, 2, ""sv }, // 27 "" OmitLast2 ""
{ ""sv, Identity, 0, " a "sv }, // 28 "" Identity " a "
{ ""sv, Identity, 0, " that "sv }, // 29 "" Identity " that "
{ " "sv, FermentFirst, 0, ""sv }, // 30 " " FermentFirst ""
{ ""sv, Identity, 0, ". "sv }, // 31 "" Identity ". "
{ "."sv, Identity, 0, ""sv }, // 32 "." Identity ""
{ " "sv, Identity, 0, ", "sv }, // 33 " " Identity ", "
{ ""sv, OmitFirst, 4, ""sv }, // 34 "" OmitFirst4 ""
{ ""sv, Identity, 0, " with "sv }, // 35 "" Identity " with "
{ ""sv, Identity, 0, "'"sv }, // 36 "" Identity "'"
{ ""sv, Identity, 0, " from "sv }, // 37 "" Identity " from "
{ ""sv, Identity, 0, " by "sv }, // 38 "" Identity " by "
{ ""sv, OmitFirst, 5, ""sv }, // 39 "" OmitFirst5 ""
{ ""sv, OmitFirst, 6, ""sv }, // 40 "" OmitFirst6 ""
{ " the "sv, Identity, 0, ""sv }, // 41 " the " Identity ""
{ ""sv, OmitLast, 4, ""sv }, // 42 "" OmitLast4 ""
{ ""sv, Identity, 0, ". The "sv }, // 43 "" Identity ". The "
{ ""sv, FermentAll, 0, ""sv }, // 44 "" FermentAll ""
{ ""sv, Identity, 0, " on "sv }, // 45 "" Identity " on "
{ ""sv, Identity, 0, " as "sv }, // 46 "" Identity " as "
{ ""sv, Identity, 0, " is "sv }, // 47 "" Identity " is "
{ ""sv, OmitLast, 7, ""sv }, // 48 "" OmitLast7 ""
{ ""sv, OmitLast, 1, "ing "sv }, // 49 "" OmitLast1 "ing "
{ ""sv, Identity, 0, "\n\t"sv }, // 50 "" Identity "\n\t"
{ ""sv, Identity, 0, ":"sv }, // 51 "" Identity ":"
{ " "sv, Identity, 0, ". "sv }, // 52 " " Identity ". "
{ ""sv, Identity, 0, "ed "sv }, // 53 "" Identity "ed "
{ ""sv, OmitFirst, 9, ""sv }, // 54 "" OmitFirst9 ""
{ ""sv, OmitFirst, 7, ""sv }, // 55 "" OmitFirst7 ""
{ ""sv, OmitLast, 6, ""sv }, // 56 "" OmitLast6 ""
{ ""sv, Identity, 0, "("sv }, // 57 "" Identity "("
{ ""sv, FermentFirst, 0, ", "sv }, // 58 "" FermentFirst ", "
{ ""sv, OmitLast, 8, ""sv }, // 59 "" OmitLast8 ""
{ ""sv, Identity, 0, " at "sv }, // 60 "" Identity " at "
{ ""sv, Identity, 0, "ly "sv }, // 61 "" Identity "ly "
{ " the "sv, Identity, 0, " of "sv }, // 62 " the " Identity " of "
{ ""sv, OmitLast, 5, ""sv }, // 63 "" OmitLast5 ""
{ ""sv, OmitLast, 9, ""sv }, // 64 "" OmitLast9 ""
{ " "sv, FermentFirst, 0, ", "sv }, // 65 " " FermentFirst ", "
{ ""sv, FermentFirst, 0, "\""sv }, // 66 "" FermentFirst "\""
{ "."sv, Identity, 0, "("sv }, // 67 "." Identity "("
{ ""sv, FermentAll, 0, " "sv }, // 68 "" FermentAll " "
{ ""sv, FermentFirst, 0, "\">"sv }, // 69 "" FermentFirst "\">"
{ ""sv, Identity, 0, "=\""sv }, // 70 "" Identity "=\""
{ " "sv, Identity, 0, "."sv }, // 71 " " Identity "."
{ ".com/"sv, Identity, 0, ""sv }, // 72 ".com/" Identity ""
{ " the "sv, Identity, 0, " of the "sv }, // 73 " the " Identity " of the "
{ ""sv, FermentFirst, 0, "'"sv }, // 74 "" FermentFirst "'"
{ ""sv, Identity, 0, ". This "sv }, // 75 "" Identity ". This "
{ ""sv, Identity, 0, ","sv }, // 76 "" Identity ","
{ "."sv, Identity, 0, " "sv }, // 77 "." Identity " "
{ ""sv, FermentFirst, 0, "("sv }, // 78 "" FermentFirst "("
{ ""sv, FermentFirst, 0, "."sv }, // 79 "" FermentFirst "."
{ ""sv, Identity, 0, " not "sv }, // 80 "" Identity " not "
{ " "sv, Identity, 0, "=\""sv }, // 81 " " Identity "=\""
{ ""sv, Identity, 0, "er "sv }, // 82 "" Identity "er "
{ " "sv, FermentAll, 0, " "sv }, // 83 " " FermentAll " "
{ ""sv, Identity, 0, "al "sv }, // 84 "" Identity "al "
{ " "sv, FermentAll, 0, ""sv }, // 85 " " FermentAll ""
{ ""sv, Identity, 0, "='"sv }, // 86 "" Identity "='"
{ ""sv, FermentAll, 0, "\""sv }, // 87 "" FermentAll "\""
{ ""sv, FermentFirst, 0, ". "sv }, // 88 "" FermentFirst ". "
{ " "sv, Identity, 0, "("sv }, // 89 " " Identity "("
{ ""sv, Identity, 0, "ful "sv }, // 90 "" Identity "ful "
{ " "sv, FermentFirst, 0, ". "sv }, // 91 " " FermentFirst ". "
{ ""sv, Identity, 0, "ive "sv }, // 92 "" Identity "ive "
{ ""sv, Identity, 0, "less "sv }, // 93 "" Identity "less "
{ ""sv, FermentAll, 0, "'"sv }, // 94 "" FermentAll "'"
{ ""sv, Identity, 0, "est "sv }, // 95 "" Identity "est "
{ " "sv, FermentFirst, 0, "."sv }, // 96 " " FermentFirst "."
{ ""sv, FermentAll, 0, "\">"sv }, // 97 "" FermentAll "\">"
{ " "sv, Identity, 0, "='"sv }, // 98 " " Identity "='"
{ ""sv, FermentFirst, 0, ","sv }, // 99 "" FermentFirst ","
{ ""sv, Identity, 0, "ize "sv }, // 100 "" Identity "ize "
{ ""sv, FermentAll, 0, "."sv }, // 101 "" FermentAll "."
{ "\xc2\xa0"sv, Identity, 0, ""sv }, // 102 "\xc2\xa0" Identity ""
{ " "sv, Identity, 0, ","sv }, // 103 " " Identity ","
{ ""sv, FermentFirst, 0, "=\""sv }, // 104 "" FermentFirst "=\""
{ ""sv, FermentAll, 0, "=\""sv }, // 105 "" FermentAll "=\""
{ ""sv, Identity, 0, "ous "sv }, // 106 "" Identity "ous "
{ ""sv, FermentAll, 0, ", "sv }, // 107 "" FermentAll ", "
{ ""sv, FermentFirst, 0, "='"sv }, // 108 "" FermentFirst "='"
{ " "sv, FermentFirst, 0, ","sv }, // 109 " " FermentFirst ","
{ " "sv, FermentAll, 0, "=\""sv }, // 110 " " FermentAll "=\""
{ " "sv, FermentAll, 0, ", "sv }, // 111 " " FermentAll ", "
{ ""sv, FermentAll, 0, ","sv }, // 112 "" FermentAll ","
{ ""sv, FermentAll, 0, "("sv }, // 113 "" FermentAll "("
{ ""sv, FermentAll, 0, ". "sv }, // 114 "" FermentAll ". "
{ " "sv, FermentAll, 0, "."sv }, // 115 " " FermentAll "."
{ ""sv, FermentAll, 0, "='"sv }, // 116 "" FermentAll "='"
{ " "sv, FermentAll, 0, ". "sv }, // 117 " " FermentAll ". "
{ " "sv, FermentFirst, 0, "=\""sv }, // 118 " " FermentFirst "=\""
{ " "sv, FermentAll, 0, "='"sv }, // 119 " " FermentAll "='"
{ " "sv, FermentFirst, 0, "='"sv }, // 120 " " FermentFirst "='"
};
// Builds the transformed dictionary word selected by `index` among the words of `length` bytes.
//
// The low bits_by_length[length] bits of `index` pick the base word inside the embedded
// dictionary; the remaining high bits pick one of the 121 transformations. The result is
// the transformation's prefix, the (possibly modified or shortened) base word, and its suffix.
//
// Returns an error when `length` is outside 4..24 or when the transformation id is 121 or more.
ErrorOr<ByteBuffer> BrotliDictionary::lookup_word(size_t index, size_t length)
{
    if (length < 4 || length > 24)
        return Error::from_string_literal("invalid dictionary lookup length");

    auto const index_bits = bits_by_length[length];
    size_t const transform_id = index >> index_bits;
    if (transform_id >= 121)
        return Error::from_string_literal("invalid dictionary transformation");

    // Words of one length are stored back to back, starting at offset_by_length[length].
    size_t const word_index = index % (1 << index_bits);
    auto const* word_start = brotli_dictionary_data + offset_by_length[length] + (word_index * length);
    ReadonlyBytes const base_word { word_start, length };

    auto const& transformation = transformations[transform_id];
    size_t const omitted = transformation.operation_data;

    ByteBuffer result;
    result.append(transformation.prefix.bytes());
    // Fermenting must only touch the base word, so remember where the prefix ends.
    size_t const word_offset = result.size();

    switch (transformation.operation) {
    case TransformationOperation::OmitFirst:
        // Omitting at least as many bytes as the word has leaves nothing to append.
        if (omitted < base_word.size())
            result.append(base_word.slice(omitted));
        break;
    case TransformationOperation::OmitLast:
        if (omitted < base_word.size())
            result.append(base_word.slice(0, base_word.size() - omitted));
        break;
    case TransformationOperation::Identity:
    case TransformationOperation::FermentFirst:
    case TransformationOperation::FermentAll:
        result.append(base_word);
        if (transformation.operation == TransformationOperation::FermentFirst)
            ferment_first(result.bytes().slice(word_offset));
        else if (transformation.operation == TransformationOperation::FermentAll)
            ferment_all(result.bytes().slice(word_offset));
        break;
    }

    result.append(transformation.suffix.bytes());
    return result;
}
}

View file

@ -0,0 +1,32 @@
/*
* Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/ByteBuffer.h>
namespace Compress {
// Access to Brotli's built-in word dictionary and its word transformations.
class BrotliDictionary {
public:
// What lookup_word() does to the base word before appending it between prefix and suffix.
enum TransformationOperation {
// Append the base word unchanged.
Identity,
// Append the base word, then ferment only its first character.
FermentFirst,
// Append the base word, then ferment every character.
FermentAll,
// Append the base word without its first `operation_data` bytes.
OmitFirst,
// Append the base word without its last `operation_data` bytes.
OmitLast,
};
// One entry of the transformation table: prefix + operation(base word) + suffix.
struct Transformation {
StringView prefix;
TransformationOperation operation;
// Byte count for OmitFirst / OmitLast; not read for the other operations.
u8 operation_data;
StringView suffix;
};
// Returns the transformed word selected by `index` among the words of `length` bytes.
// Fails when `length` is outside 4..24 or when `index` encodes a transformation id of 121 or more.
static ErrorOr<ByteBuffer> lookup_word(size_t index, size_t length);
};
}

File diff suppressed because one or more lines are too long

View file

@ -1,4 +1,6 @@
set(SOURCES
Brotli.cpp
BrotliDictionary.cpp
Deflate.cpp
Zlib.cpp
Gzip.cpp