1
0
mirror of https://github.com/SerenityOS/serenity synced 2024-07-05 22:34:49 +00:00

AK: Avoid creating an intermediate buffer when decoding a Base64 string

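The decoder previously filled a temporary Vector<u8> and then copied it
into a ByteBuffer. There's no need to copy the result: we can decode
directly into a ByteBuffer that is resized to the calculated decoded
length up front and trimmed to the number of bytes actually written.
This also avoids increasing the size of the output buffer by 1 for each
written byte.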

This reduces the runtime of `./bin/base64 -d enwik8.base64 >/dev/null`
from 0.917s to 0.632s.

(enwik8 is a 100MB test file from http://mattmahoney.net/dc/enwik8.zip)
This commit is contained in:
Timothy Flynn 2024-03-20 12:41:41 -04:00 committed by Andreas Kling
parent 0fd7ad09a0
commit 81ad6de41b

View File

@ -47,36 +47,39 @@ ErrorOr<ByteBuffer> decode_base64_impl(StringView input)
return { result };
};
Vector<u8> output;
output.ensure_capacity(calculate_base64_decoded_length(input));
ByteBuffer output;
TRY(output.try_resize(calculate_base64_decoded_length(input)));
size_t offset = 0;
while (offset < input.length()) {
size_t input_offset = 0;
size_t output_offset = 0;
while (input_offset < input.length()) {
bool in2_is_padding = false;
bool in3_is_padding = false;
bool parsed_something = false;
const u8 in0 = TRY(get(offset, nullptr, parsed_something));
const u8 in1 = TRY(get(offset, nullptr, parsed_something));
const u8 in2 = TRY(get(offset, &in2_is_padding, parsed_something));
const u8 in3 = TRY(get(offset, &in3_is_padding, parsed_something));
const u8 in0 = TRY(get(input_offset, nullptr, parsed_something));
const u8 in1 = TRY(get(input_offset, nullptr, parsed_something));
const u8 in2 = TRY(get(input_offset, &in2_is_padding, parsed_something));
const u8 in3 = TRY(get(input_offset, &in3_is_padding, parsed_something));
if (!parsed_something)
break;
const u8 out0 = (in0 << 2) | ((in1 >> 4) & 3);
const u8 out1 = ((in1 & 0xf) << 4) | ((in2 >> 2) & 0xf);
const u8 out2 = ((in2 & 0x3) << 6) | in3;
output[output_offset++] = (in0 << 2) | ((in1 >> 4) & 3);
output.append(out0);
if (!in2_is_padding)
output.append(out1);
output[output_offset++] = ((in1 & 0xf) << 4) | ((in2 >> 2) & 0xf);
if (!in3_is_padding)
output.append(out2);
output[output_offset++] = ((in2 & 0x3) << 6) | in3;
}
return ByteBuffer::copy(output);
if (output_offset < output.size())
output.trim(output_offset, false);
return output;
}
template<auto alphabet>