AK: Add String::from_stream method

The caller is responsible for specifying how many bytes of the stream
should be read into the string.
This commit is contained in:
Andrew Kaster 2023-02-19 18:34:29 -07:00 committed by Linus Groh
parent 7ac7a73758
commit 0ea697ace5
3 changed files with 84 additions and 0 deletions

View file

@ -9,6 +9,7 @@
#include <AK/FlyString.h>
#include <AK/Format.h>
#include <AK/MemMem.h>
#include <AK/Stream.h>
#include <AK/String.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
@ -23,6 +24,7 @@ public:
static ErrorOr<NonnullRefPtr<StringData>> create_uninitialized(size_t, u8*& buffer);
static ErrorOr<NonnullRefPtr<StringData>> create_substring(StringData const& superstring, size_t start, size_t byte_count);
static ErrorOr<NonnullRefPtr<StringData>> from_utf8(char const* utf8_bytes, size_t);
// Creates a StringData by reading byte_count bytes from the given Stream and validating them as UTF-8.
// byte_count must be greater than String::MAX_SHORT_STRING_BYTE_COUNT (the definition VERIFYs this).
static ErrorOr<NonnullRefPtr<StringData>> from_stream(Stream&, size_t byte_count);
struct SubstringData {
StringData const* superstring { nullptr };
@ -141,6 +143,23 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
return new_string_data;
}
// Creates a heap-backed StringData whose contents are exactly byte_count bytes read from the stream.
//
// Propagates any error from create_uninitialized() or from the stream, and returns an error if the
// bytes that were read are not valid UTF-8. The stream position is not restored on failure.
ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count)
{
    // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
    VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT);

    u8* buffer = nullptr;
    auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
    Bytes new_string_bytes = { buffer, byte_count };

    // A plain read() is allowed to return fewer bytes than requested, which would leave the tail of
    // this uninitialized buffer untouched. read_entire_buffer() either fills the whole span or fails.
    TRY(stream.read_entire_buffer(new_string_bytes));

    Utf8View view(StringView { new_string_bytes });
    if (!view.validate())
        return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8");

    return new_string_data;
}
ErrorOr<NonnullRefPtr<StringData>> StringData::create_substring(StringData const& superstring, size_t start, size_t byte_count)
{
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
@ -222,6 +241,19 @@ ErrorOr<String> String::from_utf8(StringView view)
return String { move(data) };
}
// Creates a new String from exactly byte_count bytes read from the stream.
//
// Strings of at most MAX_SHORT_STRING_BYTE_COUNT bytes are stored inline as a short string; longer
// ones are delegated to Detail::StringData::from_stream(). A byte_count of 0 yields an empty String
// without touching the stream. Returns an error if the stream cannot supply byte_count bytes or if
// the bytes read are not valid UTF-8.
ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
{
    if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) {
        ShortString short_string;
        if (byte_count > 0) {
            Bytes short_string_bytes = { short_string.storage, byte_count };

            // read() may return a short read; read_entire_buffer() fills the span completely or fails.
            TRY(stream.read_entire_buffer(short_string_bytes));

            // The long string path validates its input, so the short string path has to as well;
            // otherwise a String containing invalid UTF-8 could be constructed.
            Utf8View view(StringView { short_string_bytes });
            if (!view.validate())
                return Error::from_string_literal("String::from_stream: Input was not valid UTF-8");
        }
        short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
        return String { short_string };
    }

    auto data = TRY(Detail::StringData::from_stream(stream, byte_count));
    return String { move(data) };
}
ErrorOr<String> String::repeated(u32 code_point, size_t count)
{
VERIFY(is_unicode(code_point));

View file

@ -64,6 +64,9 @@ public:
// Creates a new String from a sequence of UTF-8 encoded code points.
static ErrorOr<String> from_utf8(StringView);
// Creates a new String by reading byte_count bytes from a UTF-8 encoded Stream.
// The caller decides how many bytes to consume; a byte_count of 0 produces an empty String without
// reading from the stream. Returns an error if reading from the stream fails.
static ErrorOr<String> from_stream(Stream&, size_t byte_count);
// Creates a new String from a short sequence of UTF-8 encoded code points. If the provided string
// does not fit in the short string storage, a compilation error will be emitted.
static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string)

View file

@ -10,6 +10,7 @@
#include <LibTest/TestCase.h>
#include <AK/MemoryStream.h>
#include <AK/StringBuilder.h>
#include <AK/Try.h>
#include <AK/Utf8View.h>
@ -67,6 +68,54 @@ TEST_CASE(long_strings)
EXPECT_EQ(string.bytes_as_string_view(), "abcdefgh"sv);
}
// Exercises String::from_stream() against both stream implementations, covering the long string
// path, the short string path, a read that stops before the end of the stream, and invalid UTF-8.
TEST_CASE(long_streams)
{
    // Long string read back from a fixed-size, seekable buffer.
    {
        u8 bytes[64] = {};
        constexpr auto test_view = "Well, hello friends"sv;
        FixedMemoryStream stream(Bytes { bytes, sizeof(bytes) });
        // write_entire_buffer() fails loudly instead of silently writing only part of the input.
        MUST(stream.write_entire_buffer(test_view.bytes()));
        MUST(stream.seek(0));

        auto string = MUST(String::from_stream(stream, test_view.length()));

        EXPECT_EQ(string.is_short_string(), false);
        EXPECT_EQ(string.bytes().size(), 19u);
        EXPECT_EQ(string.bytes_as_string_view(), test_view);
    }

    // Three bytes are expected to be stored as a short string.
    {
        AllocatingMemoryStream stream;
        MUST(stream.write_entire_buffer(("abc"sv).bytes()));

        auto string = MUST(String::from_stream(stream, 3u));

        EXPECT_EQ(string.is_short_string(), true);
        EXPECT_EQ(string.bytes().size(), 3u);
        EXPECT_EQ(string.bytes_as_string_view(), "abc"sv);
    }

    // Only the requested 9 of the 10 available bytes may end up in the string.
    {
        AllocatingMemoryStream stream;
        MUST(stream.write_entire_buffer(("0123456789"sv).bytes()));

        auto string = MUST(String::from_stream(stream, 9u));

        EXPECT_EQ(string.is_short_string(), false);
        EXPECT_EQ(string.bytes().size(), 9u);
        EXPECT_EQ(string.bytes_as_string_view(), "012345678"sv);
    }

    // A run of 0xff bytes is never valid UTF-8, so construction has to fail.
    {
        AllocatingMemoryStream stream;
        MUST(stream.write_value(0xffffffff));
        MUST(stream.write_value(0xffffffff));
        MUST(stream.write_value(0xffffffff));

        auto error_or_string = String::from_stream(stream, stream.used_buffer_size());

        EXPECT_EQ(error_or_string.is_error(), true);
    }
}
TEST_CASE(from_code_points)
{
for (u32 code_point = 0; code_point < 0x80; ++code_point) {