AK: Move data fields from AK::String to a newly created AK::StringBase

This starts separating the memory management of string data from string
utilities like `String::formatted`. It would also allow reusing the
same storage in `DeprecatedString` in the future.
This commit is contained in:
Dan Klishch 2023-10-28 15:17:06 -04:00 committed by Andrew Kaster
parent 6e2f627cb3
commit 4364a28d3d
5 changed files with 142 additions and 101 deletions

View file

@ -28,6 +28,7 @@ set(AK_SOURCES
StackInfo.cpp
Stream.cpp
String.cpp
StringBase.cpp
StringBuilder.cpp
StringFloatingPointConversions.cpp
StringImpl.cpp

View file

@ -122,49 +122,6 @@ void StringData::compute_hash() const
}
String::String(NonnullRefPtr<Detail::StringData const> data)
: m_data(&data.leak_ref())
{
}
String::String(String const& other)
: m_data(other.m_data)
{
if (!is_short_string())
m_data->ref();
}
String::String(String&& other)
: m_data(exchange(other.m_data, nullptr))
{
other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG;
}
// Move assignment: drops the reference this String currently holds (if it is not a short
// string), takes over the contents of `other`, and resets `other` to a zero-length short string.
String& String::operator=(String&& other)
{
    // Self-move must be a no-op. Without this guard a heap-backed string would first drop its
    // own reference and then write the short-string flag byte into the very slot that still
    // holds its pointer, leaving a corrupted, possibly dangling value behind.
    if (this == &other)
        return *this;

    if (!is_short_string())
        m_data->unref();

    m_data = exchange(other.m_data, nullptr);
    other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG;
    return *this;
}
// Copy assignment: self-assignment is a no-op. Otherwise the currently held reference (if any)
// is released, the stored value of `other` is copied, and a new reference is taken when the
// copied value is not a short string.
String& String::operator=(String const& other)
{
    if (this == &other)
        return *this;

    // Release what we held before overwriting the slot.
    if (!is_short_string())
        m_data->unref();

    m_data = other.m_data;

    // is_short_string() now reflects the value just copied from `other`.
    if (!is_short_string())
        m_data->ref();

    return *this;
}
void String::destroy_string()
{
if (!is_short_string())
@ -491,21 +448,6 @@ bool String::ends_with_bytes(StringView bytes, CaseSensitivity case_sensitivity)
return bytes_as_string_view().ends_with(bytes, case_sensitivity);
}
// Reports whether the stored value is an inline short string, determined by testing the
// short-string bit of the pointer-sized slot.
bool String::is_short_string() const
{
    auto const raw_bits = reinterpret_cast<uintptr_t>(m_data);
    return has_short_string_bit(raw_bits);
}
// Returns a read-only view of the inline storage, limited to byte_count() bytes.
ReadonlyBytes String::ShortString::bytes() const
{
    auto const length = byte_count();
    return { storage, length };
}
// Decodes the length of the short string: the combined byte is shifted right by one, which
// discards its lowest bit and leaves the byte count.
size_t String::ShortString::byte_count() const
{
    size_t const length = byte_count_and_short_string_flag >> 1;
    return length;
}
unsigned Traits<String>::hash(String const& string)
{
return string.hash();

View file

@ -14,6 +14,7 @@
#include <AK/Optional.h>
#include <AK/RefCounted.h>
#include <AK/Span.h>
#include <AK/StringBase.h>
#include <AK/StringBuilder.h>
#include <AK/StringUtils.h>
#include <AK/StringView.h>
@ -25,10 +26,6 @@
namespace AK {
namespace Detail {
class StringData;
}
// FIXME: Remove this when OpenBSD Clang fully supports consteval.
// And once oss-fuzz updates to clang >15.
// And once Android ships an NDK with clang >14
@ -41,16 +38,15 @@ class StringData;
// String is a strongly owned sequence of Unicode code points encoded as UTF-8.
// The data may or may not be heap-allocated, and may or may not be reference counted.
// There is no guarantee that the underlying bytes are null-terminated.
class String {
class String : public Detail::StringBase {
AK_MAKE_DEFAULT_COPYABLE(String);
AK_MAKE_DEFAULT_MOVABLE(String);
public:
// NOTE: For short strings, we avoid heap allocations by storing them in the data pointer slot.
static constexpr size_t MAX_SHORT_STRING_BYTE_COUNT = sizeof(Detail::StringData*) - 1;
static constexpr size_t MAX_SHORT_STRING_BYTE_COUNT = Detail::MAX_SHORT_STRING_BYTE_COUNT;
String(String const&);
String(String&&);
String& operator=(String&&);
String& operator=(String const&);
using StringBase::StringBase;
constexpr ~String()
{
@ -60,7 +56,7 @@ public:
// Creates an empty (zero-length) String.
constexpr String()
: String(ShortString { SHORT_STRING_FLAG, {} })
: StringBase(ShortString { SHORT_STRING_FLAG, {} })
{
}
@ -199,9 +195,6 @@ public:
return builder.to_string();
}
// NOTE: This is primarily interesting to unit tests.
[[nodiscard]] bool is_short_string() const;
[[nodiscard]] static String fly_string_data_to_string(Badge<FlyString>, uintptr_t const&);
[[nodiscard]] static StringView fly_string_data_to_string_view(Badge<FlyString>, uintptr_t const&);
[[nodiscard]] static u32 fly_string_data_to_hash(Badge<FlyString>, uintptr_t const&);
@ -219,36 +212,9 @@ public:
static ErrorOr<String> from_byte_string(T&&) = delete;
private:
// NOTE: If the least significant bit of the pointer is set, this is a short string.
static constexpr uintptr_t SHORT_STRING_FLAG = 1;
// Returns true when the short-string flag bit is set in `data`.
static constexpr bool has_short_string_bit(uintptr_t data)
{
    return (data & SHORT_STRING_FLAG) == SHORT_STRING_FLAG;
}
// Inline representation of a short string: one combined length/flag byte followed by up to
// MAX_SHORT_STRING_BYTE_COUNT bytes of string data.
struct ShortString {
// Read-only view of the stored bytes (defined out of line).
ReadonlyBytes bytes() const;
// Number of bytes stored (defined out of line).
size_t byte_count() const;
// NOTE: This is the byte count shifted left 1 step and or'ed with a 1 (the SHORT_STRING_FLAG)
u8 byte_count_and_short_string_flag { 0 };
// Inline string bytes; zero-initialized by default.
u8 storage[MAX_SHORT_STRING_BYTE_COUNT] = { 0 };
};
explicit String(NonnullRefPtr<Detail::StringData const>);
explicit constexpr String(ShortString short_string)
: m_short_string(short_string)
{
}
using ShortString = Detail::ShortString;
void destroy_string();
union {
ShortString m_short_string;
Detail::StringData const* m_data { nullptr };
};
};
template<>

62
AK/StringBase.cpp Normal file
View file

@ -0,0 +1,62 @@
/*
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/StringBase.h>
#include <AK/StringInternals.h>
// Out-of-line parts of the string storage layer: the ShortString accessors and the
// reference-aware copy/move operations of StringBase.
namespace AK::Detail {

// Returns a read-only view of the inline storage, limited to byte_count() bytes.
ReadonlyBytes ShortString::bytes() const
{
    return { storage, byte_count() };
}

// Decodes the length: shifting right by one discards the short-string flag bit and leaves
// the byte count.
size_t ShortString::byte_count() const
{
    return byte_count_and_short_string_flag >> 1;
}

// Stores the address of the StringData object returned by data.leak_ref().
// NOTE(review): presumably leak_ref() gives up the reference without dropping it, so this
// object ends up holding that reference — confirm against NonnullRefPtr.
StringBase::StringBase(NonnullRefPtr<Detail::StringData const> data)
    : m_data(&data.leak_ref())
{
}

// Copy constructor: duplicates the stored value of `other`; when that value is not a short
// string, the shared data gains one more reference.
StringBase::StringBase(StringBase const& other)
    : m_data(other.m_data)
{
    if (!is_short_string())
        m_data->ref();
}

// Move assignment: releases the reference currently held (if any), takes over the contents of
// `other`, and resets `other` to a zero-length short string.
StringBase& StringBase::operator=(StringBase&& other)
{
    // Self-move must be a no-op. Without this guard a heap-backed string would first drop its
    // own reference and then write the short-string flag byte into the very slot that still
    // holds its pointer, leaving a corrupted, possibly dangling value behind.
    if (this == &other)
        return *this;

    if (!is_short_string())
        m_data->unref();

    m_data = exchange(other.m_data, nullptr);
    other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG;
    return *this;
}

// Copy assignment: self-assignment is a no-op. Otherwise the held reference (if any) is
// released, the stored value of `other` is copied, and a new reference is taken when the
// copied value is not a short string.
StringBase& StringBase::operator=(StringBase const& other)
{
    if (this == &other)
        return *this;

    if (!is_short_string())
        m_data->unref();

    m_data = other.m_data;

    // is_short_string() now reflects the value just copied from `other`.
    if (!is_short_string())
        m_data->ref();

    return *this;
}

// Reports whether the stored value is an inline short string, by testing the short-string
// bit of the pointer-sized slot.
bool StringBase::is_short_string() const
{
    return has_short_string_bit(reinterpret_cast<uintptr_t>(m_data));
}

}

70
AK/StringBase.h Normal file
View file

@ -0,0 +1,70 @@
/*
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Endian.h>
#include <AK/Forward.h>
// Storage layer for strings: a pointer-sized slot that holds either a pointer to StringData
// or, when the lowest bit is set, a short string stored inline in the same bytes.
namespace AK::Detail {
class StringData;
// Largest number of bytes that can be stored inline: the pointer-sized slot minus the one
// byte used for the combined length/flag.
static constexpr size_t MAX_SHORT_STRING_BYTE_COUNT = sizeof(StringData*) - 1;
// Inline representation of a short string: one combined length/flag byte followed by the
// string bytes.
struct ShortString {
// Read-only view of the stored bytes (defined out of line).
ReadonlyBytes bytes() const;
// Number of bytes stored (defined out of line).
size_t byte_count() const;
// NOTE: This is the byte count shifted left 1 step and or'ed with a 1 (the SHORT_STRING_FLAG)
u8 byte_count_and_short_string_flag { 0 };
// Inline string bytes; zero-initialized by default.
u8 storage[MAX_SHORT_STRING_BYTE_COUNT] = { 0 };
};
// Layout requirements for overlaying ShortString on a pointer: the flag byte must be the first
// byte (the least significant byte of the pointer on a little-endian host), and ShortString
// must cover the whole pointer.
static_assert(HostIsLittleEndian, "Order of fields in ShortString assumes LE.");
static_assert(sizeof(ShortString) >= sizeof(StringData*));
static_assert(__builtin_offsetof(ShortString, byte_count_and_short_string_flag) == 0);
// Base class holding the string's data slot together with its copy/move operations.
// No destructor is declared here, so releasing a held reference on destruction is left to
// code outside this class.
class StringBase {
public:
// Copy constructor (defined out of line).
StringBase(StringBase const&);
// Move constructor: copies the whole slot through the ShortString member, then resets
// `other` to zeroed storage with only the short-string flag set (a zero-length short string).
constexpr StringBase(StringBase&& other)
: m_short_string(other.m_short_string)
{
other.m_short_string = ShortString {};
other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG;
}
// Move and copy assignment (defined out of line).
StringBase& operator=(StringBase&&);
StringBase& operator=(StringBase const&);
// NOTE: This is primarily interesting to unit tests.
[[nodiscard]] bool is_short_string() const;
protected:
// NOTE: If the least significant bit of the pointer is set, this is a short string.
static constexpr uintptr_t SHORT_STRING_FLAG = 1;
// Returns true when the short-string flag bit is set in `data`.
static constexpr bool has_short_string_bit(uintptr_t data)
{
return (data & SHORT_STRING_FLAG) != 0;
}
// Constructs from heap-allocated string data (defined out of line).
explicit StringBase(NonnullRefPtr<Detail::StringData const>);
// Constructs directly from an inline short-string value; usable in constant expressions.
explicit constexpr StringBase(ShortString short_string)
: m_short_string(short_string)
{
}
// The data slot: both members share the same bytes. Which one is meaningful is decided by
// the short-string bit (see SHORT_STRING_FLAG).
union {
ShortString m_short_string;
Detail::StringData const* m_data { nullptr };
};
};
}