[vm] Speed up JSON encoding.

TEST=ci
Change-Id: I8bfe00472f3a5e4e6680de631072cea0dacc3f55
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/283980
Reviewed-by: Ben Konyi <bkonyi@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
This commit is contained in:
Ryan Macnak 2023-02-21 18:58:42 +00:00 committed by Commit Queue
parent 004b400dfb
commit f56c45eb37
4 changed files with 210 additions and 64 deletions

View file

@ -62,42 +62,186 @@ void BaseTextBuffer::AddRaw(const uint8_t* buffer, intptr_t buffer_length) {
buffer_[length_] = '\0';
}
// Write a UTF-32 code unit so it can be read by a JSON parser in a string
// literal. Use official encoding from JSON specification. http://json.org/
void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
// Appends the `len` bytes at `s` to the buffer, applying JSON string escaping
// to every byte below 0x80 and copying bytes at or above 0x80 through
// unchanged.
//
// Works in two passes: the first adds up the escaped size so capacity is
// requested exactly once, the second emits. If the capacity request is
// refused, nothing is appended.
void BaseTextBuffer::AddEscapedUTF8(const char* const s, intptr_t len) {
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(s);

  // Pass 1: measure.
  intptr_t needed = 0;
  for (intptr_t i = 0; i < len; i++) {
    const uint8_t byte = bytes[i];
    needed += (byte < 0x80) ? EscapedCodeUnitLength(byte) : 1;
  }
  if (!EnsureCapacity(needed)) return;

  // Pass 2: emit. Space was reserved above, so no further capacity checks.
  for (intptr_t i = 0; i < len; i++) {
    const uint8_t byte = bytes[i];
    if (byte < 0x80) {
      EscapeAndAddCodeUnit(byte);
    } else {
      buffer_[length_++] = byte;
    }
  }
  buffer_[length_] = '\0';
}
// Appends `len` Latin-1 code units starting at `s`, passing each one through
// EscapeAndAddCodeUnit.
//
// The total escaped size is computed first so that capacity is requested a
// single time; if that request is refused, nothing is appended.
void BaseTextBuffer::AddEscapedLatin1(const uint8_t* const s, intptr_t len) {
  const uint8_t* const limit = s + len;

  // Measure the escaped output.
  intptr_t total = 0;
  for (const uint8_t* p = s; p < limit; ++p) {
    total += EscapedCodeUnitLength(*p);
  }
  if (!EnsureCapacity(total)) return;

  // Emit into the space reserved above.
  for (const uint8_t* p = s; p < limit; ++p) {
    EscapeAndAddCodeUnit(*p);
  }
  buffer_[length_] = '\0';
}
// Appends `len` UTF-16 code units starting at `s`, JSON-escaped.
//
// A lead surrogate immediately followed by a trail surrogate is decoded with
// Utf16::Decode and written as one code point. Every other surrogate (a trail
// with no preceding lead, a lead in the last position, or a lead followed by
// a non-trail) is written as a \uXXXX escape. All remaining units go through
// EscapeAndAddCodeUnit.
//
// Capacity is requested once per input unit. If a request is refused, the
// units already appended by this call are kept and NUL-terminated before
// returning, so the buffer is never left unterminated.
void BaseTextBuffer::AddEscapedUTF16(const uint16_t* s, intptr_t len) {
  const intptr_t start_length = length_;
  for (const uint16_t* end = s + len; s < end; s++) {
    // 6 bytes is the largest single emission (\uXXXX).
    if (!EnsureCapacity(6)) {
      // Something was written after an earlier successful request. That
      // request left more than 6 free bytes and at most 6 were used, so there
      // is room for the terminator (the same guarantee the final store below
      // relies on).
      if (length_ != start_length) {
        buffer_[length_] = '\0';
      }
      return;
    }
    const uint16_t code_unit = *s;
    if (Utf16::IsTrailSurrogate(code_unit)) {
      // Trail surrogate with no lead before it.
      EscapeAndAddUTF16CodeUnit(code_unit);
    } else if (Utf16::IsLeadSurrogate(code_unit)) {
      if (s + 1 == end) {
        // Lead surrogate at the very end of the input.
        EscapeAndAddUTF16CodeUnit(code_unit);
      } else {
        const uint16_t next_code_unit = *(s + 1);
        if (Utf16::IsTrailSurrogate(next_code_unit)) {
          // Well-formed pair: emit the decoded code point and consume both.
          const uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
          EscapeAndAddCodeUnit(decoded);
          s++;
        } else {
          // Lead surrogate followed by something that is not a trail.
          EscapeAndAddUTF16CodeUnit(code_unit);
        }
      }
    } else {
      EscapeAndAddCodeUnit(code_unit);
    }
  }
  buffer_[length_] = '\0';
}
// Returns the number of bytes needed to append `codeunit` in JSON-escaped
// form, so callers can reserve space before emitting:
//   2    for the short escapes  \"  \\  \/  \b  \f  \n  \r  \t
//   6    for any other control character below 0x20 (\uXXXX)
//   1-4  for everything else, by UTF-8 encoded length
//
// This is a pure size query: it does not write to the buffer and returns a
// value on every path.
DART_FORCE_INLINE
intptr_t BaseTextBuffer::EscapedCodeUnitLength(uint32_t codeunit) {
  switch (codeunit) {
    case '"':
    case '\\':
    case '/':
    case '\b':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
      return 2;
    default:
      if (codeunit < 0x20) {
        return 6;
      } else if (codeunit <= Utf8::kMaxOneByteChar) {
        return 1;
      } else if (codeunit <= Utf8::kMaxTwoByteChar) {
        return 2;
      } else if (codeunit <= Utf8::kMaxThreeByteChar) {
        return 3;
      } else {
        ASSERT(codeunit <= Utf8::kMaxFourByteChar);
        return 4;
      }
  }
}
// Maps a nibble value to its uppercase hexadecimal ASCII digit
// ('0'-'9' for 0-9, 'A'-'F' for 10-15).
static uint8_t Hex(uint8_t value) {
  if (value < 10) {
    return static_cast<uint8_t>('0' + value);
  }
  return static_cast<uint8_t>('A' + (value - 10));
}
// Appends one code point to the buffer in the form a JSON string literal
// requires (see http://json.org/):
//   - " \ / and the \b \f \n \r \t controls become two-character escapes,
//   - any other value below 0x20 becomes \uXXXX,
//   - everything else is written as its UTF-8 byte sequence.
//
// No capacity is requested here and no NUL terminator is written; the
// function only ASSERTs that enough room remains.
DART_FORCE_INLINE
void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
  const intptr_t remaining = capacity_ - length_;

  // Two-character escapes: a backslash followed by `mnemonic`.
  char mnemonic = '\0';
  switch (codeunit) {
    case '"':
      mnemonic = '"';
      break;
    case '\\':
      mnemonic = '\\';
      break;
    case '/':
      mnemonic = '/';
      break;
    case '\b':
      mnemonic = 'b';
      break;
    case '\f':
      mnemonic = 'f';
      break;
    case '\n':
      mnemonic = 'n';
      break;
    case '\r':
      mnemonic = 'r';
      break;
    case '\t':
      mnemonic = 't';
      break;
    default:
      break;
  }
  if (mnemonic != '\0') {
    ASSERT(remaining > 2);
    buffer_[length_++] = '\\';
    buffer_[length_++] = mnemonic;
    return;
  }

  // Remaining control characters: \u followed by four hex digits.
  if (codeunit < 0x20) {
    ASSERT(remaining > 6);
    buffer_[length_++] = '\\';
    buffer_[length_++] = 'u';
    buffer_[length_++] = Hex((codeunit >> 12) & 0xF);
    buffer_[length_++] = Hex((codeunit >> 8) & 0xF);
    buffer_[length_++] = Hex((codeunit >> 4) & 0xF);
    buffer_[length_++] = Hex((codeunit >> 0) & 0xF);
    return;
  }

  // UTF-8 encoding. kMask clears only bit 6 of a continuation byte; bit 7 is
  // supplied by the 0x80 marker.
  // NOTE(review): bits above bit 7 are presumably dropped when the value is
  // narrowed into the buffer element - confirm buffer_'s element type.
  static constexpr int kMask = ~(1 << 6);
  if (codeunit <= Utf8::kMaxOneByteChar) {
    ASSERT(remaining > 1);
    buffer_[length_++] = codeunit;
    return;
  }
  if (codeunit <= Utf8::kMaxTwoByteChar) {
    ASSERT(remaining > 2);
    buffer_[length_++] = 0xC0 | (codeunit >> 6);
    buffer_[length_++] = 0x80 | (codeunit & kMask);
    return;
  }
  if (codeunit <= Utf8::kMaxThreeByteChar) {
    ASSERT(remaining > 3);
    buffer_[length_++] = 0xE0 | (codeunit >> 12);
    buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
    buffer_[length_++] = 0x80 | (codeunit & kMask);
    return;
  }
  ASSERT(codeunit <= Utf8::kMaxFourByteChar);
  ASSERT(remaining > 4);
  buffer_[length_++] = 0xF0 | (codeunit >> 18);
  buffer_[length_++] = 0x80 | ((codeunit >> 12) & kMask);
  buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
  buffer_[length_++] = 0x80 | (codeunit & kMask);
}
@ -105,18 +249,22 @@ void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
// Writes an incomplete (unpaired) UTF-16 code unit as the six-character
// escape \uXXXX so it can be read by a JSON parser in a string literal.
//
// The escape is emitted exactly once, by direct stores. No capacity is
// requested here and no NUL terminator is written; the function only ASSERTs
// that more than six bytes remain.
void BaseTextBuffer::EscapeAndAddUTF16CodeUnit(uint16_t codeunit) {
  intptr_t remaining = capacity_ - length_;
  ASSERT(remaining > 6);
  buffer_[length_++] = '\\';
  buffer_[length_++] = 'u';
  // Four uppercase hex digits, most significant nibble first.
  for (int shift = 12; shift >= 0; shift -= 4) {
    buffer_[length_++] = Hex((codeunit >> shift) & 0xF);
  }
}
// Appends the NUL-terminated string `s` to the buffer verbatim (no escaping).
// The bytes are handed to AddRaw once; the string is not also routed through
// Printf, which would append it a second time.
void BaseTextBuffer::AddString(const char* s) {
  AddRaw(reinterpret_cast<const uint8_t*>(s), strlen(s));
}
// Appends the NUL-terminated string `s` with JSON string escaping applied.
// The work is delegated to AddEscapedUTF8 in a single call, so the string is
// appended exactly once and bytes at or above 0x80 are handled as unsigned
// values rather than being widened from a possibly negative `char`.
void BaseTextBuffer::AddEscapedString(const char* s) {
  AddEscapedUTF8(s, strlen(s));
}
TextBuffer::TextBuffer(intptr_t buf_size) {
@ -143,6 +291,7 @@ bool TextBuffer::EnsureCapacity(intptr_t len) {
intptr_t remaining = capacity_ - length_;
if (remaining <= len) {
intptr_t new_size = capacity_ + Utils::Maximum(capacity_, len + 1);
new_size = Utils::Maximum(new_size, static_cast<intptr_t>(256));
char* new_buf = reinterpret_cast<char*>(realloc(buffer_, new_size));
buffer_ = new_buf;
capacity_ = new_size;

View file

@ -22,12 +22,14 @@ class BaseTextBuffer : public ValueObject {
intptr_t Printf(const char* format, ...) PRINTF_ATTRIBUTE(2, 3);
intptr_t VPrintf(const char* format, va_list args);
void AddChar(char ch);
void EscapeAndAddUTF16CodeUnit(uint16_t cu);
void EscapeAndAddCodeUnit(uint32_t cu);
void AddString(const char* s);
void AddEscapedString(const char* s);
void AddRaw(const uint8_t* buffer, intptr_t buffer_length);
void AddEscapedString(const char* s);
void AddEscapedUTF8(const char* s, intptr_t len);
void AddEscapedLatin1(const uint8_t* code_units, intptr_t len);
void AddEscapedUTF16(const uint16_t* code_units, intptr_t len);
// Returns a pointer to the current internal buffer. Whether the pointer is
// still valid after the BaseTextBuffer dies depends on the subclass.
char* buffer() const { return buffer_; }
@ -37,6 +39,11 @@ class BaseTextBuffer : public ValueObject {
// should be assumed to invalidate the contents of previous calls to buffer().
virtual void Clear() = 0;
private:
intptr_t EscapedCodeUnitLength(uint32_t cu);
void EscapeAndAddCodeUnit(uint32_t cu);
void EscapeAndAddUTF16CodeUnit(uint16_t cu);
protected:
virtual bool EnsureCapacity(intptr_t len) = 0;

View file

@ -348,18 +348,7 @@ void JSONWriter::AddEscapedUTF8String(const char* s, intptr_t len) {
if (s == NULL) {
return;
}
const uint8_t* s8 = reinterpret_cast<const uint8_t*>(s);
intptr_t i = 0;
for (; i < len;) {
// Extract next UTF8 character.
int32_t ch = 0;
int32_t ch_len = Utf8::Decode(&s8[i], len - i, &ch);
ASSERT(ch_len != 0);
buffer_.EscapeAndAddCodeUnit(ch);
// Move i forward.
i += ch_len;
}
ASSERT(i == len);
buffer_.AddEscapedUTF8(s, len);
}
bool JSONWriter::AddDartString(const String& s,
@ -373,29 +362,26 @@ bool JSONWriter::AddDartString(const String& s,
if (!Utils::RangeCheck(offset, count, length)) {
count = length - offset;
}
intptr_t limit = offset + count;
for (intptr_t i = offset; i < limit; i++) {
uint16_t code_unit = s.CharAt(i);
if (Utf16::IsTrailSurrogate(code_unit)) {
buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
} else if (Utf16::IsLeadSurrogate(code_unit)) {
if (i + 1 == limit) {
buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
} else {
uint16_t next_code_unit = s.CharAt(i + 1);
if (Utf16::IsTrailSurrogate(next_code_unit)) {
uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
buffer_.EscapeAndAddCodeUnit(decoded);
i++;
} else {
buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
}
}
if (count > 0) { // Avoid asserts about harmless out-of-bounds index.
NoSafepointScope no_safepoint;
if (s.IsOneByteString()) {
buffer_.AddEscapedLatin1(OneByteString::CharAddr(s, offset), count);
} else if (s.IsExternalOneByteString()) {
buffer_.AddEscapedLatin1(ExternalOneByteString::CharAddr(s, offset),
count);
} else if (s.IsTwoByteString()) {
buffer_.AddEscapedUTF16(TwoByteString::CharAddr(s, offset), count);
} else if (s.IsExternalTwoByteString()) {
buffer_.AddEscapedUTF16(ExternalTwoByteString::CharAddr(s, offset),
count);
} else {
buffer_.EscapeAndAddCodeUnit(code_unit);
UNREACHABLE();
}
}
// Return value indicates whether the string is truncated.
intptr_t limit = offset + count;
return (offset > 0) || (limit < length);
}

View file

@ -10235,6 +10235,7 @@ class OneByteString : public AllStatic {
friend class Utf8;
friend class OneByteStringMessageSerializationCluster;
friend class Deserializer;
friend class JSONWriter;
};
class TwoByteString : public AllStatic {
@ -10353,6 +10354,7 @@ class TwoByteString : public AllStatic {
friend class StringHasher;
friend class Symbols;
friend class TwoByteStringMessageSerializationCluster;
friend class JSONWriter;
};
class ExternalOneByteString : public AllStatic {
@ -10445,6 +10447,7 @@ class ExternalOneByteString : public AllStatic {
friend class StringHasher;
friend class Symbols;
friend class Utf8;
friend class JSONWriter;
};
class ExternalTwoByteString : public AllStatic {
@ -10532,6 +10535,7 @@ class ExternalTwoByteString : public AllStatic {
friend class String;
friend class StringHasher;
friend class Symbols;
friend class JSONWriter;
};
// Matches null_patch.dart / bool_patch.dart.