[vm] Speed up JSON encoding.

TEST=ci
Change-Id: I8bfe00472f3a5e4e6680de631072cea0dacc3f55
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/283980
Reviewed-by: Ben Konyi <bkonyi@google.com>
Commit-Queue: Ryan Macnak <rmacnak@google.com>
2024-09-04 16:03:44 +00:00 · 2023-02-21 18:58:42 +00:00 · 2023-02-21 18:58:42 +00:00 · f56c45eb37
parent 004b400dfb
commit f56c45eb37
4 changed files with 210 additions and 64 deletions
--- a/runtime/platform/text_buffer.cc
+++ b/runtime/platform/text_buffer.cc
@ -62,42 +62,186 @@ void BaseTextBuffer::AddRaw(const uint8_t* buffer, intptr_t buffer_length) {
  buffer_[length_] = '\0';
 }

-// Write a UTF-32 code unit so it can be read by a JSON parser in a string
-// literal. Use official encoding from JSON specification. http://json.org/
-void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
+// Appends the |len| bytes at |s| to the buffer as the contents of a JSON
+// string literal. Bytes below 0x80 go through EscapeAndAddCodeUnit; bytes at
+// or above 0x80 are copied through unchanged, without being decoded.
+// Nothing is appended if EnsureCapacity rejects the total escaped size.
+void BaseTextBuffer::AddEscapedUTF8(const char* const s, intptr_t len) {
+  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(s);
+
+  // Pass 1: measure the escaped output so capacity is requested only once.
+  intptr_t escaped_size = 0;
+  for (intptr_t i = 0; i < len; i++) {
+    const uint8_t byte = bytes[i];
+    escaped_size += (byte < 0x80) ? EscapedCodeUnitLength(byte) : 1;
+  }
+  if (!EnsureCapacity(escaped_size)) {
+    return;
+  }
+
+  // Pass 2: emit. The escaping helper relies on the space reserved above.
+  for (intptr_t i = 0; i < len; i++) {
+    const uint8_t byte = bytes[i];
+    if (byte < 0x80) {
+      EscapeAndAddCodeUnit(byte);
+    } else {
+      buffer_[length_++] = byte;
+    }
+  }
+  buffer_[length_] = '\0';
+}
+
+// Appends |len| Latin-1 code units starting at |s| as the contents of a JSON
+// string literal, sending every code unit through EscapeAndAddCodeUnit.
+// Nothing is appended if EnsureCapacity rejects the total escaped size.
+void BaseTextBuffer::AddEscapedLatin1(const uint8_t* const s, intptr_t len) {
+  // Measure first so that a single capacity request covers the whole string.
+  intptr_t escaped_size = 0;
+  for (intptr_t i = 0; i < len; i++) {
+    escaped_size += EscapedCodeUnitLength(s[i]);
+  }
+  if (!EnsureCapacity(escaped_size)) {
+    return;
+  }
+
+  for (intptr_t i = 0; i < len; i++) {
+    EscapeAndAddCodeUnit(s[i]);
+  }
+  buffer_[length_] = '\0';
+}
+
+// Appends |len| UTF-16 code units starting at |s| as the contents of a JSON
+// string literal. A lead surrogate directly followed by a trail surrogate is
+// decoded and written as a single code point; a surrogate without a partner
+// is written as a \uXXXX escape.
+//
+// Capacity is requested one code unit at a time, so if EnsureCapacity fails
+// the output stops part-way through the string. Whatever was appended up to
+// that point is still NUL-terminated.
+void BaseTextBuffer::AddEscapedUTF16(const uint16_t* s, intptr_t len) {
+  for (const uint16_t* end = s + len; s < end; s++) {
+    // 6 bytes (\uXXXX) is the most a single iteration writes. On failure,
+    // break instead of returning: the helpers below do not write a
+    // terminator, so an early return would leave the code units appended by
+    // earlier iterations without a trailing NUL.
+    if (!EnsureCapacity(6)) break;
+
+    const uint16_t code_unit = *s;
+    if (Utf16::IsTrailSurrogate(code_unit)) {
+      // Trail surrogate with no preceding lead.
+      EscapeAndAddUTF16CodeUnit(code_unit);
+    } else if (Utf16::IsLeadSurrogate(code_unit)) {
+      const bool has_trail =
+          (s + 1 != end) && Utf16::IsTrailSurrogate(*(s + 1));
+      if (has_trail) {
+        EscapeAndAddCodeUnit(Utf16::Decode(code_unit, *(s + 1)));
+        s++;  // The trail surrogate was consumed together with its lead.
+      } else {
+        EscapeAndAddUTF16CodeUnit(code_unit);
+      }
+    } else {
+      EscapeAndAddCodeUnit(code_unit);
+    }
+  }
+  // Each completed iteration wrote at most the 6 bytes it reserved, so there
+  // is still room for the terminator. If no iteration completed, this only
+  // rewrites buffer_[length_], which is assumed to hold the terminator on
+  // entry, as the other Add* methods leave it.
+  buffer_[length_] = '\0';
+}
+
+// Returns the number of bytes EscapeAndAddCodeUnit writes for |codeunit|:
+// 2 for the short backslash escapes, 6 for any other code unit below 0x20
+// (written as \uXXXX), and otherwise the length of its UTF-8 encoding
+// (1 to 4 bytes). Used to reserve capacity before emitting.
+DART_FORCE_INLINE
+intptr_t BaseTextBuffer::EscapedCodeUnitLength(uint32_t codeunit) {
  switch (codeunit) {
    case '"':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\\""), 2);
-      break;
    case '\\':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\\\"), 2);
-      break;
    case '/':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\/"), 2);
-      break;
    case '\b':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\b"), 2);
-      break;
    case '\f':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\f"), 2);
-      break;
    case '\n':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\n"), 2);
-      break;
    case '\r':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\r"), 2);
-      break;
    case '\t':
-      AddRaw(reinterpret_cast<uint8_t const*>("\\t"), 2);
-      break;
+      // Every case above is a backslash followed by one character.
+      return 2;
    default:
      if (codeunit < 0x20) {
-        EscapeAndAddUTF16CodeUnit(codeunit);
+        return 6;
+      } else if (codeunit <= Utf8::kMaxOneByteChar) {
+        return 1;
+      } else if (codeunit <= Utf8::kMaxTwoByteChar) {
+        return 2;
+      } else if (codeunit <= Utf8::kMaxThreeByteChar) {
+        return 3;
      } else {
-        char encoded[6];
-        intptr_t length = Utf8::Length(codeunit);
-        Utf8::Encode(codeunit, encoded);
-        AddRaw(reinterpret_cast<uint8_t const*>(encoded), length);
+        ASSERT(codeunit <= Utf8::kMaxFourByteChar);
+        return 4;
+      }
+  }
+}
+
+// Converts a nibble value to its uppercase hexadecimal ASCII digit:
+// 0-9 map to '0'-'9' and 10-15 map to 'A'-'F'.
+static uint8_t Hex(uint8_t value) {
+  if (value < 10) {
+    return '0' + value;
+  }
+  return 'A' + (value - 10);
+}
+
+// Write a UTF-32 code unit so it can be read by a JSON parser in a string
+// literal. Use official encoding from JSON specification. http://json.org/
+//
+// The caller must already have reserved room for the output (see
+// EscapedCodeUnitLength) plus one byte for the terminating NUL. This
+// function only asserts that the space is there; it neither grows the buffer
+// nor writes the terminator.
+DART_FORCE_INLINE
+void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
+  intptr_t remaining = capacity_ - length_;
+  switch (codeunit) {
+    case '"':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = '\"';
+      break;
+    case '\\':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = '\\';
+      break;
+    case '/':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = '/';
+      break;
+    case '\b':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'b';
+      break;
+    case '\f':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'f';
+      break;
+    case '\n':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'n';
+      break;
+    case '\r':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 'r';
+      break;
+    case '\t':
+      ASSERT(remaining > 2);
+      buffer_[length_++] = '\\';
+      buffer_[length_++] = 't';
+      break;
+    default:
+      // Selects the six payload bits carried by a UTF-8 continuation byte
+      // (10xxxxxx). Masking to exactly six bits keeps every value stored
+      // below within one byte instead of relying on the narrowing store into
+      // the char buffer to drop the higher bits.
+      static constexpr uint32_t kMask = (1 << 6) - 1;
+      if (codeunit < 0x20) {
+        // Control characters without a short escape are written as \uXXXX.
+        ASSERT(remaining > 6);
+        buffer_[length_++] = '\\';
+        buffer_[length_++] = 'u';
+        buffer_[length_++] = Hex((codeunit >> 12) & 0xF);
+        buffer_[length_++] = Hex((codeunit >> 8) & 0xF);
+        buffer_[length_++] = Hex((codeunit >> 4) & 0xF);
+        buffer_[length_++] = Hex((codeunit >> 0) & 0xF);
+      } else if (codeunit <= Utf8::kMaxOneByteChar) {
+        ASSERT(remaining > 1);
+        buffer_[length_++] = codeunit;
+      } else if (codeunit <= Utf8::kMaxTwoByteChar) {
+        // 110xxxxx 10xxxxxx
+        ASSERT(remaining > 2);
+        buffer_[length_++] = 0xC0 | (codeunit >> 6);
+        buffer_[length_++] = 0x80 | (codeunit & kMask);
+      } else if (codeunit <= Utf8::kMaxThreeByteChar) {
+        // 1110xxxx 10xxxxxx 10xxxxxx
+        ASSERT(remaining > 3);
+        buffer_[length_++] = 0xE0 | (codeunit >> 12);
+        buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
+        buffer_[length_++] = 0x80 | (codeunit & kMask);
+      } else {
+        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        ASSERT(codeunit <= Utf8::kMaxFourByteChar);
+        ASSERT(remaining > 4);
+        buffer_[length_++] = 0xF0 | (codeunit >> 18);
+        buffer_[length_++] = 0x80 | ((codeunit >> 12) & kMask);
+        buffer_[length_++] = 0x80 | ((codeunit >> 6) & kMask);
+        buffer_[length_++] = 0x80 | (codeunit & kMask);
      }
  }
 }
@ -105,18 +249,22 @@ void BaseTextBuffer::EscapeAndAddCodeUnit(uint32_t codeunit) {
 // Write an incomplete UTF-16 code unit so it can be read by a JSON parser in a
 // string literal.
 void BaseTextBuffer::EscapeAndAddUTF16CodeUnit(uint16_t codeunit) {
-  Printf("\\u%04X", codeunit);
+  // Six bytes are written below and no terminator is appended, so more than
+  // six bytes must already be free in the buffer.
+  ASSERT(capacity_ - length_ > 6);
+  buffer_[length_++] = '\\';
+  buffer_[length_++] = 'u';
+  // Emit the four hex digits, most significant nibble first.
+  for (int shift = 12; shift >= 0; shift -= 4) {
+    buffer_[length_++] = Hex((codeunit >> shift) & 0xF);
+  }
 }

 void BaseTextBuffer::AddString(const char* s) {
-  Printf("%s", s);
+  // Appends the NUL-terminated string |s| as-is (no escaping) by handing its
+  // bytes and strlen to AddRaw.
+  AddRaw(reinterpret_cast<const uint8_t*>(s), strlen(s));
 }

 void BaseTextBuffer::AddEscapedString(const char* s) {
-  intptr_t len = strlen(s);
-  for (int i = 0; i < len; i++) {
-    EscapeAndAddCodeUnit(s[i]);
-  }
+  // Convenience wrapper for NUL-terminated input: measures |s| with strlen
+  // and delegates the escaping to AddEscapedUTF8.
+  AddEscapedUTF8(s, strlen(s));
 }

 TextBuffer::TextBuffer(intptr_t buf_size) {
@ -143,6 +291,7 @@ bool TextBuffer::EnsureCapacity(intptr_t len) {
  intptr_t remaining = capacity_ - length_;
  if (remaining <= len) {
    intptr_t new_size = capacity_ + Utils::Maximum(capacity_, len + 1);
+    new_size = Utils::Maximum(new_size, static_cast<intptr_t>(256));
    char* new_buf = reinterpret_cast<char*>(realloc(buffer_, new_size));
    buffer_ = new_buf;
    capacity_ = new_size;
--- a/runtime/platform/text_buffer.h
+++ b/runtime/platform/text_buffer.h
@ -22,12 +22,14 @@ class BaseTextBuffer : public ValueObject {
  intptr_t Printf(const char* format, ...) PRINTF_ATTRIBUTE(2, 3);
  intptr_t VPrintf(const char* format, va_list args);
  void AddChar(char ch);
-  void EscapeAndAddUTF16CodeUnit(uint16_t cu);
-  void EscapeAndAddCodeUnit(uint32_t cu);
  void AddString(const char* s);
-  void AddEscapedString(const char* s);
  void AddRaw(const uint8_t* buffer, intptr_t buffer_length);

+  void AddEscapedString(const char* s);
+  void AddEscapedUTF8(const char* s, intptr_t len);
+  void AddEscapedLatin1(const uint8_t* code_units, intptr_t len);
+  void AddEscapedUTF16(const uint16_t* code_units, intptr_t len);
+
  // Returns a pointer to the current internal buffer. Whether the pointer is
  // still valid after the BaseTextBuffer dies depends on the subclass.
  char* buffer() const { return buffer_; }
@ -37,6 +39,11 @@ class BaseTextBuffer : public ValueObject {
  // should be assumed to invalidate the contents of previous calls to buffer().
  virtual void Clear() = 0;

+ private:
+  intptr_t EscapedCodeUnitLength(uint32_t cu);
+  void EscapeAndAddCodeUnit(uint32_t cu);
+  void EscapeAndAddUTF16CodeUnit(uint16_t cu);
+
 protected:
  virtual bool EnsureCapacity(intptr_t len) = 0;

--- a/runtime/vm/json_writer.cc
+++ b/runtime/vm/json_writer.cc
@ -348,18 +348,7 @@ void JSONWriter::AddEscapedUTF8String(const char* s, intptr_t len) {
  if (s == NULL) {
    return;
  }
-  const uint8_t* s8 = reinterpret_cast<const uint8_t*>(s);
-  intptr_t i = 0;
-  for (; i < len;) {
-    // Extract next UTF8 character.
-    int32_t ch = 0;
-    int32_t ch_len = Utf8::Decode(&s8[i], len - i, &ch);
-    ASSERT(ch_len != 0);
-    buffer_.EscapeAndAddCodeUnit(ch);
-    // Move i forward.
-    i += ch_len;
-  }
-  ASSERT(i == len);
+  buffer_.AddEscapedUTF8(s, len);
 }

 bool JSONWriter::AddDartString(const String& s,
@ -373,29 +362,26 @@ bool JSONWriter::AddDartString(const String& s,
  if (!Utils::RangeCheck(offset, count, length)) {
    count = length - offset;
  }
-  intptr_t limit = offset + count;
-  for (intptr_t i = offset; i < limit; i++) {
-    uint16_t code_unit = s.CharAt(i);
-    if (Utf16::IsTrailSurrogate(code_unit)) {
-      buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
-    } else if (Utf16::IsLeadSurrogate(code_unit)) {
-      if (i + 1 == limit) {
-        buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
-      } else {
-        uint16_t next_code_unit = s.CharAt(i + 1);
-        if (Utf16::IsTrailSurrogate(next_code_unit)) {
-          uint32_t decoded = Utf16::Decode(code_unit, next_code_unit);
-          buffer_.EscapeAndAddCodeUnit(decoded);
-          i++;
-        } else {
-          buffer_.EscapeAndAddUTF16CodeUnit(code_unit);
-        }
-      }
+
+  if (count > 0) {  // Avoid asserts about harmless out-of-bounds index.
+    NoSafepointScope no_safepoint;
+    if (s.IsOneByteString()) {
+      buffer_.AddEscapedLatin1(OneByteString::CharAddr(s, offset), count);
+    } else if (s.IsExternalOneByteString()) {
+      buffer_.AddEscapedLatin1(ExternalOneByteString::CharAddr(s, offset),
+                               count);
+    } else if (s.IsTwoByteString()) {
+      buffer_.AddEscapedUTF16(TwoByteString::CharAddr(s, offset), count);
+    } else if (s.IsExternalTwoByteString()) {
+      buffer_.AddEscapedUTF16(ExternalTwoByteString::CharAddr(s, offset),
+                              count);
    } else {
-      buffer_.EscapeAndAddCodeUnit(code_unit);
+      UNREACHABLE();
    }
  }
+
  // Return value indicates whether the string is truncated.
+  intptr_t limit = offset + count;
  return (offset > 0) || (limit < length);
 }

--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@ -10235,6 +10235,7 @@ class OneByteString : public AllStatic {
  friend class Utf8;
  friend class OneByteStringMessageSerializationCluster;
  friend class Deserializer;
+  friend class JSONWriter;
 };

 class TwoByteString : public AllStatic {
@ -10353,6 +10354,7 @@ class TwoByteString : public AllStatic {
  friend class StringHasher;
  friend class Symbols;
  friend class TwoByteStringMessageSerializationCluster;
+  friend class JSONWriter;
 };

 class ExternalOneByteString : public AllStatic {
@ -10445,6 +10447,7 @@ class ExternalOneByteString : public AllStatic {
  friend class StringHasher;
  friend class Symbols;
  friend class Utf8;
+  friend class JSONWriter;
 };

 class ExternalTwoByteString : public AllStatic {
@ -10532,6 +10535,7 @@ class ExternalTwoByteString : public AllStatic {
  friend class String;
  friend class StringHasher;
  friend class Symbols;
+  friend class JSONWriter;
 };

 // Matches null_patch.dart / bool_patch.dart.