From a19d8a4a37907b10c3ef77b253c13fe274d0f974 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Fri, 29 Dec 2023 17:36:06 +0100 Subject: [PATCH] AK: Add ASCII fast path to Utf8CodePointIterator Much of the UTF-8 data that we'll iterate over will be ASCII only, and we can get a significant speed-up by simply having a fast path when the iterator points at a byte that is obviously an ASCII character (<= 0x7F). --- AK/Utf8View.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index f3f2a7af30..fb47e00d36 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -147,6 +147,13 @@ Utf8CodePointIterator& Utf8CodePointIterator::operator++() { VERIFY(m_length > 0); + // OPTIMIZATION: Fast path for ASCII characters. + if (*m_ptr <= 0x7F) { + m_ptr += 1; + m_length -= 1; + return *this; + } + size_t code_point_length_in_bytes = underlying_code_point_length_in_bytes(); if (code_point_length_in_bytes > m_length) { // We don't have enough data for the next code point. Skip one character and try again. @@ -190,6 +197,11 @@ ReadonlyBytes Utf8CodePointIterator::underlying_code_point_bytes() const u32 Utf8CodePointIterator::operator*() const { VERIFY(m_length > 0); + + // OPTIMIZATION: Fast path for ASCII characters. + if (*m_ptr <= 0x7F) + return *m_ptr; + auto [code_point_length_in_bytes, code_point_value_so_far, first_byte_makes_sense] = Utf8View::decode_leading_byte(*m_ptr); if (!first_byte_makes_sense) {