AK: Add starts_with to Utf8View

Unlike String/StringView::starts_with this compares utf8 code points
instead of "characters" (bytes), which is important when handling
arbitrary utf-8 input that could include overlong characters.
This commit is contained in:
Idan Horowitz 2021-03-21 22:31:15 +02:00 committed by Andreas Kling
parent 3f9ac88c6e
commit edecf8f6a3
2 changed files with 20 additions and 0 deletions

View file

@ -144,6 +144,24 @@ size_t Utf8View::calculate_length() const
return length;
}
bool Utf8View::starts_with(const Utf8View& start) const
{
if (start.is_empty())
return true;
if (is_empty())
return false;
if (start.length() > length())
return false;
if (begin_ptr() == start.begin_ptr())
return true;
for (auto k = begin(), l = start.begin(); l != start.end(); ++k, ++l) {
if (*k != *l)
return false;
}
return true;
}
Utf8CodepointIterator::Utf8CodepointIterator(const unsigned char* ptr, size_t length)
: m_ptr(ptr)
, m_length(length)

View file

@ -80,6 +80,8 @@ public:
// Returns a sub-view of this view. Judging by the parameter names, the range
// is expressed in bytes rather than code points — TODO confirm against the definition.
Utf8View substring_view(int byte_offset, int byte_length) const;
// True when the underlying m_string is empty.
bool is_empty() const { return m_string.is_empty(); }
// Returns true if the given view is a prefix of this one, compared code point
// by code point; an empty argument always matches.
bool starts_with(const Utf8View&) const;
size_t iterator_offset(const Utf8CodepointIterator& it) const
{
return byte_offset_of(it);