AK/GenericLexer: constexpr where possible

Problem:
- Much of the `GenericLexer` can be `constexpr`, but is not.

Solution:
- Make it `constexpr` and de-duplicate code.
- Extend parts of `StringView` with `constexpr` to support this.
- Add tests to ensure `constexpr` behavior.

Note:
- Construction of `StringView` from pointer and length is not
  `constexpr`-compatible at the moment because the VERIFY cannot be,
  yet.
This commit is contained in:
Lenny Maiorani 2021-04-21 21:19:39 -06:00 committed by Linus Groh
parent c2280a907d
commit 254e010c75
6 changed files with 303 additions and 188 deletions

View file

@ -10,92 +10,6 @@
#include <AK/StringBuilder.h>
namespace AK {
GenericLexer::GenericLexer(const StringView& input)
: m_input(input)
{
}
GenericLexer::~GenericLexer()
{
}
// Tells whether the parser's index has reached input's end
bool GenericLexer::is_eof() const
{
return m_index >= m_input.length();
}
// Returns the current character at the parser index, plus `offset` if specified
char GenericLexer::peek(size_t offset) const
{
return (m_index + offset < m_input.length()) ? m_input[m_index + offset] : '\0';
}
// Tests the next character in the input
bool GenericLexer::next_is(char expected) const
{
return peek() == expected;
}
// Tests if the `expected` string comes next in the input
bool GenericLexer::next_is(StringView expected) const
{
for (size_t i = 0; i < expected.length(); ++i)
if (peek(i) != expected[i])
return false;
return true;
}
// Tests if the `expected` string comes next in the input
bool GenericLexer::next_is(const char* expected) const
{
for (size_t i = 0; expected[i] != '\0'; ++i)
if (peek(i) != expected[i])
return false;
return true;
}
// Go back to the previous character
void GenericLexer::retreat()
{
VERIFY(m_index > 0);
m_index--;
}
// Consume a character and advance the parser index
char GenericLexer::consume()
{
VERIFY(!is_eof());
return m_input[m_index++];
}
// Consume the given character if it is next in the input
bool GenericLexer::consume_specific(char specific)
{
if (peek() != specific)
return false;
ignore();
return true;
}
// Consume the given string if it is next in the input
bool GenericLexer::consume_specific(StringView str)
{
if (!next_is(str))
return false;
ignore(str.length());
return true;
}
// Consume the given string if it is next in the input
bool GenericLexer::consume_specific(const char* str)
{
return consume_specific(StringView(str));
}
// Consume a number of characters
StringView GenericLexer::consume(size_t count)
{
@ -214,46 +128,4 @@ String GenericLexer::consume_and_unescape_string(char escape_char)
return builder.to_string();
}
char GenericLexer::consume_escaped_character(char escape_char, const StringView& escape_map)
{
if (!consume_specific(escape_char))
return consume();
auto c = consume();
for (size_t i = 0; i < escape_map.length(); i += 2) {
if (c == escape_map[i])
return escape_map[i + 1];
}
return c;
}
// Ignore a number of characters (1 by default)
void GenericLexer::ignore(size_t count)
{
count = min(count, m_input.length() - m_index);
m_index += count;
}
// Ignore characters until `stop` is peek'd
// The `stop` character is ignored as it is user-defined
void GenericLexer::ignore_until(char stop)
{
while (!is_eof() && peek() != stop)
m_index++;
ignore();
}
// Ignore characters until the string `stop` is found
// The `stop` string is ignored, as it is user-defined
void GenericLexer::ignore_until(const char* stop)
{
while (!is_eof() && !next_is(stop))
m_index++;
ignore(__builtin_strlen(stop));
}
}

View file

@ -12,29 +12,95 @@ namespace AK {
class GenericLexer {
public:
explicit GenericLexer(const StringView& input);
virtual ~GenericLexer();
// Creates a lexer over `input` by initializing m_input from it.
// `explicit` prevents a StringView from converting to a lexer implicitly.
constexpr explicit GenericLexer(const StringView& input)
: m_input(input)
{
}
size_t tell() const { return m_index; }
size_t tell_remaining() const { return m_input.length() - m_index; }
// Returns the lexer's current index into the input (m_index).
constexpr size_t tell() const { return m_index; }
// Returns the number of characters between the current index and the end of the input.
constexpr size_t tell_remaining() const { return m_input.length() - m_index; }
// Returns a view of the input from the current index to its end.
StringView remaining() const { return m_input.substring_view(m_index); }
bool is_eof() const;
// Tells whether the lexer's index has reached (or passed) the end of the input.
constexpr bool is_eof() const { return m_index >= m_input.length(); }
char peek(size_t offset = 0) const;
// Returns the character `offset` positions past the current index without
// consuming anything, or '\0' when that position is at or past the input's end.
constexpr char peek(size_t offset = 0) const
{
    const size_t position = m_index + offset;
    if (position >= m_input.length())
        return '\0';
    return m_input[position];
}
bool next_is(char) const;
bool next_is(StringView) const;
bool next_is(const char*) const;
// Tests whether the character at the current index equals `expected`.
// At end of input peek() yields '\0', so only '\0' matches there.
constexpr bool next_is(char expected) const
{
    const char upcoming = peek();
    return upcoming == expected;
}
void retreat();
// Tests whether the characters of `expected` come next in the input,
// comparing one position at a time via peek(). An empty `expected` matches.
constexpr bool next_is(StringView expected) const
{
    size_t offset = 0;
    while (offset < expected.length()) {
        if (expected[offset] != peek(offset))
            return false;
        ++offset;
    }
    return true;
}
// Tests whether the NUL-terminated string `expected` comes next in the input.
// The walk stops at the terminator, so an empty string always matches.
constexpr bool next_is(const char* expected) const
{
    for (size_t offset = 0;; ++offset) {
        const char wanted = expected[offset];
        if (wanted == '\0')
            return true;
        if (peek(offset) != wanted)
            return false;
    }
}
// Steps the index back by one character.
// Retreating from index 0 is a programming error and trips the VERIFY.
constexpr void retreat()
{
    VERIFY(m_index > 0);
    m_index -= 1;
}
// Returns the character at the current index and advances past it.
// Calling this at end of input trips the VERIFY.
constexpr char consume()
{
    VERIFY(!is_eof());
    const char current = m_input[m_index];
    ++m_index;
    return current;
}
// Consumes `next` if it comes next in the input and reports whether it did.
// - Types exposing length() (e.g. StringView) are matched as a whole and
//   skipped by that many characters.
// - Anything else (a single char, or a predicate accepted by next_is) is
//   matched against exactly one character, so exactly one character is
//   skipped. Using sizeof(next) here would over-skip for any T wider than a
//   char, e.g. a function-pointer or capturing predicate.
template<typename T>
constexpr bool consume_specific(const T& next)
{
    if (!next_is(next))
        return false;

    if constexpr (requires { next.length(); }) {
        ignore(next.length());
    } else {
        ignore();
    }
    return true;
}
bool consume_specific(const String& next)
{
return consume_specific(StringView { next });
}
constexpr bool consume_specific(const char* next)
{
return consume_specific(StringView { next });
}
// Consumes one character, resolving an escape sequence if one is present.
//
// If the next character is not `escape_char`, it is consumed and returned
// unchanged. Otherwise the escape character is skipped and the character after
// it is consumed and looked up in `escape_map`, which is read as consecutive
// (escaped, replacement) pairs. The replacement is returned on a match, and the
// consumed character itself when no pair matches.
//
// Only complete pairs are considered: a trailing unpaired character in an
// odd-length `escape_map` is ignored instead of reading one past its end.
constexpr char consume_escaped_character(char escape_char = '\\', const StringView& escape_map = "n\nr\rt\tb\bf\f")
{
    if (!consume_specific(escape_char))
        return consume();

    auto c = consume();

    for (size_t i = 0; i + 1 < escape_map.length(); i += 2) {
        if (c == escape_map[i])
            return escape_map[i + 1];
    }

    return c;
}
char consume();
bool consume_specific(char);
bool consume_specific(StringView);
bool consume_specific(const char*);
char consume_escaped_character(char escape_char = '\\', const StringView& escape_map = "n\nr\rt\tb\bf\f");
StringView consume(size_t count);
StringView consume_all();
StringView consume_line();
@ -43,9 +109,27 @@ public:
StringView consume_quoted_string(char escape_char = 0);
String consume_and_unescape_string(char escape_char = '\\');
void ignore(size_t count = 1);
void ignore_until(char);
void ignore_until(const char*);
// Skips `count` characters (1 by default), clamped so the index never
// moves past the end of the input.
constexpr void ignore(size_t count = 1)
{
    const size_t available = m_input.length() - m_index;
    m_index += min(count, available);
}
// Skips characters until `stop` is peek'd, then skips `stop` as well.
// If `stop` never occurs, the lexer ends up at the end of the input.
constexpr void ignore_until(char stop)
{
    for (; !is_eof(); ++m_index) {
        if (peek() == stop)
            break;
    }
    ignore();
}
// Skips characters until the string `stop` comes next, then skips `stop` too.
// If `stop` never occurs, the lexer ends up at the end of the input, because
// ignore() clamps the final skip.
constexpr void ignore_until(const char* stop)
{
    while (!(is_eof() || next_is(stop)))
        ++m_index;
    ignore(__builtin_strlen(stop));
}
/*
* Conditions are used to match arbitrary characters. You can use lambdas,
@ -57,19 +141,19 @@ public:
*/
// Test the next character against a Condition
template<typename C>
bool next_is(C condition) const
template<typename TPredicate>
constexpr bool next_is(TPredicate pred) const
{
return condition(peek());
return pred(peek());
}
// Consume and return characters while `condition` returns true
template<typename C>
StringView consume_while(C condition)
// Consume and return characters while `pred` returns true
template<typename TPredicate>
StringView consume_while(TPredicate pred)
{
size_t start = m_index;
while (!is_eof() && condition(peek()))
m_index++;
while (!is_eof() && pred(peek()))
++m_index;
size_t length = m_index - start;
if (length == 0)
@ -77,13 +161,13 @@ public:
return m_input.substring_view(start, length);
}
// Consume and return characters until `condition` return true
template<typename C>
StringView consume_until(C condition)
// Consume and return characters until `pred` return true
template<typename TPredicate>
StringView consume_until(TPredicate pred)
{
size_t start = m_index;
while (!is_eof() && !condition(peek()))
m_index++;
while (!is_eof() && !pred(peek()))
++m_index;
size_t length = m_index - start;
if (length == 0)
@ -91,21 +175,21 @@ public:
return m_input.substring_view(start, length);
}
// Ignore characters while `condition` returns true
template<typename C>
void ignore_while(C condition)
// Ignore characters while `pred` returns true
template<typename TPredicate>
constexpr void ignore_while(TPredicate pred)
{
while (!is_eof() && condition(peek()))
m_index++;
while (!is_eof() && pred(peek()))
++m_index;
}
// Ignore characters until `condition` return true
// Ignore characters until `pred` return true
// We don't skip the stop character as it may not be a unique value
template<typename C>
void ignore_until(C condition)
template<typename TPredicate>
constexpr void ignore_until(TPredicate pred)
{
while (!is_eof() && !condition(peek()))
m_index++;
while (!is_eof() && !pred(peek()))
++m_index;
}
protected:

View file

@ -172,17 +172,6 @@ bool StringView::equals_ignoring_case(const StringView& other) const
return StringUtils::equals_ignoring_case(*this, other);
}
StringView StringView::substring_view(size_t start, size_t length) const
{
VERIFY(start + length <= m_length);
return { m_characters + start, length };
}
StringView StringView::substring_view(size_t start) const
{
VERIFY(start <= m_length);
return { m_characters + start, length() - start };
}
StringView StringView::substring_view_starting_from_substring(const StringView& substring) const
{
const char* remaining_characters = substring.characters_without_null_termination();

View file

@ -46,15 +46,15 @@ public:
StringView(const String&);
StringView(const FlyString&);
[[nodiscard]] bool is_null() const { return !m_characters; }
[[nodiscard]] bool is_empty() const { return m_length == 0; }
// True when the view has no character pointer at all (m_characters is null).
[[nodiscard]] constexpr bool is_null() const { return !m_characters; }
// True when the view's length is zero.
[[nodiscard]] constexpr bool is_empty() const { return m_length == 0; }
[[nodiscard]] const char* characters_without_null_termination() const { return m_characters; }
[[nodiscard]] size_t length() const { return m_length; }
// Returns the number of characters in the view.
[[nodiscard]] constexpr size_t length() const { return m_length; }
[[nodiscard]] ReadonlyBytes bytes() const { return { m_characters, m_length }; }
const char& operator[](size_t index) const { return m_characters[index]; }
// Returns the character at `index`. No bounds check is performed here.
constexpr const char& operator[](size_t index) const { return m_characters[index]; }
using ConstIterator = SimpleIterator<const StringView, const char>;
@ -84,8 +84,17 @@ public:
Optional<size_t> find(const StringView&) const;
Optional<size_t> find(char c) const;
[[nodiscard]] StringView substring_view(size_t start, size_t length) const;
[[nodiscard]] StringView substring_view(size_t start) const;
// Returns the sub-view of `length` characters beginning at `start`.
// The requested range must lie within this view. The two checks below are
// equivalent to `start + length <= m_length`, but cannot be defeated by
// unsigned wraparound when `start + length` overflows size_t.
[[nodiscard]] constexpr StringView substring_view(size_t start, size_t length) const
{
    VERIFY(start <= m_length);
    VERIFY(length <= m_length - start);
    return { m_characters + start, length };
}
// Returns the sub-view from `start` to the end of this view.
// `start` must not exceed length(). It is checked here because, for a larger
// `start`, `length() - start` underflows and the wrapped value would then
// satisfy the `start + length <= m_length` check of the two-argument overload.
[[nodiscard]] constexpr StringView substring_view(size_t start) const
{
    VERIFY(start <= length());
    return substring_view(start, length() - start);
}
[[nodiscard]] Vector<StringView> split_view(char, bool keep_empty = false) const;
[[nodiscard]] Vector<StringView> split_view(const StringView&, bool keep_empty = false) const;
@ -166,7 +175,7 @@ public:
bool operator==(const String&) const;
bool operator==(const StringView& other) const
constexpr bool operator==(const StringView& other) const
{
if (is_null())
return other.is_null();
@ -177,7 +186,7 @@ public:
return !__builtin_memcmp(m_characters, other.m_characters, m_length);
}
bool operator!=(const StringView& other) const
// Inequality is defined as the negation of operator==.
constexpr bool operator!=(const StringView& other) const
{
    const bool same = (*this == other);
    return !same;
}

View file

@ -21,6 +21,7 @@ set(AK_TEST_SOURCES
TestEnumBits.cpp
TestFind.cpp
TestFormat.cpp
TestGenericLexer.cpp
TestHashFunctions.cpp
TestHashMap.cpp
TestHashTable.cpp

View file

@ -0,0 +1,160 @@
/*
* Copyright (c) 2021, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/TestSuite.h>
#include <AK/GenericLexer.h>
#include <AK/StringView.h>
// A lexer over a default-constructed StringView is at EOF from the start.
TEST_CASE(should_constexpr_construct_from_empty_string_view)
{
    constexpr GenericLexer lexer(StringView {});
    static_assert(lexer.is_eof());
}
// A lexer over non-empty input is not at EOF right after construction.
TEST_CASE(should_construct_from_string_view)
{
    constexpr GenericLexer lexer(StringView { "abcdef" });
    static_assert(!lexer.is_eof());
}
// tell() reports index 0 on a freshly constructed lexer.
TEST_CASE(should_constexpr_tell)
{
    constexpr GenericLexer lexer(StringView { "abcdef" });
    static_assert(lexer.tell() == 0);
}
// tell_remaining() equals the full input length before anything is consumed.
TEST_CASE(should_constexpr_tell_remaining)
{
    constexpr GenericLexer lexer(StringView { "abcdef" });
    static_assert(lexer.tell_remaining() == 6);
}
// peek() reads at the current index, at an offset, and yields '\0' out of range.
TEST_CASE(should_constexpr_peek)
{
    constexpr GenericLexer lexer(StringView { "abcdef" });
    static_assert(lexer.peek() == 'a');
    static_assert(lexer.peek(2) == 'c');
    static_assert(lexer.peek(100) == '\0');
}
// next_is() works at compile time for a char, a C string and a StringView.
TEST_CASE(should_constexpr_next_is)
{
    constexpr GenericLexer lexer(StringView { "abcdef" });
    static_assert(lexer.next_is('a'));
    static_assert(lexer.next_is("abc"));
    static_assert(lexer.next_is(StringView { "abc" }));
}
// consume() followed by retreat() leaves the lexer back on the first character.
TEST_CASE(should_constexpr_retreat)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.consume();
        scratch.retreat();
        return scratch;
    }();
    static_assert(lexer.peek() == 'a');
}
// A single consume() advances the lexer by exactly one character.
TEST_CASE(should_constexpr_consume_1)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.consume();
        return scratch;
    }();
    static_assert(lexer.peek() == 'b');
}
// consume_specific(char) skips the matching leading character.
TEST_CASE(should_constexpr_consume_specific_char)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.consume_specific('a');
        return scratch;
    }();
    static_assert(lexer.peek() == 'b');
}
// consume_specific(StringView) skips the whole matching prefix.
TEST_CASE(should_constexpr_consume_specific_string_view)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.consume_specific(StringView { "ab" });
        return scratch;
    }();
    static_assert(lexer.peek() == 'c');
}
// consume_specific(const char*) skips the whole matching C-string prefix.
TEST_CASE(should_constexpr_consume_specific_cstring)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.consume_specific("abcd");
        return scratch;
    }();
    static_assert(lexer.peek() == 'e');
}
// ignore_until(char) skips up to and including the stop character.
TEST_CASE(should_constexpr_ignore_until)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.ignore_until('d');
        return scratch;
    }();
    static_assert(lexer.peek() == 'e');
}
// ignore_until(const char*) skips up to and including the stop string.
TEST_CASE(should_constexpr_ignore_until_cstring)
{
    constexpr auto lexer = [] {
        GenericLexer scratch(StringView { "abcdef" });
        scratch.ignore_until("cde");
        return scratch;
    }();
    static_assert(lexer.peek() == 'f');
}
// next_is() accepts a predicate and applies it to the next character.
TEST_CASE(should_constexpr_next_is_pred)
{
    constexpr auto is_a = [](auto ch) {
        return ch == 'a';
    };
    constexpr GenericLexer lexer(StringView { "abcdef" });
    static_assert(lexer.next_is(is_a));
}
// ignore_while() skips characters for as long as the predicate holds.
TEST_CASE(should_constexpr_ignore_while_pred)
{
    constexpr auto lexer = [] {
        constexpr auto is_a = [](auto ch) {
            return ch == 'a';
        };
        GenericLexer scratch(StringView { "abcdef" });
        scratch.ignore_while(is_a);
        return scratch;
    }();
    static_assert(lexer.peek() == 'b');
}
// ignore_until(pred) stops on, and does not skip, the first matching character.
TEST_CASE(should_constexpr_ignore_until_pred)
{
    constexpr auto lexer = [] {
        constexpr auto is_c = [](auto ch) {
            return ch == 'c';
        };
        GenericLexer scratch(StringView { "abcdef" });
        scratch.ignore_until(is_c);
        return scratch;
    }();
    static_assert(lexer.peek() == 'c');
}
TEST_MAIN(GenericLexer)