From 8f34b493e4e986d7140a975cccfe4fe3291b484e Mon Sep 17 00:00:00 2001 From: Benoit Lormeau Date: Sat, 26 Sep 2020 12:21:14 +0200 Subject: [PATCH] AK: Enhance GenericLexer's string consumption `consume_quoted_string()` can now take an escape character. This allows it (for example) to capture quote characters that are escaped inside the string instead of stopping at them. The escape character is optional; by default none is used. You can also consume and unescape a quoted string with the aptly named method `consume_and_unescape_string()`. It takes an escape character as a parameter (backslash by default). It builds a String in which common escape sequences get... unescaped :^) (e.g. \n, \r, \t...). --- AK/GenericLexer.cpp | 45 ++++++++++++++++++++++++++++++++++++++++----- AK/GenericLexer.h | 7 ++++--- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp index e49d41e690..cb116bb517 100644 --- a/AK/GenericLexer.cpp +++ b/AK/GenericLexer.cpp @@ -26,6 +26,7 @@ #include #include +#include namespace AK { @@ -211,29 +212,63 @@ StringView GenericLexer::consume_until(Condition condition) return m_input.substring_view(start, length); } -// Consume a string surrounded by single or double quotes -// The returned StringView does not include the quotes -StringView GenericLexer::consume_quoted_string() +/* + * Consume a string surrounded by single or double quotes. The returned + * StringView does not include the quotes. An escape character can be provided + * so that escaped quotes inside the string do not terminate it. 
Please note that escape characters are not removed: + * they are still present in the resulting StringView + */ +StringView GenericLexer::consume_quoted_string(char escape_char) { if (!is_quote(peek())) return {}; char quote_char = consume(); size_t start = m_index; - while (!is_eof() && peek() != quote_char) + while (!is_eof()) { + if (next_is(escape_char)) + m_index++; + else if (next_is(quote_char)) + break; m_index++; + } size_t length = m_index - start; if (peek() != quote_char) { - m_index = start - 1; // Restore the index in case the string is unterminated + // Unterminated string: rewind to the opening quote so nothing is consumed + m_index = start - 1; return {}; } + // Step past the closing quote ignore(); return m_input.substring_view(start, length); } +String GenericLexer::consume_and_unescape_string(char escape_char) +{ + auto view = consume_quoted_string(escape_char); + if (view.is_null()) + return {}; + + // Map the character after an escape to its control character (n, r, t, b, f); any other character is returned unchanged + auto unescape_character = [](char c) { + static const char* escape_map = "n\nr\rt\tb\bf\f"; + for (size_t i = 0; escape_map[i] != '\0'; i += 2) + if (c == escape_map[i]) + return escape_map[i + 1]; + return c; + }; + + StringBuilder builder; + for (size_t i = 0; i < view.length(); ++i) { + char c = (view[i] == escape_char) ? 
unescape_character(view[++i]) : view[i]; + builder.append(c); + } + return builder.to_string(); +} + // Ignore a number of characters (1 by default) void GenericLexer::ignore(size_t count) { diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h index f5a12b51d2..7800474ac4 100644 --- a/AK/GenericLexer.h +++ b/AK/GenericLexer.h @@ -27,6 +27,7 @@ #pragma once #include +#include #include namespace AK { @@ -36,7 +37,7 @@ public: explicit GenericLexer(const StringView& input); virtual ~GenericLexer(); - // A lambda/function can be used to match characters as the user pleases + // A lambda/function can be used to match characters as the user pleases using Condition = Function; size_t tell() const { return m_index; } @@ -64,8 +65,8 @@ public: StringView consume_until(char); StringView consume_until(const char*); StringView consume_until(Condition); - // FIXME: provide an escape character - StringView consume_quoted_string(); + StringView consume_quoted_string(char escape_char = 0); + String consume_and_unescape_string(char escape_char = '\\'); void ignore(size_t count = 1); void ignore_while(Condition);