From 2eb5793d55b48782cc1c4974f957ebcce9881dba Mon Sep 17 00:00:00 2001
From: Tommy Nguyen
Date: Mon, 2 Dec 2019 07:42:33 -0500
Subject: [PATCH] LibMarkdown: Handle CRLF line endings

Previously, MDDocument only split on Unix-style line endings. This adds
a new function to StringView which handles LF, CR and CRLF.
---
Review notes for this revision of the patch:
 - StringView::lines() now clears last_ch_was_cr on every character that
   is not '\r'. Before, the flag survived ordinary characters, so in
   "a\rb\nc" the '\n' was mistaken for the tail of a CRLF pair and the
   line "b" was lost.
 - Empty lines between terminators are kept, as they were when
   MDDocument::parse() called split_view('\n', true).
 - The post-image blob id on the AK/StringView.cpp index line predates
   these changes.

 AK/StringView.cpp                    | 51 ++++++++++++++++++++++++++++
 AK/StringView.h                      |  6 ++++
 AK/Tests/TestStringView.cpp          | 21 ++++++++++++
 Libraries/LibMarkdown/MDDocument.cpp |  2 +-
 4 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/AK/StringView.cpp b/AK/StringView.cpp
index 6db1b8f144..2b79c61c28 100644
--- a/AK/StringView.cpp
+++ b/AK/StringView.cpp
@@ -40,6 +40,57 @@ Vector<StringView> StringView::split_view(const char separator, bool keep_empty)
     return v;
 }
 
+// Splits this view into lines and returns one StringView per line, without
+// the line terminators. When consider_cr is false only '\n' ends a line and
+// the work is delegated to split_view('\n', true). When it is true, "\n",
+// "\r\n" and a lone "\r" each end exactly one line; empty lines between
+// terminators are kept, but no empty line is emitted after a terminator
+// that ends the input. An empty view yields an empty Vector.
+Vector<StringView> StringView::lines(bool consider_cr) const
+{
+    if (is_empty())
+        return {};
+
+    if (!consider_cr)
+        return split_view('\n', true);
+
+    Vector<StringView> v;
+    ssize_t substart = 0;
+    bool last_ch_was_cr = false;
+    bool split_view = false;
+    for (ssize_t i = 0; i < length(); ++i) {
+        char ch = characters_without_null_termination()[i];
+        if (ch == '\n') {
+            split_view = true;
+            if (last_ch_was_cr) {
+                // Second half of "\r\n": the '\r' already ended this line, so
+                // only step over the '\n' instead of emitting another line.
+                substart = i + 1;
+                split_view = false;
+            }
+        }
+        if (ch == '\r') {
+            split_view = true;
+            last_ch_was_cr = true;
+        } else {
+            // Only a '\n' that directly follows a '\r' belongs to a CRLF
+            // pair, so any other character has to clear the flag.
+            last_ch_was_cr = false;
+        }
+        if (split_view) {
+            // A zero-length slice is an empty line and is kept on purpose.
+            ssize_t sublen = i - substart;
+            v.append(substring_view(substart, sublen));
+            substart = i + 1;
+        }
+        split_view = false;
+    }
+    ssize_t taillen = length() - substart;
+    if (taillen != 0)
+        v.append(substring_view(substart, taillen));
+    return v;
+}
+
 bool StringView::starts_with(const StringView& str) const
 {
     if (str.is_empty())
diff --git a/AK/StringView.h b/AK/StringView.h
index 00c9aaafa3..9e558947f0 100644
--- a/AK/StringView.h
+++ b/AK/StringView.h
@@ -46,6 +46,12 @@ public:
     StringView substring_view(int start, int length) const;
     Vector<StringView> split_view(char, bool keep_empty = false) const;
 
+    // Create a Vector of StringViews split by
line endings. As of CommonMark + // 0.29, the spec defines a line ending as "a newline (U+000A), a carriage + // return (U+000D) not followed by a newline, or a carriage return and a + // following newline.". + Vector lines(bool consider_cr = true) const; + // FIXME: These should be shared between String and StringView somehow! unsigned to_uint(bool& ok) const; int to_int(bool& ok) const; diff --git a/AK/Tests/TestStringView.cpp b/AK/Tests/TestStringView.cpp index add8726a1b..d98c74138d 100644 --- a/AK/Tests/TestStringView.cpp +++ b/AK/Tests/TestStringView.cpp @@ -42,4 +42,25 @@ TEST_CASE(starts_with) EXPECT(!test_string_view.starts_with("DEF")); } +TEST_CASE(lines) +{ + String test_string = "a\nb\r\nc\rd"; + StringView test_string_view = test_string.view(); + Vector test_string_vector = test_string_view.lines(); + EXPECT_EQ(test_string_vector.size(), 4); + EXPECT(test_string_vector.at(0) == String("a")); + EXPECT(test_string_vector.at(1) == String("b")); + EXPECT(test_string_vector.at(2) == String("c")); + EXPECT(test_string_vector.at(3) == String("d")); + + test_string = "```\nHello there\r\nHello there\n```"; + test_string_view = test_string.view(); + test_string_vector = test_string_view.lines(); + EXPECT_EQ(test_string_vector.size(), 4); + EXPECT(test_string_vector.at(0) == String("```")); + EXPECT(test_string_vector.at(1) == String("Hello there")); + EXPECT(test_string_vector.at(2) == String("Hello there")); + EXPECT(test_string_vector.at(3) == String("```")); +} + TEST_MAIN(StringView) diff --git a/Libraries/LibMarkdown/MDDocument.cpp b/Libraries/LibMarkdown/MDDocument.cpp index 539ad8f70d..b3161adf53 100644 --- a/Libraries/LibMarkdown/MDDocument.cpp +++ b/Libraries/LibMarkdown/MDDocument.cpp @@ -49,7 +49,7 @@ static bool helper(Vector::ConstIterator& lines, NonnullOwnPtrVector bool MDDocument::parse(const StringView& str) { - const Vector lines_vec = str.split_view('\n', true); + const Vector lines_vec = str.lines(); auto lines = lines_vec.begin(); while 
(true) {