From 3e0e84dcd125bc2cbf9f3423254584e9b9dc82c9 Mon Sep 17 00:00:00 2001 From: Brendan Coles Date: Wed, 4 Nov 2020 06:20:20 +0000 Subject: [PATCH] AK::URL: Check if URL requires a port set to be considered a valid URL `AK::URL` will now check if the URL requires a port to be set using `AK::URL::protocol_requires_port(protocol)`. If the URL does not specify a port, the default port for the URL protocol is looked up with `AK::URL::default_port_for_protocol(protocol)` and applied. If the protocol requires a port and the URL still has no port set, the URL is considered to be invalid. --- AK/Tests/TestURL.cpp | 28 +++++++++++++++++ AK/URL.cpp | 75 ++++++++++++++++++++++++++++++++++---------- AK/URL.h | 6 ++-- 3 files changed, 91 insertions(+), 18 deletions(-) diff --git a/AK/Tests/TestURL.cpp b/AK/Tests/TestURL.cpp index 07467bf27f..22729ff4b6 100644 --- a/AK/Tests/TestURL.cpp +++ b/AK/Tests/TestURL.cpp @@ -115,17 +115,23 @@ TEST_CASE(some_bad_urls) EXPECT_EQ(URL("http:///serenityos.org").is_valid(), false); EXPECT_EQ(URL("serenityos.org").is_valid(), false); EXPECT_EQ(URL("://serenityos.org").is_valid(), false); + EXPECT_EQ(URL("://:80").is_valid(), false); EXPECT_EQ(URL("http://serenityos.org:80:80/").is_valid(), false); EXPECT_EQ(URL("http://serenityos.org:80:80").is_valid(), false); EXPECT_EQ(URL("http://serenityos.org:abc").is_valid(), false); EXPECT_EQ(URL("http://serenityos.org:abc:80").is_valid(), false); EXPECT_EQ(URL("http://serenityos.org:abc:80/").is_valid(), false); EXPECT_EQ(URL("http://serenityos.org:/abc/").is_valid(), false); + EXPECT_EQ(URL("data:").is_valid(), false); + EXPECT_EQ(URL("file:").is_valid(), false); + EXPECT_EQ(URL("about:").is_valid(), false); } TEST_CASE(serialization) { EXPECT_EQ(URL("http://www.serenityos.org/").to_string(), "http://www.serenityos.org/"); + EXPECT_EQ(URL("http://www.serenityos.org:0/").to_string(), "http://www.serenityos.org/"); + EXPECT_EQ(URL("http://www.serenityos.org:80/").to_string(), "http://www.serenityos.org/"); EXPECT_EQ(URL("http://www.serenityos.org:81/").to_string(), 
"http://www.serenityos.org:81/"); EXPECT_EQ(URL("https://www.serenityos.org:443/foo/bar.html?query#fragment").to_string(), "https://www.serenityos.org/foo/bar.html?query#fragment"); } @@ -143,6 +149,7 @@ TEST_CASE(file_url_without_hostname) { URL url("file:///my/file"); EXPECT_EQ(url.is_valid(), true); + EXPECT_EQ(url.protocol(), "file"); EXPECT_EQ(url.host(), ""); EXPECT_EQ(url.path(), "/my/file"); EXPECT_EQ(url.to_string(), "file:///my/file"); @@ -153,10 +160,31 @@ TEST_CASE(about_url) URL url("about:blank"); EXPECT_EQ(url.is_valid(), true); EXPECT_EQ(url.protocol(), "about"); + EXPECT_EQ(url.host(), ""); EXPECT_EQ(url.path(), "blank"); EXPECT_EQ(url.to_string(), "about:blank"); } +TEST_CASE(data_url) +{ + URL url("data:text/html,test"); + EXPECT_EQ(url.is_valid(), true); + EXPECT_EQ(url.protocol(), "data"); + EXPECT_EQ(url.host(), ""); + EXPECT_EQ(url.data_mime_type(), "text/html"); + EXPECT_EQ(url.to_string(), "data:text/html,test"); +} + +TEST_CASE(data_url_base64_encoded) +{ + URL url("data:text/html;base64,test"); + EXPECT_EQ(url.is_valid(), true); + EXPECT_EQ(url.protocol(), "data"); + EXPECT_EQ(url.host(), ""); + EXPECT_EQ(url.data_mime_type(), "text/html"); + EXPECT_EQ(url.to_string(), "data:text/html;base64,test"); +} + TEST_CASE(trailing_slash_with_complete_url) { EXPECT_EQ(URL("http://a/b/").complete_url("c/").to_string(), "http://a/b/c/"); diff --git a/AK/URL.cpp b/AK/URL.cpp index 1c42904264..434c858511 100644 --- a/AK/URL.cpp +++ b/AK/URL.cpp @@ -93,12 +93,14 @@ bool URL::parse(const StringView& string) if (m_protocol == "data") { buffer.clear(); + m_host = ""; state = State::InDataMimeType; continue; } if (m_protocol == "about") { buffer.clear(); + m_host = ""; state = State::InPath; continue; } @@ -109,12 +111,6 @@ bool URL::parse(const StringView& string) return false; if (buffer.is_empty()) return false; - if (m_protocol == "http") - m_port = 80; - else if (m_protocol == "https") - m_port = 443; - else if (m_protocol == "gemini") - m_port = 
1965; state = State::InHostname; buffer.clear(); continue; @@ -244,7 +240,11 @@ bool URL::parse(const StringView& string) m_query = ""; if (m_fragment.is_null()) m_fragment = ""; - return true; + + if (!m_port && protocol_requires_port(m_protocol)) + set_port(default_port_for_protocol(m_protocol)); + + return compute_validity(); } URL::URL(const StringView& string) @@ -275,12 +275,11 @@ String URL::to_string() const builder.append("://"); builder.append(m_host); - if (protocol() != "file") { - if (!(protocol() == "http" && port() == 80) && !(protocol() == "https" && port() == 443) && !(protocol() == "gemini" && port() == 1965)) { - builder.append(':'); - builder.append(String::number(m_port)); - } + if (default_port_for_protocol(protocol()) != port()) { + builder.append(':'); + builder.append(String::number(m_port)); } + builder.append(m_path); if (!m_query.is_empty()) { builder.append('?'); @@ -364,6 +363,12 @@ void URL::set_host(const String& host) m_valid = compute_validity(); } +void URL::set_port(u16 port) +{ + m_port = port; + m_valid = compute_validity(); +} + void URL::set_path(const String& path) { m_path = path; @@ -385,16 +390,54 @@ bool URL::compute_validity() const // FIXME: This is by no means complete. 
if (m_protocol.is_empty()) return false; + + if (m_protocol == "about") { + if (m_path.is_empty()) + return false; + return true; + } + if (m_protocol == "file") { if (m_path.is_empty()) return false; - } else { - if (m_host.is_empty()) - return false; + return true; } + + if (m_protocol == "data") { + if (m_data_mime_type.is_empty()) + return false; + return true; + } + + if (m_host.is_empty()) + return false; + + if (!m_port && protocol_requires_port(m_protocol)) + return false; + return true; } +bool URL::protocol_requires_port(const String& protocol) +{ + return (default_port_for_protocol(protocol) != 0); +} + +u16 URL::default_port_for_protocol(const String& protocol) +{ + if (protocol == "http") + return 80; + if (protocol == "https") + return 443; + if (protocol == "gemini") + return 1965; + if (protocol == "irc") + return 6667; + if (protocol == "ircs") + return 6697; + return 0; +} + URL URL::create_with_file_protocol(const String& path) { URL url; @@ -416,8 +459,8 @@ URL URL::create_with_url_or_path(const String& url_or_path) URL URL::create_with_data(const StringView& mime_type, const StringView& payload, bool is_base64) { URL url; - url.m_valid = true; url.set_protocol("data"); + url.m_valid = true; url.m_data_payload = payload; url.m_data_mime_type = mime_type; url.m_data_payload_is_base64 = is_base64; diff --git a/AK/URL.h b/AK/URL.h index 2b7c42c65c..1e3bdcf54b 100644 --- a/AK/URL.h +++ b/AK/URL.h @@ -56,10 +56,10 @@ public: void set_protocol(const String& protocol); void set_host(const String& host); + void set_port(const u16 port); void set_path(const String& path); void set_query(const String& query); void set_fragment(const String& fragment); - void set_port(u16 port) { m_port = port; } String basename() const; String to_string() const; @@ -73,6 +73,8 @@ public: static URL create_with_url_or_path(const String& url_or_path); static URL create_with_file_protocol(const String& path); static URL create_with_data(const StringView& mime_type, const 
StringView& payload, bool is_base64 = false); + static bool protocol_requires_port(const String& protocol); + static u16 default_port_for_protocol(const String& protocol); bool operator==(const URL& other) const { @@ -86,7 +88,7 @@ private: bool compute_validity() const; bool m_valid { false }; - u16 m_port { 80 }; + u16 m_port { 0 }; bool m_data_payload_is_base64 { false }; String m_protocol; String m_host;