AK: Don't store parts of URLs percent decoded

As noted in serval comments doing this goes against the WC3 spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data, that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https:://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https:://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too
This commit is contained in:
MacDue 2023-04-09 14:21:00 +01:00 committed by Andreas Kling
parent d2fc8efd9e
commit 8283e8b88c
5 changed files with 85 additions and 51 deletions

View file

@ -47,20 +47,32 @@ URL URL::complete_url(StringView relative_url) const
return URLParser::parse(relative_url, *this);
}
// NOTE: This only exists for compatibility with the existing URL tests which check for both .is_null() and .is_empty().
static DeprecatedString deprecated_string_percent_encode(DeprecatedString const& input, URL::PercentEncodeSet set = URL::PercentEncodeSet::Userinfo, URL::SpaceAsPlus space_as_plus = URL::SpaceAsPlus::No)
{
if (input.is_null() || input.is_empty())
return input;
return URL::percent_encode(input.view(), set, space_as_plus);
}
void URL::set_scheme(DeprecatedString scheme)
{
m_scheme = move(scheme);
m_valid = compute_validity();
}
void URL::set_username(DeprecatedString username)
void URL::set_username(DeprecatedString username, ApplyPercentEncoding apply_percent_encoding)
{
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
username = deprecated_string_percent_encode(username, PercentEncodeSet::Userinfo);
m_username = move(username);
m_valid = compute_validity();
}
void URL::set_password(DeprecatedString password)
void URL::set_password(DeprecatedString password, ApplyPercentEncoding apply_percent_encoding)
{
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
password = deprecated_string_percent_encode(password, PercentEncodeSet::Userinfo);
m_password = move(password);
m_valid = compute_validity();
}
@ -81,19 +93,38 @@ void URL::set_port(Optional<u16> port)
m_valid = compute_validity();
}
void URL::set_paths(Vector<DeprecatedString> paths)
void URL::set_paths(Vector<DeprecatedString> paths, ApplyPercentEncoding apply_percent_encoding)
{
m_paths = move(paths);
if (apply_percent_encoding == ApplyPercentEncoding::Yes) {
Vector<DeprecatedString> encoded_paths;
encoded_paths.ensure_capacity(paths.size());
for (auto& segment : paths)
encoded_paths.unchecked_append(deprecated_string_percent_encode(segment, PercentEncodeSet::Path));
m_paths = move(encoded_paths);
} else {
m_paths = move(paths);
}
m_valid = compute_validity();
}
void URL::set_query(DeprecatedString query)
void URL::append_path(DeprecatedString path, ApplyPercentEncoding apply_percent_encoding)
{
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
path = deprecated_string_percent_encode(path, PercentEncodeSet::Path);
m_paths.append(path);
}
void URL::set_query(DeprecatedString query, ApplyPercentEncoding apply_percent_encoding)
{
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
query = deprecated_string_percent_encode(query, is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query);
m_query = move(query);
}
void URL::set_fragment(DeprecatedString fragment)
void URL::set_fragment(DeprecatedString fragment, ApplyPercentEncoding apply_percent_encoding)
{
if (apply_percent_encoding == ApplyPercentEncoding::Yes)
fragment = deprecated_string_percent_encode(fragment, PercentEncodeSet::Fragment);
m_fragment = move(fragment);
}
@ -171,9 +202,8 @@ URL URL::create_with_file_scheme(DeprecatedString const& path, DeprecatedString
// This is because a file URL always needs a non-null hostname.
url.set_host(hostname.is_null() || hostname == "localhost" ? DeprecatedString::empty() : hostname);
url.set_paths(lexical_path.parts());
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
if (path.ends_with('/'))
url.append_path("");
url.append_slash();
url.set_fragment(fragment);
return url;
}
@ -188,9 +218,8 @@ URL URL::create_with_help_scheme(DeprecatedString const& path, DeprecatedString
// This is because a file URL always needs a non-null hostname.
url.set_host(hostname.is_null() || hostname == "localhost" ? DeprecatedString::empty() : hostname);
url.set_paths(lexical_path.parts());
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
if (path.ends_with('/'))
url.append_path("");
url.append_slash();
url.set_fragment(fragment);
return url;
}
@ -242,10 +271,10 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
builder.append("//"sv);
if (includes_credentials()) {
builder.append(percent_encode(m_username, PercentEncodeSet::Userinfo));
builder.append(m_username);
if (!m_password.is_empty()) {
builder.append(':');
builder.append(percent_encode(m_password, PercentEncodeSet::Userinfo));
builder.append(m_password);
}
builder.append('@');
}
@ -256,24 +285,24 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
}
if (cannot_be_a_base_url()) {
builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
builder.append(m_paths[0]);
} else {
if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty())
builder.append("/."sv);
for (auto& segment : m_paths) {
builder.append('/');
builder.append(percent_encode(segment, PercentEncodeSet::Path));
builder.append(segment);
}
}
if (!m_query.is_null()) {
builder.append('?');
builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query));
builder.append(m_query);
}
if (exclude_fragment == ExcludeFragment::No && !m_fragment.is_null()) {
builder.append('#');
builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
builder.append(m_fragment);
}
return builder.to_deprecated_string();
@ -300,24 +329,24 @@ DeprecatedString URL::serialize_for_display() const
}
if (cannot_be_a_base_url()) {
builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
builder.append(m_paths[0]);
} else {
if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty())
builder.append("/."sv);
for (auto& segment : m_paths) {
builder.append('/');
builder.append(percent_encode(segment, PercentEncodeSet::Path));
builder.append(segment);
}
}
if (!m_query.is_null()) {
builder.append('?');
builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query));
builder.append(m_query);
}
if (!m_fragment.is_null()) {
builder.append('#');
builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
builder.append(m_fragment);
}
return builder.to_deprecated_string();

View file

@ -19,8 +19,6 @@
namespace AK {
// NOTE: The member variables cannot contain any percent encoded sequences.
// The URL parser automatically decodes those sequences and the serialize() method will re-encode them as necessary.
class URL {
friend class URLParser;
@ -70,16 +68,25 @@ public:
bool includes_credentials() const { return !m_username.is_empty() || !m_password.is_empty(); }
bool is_special() const { return is_special_scheme(m_scheme); }
enum class ApplyPercentEncoding {
Yes,
No
};
void set_scheme(DeprecatedString);
void set_username(DeprecatedString);
void set_password(DeprecatedString);
void set_username(DeprecatedString username, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
void set_password(DeprecatedString password, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
void set_host(DeprecatedString);
void set_port(Optional<u16>);
void set_paths(Vector<DeprecatedString>);
void set_query(DeprecatedString);
void set_fragment(DeprecatedString);
void set_paths(Vector<DeprecatedString> password, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
void set_query(DeprecatedString query, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
void set_fragment(DeprecatedString fragment, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
void set_cannot_be_a_base_url(bool value) { m_cannot_be_a_base_url = value; }
void append_path(DeprecatedString path) { m_paths.append(move(path)); }
void append_path(DeprecatedString path, ApplyPercentEncoding apply_percent_encoding = ApplyPercentEncoding::Yes);
void append_slash()
{
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
append_path("", ApplyPercentEncoding::No);
}
DeprecatedString path() const;
DeprecatedString basename() const;

View file

@ -194,9 +194,6 @@ Optional<URL> URLParser::parse_data_url(StringView raw_input)
// future for validation of URLs, which would then lead to infinite recursion.
// The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
// variables' values here, not what the URL class presents to its users.
// NOTE: Since the URL class's member variables contain percent decoded data, we have to deviate from the URL parser specification when setting
// some of those values. Because the specification leaves all values percent encoded in their URL data structure, we have to percent decode
// everything before setting the member variables.
URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
{
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
@ -310,7 +307,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
++iterator;
} else {
url->m_cannot_be_a_base_url = true;
url->append_path("");
url->append_slash();
state = State::CannotBeABaseUrlPath;
}
} else {
@ -441,13 +438,11 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (password_token_seen) {
builder.append(url->password());
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
url->m_password = URL::percent_decode(builder.string_view());
url->m_password = builder.string_view();
} else {
builder.append(url->username());
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
url->m_username = URL::percent_decode(builder.string_view());
url->m_username = builder.string_view();
}
}
buffer.clear();
@ -561,7 +556,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
url->m_host = base_url->m_host;
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
url->append_path(base_url->m_paths[0]);
url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
state = State::Path;
continue;
}
@ -616,9 +611,9 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (!url->m_paths.is_empty() && !(url->m_scheme == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
url->m_paths.remove(url->m_paths.size() - 1);
if (code_point != '/' && !(url->is_special() && code_point == '\\'))
url->append_path("");
url->append_slash();
} else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
url->append_path("");
url->append_slash();
} else if (!is_single_dot_path_segment(buffer.string_view())) {
if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
auto drive_letter = buffer.string_view()[0];
@ -626,8 +621,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
buffer.append(drive_letter);
buffer.append(':');
}
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->append_path(URL::percent_decode(buffer.string_view()));
url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
}
buffer.clear();
if (code_point == '?') {
@ -649,13 +643,12 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
// NOTE: Verify that the assumptions required for this simplification are correct.
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
if (code_point == '?') {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = URL::percent_decode(buffer.string_view());
url->m_paths[0] = buffer.string_view();
url->m_query = "";
state = State::Query;
} else if (code_point == '#') {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = URL::percent_decode(buffer.string_view());
url->m_paths[0] = buffer.string_view();
url->m_fragment = "";
state = State::Fragment;
} else {
@ -665,8 +658,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (code_point != end_of_file) {
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
} else {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = URL::percent_decode(buffer.string_view());
url->m_paths[0] = buffer.string_view();
}
}
break;
@ -696,8 +688,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
buffer.append_code_point(code_point);
} else {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_fragment = URL::percent_decode(buffer.string_view());
url->m_fragment = buffer.string_view();
buffer.clear();
}
break;

View file

@ -151,7 +151,7 @@ TEST_CASE(file_url_with_encoded_characters)
URL url("file:///my/file/test%23file.txt"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.scheme(), "file");
EXPECT_EQ(url.path(), "/my/file/test#file.txt");
EXPECT_EQ(url.path(), "/my/file/test%23file.txt");
EXPECT(url.query().is_null());
EXPECT(url.fragment().is_null());
}
@ -389,7 +389,7 @@ TEST_CASE(unicode)
{
URL url { "http://example.com/_ünicöde_téxt_©"sv };
EXPECT(url.is_valid());
EXPECT_EQ(url.path(), "/_ünicöde_téxt_©");
EXPECT_EQ(url.path(), "/_%C3%BCnic%C3%B6de_t%C3%A9xt_%C2%A9");
EXPECT(url.query().is_null());
EXPECT(url.fragment().is_null());
}
@ -415,3 +415,10 @@ TEST_CASE(empty_url_with_base_url)
EXPECT_EQ(parsed_url.is_valid(), true);
EXPECT(base_url.equals(parsed_url));
}
TEST_CASE(google_street_view)
{
constexpr auto streetview_url = "https://www.google.co.uk/maps/@53.3354159,-1.9573545,3a,75y,121.1h,75.67t/data=!3m7!1e1!3m5!1sSY8xCv17jAX4S7SRdV38hg!2e0!6shttps:%2F%2Fstreetviewpixels-pa.googleapis.com%2Fv1%2Fthumbnail%3Fpanoid%3DSY8xCv17jAX4S7SRdV38hg%26cb_client%3Dmaps_sv.tactile.gps%26w%3D203%26h%3D100%26yaw%3D188.13148%26pitch%3D0%26thumbfov%3D100!7i13312!8i6656";
URL url(streetview_url);
EXPECT_EQ(url.serialize(), streetview_url);
}

View file

@ -62,7 +62,7 @@ public:
WebIDL::ExceptionOr<String> to_json() const;
void set_query(Badge<URLSearchParams>, String query) { m_url.set_query(query.to_deprecated_string()); }
void set_query(Badge<URLSearchParams>, String query) { m_url.set_query(query.to_deprecated_string(), AK::URL::ApplyPercentEncoding::Yes); }
private:
URL(JS::Realm&, AK::URL, JS::NonnullGCPtr<URLSearchParams> query);