diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp
index 1945b1da74..5e90b17b8d 100644
--- a/AK/URLParser.cpp
+++ b/AK/URLParser.cpp
@@ -50,17 +50,336 @@ static Optional<DeprecatedString> parse_ipv4_address(StringView input)
     return input;
 }
 
+// https://url.spec.whatwg.org/#concept-ipv6-serializer
+// Serializes the eight pieces of an IPv6 address as lowercase hexadecimal, collapsing the first longest
+// run of two or more 0 pieces into "::". The enclosing '[' and ']' are not emitted by this function.
+static ErrorOr<String> serialize_ipv6_address(Array<u16, 8> const& address)
+{
+    // 1. Let output be the empty string.
+    StringBuilder output;
+
+    // 2. Let compress be an index to the first IPv6 piece in the first longest sequences of address’s IPv6 pieces that are 0.
+    // 3. If there is no sequence of address’s IPv6 pieces that are 0 that is longer than 1, then set compress to null.
+    Optional<size_t> compress;
+    size_t longest_sequence_length = 0;
+    size_t current_sequence_length = 0;
+    size_t current_sequence_start = 0;
+    for (size_t i = 0; i < 8; ++i) {
+        if (address[i] != 0) {
+            current_sequence_length = 0;
+            continue;
+        }
+
+        if (current_sequence_length == 0)
+            current_sequence_start = i;
+        ++current_sequence_length;
+
+        // Update the best run while still inside it; doing this only when a non-zero piece
+        // ends the run would miss a run that extends to the last piece (e.g. "1::").
+        // The strict comparison keeps the first of several equally long runs.
+        if (current_sequence_length > longest_sequence_length) {
+            longest_sequence_length = current_sequence_length;
+            compress = current_sequence_start;
+        }
+    }
+
+    // A single 0 piece is not a sequence "longer than 1", so it must not be compressed.
+    if (longest_sequence_length <= 1)
+        compress.clear();
+
+    // 4. Let ignore0 be false.
+    auto ignore0 = false;
+
+    // 5. For each pieceIndex in the range 0 to 7, inclusive:
+    for (size_t piece_index = 0; piece_index <= 7; ++piece_index) {
+        // 1. If ignore0 is true and address[pieceIndex] is 0, then continue.
+        if (ignore0 && address[piece_index] == 0)
+            continue;
+
+        // 2. Otherwise, if ignore0 is true, set ignore0 to false.
+        if (ignore0)
+            ignore0 = false;
+
+        // 3. If compress is pieceIndex, then:
+        if (compress == piece_index) {
+            // 1. Let separator be "::" if pieceIndex is 0, and U+003A (:) otherwise.
+            auto separator = piece_index == 0 ? "::"sv : ":"sv;
+
+            // 2. Append separator to output.
+            output.append(separator);
+
+            // 3. Set ignore0 to true and continue.
+            ignore0 = true;
+            continue;
+        }
+
+        // 4. Append address[pieceIndex], represented as the shortest possible lowercase hexadecimal number, to output.
+        output.appendff("{:x}", address[piece_index]);
+
+        // 5. If pieceIndex is not 7, then append U+003A (:) to output.
+        if (piece_index != 7)
+            output.append(':');
+    }
+
+    // 6. Return output.
+    return output.to_string();
+}
+
+// https://url.spec.whatwg.org/#concept-ipv6-parser
+// Parses input (the host text without its enclosing '[' and ']') into the eight pieces of an IPv6 address.
+// Every failure path reports a validation error and returns an empty Optional.
+static Optional<Array<u16, 8>> parse_ipv6_address(StringView input)
+{
+    // 1. Let address be a new IPv6 address whose IPv6 pieces are all 0.
+    Array<u16, 8> address {};
+
+    // 2. Let pieceIndex be 0.
+    size_t piece_index = 0;
+
+    // 3. Let compress be null.
+    Optional<size_t> compress;
+
+    // Decode the input into code points up front so that pointer can index them directly.
+    Vector<u32> code_points;
+    code_points.ensure_capacity(input.length());
+    for (auto code_point : Utf8View { input }) {
+        code_points.append(code_point);
+    }
+
+    // 4. Let pointer be a pointer for input.
+    size_t pointer = 0;
+    auto c = [&]() -> u32 {
+        if (pointer >= code_points.size())
+            return end_of_file;
+        return code_points[pointer];
+    };
+
+    auto remaining = [&]() -> ReadonlySpan<u32> {
+        if ((pointer + 1) >= code_points.size())
+            return {};
+        return code_points.span().slice(pointer + 1);
+    };
+
+    // 5. If c is U+003A (:), then:
+    if (c() == ':') {
+        // 1. If remaining does not start with U+003A (:), IPv6-invalid-compression validation error, return failure.
+        if (remaining().is_empty() || remaining()[0] != ':') {
+            report_validation_error();
+            return {};
+        }
+
+        // 2. Increase pointer by 2.
+        pointer += 2;
+
+        // 3. Increase pieceIndex by 1 and then set compress to pieceIndex.
+        ++piece_index;
+        compress = piece_index;
+    }
+
+    // 6. While c is not the EOF code point:
+    while (c() != end_of_file) {
+        // 1. If pieceIndex is 8, IPv6-too-many-pieces validation error, return failure.
+        if (piece_index == 8) {
+            report_validation_error();
+            return {};
+        }
+
+        // 2. If c is U+003A (:), then:
+        if (c() == ':') {
+            // 1. If compress is non-null, IPv6-multiple-compression validation error, return failure.
+            if (compress.has_value()) {
+                report_validation_error();
+                return {};
+            }
+
+            // 2. Increase pointer and pieceIndex by 1, set compress to pieceIndex, and then continue.
+            ++pointer;
+            ++piece_index;
+            compress = piece_index;
+            continue;
+        }
+
+        // 3. Let value and length be 0.
+        u32 value = 0;
+        size_t length = 0;
+
+        // 4. While length is less than 4 and c is an ASCII hex digit,
+        //    set value to value × 0x10 + c interpreted as hexadecimal number,
+        //    and increase pointer and length by 1.
+        while (length < 4 && is_ascii_hex_digit(c())) {
+            value = value * 0x10 + parse_ascii_hex_digit(c());
+            ++pointer;
+            ++length;
+        }
+
+        // 5. If c is U+002E (.), then:
+        if (c() == '.') {
+            // 1. If length is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
+            if (length == 0) {
+                report_validation_error();
+                return {};
+            }
+
+            // 2. Decrease pointer by length.
+            pointer -= length;
+
+            // 3. If pieceIndex is greater than 6, IPv4-in-IPv6-too-many-pieces validation error, return failure.
+            if (piece_index > 6) {
+                report_validation_error();
+                return {};
+            }
+
+            // 4. Let numbersSeen be 0.
+            size_t numbers_seen = 0;
+
+            // 5. While c is not the EOF code point:
+            while (c() != end_of_file) {
+                // 1. Let ipv4Piece be null.
+                Optional<u32> ipv4_piece;
+
+                // 2. If numbersSeen is greater than 0, then:
+                if (numbers_seen > 0) {
+                    // 1. If c is a U+002E (.) and numbersSeen is less than 4, then increase pointer by 1.
+                    if (c() == '.' && numbers_seen < 4) {
+                        ++pointer;
+                    }
+                    // 2. Otherwise, IPv4-in-IPv6-invalid-code-point validation error, return failure.
+                    else {
+                        report_validation_error();
+                        return {};
+                    }
+                }
+
+                // 3. If c is not an ASCII digit, IPv4-in-IPv6-invalid-code-point validation error, return failure.
+                if (!is_ascii_digit(c())) {
+                    report_validation_error();
+                    return {};
+                }
+
+                // 4. While c is an ASCII digit:
+                while (is_ascii_digit(c())) {
+                    // 1. Let number be c interpreted as decimal number.
+                    u32 number = parse_ascii_digit(c());
+
+                    // 2. If ipv4Piece is null, then set ipv4Piece to number.
+                    if (!ipv4_piece.has_value()) {
+                        ipv4_piece = number;
+                    }
+                    // Otherwise, if ipv4Piece is 0, IPv4-in-IPv6-invalid-code-point validation error, return failure.
+                    else if (ipv4_piece.value() == 0) {
+                        report_validation_error();
+                        return {};
+                    }
+                    // Otherwise, set ipv4Piece to ipv4Piece × 10 + number.
+                    else {
+                        ipv4_piece = ipv4_piece.value() * 10 + number;
+                    }
+
+                    // 3. If ipv4Piece is greater than 255, IPv4-in-IPv6-out-of-range-part validation error, return failure.
+                    if (ipv4_piece.value() > 255) {
+                        report_validation_error();
+                        return {};
+                    }
+
+                    // 4. Increase pointer by 1.
+                    ++pointer;
+                }
+                // 5. Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece.
+                address[piece_index] = address[piece_index] * 0x100 + ipv4_piece.value();
+
+                // 6. Increase numbersSeen by 1.
+                ++numbers_seen;
+
+                // 7. If numbersSeen is 2 or 4, then increase pieceIndex by 1.
+                if (numbers_seen == 2 || numbers_seen == 4)
+                    ++piece_index;
+            }
+
+            // 6. If numbersSeen is not 4, IPv4-in-IPv6-too-few-parts validation error, return failure.
+            if (numbers_seen != 4) {
+                report_validation_error();
+                return {};
+            }
+
+            // 7. Break.
+            break;
+        }
+        // 6. Otherwise, if c is U+003A (:):
+        else if (c() == ':') {
+            // 1. Increase pointer by 1.
+            ++pointer;
+
+            // 2. If c is the EOF code point, IPv6-invalid-code-point validation error, return failure.
+            if (c() == end_of_file) {
+                report_validation_error();
+                return {};
+            }
+        }
+
+        // 7. Otherwise, if c is not the EOF code point, IPv6-invalid-code-point validation error, return failure.
+        else if (c() != end_of_file) {
+            report_validation_error();
+            return {};
+        }
+
+        // 8. Set address[pieceIndex] to value.
+        address[piece_index] = value;
+
+        // 9. Increase pieceIndex by 1.
+        ++piece_index;
+    }
+
+    // 7. If compress is non-null, then:
+    if (compress.has_value()) {
+        // 1. Let swaps be pieceIndex − compress.
+        size_t swaps = piece_index - compress.value();
+
+        // 2. Set pieceIndex to 7.
+        piece_index = 7;
+
+        // 3. While pieceIndex is not 0 and swaps is greater than 0,
+        //    swap address[pieceIndex] with address[compress + swaps − 1],
+        //    and then decrease both pieceIndex and swaps by 1.
+        while (piece_index != 0 && swaps > 0) {
+            swap(address[piece_index], address[compress.value() + swaps - 1]);
+            --piece_index;
+            --swaps;
+        }
+    }
+
+    // 8. Otherwise, if compress is null and pieceIndex is not 8, IPv6-too-few-pieces validation error, return failure.
+    else if (!compress.has_value() && piece_index != 8) {
+        report_validation_error();
+        return {};
+    }
+
+    // 9. Return address.
+    return address;
+}
+
 // https://url.spec.whatwg.org/#concept-host-parser
 // NOTE: This is a very bare-bones implementation.
 static Optional<DeprecatedString> parse_host(StringView input, bool is_not_special = false)
 {
+    // 1. If input starts with U+005B ([), then:
     if (input.starts_with('[')) {
+        // 1. If input does not end with U+005D (]), IPv6-unclosed validation error, return failure.
         if (!input.ends_with(']')) {
             report_validation_error();
             return {};
         }
-        // FIXME: Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
-        TODO();
+
+        // 2. Return the result of IPv6 parsing input with its leading U+005B ([) and trailing U+005D (]) removed.
+        auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
+        if (!address.has_value())
+            return {};
+
+        // NOTE(review): serialize_ipv6_address() does not emit the enclosing '[' and ']';
+        //               confirm that whoever serializes the URL adds them back around an IPv6 host.
+        auto result = serialize_ipv6_address(*address);
+        if (result.is_error())
+            return {};
+        return result.release_value().to_deprecated_string();
     }
 
     if (is_not_special)
diff --git a/Tests/AK/TestURL.cpp b/Tests/AK/TestURL.cpp
index f04e2979d9..12cda81a51 100644
--- a/Tests/AK/TestURL.cpp
+++ b/Tests/AK/TestURL.cpp
@@ -428,3 +428,33 @@ TEST_CASE(google_street_view)
     URL url(streetview_url);
     EXPECT_EQ(url.serialize(), streetview_url);
 }
+
+TEST_CASE(ipv6_address)
+{
+    {
+        constexpr auto ipv6_url = "http://[::1]/index.html"sv;
+        URL url(ipv6_url);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url, ipv6_url);
+    }
+
+    {
+        constexpr auto ipv6_url = "http://[0:f:0:0:f:f:0:0]/index.html"sv;
+        URL url(ipv6_url);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url, ipv6_url);
+    }
+
+    {
+        constexpr auto ipv6_url = "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/index.html"sv;
+        URL url(ipv6_url);
+        EXPECT(url.is_valid());
+        EXPECT_EQ(url, ipv6_url);
+    }
+
+    {
+        constexpr auto bad_ipv6_url = "https://[oops]/index.html"sv;
+        URL url(bad_ipv6_url);
+        EXPECT_EQ(url.is_valid(), false);
+    }
+}