Normalize UriData.parse result.

Validates base64 encoding, normalizes padding. Normalizes non-base64 data using URI percent-escapes for all invalid characters. Fixes issue #28728, #28700 BUG= http://dartbug.com/28728 http://dartbug.com/28700 R=floitsch@google.com Review-Url: https://codereview.chromium.org/2694373003 .
2024-07-05 17:30:16 +00:00 · 2017-02-17 11:02:38 +01:00 · 2017-02-17 11:02:38 +01:00 · dccdd1b981
commit dccdd1b981
parent 849934f9bc
6 changed files with 356 additions and 84 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -3,6 +3,10 @@
 ### Core library changes
 * `dart:core`: Added `Uri.isScheme` function to check the scheme of a URI.
   Example: `uri.isScheme("http")`. Ignores case when comparing.
+* `dart:core`: Make `UriData.parse` validate its input better.
+  If the data is base-64 encoded, it is normalized with respect to
+  alphabet and padding, and parsing fails if it contains invalid
+  base-64 data. Non-base-64 data is also normalized.
 * `dart:io`: Added functions `File.lastAccessed`, `File.lastAccessedSync`,
  `File.setLastModified`, `File.setLastModifiedSync`, `File.setLastAccessed`,
  and `File.setLastAccessedSync`.
--- a/sdk/lib/convert/base64.dart
+++ b/sdk/lib/convert/base64.dart
@ -57,6 +57,140 @@ class Base64Codec extends Codec<List<int>, String> {
  Base64Encoder get encoder => _encoder;

  Base64Decoder get decoder => const Base64Decoder();
+
+  /**
+   * Validates and normalizes the base64 encoded data in [source].
+   *
+   * Only acts on the substring from [start] to [end], with [end]
+   * defaulting to the end of the string.
+   *
+   * Normalization will:
+   * * Unescape any `%`-escapes.
+   * * Only allow valid characters (`A`-`Z`, `a`-`z`, `0`-`9`, `/` and `+`).
+   * * Normalize a `_` or `-` character to `/` or `+`.
+   * * Validate that existing padding (trailing `=` characters) is correct.
+   * * If no padding exists, add correct padding if necessary and possible.
+   * * Validate that the length is correct (a multiple of four).
+   *
+   * Returns [source] with the range from [start] to [end] replaced by its
+   * normalized form. Characters outside that range are kept unchanged.
+   * If the range is already normalized and needs no extra padding,
+   * [source] itself is returned.
+   *
+   * Throws a [FormatException] if the range contains a character or
+   * `%`-escape that is not valid base64 data (including the escape `%25`),
+   * if existing padding is not valid, or if unpadded data has a length
+   * of one modulo four.
+   */
+  String normalize(String source, [int start = 0, int end]) {
+    end = RangeError.checkValidRange(start, end, source.length);
+    const int percent = 0x25;
+    const int equals = 0x3d;
+    StringBuffer buffer = null;
+    int sliceStart = start;
+    var alphabet = _Base64Encoder._base64Alphabet;
+    var inverseAlphabet = _Base64Decoder._inverseAlphabet;
+    int firstPadding = -1;
+    int firstPaddingSourceIndex = -1;
+    int paddingCount = 0;
+    for (int i = start; i < end;) {
+      int sliceEnd = i;
+      int char = source.codeUnitAt(i++);
+      int originalChar = char;
+      // Normalize char, keep originalChar to see if it matches the source.
+      if (char == percent) {
+        if (i + 2 <= end) {
+          char = parseHexByte(source, i);  // May be negative.
+          i += 2;
+          // We know that %25 isn't valid, but our table considers it
+          // a potential padding start, so skip the checks.
+          if (char == percent) char = -1;
+        } else {
+          // An invalid HEX escape (too short).
+          // Just skip past the handling and reach the throw below.
+          char = -1;
+        }
+      }
+      // If char is negative here, hex-decoding failed in some way.
+      if (0 <= char && char <= 127) {
+        int value = inverseAlphabet[char];
+        if (value >= 0) {
+          char = alphabet.codeUnitAt(value);
+          if (char == originalChar) continue;
+        } else if (value == _Base64Decoder._padding) {
+          // We have ruled out percent, so char is '='.
+          if (firstPadding < 0) {
+            // Mark position in normalized output where padding occurs.
+            firstPadding = (buffer?.length ?? 0) + (sliceEnd - sliceStart);
+            firstPaddingSourceIndex = sliceEnd;
+          }
+          paddingCount++;
+          // It could have been an escaped equals (%3D).
+          if (originalChar == equals) continue;
+        }
+        if (value != _Base64Decoder._invalid) {
+          buffer ??= new StringBuffer();
+          buffer.write(source.substring(sliceStart, sliceEnd));
+          buffer.writeCharCode(char);
+          sliceStart = i;
+          continue;
+        }
+      }
+      throw new FormatException("Invalid base64 data", source, sliceEnd);
+    }
+    if (buffer != null) {
+      buffer.write(source.substring(sliceStart, end));
+      if (firstPadding >= 0) {
+        // There was padding in the source. Check that it is valid:
+        // * result length a multiple of four
+        // * one or two padding characters at the end.
+        _checkPadding(source, firstPaddingSourceIndex, end,
+                      firstPadding, paddingCount, buffer.length);
+      } else {
+        // Length of last chunk (1-4 chars) in the encoding.
+        int endLength = ((buffer.length - 1) % 4) + 1;
+        if (endLength == 1) {
+          // The data must have length 0, 2 or 3 modulo 4.
+          throw new FormatException("Invalid base64 encoding length ",
+              source, end);
+        }
+        while (endLength < 4) {
+          buffer.write("=");
+          endLength++;
+        }
+      }
+      return source.replaceRange(start, end, buffer.toString());
+    }
+    // Original was already normalized, only check padding.
+    int length = end - start;
+    if (firstPadding >= 0) {
+      _checkPadding(source, firstPaddingSourceIndex, end,
+          firstPadding, paddingCount, length);
+    } else {
+      // No padding given, so add some if it is needed.
+      int endLength = length % 4;
+      if (endLength == 1) {
+          // The data must have length 0, 2 or 3 modulo 4.
+          throw new FormatException("Invalid base64 encoding length ",
+              source, end);
+      }
+      if (endLength > 1) {
+        // There is no "insertAt" on String, but this works as well.
+        source = source.replaceRange(end, end, (endLength == 2) ? "==" : "=");
+      }
+    }
+    return source;
+  }
+
+  /**
+   * Checks that the padding of normalized base64 data is valid.
+   *
+   * The [length] is the length of the normalized data, [firstPadding] is
+   * the position in the normalized data of the first padding character,
+   * and [paddingCount] is the number of padding characters.
+   * The [source], [sourceIndex] and [sourceEnd] are only used to report
+   * where in the original input an error occurred.
+   *
+   * Throws a [FormatException] if [length] is not a multiple of four,
+   * if the padding characters are not all at the end of the data,
+   * or if there are more than two padding characters.
+   *
+   * Declared `void` because it only validates and never produces a value.
+   */
+  static void _checkPadding(String source, int sourceIndex, int sourceEnd,
+          int firstPadding, int paddingCount, int length) {
+    if (length % 4 != 0) {
+      throw new FormatException(
+          "Invalid base64 padding, padded length must be multiple of four, "
+          "is $length",
+          source, sourceEnd);
+    }
+    // All padding must be contiguous and reach the end of the data.
+    if (firstPadding + paddingCount != length) {
+      throw new FormatException(
+          "Invalid base64 padding, '=' not at the end",
+          source, sourceIndex);
+    }
+    if (paddingCount > 2) {
+      throw new FormatException(
+          "Invalid base64 padding, more than two '=' characters",
+          source, sourceIndex);
+    }
+  }
 }

 // ------------------------------------------------------------------------
@ -389,7 +523,7 @@ class _Base64Decoder {
   *
   * Accepts the "URL-safe" alphabet as well (`-` and `_` are the
   * 62nd and 63rd alphabet characters), and considers `%` a padding
-   * character, which mush then be followed by `3D`, the percent-escape
+   * character, which must then be followed by `3D`, the percent-escape
   * for `=`.
   */
  static final List<int> _inverseAlphabet = new Int8List.fromList([
--- a/sdk/lib/convert/convert.dart
+++ b/sdk/lib/convert/convert.dart
@ -55,6 +55,7 @@ library dart.convert;

 import 'dart:async';
 import 'dart:typed_data';
+import 'dart:_internal' show parseHexByte;

 part 'ascii.dart';
 part 'base64.dart';
--- a/sdk/lib/core/uri.dart
+++ b/sdk/lib/core/uri.dart
@ -7,10 +7,12 @@ part of dart.core;
 // Frequently used character codes.
 const int _SPACE = 0x20;
 const int _PERCENT = 0x25;
+const int _AMPERSAND = 0x26;
 const int _PLUS = 0x2B;
 const int _DOT = 0x2E;
 const int _SLASH = 0x2F;
 const int _COLON = 0x3A;
+const int _EQUALS = 0x3d;
 const int _UPPER_CASE_A = 0x41;
 const int _UPPER_CASE_Z = 0x5A;
 const int _LEFT_BRACKET = 0x5B;
@ -1369,7 +1371,7 @@ class _Uri implements Uri {
   *
   * Always non-null.
   */
-  String _path;
+  final String path;

  // The query content, or null if there is no query.
  final String _query;
@ -1401,11 +1403,21 @@ class _Uri implements Uri {
  Map<String, List<String>> _queryParameterLists;

  /// Internal non-verifying constructor. Only call with validated arguments.
+  ///
+  /// The components must be properly normalized.
+  ///
+  /// Use `null` for [_host] if there is no authority. In that case, always
+  /// pass `null` for [_port] and [_userInfo] as well.
+  ///
+  /// Use `null` for [_port], [_userInfo], [_query] and [_fragment] if there is
+  /// no component of that type.
+  ///
+  /// The [path] and [scheme] are never `null` (but either may be empty).
  _Uri._internal(this.scheme,
                 this._userInfo,
                 this._host,
                 this._port,
-                 this._path,
+                 this.path,
                 this._query,
                 this._fragment);

@ -1543,8 +1555,6 @@ class _Uri implements Uri {
    return 0;
  }

-  String get path => _path;
-
  String get query => _query ?? "";

  String get fragment => _fragment ?? "";
@ -1847,7 +1857,7 @@ class _Uri implements Uri {
      path = _makePath(path, 0, _stringOrNullLength(path), pathSegments,
                       scheme, hasAuthority);
    } else {
-      path = this._path;
+      path = this.path;
      if ((isFile || (hasAuthority && !path.isEmpty)) &&
          !path.startsWith('/')) {
        path = "/" + path;
@ -1873,7 +1883,7 @@ class _Uri implements Uri {
  Uri removeFragment() {
    if (!this.hasFragment) return this;
    return new _Uri._internal(scheme, _userInfo, _host, _port,
-                             _path, _query, null);
+                             path, _query, null);
  }

  List<String> get pathSegments {
@ -1914,8 +1924,8 @@ class _Uri implements Uri {
  }

  Uri normalizePath() {
-    String path = _normalizePath(_path, scheme, hasAuthority);
-    if (identical(path, _path)) return this;
+    String path = _normalizePath(this.path, scheme, hasAuthority);
+    if (identical(path, this.path)) return this;
    return this.replace(path: path);
  }

@ -2082,7 +2092,7 @@ class _Uri implements Uri {

  static String _makeUserInfo(String userInfo, int start, int end) {
    if (userInfo == null) return "";
-    return _normalize(userInfo, start, end, _userinfoTable);
+    return _normalizeOrSubstring(userInfo, start, end, _userinfoTable);
  }

  static String _makePath(String path, int start, int end,
@ -2097,7 +2107,7 @@ class _Uri implements Uri {
    }
    var result;
    if (path != null) {
-      result = _normalize(path, start, end, _pathCharOrSlashTable);
+      result = _normalizeOrSubstring(path, start, end, _pathCharOrSlashTable);
    } else {
      result = pathSegments.map((s) =>
          _uriEncode(_pathCharTable, s, UTF8, false)).join("/");
@ -2130,7 +2140,7 @@ class _Uri implements Uri {
      if (queryParameters != null) {
        throw new ArgumentError('Both query and queryParameters specified');
      }
-      return _normalize(query, start, end, _queryCharTable);
+      return _normalizeOrSubstring(query, start, end, _queryCharTable);
    }
    if (queryParameters == null) return null;

@ -2162,7 +2172,7 @@ class _Uri implements Uri {

  static String _makeFragment(String fragment, int start, int end) {
    if (fragment == null) return null;
-    return _normalize(fragment, start, end, _queryCharTable);
+    return _normalizeOrSubstring(fragment, start, end, _queryCharTable);
  }

  /**
@ -2185,8 +2195,8 @@ class _Uri implements Uri {
    }
    int firstDigit = source.codeUnitAt(index + 1);
    int secondDigit = source.codeUnitAt(index + 2);
-    int firstDigitValue = _parseHexDigit(firstDigit);
-    int secondDigitValue = _parseHexDigit(secondDigit);
+    int firstDigitValue = hexDigitValue(firstDigit);
+    int secondDigitValue = hexDigitValue(secondDigit);
    if (firstDigitValue < 0 || secondDigitValue < 0) {
      return "%";  // Marks the escape as invalid.
    }
@ -2206,19 +2216,6 @@ class _Uri implements Uri {
    return null;
  }

-  // Converts a UTF-16 code-unit to its value as a hex digit.
-  // Returns -1 for non-hex digits.
-  static int _parseHexDigit(int char) {
-    const int zeroDigit = 0x30;
-    int digit = char ^ zeroDigit;
-    if (digit <= 9) return digit;
-    int lowerCase = char | 0x20;
-    if (_LOWER_CASE_A <= lowerCase && lowerCase <= _LOWER_CASE_F) {
-      return lowerCase - (_LOWER_CASE_A - 10);
-    }
-    return -1;
-  }
-
  static String _escapeChar(int char) {
    assert(char <= 0x10ffff);  // It's a valid unicode code point.
    List<int> codeUnits;
@ -2254,6 +2251,18 @@ class _Uri implements Uri {
    return new String.fromCharCodes(codeUnits);
  }

+  /**
+   * Returns the normalized form of a slice of [component].
+   *
+   * Delegates to [_normalize] for the range from [start] to [end] using
+   * [charTable]. When [_normalize] gives `null`, the plain substring of
+   * [component] from [start] to [end] is returned instead.
+   */
+  static String _normalizeOrSubstring(String component, int start, int end,
+                                      List<int> charTable) {
+    String normalized = _normalize(component, start, end, charTable);
+    if (normalized != null) return normalized;
+    return component.substring(start, end);
+  }
+
  /**
   * Runs through component checking that each character is valid and
   * normalize percent escapes.
@ -2262,9 +2271,12 @@ class _Uri implements Uri {
   * Each `%` character must be followed by two hex digits.
   * If the hex-digits are lower case letters, they are converted to
   * upper case.
+   *
+   * Returns `null` if the original content was already normalized.
   */
  static String _normalize(String component, int start, int end,
-                           List<int> charTable) {
+                           List<int> charTable,
+                           {bool escapeDelimiters = false}) {
    StringBuffer buffer;
    int sectionStart = start;
    int index = start;
@ -2290,7 +2302,7 @@ class _Uri implements Uri {
          } else {
            sourceLength = 3;
          }
-        } else if (_isGeneralDelimiter(char)) {
+        } else if (!escapeDelimiters && _isGeneralDelimiter(char)) {
          _fail(component, index, "Invalid character");
        } else {
          sourceLength = 1;
@ -2315,8 +2327,7 @@ class _Uri implements Uri {
      }
    }
    if (buffer == null) {
-      // Makes no copy if start == 0 and end == component.length.
-      return component.substring(start, end);
+      return null;
    }
    if (sectionStart < end) {
      buffer.write(component.substring(sectionStart, end));
@ -2509,7 +2520,7 @@ class _Uri implements Uri {
        targetHost = this._host;
        targetPort = this._port;
        if (reference.path == "") {
-          targetPath = this._path;
+          targetPath = this.path;
          if (reference.hasQuery) {
            targetQuery = reference.query;
          } else {
@ -2535,7 +2546,7 @@ class _Uri implements Uri {
                targetPath = _removeDotSegments("/" + reference.path);
              }
            } else {
-              var mergedPath = _mergePaths(this._path, reference.path);
+              var mergedPath = _mergePaths(this.path, reference.path);
              if (this.hasScheme || this.hasAuthority || this.hasAbsolutePath) {
                targetPath = _removeDotSegments(mergedPath);
              } else {
@ -2572,9 +2583,9 @@ class _Uri implements Uri {

  bool get hasFragment => _fragment != null;

-  bool get hasEmptyPath => _path.isEmpty;
+  bool get hasEmptyPath => path.isEmpty;

-  bool get hasAbsolutePath => _path.startsWith('/');
+  bool get hasAbsolutePath => path.startsWith('/');

  String get origin {
    if (scheme == "") {
@ -2652,7 +2663,7 @@ class _Uri implements Uri {
  }

  bool get _isPathAbsolute {
-    return _path != null && _path.startsWith('/');
+    return path != null && path.startsWith('/');
  }

  void _writeAuthority(StringSink ss) {
@ -2742,13 +2753,11 @@ class _Uri implements Uri {
      result.putIfAbsent(key, _createList).add(value);
    }

-    const int _equals = 0x3d;
-    const int _ampersand = 0x26;
    while (i < query.length) {
      int char = query.codeUnitAt(i);
-      if (char == _equals) {
+      if (char == _EQUALS) {
        if (equalsIndex < 0) equalsIndex = i;
-      } else if (char == _ampersand) {
+      } else if (char == _AMPERSAND) {
        parsePair(start, equalsIndex, i);
        start = i + 1;
        equalsIndex = -1;
@ -3149,10 +3158,10 @@ class UriData {
   * If there is a single separator left, it ends the "base64" marker.
   *
   * So the following separators are found for a text:
-   *
-   *     data:text/plain;foo=bar;base64,ARGLEBARGLE=
-   *         ^          ^   ^   ^      ^
-   *
+   * ```
+   * data:text/plain;foo=bar;base64,ARGLEBARGLE=
+   *     ^          ^   ^   ^      ^
+   * ```
   */
  final List<int> _separatorIndices;

@ -3353,12 +3362,15 @@ class UriData {
   * percent-escapes for non-ASCII byte values that need an interpretation
   * to be converted to the corresponding string.
   *
-   * Parsing doesn't check the validity of any part, it just checks that the
-   * input has the correct structure with the correct sequence of `/`, `;`, `=`
-   * and `,` delimiters.
+   * Parsing checks that Base64 encoded data is valid, and it normalizes it
+   * to use the default Base64 alphabet and to use padding.
+   * Non-Base64 data is escaped using percent-escapes as necessary to make
+   * it valid, and existing escapes are case normalized.
   *
   * Accessing the individual parts may fail later if they turn out to have
-   * content that can't be decoded successfully as a string.
+   * content that can't be decoded successfully as a string, for example if
+   * existing percent escapes represent bytes that cannot be decoded
+   * by the chosen [Encoding] (see [contentAsString]).
   */
  static UriData parse(String uri) {
    if (uri.length >= 5) {
@ -3388,17 +3400,14 @@ class UriData {
    String query = null;
    int colonIndex = _separatorIndices[0];
    int queryIndex = _text.indexOf('?', colonIndex + 1);
-    int end = null;
+    int end = _text.length;
    if (queryIndex >= 0) {
-      query = _text.substring(queryIndex + 1);
+      query = _Uri._normalizeOrSubstring(_text, queryIndex + 1, end, _Uri._queryCharTable);
      end = queryIndex;
    }
-    path = _text.substring(colonIndex + 1, end);
-    // TODO(lrn): This can generate a URI that isn't path normalized.
-    // That's perfectly reasonable - data URIs are not hierarchical,
-    // but it may make some consumers stumble.
-    // Should we at least do escape normalization?
-    _uriCache = new _Uri._internal("data", "", null, null, path, query, null);
+    path = _Uri._normalizeOrSubstring(_text, colonIndex + 1, end,
+        _Uri._pathCharOrSlashTable);
+    _uriCache = new _DataUri(this, path, query);
    return _uriCache;
  }

@ -3408,6 +3417,9 @@ class UriData {
   * A data URI consists of a "media type" followed by data.
   * The media type starts with a MIME type and can be followed by
   * extra parameters.
+   * If the MIME type representation in the URI text contains URI escapes,
+   * they are unescaped in the returned string.
+   * If the value contains non-ASCII percent escapes, they are decoded as UTF-8.
   *
   * Example:
   *
@ -3432,6 +3444,10 @@ class UriData {
   * If the parameters of the media type contains a `charset` parameter
   * then this returns its value, otherwise it returns `US-ASCII`,
   * which is the default charset for data URIs.
+   * If the value contains non-ASCII percent escapes, they are decoded as UTF-8.
+   *
+   * If the MIME type representation in the URI text contains URI escapes,
+   * they are unescaped in the returned string.
   */
  String get charset {
    int parameterStart = 1;
@ -3503,10 +3519,8 @@ class UriData {
        result[index++] = codeUnit;
      } else {
        if (i + 2 < text.length) {
-          var digit1 = _Uri._parseHexDigit(text.codeUnitAt(i + 1));
-          var digit2 = _Uri._parseHexDigit(text.codeUnitAt(i + 2));
-          if (digit1 >= 0 && digit2 >= 0) {
-            int byte = digit1 * 16 + digit2;
+          int byte = parseHexByte(text, i + 1);
+          if (byte >= 0) {
            result[index++] = byte;
            i += 2;
            continue;
@ -3561,8 +3575,8 @@ class UriData {
   * in the URI, which is reflected by the [charset] getter. This means that
   * [charset] may return a value even if `parameters["charset"]` is `null`.
   *
-   * If the values contain non-ASCII values or percent escapes, they default
-   * to being decoded as UTF-8.
+   * If the values contain non-ASCII values or percent escapes,
+   * they are decoded as UTF-8.
   */
  Map<String, String> get parameters {
    var result = <String, String>{};
@ -3633,6 +3647,19 @@ class UriData {
      }
    }
    indices.add(i);
+    bool isBase64 = indices.length.isOdd;
+    if (isBase64) {
+      text = BASE64.normalize(text, i + 1, text.length);
+    } else {
+      // Validate "data" part, must only contain RFC 2396 'uric' characters
+      // (reserved, unreserved, or escape sequences).
+      // Normalize to this (throws on a fragment separator).
+      var data = _Uri._normalize(text, i + 1, text.length, _uricTable,
+                                 escapeDelimiters: true);
+      if (data != null) {
+        text = text.replaceRange(i + 1, text.length, data);
+      }
+    }
    return new UriData._(text, indices, sourceUri);
  }

@ -3706,6 +3733,26 @@ class UriData {
  //
  // This is the same characters as in a URI query (which is URI pchar plus '?')
  static const _uricTable = _Uri._queryCharTable;
+
+  // Base-64 alphabet characters (alphanumeric, '/', '+'); '=' (0x3d) is not set.
+  static const _base64Table = const [
+                //             LSB             MSB
+                //              |               |
+      0x0000,   // 0x00 - 0x0f  00000000 00000000
+      0x0000,   // 0x10 - 0x1f  00000000 00000000
+                //                          +   /
+      0x8800,   // 0x20 - 0x2f  00000000 00010001
+                //              01234567 89
+      0x03ff,   // 0x30 - 0x3f  11111111 11000000
+                //               ABCDEFG HIJKLMNO
+      0xfffe,   // 0x40 - 0x4f  01111111 11111111
+                //              PQRSTUVW XYZ
+      0x07ff,   // 0x50 - 0x5f  11111111 11100000
+                //               abcdefg hijklmno
+      0xfffe,   // 0x60 - 0x6f  01111111 11111111
+                //              pqrstuvw xyz
+      0x07ff,   // 0x70 - 0x7f  11111111 11100000
+  ];
 }

 // --------------------------------------------------------------------
@ -4555,6 +4602,16 @@ class _SimpleUri implements Uri {
  String toString() => _uri;
 }

+/// A `data:` [_Uri] that keeps a reference to the [UriData] it was made from.
+///
+/// The scheme is always `"data"`, and the user-info, host, port and fragment
+/// passed on to the [_Uri] constructor are always `null`.
+class _DataUri extends _Uri {
+  /// The [UriData] object handed to the constructor.
+  final UriData _data;
+
+  _DataUri(UriData data, String path, String query)
+      : _data = data,
+        super._internal("data", null, null, null, path, query, null);
+
+  /// The [UriData] this URI was created from.
+  UriData get data {
+    return _data;
+  }
+}
+
 /// Checks whether [text] starts with "data:" at position [start].
 ///
 /// The text must be long enough to allow reading five characters
--- a/sdk/lib/internal/internal.dart
+++ b/sdk/lib/internal/internal.dart
@ -69,3 +69,30 @@ class ExternalName {
  final String name;
  const ExternalName(this.name);
 }
+
+// Shared hex-parsing utilities.
+
+/// Converts the code unit [char] of a hexadecimal digit to its numeric value.
+///
+/// Accepts `0`-`9`, `a`-`f` and `A`-`F`, giving a value from 0 through 15.
+/// Any other code unit gives -1.
+int hexDigitValue(int char) {
+  assert(char >= 0 && char <= 0xFFFF);
+  // XOR with the code unit of '0' sends '0'..'9' to 0..9 and
+  // every other code unit in the asserted range to something above 9.
+  int decimal = char ^ 0x30;
+  if (decimal <= 9) return decimal;
+  // Setting bit 0x20 turns 'A'..'F' into 'a'..'f'.
+  int lowerCase = char | 0x20;
+  if (lowerCase < 0x61 || lowerCase > 0x66) return -1;
+  return lowerCase - 0x57;
+}
+
+/// Reads the two hex digits at [index] and [index] + 1 of [source] as a byte.
+///
+/// Gives the byte value (0 through 255) when both code units are hex digits,
+/// and a negative number when at least one of them is not.
+int parseHexByte(String source, int index) {
+  assert(index + 2 <= source.length);
+  int high = hexDigitValue(source.codeUnitAt(index));
+  int low = hexDigitValue(source.codeUnitAt(index + 1));
+  // An invalid low digit is -1, which has bit 8 set, so 256 is subtracted and
+  // the sum goes negative. A valid digit (0..15) leaves the sum untouched.
+  // An invalid high digit contributes -16, which a low digit cannot offset.
+  int invalidLowPenalty = low & 256;
+  return high * 16 + low - invalidLowPenalty;
+}
--- a/tests/corelib/data_uri_test.dart
+++ b/tests/corelib/data_uri_test.dart
@ -18,16 +18,18 @@ main() {
  testRoundTrip("blåbærgrød", UTF8);
  testRoundTrip("blåbærgrød", LATIN1);

-  testUriEquals("data:,abc?d#e");
-  testUriEquals("DATA:,ABC?D#E");
-  testUriEquals("data:,a%20bc?d#e");
-  testUriEquals("DATA:,A%20BC?D#E");
-  testUriEquals("data:,a%62c?d#e");
-  testUriEquals("DATA:,A%42C?D#E");
+  testUriEquals("data:,abc?d");
+  testUriEquals("DATA:,ABC?D");
+  testUriEquals("data:,a%20bc?d");
+  testUriEquals("DATA:,A%20BC?D");
+  testUriEquals("data:,abc?d%23e");  // '#' must be, and will be, escaped.
+
+  // Test that UriData.uri normalizes path and query.

  testUtf8Encoding("\u1000\uffff");
  testBytes();
  testInvalidCharacters();
+  testNormalization();
  testErrors();
 }

@ -158,6 +160,37 @@ void testBytes() {
  testLists(new List.unmodifiable(bytes));
 }

+/// Checks that [UriData.parse] normalizes both base-64 and plain content.
+void testNormalization() {
+  // Parses [input] as a data URI and checks its string form is [expected].
+  void expectNormalized(String expected, String input) {
+    Expect.equals(expected, UriData.parse(input).toString());
+  }
+
+  // Base-64 content: URL-safe alphabet characters become `/` and `+`.
+  expectNormalized("data:;base64,AA/+", "data:;base64,AA_-");
+  // Base-64 content: percent-escapes are unescaped.
+  expectNormalized("data:;base64,AB==", "data:;base64,A%42=%3D");
+  expectNormalized("data:;base64,/+/+", "data:;base64,%5F%2D%2F%2B");
+  // Base-64 content: escaped padding becomes plain `=`.
+  expectNormalized("data:;base64,AA==", "data:;base64,AA%3D%3D");
+  expectNormalized("data:;base64,AAA=", "data:;base64,AAA%3D");
+  // Base-64 content: missing padding is added.
+  expectNormalized("data:;base64,AA==", "data:;base64,AA");
+  expectNormalized("data:;base64,AAA=", "data:;base64,AAA");
+
+  // Non-base64 content: invalid characters are percent-escaped.
+  expectNormalized("data:,%20%C2%A0", "data:,\x20\xa0");
+  expectNormalized(
+      "data:,x://x@y:%5Bz%5D:42/p/./?q=x&y=z%23?%23%E1%88%B4%F0%92%8D%85",
+      "data:,x://x@y:[z]:42/p/./?q=x&y=z#?#\u1234\u{12345}");
+}
+
 bool badArgument(e) => e is ArgumentError;
 bool badFormat(e) => e is FormatException;

@ -218,29 +251,45 @@ void testErrors() {
  Expect.throws(() { UriData.parse("data:type/sub;k=v;base64");},
                badFormat);

-  // Invalid base64 format (only detected when decodeing).
+  void formatError(String input) {
+    Expect.throws(() => UriData.parse("data:;base64,$input"), badFormat, input);
+  }
+
+  // Invalid base64 format (detected when parsed).
  for (var a = 0; a <= 4; a++) {
    for (var p = 0; p <= 4; p++) {
      // Base-64 encoding must have length divisible by four and no more
      // than two padding characters at the end.
      if (p < 3 && (a + p) % 4 == 0) continue;
-      uri = UriData.parse("data:;base64," + "A" * a + "=" * p);
-      Expect.throws(uri.contentAsBytes, badFormat);
+      if (p == 0 && a > 1) continue;
+      formatError("A" * a + "=" * p);
+      formatError("A" * a + "%3D" * p);
    }
  }
  // Invalid base64 encoding: padding not at end.
-  uri = UriData.parse("data:;base64,AA=A");
-  Expect.throws(uri.contentAsBytes, badFormat);
-  uri = UriData.parse("data:;base64,A=AA");
-  Expect.throws(uri.contentAsBytes, badFormat);
-  uri = UriData.parse("data:;base64,=AAA");
-  Expect.throws(uri.contentAsBytes, badFormat);
-  uri = UriData.parse("data:;base64,A==A");
-  Expect.throws(uri.contentAsBytes, badFormat);
-  uri = UriData.parse("data:;base64,==AA");
-  Expect.throws(uri.contentAsBytes, badFormat);
-  uri = UriData.parse("data:;base64,===A");
-  Expect.throws(uri.contentAsBytes, badFormat);
+  formatError("AA=A");
+  formatError("A=AA");
+  formatError("=AAA");
+  formatError("A==A");
+  formatError("==AA");
+  formatError("===A");
+  formatError("AAA%3D=");
+  formatError("A%3D==");
+
+  // Invalid unpadded data.
+  formatError("A");
+  formatError("AAAAA");
+
+  // Invalid characters.
+  formatError("AAA*");
+  formatError("AAA\x00");
+  formatError("AAA\\");
+  formatError("AAA,");
+
+  // Invalid escapes.
+  formatError("AAA%25");
+  formatError("AAA%7F");
+  formatError("AAA%7F");
 }

 /// Checks that two [Uri]s are exactly the same.