Make UriData more case-insensitive.

Add `isMimeType` method to `UriData` class, to allow case-insensitive checking of the MIME type. Add `isCharset` and `isEncoding` methods to `UriData` class, to allow case-insensitive and alternative-encoding-name aware checking of the MIME type "charset" parameter. Make `UriData.fromString` and `UriData.fromBytes` recognize and omit a "text/plain" `mimeType` even if it is not all lower-case. Be case-insensitive in a few cases where we weren't before (like the `charset` getter not recognizing `CHARSET=utf-8`.) Fixes #28592 TEST=corelib/data_uri_test.dart updated BUG= http://dartbug.com/28592 Change-Id: Ia885af69d271856af7fadfe93851e07eff6ddca2 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/217366 Reviewed-by: Nate Bosch <nbosch@google.com> Commit-Queue: Lasse Nielsen <lrn@google.com>
2024-10-01 17:12:42 +00:00 · 2022-02-22 10:47:41 +00:00 · 2022-02-22 10:47:41 +00:00 · a3e39fadd6
parent 46daf0fbd1
commit a3e39fadd6
6 changed files with 266 additions and 68 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,6 +6,13 @@

 - Add `Finalizer` and `WeakReference` which can potentially detect when
  objects are "garbage collected".
+- Add `isMimeType` method to `UriData` class, to allow case-insensitive
+  checking of the MIME type.
+- Add `isCharset` and `isEncoding` methods to `UriData` class,
+  to allow case-insensitive and alternative-encoding-name aware checking
+  of the MIME type "charset" parameter.
+- Make `UriData.fromString` and `UriData.fromBytes` recognize and omit
+  a "text/plain" `mimeType` even if it is not all lower-case.

 #### `dart:ffi`

--- a/pkg/vm/testcases/transformations/type_flow/transformer/enum_from_lib_used_as_type.dart.expect
+++ b/pkg/vm/testcases/transformations/type_flow/transformer/enum_from_lib_used_as_type.dart.expect
@ -22,6 +22,6 @@ class Class extends core::Object {
  synthetic constructor •() → self::Class
    : super core::Object::•()
    ;
-[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3331,getterSelectorId:3332]  method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::Enum e) → core::int
+[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3333,getterSelectorId:3334]  method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::Enum e) → core::int
    return [@vm.inferred-type.metadata=!] e.{core::_Enum::index}{core::int};
 }
--- a/pkg/vm/testcases/transformations/type_flow/transformer/tree_shake_enum_from_lib.dart.expect
+++ b/pkg/vm/testcases/transformations/type_flow/transformer/tree_shake_enum_from_lib.dart.expect
@ -51,6 +51,6 @@ class ConstClass extends core::Object {
  synthetic constructor •() → self::ConstClass
    : super core::Object::•()
    ;
-[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3335,getterSelectorId:3336]  method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::ConstEnum e) → core::int
+[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3337,getterSelectorId:3338]  method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::ConstEnum e) → core::int
    return [@vm.inferred-type.metadata=!] e.{core::_Enum::index}{core::int};
 }
--- a/sdk/lib/core/uri.dart
+++ b/sdk/lib/core/uri.dart
@ -1715,38 +1715,7 @@ class _Uri implements Uri {
    String thisScheme = this.scheme;
    if (scheme == null) return thisScheme.isEmpty;
    if (scheme.length != thisScheme.length) return false;
-    return _compareScheme(scheme, thisScheme);
-  }
-
-  /// Compares scheme characters in [scheme] and at the start of [uri].
-  ///
-  /// Returns `true` if [scheme] represents the same scheme as the start of
-  /// [uri]. That means having the same characters, but possibly different case
-  /// for letters.
-  ///
-  /// This function doesn't check that the characters are valid URI scheme
-  /// characters. The [uri] is assumed to be valid, so if [scheme] matches
-  /// it, it has to be valid too.
-  ///
-  /// The length should be tested before calling this function,
-  /// so the scheme part of [uri] is known to have the same length as [scheme].
-  static bool _compareScheme(String scheme, String uri) {
-    for (int i = 0; i < scheme.length; i++) {
-      int schemeChar = scheme.codeUnitAt(i);
-      int uriChar = uri.codeUnitAt(i);
-      int delta = schemeChar ^ uriChar;
-      if (delta != 0) {
-        if (delta == 0x20) {
-          // Might be a case difference.
-          int lowerChar = uriChar | delta;
-          if (0x61 /*a*/ <= lowerChar && lowerChar <= 0x7a /*z*/) {
-            continue;
-          }
-        }
-        return false;
-      }
-    }
-    return true;
+    return _caseInsensitiveStartsWith(scheme, thisScheme, 0);
  }

  /// Report a parse failure.
@ -3481,7 +3450,7 @@ class UriData {
      Map<String, String>? parameters,
      StringBuffer buffer,
      List<int>? indices) {
-    if (mimeType == null || mimeType == "text/plain") {
+    if (mimeType == null || _caseInsensitiveEquals("text/plain", mimeType)) {
      mimeType = "";
    }

@ -3499,11 +3468,9 @@ class UriData {
          _tokenCharTable, mimeType.substring(slashIndex + 1), utf8, false));
    }
    if (charsetName != null) {
-      // TODO(39209): Use ?.. when sequences are properly supported.
-      if (indices != null)
-        indices
-          ..add(buffer.length)
-          ..add(buffer.length + 8);
+      indices
+        ?..add(buffer.length)
+        ..add(buffer.length + 8);
      buffer.write(";charset=");
      buffer.write(_Uri._uriEncode(_tokenCharTable, charsetName, utf8, false));
    }
@ -3637,6 +3604,27 @@ class UriData {
    return _Uri._uriDecode(_text, start, end, utf8, false);
  }

+  /// Whether the [UriData.mimeType] is equal to [mimeType].
+  ///
+  /// Compares the `data:` URI's MIME type to [mimeType] with a case-
+  /// insensitive comparison which ignores the case of ASCII letters.
+  ///
+  /// An empty [mimeType] is considered equivalent to `text/plain`,
+  /// both in the [mimeType] argument and in the `data:` URI itself.
+  @Since("2.17")
+  bool isMimeType(String mimeType) {
+    int start = _separatorIndices[0] + 1;
+    int end = _separatorIndices[1];
+    if (start == end) {
+      return mimeType.isEmpty ||
+          identical(mimeType, "text/plain") ||
+          _caseInsensitiveEquals(mimeType, "text/plain");
+    }
+    if (mimeType.isEmpty) mimeType = "text/plain";
+    return (mimeType.length == end - start) &&
+        _caseInsensitiveStartsWith(mimeType, _text, start);
+  }
+
  /// The charset parameter of the media type.
  ///
  /// If the parameters of the media type contains a `charset` parameter
@ -3647,23 +3635,89 @@ class UriData {
  /// If the MIME type representation in the URI text contains URI escapes,
  /// they are unescaped in the returned string.
  String get charset {
-    int parameterStart = 1;
-    int parameterEnd = _separatorIndices.length - 1; // The ',' before data.
-    if (isBase64) {
-      // There is a ";base64" separator, so subtract one for that as well.
-      parameterEnd -= 1;
-    }
-    for (int i = parameterStart; i < parameterEnd; i += 2) {
-      var keyStart = _separatorIndices[i] + 1;
-      var keyEnd = _separatorIndices[i + 1];
-      if (keyEnd == keyStart + 7 && _text.startsWith("charset", keyStart)) {
-        return _Uri._uriDecode(
-            _text, keyEnd + 1, _separatorIndices[i + 2], utf8, false);
-      }
+    var charsetIndex = _findCharsetIndex();
+    if (charsetIndex >= 0) {
+      var valueStart = _separatorIndices[charsetIndex + 1] + 1;
+      var valueEnd = _separatorIndices[charsetIndex + 2];
+      return _Uri._uriDecode(_text, valueStart, valueEnd, utf8, false);
    }
    return "US-ASCII";
  }

+  /// Finds the index of the separator before the "charset" parameter.
+  ///
+  /// Returns the index in [_separatorIndices] of the separator before
+  /// the name of the "charset" parameter, or -1 if there is no "charset"
+  /// parameter.
+  int _findCharsetIndex() {
+    var separatorIndices = _separatorIndices;
+    // Loop over all MIME-type parameters.
+    // Require room for both a key end and a value end (`i < length`),
+    // so a trailing base-64 marker is never examined as a parameter key.
+    for (int i = 3; i < separatorIndices.length; i += 2) {
+      var keyStart = separatorIndices[i - 2] + 1;
+      var keyEnd = separatorIndices[i - 1];
+      if (keyEnd == keyStart + "charset".length &&
+          _caseInsensitiveStartsWith("charset", _text, keyStart)) {
+        return i - 2;
+      }
+    }
+    return -1;
+  }
+
+  /// Checks whether the charset parameter of the MIME type is [charset].
+  ///
+  /// If this URI has no "charset" parameter, it is assumed to have a default
+  /// of `charset=US-ASCII`.
+  /// If [charset] is empty, it's treated like `"US-ASCII"`.
+  ///
+  /// Returns true if [charset] and the "charset" parameter value are
+  /// equal strings, ignoring the case of ASCII letters, or both
+  /// correspond to the same [Encoding], as given by [Encoding.getByName].
+  @Since("2.17")
+  bool isCharset(String charset) {
+    var charsetIndex = _findCharsetIndex();
+    if (charsetIndex < 0) {
+      return charset.isEmpty ||
+          _caseInsensitiveEquals(charset, "US-ASCII") ||
+          identical(Encoding.getByName(charset), ascii);
+    }
+    if (charset.isEmpty) charset = "US-ASCII";
+    var valueStart = _separatorIndices[charsetIndex + 1] + 1;
+    var valueEnd = _separatorIndices[charsetIndex + 2];
+    var length = valueEnd - valueStart;
+    if (charset.length == length &&
+        _caseInsensitiveStartsWith(charset, _text, valueStart)) {
+      return true;
+    }
+    var checkedEncoding = Encoding.getByName(charset);
+    return checkedEncoding != null &&
+        identical(
+            checkedEncoding,
+            Encoding.getByName(
+                _Uri._uriDecode(_text, valueStart, valueEnd, utf8, false)));
+  }
+
+  /// Whether the charset parameter represents [encoding].
+  ///
+  /// If the "charset" parameter is not present in the URI,
+  /// it defaults to "US-ASCII", which is the [ascii] encoding.
+  /// If present, it's converted to an [Encoding] using [Encoding.getByName],
+  /// and compared to [encoding].
+  @Since("2.17")
+  bool isEncoding(Encoding encoding) {
+    var charsetIndex = _findCharsetIndex();
+    if (charsetIndex < 0) {
+      return identical(encoding, ascii);
+    }
+    var valueStart = _separatorIndices[charsetIndex + 1] + 1;
+    var valueEnd = _separatorIndices[charsetIndex + 2];
+    return identical(
+        encoding,
+        Encoding.getByName(
+            _Uri._uriDecode(_text, valueStart, valueEnd, utf8, false)));
+  }
+
  /// Whether the data is Base64 encoded or not.
  bool get isBase64 => _separatorIndices.length.isOdd;

@ -4358,7 +4412,7 @@ class _SimpleUri implements Uri {
  bool isScheme(String scheme) {
    if (scheme == null || scheme.isEmpty) return _schemeEnd < 0;
    if (scheme.length != _schemeEnd) return false;
-    return _Uri._compareScheme(scheme, _uri);
+    return _caseInsensitiveStartsWith(scheme, _uri, 0);
  }

  String get scheme {
@ -4857,3 +4911,58 @@ int _skipPackageNameChars(String source, int start, int end) {
  }
  return -1;
 }
+
+/// Whether [string] at [start] starts with [prefix], ignoring case.
+///
+/// Returns whether [string] at offset [start]
+/// starts with the characters of [prefix],
+/// but ignores differences in the cases of ASCII letters,
+/// so `a` and `A` are considered equal.
+///
+/// The [string] must be at least as long as `start + prefix.length`.
+///
+/// When used to check the schemes of URIs,
+/// this function doesn't check that the characters are valid URI scheme
+/// characters. The [string] is assumed to be a valid URI,
+/// so if [prefix] matches it, it has to be valid too.
+bool _caseInsensitiveStartsWith(String prefix, String string, int start) =>
+    _caseInsensitiveCompareStart(prefix, string, start) >= 0;
+
+/// Compares [string] at [start] with [prefix], ignoring case.
+///
+/// Returns 0 if [string] starts with [prefix] at offset [start].
+/// Returns 0x20 if [string] starts with [prefix] at offset [start],
+/// but some ASCII letters have different case.
+/// Returns a negative value if [string] does not start with [prefix]
+/// at offset [start], even ignoring case differences.
+///
+/// The [string] must be at least as long as `start + prefix.length`.
+int _caseInsensitiveCompareStart(String prefix, String string, int start) {
+  int result = 0;
+  for (int i = 0; i < prefix.length; i++) {
+    int prefixChar = prefix.codeUnitAt(i);
+    int stringChar = string.codeUnitAt(start + i);
+    int delta = prefixChar ^ stringChar;
+    if (delta != 0) {
+      if (delta == 0x20) {
+        // Only bit 0x20 differs, so this might be an ASCII case difference.
+        int lowerChar = stringChar | delta;
+        if (0x61 /*a*/ <= lowerChar && lowerChar <= 0x7a /*z*/) {
+          result = 0x20;
+          continue;
+        }
+      }
+      return -1;
+    }
+  }
+  return result;
+}
+
+/// Checks whether two strings are equal ignoring case differences.
+///
+/// Returns whether [string1] and [string2] have the same length
+/// and same characters, but ignores the cases of ASCII letters,
+/// so `a` and `A` are considered equal.
+bool _caseInsensitiveEquals(String string1, String string2) =>
+    string1.length == string2.length &&
+    _caseInsensitiveStartsWith(string1, string2, 0);
--- a/tests/corelib/data_uri_test.dart
+++ b/tests/corelib/data_uri_test.dart
@ -34,20 +34,61 @@ main() {
 }

 void testMediaType() {
-  for (var mimeType in ["", "text/plain", "text/javascript"]) {
-    for (var charset in ["", ";charset=US-ASCII", ";charset=UTF-8"]) {
+  for (var mimeType in ["", "text/plain", "Text/PLAIN", "text/javascript"]) {
+    for (var charset in ["", "US-ASCII", "UTF-8"]) {
      for (var base64 in ["", ";base64"]) {
        bool isBase64 = base64.isNotEmpty;
-        var text = "data:$mimeType$charset$base64,";
+        // Parsing the URI from source:
+        var charsetParameter = charset.isEmpty ? "" : ";charset=$charset";
+        var text = "data:$mimeType$charsetParameter$base64,";
        var uri = UriData.parse(text);

-        String expectedCharset =
-            charset.isEmpty ? "US-ASCII" : charset.substring(9);
+        String expectedCharset = charset.isEmpty ? "US-ASCII" : charset;
        String expectedMimeType = mimeType.isEmpty ? "text/plain" : mimeType;

        Expect.equals(text, "$uri");
        Expect.equals(expectedMimeType, uri.mimeType);
+        Expect.isTrue(uri.isMimeType(expectedMimeType));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
        Expect.equals(expectedCharset, uri.charset);
+        Expect.isTrue(uri.isCharset(expectedCharset));
+        Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
+        Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
+        var expectedEncoding = Encoding.getByName(expectedCharset);
+        if (expectedEncoding != null) {
+          Expect.isTrue(uri.isEncoding(expectedEncoding));
+        }
+        Expect.equals(isBase64, uri.isBase64);
+
+        // Creating the URI using a constructor:
+        var encoding = Encoding.getByName(charset);
+        uri = UriData.fromString("",
+            mimeType: mimeType, encoding: encoding, base64: isBase64);
+        expectedMimeType =
+            (mimeType.isEmpty || mimeType.toLowerCase() == "text/plain")
+                ? "text/plain"
+                : mimeType;
+        expectedEncoding = encoding;
+        expectedCharset = expectedEncoding?.name ?? "US-ASCII";
+        var expectedText = "data:"
+            "${expectedMimeType == "text/plain" ? "" : expectedMimeType}"
+            "${charset.isEmpty ? "" : ";charset=$expectedCharset"}"
+            "${isBase64 ? ";base64" : ""}"
+            ",";
+
+        Expect.equals(expectedText, "$uri");
+        Expect.equals(expectedMimeType, uri.mimeType);
+        Expect.isTrue(uri.isMimeType(expectedMimeType));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
+        Expect.equals(expectedCharset, uri.charset);
+        Expect.isTrue(uri.isCharset(expectedCharset));
+        Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
+        Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
+        if (expectedEncoding != null) {
+          Expect.isTrue(uri.isEncoding(expectedEncoding));
+        }
        Expect.equals(isBase64, uri.isBase64);
      }
    }
@ -236,8 +277,8 @@ void testErrors() {
  Expect.throwsFormatException(() => UriData.parse("data:type/sub;k=v;base64"));

  void formatError(String input) {
-    Expect.throwsFormatException(() => UriData.parse("data:;base64,$input"),
-        input);
+    Expect.throwsFormatException(
+        () => UriData.parse("data:;base64,$input"), input);
  }

  // Invalid base64 format (detected when parsed).
--- a/tests/corelib_2/data_uri_test.dart
+++ b/tests/corelib_2/data_uri_test.dart
@ -36,20 +36,61 @@ main() {
 }

 void testMediaType() {
-  for (var mimeType in ["", "text/plain", "text/javascript"]) {
-    for (var charset in ["", ";charset=US-ASCII", ";charset=UTF-8"]) {
+  for (var mimeType in ["", "text/plain", "Text/PLAIN", "text/javascript"]) {
+    for (var charset in ["", "US-ASCII", "UTF-8"]) {
      for (var base64 in ["", ";base64"]) {
        bool isBase64 = base64.isNotEmpty;
-        var text = "data:$mimeType$charset$base64,";
+        // Parsing the URI from source:
+        var charsetParameter = charset.isEmpty ? "" : ";charset=$charset";
+        var text = "data:$mimeType$charsetParameter$base64,";
        var uri = UriData.parse(text);

-        String expectedCharset =
-            charset.isEmpty ? "US-ASCII" : charset.substring(9);
+        String expectedCharset = charset.isEmpty ? "US-ASCII" : charset;
        String expectedMimeType = mimeType.isEmpty ? "text/plain" : mimeType;

        Expect.equals(text, "$uri");
        Expect.equals(expectedMimeType, uri.mimeType);
+        Expect.isTrue(uri.isMimeType(expectedMimeType));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
        Expect.equals(expectedCharset, uri.charset);
+        Expect.isTrue(uri.isCharset(expectedCharset));
+        Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
+        Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
+        var expectedEncoding = Encoding.getByName(expectedCharset);
+        if (expectedEncoding != null) {
+          Expect.isTrue(uri.isEncoding(expectedEncoding));
+        }
+        Expect.equals(isBase64, uri.isBase64);
+
+        // Creating the URI using a constructor:
+        var encoding = Encoding.getByName(charset);
+        uri = UriData.fromString("",
+            mimeType: mimeType, encoding: encoding, base64: isBase64);
+        expectedMimeType =
+            (mimeType.isEmpty || mimeType.toLowerCase() == "text/plain")
+                ? "text/plain"
+                : mimeType;
+        expectedEncoding = encoding;
+        expectedCharset = expectedEncoding?.name ?? "US-ASCII";
+        var expectedText = "data:"
+            "${expectedMimeType == "text/plain" ? "" : expectedMimeType}"
+            "${charset.isEmpty ? "" : ";charset=$expectedCharset"}"
+            "${isBase64 ? ";base64" : ""}"
+            ",";
+
+        Expect.equals(expectedText, "$uri");
+        Expect.equals(expectedMimeType, uri.mimeType);
+        Expect.isTrue(uri.isMimeType(expectedMimeType));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
+        Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
+        Expect.equals(expectedCharset, uri.charset);
+        Expect.isTrue(uri.isCharset(expectedCharset));
+        Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
+        Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
+        if (expectedEncoding != null) {
+          Expect.isTrue(uri.isEncoding(expectedEncoding));
+        }
        Expect.equals(isBase64, uri.isBase64);
      }
    }
@ -238,8 +279,8 @@ void testErrors() {
  Expect.throwsFormatException(() => UriData.parse("data:type/sub;k=v;base64"));

  void formatError(String input) {
-    Expect.throwsFormatException(() => UriData.parse("data:;base64,$input"),
-        input);
+    Expect.throwsFormatException(
+        () => UriData.parse("data:;base64,$input"), input);
  }

  // Invalid base64 format (detected when parsed).