Make UriData more case-insensitive.

Add `isMimeType` method to `UriData` class, to allow case-insensitive
checking of the MIME type.
Add `isCharset` and `isEncoding` methods to `UriData` class,
to allow case-insensitive and alternative-encoding-name aware checking
of the MIME type "charset" parameter.
Make `UriData.fromString` and `UriData.fromBytes` recognize and omit
a "text/plain" `mimeType` even if it is not all lower-case.

Be case-insensitive in a few cases where we weren't before
(like the `charset` getter not recognizing `CHARSET=utf-8`).

Fixes #28592
TEST=corelib/data_uri_test.dart updated

BUG= http://dartbug.com/28592

Change-Id: Ia885af69d271856af7fadfe93851e07eff6ddca2
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/217366
Reviewed-by: Nate Bosch <nbosch@google.com>
Commit-Queue: Lasse Nielsen <lrn@google.com>
This commit is contained in:
Lasse R.H. Nielsen 2022-02-22 10:47:41 +00:00 committed by Commit Bot
parent 46daf0fbd1
commit a3e39fadd6
6 changed files with 266 additions and 68 deletions

View file

@ -6,6 +6,13 @@
- Add `Finalizer` and `WeakReference` which can potentially detect when
objects are "garbage collected".
- Add `isMimeType` method to `UriData` class, to allow case-insensitive
checking of the MIME type.
- Add `isCharset` and `isEncoding` methods to `UriData` class,
to allow case-insensitive and alternative-encoding-name aware checking
of the MIME type "charset" parameter.
- Make `UriData.fromString` and `UriData.fromBytes` recognize and omit
a "text/plain" `mimeType` even if it is not all lower-case.
#### `dart:ffi`

View file

@ -22,6 +22,6 @@ class Class extends core::Object {
synthetic constructor •() → self::Class
: super core::Object::•()
;
[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3331,getterSelectorId:3332] method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::Enum e) → core::int
[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3333,getterSelectorId:3334] method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::Enum e) → core::int
return [@vm.inferred-type.metadata=!] e.{core::_Enum::index}{core::int};
}

View file

@ -51,6 +51,6 @@ class ConstClass extends core::Object {
synthetic constructor •() → self::ConstClass
: super core::Object::•()
;
[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3335,getterSelectorId:3336] method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::ConstEnum e) → core::int
[@vm.procedure-attributes.metadata=methodOrSetterCalledDynamically:false,getterCalledDynamically:false,hasThisUses:false,hasTearOffUses:false,methodOrSetterSelectorId:3337,getterSelectorId:3338] method method([@vm.inferred-type.metadata=dart.core::Null? (value: null)] self::ConstEnum e) → core::int
return [@vm.inferred-type.metadata=!] e.{core::_Enum::index}{core::int};
}

View file

@ -1715,38 +1715,7 @@ class _Uri implements Uri {
String thisScheme = this.scheme;
if (scheme == null) return thisScheme.isEmpty;
if (scheme.length != thisScheme.length) return false;
return _compareScheme(scheme, thisScheme);
}
/// Compares scheme characters in [scheme] and at the start of [uri].
///
/// Returns `true` if [scheme] represents the same scheme as the start of
/// [uri]. That means having the same characters, but possibly different case
/// for letters.
///
/// This function doesn't check that the characters are valid URI scheme
/// characters. The [uri] is assumed to be valid, so if [scheme] matches
/// it, it has to be valid too.
///
/// The length should be tested before calling this function,
/// so the scheme part of [uri] is known to have the same length as [scheme].
static bool _compareScheme(String scheme, String uri) {
for (int i = 0; i < scheme.length; i++) {
int schemeChar = scheme.codeUnitAt(i);
int uriChar = uri.codeUnitAt(i);
int delta = schemeChar ^ uriChar;
if (delta != 0) {
if (delta == 0x20) {
// Might be a case difference.
int lowerChar = uriChar | delta;
if (0x61 /*a*/ <= lowerChar && lowerChar <= 0x7a /*z*/) {
continue;
}
}
return false;
}
}
return true;
return _caseInsensitiveStartsWith(scheme, thisScheme, 0);
}
/// Report a parse failure.
@ -3481,7 +3450,7 @@ class UriData {
Map<String, String>? parameters,
StringBuffer buffer,
List<int>? indices) {
if (mimeType == null || mimeType == "text/plain") {
if (mimeType == null || _caseInsensitiveEquals("text/plain", mimeType)) {
mimeType = "";
}
@ -3499,11 +3468,9 @@ class UriData {
_tokenCharTable, mimeType.substring(slashIndex + 1), utf8, false));
}
if (charsetName != null) {
// TODO(39209): Use ?.. when sequences are properly supported.
if (indices != null)
indices
..add(buffer.length)
..add(buffer.length + 8);
indices
?..add(buffer.length)
..add(buffer.length + 8);
buffer.write(";charset=");
buffer.write(_Uri._uriEncode(_tokenCharTable, charsetName, utf8, false));
}
@ -3637,6 +3604,27 @@ class UriData {
return _Uri._uriDecode(_text, start, end, utf8, false);
}
/// Whether the [UriData.mimeType] is equal to [mimeType].
///
/// The MIME type of this `data:` URI is compared to [mimeType] while
/// ignoring the case of ASCII letters.
///
/// An empty MIME type is treated as `text/plain`, both when it is
/// the [mimeType] argument and when the `data:` URI itself omits its type.
///
/// The comparison is made directly against the URI text, so percent-escapes
/// in the URI's MIME type are not decoded before comparing.
@Since("2.17")
bool isMimeType(String mimeType) {
  int typeStart = _separatorIndices[0] + 1;
  int typeEnd = _separatorIndices[1];
  if (typeStart == typeEnd) {
    // The URI has no explicit MIME type, which means "text/plain".
    if (mimeType.isEmpty) return true;
    if (identical(mimeType, "text/plain")) return true;
    return _caseInsensitiveEquals(mimeType, "text/plain");
  }
  var wantedType = mimeType.isEmpty ? "text/plain" : mimeType;
  // The length must be checked first; the prefix check below requires
  // that the URI text has enough characters after `typeStart`.
  if (wantedType.length != typeEnd - typeStart) return false;
  return _caseInsensitiveStartsWith(wantedType, _text, typeStart);
}
/// The charset parameter of the media type.
///
/// If the parameters of the media type contains a `charset` parameter
@ -3647,23 +3635,89 @@ class UriData {
/// If the MIME type representation in the URI text contains URI escapes,
/// they are unescaped in the returned string.
String get charset {
int parameterStart = 1;
int parameterEnd = _separatorIndices.length - 1; // The ',' before data.
if (isBase64) {
// There is a ";base64" separator, so subtract one for that as well.
parameterEnd -= 1;
}
for (int i = parameterStart; i < parameterEnd; i += 2) {
var keyStart = _separatorIndices[i] + 1;
var keyEnd = _separatorIndices[i + 1];
if (keyEnd == keyStart + 7 && _text.startsWith("charset", keyStart)) {
return _Uri._uriDecode(
_text, keyEnd + 1, _separatorIndices[i + 2], utf8, false);
}
var charsetIndex = _findCharsetIndex();
if (charsetIndex >= 0) {
var valueStart = _separatorIndices[charsetIndex + 1] + 1;
var valueEnd = _separatorIndices[charsetIndex + 2];
return _Uri._uriDecode(_text, valueStart, valueEnd, utf8, false);
}
return "US-ASCII";
}
/// Finds the index of the separator before the "charset" parameter.
///
/// Returns the index in [_separatorIndices] of the separator before
/// the name of the "charset" parameter, or -1 if there is no "charset"
/// parameter. The parameter name is matched ignoring the case of
/// ASCII letters.
///
/// When a non-negative index `k` is returned, the parameter name lies
/// between separators `k` and `k + 1`, and its value lies between
/// separators `k + 1` and `k + 2`. All three entries are guaranteed to exist.
int _findCharsetIndex() {
  var separatorIndices = _separatorIndices;
  const parameterName = "charset";
  // Separator 0 precedes the MIME type, so parameters start at separator 1.
  // Each parameter uses two separators (before the key, and between key and
  // value) and must be followed by one more separator ending the value.
  // Requiring `keyIndex + 2` to be a valid index therefore skips a trailing
  // base-64 marker, which contributes only a single separator.
  for (int keyIndex = 1;
      keyIndex + 2 < separatorIndices.length;
      keyIndex += 2) {
    var keyStart = separatorIndices[keyIndex] + 1;
    var keyEnd = separatorIndices[keyIndex + 1];
    if (keyEnd - keyStart == parameterName.length &&
        _caseInsensitiveStartsWith(parameterName, _text, keyStart)) {
      return keyIndex;
    }
  }
  return -1;
}
/// Whether the "charset" parameter of the MIME type matches [charset].
///
/// A URI without a "charset" parameter is treated as if it had
/// `charset=US-ASCII`, and an empty [charset] argument is likewise
/// treated as `"US-ASCII"`.
///
/// Returns `true` if [charset] and the parameter value are the same
/// string when ignoring the case of ASCII letters, or if both names
/// resolve to the same [Encoding] through [Encoding.getByName].
@Since("2.17")
bool isCharset(String charset) {
  var separatorIndex = _findCharsetIndex();
  if (separatorIndex < 0) {
    // No explicit parameter: the implied charset is US-ASCII.
    if (charset.isEmpty) return true;
    if (_caseInsensitiveEquals(charset, "US-ASCII")) return true;
    return identical(Encoding.getByName(charset), ascii);
  }
  var wantedName = charset.isEmpty ? "US-ASCII" : charset;
  var valueStart = _separatorIndices[separatorIndex + 1] + 1;
  var valueEnd = _separatorIndices[separatorIndex + 2];
  // Fast path: the name matches the raw URI text, ignoring case.
  if (wantedName.length == valueEnd - valueStart &&
      _caseInsensitiveStartsWith(wantedName, _text, valueStart)) {
    return true;
  }
  // Slow path: the names differ, but may be aliases of the same encoding.
  var wantedEncoding = Encoding.getByName(wantedName);
  if (wantedEncoding == null) return false;
  var declaredName = _Uri._uriDecode(_text, valueStart, valueEnd, utf8, false);
  return identical(wantedEncoding, Encoding.getByName(declaredName));
}
/// Whether the "charset" parameter denotes [encoding].
///
/// When the URI has no "charset" parameter, the charset defaults to
/// "US-ASCII", which corresponds to the [ascii] encoding.
/// Otherwise the parameter value is unescaped, looked up with
/// [Encoding.getByName], and the result is compared to [encoding].
@Since("2.17")
bool isEncoding(Encoding encoding) {
  var separatorIndex = _findCharsetIndex();
  if (separatorIndex < 0) return identical(encoding, ascii);
  var valueStart = _separatorIndices[separatorIndex + 1] + 1;
  var valueEnd = _separatorIndices[separatorIndex + 2];
  var declaredName = _Uri._uriDecode(_text, valueStart, valueEnd, utf8, false);
  return identical(encoding, Encoding.getByName(declaredName));
}
/// Whether the data is Base64 encoded or not.
///
/// Determined from the parity of the number of recorded separators:
/// an odd count means the data is Base64 encoded.
bool get isBase64 {
  return _separatorIndices.length.isOdd;
}
@ -4358,7 +4412,7 @@ class _SimpleUri implements Uri {
bool isScheme(String scheme) {
if (scheme == null || scheme.isEmpty) return _schemeEnd < 0;
if (scheme.length != _schemeEnd) return false;
return _Uri._compareScheme(scheme, _uri);
return _caseInsensitiveStartsWith(scheme, _uri, 0);
}
String get scheme {
@ -4857,3 +4911,58 @@ int _skipPackageNameChars(String source, int start, int end) {
}
return -1;
}
/// Whether [string] at [start] starts with [prefix], ignoring case.
///
/// Checks whether the characters of [string] beginning at offset [start]
/// are the characters of [prefix], where ASCII letters that differ
/// only in case, like `a` and `A`, are considered equal.
///
/// The [string] must have a length of at least `start + prefix.length`.
///
/// When used to check the scheme of a URI, this function does not verify
/// that the characters are valid URI scheme characters.
/// The [string] is assumed to be a valid URI,
/// so if [prefix] matches it, the prefix has to be valid too.
bool _caseInsensitiveStartsWith(String prefix, String string, int start) {
  // Any non-negative result means a match, with or without case differences.
  var comparison = _caseInsensitiveCompareStart(prefix, string, start);
  return comparison >= 0;
}
/// Compares [string] at [start] with [prefix], ignoring case.
///
/// The result is:
/// * 0 if [string] contains exactly [prefix] at offset [start],
/// * 0x20 if [string] contains [prefix] at offset [start] when
///   ignoring that some ASCII letters differ in case, and
/// * a negative value if [string] does not contain [prefix] at offset
///   [start], even when case differences are ignored.
///
/// The [string] must have a length of at least `start + prefix.length`.
int _caseInsensitiveCompareStart(String prefix, String string, int start) {
  // Upper- and lower-case ASCII letters differ in exactly this bit.
  const int caseBit = 0x20;
  int outcome = 0;
  int prefixLength = prefix.length;
  int index = 0;
  while (index < prefixLength) {
    int expected = prefix.codeUnitAt(index);
    int actual = string.codeUnitAt(start + index);
    index++;
    if (expected == actual) continue;
    // Only a difference in the case bit can be a case difference.
    if ((expected ^ actual) != caseBit) return -1;
    // Setting the case bit must yield a lower-case ASCII letter,
    // otherwise the two characters are unrelated non-letters.
    int lowered = actual | caseBit;
    if (lowered < 0x61 /*a*/ || lowered > 0x7a /*z*/) return -1;
    outcome = caseBit;
  }
  return outcome;
}
/// Checks whether two strings are equal, ignoring case differences.
///
/// Returns whether [string1] and [string2] have the same length
/// and the same characters, where ASCII letters that differ only in case,
/// like `a` and `A`, are considered equal.
bool _caseInsensitiveEquals(String string1, String string2) {
  if (string1.length != string2.length) return false;
  return _caseInsensitiveStartsWith(string1, string2, 0);
}

View file

@ -34,20 +34,61 @@ main() {
}
void testMediaType() {
for (var mimeType in ["", "text/plain", "text/javascript"]) {
for (var charset in ["", ";charset=US-ASCII", ";charset=UTF-8"]) {
for (var mimeType in ["", "text/plain", "Text/PLAIN", "text/javascript"]) {
for (var charset in ["", "US-ASCII", "UTF-8"]) {
for (var base64 in ["", ";base64"]) {
bool isBase64 = base64.isNotEmpty;
var text = "data:$mimeType$charset$base64,";
// Parsing the URI from source:
var charsetParameter = charset.isEmpty ? "" : ";charset=$charset";
var text = "data:$mimeType$charsetParameter$base64,";
var uri = UriData.parse(text);
String expectedCharset =
charset.isEmpty ? "US-ASCII" : charset.substring(9);
String expectedCharset = charset.isEmpty ? "US-ASCII" : charset;
String expectedMimeType = mimeType.isEmpty ? "text/plain" : mimeType;
Expect.equals(text, "$uri");
Expect.equals(expectedMimeType, uri.mimeType);
Expect.isTrue(uri.isMimeType(expectedMimeType));
Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
Expect.equals(expectedCharset, uri.charset);
Expect.isTrue(uri.isCharset(expectedCharset));
Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
var expectedEncoding = Encoding.getByName(expectedCharset);
if (expectedEncoding != null) {
Expect.isTrue(uri.isEncoding(expectedEncoding));
}
Expect.equals(isBase64, uri.isBase64);
// Creating the URI using a constructor:
var encoding = Encoding.getByName(charset);
uri = UriData.fromString("",
mimeType: mimeType, encoding: encoding, base64: isBase64);
expectedMimeType =
(mimeType.isEmpty || mimeType.toLowerCase() == "text/plain")
? "text/plain"
: mimeType;
expectedEncoding = encoding;
expectedCharset = expectedEncoding?.name ?? "US-ASCII";
var expectedText = "data:"
"${expectedMimeType == "text/plain" ? "" : expectedMimeType}"
"${charset.isEmpty ? "" : ";charset=$expectedCharset"}"
"${isBase64 ? ";base64" : ""}"
",";
Expect.equals(expectedText, "$uri");
Expect.equals(expectedMimeType, uri.mimeType);
Expect.isTrue(uri.isMimeType(expectedMimeType));
Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
Expect.equals(expectedCharset, uri.charset);
Expect.isTrue(uri.isCharset(expectedCharset));
Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
if (expectedEncoding != null) {
Expect.isTrue(uri.isEncoding(expectedEncoding));
}
Expect.equals(isBase64, uri.isBase64);
}
}
@ -236,8 +277,8 @@ void testErrors() {
Expect.throwsFormatException(() => UriData.parse("data:type/sub;k=v;base64"));
void formatError(String input) {
Expect.throwsFormatException(() => UriData.parse("data:;base64,$input"),
input);
Expect.throwsFormatException(
() => UriData.parse("data:;base64,$input"), input);
}
// Invalid base64 format (detected when parsed).

View file

@ -36,20 +36,61 @@ main() {
}
void testMediaType() {
for (var mimeType in ["", "text/plain", "text/javascript"]) {
for (var charset in ["", ";charset=US-ASCII", ";charset=UTF-8"]) {
for (var mimeType in ["", "text/plain", "Text/PLAIN", "text/javascript"]) {
for (var charset in ["", "US-ASCII", "UTF-8"]) {
for (var base64 in ["", ";base64"]) {
bool isBase64 = base64.isNotEmpty;
var text = "data:$mimeType$charset$base64,";
// Parsing the URI from source:
var charsetParameter = charset.isEmpty ? "" : ";charset=$charset";
var text = "data:$mimeType$charsetParameter$base64,";
var uri = UriData.parse(text);
String expectedCharset =
charset.isEmpty ? "US-ASCII" : charset.substring(9);
String expectedCharset = charset.isEmpty ? "US-ASCII" : charset;
String expectedMimeType = mimeType.isEmpty ? "text/plain" : mimeType;
Expect.equals(text, "$uri");
Expect.equals(expectedMimeType, uri.mimeType);
Expect.isTrue(uri.isMimeType(expectedMimeType));
Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
Expect.equals(expectedCharset, uri.charset);
Expect.isTrue(uri.isCharset(expectedCharset));
Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
var expectedEncoding = Encoding.getByName(expectedCharset);
if (expectedEncoding != null) {
Expect.isTrue(uri.isEncoding(expectedEncoding));
}
Expect.equals(isBase64, uri.isBase64);
// Creating the URI using a constructor:
var encoding = Encoding.getByName(charset);
uri = UriData.fromString("",
mimeType: mimeType, encoding: encoding, base64: isBase64);
expectedMimeType =
(mimeType.isEmpty || mimeType.toLowerCase() == "text/plain")
? "text/plain"
: mimeType;
expectedEncoding = encoding;
expectedCharset = expectedEncoding?.name ?? "US-ASCII";
var expectedText = "data:"
"${expectedMimeType == "text/plain" ? "" : expectedMimeType}"
"${charset.isEmpty ? "" : ";charset=$expectedCharset"}"
"${isBase64 ? ";base64" : ""}"
",";
Expect.equals(expectedText, "$uri");
Expect.equals(expectedMimeType, uri.mimeType);
Expect.isTrue(uri.isMimeType(expectedMimeType));
Expect.isTrue(uri.isMimeType(expectedMimeType.toUpperCase()));
Expect.isTrue(uri.isMimeType(expectedMimeType.toLowerCase()));
Expect.equals(expectedCharset, uri.charset);
Expect.isTrue(uri.isCharset(expectedCharset));
Expect.isTrue(uri.isCharset(expectedCharset.toLowerCase()));
Expect.isTrue(uri.isCharset(expectedCharset.toUpperCase()));
if (expectedEncoding != null) {
Expect.isTrue(uri.isEncoding(expectedEncoding));
}
Expect.equals(isBase64, uri.isBase64);
}
}
@ -238,8 +279,8 @@ void testErrors() {
Expect.throwsFormatException(() => UriData.parse("data:type/sub;k=v;base64"));
void formatError(String input) {
Expect.throwsFormatException(() => UriData.parse("data:;base64,$input"),
input);
Expect.throwsFormatException(
() => UriData.parse("data:;base64,$input"), input);
}
// Invalid base64 format (detected when parsed).