dart-sdk/tests/lib/convert/utf84_test.dart
Aske Simon Christensen f5bf50e7a4 Adjust UTF-8 tests to match WHATWG standard expectations.
This adjusts all UTF-8 tests to the new semantics in the breaking
change described here: https://github.com/dart-lang/sdk/issues/41100

This has three parts:
- Unpaired surrogates are encoded as replacement characters, and
  encoded surrogates are considered malformed input when decoding.
- Decoding errors are generally reported on the position of the byte
  that conclusively makes the input malformed.
- The number of replacement characters emitted by the decoder is
  generally one per unfinished sequence or undecodable byte.

The code changes to implement the new semantics are placed in subsequent
commits.

Change-Id: I4cc8ce660e39287e734070764ab8e1f0ebb8b9e0
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/143815
Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>
2020-05-04 10:48:32 +00:00

1047 lines
21 KiB
Dart
Executable file

// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'package:expect/expect.dart';
import 'dart:convert';
import 'dart:typed_data' show Uint8List;
const String testEnglishPhrase = "The quick brown fox jumps over the lazy dog.";
const List<int> testEnglishUtf8 = const <int>[
0x54,
0x68,
0x65,
0x20,
0x71,
0x75,
0x69,
0x63,
0x6b,
0x20,
0x62,
0x72,
0x6f,
0x77,
0x6e,
0x20,
0x66,
0x6f,
0x78,
0x20,
0x6a,
0x75,
0x6d,
0x70,
0x73,
0x20,
0x6f,
0x76,
0x65,
0x72,
0x20,
0x74,
0x68,
0x65,
0x20,
0x6c,
0x61,
0x7a,
0x79,
0x20,
0x64,
0x6f,
0x67,
0x2e
];
const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
"fløde mens cirkusklovnen Wolther spillede på xylofon.";
const List<int> testDanishUtf8 = const <int>[
0x51,
0x75,
0x69,
0x7a,
0x64,
0x65,
0x6c,
0x74,
0x61,
0x67,
0x65,
0x72,
0x6e,
0x65,
0x20,
0x73,
0x70,
0x69,
0x73,
0x74,
0x65,
0x20,
0x6a,
0x6f,
0x72,
0x64,
0x62,
0xc3,
0xa6,
0x72,
0x20,
0x6d,
0x65,
0x64,
0x20,
0x66,
0x6c,
0xc3,
0xb8,
0x64,
0x65,
0x20,
0x6d,
0x65,
0x6e,
0x73,
0x20,
0x63,
0x69,
0x72,
0x6b,
0x75,
0x73,
0x6b,
0x6c,
0x6f,
0x76,
0x6e,
0x65,
0x6e,
0x20,
0x57,
0x6f,
0x6c,
0x74,
0x68,
0x65,
0x72,
0x20,
0x73,
0x70,
0x69,
0x6c,
0x6c,
0x65,
0x64,
0x65,
0x20,
0x70,
0xc3,
0xa5,
0x20,
0x78,
0x79,
0x6c,
0x6f,
0x66,
0x6f,
0x6e,
0x2e
];
// unusual formatting due to strange editor interaction w/ text direction.
const String testHebrewPhrase =
"דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
const List<int> testHebrewUtf8 = const <int>[
0xd7,
0x93,
0xd7,
0x92,
0x20,
0xd7,
0xa1,
0xd7,
0xa7,
0xd7,
0xa8,
0xd7,
0x9f,
0x20,
0xd7,
0xa9,
0xd7,
0x98,
0x20,
0xd7,
0x91,
0xd7,
0x99,
0xd7,
0x9d,
0x20,
0xd7,
0x9e,
0xd7,
0x90,
0xd7,
0x95,
0xd7,
0x9b,
0xd7,
0x96,
0xd7,
0x91,
0x20,
0xd7,
0x95,
0xd7,
0x9c,
0xd7,
0xa4,
0xd7,
0xaa,
0xd7,
0xa2,
0x20,
0xd7,
0x9e,
0xd7,
0xa6,
0xd7,
0x90,
0x20,
0xd7,
0x9c,
0xd7,
0x95,
0x20,
0xd7,
0x97,
0xd7,
0x91,
0xd7,
0xa8,
0xd7,
0x94,
0x20,
0xd7,
0x90,
0xd7,
0x99,
0xd7,
0x9a,
0x20,
0xd7,
0x94,
0xd7,
0xa7,
0xd7,
0x9c,
0xd7,
0x99,
0xd7,
0x98,
0xd7,
0x94
];
const String testRussianPhrase = "Съешь же ещё этих мягких "
"французских булок да выпей чаю";
const List<int> testRussianUtf8 = const <int>[
0xd0,
0xa1,
0xd1,
0x8a,
0xd0,
0xb5,
0xd1,
0x88,
0xd1,
0x8c,
0x20,
0xd0,
0xb6,
0xd0,
0xb5,
0x20,
0xd0,
0xb5,
0xd1,
0x89,
0xd1,
0x91,
0x20,
0xd1,
0x8d,
0xd1,
0x82,
0xd0,
0xb8,
0xd1,
0x85,
0x20,
0xd0,
0xbc,
0xd1,
0x8f,
0xd0,
0xb3,
0xd0,
0xba,
0xd0,
0xb8,
0xd1,
0x85,
0x20,
0xd1,
0x84,
0xd1,
0x80,
0xd0,
0xb0,
0xd0,
0xbd,
0xd1,
0x86,
0xd1,
0x83,
0xd0,
0xb7,
0xd1,
0x81,
0xd0,
0xba,
0xd0,
0xb8,
0xd1,
0x85,
0x20,
0xd0,
0xb1,
0xd1,
0x83,
0xd0,
0xbb,
0xd0,
0xbe,
0xd0,
0xba,
0x20,
0xd0,
0xb4,
0xd0,
0xb0,
0x20,
0xd0,
0xb2,
0xd1,
0x8b,
0xd0,
0xbf,
0xd0,
0xb5,
0xd0,
0xb9,
0x20,
0xd1,
0x87,
0xd0,
0xb0,
0xd1,
0x8e
];
const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
"στὸ χρυσαφὶ ξέφωτο";
const List<int> testGreekUtf8 = const <int>[
0xce,
0x93,
0xce,
0xb1,
0xce,
0xb6,
0xce,
0xad,
0xce,
0xb5,
0xcf,
0x82,
0x20,
0xce,
0xba,
0xce,
0xb1,
0xe1,
0xbd,
0xb6,
0x20,
0xce,
0xbc,
0xcf,
0x85,
0xcf,
0x81,
0xcf,
0x84,
0xce,
0xb9,
0xe1,
0xbd,
0xb2,
0xcf,
0x82,
0x20,
0xce,
0xb4,
0xe1,
0xbd,
0xb2,
0xce,
0xbd,
0x20,
0xce,
0xb8,
0xe1,
0xbd,
0xb0,
0x20,
0xce,
0xb2,
0xcf,
0x81,
0xe1,
0xbf,
0xb6,
0x20,
0xcf,
0x80,
0xce,
0xb9,
0xe1,
0xbd,
0xb0,
0x20,
0xcf,
0x83,
0xcf,
0x84,
0xe1,
0xbd,
0xb8,
0x20,
0xcf,
0x87,
0xcf,
0x81,
0xcf,
0x85,
0xcf,
0x83,
0xce,
0xb1,
0xcf,
0x86,
0xe1,
0xbd,
0xb6,
0x20,
0xce,
0xbe,
0xce,
0xad,
0xcf,
0x86,
0xcf,
0x89,
0xcf,
0x84,
0xce,
0xbf
];
const String testKatakanaPhrase = "イロハニホヘト チリヌルヲ ワカヨタレソ "
"ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";
const List<int> testKatakanaUtf8 = const <int>[
0xe3,
0x82,
0xa4,
0xe3,
0x83,
0xad,
0xe3,
0x83,
0x8f,
0xe3,
0x83,
0x8b,
0xe3,
0x83,
0x9b,
0xe3,
0x83,
0x98,
0xe3,
0x83,
0x88,
0x20,
0xe3,
0x83,
0x81,
0xe3,
0x83,
0xaa,
0xe3,
0x83,
0x8c,
0xe3,
0x83,
0xab,
0xe3,
0x83,
0xb2,
0x20,
0xe3,
0x83,
0xaf,
0xe3,
0x82,
0xab,
0xe3,
0x83,
0xa8,
0xe3,
0x82,
0xbf,
0xe3,
0x83,
0xac,
0xe3,
0x82,
0xbd,
0x20,
0xe3,
0x83,
0x84,
0xe3,
0x83,
0x8d,
0xe3,
0x83,
0x8a,
0xe3,
0x83,
0xa9,
0xe3,
0x83,
0xa0,
0x20,
0xe3,
0x82,
0xa6,
0xe3,
0x83,
0xb0,
0xe3,
0x83,
0x8e,
0xe3,
0x82,
0xaa,
0xe3,
0x82,
0xaf,
0xe3,
0x83,
0xa4,
0xe3,
0x83,
0x9e,
0x20,
0xe3,
0x82,
0xb1,
0xe3,
0x83,
0x95,
0xe3,
0x82,
0xb3,
0xe3,
0x82,
0xa8,
0xe3,
0x83,
0x86,
0x20,
0xe3,
0x82,
0xa2,
0xe3,
0x82,
0xb5,
0xe3,
0x82,
0xad,
0xe3,
0x83,
0xa6,
0xe3,
0x83,
0xa1,
0xe3,
0x83,
0x9f,
0xe3,
0x82,
0xb7,
0x20,
0xe3,
0x83,
0xb1,
0xe3,
0x83,
0x92,
0xe3,
0x83,
0xa2,
0xe3,
0x82,
0xbb,
0xe3,
0x82,
0xb9,
0xe3,
0x83,
0xb3
];
void main() {
testEncodeToUtf8();
String decodeUtf8List(List<int> codeUnits) => utf8.decode(codeUnits);
String decodeUtf8Uint8List(List<int> codeUnits) =>
utf8.decode(new Uint8List.fromList(codeUnits));
testUtf8BytesToString(decodeUtf8List);
testUtf8BytesToString(decodeUtf8Uint8List);
List<int> utf8ToRunes1(List<int> codeUnits) {
return utf8.decode(codeUnits, allowMalformed: true).runes.toList();
}
List<int> utf8ToRunes2(List<int> codeUnits) {
return utf8
.decode(new Uint8List.fromList(codeUnits), allowMalformed: true)
.runes
.toList();
}
testUtf8bytesToCodepoints(utf8ToRunes1);
testUtf8bytesToCodepoints(utf8ToRunes2);
}
void testEncodeToUtf8() {
List<int> encodeUtf8(String str) => utf8.encode(str);
Expect.listEquals(
testEnglishUtf8, encodeUtf8(testEnglishPhrase), "english to utf8");
Expect.listEquals(
testDanishUtf8, encodeUtf8(testDanishPhrase), "encode danish to utf8");
Expect.listEquals(
testHebrewUtf8, encodeUtf8(testHebrewPhrase), "Hebrew to utf8");
Expect.listEquals(
testRussianUtf8, encodeUtf8(testRussianPhrase), "Russian to utf8");
Expect.listEquals(
testGreekUtf8, encodeUtf8(testGreekPhrase), "Greek to utf8");
Expect.listEquals(
testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), "Katakana to utf8");
}
void testUtf8bytesToCodepoints(List<int> utf8ToRunes(List<int> utf8)) {
Expect.listEquals(
[954, 972, 963, 956, 949],
utf8ToRunes([0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
"κόσμε");
// boundary conditions: First possible sequence of a certain length
Expect.listEquals([], utf8ToRunes([]), "no input");
Expect.listEquals([0x0], utf8ToRunes([0x0]), "0");
Expect.listEquals([0x80], utf8ToRunes([0xc2, 0x80]), "80");
Expect.listEquals([0x800], utf8ToRunes([0xe0, 0xa0, 0x80]), "800");
Expect.listEquals([0x10000], utf8ToRunes([0xf0, 0x90, 0x80, 0x80]), "10000");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), "4000000");
// boundary conditions: Last possible sequence of a certain length
Expect.listEquals([0x7f], utf8ToRunes([0x7f]), "7f");
Expect.listEquals([0x7ff], utf8ToRunes([0xdf, 0xbf]), "7ff");
Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "ffff");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), "4000000");
// other boundary conditions
Expect.listEquals([0xd7ff], utf8ToRunes([0xed, 0x9f, 0xbf]), "d7ff");
Expect.listEquals([0xe000], utf8ToRunes([0xee, 0x80, 0x80]), "e000");
Expect.listEquals([unicodeReplacementCharacterRune],
utf8ToRunes([0xef, 0xbf, 0xbd]), "fffd");
Expect.listEquals(
[0x10ffff], utf8ToRunes([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf4, 0x90, 0x80, 0x80]), "110000");
// unexpected continuation bytes
Expect.listEquals([unicodeReplacementCharacterRune], utf8ToRunes([0x80]),
"80 => replacement character");
Expect.listEquals([unicodeReplacementCharacterRune], utf8ToRunes([0xbf]),
"bf => replacement character");
List<int> allContinuationBytes = <int>[];
List<int> matchingReplacementChars = <int>[];
for (int i = 0x80; i < 0xc0; i++) {
allContinuationBytes.add(i);
matchingReplacementChars.add(unicodeReplacementCharacterRune);
}
Expect.listEquals(matchingReplacementChars, utf8ToRunes(allContinuationBytes),
"80 - bf => replacement character x 64");
List<int> allFirstTwoByteSeq = <int>[];
matchingReplacementChars = <int>[];
for (int i = 0xc0; i < 0xe0; i++) {
allFirstTwoByteSeq.addAll([i, 0x20]);
matchingReplacementChars.addAll([unicodeReplacementCharacterRune, 0x20]);
}
Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstTwoByteSeq),
"c0 - df + space => replacement character + space x 32");
List<int> allFirstThreeByteSeq = <int>[];
matchingReplacementChars = <int>[];
for (int i = 0xe0; i < 0xf0; i++) {
allFirstThreeByteSeq.addAll([i, 0x20]);
matchingReplacementChars.addAll([unicodeReplacementCharacterRune, 0x20]);
}
Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstThreeByteSeq),
"e0 - ef + space => replacement character x 16");
List<int> allFirstFourByteSeq = <int>[];
matchingReplacementChars = <int>[];
for (int i = 0xf0; i < 0xf8; i++) {
allFirstFourByteSeq.addAll([i, 0x20]);
matchingReplacementChars.addAll([unicodeReplacementCharacterRune, 0x20]);
}
Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstFourByteSeq),
"f0 - f7 + space => replacement character x 8");
List<int> allFirstFiveByteSeq = <int>[];
matchingReplacementChars = <int>[];
for (int i = 0xf8; i < 0xfc; i++) {
allFirstFiveByteSeq.addAll([i, 0x20]);
matchingReplacementChars.addAll([unicodeReplacementCharacterRune, 0x20]);
}
Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstFiveByteSeq),
"f8 - fb + space => replacement character x 4");
List<int> allFirstSixByteSeq = <int>[];
matchingReplacementChars = <int>[];
for (int i = 0xfc; i < 0xfe; i++) {
allFirstSixByteSeq.addAll([i, 0x20]);
matchingReplacementChars.addAll([unicodeReplacementCharacterRune, 0x20]);
}
Expect.listEquals(matchingReplacementChars, utf8ToRunes(allFirstSixByteSeq),
"fc - fd + space => replacement character x 2");
// Sequences with last continuation byte missing
Expect.listEquals([unicodeReplacementCharacterRune], utf8ToRunes([0xc2]),
"2-byte sequence with last byte missing");
Expect.listEquals(
[unicodeReplacementCharacterRune, unicodeReplacementCharacterRune],
utf8ToRunes([0xe0, 0x80]),
"3-byte sequence with last byte missing");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf0, 0x80, 0x80]), "4-byte sequence with last byte missing");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf8, 0x88, 0x80, 0x80]),
"5-byte sequence with last byte missing");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80]),
"6-byte sequence with last byte missing");
Expect.listEquals([unicodeReplacementCharacterRune], utf8ToRunes([0xdf]),
"2-byte sequence with last byte missing (hi)");
Expect.listEquals([unicodeReplacementCharacterRune],
utf8ToRunes([0xef, 0xbf]), "3-byte sequence with last byte missing (hi)");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf7, 0xbf, 0xbf]),
"4-byte sequence with last byte missing (hi)");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfb, 0xbf, 0xbf, 0xbf]),
"5-byte sequence with last byte missing (hi)");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]),
"6-byte sequence with last byte missing (hi)");
// Concatenation of incomplete sequences
Expect.listEquals(
[
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
],
utf8ToRunes([
0xc2,
0xe0,
0x80,
0xf0,
0x80,
0x80,
0xf8,
0x88,
0x80,
0x80,
0xfc,
0x80,
0x80,
0x80,
0x80,
0xdf,
0xef, // These two bytes form one incomplete sequence.
0xbf, // All others form one per byte.
0xf7,
0xbf,
0xbf,
0xfb,
0xbf,
0xbf,
0xbf,
0xfd,
0xbf,
0xbf,
0xbf,
0xbf
]),
"Concatenation of incomplete sequences");
// Impossible bytes
Expect.listEquals(
[unicodeReplacementCharacterRune], utf8ToRunes([0xfe]), "fe");
Expect.listEquals(
[unicodeReplacementCharacterRune], utf8ToRunes([0xff]), "ff");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff");
// Overlong sequences
Expect.listEquals(
[unicodeReplacementCharacterRune, unicodeReplacementCharacterRune],
utf8ToRunes([0xc0, 0xaf]),
"c0 af");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xe0, 0x80, 0xaf]), "e0 80 af");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]), "fc 80 80 80 80 af");
Expect.listEquals(
[unicodeReplacementCharacterRune, unicodeReplacementCharacterRune],
utf8ToRunes([0xc1, 0xbf]),
"c1 bf");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xe0, 0x9f, 0xbf]), "e0 9f bf");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]), "fc 83 bf bf bf bf");
Expect.listEquals(
[unicodeReplacementCharacterRune, unicodeReplacementCharacterRune],
utf8ToRunes([0xc0, 0x80]),
"c0 80");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xe0, 0x80, 0x80]), "e0 80 80");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80");
Expect.listEquals([
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune,
unicodeReplacementCharacterRune
], utf8ToRunes([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]), "fc 80 80 80 80 80");
// Other illegal code positions (???)
Expect.listEquals([0xfffe], utf8ToRunes([0xef, 0xbf, 0xbe]), "U+FFFE");
Expect.listEquals([0xffff], utf8ToRunes([0xef, 0xbf, 0xbf]), "U+FFFF");
}
void testUtf8BytesToString(String decodeUtf8(List<int> input)) {
Expect.stringEquals(
testEnglishPhrase, decodeUtf8(testEnglishUtf8), "English");
Expect.stringEquals(testDanishPhrase, decodeUtf8(testDanishUtf8), "Danish");
Expect.stringEquals(testHebrewPhrase, decodeUtf8(testHebrewUtf8), "Hebrew");
Expect.stringEquals(
testRussianPhrase, decodeUtf8(testRussianUtf8), "Russian");
Expect.stringEquals(testGreekPhrase, decodeUtf8(testGreekUtf8), "Greek");
Expect.stringEquals(
testKatakanaPhrase, decodeUtf8(testKatakanaUtf8), "Katakana");
}