mirror of
https://github.com/dart-lang/sdk
synced 2024-09-16 04:27:17 +00:00
f5bf50e7a4
This adjusts all UTF-8 tests to the new semantics in the breaking change described here: https://github.com/dart-lang/sdk/issues/41100

This has three parts:
- Unpaired surrogates are encoded as replacement characters, and encoded surrogates are considered malformed input when decoding.
- Decoding errors are generally reported on the position of the byte that conclusively makes the input malformed.
- The number of replacement characters emitted by the decoder is generally one per unfinished sequence or undecodable byte.

The code changes to implement the new semantics are placed in subsequent commits.

Change-Id: I4cc8ce660e39287e734070764ab8e1f0ebb8b9e0
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/143815
Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>
56 lines
2.2 KiB
Dart
56 lines
2.2 KiB
Dart
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
|
// for details. All rights reserved. Use of this source code is governed by a
|
|
// BSD-style license that can be found in the LICENSE file.
|
|
|
|
import "package:expect/expect.dart";
|
|
import 'dart:convert';
|
|
import 'unicode_tests.dart';
|
|
|
|
/// Encodes [str] as UTF-8 using a directly constructed [Utf8Encoder].
///
/// This goes through the converter class itself, whereas [encode2] goes
/// through the [utf8] codec's convenience method.
List<int> encode(String str) => const Utf8Encoder().convert(str);
|
|
/// Encodes [str] as UTF-8 through the shared [utf8] codec instance.
List<int> encode2(String str) {
  return utf8.encode(str);
}
|
|
|
|
/// Runs every entry of `UNICODE_TESTS` through both encoding helpers, then
/// runs the slice-specific checks.
void main() {
  // Index 0 of each entry is read as the expected UTF-8 bytes and index 1 as
  // the string to encode.
  for (var testCase in UNICODE_TESTS) {
    List<int> expectedBytes = testCase[0];
    String input = testCase[1];

    // Both helpers must produce the same bytes for the same input.
    Expect.listEquals(expectedBytes, encode(input));
    Expect.listEquals(expectedBytes, encode2(input));
  }

  testEncodeSlice();
}
|
|
|
|
/// Checks the optional `start`/`end` arguments of the UTF-8 encoder's
/// `convert` method.
///
/// Covers valid slices of an ASCII string and of a string containing
/// multi-byte characters, invalid ranges (which must throw), and a slice that
/// ends between the two halves of a surrogate pair.
void testEncodeSlice() {
  final converter = utf8.encoder;

  // ASCII input: every character encodes to exactly one byte.
  const ascii = "ABCDE";
  const asciiBytes = [0x41, 0x42, 0x43, 0x44, 0x45];

  // Omitted bounds and explicit full-range bounds all give the whole string.
  Expect.listEquals(asciiBytes, converter.convert(ascii));
  Expect.listEquals(asciiBytes, converter.convert(ascii, 0));
  Expect.listEquals(asciiBytes, converter.convert(ascii, 0, 5));

  // Proper sub-ranges.
  Expect.listEquals([0x42, 0x43, 0x44, 0x45], converter.convert(ascii, 1));
  Expect.listEquals([0x41, 0x42, 0x43, 0x44], converter.convert(ascii, 0, 4));
  Expect.listEquals([0x42, 0x43, 0x44], converter.convert(ascii, 1, 4));

  // Invalid ranges must be rejected.
  Expect.throws(() => converter.convert(ascii, -1)); // start < 0
  Expect.throws(() => converter.convert(ascii, 6)); // start > length
  Expect.throws(() => converter.convert(ascii, 0, -1)); // end < 0
  Expect.throws(() => converter.convert(ascii, 0, 6)); // end > length
  Expect.throws(() => converter.convert(ascii, 3, 2)); // end < start

  // Two 2-byte characters, one 3-byte character and one 4-byte character.
  // The last one occupies two UTF-16 code units, so the string length is 5.
  const unicode = "\u0081\u0082\u1041\u{10101}";
  const unicodeBytes = [
    0xc2, 0x81, // U+0081
    0xc2, 0x82, // U+0082
    0xe1, 0x81, 0x81, // U+1041
    0xf0, 0x90, 0x84, 0x81, // U+10101
  ];

  Expect.listEquals(unicodeBytes, converter.convert(unicode));
  Expect.listEquals(
      unicodeBytes, converter.convert(unicode, 0, unicode.length));

  // Skipping the first character drops its two bytes.
  Expect.listEquals([0xc2, 0x82, 0xe1, 0x81, 0x81, 0xf0, 0x90, 0x84, 0x81],
      converter.convert(unicode, 1));

  // A slice that ends just before the surrogate pair.
  Expect.listEquals(
      [0xc2, 0x82, 0xe1, 0x81, 0x81], converter.convert(unicode, 1, 3));

  // Split in the middle of a surrogate pair: the slice ends after the first
  // half only, and the expected output ends with EF BF BD, the UTF-8 encoding
  // of U+FFFD (the replacement character).
  Expect.listEquals([0xc2, 0x82, 0xe1, 0x81, 0x81, 0xef, 0xbf, 0xbd],
      converter.convert(unicode, 1, 4));
}
|