mirror of
https://github.com/dart-lang/sdk
synced 2024-09-21 08:11:28 +00:00
Replace old utf8 decoder with new one.
R=lrn@google.com Review URL: https://codereview.chromium.org//19187002 git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@25089 260f80e4-7a28-3924-810f-c04153c831b5
This commit is contained in:
parent
912b741073
commit
d0768e543f
|
@ -20,11 +20,41 @@ const UTF8 = const Utf8Codec();
|
|||
* A [Utf8Codec] encodes strings to utf-8 code units (bytes) and decodes
|
||||
* UTF-8 code units to strings.
|
||||
*/
|
||||
class Utf8Codec extends Encoding {
  // Default for [decode] and [decoder] when the caller does not say how
  // malformed input should be handled.
  final bool _allowMalformed;

  /**
   * Instantiates a new [Utf8Codec].
   *
   * The optional [allowMalformed] argument defines how [decoder] (and [decode])
   * deal with invalid or unterminated character sequences.
   *
   * If it is `true` (and not overridden at the method invocation) [decode] and
   * the [decoder] replace invalid (or unterminated) octet
   * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise
   * they throw a [FormatException].
   */
  const Utf8Codec({ bool allowMalformed: false })
      : _allowMalformed = allowMalformed;

  /**
   * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the
   * corresponding string.
   *
   * If [allowMalformed] is `true` the decoder replaces invalid (or
   * unterminated) character sequences with the Unicode Replacement character
   * `U+FFFD` (�). Otherwise it throws a [FormatException].
   *
   * If [allowMalformed] is not given, it defaults to the `allowMalformed` that
   * was used to instantiate `this`.
   */
  String decode(List<int> codeUnits, { bool allowMalformed }) {
    // An omitted argument arrives as `null`; fall back to the codec's setting.
    if (allowMalformed == null) allowMalformed = _allowMalformed;
    return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits);
  }

  /** Returns a new [Utf8Encoder]. */
  Converter<String, List<int>> get encoder => new Utf8Encoder();

  /**
   * Returns a new [Utf8Decoder] configured with the `allowMalformed` value
   * this codec was instantiated with.
   */
  Converter<List<int>, String> get decoder {
    return new Utf8Decoder(allowMalformed: _allowMalformed);
  }
}
|
||||
|
|
|
@ -21,11 +21,187 @@ class Utf8Encoder extends Converter<String, List<int>> {
|
|||
* to a string.
|
||||
*/
|
||||
class Utf8Decoder extends Converter<List<int>, String> {
  final bool _allowMalformed;

  /**
   * Instantiates a new [Utf8Decoder].
   *
   * The optional [allowMalformed] argument defines how [convert] deals
   * with invalid or unterminated character sequences.
   *
   * If it is `true` [convert] replaces invalid (or unterminated) character
   * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise
   * it throws a [FormatException].
   */
  Utf8Decoder({ bool allowMalformed: false })
      : this._allowMalformed = allowMalformed;

  /**
   * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the
   * corresponding string.
   */
  // TODO(floitsch): allow to configure the decoder (for example the replacement
  // character).
  String convert(List<int> codeUnits) {
    StringBuffer buffer = new StringBuffer();
    _Utf8Decoder decoder = new _Utf8Decoder(_allowMalformed);
    // The whole input is a single chunk: decode it, then close the decoder so
    // that a sequence cut off at the end of [codeUnits] is reported as well.
    decoder.convert(codeUnits, 0, codeUnits.length, buffer);
    decoder.close(buffer);
    return buffer.toString();
  }
}
|
||||
|
||||
// UTF-8 constants: the largest value that fits into an encoding of the given
// length.
const int _ONE_BYTE_LIMIT = 0x7F;       // 7 bits.
const int _TWO_BYTE_LIMIT = 0x7FF;      // 11 bits.
const int _THREE_BYTE_LIMIT = 0xFFFF;   // 16 bits.
const int _FOUR_BYTE_LIMIT = 0x10FFFF;  // 21 bits, truncated to Unicode max.

// UTF-16 constants.
const int _SURROGATE_MASK = 0xF800;
const int _SURROGATE_TAG_MASK = 0xFC00;
const int _SURROGATE_VALUE_MASK = 0x3FF;
const int _LEAD_SURROGATE_MIN = 0xD800;
const int _TAIL_SURROGATE_MIN = 0xDC00;

// Special code points.
const int _REPLACEMENT_CHARACTER = 0xFFFD;
const int _BOM_CHARACTER = 0xFEFF;
|
||||
|
||||
/** Whether [codeUnit] is a UTF-16 surrogate (lead or tail). */
bool _isSurrogate(int codeUnit) {
  int tagBits = codeUnit & _SURROGATE_MASK;
  return tagBits == _LEAD_SURROGATE_MIN;
}
|
||||
/** Whether [codeUnit] is a UTF-16 lead surrogate. */
bool _isLeadSurrogate(int codeUnit) {
  int tagBits = codeUnit & _SURROGATE_TAG_MASK;
  return tagBits == _LEAD_SURROGATE_MIN;
}
|
||||
/** Whether [codeUnit] is a UTF-16 tail surrogate. */
bool _isTailSurrogate(int codeUnit) {
  int tagBits = codeUnit & _SURROGATE_TAG_MASK;
  return tagBits == _TAIL_SURROGATE_MIN;
}
|
||||
/**
 * Combines the UTF-16 surrogates [lead] and [tail] into the code point they
 * encode.
 *
 * The 10 value bits of [lead] and the 10 value bits of [tail] form a 20-bit
 * number to which `0x10000` is added.
 */
// The offset must be added, not OR-ed in: the shifted lead bits reach bit 16,
// so `0x10000 | ...` drops the carry. For example (0xD840, 0xDC00) is U+20000,
// but OR-ing would yield 0x10000.
int _combineSurrogatePair(int lead, int tail) =>
    0x10000 + (((lead & _SURROGATE_VALUE_MASK) << 10)
               | (tail & _SURROGATE_VALUE_MASK));
|
||||
|
||||
|
||||
/**
 * Decodes UTF-8.
 *
 * The decoder handles chunked input: [convert] may be called repeatedly with
 * consecutive pieces of the input, and a multi-byte sequence that is cut off
 * at the end of one piece is carried over to the next call. Call [close]
 * after the last piece to report a sequence that was never finished.
 *
 * If the decoder was created with `true`, each malformed sequence is replaced
 * by the replacement character U+FFFD; otherwise a [FormatException] is
 * thrown.
 */
// TODO(floitsch): make this class public.
class _Utf8Decoder {
  final bool _allowMalformed;

  // Whether no character has been produced yet. A U+FEFF (BOM) decoded while
  // this is still `true` is dropped instead of being written to the sink.
  bool _isFirstCharacter = true;

  // State of a multi-byte sequence that was cut off at the end of a chunk:
  // the bits collected so far, the number of continuation bytes still
  // missing, and the total number of continuation bytes of the sequence.
  int _value = 0;
  int _expectedUnits = 0;
  int _extraUnits = 0;

  _Utf8Decoder(this._allowMalformed);

  /** Whether the last [convert] call ended inside a multi-byte sequence. */
  bool get hasPartialInput => _expectedUnits > 0;

  // Limits of one through four byte encodings.
  static const List<int> _LIMITS = const <int>[
      _ONE_BYTE_LIMIT,
      _TWO_BYTE_LIMIT,
      _THREE_BYTE_LIMIT,
      _FOUR_BYTE_LIMIT ];

  /**
   * Finishes decoding.
   *
   * If a multi-byte sequence is still pending, throws a [FormatException], or,
   * when malformed input is allowed, writes U+FFFD to [sink].
   */
  void close(StringSink sink) {
    if (hasPartialInput) {
      if (!_allowMalformed) {
        throw new FormatException("Unfinished UTF-8 octet sequence");
      }
      sink.writeCharCode(_REPLACEMENT_CHARACTER);
      // The pending sequence has been reported; forget it so that a repeated
      // [close] or a later [convert] does not act on it a second time.
      _value = 0;
      _expectedUnits = 0;
      _extraUnits = 0;
    }
  }

  /**
   * Decodes `codeUnits[startIndex]` up to, but not including,
   * `codeUnits[endIndex]` and writes the resulting characters to [sink].
   *
   * A sequence left unfinished at [endIndex] is remembered and continued by
   * the next call. Malformed input either throws a [FormatException] or, when
   * malformed input is allowed, is replaced by U+FFFD.
   */
  void convert(List<int> codeUnits, int startIndex, int endIndex,
               StringSink sink) {
    // Work on local copies of the carried-over state. The fields are cleared
    // here and only written back at the end if a sequence is still pending.
    int value = _value;
    int expectedUnits = _expectedUnits;
    int extraUnits = _extraUnits;
    _value = 0;
    _expectedUnits = 0;
    _extraUnits = 0;

    int i = startIndex;
    loop: while (true) {
      // Consume the continuation bytes of a started multi-byte sequence.
      multibyte: if (expectedUnits > 0) {
        do {
          if (i == endIndex) {
            // Out of input in the middle of a sequence; the state is saved
            // below.
            break loop;
          }
          int unit = codeUnits[i];
          if ((unit & 0xC0) != 0x80) {
            // Not a continuation byte: the sequence is broken. The byte is
            // not consumed, so the loop below decodes it on its own.
            expectedUnits = 0;
            if (!_allowMalformed) {
              throw new FormatException(
                  "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
            }
            _isFirstCharacter = false;
            sink.writeCharCode(_REPLACEMENT_CHARACTER);
            break multibyte;
          } else {
            value = (value << 6) | (unit & 0x3f);
            expectedUnits--;
            i++;
          }
        } while (expectedUnits > 0);
        if (value <= _LIMITS[extraUnits - 1]) {
          // Overly long encoding. The value could be encoded with a shorter
          // encoding.
          if (!_allowMalformed) {
            throw new FormatException(
                "Overlong encoding of 0x${value.toRadixString(16)}");
          }
          expectedUnits = extraUnits = 0;
          value = _REPLACEMENT_CHARACTER;
        }
        if (value > _FOUR_BYTE_LIMIT) {
          if (!_allowMalformed) {
            throw new FormatException("Character outside valid Unicode range: "
                                      "0x${value.toRadixString(16)}");
          }
          value = _REPLACEMENT_CHARACTER;
        }
        // A BOM is dropped only if it is the very first character.
        if (!_isFirstCharacter || value != _BOM_CHARACTER) {
          sink.writeCharCode(value);
        }
        _isFirstCharacter = false;
      }

      // Decode single-byte characters until a lead byte starts a new
      // multi-byte sequence, which is then handled by the block above.
      while (i < endIndex) {
        int unit = codeUnits[i++];
        if (unit < 0) {
          // A negative unit is not a byte. Without this check it would pass
          // the `unit <= _ONE_BYTE_LIMIT` test below and be handed to
          // `writeCharCode` as a char code.
          if (!_allowMalformed) {
            throw new FormatException(
                "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");
          }
          _isFirstCharacter = false;
          sink.writeCharCode(_REPLACEMENT_CHARACTER);
        } else if (unit <= _ONE_BYTE_LIMIT) {
          _isFirstCharacter = false;
          sink.writeCharCode(unit);
        } else {
          if ((unit & 0xE0) == 0xC0) {
            value = unit & 0x1F;
            expectedUnits = extraUnits = 1;
            continue loop;
          }
          if ((unit & 0xF0) == 0xE0) {
            value = unit & 0x0F;
            expectedUnits = extraUnits = 2;
            continue loop;
          }
          // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences.
          if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
            value = unit & 0x07;
            expectedUnits = extraUnits = 3;
            continue loop;
          }
          // Neither a single-byte character nor a valid lead byte.
          if (!_allowMalformed) {
            throw new FormatException(
                "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
          }
          value = _REPLACEMENT_CHARACTER;
          expectedUnits = extraUnits = 0;
          _isFirstCharacter = false;
          sink.writeCharCode(value);
        }
      }
      break loop;
    }
    if (expectedUnits > 0) {
      // The chunk ended inside a sequence; keep the state for the next call.
      _value = value;
      _expectedUnits = expectedUnits;
      _extraUnits = extraUnits;
    }
  }
}
|
||||
|
|
135
tests/lib/convert/utf82_test.dart
Normal file
135
tests/lib/convert/utf82_test.dart
Normal file
|
@ -0,0 +1,135 @@
|
|||
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
||||
// for details. All rights reserved. Use of this source code is governed by a
|
||||
// BSD-style license that can be found in the LICENSE file.
|
||||
|
||||
library utf8_test;
|
||||
import "package:expect/expect.dart";
|
||||
import 'dart:convert';
|
||||
import 'dart:codec';
|
||||
|
||||
// Strict decoding through a stand-alone [Utf8Decoder].
String decode(List<int> bytes) {
  return new Utf8Decoder().convert(bytes);
}
|
||||
// Lenient decoding through a stand-alone [Utf8Decoder].
String decodeAllowMalformed(List<int> bytes) =>
    new Utf8Decoder(allowMalformed: true).convert(bytes);
|
||||
|
||||
// Strict decoding through the [UTF8] codec constant.
String decode2(List<int> bytes) {
  return UTF8.decode(bytes);
}
|
||||
// Lenient decoding through the [UTF8] codec constant, overriding its default
// at the call.
String decodeAllowMalformed2(List<int> bytes) =>
    UTF8.decode(bytes, allowMalformed: true);
|
||||
|
||||
// Strict decoding through a freshly created [Utf8Codec].
String decode3(List<int> bytes) {
  return new Utf8Codec().decode(bytes);
}
|
||||
// Lenient decoding through a [Utf8Codec] created with `allowMalformed: true`.
String decodeAllowMalformed3(List<int> bytes) =>
    new Utf8Codec(allowMalformed: true).decode(bytes);
|
||||
|
||||
// Strict decoding through the `decoder` of a freshly created [Utf8Codec].
String decode4(List<int> bytes) {
  return new Utf8Codec().decoder.convert(bytes);
}
|
||||
// Lenient decoding through the `decoder` of a [Utf8Codec] created with
// `allowMalformed: true`.
String decodeAllowMalformed4(List<int> bytes) =>
    new Utf8Codec(allowMalformed: true).decoder.convert(bytes);
|
||||
|
||||
// Malformed byte sequences. Strict decoding must reject each of them; lenient
// decoding is expected to yield exactly one replacement character for each.
final TESTS = [
  // Unfinished UTF-8 sequences.
  [0xC3],
  [0xE2, 0x82],
  [0xF0, 0xA4, 0xAD],
  // Overlong encoding of euro-sign.
  [0xF0, 0x82, 0x82, 0xAC],
  // Other overlong/unfinished sequences.
  [0xC0], [0xC1],
  [0xF5], [0xF6], [0xF7], [0xF8], [0xF9], [0xFA],
  [0xFB], [0xFC], [0xFD], [0xFE], [0xFF],
  [0xC0, 0x80],
  [0xC1, 0x80],
  // Outside valid range.
  [0xF4, 0xBF, 0xBF, 0xBF]
];
|
||||
|
||||
// Pairs of malformed input and the expected lenient output, in which every
// "X" stands for one replacement character.
final TESTS2 = [
  // Test that 0xC0|1, 0x80 does not eat the next character.
  [[0xC0, 0x80, 0x61], "Xa"],
  [[0xC1, 0x80, 0x61], "Xa"],
  // 0xF5 .. 0xFF never appear in valid UTF-8 sequences.
  [[0xF5, 0x80], "XX"],
  [[0xF6, 0x80], "XX"],
  [[0xF7, 0x80], "XX"],
  [[0xF8, 0x80], "XX"],
  [[0xF9, 0x80], "XX"],
  [[0xFA, 0x80], "XX"],
  [[0xFB, 0x80], "XX"],
  [[0xFC, 0x80], "XX"],
  [[0xFD, 0x80], "XX"],
  [[0xFE, 0x80], "XX"],
  [[0xFF, 0x80], "XX"],
  [[0xF5, 0x80, 0x61], "XXa"],
  [[0xF6, 0x80, 0x61], "XXa"],
  [[0xF7, 0x80, 0x61], "XXa"],
  [[0xF8, 0x80, 0x61], "XXa"],
  [[0xF9, 0x80, 0x61], "XXa"],
  [[0xFA, 0x80, 0x61], "XXa"],
  [[0xFB, 0x80, 0x61], "XXa"],
  [[0xFC, 0x80, 0x61], "XXa"],
  [[0xFD, 0x80, 0x61], "XXa"],
  [[0xFE, 0x80, 0x61], "XXa"],
  [[0xFF, 0x80, 0x61], "XXa"],
  // Characters outside the valid range.
  [[0xF5, 0x80, 0x80, 0x61], "XXXa"],
  [[0xF6, 0x80, 0x80, 0x61], "XXXa"],
  [[0xF7, 0x80, 0x80, 0x61], "XXXa"],
  [[0xF8, 0x80, 0x80, 0x61], "XXXa"],
  [[0xF9, 0x80, 0x80, 0x61], "XXXa"],
  [[0xFA, 0x80, 0x80, 0x61], "XXXa"],
  [[0xFB, 0x80, 0x80, 0x61], "XXXa"],
  [[0xFC, 0x80, 0x80, 0x61], "XXXa"],
  [[0xFD, 0x80, 0x80, 0x61], "XXXa"],
  [[0xFE, 0x80, 0x80, 0x61], "XXXa"],
  [[0xFF, 0x80, 0x80, 0x61], "XXXa"]
];
|
||||
|
||||
main() {
  // Joins byte fragments into one fresh list.
  concat(fragments) {
    var joined = [];
    for (var fragment in fragments) {
      joined.addAll(fragment);
    }
    return joined;
  }

  var ascii = [0x61];
  var twoByte = [0xc3, 0xa5];

  // Pairs of input bytes and the expected string output when malformed
  // strings are allowed. Replacement character: U+FFFD
  var cases = [];

  // Every malformed sequence on its own and embedded between valid
  // characters.
  for (var malformed in TESTS) {
    cases.add([ malformed, "\u{FFFD}" ]);
    cases.add([ concat([ascii, malformed]), "a\u{FFFD}" ]);
    cases.add([ concat([ascii, malformed, ascii]), "a\u{FFFD}a" ]);
    cases.add([ concat([malformed, ascii]), "\u{FFFD}a" ]);
    cases.add([ concat([malformed, malformed]), "\u{FFFD}\u{FFFD}" ]);
    cases.add([ concat([malformed, ascii, malformed]),
                "\u{FFFD}a\u{FFFD}" ]);
    cases.add([ concat([twoByte, malformed]), "å\u{FFFD}" ]);
    cases.add([ concat([twoByte, malformed, twoByte]), "å\u{FFFD}å" ]);
    cases.add([ concat([malformed, twoByte]), "\u{FFFD}å" ]);
    cases.add([ concat([malformed, twoByte, malformed]),
                "\u{FFFD}å\u{FFFD}" ]);
  }

  // The expectations in TESTS2 use "X" as a stand-in for U+FFFD.
  for (var entry in TESTS2) {
    String withReplacements = entry[1].replaceAll("X", "\u{FFFD}");
    cases.add([ entry[0], withReplacements ]);
  }

  var strictDecoders = [decode, decode2, decode3, decode4];
  var lenientDecoders = [
    decodeAllowMalformed,
    decodeAllowMalformed2,
    decodeAllowMalformed3,
    decodeAllowMalformed4
  ];

  for (var testCase in cases) {
    List<int> bytes = testCase[0];
    for (var strict in strictDecoders) {
      Expect.throws(() => strict(bytes), (e) => e is FormatException);
    }

    String expected = testCase[1];
    for (var lenient in lenientDecoders) {
      Expect.equals(expected, lenient(bytes));
    }
  }
}
|
46
tests/lib/convert/utf83_test.dart
Normal file
46
tests/lib/convert/utf83_test.dart
Normal file
|
@ -0,0 +1,46 @@
|
|||
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
||||
// for details. All rights reserved. Use of this source code is governed by a
|
||||
// BSD-style license that can be found in the LICENSE file.
|
||||
|
||||
library utf8_test;
|
||||
import "package:expect/expect.dart";
|
||||
import 'dart:codec';
|
||||
import 'dart:convert';
|
||||
|
||||
main() {
  // Test that UTF8-decoder removes leading BOM.
  // Each entry pairs the input bytes with the expected string: a leading BOM
  // is removed, a BOM that follows another character is kept.
  var inputs = [
    [[0xEF, 0xBB, 0xBF, 0x61], "a"],
    [[0xEF, 0xBB, 0xBF], ""],
    [[0x61, 0xEF, 0xBB, 0xBF], "a\u{FEFF}"]
  ];

  // All ways of decoding, strict first, then with malformed input allowed.
  var decoders = [
    (bytes) => UTF8.decode(bytes),
    (bytes) => UTF8.decoder.convert(bytes),
    (bytes) => new Utf8Decoder().convert(bytes),
    (bytes) => UTF8.decode(bytes, allowMalformed: true),
    (bytes) => new Utf8Codec(allowMalformed: true).decode(bytes),
    (bytes) => new Utf8Codec(allowMalformed: true).decoder.convert(bytes),
    (bytes) => new Utf8Decoder(allowMalformed: true).convert(bytes)
  ];

  for (var input in inputs) {
    for (var decodeWith in decoders) {
      Expect.equals(input[1], decodeWith(input[0]));
    }
  }
}
|
47
tests/lib/convert/utf8_test.dart
Normal file
47
tests/lib/convert/utf8_test.dart
Normal file
|
@ -0,0 +1,47 @@
|
|||
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
||||
// for details. All rights reserved. Use of this source code is governed by a
|
||||
// BSD-style license that can be found in the LICENSE file.
|
||||
|
||||
library utf8_test;
|
||||
import "package:expect/expect.dart";
|
||||
import 'dart:convert';
|
||||
|
||||
// Decodes [bytes] with a default (strict) [Utf8Decoder].
String decode(List<int> bytes) {
  return new Utf8Decoder().convert(bytes);
}
|
||||
|
||||
// Decodes well-formed UTF-8 of one- to four-byte sequences and compares the
// result with the expected string.
main() {
  // Google favorite: "Îñţérñåţîöñåļîžåţîờñ".
  String string = decode([0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72,
                          0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3,
                          0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae,
                          0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1,
                          0xbb, 0x9d, 0xc3, 0xb1]);
  Expect.stringEquals("Îñţérñåţîöñåļîžåţîờñ", string);

  // Blueberry porridge in Danish: "blåbærgrød".
  string = decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72,
                   0xc3, 0xb8, 0x64]);
  Expect.stringEquals("blåbærgrød", string);

  // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil.
  string = decode([0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0,
                   0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0,
                   0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf,
                   0x88, 0xe0, 0xae, 0xb2]);
  Expect.stringEquals("சிவா அணாமாைல", string);

  // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari.
  string = decode([0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0,
                   0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0,
                   0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4,
                   0xb2, 0xe0, 0xa5, 0x88]);
  Expect.stringEquals("िसवा अणामालै", string);

  // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12)
  // UTF-8: F0 90 90 92
  string = decode([0xf0, 0x90, 0x90, 0x92]);
  // The character lies outside the BMP, so it takes two UTF-16 code units.
  // Expect.equals takes the expected value first.
  Expect.equals(2, string.length);
  Expect.equals(2, "𐐒".length);
  Expect.stringEquals("𐐒", string);

  // TODO(ahe): Add tests of bad input.
}
|
Loading…
Reference in a new issue