mirror of
https://github.com/dart-lang/sdk
synced 2024-09-19 15:01:29 +00:00
225df822b6
Initial port of dart:convert to be statically NNBD clean with the current analyzer implementation, using some assumptions about how the rest of the platform libraries will be migrated. Change-Id: Iad96c684dc7a2c1315ee6a3090ccd7b6d0f71c58 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/122173 Reviewed-by: Bob Nystrom <rnystrom@google.com> Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>
292 lines
9.5 KiB
Dart
292 lines
9.5 KiB
Dart
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
|
||
// for details. All rights reserved. Use of this source code is governed by a
|
||
// BSD-style license that can be found in the LICENSE file.
|
||
|
||
part of dart.convert;
|
||
|
||
/// The shared [AsciiCodec] instance, created with the default settings.
///
/// Use this constant for the most common ASCII use cases: encoding a string
/// to ASCII bytes and decoding ASCII bytes back to a string.
///
/// Examples:
/// ```dart
/// var encoded = ascii.encode("This is ASCII!");
/// var decoded = ascii.decode([0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
///                             0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x21]);
/// ```
const AsciiCodec ascii = AsciiCodec();

// Bit mask covering the seven value bits of an ASCII code unit. A value with
// any bit set outside this mask is treated as non-ASCII by the converters in
// this file.
const int _asciiMask = 0x7F;
|
||
|
||
/// A codec that encodes strings as ASCII bytes and decodes ASCII bytes back
/// to strings.
class AsciiCodec extends Encoding {
  // Default used by [decode] and [decoder] when the caller does not state
  // whether invalid bytes are acceptable.
  final bool _allowInvalid;

  /// Creates a new [AsciiCodec].
  ///
  /// If [allowInvalid] is true, the [decode] method and the converter
  /// returned by [decoder] allow invalid values by default and decode each
  /// of them into the Unicode Replacement character (U+FFFD).
  /// If it is false, invalid values cause an exception to be thrown.
  /// Individual calls to [decode] can override this default.
  ///
  /// Encoders never accept invalid (non-ASCII) characters.
  const AsciiCodec({bool allowInvalid = false}) : _allowInvalid = allowInvalid;

  /// The name of this codec, "us-ascii".
  String get name => "us-ascii";

  /// Encodes [source] to ASCII bytes by delegating to [encoder].
  Uint8List encode(String source) => encoder.convert(source);

  /// Decodes the ASCII [bytes] (a list of unsigned 7-bit integers) to the
  /// corresponding string.
  ///
  /// The [allowInvalid] argument selects how values outside the range
  /// 0 .. 127 are handled: when true they are decoded as the Unicode
  /// Replacement character (U+FFFD), when false the decoder throws a
  /// [FormatException].
  ///
  /// If [allowInvalid] is not provided, it defaults to the value used to
  /// create this [AsciiCodec].
  String decode(List<int> bytes, {bool? allowInvalid}) {
    final tolerant = allowInvalid ?? _allowInvalid;
    final converter = tolerant
        ? const AsciiDecoder(allowInvalid: true)
        : const AsciiDecoder(allowInvalid: false);
    return converter.convert(bytes);
  }

  /// The canonical constant [AsciiEncoder].
  AsciiEncoder get encoder => const AsciiEncoder();

  /// A constant [AsciiDecoder] configured with this codec's default for
  /// allowing invalid values.
  AsciiDecoder get decoder {
    if (_allowInvalid) {
      return const AsciiDecoder(allowInvalid: true);
    }
    return const AsciiDecoder(allowInvalid: false);
  }
}
|
||
|
||
// Shared superclass of [AsciiEncoder] and [Latin1Encoder].
//
// Both encoders perform the same operations and differ only in the bit mask
// that decides which code units are acceptable.
class _UnicodeSubsetEncoder extends Converter<String, List<int>> {
  // Bits a code unit is allowed to have set; any other set bit makes the
  // code unit invalid for this encoder.
  final int _subsetMask;

  const _UnicodeSubsetEncoder(this._subsetMask);

  /// Converts [string] into a list of its code units.
  ///
  /// If [start] and [end] are provided, only the substring
  /// `string.substring(start, end)` is used as input to the conversion.
  ///
  /// Throws an [ArgumentError] if the converted range contains a code unit
  /// with bits set outside the subset mask.
  Uint8List convert(String string, [int start = 0, int? end]) {
    end = RangeError.checkValidRange(start, end, string.length);
    // TODO(38725): Remove workaround when assignment promotion is implemented
    if (end == null) {
      throw RangeError("Invalid range");
    }
    final encoded = Uint8List(end - start);
    var offset = 0;
    for (var index = start; index < end; index++) {
      final unit = string.codeUnitAt(index);
      if ((unit & ~_subsetMask) != 0) {
        throw ArgumentError.value(
            string, "string", "Contains invalid characters.");
      }
      encoded[offset++] = unit;
    }
    return encoded;
  }

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [ByteConversionSink]; any other sink is wrapped with
  /// [ByteConversionSink.from].
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    ByteConversionSink byteSink;
    if (sink is ByteConversionSink) {
      byteSink = sink;
    } else {
      byteSink = ByteConversionSink.from(sink);
    }
    return _UnicodeSubsetEncoderSink(_subsetMask, byteSink);
  }

  // Override the base-class' bind, to provide a better type.
  Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
}
|
||
|
||
/// Converts strings that contain only ASCII characters to bytes.
///
/// All conversion logic lives in the superclass; this class only supplies
/// [_asciiMask] as the mask of permitted code-unit bits.
class AsciiEncoder extends _UnicodeSubsetEncoder {
  const AsciiEncoder() : super(_asciiMask);
}
|
||
|
||
/// A chunked-conversion sink that encodes strings to bytes (unsigned 8-bit
/// integers) and forwards them to a [ByteConversionSink].
///
/// Code units with bits set outside the subset mask are rejected.
class _UnicodeSubsetEncoderSink extends StringConversionSinkBase {
  final ByteConversionSink _sink;
  final int _subsetMask;

  _UnicodeSubsetEncoderSink(this._subsetMask, this._sink);

  /// Closes the underlying byte sink.
  void close() => _sink.close();

  /// Validates the code units of [source] from [start] to [end] and adds
  /// them to the underlying sink as one chunk.
  ///
  /// Throws an [ArgumentError] at the first code unit that does not fit the
  /// subset mask; nothing is added to the underlying sink in that case.
  /// Closes this sink afterwards if [isLast] is true.
  void addSlice(String source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    final slice = source.codeUnits.sublist(start, end);
    for (final codeUnit in slice) {
      if ((codeUnit & ~_subsetMask) != 0) {
        throw ArgumentError(
            "Source contains invalid character with code point: $codeUnit.");
      }
    }
    _sink.add(slice);
    if (isLast) close();
  }
}
|
||
|
||
/// This class converts Latin-1 bytes (lists of unsigned 8-bit integers)
|
||
/// to a string.
|
||
abstract class _UnicodeSubsetDecoder extends Converter<List<int>, String> {
|
||
final bool _allowInvalid;
|
||
final int _subsetMask;
|
||
|
||
/// Instantiates a new decoder.
|
||
///
|
||
/// The [_allowInvalid] argument defines how [convert] deals
|
||
/// with invalid bytes.
|
||
///
|
||
/// The [_subsetMask] argument is a bit mask used to define the subset
|
||
/// of Unicode being decoded. Use [_LATIN1_MASK] for Latin-1 (8-bit) or
|
||
/// [_asciiMask] for ASCII (7-bit).
|
||
///
|
||
/// If [_allowInvalid] is `true`, [convert] replaces invalid bytes with the
|
||
/// Unicode Replacement character `U+FFFD` (<28>).
|
||
/// Otherwise it throws a [FormatException].
|
||
const _UnicodeSubsetDecoder(this._allowInvalid, this._subsetMask);
|
||
|
||
/// Converts the [bytes] (a list of unsigned 7- or 8-bit integers) to the
|
||
/// corresponding string.
|
||
///
|
||
/// If [start] and [end] are provided, only the sub-list of bytes from
|
||
/// `start` to `end` (`end` not inclusive) is used as input to the conversion.
|
||
String convert(List<int> bytes, [int start = 0, int? end]) {
|
||
end = RangeError.checkValidRange(start, end, bytes.length);
|
||
// TODO(38725): Remove workaround when assignment promotion is implemented
|
||
if (end == null) {
|
||
throw RangeError("Invalid range");
|
||
}
|
||
for (var i = start; i < end; i++) {
|
||
var byte = bytes[i];
|
||
if ((byte & ~_subsetMask) != 0) {
|
||
if (!_allowInvalid) {
|
||
throw FormatException("Invalid value in input: $byte");
|
||
}
|
||
return _convertInvalid(bytes, start, end);
|
||
}
|
||
}
|
||
return String.fromCharCodes(bytes, start, end);
|
||
}
|
||
|
||
String _convertInvalid(List<int> bytes, int start, int end) {
|
||
var buffer = StringBuffer();
|
||
for (var i = start; i < end; i++) {
|
||
var value = bytes[i];
|
||
if ((value & ~_subsetMask) != 0) value = 0xFFFD;
|
||
buffer.writeCharCode(value);
|
||
}
|
||
return buffer.toString();
|
||
}
|
||
|
||
/// Starts a chunked conversion.
|
||
///
|
||
/// The converter works more efficiently if the given [sink] is a
|
||
/// [StringConversionSink].
|
||
ByteConversionSink startChunkedConversion(Sink<String> sink);
|
||
|
||
// Override the base-class's bind, to provide a better type.
|
||
Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);
|
||
}
|
||
|
||
/// Converts ASCII bytes (lists of unsigned 7-bit integers) to a string.
class AsciiDecoder extends _UnicodeSubsetDecoder {
  /// Creates an ASCII decoder.
  ///
  /// The [allowInvalid] argument is forwarded to the superclass together
  /// with [_asciiMask]; it decides whether non-ASCII bytes are replaced or
  /// rejected.
  const AsciiDecoder({bool allowInvalid = false})
      : super(allowInvalid, _asciiMask);

  /// Starts a chunked conversion.
  ///
  /// The converter works more efficiently if the given [sink] is a
  /// [StringConversionSink]; any other sink is wrapped with
  /// [StringConversionSink.from].
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    final StringConversionSink stringSink =
        sink is StringConversionSink ? sink : StringConversionSink.from(sink);
    // TODO(lrn): Use asUtf16Sink when it becomes available. It
    // works just as well, is likely to have less decoding overhead,
    // and make adding U+FFFD easier.
    // At that time, merge this with _Latin1DecoderSink;
    if (_allowInvalid) {
      return _ErrorHandlingAsciiDecoderSink(stringSink.asUtf8Sink(false));
    }
    return _SimpleAsciiDecoderSink(stringSink);
  }
}
|
||
|
||
// Byte sink that forwards ASCII bytes unchanged to a UTF-8 sink and
// substitutes the UTF-8 encoding of U+FFFD for every non-ASCII byte.
class _ErrorHandlingAsciiDecoderSink extends ByteConversionSinkBase {
  ByteConversionSink _utf8Sink;
  _ErrorHandlingAsciiDecoderSink(this._utf8Sink);

  void close() => _utf8Sink.close();

  void add(List<int> source) => addSlice(source, 0, source.length, false);

  void addSlice(List<int> source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Start of the run of valid bytes that has not been forwarded yet.
    var runStart = start;
    for (var index = start; index < end; index++) {
      if ((source[index] & ~_asciiMask) == 0) continue;
      // Flush the valid bytes that precede the invalid one, if any.
      if (index > runStart) {
        _utf8Sink.addSlice(source, runStart, index, false);
      }
      // Add UTF-8 encoding of U+FFFD.
      _utf8Sink.add(const <int>[0xEF, 0xBF, 0xBD]);
      runStart = index + 1;
    }
    if (runStart < end) {
      // The trailing run carries the `isLast` flag to the UTF-8 sink.
      _utf8Sink.addSlice(source, runStart, end, isLast);
    } else if (isLast) {
      close();
    }
  }
}
|
||
|
||
// Byte sink that decodes chunks of strictly ASCII bytes to strings and
// throws a [FormatException] for any chunk containing a non-ASCII byte.
class _SimpleAsciiDecoderSink extends ByteConversionSinkBase {
  Sink _sink;
  _SimpleAsciiDecoderSink(this._sink);

  void close() => _sink.close();

  void add(List<int> source) {
    // Validate the whole chunk before anything is forwarded.
    if (source.any((byte) => (byte & ~_asciiMask) != 0)) {
      throw FormatException("Source contains non-ASCII bytes.");
    }
    _sink.add(String.fromCharCodes(source));
  }

  void addSlice(List<int> source, int start, int end, bool isLast) {
    final total = source.length;
    RangeError.checkValidRange(start, end, total);
    if (start < end) {
      // Only copy when the requested range is a proper sub-range.
      final coversAll = start == 0 && end == total;
      add(coversAll ? source : source.sublist(start, end));
    }
    if (isLast) close();
  }
}
|