Add split function to LineSplitter class in dart:convert.

The split function returns a lazy iterable of the lines, unlike the convert
function which returns a list.
This makes the function usable on large strings where not all of the lines
are needed.

Closes https://github.com/dart-lang/sdk/issues/23837

R=kevmoo@google.com

Review URL: https://codereview.chromium.org//1240623002 .
This commit is contained in:
Lasse R.H. Nielsen 2015-07-15 12:26:24 +02:00
parent c165fee611
commit 4ee6246556
3 changed files with 181 additions and 45 deletions

View file

@ -2,6 +2,9 @@
### Core library changes
* `dart:convert`
* `LineSplitter` added a `split` static method returning an `Iterable`.
* `dart:html`
* `NodeTreeSanitizer` added the `const trusted` field. It can be used
instead of defining a `NullTreeSanitizer` class when calling

View file

@ -4,21 +4,53 @@
part of dart.convert;
// Character constants.
const int _LF = 10;
const int _CR = 13;
/**
* This class splits [String] values into individual lines.
* A [Converter] that splits a [String] into individual lines.
*
* A line is terminated by either a CR (U+000D), a LF (U+000A), or a
* CR+LF sequence (DOS line ending).
* A final non-empty line can also be ended by the end of the string.
*
* The returned lines do not contain the line terminators.
*/
class LineSplitter extends Converter<String, List<String>> {
const LineSplitter();
List<String> convert(String data) {
var lines = new List<String>();
_LineSplitterSink._addSlice(data, 0, data.length, true, lines.add);
return lines;
/// Splits [lines] into individual lines.
///
/// A line is terminated by a CR, an LF, or a CR+LF pair; the terminators
/// are not part of the returned lines. Text after the last terminator is
/// returned as a final line only if it is non-empty.
///
/// If [start] and [end] are provided, only split the contents of
/// `lines.substring(start, end)`. The [start] and [end] values must
/// specify a valid sub-range of [lines]
/// (`0 <= start <= end <= lines.length`).
///
/// The range is checked when `split` is called, so an invalid range is
/// reported at the call site rather than when the result is first
/// iterated. The lines themselves are still found lazily, while the
/// returned iterable is being iterated.
static Iterable<String> split(String lines, [int start = 0, int end]) {
  // Deliberately not a generator: a `sync*` body does not start running
  // until the first `moveNext`, which would postpone the range check.
  end = RangeError.checkValidRange(start, end, lines.length);
  return _splitRange(lines, start, end);
}

/// Lazily yields the lines of `lines.substring(start, end)`.
///
/// The range must already have been validated by the caller.
static Iterable<String> _splitRange(
    String lines, int start, int end) sync* {
  int sliceStart = start;
  // Starts as 0 (neither CR nor LF), so an LF at [start] is always treated
  // as a terminator of its own.
  int char = 0;
  for (int i = start; i < end; i++) {
    int previousChar = char;
    char = lines.codeUnitAt(i);
    if (char != _CR) {
      if (char != _LF) continue;
      if (previousChar == _CR) {
        // This LF completes a CR+LF pair whose line was already yielded at
        // the CR; only move the start of the next line past it.
        sliceStart = i + 1;
        continue;
      }
    }
    // A CR, or an LF not preceded by a CR, ends the current line.
    yield lines.substring(sliceStart, i);
    sliceStart = i + 1;
  }
  // Trailing text without a terminator counts as a line only if non-empty.
  if (sliceStart < end) {
    yield lines.substring(sliceStart, end);
  }
}
/// Converts [data] into the list of its lines, as produced by [split].
List<String> convert(String data) {
  return split(data).toList();
}
StringConversionSink startChunkedConversion(Sink<String> sink) {
if (sink is! StringConversionSink) {
sink = new StringConversionSink.from(sink);
@ -29,65 +61,76 @@ class LineSplitter extends Converter<String, List<String>> {
// TODO(floitsch): deal with utf8.
class _LineSplitterSink extends StringConversionSinkBase {
static const int _LF = 10;
static const int _CR = 13;
final StringConversionSink _sink;
/// The carry-over from the previous chunk.
///
/// If the previous slice ended in a line without a line terminator,
/// then the next slice may continue the line.
String _carry;
/// Whether to skip a leading LF character from the next slice.
///
/// If the previous slice ended on a CR character, a following LF
/// would be part of the same line termination, and should be ignored.
///
/// Only `true` when [_carry] is `null`.
bool _skipLeadingLF = false;
_LineSplitterSink(this._sink);
void addSlice(String chunk, int start, int end, bool isLast) {
end = RangeError.checkValidRange(start, end, chunk.length);
// If the chunk is empty, it's probably because it's the last one.
// Handle that here, so we know the range is non-empty below.
if (start >= end) {
if (isLast) close();
return;
}
if (_carry != null) {
assert(!_skipLeadingLF);
chunk = _carry + chunk.substring(start, end);
start = 0;
end = chunk.length;
_carry = null;
} else if (_skipLeadingLF) {
if (chunk.codeUnitAt(start) == _LF) {
start += 1;
}
_skipLeadingLF = false;
}
_carry = _addSlice(chunk, start, end, isLast, _sink.add);
if (isLast) _sink.close();
_addLines(chunk, start, end);
if (isLast) close();
}
void close() {
addSlice('', 0, 0, true);
if (_carry != null) {
_sink.add(_carry);
_carry = null;
}
_sink.close();
}
static String _addSlice(String chunk, int start, int end, bool isLast,
void adder(String val)) {
int pos = start;
while (pos < end) {
int skip = 0;
int char = chunk.codeUnitAt(pos);
if (char == _LF) {
skip = 1;
} else if (char == _CR) {
skip = 1;
if (pos + 1 < end) {
if (chunk.codeUnitAt(pos + 1) == _LF) {
skip = 2;
}
} else if (!isLast) {
return chunk.substring(start, end);
void _addLines(String lines, int start, int end) {
int sliceStart = start;
int char = 0;
for (int i = start; i < end; i++) {
int previousChar = char;
char = lines.codeUnitAt(i);
if (char != _CR) {
if (char != _LF) continue;
if (previousChar == _CR) {
sliceStart = i + 1;
continue;
}
}
if (skip > 0) {
adder(chunk.substring(start, pos));
start = pos = pos + skip;
} else {
pos++;
}
_sink.add(lines.substring(sliceStart, i));
sliceStart = i + 1;
}
if (pos != start) {
var carry = chunk.substring(start, pos);
if (isLast) {
// Add remaining
adder(carry);
} else {
return carry;
}
if (sliceStart < end) {
_carry = lines.substring(sliceStart, end);
} else {
_skipLeadingLF = (char == _CR);
}
return null;
}
}

View file

@ -11,9 +11,12 @@ import 'dart:math' as MATH;
/// Runs the line-splitter tests one after another, in a fixed order.
void main() {
  var tests = [
    testSimpleConvert,
    testSplit,
    testSplitWithOffsets,
    testManyLines,
    testReadLine1,
    testReadLine2,
    testChunkedConversion,
  ];
  for (var runTest in tests) {
    runTest();
  }
}
void testManyLines() {
@ -136,3 +139,90 @@ void testReadLine2() {
controller.close();
Expect.equals(expectedLines.length, index);
}
/// Checks [LineSplitter.split] on complete strings, without offsets.
void testSplit() {
  // Three lines in a multi-line literal; the last has no terminator.
  var threeLines = """line1
line2
line3""";
  Expect.listEquals(
      ['line1', 'line2', 'line3'], LineSplitter.split(threeLines).toList());

  // Mixes LF, CR+LF and lone CR terminators, and ends in a run of
  // terminators that each close an empty line.
  var mixedEndings = "Line1\nLine2\r\nLine3\rLi"
      "ne4\n"
      "\n\n\r\n\r\n\r\r";
  Expect.listEquals(
      ['Line1', 'Line2', 'Line3', 'Line4', '', '', '', '', '', ''],
      LineSplitter.split(mixedEndings).toList());
}
/// Checks [LineSplitter.split] when given `start` and `end` offsets.
void testSplitWithOffsets() {
  // NOTE(review): the offsets below assume this literal's line breaks are
  // single LF characters - confirm the file is stored with LF endings.
  var threeLines = """line1
line2
line3""";

  // Starting inside the first line keeps only the rest of that line.
  Expect.listEquals(
      ['1', 'line2', 'line3'], LineSplitter.split(threeLines, 4).toList());
  // Starting on the first line break gives an empty first line.
  Expect.listEquals(
      ['', 'line2', 'line3'], LineSplitter.split(threeLines, 5).toList());
  // Starting just after the first line break skips the first line.
  Expect.listEquals(
      ['line2', 'line3'], LineSplitter.split(threeLines, 6).toList());
  // An end inside a line truncates that line.
  Expect.listEquals(
      ['line1', 'li'], LineSplitter.split(threeLines, 0, 8).toList());
  // A range covering exactly the second line, without its line break.
  Expect.listEquals(['line2'], LineSplitter.split(threeLines, 6, 11).toList());

  // Without offsets: mixed terminators followed by a run of empty lines.
  var mixedEndings = "Line1\nLine2\r\nLine3\rLi"
      "ne4\n"
      "\n\n\r\n\r\n\r\r";
  Expect.listEquals(
      ['Line1', 'Line2', 'Line3', 'Line4', '', '', '', '', '', ''],
      LineSplitter.split(mixedEndings).toList());

  // Without offsets: LF+CR, CR+CR and CR+LF+LF sequences between lines.
  var trickyEndings = "a\n\nb\r\nc\n\rd\r\re\r\n\nf\r\n";
  Expect.listEquals(["a", "", "b", "c", "", "d", "", "e", "", "f"],
      LineSplitter.split(trickyEndings).toList());
}
/// Checks that chunked conversion gives the same lines wherever the input
/// is cut into slices.
void testChunkedConversion() {
  var input = "a\n\nb\r\nc\n\rd\r\re\r\n\nf\rg\nh\r\n";
  var expected = ["a", "", "b","c", "", "d", "", "e", "", "f", "g", "h"];

  // Two slices, cut at every position; the sink is closed explicitly
  // because neither slice is flagged as the last one.
  for (int cut = 0; cut < input.length; cut++) {
    var collected = [];
    var collector = new ChunkedConversionSink.withCallback(collected.addAll);
    new LineSplitter().startChunkedConversion(collector)
      ..addSlice(input, 0, cut, false)
      ..addSlice(input, cut, input.length, false)
      ..close();
    Expect.listEquals(expected, collected);
  }

  // Three slices, cut at every pair of positions; the final slice is
  // flagged as the last one, so there is no separate close call.
  for (int firstCut = 0; firstCut < input.length; firstCut++) {
    for (int secondCut = firstCut; secondCut < input.length; secondCut++) {
      var collected = [];
      var collector =
          new ChunkedConversionSink.withCallback(collected.addAll);
      new LineSplitter().startChunkedConversion(collector)
        ..addSlice(input, 0, firstCut, false)
        ..addSlice(input, firstCut, secondCut, false)
        ..addSlice(input, secondCut, input.length, true);
      Expect.listEquals(expected, collected);
    }
  }
}