Lasse R.H. Nielsen 2016-09-26 13:23:41 +02:00
parent 3a4b32d957
commit 574ae435f3
8 changed files with 62 additions and 100 deletions

View file

@ -2790,7 +2790,7 @@ Strings support escape sequences for special characters. The escapes are:
$\backslash$u\{$HEX\_DIGIT_1$ $HEX\_DIGIT_2$\}.
\item $\backslash$u $HEX\_DIGIT_1$ $HEX\_DIGIT_2$ $HEX\_DIGIT_3$ $HEX\_DIGIT_4$, equivalent to $\backslash$u\{$HEX\_DIGIT_1$ $HEX\_DIGIT_2$ $HEX\_DIGIT_3$ $HEX\_DIGIT_4$\}.
\item $\backslash$u\{$HEX\_DIGIT\_SEQUENCE$\} is the unicode scalar value represented by the $HEX\_DIGIT\_SEQUENCE$. It is a compile-time error if the value of the $HEX\_DIGIT\_SEQUENCE$ is not a valid unicode scalar value.
\item $\backslash$u\{$HEX\_DIGIT\_SEQUENCE$\} is the Unicode code point represented by the $HEX\_DIGIT\_SEQUENCE$. It is a compile-time error if the value of the $HEX\_DIGIT\_SEQUENCE$ is not a valid Unicode code point.
\item \$ indicating the beginning of an interpolated expression.
\item Otherwise, $\backslash k$ indicates the character $k$ for any $k$ not in $\{n, r, f, b, t, v, x, u\}$.
\end{itemize}

View file

@ -5370,25 +5370,24 @@ class Parser {
}
/**
* Append the character equivalent of the given [scalarValue] to the given
* Append the character equivalent of the given [codePoint] to the given
* [buffer]. Use the [startIndex] and [endIndex] to report an error, and
* don't append anything to the builder, if the scalar value is invalid. The
* don't append anything to the builder, if the code point is invalid. The
* [escapeSequence] is the escape sequence that was parsed to produce the
* scalar value (used for error reporting).
* code point (used for error reporting).
*/
void _appendScalarValue(StringBuffer buffer, String escapeSequence,
int scalarValue, int startIndex, int endIndex) {
if (scalarValue < 0 ||
scalarValue > Character.MAX_CODE_POINT ||
(scalarValue >= 0xD800 && scalarValue <= 0xDFFF)) {
void _appendCodePoint(StringBuffer buffer, String source,
int codePoint, int startIndex, int endIndex) {
if (codePoint < 0 || codePoint > Character.MAX_CODE_POINT) {
String escapeSequence = source.substring(startIndex, endIndex + 1);
_reportErrorForCurrentToken(
ParserErrorCode.INVALID_CODE_POINT, [escapeSequence]);
return;
}
if (scalarValue < Character.MAX_VALUE) {
buffer.writeCharCode(scalarValue);
if (codePoint < Character.MAX_VALUE) {
buffer.writeCharCode(codePoint);
} else {
buffer.write(Character.toChars(scalarValue));
buffer.write(Character.toChars(codePoint));
}
}
@ -7747,8 +7746,7 @@ class Parser {
// Illegal escape sequence: not enough or too many hex digits
_reportErrorForCurrentToken(ParserErrorCode.INVALID_UNICODE_ESCAPE);
}
_appendScalarValue(buffer, lexeme.substring(index, currentIndex + 1),
value, index, currentIndex);
_appendCodePoint(buffer, lexeme, value, index, currentIndex);
return currentIndex + 1;
} else {
if (currentIndex + 3 >= length) {
@ -7767,9 +7765,9 @@ class Parser {
// Illegal escape sequence: invalid hex digits
_reportErrorForCurrentToken(ParserErrorCode.INVALID_UNICODE_ESCAPE);
} else {
_appendScalarValue(
_appendCodePoint(
buffer,
lexeme.substring(index, currentIndex + 1),
lexeme,
(((((Character.digit(firstDigit, 16) << 4) +
Character.digit(secondDigit, 16)) <<
4) +

View file

@ -1468,7 +1468,7 @@ class Foo {
}
void test_invalidCodePoint() {
createParser("'\\uD900'");
createParser("'\\u{110000}'");
StringLiteral literal = parser.parseStringLiteral();
expectNotNullIfNoErrors(literal);
listener.assertErrorsWithCodes([ParserErrorCode.INVALID_CODE_POINT]);

View file

@ -111,13 +111,10 @@ class StringValidator {
Token token, int startOffset, String string, StringQuoting quoting) {
// We need to check for invalid x and u escapes, for line
// terminators in non-multiline strings, and for invalid Unicode
// scalar values (either directly or as u-escape values). We also check
// for unpaired UTF-16 surrogates.
// code points (either directly or as u-escape values).
int length = 0;
int index = startOffset;
bool containsEscape = false;
bool previousWasLeadSurrogate = false;
bool invalidUtf16 = false;
var stringIter = string.codeUnits.iterator;
for (HasNextIterator<int> iter = new HasNextIterator(stringIter);
iter.hasNext;
@ -199,26 +196,13 @@ class StringValidator {
code = value;
}
}
if (code >= 0x10000) length++;
// This handles both unescaped characters and the value of unicode
// escapes.
if (previousWasLeadSurrogate) {
if (!isUtf16TrailSurrogate(code)) {
invalidUtf16 = true;
break;
if (code >= 0x10000) {
length++;
if (code > 0x10FFFF) {
stringParseError("Invalid code point", token, index);
}
previousWasLeadSurrogate = false;
} else if (isUtf16LeadSurrogate(code)) {
previousWasLeadSurrogate = true;
} else if (!isUnicodeScalarValue(code)) {
invalidUtf16 = true;
break;
}
}
if (previousWasLeadSurrogate || invalidUtf16) {
stringParseError("Invalid Utf16 surrogate", token, index);
return null;
}
// String literal successfully validated.
if (quoting.raw || !containsEscape) {
// A string without escapes could just as well have been raw.

View file

@ -484,9 +484,7 @@ void Scanner::ScanEscapedCodePoint(int32_t* code_point) {
}
}
}
if (is_valid &&
((Utf::IsOutOfRange(*code_point) ||
(Utf16::IsSurrogate(*code_point))))) {
if (is_valid && (Utf::IsOutOfRange(*code_point))) {
ErrorMsg("invalid code point");
}
}

View file

@ -192,62 +192,6 @@ static void StringEscapes() {
static void InvalidStringEscapes() {
const GrowableTokenStream& high_start_4 =
Scan("\"\\uD800\"");
EXPECT_EQ(2, high_start_4.length());
CheckKind(high_start_4, 0, Token::kERROR);
EXPECT(high_start_4[0].literal->Equals("invalid code point"));
CheckKind(high_start_4, 1, Token::kEOS);
const GrowableTokenStream& high_start_seq =
Scan("\"\\u{D800}\"");
EXPECT_EQ(2, high_start_seq.length());
CheckKind(high_start_seq, 0, Token::kERROR);
EXPECT(high_start_seq[0].literal->Equals("invalid code point"));
CheckKind(high_start_seq, 1, Token::kEOS);
const GrowableTokenStream& high_end_4 =
Scan("\"\\uDBFF\"");
EXPECT_EQ(2, high_end_4.length());
CheckKind(high_end_4, 0, Token::kERROR);
EXPECT(high_end_4[0].literal->Equals("invalid code point"));
CheckKind(high_end_4, 1, Token::kEOS);
const GrowableTokenStream& high_end_seq =
Scan("\"\\u{DBFF}\"");
EXPECT_EQ(2, high_end_seq.length());
CheckKind(high_end_seq, 0, Token::kERROR);
EXPECT(high_end_seq[0].literal->Equals("invalid code point"));
CheckKind(high_end_seq, 1, Token::kEOS);
const GrowableTokenStream& low_start_4 =
Scan("\"\\uDC00\"");
EXPECT_EQ(2, low_start_4.length());
CheckKind(low_start_4, 0, Token::kERROR);
EXPECT(low_start_4[0].literal->Equals("invalid code point"));
CheckKind(low_start_4, 1, Token::kEOS);
const GrowableTokenStream& low_start_seq =
Scan("\"\\u{DC00}\"");
EXPECT_EQ(2, low_start_seq.length());
CheckKind(low_start_seq, 0, Token::kERROR);
EXPECT(low_start_seq[0].literal->Equals("invalid code point"));
CheckKind(low_start_seq, 1, Token::kEOS);
const GrowableTokenStream& low_end_4 =
Scan("\"\\uDFFF\"");
EXPECT_EQ(2, low_end_4.length());
CheckKind(low_end_4, 0, Token::kERROR);
EXPECT(low_end_4[0].literal->Equals("invalid code point"));
CheckKind(low_end_4, 1, Token::kEOS);
const GrowableTokenStream& low_end_seq =
Scan("\"\\u{DFFF}\"");
EXPECT_EQ(2, low_end_seq.length());
CheckKind(low_end_seq, 0, Token::kERROR);
EXPECT(low_end_seq[0].literal->Equals("invalid code point"));
CheckKind(low_end_seq, 1, Token::kEOS);
const GrowableTokenStream& out_of_range_low =
Scan("\"\\u{110000}\"");
EXPECT_EQ(2, out_of_range_low.length());

View file

@ -0,0 +1,40 @@
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import "package:expect/expect.dart";
// Checks that differently written string literals all produce the same
// sequence of UTF-16 code units, including lone surrogates written as
// four-digit \u escapes and surrogate pairs produced by \u{...} escapes
// above 0xFFFF.
main() {
// The expected string, built from its individual char codes. The last four
// values are the surrogate pairs of U+10000 (0xd800, 0xdc00) and U+10FFFF
// (0xdbff, 0xdfff).
var expect = new String.fromCharCodes([
0, 0x0a, 0x0d, 0x7f, 0xff, 0xffff, 0xd800, 0xdc00, 0xdbff, 0xdfff
]);
// Asserts that [string] equals the expected string.
test(string) {
Expect.equals(expect, string);
}
// Plain escapes of code points.
// Each case is repeated for all four quoting styles: "...", """...""",
// '...' and '''...'''.
test("\x00\x0a\x0d\x7f\xff\uffff\u{10000}\u{10ffff}");
test("""\x00\x0a\x0d\x7f\xff\uffff\u{10000}\u{10ffff}""");
test('\x00\x0a\x0d\x7f\xff\uffff\u{10000}\u{10ffff}');
test('''\x00\x0a\x0d\x7f\xff\uffff\u{10000}\u{10ffff}''');
// Plain escapes of individual code units.
// The surrogate halves are written one by one as four-digit \u escapes.
test("\x00\x0a\x0d\x7f\xff\uffff\ud800\udc00\udbff\udfff");
test("""\x00\x0a\x0d\x7f\xff\uffff\ud800\udc00\udbff\udfff""");
test('\x00\x0a\x0d\x7f\xff\uffff\ud800\udc00\udbff\udfff');
test('''\x00\x0a\x0d\x7f\xff\uffff\ud800\udc00\udbff\udfff''');
// Insert newline into multiline string.
// The literal line break replaces the \x0a escape used above. The
// continuation line must not be indented, since leading whitespace would
// become part of the string.
test("""\x00
\x0d\x7f\xff\uffff\ud800\udc00\udbff\udfff""");
test('''\x00
\x0d\x7f\xff\uffff\ud800\udc00\udbff\udfff''');
// Extract individual code units from multi-code-unit escape strings.
// Indexing with [0] and [1] picks out the two surrogate halves of each
// \u{...} escape, which are then interpolated back together.
test("\x00\x0a\x0d\x7f\xff\uffff"
"${"\u{10000}"[0]}${"\u{10000}"[1]}"
"${"\u{10FFFF}"[0]}${"\u{10FFFF}"[1]}");
// Concatenate pieces whose boundaries fall between the two halves of each
// surrogate pair.
test("\x00\x0a\x0d\x7f\xff\uffff" +
"\ud800" + "\udc00\udbff" + "\udfff");
// Single line string over multiple lines with newlines inside interpolation.
test("\x00\x0a\x0d\x7f\xff${
""
}\uffff\ud800\udc00\udbff\udfff");
}

View file

@ -5,10 +5,8 @@
class StringUnicode4NegativeTest {
static testMain() {
// Unicode escapes must refer to valid Unicode points and not surrogate characters
// Unicode escapes must refer to valid Unicode code points.
String str = "Foo\u{FFFFFF}";
str = "Foo\uD800";
str = "Foo\uDC00";
}
}