mirror of
https://github.com/dart-lang/sdk
synced 2024-09-16 01:45:06 +00:00
918 lines
25 KiB
C++
918 lines
25 KiB
C++
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
|
// for details. All rights reserved. Use of this source code is governed by a
|
|
// BSD-style license that can be found in the LICENSE file.
|
|
|
|
#include "vm/scanner.h"
|
|
|
|
#include "platform/assert.h"
|
|
#include "vm/dart.h"
|
|
#include "vm/flags.h"
|
|
#include "vm/object.h"
|
|
#include "vm/object_store.h"
|
|
#include "vm/symbols.h"
|
|
#include "vm/token.h"
|
|
#include "vm/unicode.h"
|
|
|
|
namespace dart {
|
|
|
|
// Quick access to the locally defined zone() and thread() methods.
|
|
#define Z (zone())
|
|
#define T (thread())
|
|
|
|
class ScanContext : public ZoneAllocated {
|
|
public:
|
|
explicit ScanContext(Scanner* scanner)
|
|
: next_(scanner->saved_context_),
|
|
string_delimiter_(scanner->string_delimiter_),
|
|
string_is_multiline_(scanner->string_is_multiline_),
|
|
brace_level_(scanner->brace_level_) {}
|
|
|
|
void CopyTo(Scanner* scanner) {
|
|
scanner->string_delimiter_ = string_delimiter_;
|
|
scanner->string_is_multiline_ = string_is_multiline_;
|
|
scanner->brace_level_ = brace_level_;
|
|
}
|
|
|
|
ScanContext* next() const { return next_; }
|
|
|
|
private:
|
|
ScanContext* next_;
|
|
const char string_delimiter_;
|
|
const bool string_is_multiline_;
|
|
const int brace_level_;
|
|
};
|
|
|
|
// Keyword lookup table with one entry per keyword token; filled in by
// Scanner::InitOnce() and consulted by Scanner::ScanIdentChars().
Scanner::KeywordTable Scanner::keywords_[Token::kNumKeywords];
|
|
// For each lowercase letter, the index in keywords_ of the first keyword
// starting with that letter, or Token::kNumKeywords if there is none (see
// Scanner::InitOnce()).
int Scanner::keywords_char_offset_[Scanner::kNumLowercaseChars];
|
|
|
|
// Puts the scanner back at the start of the source: reinitializes the
// synthetic newline and empty-string tokens, clears all string/interpolation
// state and loads the first source character into c0_.
void Scanner::Reset() {
  // Fixed properties of the synthetic newline token. Column information is
  // not preserved for it.
  newline_token_.kind = Token::kNEWLINE;
  newline_token_.literal = NULL;
  newline_token_.position.column = 0;

  // Fixed properties of the synthetic empty string token.
  empty_string_token_.kind = Token::kSTRING;
  empty_string_token_.literal = &Symbols::Empty();
  empty_string_token_.position.column = 0;

  // String literal and interpolation state.
  saved_context_ = NULL;
  string_delimiter_ = '\0';
  string_is_multiline_ = false;
  brace_level_ = 0;

  // Line tracking.
  newline_seen_ = false;
  prev_token_line_ = 1;
  c0_pos_.line = 1;
  c0_pos_.column = 0;

  // Position the cursor just before the first character; the ReadChar() call
  // below then advances onto it.
  token_start_ = 0;
  lookahead_pos_ = -1;
  c0_ = '\0';
  ReadChar();
}
|
|
|
|
// Creates a scanner over 'src'. 'private_key' is the string that
// ScanIdentChars() appends to identifiers starting with
// Library::kPrivateIdentifierStart. The thread and zone are taken from the
// current thread, and the scanner starts out reset to the beginning of 'src'.
Scanner::Scanner(const String& src, const String& private_key)
    : source_(src),
      source_length_(src.Length()),
      saved_context_(NULL),
      private_key_(String::ZoneHandle(private_key.raw())),
      char_at_func_(src.CharAtFunc()),
      thread_(Thread::Current()),
      zone_(thread_->zone()) {
  Reset();
}
|
|
|
|
// The destructor has nothing to do; its body is empty.
Scanner::~Scanner() {}
|
|
|
|
void Scanner::ErrorMsg(const char* msg) {
|
|
current_token_.kind = Token::kERROR;
|
|
current_token_.literal = &String::ZoneHandle(Z, Symbols::New(T, msg));
|
|
current_token_.position = c0_pos_;
|
|
token_start_ = lookahead_pos_;
|
|
current_token_.offset = lookahead_pos_;
|
|
}
|
|
|
|
// Saves the current string state on the context stack and switches the
// scanner to non-string state for the interpolation expression that follows.
void Scanner::PushContext() {
  // The new context captures the current state (including the old
  // saved_context_ as its next link) before it becomes the stack top.
  saved_context_ = new (Z) ScanContext(this);
  brace_level_ = 1;  // Account for the opening ${ token.
  string_is_multiline_ = false;
  string_delimiter_ = '\0';
}
|
|
|
|
void Scanner::PopContext() {
|
|
ASSERT(saved_context_ != NULL);
|
|
ASSERT(brace_level_ == 0);
|
|
ASSERT(string_delimiter_ == '\0');
|
|
ScanContext* ctx = saved_context_;
|
|
ctx->CopyTo(this);
|
|
saved_context_ = ctx->next();
|
|
ASSERT(string_delimiter_ != '\0');
|
|
}
|
|
|
|
// Records 'delimiter' as the delimiter of the string literal now being
// scanned.
void Scanner::BeginStringLiteral(const char delimiter) {
  string_delimiter_ = delimiter;
}
|
|
|
|
// Clears the string literal state (delimiter and multiline flag).
void Scanner::EndStringLiteral() {
  string_is_multiline_ = false;
  string_delimiter_ = '\0';
}
|
|
|
|
// Returns true if 'c' is an ASCII letter (a-z or A-Z).
bool Scanner::IsLetter(int32_t c) {
  if (('a' <= c) && (c <= 'z')) {
    return true;
  }
  return ('A' <= c) && (c <= 'Z');
}
|
|
|
|
// Returns true if 'c' is one of the ASCII digits 0-9.
bool Scanner::IsDecimalDigit(int32_t c) {
  return (c >= '0') && (c <= '9');
}
|
|
|
|
bool Scanner::IsNumberStart(int32_t ch) {
|
|
return IsDecimalDigit(ch) || ch == '.';
|
|
}
|
|
|
|
bool Scanner::IsHexDigit(int32_t c) {
|
|
return IsDecimalDigit(c) || (('A' <= c) && (c <= 'F')) ||
|
|
(('a' <= c) && (c <= 'f'));
|
|
}
|
|
|
|
bool Scanner::IsIdentStartChar(int32_t c) {
|
|
return IsLetter(c) || (c == '_') || (c == '$');
|
|
}
|
|
|
|
bool Scanner::IsIdentChar(int32_t c) {
|
|
return IsLetter(c) || IsDecimalDigit(c) || (c == '_') || (c == '$');
|
|
}
|
|
|
|
bool Scanner::IsIdent(const String& str) {
|
|
if (!str.IsOneByteString()) {
|
|
return false;
|
|
}
|
|
if (str.Length() == 0 || !IsIdentStartChar(CallCharAt()(str, 0))) {
|
|
return false;
|
|
}
|
|
for (int i = 1; i < str.Length(); i++) {
|
|
if (!IsIdentChar(CallCharAt()(str, i))) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// This method is used when parsing integers in Dart code. We
|
|
// are reusing the Scanner's handling of number literals in that situation.
|
|
bool Scanner::IsValidInteger(const String& str,
|
|
bool* is_positive,
|
|
const String** value) {
|
|
Scanner s(str, Symbols::Empty());
|
|
TokenDescriptor tokens[3];
|
|
s.Scan();
|
|
tokens[0] = s.current_token();
|
|
s.Scan();
|
|
tokens[1] = s.current_token();
|
|
s.Scan();
|
|
tokens[2] = s.current_token();
|
|
|
|
if ((tokens[0].kind == Token::kINTEGER) && (tokens[1].kind == Token::kEOS)) {
|
|
*is_positive = true;
|
|
*value = tokens[0].literal;
|
|
return true;
|
|
}
|
|
if (((tokens[0].kind == Token::kADD) || (tokens[0].kind == Token::kSUB)) &&
|
|
(tokens[1].kind == Token::kINTEGER) && (tokens[2].kind == Token::kEOS)) {
|
|
// Check there is no space between "+/-" and number.
|
|
if ((tokens[0].offset + 1) != tokens[1].offset) {
|
|
return false;
|
|
}
|
|
*is_positive = tokens[0].kind == Token::kADD;
|
|
*value = tokens[1].literal;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void Scanner::ReadChar() {
|
|
if (lookahead_pos_ < source_length_) {
|
|
if (c0_ == '\n') {
|
|
newline_seen_ = true;
|
|
c0_pos_.line++;
|
|
c0_pos_.column = 0;
|
|
if (CallCharAt()(source_, lookahead_pos_) == '\r') {
|
|
// Replace a sequence of '\r' '\n' with a single '\n'.
|
|
if (LookaheadChar(1) == '\n') {
|
|
lookahead_pos_++;
|
|
}
|
|
}
|
|
}
|
|
lookahead_pos_++;
|
|
c0_pos_.column++;
|
|
c0_ = LookaheadChar(0);
|
|
// Replace '\r' with '\n'.
|
|
if (c0_ == '\r') {
|
|
c0_ = '\n';
|
|
}
|
|
}
|
|
}
|
|
|
|
// Look ahead 'how_many' characters. Returns the character, or '\0' if
|
|
// the lookahead position is beyond the end of the string. Does not
|
|
// normalize line end characters into '\n'.
|
|
int32_t Scanner::LookaheadChar(int how_many) {
|
|
ASSERT(how_many >= 0);
|
|
int32_t lookahead_char = '\0';
|
|
if (lookahead_pos_ + how_many < source_length_) {
|
|
lookahead_char = CallCharAt()(source_, lookahead_pos_ + how_many);
|
|
}
|
|
return lookahead_char;
|
|
}
|
|
|
|
void Scanner::ConsumeWhiteSpace() {
|
|
while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n') {
|
|
ReadChar();
|
|
}
|
|
}
|
|
|
|
void Scanner::ConsumeLineComment() {
|
|
ASSERT(c0_ == '/');
|
|
while (c0_ != '\n' && c0_ != '\0') {
|
|
ReadChar();
|
|
}
|
|
ReadChar();
|
|
current_token_.kind = Token::kWHITESP;
|
|
}
|
|
|
|
// Consumes a block comment, starting at the '*' of its opening "/*".
// Block comments nest: every inner "/*" must be closed by its own "*/".
// On success the current token becomes whitespace; if the source ends before
// the comment is closed, the current token kind is set to Token::kILLEGAL.
void Scanner::ConsumeBlockComment() {
  ASSERT(c0_ == '*');
  ReadChar();
  int nesting_level = 1;

  while (true) {
    // Keep the previous character at full width. Narrowing it to 'char'
    // would make a non-ASCII character whose low byte equals '/' or '*'
    // (e.g. U+012F or U+012A) look like a comment delimiter.
    const int32_t c = c0_;
    ReadChar();
    if (c0_ == '\0') {
      break;
    }
    if (c == '/' && c0_ == '*') {
      nesting_level++;
      ReadChar();  // Consume asterisk.
    } else if (c == '*' && c0_ == '/') {
      nesting_level--;
      ReadChar();  // Consume slash.
      if (nesting_level == 0) {
        break;
      }
    }
  }
  current_token_.kind =
      (nesting_level == 0) ? Token::kWHITESP : Token::kILLEGAL;
}
|
|
|
|
void Scanner::ScanIdentChars(bool allow_dollar) {
|
|
ASSERT(IsIdentStartChar(c0_));
|
|
ASSERT(allow_dollar || (c0_ != '$'));
|
|
int ident_length = 0;
|
|
int ident_pos = lookahead_pos_;
|
|
int32_t ident_char0 = CallCharAt()(source_, ident_pos);
|
|
while (IsIdentChar(c0_) && (allow_dollar || (c0_ != '$'))) {
|
|
ReadChar();
|
|
ident_length++;
|
|
}
|
|
|
|
// Check whether the characters we read are a known keyword.
|
|
// Note, can't use strcmp since token_chars is not null-terminated.
|
|
if (('a' <= ident_char0) && (ident_char0 <= 'z')) {
|
|
int i = keywords_char_offset_[ident_char0 - 'a'];
|
|
while ((i < Token::kNumKeywords) &&
|
|
(keywords_[i].keyword_chars[0] <= ident_char0)) {
|
|
if (keywords_[i].keyword_len == ident_length) {
|
|
const char* keyword = keywords_[i].keyword_chars;
|
|
int char_pos = 1;
|
|
while ((char_pos < ident_length) &&
|
|
(keyword[char_pos] ==
|
|
CallCharAt()(source_, ident_pos + char_pos))) {
|
|
char_pos++;
|
|
}
|
|
if (char_pos == ident_length) {
|
|
current_token_.literal = keywords_[i].keyword_symbol;
|
|
current_token_.kind = keywords_[i].kind;
|
|
return;
|
|
}
|
|
}
|
|
i++;
|
|
}
|
|
}
|
|
|
|
// We did not read a keyword.
|
|
current_token_.kind = Token::kIDENT;
|
|
String& literal = String::ZoneHandle(Z);
|
|
if (ident_char0 == Library::kPrivateIdentifierStart) {
|
|
// Private identifiers are mangled on a per library basis.
|
|
literal = String::SubString(T, source_, ident_pos, ident_length);
|
|
literal = Symbols::FromConcat(T, literal, private_key_);
|
|
} else {
|
|
literal = Symbols::New(T, source_, ident_pos, ident_length);
|
|
}
|
|
current_token_.literal = &literal;
|
|
}
|
|
|
|
// Parse integer or double number literal of format:
|
|
// NUMBER = INTEGER | DOUBLE
|
|
// INTEGER = D+ | (("0x" | "0X") H+)
|
|
// DOUBLE = ((D+ ["." D*]) | ("." D+)) [ EXPONENT ]
|
|
// EXPONENT = ("e" | "E") ["+" | "-"] D+
|
|
void Scanner::ScanNumber(bool dec_point_seen) {
|
|
ASSERT(IsDecimalDigit(c0_));
|
|
char first_digit = c0_;
|
|
|
|
Recognize(dec_point_seen ? Token::kDOUBLE : Token::kINTEGER);
|
|
if (!dec_point_seen && first_digit == '0' && (c0_ == 'x' || c0_ == 'X')) {
|
|
ReadChar();
|
|
if (!IsHexDigit(c0_)) {
|
|
ErrorMsg("hexadecimal digit expected");
|
|
return;
|
|
}
|
|
while (IsHexDigit(c0_)) {
|
|
ReadChar();
|
|
}
|
|
} else {
|
|
while (IsDecimalDigit(c0_)) {
|
|
ReadChar();
|
|
}
|
|
if (c0_ == '.' && !dec_point_seen && IsDecimalDigit(LookaheadChar(1))) {
|
|
Recognize(Token::kDOUBLE);
|
|
while (IsDecimalDigit(c0_)) {
|
|
ReadChar();
|
|
}
|
|
}
|
|
if (((c0_ == 'e') || (c0_ == 'E')) &&
|
|
(IsDecimalDigit(LookaheadChar(1)) || (LookaheadChar(1) == '-') ||
|
|
(LookaheadChar(1) == '+'))) {
|
|
Recognize(Token::kDOUBLE);
|
|
if ((c0_ == '-') || (c0_ == '+')) {
|
|
ReadChar();
|
|
}
|
|
if (!IsDecimalDigit(c0_)) {
|
|
ErrorMsg("missing exponent digits");
|
|
return;
|
|
}
|
|
while (IsDecimalDigit(c0_)) {
|
|
ReadChar();
|
|
}
|
|
}
|
|
}
|
|
if (current_token_.kind != Token::kILLEGAL) {
|
|
intptr_t len = lookahead_pos_ - token_start_;
|
|
const String& str =
|
|
String::ZoneHandle(Z, Symbols::New(T, source_, token_start_, len));
|
|
current_token_.literal = &str;
|
|
}
|
|
}
|
|
|
|
void Scanner::SkipLine() {
|
|
while (c0_ != '\n' && c0_ != '\0') {
|
|
ReadChar();
|
|
}
|
|
}
|
|
|
|
// Scans a "#!" script tag. The current character is the '#'; the rest of the
// line is skipped.
void Scanner::ScanScriptTag() {
  ReadChar();  // Step from '#' onto '!'.
  ASSERT(c0_ == '!');
  Recognize(Token::kSCRIPTTAG);
  // The script tag extends to the end of the line. Just treat this
  // similar to a line comment.
  SkipLine();
}
|
|
|
|
void Scanner::ScanLiteralString(bool is_raw) {
|
|
ASSERT(!IsScanningString());
|
|
ASSERT(c0_ == '"' || c0_ == '\'');
|
|
|
|
// Entering string scanning mode.
|
|
BeginStringLiteral(c0_);
|
|
ReadChar();
|
|
|
|
if ((c0_ == string_delimiter_) && (LookaheadChar(1) == string_delimiter_)) {
|
|
string_is_multiline_ = true;
|
|
ReadChar(); // Skip two additional string delimiters.
|
|
ReadChar();
|
|
}
|
|
ScanLiteralStringChars(is_raw, string_is_multiline_);
|
|
}
|
|
|
|
bool Scanner::ScanHexDigits(int digits, int32_t* value) {
|
|
*value = 0;
|
|
for (int i = 0; i < digits; ++i) {
|
|
ReadChar();
|
|
if (!IsHexDigit(c0_)) {
|
|
ErrorMsg("too few hexadecimal digits");
|
|
return false;
|
|
}
|
|
*value <<= 4;
|
|
*value |= Utils::HexDigitToInt(c0_);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Scanner::ScanHexDigits(int min_digits, int max_digits, int32_t* value) {
|
|
*value = 0;
|
|
ReadChar();
|
|
for (int i = 0; i < max_digits; ++i) {
|
|
if (!IsHexDigit(c0_)) {
|
|
if (i < min_digits) {
|
|
ErrorMsg("hexadecimal digit expected");
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
*value <<= 4;
|
|
*value |= Utils::HexDigitToInt(c0_);
|
|
ReadChar();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void Scanner::ScanEscapedCodePoint(int32_t* code_point) {
|
|
ASSERT(c0_ == 'u' || c0_ == 'x');
|
|
bool is_valid;
|
|
if (c0_ == 'x') {
|
|
is_valid = ScanHexDigits(2, code_point);
|
|
} else if (c0_ == 'u' && LookaheadChar(1) != '{') {
|
|
is_valid = ScanHexDigits(4, code_point);
|
|
} else {
|
|
ReadChar(); // Skip left curly bracket.
|
|
is_valid = ScanHexDigits(1, 6, code_point);
|
|
if (is_valid) {
|
|
if (c0_ != '}') {
|
|
ErrorMsg("expected '}' after character code");
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
if (is_valid && (Utf::IsOutOfRange(*code_point))) {
|
|
ErrorMsg("invalid code point");
|
|
}
|
|
}
|
|
|
|
void Scanner::ScanLiteralStringChars(bool is_raw, bool remove_whitespace) {
|
|
GrowableArray<int32_t> string_chars(64);
|
|
|
|
ASSERT(IsScanningString());
|
|
// We are at the first character of a string literal piece. A string literal
|
|
// can be broken up into multiple pieces by string interpolation.
|
|
while (true) {
|
|
if ((c0_ == '\0') || ((c0_ == '\n') && !string_is_multiline_)) {
|
|
ErrorMsg("unterminated string literal");
|
|
EndStringLiteral();
|
|
return;
|
|
}
|
|
if (c0_ == '\\' && !is_raw) {
|
|
// Parse escape sequence.
|
|
int32_t escape_char = '\0';
|
|
ReadChar();
|
|
switch (c0_) {
|
|
case 'n':
|
|
escape_char = '\n';
|
|
break;
|
|
case 'r':
|
|
escape_char = '\r';
|
|
break;
|
|
case 'f':
|
|
escape_char = '\f';
|
|
break;
|
|
case 't':
|
|
escape_char = '\t';
|
|
break;
|
|
case 'b':
|
|
escape_char = '\b';
|
|
break;
|
|
case 'v':
|
|
escape_char = '\v';
|
|
break;
|
|
case 'u':
|
|
case 'x': {
|
|
ScanEscapedCodePoint(&escape_char);
|
|
break;
|
|
}
|
|
default:
|
|
if ((c0_ == '\0') || ((c0_ == '\n') && !string_is_multiline_)) {
|
|
ErrorMsg("unterminated string literal");
|
|
EndStringLiteral();
|
|
return;
|
|
}
|
|
escape_char = c0_;
|
|
break;
|
|
}
|
|
string_chars.Add(escape_char);
|
|
} else if (c0_ == '$' && !is_raw) {
|
|
// Scanned a string piece.
|
|
ASSERT(string_chars.data() != NULL);
|
|
// Strings are canonicalized: Allocate a symbol.
|
|
current_token_.literal = &String::ZoneHandle(
|
|
Z, Symbols::FromUTF32(T, string_chars.data(), string_chars.length()));
|
|
// Preserve error tokens.
|
|
if (current_token_.kind != Token::kERROR) {
|
|
current_token_.kind = Token::kSTRING;
|
|
}
|
|
return;
|
|
} else if (c0_ == string_delimiter_) {
|
|
// Check if we are at the end of the string literal.
|
|
if (!string_is_multiline_ || ((LookaheadChar(1) == string_delimiter_) &&
|
|
(LookaheadChar(2) == string_delimiter_))) {
|
|
if (string_is_multiline_) {
|
|
ReadChar(); // Skip two string delimiters.
|
|
ReadChar();
|
|
}
|
|
// Preserve error tokens.
|
|
if (current_token_.kind == Token::kERROR) {
|
|
ReadChar();
|
|
} else {
|
|
Recognize(Token::kSTRING);
|
|
ASSERT(string_chars.data() != NULL);
|
|
// Strings are canonicalized: Allocate a symbol.
|
|
current_token_.literal =
|
|
&String::ZoneHandle(Z, Symbols::FromUTF32(T, string_chars.data(),
|
|
string_chars.length()));
|
|
}
|
|
EndStringLiteral();
|
|
return;
|
|
} else {
|
|
string_chars.Add(string_delimiter_);
|
|
}
|
|
} else {
|
|
// Test for a two part utf16 sequence, and decode to a code point
|
|
// if we find one.
|
|
int32_t ch1 = c0_;
|
|
if (Utf16::IsLeadSurrogate(ch1)) {
|
|
const int32_t ch2 = LookaheadChar(1);
|
|
if (Utf16::IsTrailSurrogate(ch2)) {
|
|
ch1 = Utf16::Decode(ch1, ch2);
|
|
ReadChar();
|
|
}
|
|
}
|
|
string_chars.Add(ch1);
|
|
}
|
|
// The first line of a multi-line string is discarded if it only
|
|
// contains whitespace.
|
|
if (remove_whitespace && (string_chars.Last() == '\n')) {
|
|
bool whitespace_only = true;
|
|
// Last character is the newline, don't inspect it.
|
|
const intptr_t len = string_chars.length() - 1;
|
|
for (int i = 0; i < len; i++) {
|
|
int32_t ch = string_chars[i];
|
|
if ((ch != ' ') && (ch != '\t')) {
|
|
// Non-whitespace character, keep the first line.
|
|
whitespace_only = false;
|
|
break;
|
|
}
|
|
}
|
|
if (whitespace_only) {
|
|
string_chars.Clear(); // Discard characters on first line.
|
|
}
|
|
remove_whitespace = false;
|
|
}
|
|
ReadChar();
|
|
}
|
|
}
|
|
|
|
void Scanner::Scan() {
|
|
newline_seen_ = false;
|
|
|
|
do {
|
|
if (!IsScanningString()) {
|
|
ConsumeWhiteSpace();
|
|
}
|
|
token_start_ = lookahead_pos_;
|
|
current_token_.offset = lookahead_pos_;
|
|
current_token_.position = c0_pos_;
|
|
current_token_.literal = NULL;
|
|
current_token_.kind = Token::kILLEGAL;
|
|
if (IsScanningString()) {
|
|
if (c0_ == '$') {
|
|
ReadChar(); // Skip the '$' character.
|
|
if (IsIdentStartChar(c0_) && (c0_ != '$')) {
|
|
ScanIdentNoDollar();
|
|
current_token_.kind = Token::kINTERPOL_VAR;
|
|
} else if (c0_ == '{') {
|
|
Recognize(Token::kINTERPOL_START);
|
|
PushContext();
|
|
} else {
|
|
ErrorMsg("illegal character after $ in string interpolation");
|
|
EndStringLiteral();
|
|
break;
|
|
}
|
|
} else {
|
|
ScanLiteralStringChars(false, false);
|
|
}
|
|
break;
|
|
}
|
|
switch (c0_) {
|
|
case '\0':
|
|
current_token_.kind = Token::kEOS;
|
|
break;
|
|
|
|
case '+': // + ++ +=
|
|
Recognize(Token::kADD);
|
|
if (c0_ == '+') {
|
|
Recognize(Token::kINCR);
|
|
} else if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_ADD);
|
|
}
|
|
break;
|
|
|
|
case '-': // - -- -=
|
|
Recognize(Token::kSUB);
|
|
if (c0_ == '-') {
|
|
Recognize(Token::kDECR);
|
|
} else if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_SUB);
|
|
}
|
|
break;
|
|
|
|
case '*': // * *=
|
|
Recognize(Token::kMUL);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_MUL);
|
|
}
|
|
break;
|
|
|
|
case '%': // % %=
|
|
Recognize(Token::kMOD);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_MOD);
|
|
}
|
|
break;
|
|
|
|
case '/': // / /= // /*
|
|
Recognize(Token::kDIV);
|
|
if (c0_ == '/') {
|
|
ConsumeLineComment();
|
|
} else if (c0_ == '*') {
|
|
ConsumeBlockComment();
|
|
} else if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_DIV);
|
|
}
|
|
break;
|
|
|
|
case '&': // & &= &&
|
|
Recognize(Token::kBIT_AND);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_AND);
|
|
} else if (c0_ == '&') {
|
|
Recognize(Token::kAND);
|
|
}
|
|
break;
|
|
|
|
case '|': // | |= ||
|
|
Recognize(Token::kBIT_OR);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_OR);
|
|
} else if (c0_ == '|') {
|
|
Recognize(Token::kOR);
|
|
}
|
|
break;
|
|
|
|
case '^': // ^ ^=
|
|
Recognize(Token::kBIT_XOR);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_XOR);
|
|
}
|
|
break;
|
|
|
|
case '[': // [ [] []=
|
|
Recognize(Token::kLBRACK);
|
|
if (c0_ == ']') {
|
|
Recognize(Token::kINDEX);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_INDEX);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ']': // ]
|
|
Recognize(Token::kRBRACK);
|
|
break;
|
|
|
|
case '<': // < <= << <<=
|
|
Recognize(Token::kLT);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kLTE);
|
|
} else if (c0_ == '<') {
|
|
Recognize(Token::kSHL);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_SHL);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case '>': // > >= >> >>=
|
|
Recognize(Token::kGT);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kGTE);
|
|
} else if (c0_ == '>') {
|
|
Recognize(Token::kSHR);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_SHR);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case '!': // ! !=
|
|
Recognize(Token::kNOT);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kNE);
|
|
}
|
|
break;
|
|
|
|
case '~':
|
|
Recognize(Token::kBIT_NOT);
|
|
if (c0_ == '/') {
|
|
Recognize(Token::kTRUNCDIV);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_TRUNCDIV);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case '=': // = == =>
|
|
Recognize(Token::kASSIGN);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kEQ);
|
|
} else if (c0_ == '>') {
|
|
Recognize(Token::kARROW);
|
|
}
|
|
break;
|
|
|
|
case '.': // . .. Number
|
|
Recognize(Token::kPERIOD);
|
|
if (c0_ == '.') {
|
|
Recognize(Token::kCASCADE);
|
|
} else if (IsDecimalDigit(c0_)) {
|
|
ScanNumber(true);
|
|
}
|
|
break;
|
|
|
|
case '?': // ? ?. ?? ??=
|
|
Recognize(Token::kCONDITIONAL);
|
|
if (c0_ == '.') {
|
|
Recognize(Token::kQM_PERIOD);
|
|
} else if (c0_ == '?') {
|
|
Recognize(Token::kIFNULL);
|
|
if (c0_ == '=') {
|
|
Recognize(Token::kASSIGN_COND);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ':':
|
|
Recognize(Token::kCOLON);
|
|
break;
|
|
|
|
case ';':
|
|
Recognize(Token::kSEMICOLON);
|
|
break;
|
|
|
|
case '{':
|
|
Recognize(Token::kLBRACE);
|
|
if (IsNestedContext()) {
|
|
brace_level_++;
|
|
}
|
|
break;
|
|
|
|
case '}':
|
|
Recognize(Token::kRBRACE);
|
|
if (IsNestedContext()) {
|
|
ASSERT(brace_level_ > 0);
|
|
brace_level_--;
|
|
if (brace_level_ == 0) {
|
|
current_token_.kind = Token::kINTERPOL_END;
|
|
PopContext();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case '(':
|
|
Recognize(Token::kLPAREN);
|
|
break;
|
|
|
|
case ')':
|
|
Recognize(Token::kRPAREN);
|
|
break;
|
|
|
|
case ',':
|
|
Recognize(Token::kCOMMA);
|
|
break;
|
|
|
|
case '@':
|
|
Recognize(Token::kAT);
|
|
break;
|
|
|
|
case 'r':
|
|
if ((LookaheadChar(1) == '"') || (LookaheadChar(1) == '\'')) {
|
|
ReadChar();
|
|
ScanLiteralString(true);
|
|
} else {
|
|
ScanIdent();
|
|
}
|
|
break;
|
|
|
|
case '"':
|
|
case '\'':
|
|
ScanLiteralString(false);
|
|
break;
|
|
|
|
case '#':
|
|
if (LookaheadChar(1) == '!') {
|
|
ScanScriptTag();
|
|
} else {
|
|
Recognize(Token::kHASH);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (IsIdentStartChar(c0_)) {
|
|
ScanIdent();
|
|
} else if (IsDecimalDigit(c0_)) {
|
|
ScanNumber(false);
|
|
} else {
|
|
char msg[128];
|
|
char utf8_char[5];
|
|
int len = Utf8::Encode(c0_, utf8_char);
|
|
utf8_char[len] = '\0';
|
|
OS::SNPrint(msg, sizeof(msg), "unexpected character: '%s' (U+%04X)\n",
|
|
utf8_char, c0_);
|
|
ErrorMsg(msg);
|
|
ReadChar();
|
|
}
|
|
}
|
|
} while (current_token_.kind == Token::kWHITESP);
|
|
}
|
|
|
|
// Scans the whole source from the beginning and hands every token, up to and
// including the end-of-source token, to 'collector'. In addition, one
// synthetic newline token is added for every line passed since the previous
// token, and a synthetic empty string token is added in front of an
// interpolation token that is the first token after such newlines.
void Scanner::ScanAll(TokenCollector* collector) {
  Reset();
  do {
    Scan();
    const bool inserted_new_lines =
        current_token_.position.line > prev_token_line_;
    // One newline token per line between the previous and the current token.
    for (intptr_t line = prev_token_line_;
         line < current_token_.position.line; line++) {
      newline_token_.position.line = line;
      collector->AddToken(newline_token_);
    }
    const bool starts_interpolation =
        (current_token_.kind == Token::kINTERPOL_VAR) ||
        (current_token_.kind == Token::kINTERPOL_START);
    if (inserted_new_lines && starts_interpolation) {
      // NOTE: If this changes, be sure to update
      // Script::GenerateLineNumberArray to stay in sync.
      empty_string_token_.position.line = current_token_.position.line;
      collector->AddToken(empty_string_token_);
    }
    collector->AddToken(current_token_);
    prev_token_line_ = current_token_.position.line;
  } while (current_token_.kind != Token::kEOS);
}
|
|
|
|
void Scanner::ScanTo(intptr_t token_index) {
|
|
ASSERT(token_index >= 0);
|
|
intptr_t index = 0;
|
|
Reset();
|
|
do {
|
|
Scan();
|
|
bool inserted_new_lines = false;
|
|
for (intptr_t diff = current_token_.position.line - prev_token_line_;
|
|
diff > 0; diff--) {
|
|
// Advance the index to account for tokens added in ScanAll.
|
|
index++;
|
|
inserted_new_lines = true;
|
|
}
|
|
if (inserted_new_lines &&
|
|
((current_token_.kind == Token::kINTERPOL_VAR) ||
|
|
(current_token_.kind == Token::kINTERPOL_START))) {
|
|
// Advance the index to account for tokens added in ScanAll.
|
|
index++;
|
|
}
|
|
index++;
|
|
prev_token_line_ = current_token_.position.line;
|
|
} while ((token_index >= index) && (current_token_.kind != Token::kEOS));
|
|
}
|
|
|
|
void Scanner::InitOnce() {
|
|
ASSERT(Isolate::Current() == Dart::vm_isolate());
|
|
for (int i = 0; i < kNumLowercaseChars; i++) {
|
|
keywords_char_offset_[i] = Token::kNumKeywords;
|
|
}
|
|
for (int i = 0; i < Token::kNumKeywords; i++) {
|
|
Token::Kind token = static_cast<Token::Kind>(Token::kFirstKeyword + i);
|
|
keywords_[i].kind = token;
|
|
keywords_[i].keyword_chars = Token::Str(token);
|
|
keywords_[i].keyword_len = strlen(Token::Str(token));
|
|
keywords_[i].keyword_symbol = &Symbols::Token(token);
|
|
|
|
int ch = keywords_[i].keyword_chars[0] - 'a';
|
|
if (keywords_char_offset_[ch] == Token::kNumKeywords) {
|
|
keywords_char_offset_[ch] = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace dart
|