2014-11-03 10:00:31 +00:00
|
|
|
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
|
|
|
|
// for details. All rights reserved. Use of this source code is governed by a
|
|
|
|
// BSD-style license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
#include "vm/regexp_assembler.h"
|
|
|
|
|
2019-04-29 09:11:48 +00:00
|
|
|
#include "unicode/uchar.h"
|
|
|
|
|
|
|
|
#include "platform/unicode.h"
|
|
|
|
|
2016-02-26 14:59:36 +00:00
|
|
|
#include "vm/flags.h"
|
2014-11-26 09:32:43 +00:00
|
|
|
#include "vm/regexp.h"
|
2020-04-16 22:59:03 +00:00
|
|
|
#include "vm/runtime_entry.h"
|
2017-07-11 10:01:47 +00:00
|
|
|
#include "vm/unibrow-inl.h"
|
2014-11-03 10:00:31 +00:00
|
|
|
|
|
|
|
namespace dart {
|
|
|
|
|
2017-07-11 10:01:47 +00:00
|
|
|
void PrintUtf16(uint16_t c) {
|
|
|
|
const char* format =
|
|
|
|
(0x20 <= c && c <= 0x7F) ? "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
|
2018-06-13 19:51:40 +00:00
|
|
|
OS::PrintErr(format, c);
|
2017-07-11 10:01:47 +00:00
|
|
|
}
|
|
|
|
|
2020-04-25 05:21:27 +00:00
|
|
|
// Compares two equally long slices of the same string while ignoring case,
// one String::CharAt() value at a time.
//
// Every argument arrives as a raw word: `str_raw` is cast to a StringPtr,
// while `lhs_index_raw`, `rhs_index_raw` and `length_raw` are cast to SmiPtr.
// The slices start at the two indices and each span `length` characters.
//
// Returns Bool::True() (as a raw word) when each pair of characters is either
// identical or maps to the same value under unibrow::Ecma262Canonicalize;
// returns Bool::False() at the first pair that does not.
uword /*BoolPtr*/ CaseInsensitiveCompareUCS2(uword /*StringPtr*/ str_raw,
                                             uword /*SmiPtr*/ lhs_index_raw,
                                             uword /*SmiPtr*/ rhs_index_raw,
                                             uword /*SmiPtr*/ length_raw) {
  const String& subject = String::Handle(static_cast<StringPtr>(str_raw));
  const Smi& lhs_start = Smi::Handle(static_cast<SmiPtr>(lhs_index_raw));
  const Smi& rhs_start = Smi::Handle(static_cast<SmiPtr>(rhs_index_raw));
  const Smi& count = Smi::Handle(static_cast<SmiPtr>(length_raw));

  // TODO(zerny): Optimize as single instance. V8 has this as an
  // isolate member.
  unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;

  for (intptr_t offset = 0; offset < count.Value(); offset++) {
    const int32_t lhs_char = subject.CharAt(lhs_start.Value() + offset);
    const int32_t rhs_char = subject.CharAt(rhs_start.Value() + offset);
    if (lhs_char == rhs_char) {
      continue;  // Exact match, no canonicalization needed.
    }

    // Canonicalize the left character first; the right one is only
    // canonicalized if that alone does not produce a match.
    int32_t lhs_canonical[1] = {lhs_char};
    canonicalize.get(lhs_char, '\0', lhs_canonical);
    if (lhs_canonical[0] == rhs_char) {
      continue;
    }

    int32_t rhs_canonical[1] = {rhs_char};
    canonicalize.get(rhs_char, '\0', rhs_canonical);
    if (lhs_canonical[0] != rhs_canonical[0]) {
      return static_cast<uword>(Bool::False().ptr());
    }
  }

  return static_cast<uword>(Bool::True().ptr());
}
|
|
|
|
|
2020-04-25 05:21:27 +00:00
|
|
|
// Compares two equally long slices of the same string while ignoring case,
// treating well-formed surrogate pairs as single code points.
//
// Every argument arrives as a raw word: `str_raw` is cast to a StringPtr,
// while `lhs_index_raw`, `rhs_index_raw` and `length_raw` are cast to SmiPtr.
// The slices start at the two indices and each span `length` code units.
//
// Returns Bool::True() (as a raw word) when every compared pair is equal
// after u_foldCase(..., U_FOLD_CASE_DEFAULT), otherwise Bool::False().
uword /*BoolPtr*/ CaseInsensitiveCompareUTF16(uword /*StringPtr*/ str_raw,
                                              uword /*SmiPtr*/ lhs_index_raw,
                                              uword /*SmiPtr*/ rhs_index_raw,
                                              uword /*SmiPtr*/ length_raw) {
  const String& subject = String::Handle(static_cast<StringPtr>(str_raw));
  const Smi& lhs_start = Smi::Handle(static_cast<SmiPtr>(lhs_index_raw));
  const Smi& rhs_start = Smi::Handle(static_cast<SmiPtr>(rhs_index_raw));
  const Smi& count_smi = Smi::Handle(static_cast<SmiPtr>(length_raw));

  const intptr_t lhs_base = lhs_start.Value();
  const intptr_t rhs_base = rhs_start.Value();
  const intptr_t count = count_smi.Value();

  for (intptr_t pos = 0; pos < count; pos++) {
    int32_t lhs_char = subject.CharAt(lhs_base + pos);
    int32_t rhs_char = subject.CharAt(rhs_base + pos);

    if (Utf16::IsLeadSurrogate(lhs_char)) {
      // Non-BMP characters do not have case-equivalents in the BMP, so a
      // lead surrogate on the left can only match a lead surrogate on the
      // right.
      if (!Utf16::IsLeadSurrogate(rhs_char)) {
        return static_cast<uword>(Bool::False().ptr());
      }

      const intptr_t next = pos + 1;
      if (next < count) {
        const uint16_t lhs_trail = subject.CharAt(lhs_base + next);
        const uint16_t rhs_trail = subject.CharAt(rhs_base + next);
        const bool both_paired = Utf16::IsTrailSurrogate(lhs_trail) &&
                                 Utf16::IsTrailSurrogate(rhs_trail);
        if (both_paired) {
          // Combine each pair into one code point and consume the trail
          // units so the next iteration starts after them.
          lhs_char = Utf16::Decode(lhs_char, lhs_trail);
          rhs_char = Utf16::Decode(rhs_char, rhs_trail);
          pos = next;
        }
      }
    }

    if (u_foldCase(lhs_char, U_FOLD_CASE_DEFAULT) !=
        u_foldCase(rhs_char, U_FOLD_CASE_DEFAULT)) {
      return static_cast<uword>(Bool::False().ptr());
    }
  }

  return static_cast<uword>(Bool::True().ptr());
}
|
2017-07-11 10:01:47 +00:00
|
|
|
|
|
|
|
// Registers CaseInsensitiveCompareUCS2 via DEFINE_RAW_LEAF_RUNTIME_ENTRY.
// The 4 matches the function's four uword parameters (presumably the macro's
// argument count -- confirm against the macro definition).
DEFINE_RAW_LEAF_RUNTIME_ENTRY(
    CaseInsensitiveCompareUCS2, 4, /*is_float=*/false,
    reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUCS2));
|
2017-07-11 10:01:47 +00:00
|
|
|
|
2019-04-29 09:11:48 +00:00
|
|
|
// Registers CaseInsensitiveCompareUTF16 via DEFINE_RAW_LEAF_RUNTIME_ENTRY.
// The 4 matches the function's four uword parameters (presumably the macro's
// argument count -- confirm against the macro definition).
DEFINE_RAW_LEAF_RUNTIME_ENTRY(
    CaseInsensitiveCompareUTF16, 4, /*is_float=*/false,
    reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUTF16));
|
2017-07-11 10:01:47 +00:00
|
|
|
|
2020-04-16 22:59:03 +00:00
|
|
|
// Constructs a label. When not building the precompiled runtime, and only if
// FLAG_interpret_irregexp is off, the label is also given a freshly allocated
// JoinEntryInstr carrying the next deopt id of the current CompilerState.
BlockLabel::BlockLabel() {
#ifndef DART_PRECOMPILED_RUNTIME
  if (FLAG_interpret_irregexp) {
    // block_ is only needed by the compiled IR backend.
    return;
  }
  const auto deopt_id = CompilerState::Current().GetNextDeoptId();
  block_ = new JoinEntryInstr(-1, -1, deopt_id);
#endif
}
|
|
|
|
|
2015-03-17 19:24:26 +00:00
|
|
|
RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
|
2016-11-08 21:54:47 +00:00
|
|
|
: slow_safe_compiler_(false), global_mode_(NOT_GLOBAL), zone_(zone) {}
|
2014-11-03 10:00:31 +00:00
|
|
|
|
2016-11-08 21:54:47 +00:00
|
|
|
// Nothing to release explicitly; the destructor is defined out of line.
RegExpMacroAssembler::~RegExpMacroAssembler() = default;
|
2014-11-03 10:00:31 +00:00
|
|
|
|
2019-04-29 09:11:48 +00:00
|
|
|
void RegExpMacroAssembler::CheckNotInSurrogatePair(intptr_t cp_offset,
|
|
|
|
BlockLabel* on_failure) {
|
|
|
|
BlockLabel ok;
|
|
|
|
// Check that current character is not a trail surrogate.
|
|
|
|
LoadCurrentCharacter(cp_offset, &ok);
|
|
|
|
CheckCharacterNotInRange(Utf16::kTrailSurrogateStart,
|
|
|
|
Utf16::kTrailSurrogateEnd, &ok);
|
|
|
|
// Check that previous character is not a lead surrogate.
|
|
|
|
LoadCurrentCharacter(cp_offset - 1, &ok);
|
|
|
|
CheckCharacterInRange(Utf16::kLeadSurrogateStart, Utf16::kLeadSurrogateEnd,
|
|
|
|
on_failure);
|
|
|
|
BindBlock(&ok);
|
|
|
|
}
|
|
|
|
|
2014-11-03 10:00:31 +00:00
|
|
|
} // namespace dart
|