From 064a9a50a0a5774dd61a082d1d9c9678de48449f Mon Sep 17 00:00:00 2001 From: Alexander Aprelev Date: Fri, 20 Aug 2021 16:19:16 +0000 Subject: [PATCH] [vm/regexp] Ensure regex interpreter checks and yields to safepoints, handles OOB messages periodically. Fixes https://github.com/flutter/flutter/issues/88063 Fixes https://github.com/dart-lang/sdk/issues/26041 TEST=benchmarks/EventLoopLatencyRegexp, long_regexp_process_oob_messages_test Change-Id: I2bb10b332768f794b902b58be18d437cc07a59a7 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/210065 Reviewed-by: Martin Kustermann Commit-Queue: Alexander Aprelev --- ...long_regexp_process_oob_messages_test.dart | 29 + ...long_regexp_process_oob_messages_test.dart | 29 + runtime/vm/regexp_assembler_bytecode.cc | 54 +- runtime/vm/regexp_assembler_bytecode.h | 10 +- runtime/vm/regexp_interpreter.cc | 859 +++++++++--------- runtime/vm/regexp_interpreter.h | 15 +- 6 files changed, 544 insertions(+), 452 deletions(-) create mode 100644 runtime/tests/vm/dart/isolates/long_regexp_process_oob_messages_test.dart create mode 100644 runtime/tests/vm/dart_2/isolates/long_regexp_process_oob_messages_test.dart diff --git a/runtime/tests/vm/dart/isolates/long_regexp_process_oob_messages_test.dart b/runtime/tests/vm/dart/isolates/long_regexp_process_oob_messages_test.dart new file mode 100644 index 00000000000..404b978f2f9 --- /dev/null +++ b/runtime/tests/vm/dart/isolates/long_regexp_process_oob_messages_test.dart @@ -0,0 +1,29 @@ +// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'dart:isolate'; + +import "package:async_helper/async_helper.dart"; +import "package:expect/expect.dart"; + +worker(SendPort sendPort) { + final re = RegExp(r'(x+)*y'); + final s = 'x' * 100 + ''; + sendPort.send('worker started'); + print(re.allMatches(s).iterator.moveNext()); +} + +main() async { + asyncStart(); + ReceivePort onExit = ReceivePort(); + ReceivePort workerStarted = ReceivePort(); + final isolate = await Isolate.spawn(worker, workerStarted.sendPort, + onExit: onExit.sendPort, errorsAreFatal: true); + await workerStarted.first; + print('worker started, now killing worker'); + isolate.kill(priority: Isolate.immediate); + await onExit.first; + print('worker exited'); + asyncEnd(); +} diff --git a/runtime/tests/vm/dart_2/isolates/long_regexp_process_oob_messages_test.dart b/runtime/tests/vm/dart_2/isolates/long_regexp_process_oob_messages_test.dart new file mode 100644 index 00000000000..404b978f2f9 --- /dev/null +++ b/runtime/tests/vm/dart_2/isolates/long_regexp_process_oob_messages_test.dart @@ -0,0 +1,29 @@ +// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'dart:isolate'; + +import "package:async_helper/async_helper.dart"; +import "package:expect/expect.dart"; + +worker(SendPort sendPort) { + final re = RegExp(r'(x+)*y'); + final s = 'x' * 100 + ''; + sendPort.send('worker started'); + print(re.allMatches(s).iterator.moveNext()); +} + +main() async { + asyncStart(); + ReceivePort onExit = ReceivePort(); + ReceivePort workerStarted = ReceivePort(); + final isolate = await Isolate.spawn(worker, workerStarted.sendPort, + onExit: onExit.sendPort, errorsAreFatal: true); + await workerStarted.first; + print('worker started, now killing worker'); + isolate.kill(priority: Isolate.immediate); + await onExit.first; + print('worker exited'); + asyncEnd(); +} diff --git a/runtime/vm/regexp_assembler_bytecode.cc b/runtime/vm/regexp_assembler_bytecode.cc index 708297a845b..dbd71e26c2a 100644 --- a/runtime/vm/regexp_assembler_bytecode.cc +++ b/runtime/vm/regexp_assembler_bytecode.cc @@ -468,13 +468,13 @@ static intptr_t Prepare(const RegExp& regexp, (regexp.num_bracket_expressions() + 1) * 2; } -static IrregexpInterpreter::IrregexpResult ExecRaw(const RegExp& regexp, - const String& subject, - intptr_t index, - bool sticky, - int32_t* output, - intptr_t output_size, - Zone* zone) { +static ObjectPtr ExecRaw(const RegExp& regexp, + const String& subject, + intptr_t index, + bool sticky, + int32_t* output, + intptr_t output_size, + Zone* zone) { bool is_one_byte = subject.IsOneByteString() || subject.IsExternalOneByteString(); @@ -493,14 +493,16 @@ static IrregexpInterpreter::IrregexpResult ExecRaw(const RegExp& regexp, const TypedData& bytecode = TypedData::Handle(zone, regexp.bytecode(is_one_byte, sticky)); ASSERT(!bytecode.IsNull()); - IrregexpInterpreter::IrregexpResult result = - IrregexpInterpreter::Match(bytecode, subject, raw_output, index, zone); + const Object& result = Object::Handle( + zone, + IrregexpInterpreter::Match(bytecode, subject, raw_output, index, zone)); - if (result == 
IrregexpInterpreter::RE_SUCCESS) { + if (result.ptr() == Bool::True().ptr()) { // Copy capture results to the start of the registers array. memmove(output, raw_output, number_of_capture_registers * sizeof(int32_t)); } - if (result == IrregexpInterpreter::RE_EXCEPTION) { + if (result.ptr() == Object::null()) { + // Exception during regexp processing Thread* thread = Thread::Current(); auto isolate_group = thread->isolate_group(); const Instance& exception = @@ -508,14 +510,14 @@ static IrregexpInterpreter::IrregexpResult ExecRaw(const RegExp& regexp, Exceptions::Throw(thread, exception); UNREACHABLE(); } - return result; + return result.ptr(); } -InstancePtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp, - const String& subject, - const Smi& start_index, - bool sticky, - Zone* zone) { +ObjectPtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp, + const String& subject, + const Smi& start_index, + bool sticky, + Zone* zone) { intptr_t required_registers = Prepare(regexp, subject, sticky, zone); if (required_registers < 0) { // Compiling failed with an exception. @@ -525,11 +527,10 @@ InstancePtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp, // V8 uses a shared copy on the isolate when smaller than some threshold. 
int32_t* output_registers = zone->Alloc(required_registers); - IrregexpInterpreter::IrregexpResult result = - ExecRaw(regexp, subject, start_index.Value(), sticky, output_registers, - required_registers, zone); - - if (result == IrregexpInterpreter::RE_SUCCESS) { + const Object& result = + Object::Handle(zone, ExecRaw(regexp, subject, start_index.Value(), sticky, + output_registers, required_registers, zone)); + if (result.ptr() == Bool::True().ptr()) { intptr_t capture_count = regexp.num_bracket_expressions(); intptr_t capture_register_count = (capture_count + 1) * 2; ASSERT(required_registers >= capture_register_count); @@ -553,10 +554,15 @@ InstancePtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp, return result.ptr(); } - if (result == IrregexpInterpreter::RE_EXCEPTION) { + if (result.ptr() == Object::null()) { + // internal exception UNREACHABLE(); } - ASSERT(result == IrregexpInterpreter::RE_FAILURE); + if (result.IsError()) { + Exceptions::PropagateError(Error::Cast(result)); + UNREACHABLE(); + } + ASSERT(result.ptr() == Bool::False().ptr()); return Instance::null(); } diff --git a/runtime/vm/regexp_assembler_bytecode.h b/runtime/vm/regexp_assembler_bytecode.h index cdedaa170ee..6bb8f94b983 100644 --- a/runtime/vm/regexp_assembler_bytecode.h +++ b/runtime/vm/regexp_assembler_bytecode.h @@ -107,11 +107,11 @@ class BytecodeRegExpMacroAssembler : public RegExpMacroAssembler { virtual void PrintBlocks() { UNIMPLEMENTED(); } ///// - static InstancePtr Interpret(const RegExp& regexp, - const String& str, - const Smi& start_index, - bool is_sticky, - Zone* zone); + static ObjectPtr Interpret(const RegExp& regexp, + const String& str, + const Smi& start_index, + bool is_sticky, + Zone* zone); private: void Expand(); diff --git a/runtime/vm/regexp_interpreter.cc b/runtime/vm/regexp_interpreter.cc index eb0a243028a..d9248ac9fbc 100644 --- a/runtime/vm/regexp_interpreter.cc +++ b/runtime/vm/regexp_interpreter.cc @@ -4,11 +4,12 @@ // A simple interpreter 
for the Irregexp byte code. -#include "vm/regexp_interpreter.h" - #include #include +#include "heap/safepoint.h" +#include "vm/regexp_interpreter.h" + #include "platform/unicode.h" #include "vm/object.h" #include "vm/regexp_assembler.h" @@ -169,14 +170,16 @@ class BacktrackStack { DISALLOW_COPY_AND_ASSIGN(BacktrackStack); }; +// Returns True if success, False if failure, Null if internal exception, +// Error if VM error needs to be propagated up the callchain. template -static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, - const String& subject, - int32_t* registers, - intptr_t current, - uint32_t current_char, - Zone* zone) { - const uint8_t* pc = code_base; +static ObjectPtr RawMatch(const TypedData& bytecode, + const String& subject, + int32_t* registers, + intptr_t current, + uint32_t current_char, + Zone* zone) { + const auto thread = Thread::Current(); // BacktrackStack ensures that the memory allocated for the backtracking stack // is returned to the system or cached if there is no stack being cached at // the moment. @@ -196,482 +199,506 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, OS::PrintErr("Start irregexp bytecode interpreter\n"); } #endif + const uint8_t* code_base; + const uint8_t* pc; + { + NoSafepointScope no_safepoint; + code_base = reinterpret_cast(bytecode.DataAddr(0)); + pc = code_base; + } while (true) { - int32_t insn = Load32Aligned(pc); - switch (insn & BYTECODE_MASK) { - BYTECODE(BREAK) - UNREACHABLE(); - return IrregexpInterpreter::RE_FAILURE; - BYTECODE(PUSH_CP) - if (--backtrack_stack_space < 0) { - return IrregexpInterpreter::RE_EXCEPTION; + if (UNLIKELY(thread->HasScheduledInterrupts())) { + intptr_t pc_offset = pc - code_base; + ErrorPtr error = thread->HandleInterrupts(); + if (error != Object::null()) { + // Needs to be propagated to the Dart native invoking the + // regex matcher. 
+ return error; } - *backtrack_sp++ = current; - pc += BC_PUSH_CP_LENGTH; - break; - BYTECODE(PUSH_BT) - if (--backtrack_stack_space < 0) { - return IrregexpInterpreter::RE_EXCEPTION; - } - *backtrack_sp++ = Load32Aligned(pc + 4); - pc += BC_PUSH_BT_LENGTH; - break; - BYTECODE(PUSH_REGISTER) - if (--backtrack_stack_space < 0) { - return IrregexpInterpreter::RE_EXCEPTION; - } - *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; - pc += BC_PUSH_REGISTER_LENGTH; - break; - BYTECODE(SET_REGISTER) - registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); - pc += BC_SET_REGISTER_LENGTH; - break; - BYTECODE(ADVANCE_REGISTER) - registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); - pc += BC_ADVANCE_REGISTER_LENGTH; - break; - BYTECODE(SET_REGISTER_TO_CP) - registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); - pc += BC_SET_REGISTER_TO_CP_LENGTH; - break; - BYTECODE(SET_CP_TO_REGISTER) - current = registers[insn >> BYTECODE_SHIFT]; - pc += BC_SET_CP_TO_REGISTER_LENGTH; - break; - BYTECODE(SET_REGISTER_TO_SP) - registers[insn >> BYTECODE_SHIFT] = - static_cast(backtrack_sp - backtrack_stack_base); - pc += BC_SET_REGISTER_TO_SP_LENGTH; - break; - BYTECODE(SET_SP_TO_REGISTER) - backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT]; - backtrack_stack_space = - backtrack_stack.max_size() - - static_cast(backtrack_sp - backtrack_stack_base); - pc += BC_SET_SP_TO_REGISTER_LENGTH; - break; - BYTECODE(POP_CP) - backtrack_stack_space++; - --backtrack_sp; - current = *backtrack_sp; - pc += BC_POP_CP_LENGTH; - break; - BYTECODE(POP_BT) - backtrack_stack_space++; - --backtrack_sp; - pc = code_base + *backtrack_sp; - break; - BYTECODE(POP_REGISTER) - backtrack_stack_space++; - --backtrack_sp; - registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; - pc += BC_POP_REGISTER_LENGTH; - break; - BYTECODE(FAIL) - return IrregexpInterpreter::RE_FAILURE; - BYTECODE(SUCCEED) - return IrregexpInterpreter::RE_SUCCESS; - BYTECODE(ADVANCE_CP) - current += 
insn >> BYTECODE_SHIFT; - pc += BC_ADVANCE_CP_LENGTH; - break; - BYTECODE(GOTO) - pc = code_base + Load32Aligned(pc + 4); - break; - BYTECODE(ADVANCE_CP_AND_GOTO) - current += insn >> BYTECODE_SHIFT; - pc = code_base + Load32Aligned(pc + 4); - break; - BYTECODE(CHECK_GREEDY) - if (current == backtrack_sp[-1]) { - backtrack_sp--; + NoSafepointScope no_safepoint; + code_base = reinterpret_cast(bytecode.DataAddr(0)); + pc = code_base + pc_offset; + } + NoSafepointScope no_safepoint; + bool check_for_safepoint_now = false; + while (!check_for_safepoint_now) { + int32_t insn = Load32Aligned(pc); + switch (insn & BYTECODE_MASK) { + BYTECODE(BREAK) + UNREACHABLE(); + return Bool::False().ptr(); + BYTECODE(PUSH_CP) + if (--backtrack_stack_space < 0) { + return Object::null(); + } + *backtrack_sp++ = current; + pc += BC_PUSH_CP_LENGTH; + break; + BYTECODE(PUSH_BT) + if (--backtrack_stack_space < 0) { + return Object::null(); + } + *backtrack_sp++ = Load32Aligned(pc + 4); + pc += BC_PUSH_BT_LENGTH; + break; + BYTECODE(PUSH_REGISTER) + if (--backtrack_stack_space < 0) { + return Object::null(); + } + *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; + pc += BC_PUSH_REGISTER_LENGTH; + break; + BYTECODE(SET_REGISTER) + registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); + pc += BC_SET_REGISTER_LENGTH; + break; + BYTECODE(ADVANCE_REGISTER) + registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); + pc += BC_ADVANCE_REGISTER_LENGTH; + break; + BYTECODE(SET_REGISTER_TO_CP) + registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); + pc += BC_SET_REGISTER_TO_CP_LENGTH; + break; + BYTECODE(SET_CP_TO_REGISTER) + current = registers[insn >> BYTECODE_SHIFT]; + pc += BC_SET_CP_TO_REGISTER_LENGTH; + break; + BYTECODE(SET_REGISTER_TO_SP) + registers[insn >> BYTECODE_SHIFT] = + static_cast(backtrack_sp - backtrack_stack_base); + pc += BC_SET_REGISTER_TO_SP_LENGTH; + break; + BYTECODE(SET_SP_TO_REGISTER) + backtrack_sp = backtrack_stack_base + registers[insn >> 
BYTECODE_SHIFT]; + backtrack_stack_space = + backtrack_stack.max_size() - + static_cast(backtrack_sp - backtrack_stack_base); + pc += BC_SET_SP_TO_REGISTER_LENGTH; + break; + BYTECODE(POP_CP) backtrack_stack_space++; + --backtrack_sp; + current = *backtrack_sp; + pc += BC_POP_CP_LENGTH; + break; + BYTECODE(POP_BT) + backtrack_stack_space++; + --backtrack_sp; + pc = code_base + *backtrack_sp; + // This should match check cadence in JIT irregexp implementation. + check_for_safepoint_now = true; + break; + BYTECODE(POP_REGISTER) + backtrack_stack_space++; + --backtrack_sp; + registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; + pc += BC_POP_REGISTER_LENGTH; + break; + BYTECODE(FAIL) + return Bool::False().ptr(); + BYTECODE(SUCCEED) + return Bool::True().ptr(); + BYTECODE(ADVANCE_CP) + current += insn >> BYTECODE_SHIFT; + pc += BC_ADVANCE_CP_LENGTH; + break; + BYTECODE(GOTO) pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_GREEDY_LENGTH; - } - break; - BYTECODE(LOAD_CURRENT_CHAR) { - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos < 0 || pos >= subject_length) { + break; + BYTECODE(ADVANCE_CP_AND_GOTO) + current += insn >> BYTECODE_SHIFT; + pc = code_base + Load32Aligned(pc + 4); + break; + BYTECODE(CHECK_GREEDY) + if (current == backtrack_sp[-1]) { + backtrack_sp--; + backtrack_stack_space++; pc = code_base + Load32Aligned(pc + 4); } else { - current_char = subject.CharAt(pos); - pc += BC_LOAD_CURRENT_CHAR_LENGTH; + pc += BC_CHECK_GREEDY_LENGTH; } break; - } - BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { - int pos = current + (insn >> BYTECODE_SHIFT); - current_char = subject.CharAt(pos); - pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; - break; - } - BYTECODE(LOAD_2_CURRENT_CHARS) { - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 2 > subject_length) { - pc = code_base + Load32Aligned(pc + 4); - } else { + BYTECODE(LOAD_CURRENT_CHAR) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos < 0 || pos >= subject_length) { + pc = 
code_base + Load32Aligned(pc + 4); + } else { + current_char = subject.CharAt(pos); + pc += BC_LOAD_CURRENT_CHAR_LENGTH; + } + break; + } + BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { + int pos = current + (insn >> BYTECODE_SHIFT); + current_char = subject.CharAt(pos); + pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; + break; + } + BYTECODE(LOAD_2_CURRENT_CHARS) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 2 > subject_length) { + pc = code_base + Load32Aligned(pc + 4); + } else { + Char next = subject.CharAt(pos + 1); + current_char = + subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char))); + pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; + } + break; + } + BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { + int pos = current + (insn >> BYTECODE_SHIFT); Char next = subject.CharAt(pos + 1); current_char = subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char))); - pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; + pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; + break; } - break; - } - BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { - int pos = current + (insn >> BYTECODE_SHIFT); - Char next = subject.CharAt(pos + 1); - current_char = - subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char))); - pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; - break; - } - BYTECODE(LOAD_4_CURRENT_CHARS) { - ASSERT(sizeof(Char) == 1); - int pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 4 > subject_length) { - pc = code_base + Load32Aligned(pc + 4); - } else { + BYTECODE(LOAD_4_CURRENT_CHARS) { + ASSERT(sizeof(Char) == 1); + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 4 > subject_length) { + pc = code_base + Load32Aligned(pc + 4); + } else { + Char next1 = subject.CharAt(pos + 1); + Char next2 = subject.CharAt(pos + 2); + Char next3 = subject.CharAt(pos + 3); + current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) | + (next3 << 24)); + pc += BC_LOAD_4_CURRENT_CHARS_LENGTH; + } + break; + } + BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { + 
ASSERT(sizeof(Char) == 1); + int pos = current + (insn >> BYTECODE_SHIFT); Char next1 = subject.CharAt(pos + 1); Char next2 = subject.CharAt(pos + 2); Char next3 = subject.CharAt(pos + 3); current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) | (next3 << 24)); - pc += BC_LOAD_4_CURRENT_CHARS_LENGTH; + pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; + break; } - break; - } - BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { - ASSERT(sizeof(Char) == 1); - int pos = current + (insn >> BYTECODE_SHIFT); - Char next1 = subject.CharAt(pos + 1); - Char next2 = subject.CharAt(pos + 2); - Char next3 = subject.CharAt(pos + 3); - current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) | - (next3 << 24)); - pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; - break; - } - BYTECODE(CHECK_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c == current_char) { + BYTECODE(CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == current_char) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_4_CHARS_LENGTH; + } + break; + } + BYTECODE(CHECK_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == current_char) { + pc = code_base + Load32Aligned(pc + 4); + } else { + pc += BC_CHECK_CHAR_LENGTH; + } + break; + } + BYTECODE(CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != current_char) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_NOT_4_CHARS_LENGTH; + } + break; + } + BYTECODE(CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c != current_char) { + pc = code_base + Load32Aligned(pc + 4); + } else { + pc += BC_CHECK_NOT_CHAR_LENGTH; + } + break; + } + BYTECODE(AND_CHECK_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c == (current_char & Load32Aligned(pc + 8))) { + pc = code_base + Load32Aligned(pc + 12); + } else { + pc += BC_AND_CHECK_4_CHARS_LENGTH; + } + break; + } + BYTECODE(AND_CHECK_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c == (current_char & 
Load32Aligned(pc + 4))) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_AND_CHECK_CHAR_LENGTH; + } + break; + } + BYTECODE(AND_CHECK_NOT_4_CHARS) { + uint32_t c = Load32Aligned(pc + 4); + if (c != (current_char & Load32Aligned(pc + 8))) { + pc = code_base + Load32Aligned(pc + 12); + } else { + pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; + } + break; + } + BYTECODE(AND_CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + if (c != (current_char & Load32Aligned(pc + 4))) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_AND_CHECK_NOT_CHAR_LENGTH; + } + break; + } + BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { + uint32_t c = (insn >> BYTECODE_SHIFT); + uint32_t minus = Load16Aligned(pc + 4); + uint32_t mask = Load16Aligned(pc + 6); + if (c != ((current_char - minus) & mask)) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; + } + break; + } + BYTECODE(CHECK_CHAR_IN_RANGE) { + uint32_t from = Load16Aligned(pc + 4); + uint32_t to = Load16Aligned(pc + 6); + if (from <= current_char && current_char <= to) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; + } + break; + } + BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { + uint32_t from = Load16Aligned(pc + 4); + uint32_t to = Load16Aligned(pc + 6); + if (from > current_char || current_char > to) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; + } + break; + } + BYTECODE(CHECK_BIT_IN_TABLE) { + int mask = RegExpMacroAssembler::kTableMask; + uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; + int bit = (current_char & (kBitsPerByte - 1)); + if ((b & (1 << bit)) != 0) { + pc = code_base + Load32Aligned(pc + 4); + } else { + pc += BC_CHECK_BIT_IN_TABLE_LENGTH; + } + break; + } + BYTECODE(CHECK_LT) { + uint32_t limit = (insn >> BYTECODE_SHIFT); + if (current_char < limit) { + pc = code_base + Load32Aligned(pc + 4); + } else { + pc += BC_CHECK_LT_LENGTH; + 
} + break; + } + BYTECODE(CHECK_GT) { + uint32_t limit = (insn >> BYTECODE_SHIFT); + if (current_char > limit) { + pc = code_base + Load32Aligned(pc + 4); + } else { + pc += BC_CHECK_GT_LENGTH; + } + break; + } + BYTECODE(CHECK_REGISTER_LT) + if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { pc = code_base + Load32Aligned(pc + 8); } else { - pc += BC_CHECK_4_CHARS_LENGTH; + pc += BC_CHECK_REGISTER_LT_LENGTH; } break; - } - BYTECODE(CHECK_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c == current_char) { + BYTECODE(CHECK_REGISTER_GE) + if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { + pc = code_base + Load32Aligned(pc + 8); + } else { + pc += BC_CHECK_REGISTER_GE_LENGTH; + } + break; + BYTECODE(CHECK_REGISTER_EQ_POS) + if (registers[insn >> BYTECODE_SHIFT] == current) { pc = code_base + Load32Aligned(pc + 4); } else { - pc += BC_CHECK_CHAR_LENGTH; + pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; } break; - } - BYTECODE(CHECK_NOT_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c != current_char) { + BYTECODE(CHECK_NOT_REGS_EQUAL) + if (registers[insn >> BYTECODE_SHIFT] == + registers[Load32Aligned(pc + 4)]) { + pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; + } else { pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_NOT_4_CHARS_LENGTH; } break; - } - BYTECODE(CHECK_NOT_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c != current_char) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_NOT_CHAR_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c == (current_char & Load32Aligned(pc + 8))) { - pc = code_base + Load32Aligned(pc + 12); - } else { - pc += BC_AND_CHECK_4_CHARS_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c == (current_char & Load32Aligned(pc + 4))) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_AND_CHECK_CHAR_LENGTH; - } - break; - } - 
BYTECODE(AND_CHECK_NOT_4_CHARS) { - uint32_t c = Load32Aligned(pc + 4); - if (c != (current_char & Load32Aligned(pc + 8))) { - pc = code_base + Load32Aligned(pc + 12); - } else { - pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; - } - break; - } - BYTECODE(AND_CHECK_NOT_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - if (c != (current_char & Load32Aligned(pc + 4))) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_AND_CHECK_NOT_CHAR_LENGTH; - } - break; - } - BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { - uint32_t c = (insn >> BYTECODE_SHIFT); - uint32_t minus = Load16Aligned(pc + 4); - uint32_t mask = Load16Aligned(pc + 6); - if (c != ((current_char - minus) & mask)) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; - } - break; - } - BYTECODE(CHECK_CHAR_IN_RANGE) { - uint32_t from = Load16Aligned(pc + 4); - uint32_t to = Load16Aligned(pc + 6); - if (from <= current_char && current_char <= to) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; - } - break; - } - BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { - uint32_t from = Load16Aligned(pc + 4); - uint32_t to = Load16Aligned(pc + 6); - if (from > current_char || current_char > to) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; - } - break; - } - BYTECODE(CHECK_BIT_IN_TABLE) { - int mask = RegExpMacroAssembler::kTableMask; - uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; - int bit = (current_char & (kBitsPerByte - 1)); - if ((b & (1 << bit)) != 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_BIT_IN_TABLE_LENGTH; - } - break; - } - BYTECODE(CHECK_LT) { - uint32_t limit = (insn >> BYTECODE_SHIFT); - if (current_char < limit) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_LT_LENGTH; - } - break; - } - BYTECODE(CHECK_GT) { - uint32_t limit = (insn >> BYTECODE_SHIFT); - if (current_char > limit) { - pc = code_base + 
Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_GT_LENGTH; - } - break; - } - BYTECODE(CHECK_REGISTER_LT) - if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_REGISTER_LT_LENGTH; - } - break; - BYTECODE(CHECK_REGISTER_GE) - if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { - pc = code_base + Load32Aligned(pc + 8); - } else { - pc += BC_CHECK_REGISTER_GE_LENGTH; - } - break; - BYTECODE(CHECK_REGISTER_EQ_POS) - if (registers[insn >> BYTECODE_SHIFT] == current) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; - } - break; - BYTECODE(CHECK_NOT_REGS_EQUAL) - if (registers[insn >> BYTECODE_SHIFT] == - registers[Load32Aligned(pc + 4)]) { - pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; - } else { - pc = code_base + Load32Aligned(pc + 8); - } - break; - BYTECODE(CHECK_NOT_BACK_REF) { - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { + BYTECODE(CHECK_NOT_BACK_REF) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_LENGTH; + break; + } + if (current + len > subject_length) { + pc = code_base + Load32Aligned(pc + 4); + break; + } else { + int i; + for (i = 0; i < len; i++) { + if (subject.CharAt(from + i) != subject.CharAt(current + i)) { + pc = code_base + Load32Aligned(pc + 4); + break; + } + } + if (i < len) break; + current += len; + } pc += BC_CHECK_NOT_BACK_REF_LENGTH; break; } - if (current + len > subject_length) { - pc = code_base + Load32Aligned(pc + 4); - break; - } else { - int i; - for (i = 0; i < len; i++) { - if (subject.CharAt(from + i) != subject.CharAt(current + i)) { + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) + FALL_THROUGH; + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { + const bool unicode = + (insn & BYTECODE_MASK) 
== BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; + break; + } + if (current + len > subject_length) { + pc = code_base + Load32Aligned(pc + 4); + break; + } else { + if (BackRefMatchesNoCase(&canonicalize, from, current, len, + subject, unicode)) { + current += len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; + } else { pc = code_base + Load32Aligned(pc + 4); - break; } } - if (i < len) break; - current += len; - } - pc += BC_CHECK_NOT_BACK_REF_LENGTH; - break; - } - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) - FALL_THROUGH; - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { - const bool unicode = - (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; - int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; break; } - if (current + len > subject_length) { - pc = code_base + Load32Aligned(pc + 4); - break; - } else { - if (BackRefMatchesNoCase(&canonicalize, from, current, len, - subject, unicode)) { - current += len; - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; - } else { - pc = code_base + Load32Aligned(pc + 4); + BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { + const int from = registers[insn >> BYTECODE_SHIFT]; + const int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; + break; + } + if ((current - len) < 0) { + pc = code_base + Load32Aligned(pc + 4); + break; + } else { + // When looking behind, the string to match (if it is there) lies + // before the current position, so we will check the [len] + // characters before the current position, excluding the current + // position itself. 
+ const int start = current - len; + int i; + for (i = 0; i < len; i++) { + if (subject.CharAt(from + i) != subject.CharAt(start + i)) { + pc = code_base + Load32Aligned(pc + 4); + break; + } + } + if (i < len) break; + current -= len; } - } - break; - } - BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { - const int from = registers[insn >> BYTECODE_SHIFT]; - const int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; break; } - if ((current - len) < 0) { - pc = code_base + Load32Aligned(pc + 4); - break; - } else { - // When looking behind, the string to match (if it is there) lies - // before the current position, so we will check the [len] characters - // before the current position, excluding the current position itself. - const int start = current - len; - int i; - for (i = 0; i < len; i++) { - if (subject.CharAt(from + i) != subject.CharAt(start + i)) { + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) + FALL_THROUGH; + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { + bool unicode = (insn & BYTECODE_MASK) == + BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + break; + } + if (current < len) { + pc = code_base + Load32Aligned(pc + 4); + break; + } else { + if (BackRefMatchesNoCase(&canonicalize, from, current - len, + len, subject, unicode)) { + current -= len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + } else { pc = code_base + Load32Aligned(pc + 4); - break; } } - if (i < len) break; - current -= len; - } - pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; - break; - } - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) - FALL_THROUGH; - BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { - bool unicode = (insn & BYTECODE_MASK) == - BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; - 
int from = registers[insn >> BYTECODE_SHIFT]; - int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; - if (from < 0 || len <= 0) { - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; break; } - if (current < len) { + BYTECODE(CHECK_AT_START) + if (current == 0) { pc = code_base + Load32Aligned(pc + 4); - break; } else { - if (BackRefMatchesNoCase(&canonicalize, from, current - len, - len, subject, unicode)) { - current -= len; - pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + pc += BC_CHECK_AT_START_LENGTH; + } + break; + BYTECODE(CHECK_NOT_AT_START) { + const int32_t cp_offset = insn >> BYTECODE_SHIFT; + if (current + cp_offset == 0) { + pc += BC_CHECK_NOT_AT_START_LENGTH; } else { pc = code_base + Load32Aligned(pc + 4); } + break; } - break; - } - BYTECODE(CHECK_AT_START) - if (current == 0) { - pc = code_base + Load32Aligned(pc + 4); - } else { - pc += BC_CHECK_AT_START_LENGTH; - } - break; - BYTECODE(CHECK_NOT_AT_START) { - const int32_t cp_offset = insn >> BYTECODE_SHIFT; - if (current + cp_offset == 0) { - pc += BC_CHECK_NOT_AT_START_LENGTH; - } else { - pc = code_base + Load32Aligned(pc + 4); + BYTECODE(SET_CURRENT_POSITION_FROM_END) { + int by = static_cast(insn) >> BYTECODE_SHIFT; + if (subject_length - current > by) { + current = subject_length - by; + current_char = subject.CharAt(current - 1); + } + pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; + break; } - break; + default: + UNREACHABLE(); + break; } - BYTECODE(SET_CURRENT_POSITION_FROM_END) { - int by = static_cast(insn) >> BYTECODE_SHIFT; - if (subject_length - current > by) { - current = subject_length - by; - current_char = subject.CharAt(current - 1); - } - pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; - break; - } - default: - UNREACHABLE(); - break; } } } -IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( - const TypedData& bytecode, - const String& subject, - int32_t* registers, - intptr_t start_position, - Zone* zone) { - NoSafepointScope no_safepoint; - const 
uint8_t* code_base = reinterpret_cast(bytecode.DataAddr(0)); - +// Returns True if success, False if failure, Null if internal exception, +// Error if VM error needs to be propagated up the callchain. +ObjectPtr IrregexpInterpreter::Match(const TypedData& bytecode, + const String& subject, + int32_t* registers, + intptr_t start_position, + Zone* zone) { uint16_t previous_char = '\n'; if (start_position != 0) { previous_char = subject.CharAt(start_position - 1); } if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { - return RawMatch(code_base, subject, registers, start_position, + return RawMatch(bytecode, subject, registers, start_position, previous_char, zone); } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { - return RawMatch(code_base, subject, registers, start_position, + return RawMatch(bytecode, subject, registers, start_position, previous_char, zone); } else { UNREACHABLE(); - return IrregexpInterpreter::RE_FAILURE; + return Bool::False().ptr(); } } diff --git a/runtime/vm/regexp_interpreter.h b/runtime/vm/regexp_interpreter.h index 9e4e567eef8..f451a40bb05 100644 --- a/runtime/vm/regexp_interpreter.h +++ b/runtime/vm/regexp_interpreter.h @@ -15,13 +15,14 @@ namespace dart { class IrregexpInterpreter : public AllStatic { public: - enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; - - static IrregexpResult Match(const TypedData& bytecode, - const String& subject, - int32_t* captures, - intptr_t start_position, - Zone* zone); + // Returns True in case of a success, False in case of a failure, + // Null in case of internal exception, + // Error in case VM error has to be propagated up to the caller. + static ObjectPtr Match(const TypedData& bytecode, + const String& subject, + int32_t* captures, + intptr_t start_position, + Zone* zone); }; } // namespace dart