[vm/regexp] Ensure regex interpreter checks and yields to safepoints, handles OOB messages periodically.

Fixes https://github.com/flutter/flutter/issues/88063
Fixes https://github.com/dart-lang/sdk/issues/26041

TEST=benchmarks/EventLoopLatencyRegexp,
long_regexp_process_oob_messages_test

Change-Id: I2bb10b332768f794b902b58be18d437cc07a59a7
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/210065
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Alexander Aprelev <aam@google.com>
This commit is contained in:
Alexander Aprelev 2021-08-20 16:19:16 +00:00 committed by commit-bot@chromium.org
parent 709f87e7f3
commit 064a9a50a0
6 changed files with 544 additions and 452 deletions

View file

@ -0,0 +1,29 @@
// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:isolate';
import "package:async_helper/async_helper.dart";
import "package:expect/expect.dart";
/// Isolate entry point for the spawned worker.
///
/// Sends the string 'worker started' over [sendPort], then tries to find the
/// first match of the pattern `(x+)*y` in a subject made of 100 'x' characters
/// and prints whether one was found. The subject contains no 'y' and the
/// pattern nests quantifiers, so the match attempt is expected to run for a
/// very long time.
worker(SendPort sendPort) {
  final pattern = RegExp(r'(x+)*y');
  final subject = 'x' * 100 + '';
  // Notify the spawner before entering the long-running match.
  sendPort.send('worker started');
  final matchIterator = pattern.allMatches(subject).iterator;
  print(matchIterator.moveNext());
}
/// Spawns [worker] in a new isolate, waits for its start notification, kills
/// the isolate with [Isolate.immediate] priority, and then waits for the
/// isolate's exit notification.
///
/// The whole sequence is bracketed by `asyncStart()` / `asyncEnd()`.
main() async {
  asyncStart();
  // Port that receives the isolate's exit notification.
  final exitPort = ReceivePort();
  // Port that receives the worker's 'worker started' message.
  final startedPort = ReceivePort();
  final workerIsolate = await Isolate.spawn(
    worker,
    startedPort.sendPort,
    onExit: exitPort.sendPort,
    errorsAreFatal: true,
  );
  await startedPort.first;
  print('worker started, now killing worker');
  workerIsolate.kill(priority: Isolate.immediate);
  // Completes only once the worker isolate has actually exited.
  await exitPort.first;
  print('worker exited');
  asyncEnd();
}

View file

@ -0,0 +1,29 @@
// Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:isolate';
import "package:async_helper/async_helper.dart";
import "package:expect/expect.dart";
/// Isolate entry point for the spawned worker.
///
/// Sends the string 'worker started' over [sendPort], then tries to find the
/// first match of the pattern `(x+)*y` in a subject made of 100 'x' characters
/// and prints whether one was found. The subject contains no 'y' and the
/// pattern nests quantifiers, so the match attempt is expected to run for a
/// very long time.
worker(SendPort sendPort) {
  final pattern = RegExp(r'(x+)*y');
  final subject = 'x' * 100 + '';
  // Notify the spawner before entering the long-running match.
  sendPort.send('worker started');
  final matchIterator = pattern.allMatches(subject).iterator;
  print(matchIterator.moveNext());
}
/// Spawns [worker] in a new isolate, waits for its start notification, kills
/// the isolate with [Isolate.immediate] priority, and then waits for the
/// isolate's exit notification.
///
/// The whole sequence is bracketed by `asyncStart()` / `asyncEnd()`.
main() async {
  asyncStart();
  // Port that receives the isolate's exit notification.
  final exitPort = ReceivePort();
  // Port that receives the worker's 'worker started' message.
  final startedPort = ReceivePort();
  final workerIsolate = await Isolate.spawn(
    worker,
    startedPort.sendPort,
    onExit: exitPort.sendPort,
    errorsAreFatal: true,
  );
  await startedPort.first;
  print('worker started, now killing worker');
  workerIsolate.kill(priority: Isolate.immediate);
  // Completes only once the worker isolate has actually exited.
  await exitPort.first;
  print('worker exited');
  asyncEnd();
}

View file

@ -468,7 +468,7 @@ static intptr_t Prepare(const RegExp& regexp,
(regexp.num_bracket_expressions() + 1) * 2; (regexp.num_bracket_expressions() + 1) * 2;
} }
static IrregexpInterpreter::IrregexpResult ExecRaw(const RegExp& regexp, static ObjectPtr ExecRaw(const RegExp& regexp,
const String& subject, const String& subject,
intptr_t index, intptr_t index,
bool sticky, bool sticky,
@ -493,14 +493,16 @@ static IrregexpInterpreter::IrregexpResult ExecRaw(const RegExp& regexp,
const TypedData& bytecode = const TypedData& bytecode =
TypedData::Handle(zone, regexp.bytecode(is_one_byte, sticky)); TypedData::Handle(zone, regexp.bytecode(is_one_byte, sticky));
ASSERT(!bytecode.IsNull()); ASSERT(!bytecode.IsNull());
IrregexpInterpreter::IrregexpResult result = const Object& result = Object::Handle(
IrregexpInterpreter::Match(bytecode, subject, raw_output, index, zone); zone,
IrregexpInterpreter::Match(bytecode, subject, raw_output, index, zone));
if (result == IrregexpInterpreter::RE_SUCCESS) { if (result.ptr() == Bool::True().ptr()) {
// Copy capture results to the start of the registers array. // Copy capture results to the start of the registers array.
memmove(output, raw_output, number_of_capture_registers * sizeof(int32_t)); memmove(output, raw_output, number_of_capture_registers * sizeof(int32_t));
} }
if (result == IrregexpInterpreter::RE_EXCEPTION) { if (result.ptr() == Object::null()) {
// Exception during regexp processing
Thread* thread = Thread::Current(); Thread* thread = Thread::Current();
auto isolate_group = thread->isolate_group(); auto isolate_group = thread->isolate_group();
const Instance& exception = const Instance& exception =
@ -508,10 +510,10 @@ static IrregexpInterpreter::IrregexpResult ExecRaw(const RegExp& regexp,
Exceptions::Throw(thread, exception); Exceptions::Throw(thread, exception);
UNREACHABLE(); UNREACHABLE();
} }
return result; return result.ptr();
} }
InstancePtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp, ObjectPtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp,
const String& subject, const String& subject,
const Smi& start_index, const Smi& start_index,
bool sticky, bool sticky,
@ -525,11 +527,10 @@ InstancePtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp,
// V8 uses a shared copy on the isolate when smaller than some threshold. // V8 uses a shared copy on the isolate when smaller than some threshold.
int32_t* output_registers = zone->Alloc<int32_t>(required_registers); int32_t* output_registers = zone->Alloc<int32_t>(required_registers);
IrregexpInterpreter::IrregexpResult result = const Object& result =
ExecRaw(regexp, subject, start_index.Value(), sticky, output_registers, Object::Handle(zone, ExecRaw(regexp, subject, start_index.Value(), sticky,
required_registers, zone); output_registers, required_registers, zone));
if (result.ptr() == Bool::True().ptr()) {
if (result == IrregexpInterpreter::RE_SUCCESS) {
intptr_t capture_count = regexp.num_bracket_expressions(); intptr_t capture_count = regexp.num_bracket_expressions();
intptr_t capture_register_count = (capture_count + 1) * 2; intptr_t capture_register_count = (capture_count + 1) * 2;
ASSERT(required_registers >= capture_register_count); ASSERT(required_registers >= capture_register_count);
@ -553,10 +554,15 @@ InstancePtr BytecodeRegExpMacroAssembler::Interpret(const RegExp& regexp,
return result.ptr(); return result.ptr();
} }
if (result == IrregexpInterpreter::RE_EXCEPTION) { if (result.ptr() == Object::null()) {
// internal exception
UNREACHABLE(); UNREACHABLE();
} }
ASSERT(result == IrregexpInterpreter::RE_FAILURE); if (result.IsError()) {
Exceptions::PropagateError(Error::Cast(result));
UNREACHABLE();
}
ASSERT(result.ptr() == Bool::False().ptr());
return Instance::null(); return Instance::null();
} }

View file

@ -107,7 +107,7 @@ class BytecodeRegExpMacroAssembler : public RegExpMacroAssembler {
virtual void PrintBlocks() { UNIMPLEMENTED(); } virtual void PrintBlocks() { UNIMPLEMENTED(); }
///// /////
static InstancePtr Interpret(const RegExp& regexp, static ObjectPtr Interpret(const RegExp& regexp,
const String& str, const String& str,
const Smi& start_index, const Smi& start_index,
bool is_sticky, bool is_sticky,

View file

@ -4,11 +4,12 @@
// A simple interpreter for the Irregexp byte code. // A simple interpreter for the Irregexp byte code.
#include "vm/regexp_interpreter.h"
#include <memory> #include <memory>
#include <utility> #include <utility>
#include "heap/safepoint.h"
#include "vm/regexp_interpreter.h"
#include "platform/unicode.h" #include "platform/unicode.h"
#include "vm/object.h" #include "vm/object.h"
#include "vm/regexp_assembler.h" #include "vm/regexp_assembler.h"
@ -169,14 +170,16 @@ class BacktrackStack {
DISALLOW_COPY_AND_ASSIGN(BacktrackStack); DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
}; };
// Returns True if success, False if failure, Null if internal exception,
// Error if VM error needs to be propagated up the callchain.
template <typename Char> template <typename Char>
static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, static ObjectPtr RawMatch(const TypedData& bytecode,
const String& subject, const String& subject,
int32_t* registers, int32_t* registers,
intptr_t current, intptr_t current,
uint32_t current_char, uint32_t current_char,
Zone* zone) { Zone* zone) {
const uint8_t* pc = code_base; const auto thread = Thread::Current();
// BacktrackStack ensures that the memory allocated for the backtracking stack // BacktrackStack ensures that the memory allocated for the backtracking stack
// is returned to the system or cached if there is no stack being cached at // is returned to the system or cached if there is no stack being cached at
// the moment. // the moment.
@ -196,29 +199,51 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
OS::PrintErr("Start irregexp bytecode interpreter\n"); OS::PrintErr("Start irregexp bytecode interpreter\n");
} }
#endif #endif
const uint8_t* code_base;
const uint8_t* pc;
{
NoSafepointScope no_safepoint;
code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0));
pc = code_base;
}
while (true) { while (true) {
if (UNLIKELY(thread->HasScheduledInterrupts())) {
intptr_t pc_offset = pc - code_base;
ErrorPtr error = thread->HandleInterrupts();
if (error != Object::null()) {
// Needs to be propagated to the Dart native invoking the
// regex matcher.
return error;
}
NoSafepointScope no_safepoint;
code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0));
pc = code_base + pc_offset;
}
NoSafepointScope no_safepoint;
bool check_for_safepoint_now = false;
while (!check_for_safepoint_now) {
int32_t insn = Load32Aligned(pc); int32_t insn = Load32Aligned(pc);
switch (insn & BYTECODE_MASK) { switch (insn & BYTECODE_MASK) {
BYTECODE(BREAK) BYTECODE(BREAK)
UNREACHABLE(); UNREACHABLE();
return IrregexpInterpreter::RE_FAILURE; return Bool::False().ptr();
BYTECODE(PUSH_CP) BYTECODE(PUSH_CP)
if (--backtrack_stack_space < 0) { if (--backtrack_stack_space < 0) {
return IrregexpInterpreter::RE_EXCEPTION; return Object::null();
} }
*backtrack_sp++ = current; *backtrack_sp++ = current;
pc += BC_PUSH_CP_LENGTH; pc += BC_PUSH_CP_LENGTH;
break; break;
BYTECODE(PUSH_BT) BYTECODE(PUSH_BT)
if (--backtrack_stack_space < 0) { if (--backtrack_stack_space < 0) {
return IrregexpInterpreter::RE_EXCEPTION; return Object::null();
} }
*backtrack_sp++ = Load32Aligned(pc + 4); *backtrack_sp++ = Load32Aligned(pc + 4);
pc += BC_PUSH_BT_LENGTH; pc += BC_PUSH_BT_LENGTH;
break; break;
BYTECODE(PUSH_REGISTER) BYTECODE(PUSH_REGISTER)
if (--backtrack_stack_space < 0) { if (--backtrack_stack_space < 0) {
return IrregexpInterpreter::RE_EXCEPTION; return Object::null();
} }
*backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
pc += BC_PUSH_REGISTER_LENGTH; pc += BC_PUSH_REGISTER_LENGTH;
@ -261,6 +286,8 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
backtrack_stack_space++; backtrack_stack_space++;
--backtrack_sp; --backtrack_sp;
pc = code_base + *backtrack_sp; pc = code_base + *backtrack_sp;
// This should match check cadence in JIT irregexp implementation.
check_for_safepoint_now = true;
break; break;
BYTECODE(POP_REGISTER) BYTECODE(POP_REGISTER)
backtrack_stack_space++; backtrack_stack_space++;
@ -269,9 +296,9 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
pc += BC_POP_REGISTER_LENGTH; pc += BC_POP_REGISTER_LENGTH;
break; break;
BYTECODE(FAIL) BYTECODE(FAIL)
return IrregexpInterpreter::RE_FAILURE; return Bool::False().ptr();
BYTECODE(SUCCEED) BYTECODE(SUCCEED)
return IrregexpInterpreter::RE_SUCCESS; return Bool::True().ptr();
BYTECODE(ADVANCE_CP) BYTECODE(ADVANCE_CP)
current += insn >> BYTECODE_SHIFT; current += insn >> BYTECODE_SHIFT;
pc += BC_ADVANCE_CP_LENGTH; pc += BC_ADVANCE_CP_LENGTH;
@ -576,8 +603,9 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
break; break;
} else { } else {
// When looking behind, the string to match (if it is there) lies // When looking behind, the string to match (if it is there) lies
// before the current position, so we will check the [len] characters // before the current position, so we will check the [len]
// before the current position, excluding the current position itself. // characters before the current position, excluding the current
// position itself.
const int start = current - len; const int start = current - len;
int i; int i;
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
@ -648,30 +676,29 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
} }
} }
} }
}
IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( // Returns True if success, False if failure, Null if internal exception,
const TypedData& bytecode, // Error if VM error needs to be propagated up the callchain.
ObjectPtr IrregexpInterpreter::Match(const TypedData& bytecode,
const String& subject, const String& subject,
int32_t* registers, int32_t* registers,
intptr_t start_position, intptr_t start_position,
Zone* zone) { Zone* zone) {
NoSafepointScope no_safepoint;
const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0));
uint16_t previous_char = '\n'; uint16_t previous_char = '\n';
if (start_position != 0) { if (start_position != 0) {
previous_char = subject.CharAt(start_position - 1); previous_char = subject.CharAt(start_position - 1);
} }
if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { if (subject.IsOneByteString() || subject.IsExternalOneByteString()) {
return RawMatch<uint8_t>(code_base, subject, registers, start_position, return RawMatch<uint8_t>(bytecode, subject, registers, start_position,
previous_char, zone); previous_char, zone);
} else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) {
return RawMatch<uint16_t>(code_base, subject, registers, start_position, return RawMatch<uint16_t>(bytecode, subject, registers, start_position,
previous_char, zone); previous_char, zone);
} else { } else {
UNREACHABLE(); UNREACHABLE();
return IrregexpInterpreter::RE_FAILURE; return Bool::False().ptr();
} }
} }

View file

@ -15,9 +15,10 @@ namespace dart {
class IrregexpInterpreter : public AllStatic { class IrregexpInterpreter : public AllStatic {
public: public:
enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; // Returns True in case of a success, False in case of a failure,
// Null in case of internal exception,
static IrregexpResult Match(const TypedData& bytecode, // Error in case VM error has to be propagated up to the caller.
static ObjectPtr Match(const TypedData& bytecode,
const String& subject, const String& subject,
int32_t* captures, int32_t* captures,
intptr_t start_position, intptr_t start_position,