From 48f29b15c71ae1b6877b233d492eff5bff7f9e1b Mon Sep 17 00:00:00 2001 From: Vyacheslav Egorov Date: Mon, 27 Nov 2023 15:22:40 +0000 Subject: [PATCH] [vm/aot] Make regexp/stack-overflow test pass This test needs a larger backtracking stack then we provide by default in AOT mode. Allow controlling stack size via a command line flag regexp_backtrack_stack_size_kb. Additionally cleanup types in the interpreter a bit: generated code uses Int32List for both register and backtracking stack. But the interpreter was using intptr_t for backtracking stack and int32_t for registers. Align these types to use int32_t everywhere and add a bit of sanity checking to ensure that we don't try to use regexp engine with strings which are too big to fit into int32_t range. TEST=corelib/regexp/stack-overflow Change-Id: I5c13c4b47d5d478b683f0450d3c0ce65f6961cff Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/338141 Commit-Queue: Slava Egorov Reviewed-by: Daco Harkes --- runtime/lib/regexp.cc | 13 ++++++++++ runtime/vm/regexp_assembler_bytecode.cc | 2 +- runtime/vm/regexp_interpreter.cc | 25 +++++++++++-------- runtime/vm/regexp_interpreter.h | 2 +- tests/corelib/regexp/stack-overflow_test.dart | 4 +++ 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/runtime/lib/regexp.cc b/runtime/lib/regexp.cc index 9291d11079b..3fd92bf2481 100644 --- a/runtime/lib/regexp.cc +++ b/runtime/lib/regexp.cc @@ -147,6 +147,19 @@ static ObjectPtr ExecuteMatch(Zone* zone, GET_NON_NULL_NATIVE_ARGUMENT(String, subject, arguments->NativeArgAt(1)); GET_NON_NULL_NATIVE_ARGUMENT(Smi, start_index, arguments->NativeArgAt(2)); + // Both generated code and the interpreter are using 32-bit registers and + // 32-bit backtracking stack so they can't work with strings which are + // larger than that. Validate these assumptions before running the regexp. + if (!Utils::IsInt(32, subject.Length())) { + Exceptions::ThrowRangeError("length", + Integer::Handle(Integer::New(subject.Length())), + 0, kMaxInt32); + } + if (!Utils::IsInt(32, start_index.Value())) { + Exceptions::ThrowRangeError("start_index", Integer::Cast(start_index), + kMinInt32, kMaxInt32); + } + #if !defined(DART_PRECOMPILED_RUNTIME) if (!FLAG_interpret_irregexp) { return IRRegExpMacroAssembler::Execute(regexp, subject, start_index, diff --git a/runtime/vm/regexp_assembler_bytecode.cc b/runtime/vm/regexp_assembler_bytecode.cc index 863227ee50e..78587e1c8df 100644 --- a/runtime/vm/regexp_assembler_bytecode.cc +++ b/runtime/vm/regexp_assembler_bytecode.cc @@ -470,7 +470,7 @@ static intptr_t Prepare(const RegExp& regexp, static ObjectPtr ExecRaw(const RegExp& regexp, const String& subject, - intptr_t index, + int32_t index, bool sticky, int32_t* output, intptr_t output_size, diff --git a/runtime/vm/regexp_interpreter.cc b/runtime/vm/regexp_interpreter.cc index 4fed40c9408..e06bf680f38 100644 --- a/runtime/vm/regexp_interpreter.cc +++ b/runtime/vm/regexp_interpreter.cc @@ -20,6 +20,10 @@ namespace dart { DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes"); +DEFINE_FLAG(int, + regexp_backtrack_stack_size_kb, + 256, + "Size of backtracking stack"); typedef unibrow::Mapping Canonicalize; @@ -145,9 +149,10 @@ class BacktrackStack { if (memory_ == nullptr) { const bool executable = false; const bool compressed = false; + const intptr_t size_in_bytes = Utils::RoundUp( + FLAG_regexp_backtrack_stack_size_kb * KB, VirtualMemory::PageSize()); memory_ = std::unique_ptr(VirtualMemory::Allocate( - sizeof(intptr_t) * kBacktrackStackSize, executable, compressed, - "regexp-backtrack-stack")); + size_in_bytes, executable, compressed, "regexp-backtrack-stack")); } } @@ -159,15 +164,13 @@ class BacktrackStack { bool out_of_memory() const { return memory_ == nullptr; } - intptr_t* data() const { - return reinterpret_cast(memory_->address()); + int32_t* data() const { + return reinterpret_cast(memory_->address()); } - intptr_t max_size() const { return kBacktrackStackSize; } + intptr_t max_size() const { return memory_->size() / sizeof(int32_t); } private: - static constexpr intptr_t kBacktrackStackSize = 1 << 16; - std::unique_ptr memory_; DISALLOW_COPY_AND_ASSIGN(BacktrackStack); @@ -179,7 +182,7 @@ template static ObjectPtr RawMatch(const TypedData& bytecode, const String& subject, int32_t* registers, - intptr_t current, + int32_t current, uint32_t current_char) { // BacktrackStack ensures that the memory allocated for the backtracking stack // is returned to the system or cached if there is no stack being cached at @@ -189,8 +192,8 @@ static ObjectPtr RawMatch(const TypedData& bytecode, Exceptions::ThrowOOM(); UNREACHABLE(); } - intptr_t* backtrack_stack_base = backtrack_stack.data(); - intptr_t* backtrack_sp = backtrack_stack_base; + int32_t* backtrack_stack_base = backtrack_stack.data(); + int32_t* backtrack_sp = backtrack_stack_base; intptr_t backtrack_stack_space = backtrack_stack.max_size(); // TODO(zerny): Optimize as single instance. V8 has this as an @@ -689,7 +692,7 @@ static ObjectPtr RawMatch(const TypedData& bytecode, ObjectPtr IrregexpInterpreter::Match(const TypedData& bytecode, const String& subject, int32_t* registers, - intptr_t start_position) { + int32_t start_position) { uint16_t previous_char = '\n'; if (start_position != 0) { previous_char = subject.CharAt(start_position - 1); diff --git a/runtime/vm/regexp_interpreter.h b/runtime/vm/regexp_interpreter.h index a42bd95f0a6..f81ea432044 100644 --- a/runtime/vm/regexp_interpreter.h +++ b/runtime/vm/regexp_interpreter.h @@ -21,7 +21,7 @@ class IrregexpInterpreter : public AllStatic { static ObjectPtr Match(const TypedData& bytecode, const String& subject, int32_t* captures, - intptr_t start_position); + int32_t start_position); }; } // namespace dart diff --git a/tests/corelib/regexp/stack-overflow_test.dart b/tests/corelib/regexp/stack-overflow_test.dart index 97004c3cea5..513834e1c2f 100644 --- a/tests/corelib/regexp/stack-overflow_test.dart +++ b/tests/corelib/regexp/stack-overflow_test.dart @@ -22,6 +22,10 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// When running on the interpreter this test needs a bigger backtracking stack +// then we allocate by default. +// VMOptions=--regexp_backtrack_stack_size_kb=512 + import 'v8_regexp_utils.dart'; import 'package:expect/expect.dart';