diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e19f7322d8..653c2971fb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ the assignment to `y`. `MINUTES_PER_DAY` to `minutesPerDay`, and `ZERO` to `zero`. * Added `Provisional` annotation to `dart:core`. + * Added static `escape` function to `RegExp` class. * `dart:convert` * `Utf8Decoder` when compiled with dart2js uses the browser's `TextDecoder` in diff --git a/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart b/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart index 5950328e510..c15d282c265 100644 --- a/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart +++ b/pkg/dev_compiler/tool/input_sdk/patch/core_patch.dart @@ -16,7 +16,8 @@ import 'dart:_js_helper' NoInline, notNull, nullCheck, - Primitives; + Primitives, + quoteStringForRegExp; import 'dart:_runtime' as dart; @@ -499,6 +500,9 @@ class RegExp { {bool multiLine: false, bool caseSensitive: true}) => new JSSyntaxRegExp(source, multiLine: multiLine, caseSensitive: caseSensitive); + + @patch + static String escape(String text) => quoteStringForRegExp(text); } // Patch for 'identical' function. diff --git a/runtime/lib/regexp_patch.dart b/runtime/lib/regexp_patch.dart index 53af5beb7f9..b7e0f46a6d9 100644 --- a/runtime/lib/regexp_patch.dart +++ b/runtime/lib/regexp_patch.dart @@ -37,6 +37,58 @@ class RegExp { return value.regexp; } + /** + * Finds the index of the first RegExp-significant char in [text]. + * + * Starts looking from [start]. Returns `text.length` if no character + * is found that has special meaning in RegExp syntax. + */ + static int _findEscapeChar(String text, int start) { + // Table where each character in the range U+0000 to U+007f is represented + // by whether it needs to be escaped in a regexp. + // The \x00 character means escaped, and \x01 means non-escaped. 
+ const escapes = + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01" + // $ ( ) * + . + "\x01\x01\x01\x01\x00\x01\x01\x01\x00\x00\x00\x00\x01\x01\x00\x01" + // ? + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00" + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01" + // [ \ ] ^ + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01" + // { | } + "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x01\x01"; + for (int i = start; i < text.length; i++) { + int char = text.codeUnitAt(i); + if (char <= 0x7f && escapes.codeUnitAt(char) == 0) return i; + } + return text.length; + } + + @patch + static String escape(String text) { + int escapeCharIndex = _findEscapeChar(text, 0); + // If the text contains no characters needing escape, return it directly. + if (escapeCharIndex == text.length) return text; + + var buffer = new StringBuffer(); + int previousSliceEndIndex = 0; + do { + // Copy characters from previous escape to current escape into result. + // This includes the previously escaped character. + buffer.write(text.substring(previousSliceEndIndex, escapeCharIndex)); + // Prepare the current character to be escaped by prefixing it with a '\'. + buffer.write(r"\"); + previousSliceEndIndex = escapeCharIndex; + escapeCharIndex = _findEscapeChar(text, escapeCharIndex + 1); + } while (escapeCharIndex < text.length); + // Copy tail of string into result. + buffer.write(text.substring(previousSliceEndIndex, escapeCharIndex)); + return buffer.toString(); + } + // Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE // elements using an LRU eviction strategy. // TODO(zerny): Do not impose a fixed limit on the number of cached objects. 
diff --git a/sdk/lib/_internal/js_runtime/lib/core_patch.dart b/sdk/lib/_internal/js_runtime/lib/core_patch.dart index a63117d2ad7..28152960187 100644 --- a/sdk/lib/_internal/js_runtime/lib/core_patch.dart +++ b/sdk/lib/_internal/js_runtime/lib/core_patch.dart @@ -19,6 +19,7 @@ import 'dart:_js_helper' objectHashCode, patch, Primitives, + quoteStringForRegExp, stringJoinUnchecked, getTraceFromException, RuntimeError; @@ -488,6 +489,9 @@ class RegExp { {bool multiLine: false, bool caseSensitive: true}) => new JSSyntaxRegExp(source, multiLine: multiLine, caseSensitive: caseSensitive); + + @patch + static String escape(String text) => quoteStringForRegExp(text); } // Patch for 'identical' function. diff --git a/sdk/lib/core/regexp.dart b/sdk/lib/core/regexp.dart index 73882562561..95e12b5406e 100644 --- a/sdk/lib/core/regexp.dart +++ b/sdk/lib/core/regexp.dart @@ -24,11 +24,12 @@ part of dart.core; * * The following example finds all matches of a regular expression in * a string. + * ```dart + * RegExp exp = new RegExp(r"(\w+)"); + * String str = "Parse my string"; + * Iterable matches = exp.allMatches(str); + * ``` * - * RegExp exp = new RegExp(r"(\w+)"); - * String str = "Parse my string"; - * Iterable matches = exp.allMatches(str); - * * Note the use of a _raw string_ (a string prefixed with `r`) * in the example above. Use a raw string to treat each character in a string * as a literal character. @@ -43,6 +44,19 @@ abstract class RegExp implements Pattern { external factory RegExp(String source, {bool multiLine: false, bool caseSensitive: true}); + /** + * Returns a regular expression source string that matches [text]. + * + * If [text] contains characters that are meaningful in regular expressions, + * the resulting regular expression will match those characters literally. + * If [text] contains no characters that have special meaning in a regular + * expression, it is returned unmodified. 
+ * + * The characters that have special meaning in regular expressions are: + * `(`, `)`, `[`, `]`, `{`, `}`, `*`, `+`, `?`, `.`, `^`, `$`, `|` and `\`. + */ + external static String escape(String text); + /** * Searches for the first match of the regular expression * in the string [input]. Returns `null` if there is no match. diff --git a/tests/corelib_2/regexp/regexp_escape_test.dart b/tests/corelib_2/regexp/regexp_escape_test.dart new file mode 100644 index 00000000000..e0822067921 --- /dev/null +++ b/tests/corelib_2/regexp/regexp_escape_test.dart @@ -0,0 +1,43 @@ +// Copyright (c) 2018, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import "package:expect/expect.dart"; + +var escapeChars = r"([)}{]?*+.$^|\"; + +var nonEscapeAscii = "\x00\x01\x02\x03\x04\x05\x06\x07" // + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" // + "\x10\x11\x12\x13\x14\x15\x16\x17" // + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" // + """ !"#%&',-/0123456789:;<=>""" // + """@ABCDEFGHIJKLMNOPQRSTUVWXYZ_""" // + """`abcdefghijklmnopqrstuvwxyz~\x7f"""; +var someNonAscii = + new String.fromCharCodes(new List.generate(0x1000 - 128, (x) => x + 128)); + +test(String string, [bool shouldEscape]) { + var escape = RegExp.escape(string); + Expect.isTrue(new RegExp(escape).hasMatch(string), "$escape"); + Expect.equals(string, new RegExp(escape).firstMatch(string)[0], "$escape"); + if (shouldEscape == true) { + Expect.notEquals(string, escape); + } else if (shouldEscape == false) { + Expect.equals(string, escape); + } +} + +main() { + for (var c in escapeChars.split("")) { + test(c, true); + } + for (var c in nonEscapeAscii.split("")) { + test(c, false); + } + test(escapeChars, true); + test(nonEscapeAscii, false); + test(someNonAscii, false); + test((nonEscapeAscii + escapeChars) * 3, true); + test(r'.abc', true); // First only. + test(r'abc.', true); // Last only. +}