1
0
mirror of https://github.com/dart-lang/sdk synced 2024-07-08 12:06:26 +00:00

Add RegExp.escape methods.

Fixes 4706

Bug: http://dartbug.com/4706
Change-Id: If635cb0eb7c20405ab0127a443fe51176191b5ad
Reviewed-on: https://dart-review.googlesource.com/35641
Reviewed-by: Stephen Adams <sra@google.com>
Commit-Queue: Lasse R.H. Nielsen <lrn@google.com>
This commit is contained in:
Lasse Reichstein Holst Nielsen 2018-02-01 09:56:47 +00:00 committed by commit-bot@chromium.org
parent 209959fe97
commit 0c18b643c6
6 changed files with 123 additions and 5 deletions

View File

@ -64,6 +64,7 @@ the assignment to `y`.
`MINUTES_PER_DAY` to `minutesPerDay`, and
`ZERO` to `zero`.
* Added `Provisional` annotation to `dart:core`.
* Added static `escape` function to `RegExp` class.
* `dart:convert`
* `Utf8Decoder` when compiled with dart2js uses the browser's `TextDecoder` in

View File

@ -16,7 +16,8 @@ import 'dart:_js_helper'
NoInline,
notNull,
nullCheck,
Primitives;
Primitives,
quoteStringForRegExp;
import 'dart:_runtime' as dart;
@ -499,6 +500,9 @@ class RegExp {
{bool multiLine: false, bool caseSensitive: true}) =>
new JSSyntaxRegExp(source,
multiLine: multiLine, caseSensitive: caseSensitive);
// Escaping is delegated to the `quoteStringForRegExp` helper that this
// library imports from `dart:_js_helper`.
@patch
static String escape(String text) {
  return quoteStringForRegExp(text);
}
}
// Patch for 'identical' function.

View File

@ -37,6 +37,58 @@ class RegExp {
return value.regexp;
}
/**
* Finds the index of the first RegExp-significant char in [text].
*
* Starts looking from [start]. Returns `text.length` if no character
* is found that has special meaning in RegExp syntax.
*/
static int _findEscapeChar(String text, int start) {
  // Lookup table indexed by code unit for the range U+0000 to U+007F.
  // An entry of \x00 marks a character that must be escaped in a regexp;
  // an entry of \x01 marks a character that can be used as-is.
  const needsEscapeTable =
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
      //               $               (   )   *   +           .
      "\x01\x01\x01\x01\x00\x01\x01\x01\x00\x00\x00\x00\x01\x01\x00\x01"
      //                                                           ?
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00"
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
      //                                           [   \   ]   ^
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x01"
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
      //                                           {   |   }
      "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x01\x01";
  final int length = text.length;
  int index = start;
  while (index < length) {
    final int codeUnit = text.codeUnitAt(index);
    // Code units above U+007F are not covered by the table and are never
    // treated as special.
    if (codeUnit < 0x80 && needsEscapeTable.codeUnitAt(codeUnit) == 0) {
      return index;
    }
    index++;
  }
  // Nothing found (or [start] was already at/after the end): report the
  // length of the text as the "not found" sentinel.
  return length;
}
// Prefixes every RegExp-significant character of [text] with a backslash.
//
// When [text] contains no such character the very same string is returned
// and no buffer is allocated.
@patch
static String escape(String text) {
  final int length = text.length;
  int specialIndex = _findEscapeChar(text, 0);
  // Fast path: nothing needs escaping.
  if (specialIndex == length) return text;
  var result = new StringBuffer();
  // Start of the part of [text] that has not been copied to [result] yet.
  int pendingStart = 0;
  // The fast path above guarantees `specialIndex < length` on entry, so the
  // loop body always runs at least once.
  while (specialIndex < length) {
    // Copy everything up to (not including) the special character, then
    // emit the backslash; the special character itself is copied as the
    // first character of the next slice.
    result
      ..write(text.substring(pendingStart, specialIndex))
      ..write(r"\");
    pendingStart = specialIndex;
    specialIndex = _findEscapeChar(text, specialIndex + 1);
  }
  // Copy the remaining tail, starting at the last escaped character.
  result.write(text.substring(pendingStart, specialIndex));
  return result.toString();
}
// Regular expression objects are stored in a cache of up to _MAX_CACHE_SIZE
// elements using an LRU eviction strategy.
// TODO(zerny): Do not impose a fixed limit on the number of cached objects.

View File

@ -19,6 +19,7 @@ import 'dart:_js_helper'
objectHashCode,
patch,
Primitives,
quoteStringForRegExp,
stringJoinUnchecked,
getTraceFromException,
RuntimeError;
@ -488,6 +489,9 @@ class RegExp {
{bool multiLine: false, bool caseSensitive: true}) =>
new JSSyntaxRegExp(source,
multiLine: multiLine, caseSensitive: caseSensitive);
// Escaping is delegated to the `quoteStringForRegExp` helper that this
// library imports from `dart:_js_helper`.
@patch
static String escape(String text) {
  return quoteStringForRegExp(text);
}
}
// Patch for 'identical' function.

View File

@ -24,11 +24,12 @@ part of dart.core;
*
* The following example finds all matches of a regular expression in
* a string.
* ```dart
* RegExp exp = new RegExp(r"(\w+)");
* String str = "Parse my string";
* Iterable<Match> matches = exp.allMatches(str);
* ```
*
* RegExp exp = new RegExp(r"(\w+)");
* String str = "Parse my string";
* Iterable<Match> matches = exp.allMatches(str);
*
* Note the use of a _raw string_ (a string prefixed with `r`)
* in the example above. Use a raw string to treat each character in a string
* as a literal character.
@ -43,6 +44,19 @@ abstract class RegExp implements Pattern {
external factory RegExp(String source,
{bool multiLine: false, bool caseSensitive: true});
/**
 * Returns the source of a regular expression that matches [text] literally.
 *
 * The result is a [String], not a [RegExp]; pass it to the [RegExp]
 * constructor to obtain a pattern that matches exactly [text].
 *
 * If [text] contains characters that are meaningful in regular expressions,
 * the resulting regular expression will match those characters literally.
 * If [text] contains no characters that have special meaning in a regular
 * expression, it is returned unmodified.
 *
 * The characters that have special meaning in regular expressions are:
 * `(`, `)`, `[`, `]`, `{`, `}`, `*`, `+`, `?`, `.`, `^`, `$`, `|` and `\`.
 */
external static String escape(String text);
/**
* Searches for the first match of the regular expression
* in the string [input]. Returns `null` if there is no match.

View File

@ -0,0 +1,43 @@
// Copyright (c) 2018, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import "package:expect/expect.dart";
// The fourteen characters with special meaning in RegExp syntax. `main`
// expects `RegExp.escape` to change any string containing one of them.
var escapeChars = r"([)}{]?*+.$^|\";
// Every ASCII character (U+0000 to U+007F) that is NOT in `escapeChars`.
// `main` expects `RegExp.escape` to return these unchanged.
var nonEscapeAscii = "\x00\x01\x02\x03\x04\x05\x06\x07" //
"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" //
"\x10\x11\x12\x13\x14\x15\x16\x17" //
"\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" //
""" !"#%&',-/0123456789:;<=>""" //
"""@ABCDEFGHIJKLMNOPQRSTUVWXYZ_""" //
"""`abcdefghijklmnopqrstuvwxyz~\x7f""";
// A string made of the code units U+0080 through U+0FFF, in order. `main`
// expects `RegExp.escape` to return it unchanged.
var someNonAscii =
new String.fromCharCodes(new List.generate(0x1000 - 128, (x) => x + 128));
// Checks that the escaped form of [string], used as a RegExp source, matches
// [string] and that the first match is the whole of [string].
//
// If [shouldEscape] is `true` the escaped form must differ from [string];
// if it is `false` the escaped form must equal [string]; if it is omitted
// no expectation is placed on whether escaping changed anything.
test(String string, [bool shouldEscape]) {
  var escape = RegExp.escape(string);
  var pattern = new RegExp(escape);
  Expect.isTrue(pattern.hasMatch(string), "$escape");
  var firstMatch = pattern.firstMatch(string);
  Expect.equals(string, firstMatch[0], "$escape");
  // No flag given: nothing more to verify.
  if (shouldEscape == null) return;
  if (shouldEscape) {
    Expect.notEquals(string, escape);
  } else {
    Expect.equals(string, escape);
  }
}
main() {
  // Each special character on its own must be changed by escaping.
  for (var i = 0; i < escapeChars.length; i++) {
    test(escapeChars[i], true);
  }
  // Each non-special ASCII character on its own must be left untouched.
  for (var i = 0; i < nonEscapeAscii.length; i++) {
    test(nonEscapeAscii[i], false);
  }
  // Whole strings: all special, all non-special ASCII, and non-ASCII only.
  test(escapeChars, true);
  test(nonEscapeAscii, false);
  test(someNonAscii, false);
  // Special and non-special characters interleaved over several repetitions.
  var mixed = nonEscapeAscii + escapeChars;
  test(mixed * 3, true);
  // A single special character at either end of the string.
  test(r'.abc', true); // First only.
  test(r'abc.', true); // Last only.
}