deno/cli/lsp/path_to_regex.rs
2024-01-01 19:58:21 +00:00

994 lines
26 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
// The logic of this module is heavily influenced by path-to-regexp at:
// https://github.com/pillarjs/path-to-regexp/ which is licensed as follows:
// The MIT License (MIT)
//
// Copyright (c) 2014 Blake Embrey (hello@blakeembrey.com)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
use deno_core::anyhow::anyhow;
use deno_core::error::AnyError;
use fancy_regex::Regex as FancyRegex;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
use std::fmt;
use std::fmt::Write as _;
use std::iter::Peekable;
/// Matches any single character from the set
/// `. + * ? = ^ ! : $ { } ( ) [ ] | / \` and captures it as group 1.
/// `escape_string` uses it to prefix each such character with a backslash.
static ESCAPE_STRING_RE: Lazy<Regex> =
lazy_regex::lazy_regex!(r"([.+*?=^!:${}()\[\]|/\\])");
/// The kinds of lexical token produced by `lexer`.
#[derive(Debug, PartialEq, Eq)]
enum TokenType {
/// A `{` character.
Open,
/// A `}` character.
Close,
/// The text between a balanced `(` ... `)` pair (parentheses excluded).
Pattern,
/// The identifier following a `:` (the colon itself is excluded).
Name,
/// Any character that has no other meaning to the lexer.
Char,
/// The character that followed a backslash.
EscapedChar,
/// One of `*`, `+` or `?`.
Modifier,
/// Marker pushed once after the whole input has been consumed.
End,
}
/// A single token produced by `lexer`.
#[derive(Debug)]
struct LexToken {
// What kind of token this is.
token_type: TokenType,
// Position in the input recorded by the lexer for this token; it is only
// read back when building error messages.
index: usize,
// The token text (empty for `TokenType::End`).
value: String,
}
/// Return a copy of `s` in which every character matched by
/// `ESCAPE_STRING_RE` is preceded by a backslash.
fn escape_string(s: &str) -> String {
  let escaped = ESCAPE_STRING_RE.replace_all(s, r"\$1");
  escaped.into_owned()
}
/// Split a path expression into a flat list of lexical tokens.
///
/// The returned vector always ends with a single `TokenType::End` token.
/// Every token records the position at which it was found; positions are
/// counted in characters (not bytes) from the start of `s`.
///
/// # Errors
///
/// Returns an error when:
/// - a backslash is the last character of the input,
/// - a `:` is not followed by at least one of `[0-9A-Za-z_]`,
/// - a `(` pattern starts with `?`, is empty, is not closed, or contains a
///   nested group that does not start with `(?`.
fn lexer(s: &str) -> Result<Vec<LexToken>, AnyError> {
  let mut tokens = Vec::new();
  let mut chars = s.chars().peekable();
  // Character offset of the character currently being examined.
  let mut index = 0_usize;

  while let Some(c) = chars.next() {
    match c {
      '*' | '+' | '?' => {
        tokens.push(LexToken {
          token_type: TokenType::Modifier,
          index,
          value: c.to_string(),
        });
        index += 1;
      }
      '\\' => {
        // Step over the backslash first; the token is recorded at the
        // position of the escaped character.
        index += 1;
        let value = chars
          .next()
          .ok_or_else(|| anyhow!("Unexpected end of string at {}.", index))?;
        tokens.push(LexToken {
          token_type: TokenType::EscapedChar,
          index,
          value: value.to_string(),
        });
        index += 1;
      }
      '{' => {
        tokens.push(LexToken {
          token_type: TokenType::Open,
          index,
          value: '{'.to_string(),
        });
        index += 1;
      }
      '}' => {
        tokens.push(LexToken {
          token_type: TokenType::Close,
          index,
          value: '}'.to_string(),
        });
        index += 1;
      }
      ':' => {
        let mut name = String::new();
        while let Some(ch) =
          chars.next_if(|ch| ch.is_ascii_alphanumeric() || *ch == '_')
        {
          name.push(ch);
        }
        if name.is_empty() {
          return Err(anyhow!("Missing parameter name at {}", index));
        }
        // Names are ASCII only, so the byte length equals the character
        // count. The extra 1 accounts for the leading `:`.
        let name_len = name.len();
        tokens.push(LexToken {
          token_type: TokenType::Name,
          index,
          value: name,
        });
        index += 1 + name_len;
      }
      '(' => {
        // Nesting depth of parentheses; the opening `(` is already consumed.
        let mut depth = 1;
        let mut pattern = String::new();
        if chars.peek() == Some(&'?') {
          return Err(anyhow!(
            "Pattern cannot start with \"?\" at {}.",
            index + 1
          ));
        }
        while let Some(&next) = chars.peek() {
          match next {
            '\\' => {
              // Keep escape sequences verbatim inside the pattern.
              chars.next();
              pattern.push('\\');
              let escaped = chars
                .next()
                .ok_or_else(|| anyhow!("Unexpected termination of string."))?;
              pattern.push(escaped);
            }
            ')' => {
              chars.next();
              depth -= 1;
              if depth == 0 {
                // The closing parenthesis of the pattern itself is dropped.
                break;
              }
              pattern.push(')');
            }
            '(' => {
              chars.next();
              depth += 1;
              pattern.push('(');
              // Only `(?...)` style groups are allowed inside a pattern.
              if chars.peek() != Some(&'?') {
                return Err(anyhow!(
                  "Capturing groups are not allowed at {}.",
                  index + pattern.chars().count()
                ));
              }
            }
            other => {
              chars.next();
              pattern.push(other);
            }
          }
        }
        if depth > 0 {
          return Err(anyhow!("Unbalanced pattern at {}.", index));
        }
        if pattern.is_empty() {
          return Err(anyhow!("Missing pattern at {}.", index));
        }
        // Count characters rather than bytes so that positions stay
        // consistent with every other branch when the pattern contains
        // non-ASCII text. The extra 2 accounts for the surrounding parens.
        let pattern_len = pattern.chars().count();
        tokens.push(LexToken {
          token_type: TokenType::Pattern,
          index,
          value: pattern,
        });
        index += 2 + pattern_len;
      }
      other => {
        tokens.push(LexToken {
          token_type: TokenType::Char,
          index,
          value: other.to_string(),
        });
        index += 1;
      }
    }
  }

  tokens.push(LexToken {
    token_type: TokenType::End,
    index,
    value: "".to_string(),
  });
  Ok(tokens)
}
/// The name of a key: either a string or a numeric index.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum StringOrNumber {
  String(String),
  Number(usize),
}

impl fmt::Display for StringOrNumber {
  /// Writes the inner string or number without any decoration; width and
  /// padding flags of the outer formatter are not applied.
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      Self::String(text) => f.write_str(text),
      Self::Number(number) => write!(f, "{}", number),
    }
  }
}
/// The value of a matched or supplied parameter: a single string, or a list
/// of string segments for keys that repeat (`*` or `+` modifier).
#[derive(Debug, Clone)]
pub enum StringOrVec {
String(String),
Vec(Vec<String>),
}
impl StringOrVec {
pub fn from_str(s: &str, key: &Key) -> StringOrVec {
match &key.modifier {
Some(m) if m == "+" || m == "*" => {
let pat = format!(
"{}{}",
key.prefix.clone().unwrap_or_default(),
key.suffix.clone().unwrap_or_default()
);
s.split(&pat)
.map(String::from)
.collect::<Vec<String>>()
.into()
}
_ => s.into(),
}
}
pub fn to_string(
&self,
maybe_key: Option<&Key>,
omit_initial_prefix: bool,
) -> String {
match self {
Self::String(s) => s.clone(),
Self::Vec(v) => {
let (prefix, suffix) = if let Some(key) = maybe_key {
(
key.prefix.clone().unwrap_or_default(),
key.suffix.clone().unwrap_or_default(),
)
} else {
("/".to_string(), "".to_string())
};
let mut s = String::new();
for (i, segment) in v.iter().enumerate() {
if omit_initial_prefix && i == 0 {
write!(s, "{segment}{suffix}").unwrap();
} else {
write!(s, "{prefix}{segment}{suffix}").unwrap();
}
}
s
}
}
}
}
impl Default for StringOrVec {
  /// The default value is the `String` variant holding an empty string.
  fn default() -> Self {
    Self::String(String::new())
  }
}
impl<'a> From<&'a str> for StringOrVec {
  /// Copies the borrowed text into the `String` variant.
  fn from(text: &'a str) -> Self {
    Self::String(text.to_owned())
  }
}
impl From<Vec<String>> for StringOrVec {
fn from(v: Vec<String>) -> Self {
Self::Vec(v)
}
}
/// Meta data about a key.
#[derive(Debug, Clone)]
pub struct Key {
/// The name written after `:` in the path, or a running index assigned by
/// `parse` to keys that only have a pattern.
pub name: StringOrNumber,
/// Literal text that precedes the value, if any.
pub prefix: Option<String>,
/// Literal text that follows the value, if any.
pub suffix: Option<String>,
/// Regular expression source the value has to match. It is empty for a
/// `{...}` group that contains neither a name nor a pattern.
pub pattern: String,
/// One of `?`, `*` or `+` when the key was followed by a modifier.
pub modifier: Option<String>,
}
/// A token is a string (nothing special) or key metadata (capture group).
#[derive(Debug, Clone)]
pub enum Token {
/// Literal path text.
String(String),
/// A parameter described by its `Key` metadata.
Key(Key),
}
/// Options accepted by `parse`.
#[derive(Debug, Default)]
pub struct ParseOptions {
// Characters a parameter without an explicit pattern must not contain;
// when unset, `parse` uses the default pattern `[^/#?]+?`.
delimiter: Option<String>,
// Characters that are treated as the prefix of a directly following
// parameter; when unset, `parse` uses `./`.
prefixes: Option<String>,
}
/// Options accepted by `Compiler::new`.
#[derive(Debug)]
pub struct TokensToCompilerOptions {
  // When `false`, the validation expressions are built case-insensitively.
  sensitive: bool,
  // When `true`, values passed to `Compiler::to_path` are checked against the
  // pattern of their key.
  validate: bool,
}

impl Default for TokensToCompilerOptions {
  /// Case-insensitive matching with validation enabled.
  fn default() -> Self {
    TokensToCompilerOptions {
      validate: true,
      sensitive: false,
    }
  }
}
/// Options accepted by `tokens_to_regex`.
#[derive(Debug)]
pub struct TokensToRegexOptions {
  // When `false`, the expression is built with the `(?i)` flag.
  sensitive: bool,
  // When `false`, an optional trailing delimiter is allowed.
  strict: bool,
  // When `true`, the expression has to match up to the end of the input.
  end: bool,
  // When `true`, the expression is anchored with `^`.
  start: bool,
  // Characters treated as delimiters; `/#?` when unset.
  delimiter: Option<String>,
  // Characters that may terminate a match.
  ends_with: Option<String>,
}

impl Default for TokensToRegexOptions {
  /// Case-insensitive, non-strict, anchored at both ends, with no custom
  /// delimiter or terminator characters.
  fn default() -> Self {
    TokensToRegexOptions {
      start: true,
      end: true,
      strict: false,
      sensitive: false,
      delimiter: None,
      ends_with: None,
    }
  }
}
/// Options accepted by `string_to_regex`, bundling the options of the two
/// steps it performs.
#[derive(Debug, Default)]
pub struct PathToRegexOptions {
// Forwarded to `parse`.
parse_options: Option<ParseOptions>,
// Forwarded to `tokens_to_regex`.
token_to_regex_options: Option<TokensToRegexOptions>,
}
/// Consume the next token and return its value, but only when it is of the
/// requested type. Otherwise the iterator is left untouched and `None` is
/// returned.
fn try_consume(
  token_type: &TokenType,
  it: &mut Peekable<impl Iterator<Item = LexToken>>,
) -> Option<String> {
  it.next_if(|candidate| candidate.token_type == *token_type)
    .map(|consumed| consumed.value)
}
/// Consume the next token, which has to be of the requested type, and return
/// its value.
///
/// # Errors
///
/// When the next token has a different type it is consumed anyway and an
/// error naming its type and position is returned. An error is also returned
/// when no tokens are left.
fn must_consume(
  token_type: &TokenType,
  it: &mut Peekable<impl Iterator<Item = LexToken>>,
) -> Result<String, AnyError> {
  if let Some(value) = try_consume(token_type, it) {
    return Ok(value);
  }
  match it.next() {
    Some(unexpected) => Err(anyhow!(
      "Unexpected {:?} at {}, expected {:?}",
      unexpected.token_type,
      unexpected.index,
      token_type
    )),
    None => Err(anyhow!(
      "Unexpected end of tokens, expected {:?}",
      token_type
    )),
  }
}
/// Consume a run of consecutive `Char` and `EscapedChar` tokens, in any
/// order, and return their concatenated values.
///
/// Returns `None` when the next token is neither a `Char` nor an
/// `EscapedChar`, in which case nothing is consumed.
fn consume_text(
  it: &mut Peekable<impl Iterator<Item = LexToken>>,
) -> Option<String> {
  let mut result = String::new();
  loop {
    // Keep going for as long as either kind of text token is next. Stopping
    // after a single `Char` would truncate plain text such as the `abc` in
    // `{abc:name}`.
    let value = match try_consume(&TokenType::Char, it) {
      Some(value) => value,
      None => match try_consume(&TokenType::EscapedChar, it) {
        Some(value) => value,
        None => break,
      },
    };
    result.push_str(&value);
  }
  if result.is_empty() {
    None
  } else {
    Some(result)
  }
}
/// Parse a string for the raw tokens.
///
/// The input is first split by `lexer` and then folded into `Token`s:
/// literal text becomes `Token::String`, while `:name`, `(pattern)` and
/// `{...}` groups become `Token::Key`.
///
/// # Errors
///
/// Returns the error of `lexer` when the input cannot be tokenized, and an
/// error when a `{` group is not closed by `}` or when a token appears where
/// none is expected.
pub fn parse(
s: &str,
maybe_options: Option<ParseOptions>,
) -> Result<Vec<Token>, AnyError> {
let mut tokens = lexer(s)?.into_iter().peekable();
let options = maybe_options.unwrap_or_default();
let prefixes = options.prefixes.unwrap_or_else(|| "./".to_string());
// Pattern used for keys that have a name but no explicit pattern.
let default_pattern = if let Some(delimiter) = options.delimiter {
format!("[^{}]+?", escape_string(&delimiter))
} else {
"[^/#?]+?".to_string()
};
let mut result = Vec::new();
// Running index handed out as the name of keys that have no name.
let mut key = 0_usize;
// Literal text collected so far that has not been emitted as a token yet.
let mut path = String::new();
loop {
// Each of these consumes a token only if it is next, so at most a
// `Char`, then a `Name`, then a `Pattern` are taken, in that order.
let char = try_consume(&TokenType::Char, &mut tokens);
let name = try_consume(&TokenType::Name, &mut tokens);
let pattern = try_consume(&TokenType::Pattern, &mut tokens);
if name.is_some() || pattern.is_some() {
let mut prefix = char.unwrap_or_default();
// A preceding character only acts as the key's prefix when it is one of
// `prefixes`; otherwise it is ordinary text. An empty prefix is always
// "contained", so it is left as is.
if !prefixes.contains(&prefix) {
path.push_str(&prefix);
prefix = String::new();
}
// Flush pending literal text before the key.
if !path.is_empty() {
result.push(Token::String(path.clone()));
path = String::new();
}
let name = name.map(StringOrNumber::String).unwrap_or_else(|| {
let default = StringOrNumber::Number(key);
key += 1;
default
});
let prefix = if prefix.is_empty() {
None
} else {
Some(prefix)
};
result.push(Token::Key(Key {
name,
prefix,
suffix: None,
pattern: pattern.unwrap_or_else(|| default_pattern.clone()),
modifier: try_consume(&TokenType::Modifier, &mut tokens),
}));
continue;
}
// No key here: a plain or escaped character extends the literal text.
if let Some(value) = char {
path.push_str(&value);
continue;
} else if let Some(value) =
try_consume(&TokenType::EscapedChar, &mut tokens)
{
path.push_str(&value);
continue;
}
// Anything else ends the current run of literal text.
if !path.is_empty() {
result.push(Token::String(path.clone()));
path = String::new();
}
// `{prefix:name(pattern)suffix}modifier` group; every part is optional.
if try_consume(&TokenType::Open, &mut tokens).is_some() {
let prefix = consume_text(&mut tokens);
let maybe_name = try_consume(&TokenType::Name, &mut tokens);
let maybe_pattern = try_consume(&TokenType::Pattern, &mut tokens);
let suffix = consume_text(&mut tokens);
must_consume(&TokenType::Close, &mut tokens)?;
// Unnamed groups get a numeric name only when they carry a pattern;
// groups with neither a name nor a pattern get an empty name.
let name = maybe_name
.clone()
.map(StringOrNumber::String)
.unwrap_or_else(|| {
if maybe_pattern.is_some() {
let default = StringOrNumber::Number(key);
key += 1;
default
} else {
StringOrNumber::String("".to_string())
}
});
// A named group without a pattern uses the default pattern; a group with
// neither keeps an empty pattern.
let pattern = if maybe_name.is_some() && maybe_pattern.is_none() {
default_pattern.clone()
} else {
maybe_pattern.unwrap_or_default()
};
result.push(Token::Key(Key {
name,
prefix,
pattern,
suffix,
modifier: try_consume(&TokenType::Modifier, &mut tokens),
}));
continue;
}
// Nothing matched, so the only token allowed here is the end marker.
must_consume(&TokenType::End, &mut tokens)?;
break;
}
Ok(result)
}
/// Transform a vector of tokens into a regular expression, returning the
/// regular expression and optionally any keys that can be matched as part of
/// the expression.
pub fn tokens_to_regex(
tokens: &[Token],
maybe_options: Option<TokensToRegexOptions>,
) -> Result<(FancyRegex, Option<Vec<Key>>), AnyError> {
let TokensToRegexOptions {
sensitive,
strict,
end,
start,
delimiter,
ends_with,
} = maybe_options.unwrap_or_default();
let has_ends_with = ends_with.is_some();
let ends_with = format!(r"[{}]|$", ends_with.unwrap_or_default());
let delimiter =
format!(r"[{}]", delimiter.unwrap_or_else(|| "/#?".to_string()));
let mut route = if start {
"^".to_string()
} else {
String::new()
};
let maybe_end_token = tokens.iter().last().cloned();
let mut keys: Vec<Key> = Vec::new();
for token in tokens {
let value = match token {
Token::String(s) => s.to_string(),
Token::Key(key) => {
if !key.pattern.is_empty() {
keys.push(key.clone());
}
let prefix = key
.prefix
.clone()
.map(|s| escape_string(&s))
.unwrap_or_default();
let suffix = key
.suffix
.clone()
.map(|s| escape_string(&s))
.unwrap_or_default();
if !key.pattern.is_empty() {
if !prefix.is_empty() || !suffix.is_empty() {
match &key.modifier {
Some(s) if s == "+" || s == "*" => {
let modifier = if key.modifier == Some("*".to_string()) {
"?"
} else {
""
};
format!(
"(?:{}((?:{})(?:{}{}(?:{}))*){}){}",
prefix,
key.pattern,
suffix,
prefix,
key.pattern,
suffix,
modifier
)
}
_ => {
let modifier = key.modifier.clone().unwrap_or_default();
format!(
r"(?:{}({}){}){}",
prefix, key.pattern, suffix, modifier
)
}
}
} else {
let modifier = key.modifier.clone().unwrap_or_default();
format!(r"({}){}", key.pattern, modifier)
}
} else {
let modifier = key.modifier.clone().unwrap_or_default();
format!(r"(?:{prefix}{suffix}){modifier}")
}
}
};
route.push_str(&value);
}
if end {
if !strict {
write!(route, r"{delimiter}?").unwrap();
}
if has_ends_with {
write!(route, r"(?={ends_with})").unwrap();
} else {
route.push('$');
}
} else {
let is_end_delimited = match maybe_end_token {
Some(Token::String(mut s)) => {
if let Some(c) = s.pop() {
delimiter.contains(c)
} else {
false
}
}
Some(_) => false,
None => true,
};
if !strict {
write!(route, r"(?:{delimiter}(?={ends_with}))?").unwrap();
}
if !is_end_delimited {
write!(route, r"(?={delimiter}|{ends_with})").unwrap();
}
}
let flags = if sensitive { "" } else { "(?i)" };
let re = FancyRegex::new(&format!("{flags}{route}"))?;
let maybe_keys = if keys.is_empty() { None } else { Some(keys) };
Ok((re, maybe_keys))
}
/// Convert a path-like string into a regular expression, returning the regular
/// expression and optionally any keys that can be matched in the string.
pub fn string_to_regex(
path: &str,
maybe_options: Option<PathToRegexOptions>,
) -> Result<(FancyRegex, Option<Vec<Key>>), AnyError> {
let (parse_options, tokens_to_regex_options) =
if let Some(options) = maybe_options {
(options.parse_options, options.token_to_regex_options)
} else {
(None, None)
};
tokens_to_regex(&parse(path, parse_options)?, tokens_to_regex_options)
}
/// Builds concrete paths out of parsed tokens and parameter values.
pub struct Compiler {
// One entry per token, in token order: the validation expression of a key,
// or `None` for literal text.
matches: Vec<Option<Regex>>,
// The tokens the path is assembled from.
tokens: Vec<Token>,
// Whether parameter values are checked against `matches`.
validate: bool,
}
impl Compiler {
  /// Create a compiler for `tokens`.
  ///
  /// For every key token a validation expression is built from the key's
  /// pattern, anchored at both ends and case-insensitive unless the
  /// `sensitive` option is set.
  ///
  /// # Panics
  ///
  /// Panics when the pattern of a key is not accepted by `Regex::new`.
  pub fn new(
    tokens: &[Token],
    maybe_options: Option<TokensToCompilerOptions>,
  ) -> Self {
    let options = maybe_options.unwrap_or_default();
    let flags = if options.sensitive { "" } else { "(?i)" };
    let mut matches = Vec::with_capacity(tokens.len());
    for token in tokens {
      let validator = match token {
        Token::Key(key) => {
          let source = format!("{}^(?:{})$", flags, key.pattern);
          Some(Regex::new(&source).unwrap())
        }
        Token::String(_) => None,
      };
      matches.push(validator);
    }
    Self {
      matches,
      tokens: tokens.to_vec(),
      validate: options.validate,
    }
  }

  /// Convert a map of key values into a string.
  ///
  /// # Errors
  ///
  /// Returns an error when a required key is missing, when a vector is
  /// supplied for a key that does not repeat, when an empty vector is
  /// supplied for a `+` key, or (with validation enabled) when a value does
  /// not match the pattern of its key.
  pub fn to_path(
    &self,
    params: &HashMap<StringOrNumber, StringOrVec>,
  ) -> Result<String, AnyError> {
    let mut path = String::new();
    // `matches` holds exactly one entry per token, so both can be walked in
    // lockstep.
    for (token, validator) in self.tokens.iter().zip(&self.matches) {
      let key = match token {
        Token::String(text) => {
          path.push_str(text);
          continue;
        }
        Token::Key(key) => key,
      };
      let modifier = key.modifier.as_deref();
      let optional = matches!(modifier, Some("?" | "*"));
      let repeat = matches!(modifier, Some("*" | "+"));
      let prefix = key.prefix.as_deref().unwrap_or_default();
      let suffix = key.suffix.as_deref().unwrap_or_default();

      match params.get(&key.name) {
        Some(StringOrVec::Vec(segments)) => {
          if !repeat {
            return Err(anyhow!(
              "Expected \"{:?}\" to not repeat, but got a vector",
              key.name
            ));
          }
          if segments.is_empty() && !optional {
            return Err(anyhow!(
              "Expected \"{:?}\" to not be empty.",
              key.name
            ));
          }
          for segment in segments {
            // Empty segments are written without being validated.
            if self.validate && !segment.is_empty() {
              if let Some(re) = validator {
                if !re.is_match(segment) {
                  return Err(anyhow!(
                    "Expected all \"{:?}\" to match \"{}\", but got {}",
                    key.name,
                    key.pattern,
                    segment
                  ));
                }
              }
            }
            write!(path, "{prefix}{segment}{suffix}").unwrap();
          }
        }
        Some(StringOrVec::String(value)) => {
          if self.validate {
            if let Some(re) = validator {
              if !re.is_match(value) {
                return Err(anyhow!(
                  "Expected \"{:?}\" to match \"{}\", but got \"{}\"",
                  key.name,
                  key.pattern,
                  value
                ));
              }
            }
          }
          write!(path, "{prefix}{value}{suffix}").unwrap();
        }
        None => {
          if !optional {
            let key_type = if repeat { "an array" } else { "a string" };
            return Err(anyhow!(
              "Expected \"{:?}\" to be {}",
              key.name,
              key_type
            ));
          }
        }
      }
    }
    Ok(path)
  }
}
/// The outcome of a successful `Matcher::matches` call.
#[derive(Debug)]
pub struct MatchResult {
/// The part of the input that was matched by the expression.
pub path: String,
/// Offset in the input at which the match starts.
pub index: usize,
/// The captured values, keyed by the name of their key.
pub params: HashMap<StringOrNumber, StringOrVec>,
}
impl MatchResult {
  /// Look up the captured value of the key with the string name `key`.
  pub fn get(&self, key: &str) -> Option<&StringOrVec> {
    let lookup = StringOrNumber::String(key.to_owned());
    self.params.get(&lookup)
  }
}
/// Matches paths against the expression built from a list of tokens.
#[derive(Debug)]
pub struct Matcher {
// The keys of the expression in capture group order, if there are any.
maybe_keys: Option<Vec<Key>>,
// The expression built by `tokens_to_regex`.
re: FancyRegex,
}
impl Matcher {
pub fn new(
tokens: &[Token],
maybe_options: Option<TokensToRegexOptions>,
) -> Result<Self, AnyError> {
let (re, maybe_keys) = tokens_to_regex(tokens, maybe_options)?;
Ok(Self { maybe_keys, re })
}
/// Match a string path, optionally returning the match result.
pub fn matches(&self, path: &str) -> Option<MatchResult> {
let caps = self.re.captures(path).ok()??;
let m = caps.get(0)?;
let path = m.as_str().to_string();
let index = m.start();
let mut params = HashMap::new();
if let Some(keys) = &self.maybe_keys {
for (i, key) in keys.iter().enumerate() {
if let Some(m) = caps.get(i + 1) {
let value = if key.modifier == Some("*".to_string())
|| key.modifier == Some("+".to_string())
{
let pat = format!(
"{}{}",
key.prefix.clone().unwrap_or_default(),
key.suffix.clone().unwrap_or_default()
);
m.as_str()
.split(&pat)
.map(String::from)
.collect::<Vec<String>>()
.into()
} else {
m.as_str().into()
};
params.insert(key.name.clone(), value);
}
}
}
Some(MatchResult {
path,
index,
params,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
// Expected match: the matched text and its start and end offsets.
type FixtureMatch<'a> = (&'a str, usize, usize);
// An input string together with the match it should produce, or `None` when
// it should not match.
type Fixture<'a> = (&'a str, Option<FixtureMatch<'a>>);
// Compiles `path` with `string_to_regex` and checks every fixture against
// the resulting expression.
fn test_path(
path: &str,
maybe_options: Option<PathToRegexOptions>,
fixtures: &[Fixture],
) {
let result = string_to_regex(path, maybe_options);
assert!(result.is_ok(), "Could not parse path: \"{path}\"");
let (re, _) = result.unwrap();
for (fixture, expected) in fixtures {
let result = re.find(fixture);
assert!(
result.is_ok(),
"Find failure for path \"{path}\" and fixture \"{fixture}\""
);
let actual = result.unwrap();
if let Some((text, start, end)) = *expected {
assert!(actual.is_some(), "Match failure for path \"{path}\" and fixture \"{fixture}\". Expected Some got None");
let actual = actual.unwrap();
assert_eq!(actual.as_str(), text, "Match failure for path \"{}\" and fixture \"{}\". Expected \"{}\" got \"{}\".", path, fixture, text, actual.as_str());
assert_eq!(actual.start(), start);
assert_eq!(actual.end(), end);
} else {
assert!(actual.is_none(), "Match failure for path \"{path}\" and fixture \"{fixture}\". Expected None got {actual:?}");
}
}
}
// A path with two plain keys and one repeating key is assembled from its
// parameter values.
#[test]
fn test_compiler() {
let tokens = parse("/x/:a@:b/:c*", None).expect("could not parse");
let mut params = HashMap::<StringOrNumber, StringOrVec>::new();
params.insert(
StringOrNumber::String("a".to_string()),
StringOrVec::String("y".to_string()),
);
params.insert(
StringOrNumber::String("b".to_string()),
StringOrVec::String("v1.0.0".to_string()),
);
params.insert(
StringOrNumber::String("c".to_string()),
StringOrVec::Vec(vec!["z".to_string(), "example.ts".to_string()]),
);
let compiler = Compiler::new(&tokens, None);
let actual = compiler.to_path(&params);
assert!(actual.is_ok());
let actual = actual.unwrap();
assert_eq!(actual, "/x/y@v1.0.0/z/example.ts".to_string());
}
// An empty trailing segment of a repeating key yields a trailing separator.
#[test]
fn test_compiler_ends_with_sep() {
let tokens = parse("/x/:a@:b/:c*", None).expect("could not parse");
let mut params = HashMap::<StringOrNumber, StringOrVec>::new();
params.insert(
StringOrNumber::String("a".to_string()),
StringOrVec::String("y".to_string()),
);
params.insert(
StringOrNumber::String("b".to_string()),
StringOrVec::String("v1.0.0".to_string()),
);
params.insert(
StringOrNumber::String("c".to_string()),
StringOrVec::Vec(vec![
"z".to_string(),
"example".to_string(),
"".to_string(),
]),
);
let compiler = Compiler::new(&tokens, None);
let actual = compiler.to_path(&params);
assert!(actual.is_ok());
let actual = actual.unwrap();
assert_eq!(actual, "/x/y@v1.0.0/z/example/".to_string());
}
// Literal paths: optional trailing delimiter and case sensitivity.
#[test]
fn test_string_to_regex() {
test_path("/", None, &[("/test", None), ("/", Some(("/", 0, 1)))]);
test_path(
"/test",
None,
&[
("/test", Some(("/test", 0, 5))),
("/route", None),
("/test/route", None),
("/test/", Some(("/test/", 0, 6))),
],
);
test_path(
"/test/",
None,
&[
("/test", None),
("/test/", Some(("/test/", 0, 6))),
("/test//", Some(("/test//", 0, 7))),
],
);
// case-sensitive paths
test_path(
"/test",
Some(PathToRegexOptions {
parse_options: None,
token_to_regex_options: Some(TokensToRegexOptions {
sensitive: true,
..Default::default()
}),
}),
&[("/test", Some(("/test", 0, 5))), ("/TEST", None)],
);
test_path(
"/TEST",
Some(PathToRegexOptions {
parse_options: None,
token_to_regex_options: Some(TokensToRegexOptions {
sensitive: true,
..Default::default()
}),
}),
&[("/TEST", Some(("/TEST", 0, 5))), ("/test", None)],
);
}
}