Move regex helpers to new module

This commit is contained in:
sharkdp 2020-04-03 11:44:45 +02:00 committed by David Peter
parent a3060f952e
commit 809fd8fb78
3 changed files with 38 additions and 37 deletions

View file

@ -1,9 +1,6 @@
use std::borrow::Cow;
use std::ffi::{OsStr, OsString};
use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
#[cfg(any(unix, target_os = "redox"))]
pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> {
use std::os::unix::ffi::OsStrExt;
@ -20,39 +17,6 @@ pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> {
}
}
/// Determine if a regex pattern contains a literal uppercase character.
pub fn pattern_has_uppercase_char(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
parser
.parse(pattern)
.map(|hir| hir_has_uppercase_char(&hir))
.unwrap_or(false)
}
/// Determine if a regex expression contains a literal uppercase character.
fn hir_has_uppercase_char(hir: &Hir) -> bool {
use regex_syntax::hir::*;
match *hir.kind() {
HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(),
HirKind::Literal(Literal::Byte(b)) => char::from(b).is_uppercase(),
HirKind::Class(Class::Unicode(ref ranges)) => ranges
.iter()
.any(|r| r.start().is_uppercase() || r.end().is_uppercase()),
HirKind::Class(Class::Bytes(ref ranges)) => ranges
.iter()
.any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()),
HirKind::Group(Group { ref hir, .. }) | HirKind::Repetition(Repetition { ref hir, .. }) => {
hir_has_uppercase_char(hir)
}
HirKind::Concat(ref hirs) | HirKind::Alternation(ref hirs) => {
hirs.iter().any(hir_has_uppercase_char)
}
_ => false,
}
}
/// Maximum size of the output buffer before flushing results to the console
pub const MAX_BUFFER_LENGTH: usize = 1000;

View file

@ -10,6 +10,7 @@ mod fshelper;
mod internal;
mod options;
mod output;
mod regex_helper;
mod walk;
use std::env;
@ -26,8 +27,9 @@ use regex::bytes::{RegexBuilder, RegexSetBuilder};
use crate::exec::CommandTemplate;
use crate::filetypes::FileTypes;
use crate::filter::{SizeFilter, TimeFilter};
use crate::internal::{pattern_has_uppercase_char, transform_args_with_exec};
use crate::internal::transform_args_with_exec;
use crate::options::Options;
use crate::regex_helper::pattern_has_uppercase_char;
// We use jemalloc for performance reasons, see https://github.com/sharkdp/fd/pull/481
#[cfg(all(not(windows), not(target_env = "musl")))]

35
src/regex_helper.rs Normal file
View file

@ -0,0 +1,35 @@
use regex_syntax::hir::Hir;
use regex_syntax::ParserBuilder;
/// Determine if a regex pattern contains a literal uppercase character.
pub fn pattern_has_uppercase_char(pattern: &str) -> bool {
let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build();
parser
.parse(pattern)
.map(|hir| hir_has_uppercase_char(&hir))
.unwrap_or(false)
}
/// Determine if a regex expression contains a literal uppercase character.
fn hir_has_uppercase_char(hir: &Hir) -> bool {
use regex_syntax::hir::*;
match *hir.kind() {
HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(),
HirKind::Literal(Literal::Byte(b)) => char::from(b).is_uppercase(),
HirKind::Class(Class::Unicode(ref ranges)) => ranges
.iter()
.any(|r| r.start().is_uppercase() || r.end().is_uppercase()),
HirKind::Class(Class::Bytes(ref ranges)) => ranges
.iter()
.any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()),
HirKind::Group(Group { ref hir, .. }) | HirKind::Repetition(Repetition { ref hir, .. }) => {
hir_has_uppercase_char(hir)
}
HirKind::Concat(ref hirs) | HirKind::Alternation(ref hirs) => {
hirs.iter().any(hir_has_uppercase_char)
}
_ => false,
}
}