diff --git a/src/internal/mod.rs b/src/internal/mod.rs index d6b8026..d35d354 100644 --- a/src/internal/mod.rs +++ b/src/internal/mod.rs @@ -1,9 +1,6 @@ use std::borrow::Cow; use std::ffi::{OsStr, OsString}; -use regex_syntax::hir::Hir; -use regex_syntax::ParserBuilder; - #[cfg(any(unix, target_os = "redox"))] pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> { use std::os::unix::ffi::OsStrExt; @@ -20,39 +17,6 @@ pub fn osstr_to_bytes(input: &OsStr) -> Cow<[u8]> { } } -/// Determine if a regex pattern contains a literal uppercase character. -pub fn pattern_has_uppercase_char(pattern: &str) -> bool { - let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build(); - - parser - .parse(pattern) - .map(|hir| hir_has_uppercase_char(&hir)) - .unwrap_or(false) -} - -/// Determine if a regex expression contains a literal uppercase character. -fn hir_has_uppercase_char(hir: &Hir) -> bool { - use regex_syntax::hir::*; - - match *hir.kind() { - HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(), - HirKind::Literal(Literal::Byte(b)) => char::from(b).is_uppercase(), - HirKind::Class(Class::Unicode(ref ranges)) => ranges - .iter() - .any(|r| r.start().is_uppercase() || r.end().is_uppercase()), - HirKind::Class(Class::Bytes(ref ranges)) => ranges - .iter() - .any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()), - HirKind::Group(Group { ref hir, .. }) | HirKind::Repetition(Repetition { ref hir, .. 
}) => { - hir_has_uppercase_char(hir) - } - HirKind::Concat(ref hirs) | HirKind::Alternation(ref hirs) => { - hirs.iter().any(hir_has_uppercase_char) - } - _ => false, - } -} - /// Maximum size of the output buffer before flushing results to the console pub const MAX_BUFFER_LENGTH: usize = 1000; diff --git a/src/main.rs b/src/main.rs index 95fd0a5..eea6542 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ mod fshelper; mod internal; mod options; mod output; +mod regex_helper; mod walk; use std::env; @@ -26,8 +27,9 @@ use regex::bytes::{RegexBuilder, RegexSetBuilder}; use crate::exec::CommandTemplate; use crate::filetypes::FileTypes; use crate::filter::{SizeFilter, TimeFilter}; -use crate::internal::{pattern_has_uppercase_char, transform_args_with_exec}; +use crate::internal::transform_args_with_exec; use crate::options::Options; +use crate::regex_helper::pattern_has_uppercase_char; // We use jemalloc for performance reasons, see https://github.com/sharkdp/fd/pull/481 #[cfg(all(not(windows), not(target_env = "musl")))] diff --git a/src/regex_helper.rs b/src/regex_helper.rs new file mode 100644 index 0000000..131d0b6 --- /dev/null +++ b/src/regex_helper.rs @@ -0,0 +1,35 @@ +use regex_syntax::hir::Hir; +use regex_syntax::ParserBuilder; + +/// Determine if a regex pattern contains a literal uppercase character. +pub fn pattern_has_uppercase_char(pattern: &str) -> bool { + let mut parser = ParserBuilder::new().allow_invalid_utf8(true).build(); + + parser + .parse(pattern) + .map(|hir| hir_has_uppercase_char(&hir)) + .unwrap_or(false) +} + +/// Determine if a regex expression contains a literal uppercase character. 
+fn hir_has_uppercase_char(hir: &Hir) -> bool { // true if this node, or any node reached by the recursive arms below, passes an uppercase test
+    use regex_syntax::hir::*; // glob import supplying HirKind, Literal, Class, Group and Repetition for the patterns below
+
+    match *hir.kind() { // dispatch on the kind of the current node
+        HirKind::Literal(Literal::Unicode(c)) => c.is_uppercase(), // Unicode literal: test the character directly
+        HirKind::Literal(Literal::Byte(b)) => char::from(b).is_uppercase(), // byte literal: converted with char::from before the test
+        HirKind::Class(Class::Unicode(ref ranges)) => ranges // Unicode class: only the start and end of each range are tested, not the characters in between
+            .iter()
+            .any(|r| r.start().is_uppercase() || r.end().is_uppercase()),
+        HirKind::Class(Class::Bytes(ref ranges)) => ranges // byte class: same endpoint-only test, each bound converted with char::from
+            .iter()
+            .any(|r| char::from(r.start()).is_uppercase() || char::from(r.end()).is_uppercase()),
+        HirKind::Group(Group { ref hir, .. }) | HirKind::Repetition(Repetition { ref hir, .. }) => { // both wrap a single sub-expression: recurse into it
+            hir_has_uppercase_char(hir)
+        }
+        HirKind::Concat(ref hirs) | HirKind::Alternation(ref hirs) => { // both hold a list of sub-expressions: true if any one of them is
+            hirs.iter().any(hir_has_uppercase_char)
+        }
+        _ => false, // every other node kind counts as having no uppercase character
+    }
+}