du: add support for --exclude and --exclude-from (#3381)

* du: add support for --exclude and --exclude-from

And add an option --verbose (doesn't exist in GNU)
This commit is contained in:
Sylvestre Ledru 2022-04-11 22:50:01 +02:00 committed by GitHub
parent 9214249bdd
commit 8b719a8591
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 354 additions and 53 deletions

1
Cargo.lock generated
View file

@ -2402,6 +2402,7 @@ version = "0.0.13"
dependencies = [
"chrono",
"clap 3.1.6",
"glob",
"uucore",
"winapi 0.3.9",
]

View file

@ -16,6 +16,8 @@ path = "src/du.rs"
[dependencies]
chrono = "^0.4.11"
# For the --exclude & --exclude-from options
glob = "0.3.0"
clap = { version = "3.1", features = ["wrap_help", "cargo"] }
uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }

View file

@ -11,11 +11,15 @@ extern crate uucore;
use chrono::prelude::DateTime;
use chrono::Local;
use clap::{crate_version, Arg, ArgMatches, Command};
use glob::Pattern;
use std::collections::HashSet;
use std::env;
use std::fs;
use std::fs::File;
#[cfg(not(windows))]
use std::fs::Metadata;
use std::io::BufRead;
use std::io::BufReader;
use std::io::{ErrorKind, Result};
use std::iter;
#[cfg(not(windows))]
@ -24,7 +28,6 @@ use std::os::unix::fs::MetadataExt;
use std::os::windows::fs::MetadataExt;
#[cfg(windows)]
use std::os::windows::io::AsRawHandle;
#[cfg(windows)]
use std::path::Path;
use std::path::PathBuf;
use std::str::FromStr;
@ -68,6 +71,9 @@ mod options {
pub const ONE_FILE_SYSTEM: &str = "one-file-system";
pub const DEREFERENCE: &str = "dereference";
pub const INODES: &str = "inodes";
pub const EXCLUDE: &str = "exclude";
pub const EXCLUDE_FROM: &str = "exclude-from";
pub const VERBOSE: &str = "verbose";
pub const FILE: &str = "FILE";
}
@ -80,6 +86,12 @@ Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set).
SIZE is an integer and optional unit (example: 10M is 10*1024*1024).
Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers
of 1000).
PATTERN allows some advanced exclusions. For example, the following syntaxes
are supported:
? will match only one character
* will match zero or more characters
{a,b} will match a or b
";
const USAGE: &str = "\
{} [OPTION]... [FILE]...
@ -97,6 +109,7 @@ struct Options {
one_file_system: bool,
dereference: bool,
inodes: bool,
verbose: bool,
}
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
@ -287,6 +300,7 @@ fn du(
options: &Options,
depth: usize,
inodes: &mut HashSet<FileInfo>,
exclude: &[Pattern],
) -> Box<dyn DoubleEndedIterator<Item = Stat>> {
let mut stats = vec![];
let mut futures = vec![];
@ -306,49 +320,68 @@ fn du(
}
};
for f in read {
'file_loop: for f in read {
match f {
Ok(entry) => match Stat::new(entry.path(), options) {
Ok(this_stat) => {
if let Some(inode) = this_stat.inode {
if inodes.contains(&inode) {
continue;
}
inodes.insert(inode);
}
if this_stat.is_dir {
if options.one_file_system {
if let (Some(this_inode), Some(my_inode)) =
(this_stat.inode, my_stat.inode)
Ok(entry) => {
match Stat::new(entry.path(), options) {
Ok(this_stat) => {
// We have an exclude list
for pattern in exclude {
// Look at all patterns with both short and long paths
// if we have 'du foo' but search to exclude 'foo/bar'
// we need the full path
if pattern.matches(&this_stat.path.to_string_lossy())
|| pattern.matches(&entry.file_name().into_string().unwrap())
{
if this_inode.dev_id != my_inode.dev_id {
continue;
// if the directory is ignored, leave early
if options.verbose {
println!("{} ignored", &this_stat.path.quote());
}
// Go to the next file
continue 'file_loop;
}
}
futures.push(du(this_stat, options, depth + 1, inodes));
} else {
my_stat.size += this_stat.size;
my_stat.blocks += this_stat.blocks;
my_stat.inodes += 1;
if options.all {
stats.push(this_stat);
if let Some(inode) = this_stat.inode {
if inodes.contains(&inode) {
continue;
}
inodes.insert(inode);
}
if this_stat.is_dir {
if options.one_file_system {
if let (Some(this_inode), Some(my_inode)) =
(this_stat.inode, my_stat.inode)
{
if this_inode.dev_id != my_inode.dev_id {
continue;
}
}
}
futures.push(du(this_stat, options, depth + 1, inodes, exclude));
} else {
my_stat.size += this_stat.size;
my_stat.blocks += this_stat.blocks;
my_stat.inodes += 1;
if options.all {
stats.push(this_stat);
}
}
}
Err(error) => match error.kind() {
ErrorKind::PermissionDenied => {
let description = format!("cannot access {}", entry.path().quote());
let error_message = "Permission denied";
show_error_custom_description!(description, "{}", error_message);
set_exit_code(1);
}
_ => {
set_exit_code(1);
show_error!("cannot access {}: {}", entry.path().quote(), error);
}
},
}
Err(error) => match error.kind() {
ErrorKind::PermissionDenied => {
let description = format!("cannot access {}", entry.path().quote());
let error_message = "Permission denied";
show_error_custom_description!(description, "{}", error_message);
set_exit_code(1);
}
_ => {
set_exit_code(1);
show_error!("cannot access {}: {}", entry.path().quote(), error);
}
},
},
}
Err(error) => show_error!("{}", error),
}
}
@ -406,6 +439,7 @@ enum DuError {
SummarizeDepthConflict(String),
InvalidTimeStyleArg(String),
InvalidTimeArg(String),
InvalidGlob(String),
}
impl Display for DuError {
@ -436,6 +470,7 @@ Try '{} --help' for more information.",
'birth' and 'creation' arguments are not supported on this platform.",
s.quote()
),
DuError::InvalidGlob(s) => write!(f, "Invalid exclude syntax: {}", s),
}
}
}
@ -448,11 +483,75 @@ impl UError for DuError {
Self::InvalidMaxDepthArg(_)
| Self::SummarizeDepthConflict(_)
| Self::InvalidTimeStyleArg(_)
| Self::InvalidTimeArg(_) => 1,
| Self::InvalidTimeArg(_)
| Self::InvalidGlob(_) => 1,
}
}
}
// Read a file and return each line in a vector of String
fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {
let file = File::open(filename).expect("no such file");
let buf = BufReader::new(file);
buf.lines()
.map(|l| l.expect("Could not parse line"))
.collect()
}
// Given the --exclude-from and/or --exclude arguments, returns the globset lists
// to ignore the files
fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
let mut excludes_from = if matches.is_present(options::EXCLUDE_FROM) {
match matches.values_of(options::EXCLUDE_FROM) {
Some(all_files) => {
let mut exclusion = Vec::<String>::new();
// Read the exclude lists from all the files
// and add them into a vector of string
let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
for f in files {
exclusion.extend(file_as_vec(&f));
}
exclusion
}
None => Vec::<String>::new(),
}
} else {
Vec::<String>::new()
};
let mut excludes = if matches.is_present(options::EXCLUDE) {
match matches.values_of(options::EXCLUDE) {
Some(v) => {
// Read the various arguments
v.clone().map(|v| v.to_owned()).collect()
}
None => Vec::<String>::new(),
}
} else {
Vec::<String>::new()
};
// Merge the two lines
excludes.append(&mut excludes_from);
if !&excludes.is_empty() {
let mut builder = Vec::new();
// Create the `Vec` of excludes
for f in excludes {
if matches.is_present(options::VERBOSE) {
println!("adding {:?} to the exclude list ", &f);
}
match Pattern::new(&f) {
Ok(glob) => builder.push(glob),
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
};
}
Ok(builder)
} else {
Ok(Vec::new())
}
}
#[uucore::main]
#[allow(clippy::cognitive_complexity)]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
@ -475,6 +574,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
one_file_system: matches.is_present(options::ONE_FILE_SYSTEM),
dereference: matches.is_present(options::DEREFERENCE),
inodes: matches.is_present(options::INODES),
verbose: matches.is_present(options::VERBOSE),
};
let files = match matches.value_of(options::FILE) {
@ -529,8 +629,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
"\n"
};
let excludes = get_glob_ignore(&matches)?;
let mut grand_total = 0;
for path_string in files {
'loop_file: for path_string in files {
// Skip if we don't want to ignore anything
if !&excludes.is_empty() {
for pattern in &excludes {
{
if pattern.matches(path_string) {
// if the directory is ignored, leave early
if options.verbose {
println!("{} ignored", path_string.quote());
}
continue 'loop_file;
}
}
}
}
let path = PathBuf::from(&path_string);
match Stat::new(path, &options) {
Ok(stat) => {
@ -538,7 +655,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
if let Some(inode) = stat.inode {
inodes.insert(inode);
}
let iter = du(stat, &options, 0, &mut inodes);
let iter = du(stat, &options, 0, &mut inodes, &excludes);
let (_, len) = iter.size_hint();
let len = len.unwrap();
for (index, stat) in iter.enumerate() {
@ -763,19 +880,28 @@ pub fn uu_app<'a>() -> Command<'a> {
.help("exclude entries smaller than SIZE if positive, \
or entries greater than SIZE if negative")
)
// .arg(
// Arg::new("")
// .short('x')
// .long("exclude-from")
// .value_name("FILE")
// .help("exclude files that match any pattern in FILE")
// )
// .arg(
// Arg::new("exclude")
// .long("exclude")
// .value_name("PATTERN")
// .help("exclude files that match PATTERN")
// )
.arg(
Arg::new(options::VERBOSE)
.short('v')
.long("verbose")
.help("verbose mode (option not present in GNU/Coreutils)")
)
.arg(
Arg::new(options::EXCLUDE)
.long(options::EXCLUDE)
.value_name("PATTERN")
.help("exclude files that match PATTERN")
.multiple_occurrences(true)
)
.arg(
Arg::new(options::EXCLUDE_FROM)
.short('X')
.long("exclude-from")
.value_name("FILE")
.help("exclude files that match any pattern in FILE")
.multiple_occurrences(true)
)
.arg(
Arg::new(options::TIME)
.long(options::TIME)

View file

@ -3,7 +3,11 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (paths) sublink subwords
// spell-checker:ignore (paths) sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty
#[cfg(not(windows))]
use regex::Regex;
#[cfg(not(windows))]
use std::io::Write;
use crate::common::util::*;
@ -602,3 +606,171 @@ fn test_du_bytes() {
))]
result.stdout_contains("21529\t./subdir\n");
}
#[test]
fn test_du_exclude() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
at.symlink_dir(SUB_DEEPER_DIR, SUB_DIR_LINKS_DEEPER_SYM_DIR);
at.mkdir_all(SUB_DIR_LINKS);
ts.ucmd()
.arg("--exclude=subdir")
.arg(SUB_DEEPER_DIR)
.succeeds()
.stdout_contains("subdir/deeper/deeper_dir");
ts.ucmd()
.arg("--exclude=subdir")
.arg("subdir")
.succeeds()
.stdout_is("");
ts.ucmd()
.arg("--exclude=subdir")
.arg("--verbose")
.arg("subdir")
.succeeds()
.stdout_contains("'subdir' ignored");
}
#[test]
// Disable on Windows because we are looking for /
// And the tests would be more complex if we have to support \ too
#[cfg(not(target_os = "windows"))]
fn test_du_exclude_2() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
at.mkdir_all("azerty/xcwww/azeaze");
let result = ts.ucmd().arg("azerty").succeeds();
let path_regexp = r"(.*)azerty/xcwww/azeaze(.*)azerty/xcwww(.*)azerty";
let re = Regex::new(path_regexp).unwrap();
assert!(re.is_match(result.stdout_str().replace('\n', "").trim()));
// Exact match
ts.ucmd()
.arg("--exclude=azeaze")
.arg("azerty")
.succeeds()
.stdout_does_not_contain("azerty/xcwww/azeaze");
// Partial match and NOT a glob
ts.ucmd()
.arg("--exclude=azeaz")
.arg("azerty")
.succeeds()
.stdout_contains("azerty/xcwww/azeaze");
// Partial match and a various glob
ts.ucmd()
.arg("--exclude=azea?")
.arg("azerty")
.succeeds()
.stdout_contains("azerty/xcwww/azeaze");
ts.ucmd()
.arg("--exclude=azea{z,b}")
.arg("azerty")
.succeeds()
.stdout_contains("azerty/xcwww/azeaze");
ts.ucmd()
.arg("--exclude=azea*")
.arg("azerty")
.succeeds()
.stdout_does_not_contain("azerty/xcwww/azeaze");
ts.ucmd()
.arg("--exclude=azeaz?")
.arg("azerty")
.succeeds()
.stdout_does_not_contain("azerty/xcwww/azeaze");
}
#[test]
// Disable on Windows because we are looking for /
// And the tests would be more complex if we have to support \ too
#[cfg(not(target_os = "windows"))]
fn test_du_exclude_mix() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
let mut file1 = at.make_file("file-ignore1");
file1.write_all(b"azeaze").unwrap();
let mut file2 = at.make_file("file-ignore2");
file2.write_all(b"amaz?ng").unwrap();
at.mkdir_all("azerty/xcwww/azeaze");
at.mkdir_all("azerty/xcwww/qzerty");
at.mkdir_all("azerty/xcwww/amazing");
ts.ucmd()
.arg("azerty")
.succeeds()
.stdout_contains("azerty/xcwww/azeaze");
ts.ucmd()
.arg("--exclude=azeaze")
.arg("azerty")
.succeeds()
.stdout_does_not_contain("azerty/xcwww/azeaze");
// Just exclude one file name
let result = ts.ucmd().arg("--exclude=qzerty").arg("azerty").succeeds();
assert!(!result.stdout_str().contains("qzerty"));
assert!(result.stdout_str().contains("azerty"));
assert!(result.stdout_str().contains("xcwww"));
// Exclude from file
let result = ts
.ucmd()
.arg("--exclude-from=file-ignore1")
.arg("azerty")
.succeeds();
assert!(!result.stdout_str().contains("azeaze"));
assert!(result.stdout_str().contains("qzerty"));
assert!(result.stdout_str().contains("xcwww"));
// Mix two files and string
let result = ts
.ucmd()
.arg("--exclude=qzerty")
.arg("--exclude-from=file-ignore1")
.arg("--exclude-from=file-ignore2")
.arg("azerty")
.succeeds();
assert!(!result.stdout_str().contains("amazing"));
assert!(!result.stdout_str().contains("qzerty"));
assert!(!result.stdout_str().contains("azeaze"));
assert!(result.stdout_str().contains("xcwww"));
}
#[test]
fn test_du_exclude_several_components() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
at.mkdir_all("a/b/c");
at.mkdir_all("a/x/y");
at.mkdir_all("a/u/y");
// Exact match
let result = ts
.ucmd()
.arg("--exclude=a/u")
.arg("--exclude=a/b")
.arg("a")
.succeeds();
assert!(!result.stdout_str().contains("a/u"));
assert!(!result.stdout_str().contains("a/b"));
}
#[test]
fn test_du_exclude_invalid_syntax() {
let ts = TestScenario::new(util_name!());
let at = &ts.fixtures;
at.mkdir_all("azerty/xcwww/azeaze");
ts.ucmd()
.arg("--exclude=a[ze")
.arg("azerty")
.fails()
.stderr_contains("du: Invalid exclude syntax");
}