mirror of
https://github.com/uutils/coreutils
synced 2024-10-07 00:19:14 +00:00
du: add support for --exclude and --exclude-from (#3381)
* du: add support for --exclude and --exclude-from, and add a --verbose option (which does not exist in GNU du).
This commit is contained in:
parent
9214249bdd
commit
8b719a8591
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2402,6 +2402,7 @@ version = "0.0.13"
|
|||
dependencies = [
|
||||
"chrono",
|
||||
"clap 3.1.6",
|
||||
"glob",
|
||||
"uucore",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
|
|
@ -16,6 +16,8 @@ path = "src/du.rs"
|
|||
|
||||
[dependencies]
|
||||
chrono = "^0.4.11"
|
||||
# For the --exclude & --exclude-from options
|
||||
glob = "0.3.0"
|
||||
clap = { version = "3.1", features = ["wrap_help", "cargo"] }
|
||||
uucore = { version=">=0.0.11", package="uucore", path="../../uucore" }
|
||||
|
||||
|
|
|
@ -11,11 +11,15 @@ extern crate uucore;
|
|||
use chrono::prelude::DateTime;
|
||||
use chrono::Local;
|
||||
use clap::{crate_version, Arg, ArgMatches, Command};
|
||||
use glob::Pattern;
|
||||
use std::collections::HashSet;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
#[cfg(not(windows))]
|
||||
use std::fs::Metadata;
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
use std::io::{ErrorKind, Result};
|
||||
use std::iter;
|
||||
#[cfg(not(windows))]
|
||||
|
@ -24,7 +28,6 @@ use std::os::unix::fs::MetadataExt;
|
|||
use std::os::windows::fs::MetadataExt;
|
||||
#[cfg(windows)]
|
||||
use std::os::windows::io::AsRawHandle;
|
||||
#[cfg(windows)]
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
|
@ -68,6 +71,9 @@ mod options {
|
|||
pub const ONE_FILE_SYSTEM: &str = "one-file-system";
|
||||
pub const DEREFERENCE: &str = "dereference";
|
||||
pub const INODES: &str = "inodes";
|
||||
pub const EXCLUDE: &str = "exclude";
|
||||
pub const EXCLUDE_FROM: &str = "exclude-from";
|
||||
pub const VERBOSE: &str = "verbose";
|
||||
pub const FILE: &str = "FILE";
|
||||
}
|
||||
|
||||
|
@ -80,6 +86,12 @@ Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set).
|
|||
SIZE is an integer and optional unit (example: 10M is 10*1024*1024).
|
||||
Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers
|
||||
of 1000).
|
||||
|
||||
PATTERN allows some advanced exclusions. For example, the following syntaxes
|
||||
are supported:
|
||||
? will match only one character
|
||||
* will match zero or more characters
|
||||
{a,b} will match a or b
|
||||
";
|
||||
const USAGE: &str = "\
|
||||
{} [OPTION]... [FILE]...
|
||||
|
@ -97,6 +109,7 @@ struct Options {
|
|||
one_file_system: bool,
|
||||
dereference: bool,
|
||||
inodes: bool,
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
|
||||
|
@ -287,6 +300,7 @@ fn du(
|
|||
options: &Options,
|
||||
depth: usize,
|
||||
inodes: &mut HashSet<FileInfo>,
|
||||
exclude: &[Pattern],
|
||||
) -> Box<dyn DoubleEndedIterator<Item = Stat>> {
|
||||
let mut stats = vec![];
|
||||
let mut futures = vec![];
|
||||
|
@ -306,49 +320,68 @@ fn du(
|
|||
}
|
||||
};
|
||||
|
||||
for f in read {
|
||||
'file_loop: for f in read {
|
||||
match f {
|
||||
Ok(entry) => match Stat::new(entry.path(), options) {
|
||||
Ok(this_stat) => {
|
||||
if let Some(inode) = this_stat.inode {
|
||||
if inodes.contains(&inode) {
|
||||
continue;
|
||||
}
|
||||
inodes.insert(inode);
|
||||
}
|
||||
if this_stat.is_dir {
|
||||
if options.one_file_system {
|
||||
if let (Some(this_inode), Some(my_inode)) =
|
||||
(this_stat.inode, my_stat.inode)
|
||||
Ok(entry) => {
|
||||
match Stat::new(entry.path(), options) {
|
||||
Ok(this_stat) => {
|
||||
// We have an exclude list
|
||||
for pattern in exclude {
|
||||
// Look at all patterns with both short and long paths
|
||||
// if we have 'du foo' but search to exclude 'foo/bar'
|
||||
// we need the full path
|
||||
if pattern.matches(&this_stat.path.to_string_lossy())
|
||||
|| pattern.matches(&entry.file_name().into_string().unwrap())
|
||||
{
|
||||
if this_inode.dev_id != my_inode.dev_id {
|
||||
continue;
|
||||
// if the directory is ignored, leave early
|
||||
if options.verbose {
|
||||
println!("{} ignored", &this_stat.path.quote());
|
||||
}
|
||||
// Go to the next file
|
||||
continue 'file_loop;
|
||||
}
|
||||
}
|
||||
futures.push(du(this_stat, options, depth + 1, inodes));
|
||||
} else {
|
||||
my_stat.size += this_stat.size;
|
||||
my_stat.blocks += this_stat.blocks;
|
||||
my_stat.inodes += 1;
|
||||
if options.all {
|
||||
stats.push(this_stat);
|
||||
|
||||
if let Some(inode) = this_stat.inode {
|
||||
if inodes.contains(&inode) {
|
||||
continue;
|
||||
}
|
||||
inodes.insert(inode);
|
||||
}
|
||||
if this_stat.is_dir {
|
||||
if options.one_file_system {
|
||||
if let (Some(this_inode), Some(my_inode)) =
|
||||
(this_stat.inode, my_stat.inode)
|
||||
{
|
||||
if this_inode.dev_id != my_inode.dev_id {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
futures.push(du(this_stat, options, depth + 1, inodes, exclude));
|
||||
} else {
|
||||
my_stat.size += this_stat.size;
|
||||
my_stat.blocks += this_stat.blocks;
|
||||
my_stat.inodes += 1;
|
||||
if options.all {
|
||||
stats.push(this_stat);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(error) => match error.kind() {
|
||||
ErrorKind::PermissionDenied => {
|
||||
let description = format!("cannot access {}", entry.path().quote());
|
||||
let error_message = "Permission denied";
|
||||
show_error_custom_description!(description, "{}", error_message);
|
||||
set_exit_code(1);
|
||||
}
|
||||
_ => {
|
||||
set_exit_code(1);
|
||||
show_error!("cannot access {}: {}", entry.path().quote(), error);
|
||||
}
|
||||
},
|
||||
}
|
||||
Err(error) => match error.kind() {
|
||||
ErrorKind::PermissionDenied => {
|
||||
let description = format!("cannot access {}", entry.path().quote());
|
||||
let error_message = "Permission denied";
|
||||
show_error_custom_description!(description, "{}", error_message);
|
||||
set_exit_code(1);
|
||||
}
|
||||
_ => {
|
||||
set_exit_code(1);
|
||||
show_error!("cannot access {}: {}", entry.path().quote(), error);
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
Err(error) => show_error!("{}", error),
|
||||
}
|
||||
}
|
||||
|
@ -406,6 +439,7 @@ enum DuError {
|
|||
SummarizeDepthConflict(String),
|
||||
InvalidTimeStyleArg(String),
|
||||
InvalidTimeArg(String),
|
||||
InvalidGlob(String),
|
||||
}
|
||||
|
||||
impl Display for DuError {
|
||||
|
@ -436,6 +470,7 @@ Try '{} --help' for more information.",
|
|||
'birth' and 'creation' arguments are not supported on this platform.",
|
||||
s.quote()
|
||||
),
|
||||
DuError::InvalidGlob(s) => write!(f, "Invalid exclude syntax: {}", s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -448,11 +483,75 @@ impl UError for DuError {
|
|||
Self::InvalidMaxDepthArg(_)
|
||||
| Self::SummarizeDepthConflict(_)
|
||||
| Self::InvalidTimeStyleArg(_)
|
||||
| Self::InvalidTimeArg(_) => 1,
|
||||
| Self::InvalidTimeArg(_)
|
||||
| Self::InvalidGlob(_) => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads `filename` and returns its lines in order, without their line
/// terminators.
///
/// # Panics
///
/// Panics if the file cannot be opened, or if one of its lines cannot be
/// read (for example because it is not valid UTF-8). The panic message names
/// the offending path and the underlying I/O error.
fn file_as_vec(filename: impl AsRef<Path>) -> Vec<String> {
    let path = filename.as_ref();
    // Opening can fail for reasons other than a missing file (permissions,
    // the path being a directory, ...), so report the real error and the path.
    let file = File::open(path)
        .unwrap_or_else(|err| panic!("cannot open {}: {}", path.display(), err));

    BufReader::new(file)
        .lines()
        .map(|line| {
            line.unwrap_or_else(|err| panic!("cannot read {}: {}", path.display(), err))
        })
        .collect()
}
|
||||
|
||||
// Given the --exclude-from and/or --exclude arguments, returns the globset lists
|
||||
// to ignore the files
|
||||
fn get_glob_ignore(matches: &ArgMatches) -> UResult<Vec<Pattern>> {
|
||||
let mut excludes_from = if matches.is_present(options::EXCLUDE_FROM) {
|
||||
match matches.values_of(options::EXCLUDE_FROM) {
|
||||
Some(all_files) => {
|
||||
let mut exclusion = Vec::<String>::new();
|
||||
// Read the exclude lists from all the files
|
||||
// and add them into a vector of string
|
||||
let files: Vec<String> = all_files.clone().map(|v| v.to_owned()).collect();
|
||||
for f in files {
|
||||
exclusion.extend(file_as_vec(&f));
|
||||
}
|
||||
exclusion
|
||||
}
|
||||
None => Vec::<String>::new(),
|
||||
}
|
||||
} else {
|
||||
Vec::<String>::new()
|
||||
};
|
||||
|
||||
let mut excludes = if matches.is_present(options::EXCLUDE) {
|
||||
match matches.values_of(options::EXCLUDE) {
|
||||
Some(v) => {
|
||||
// Read the various arguments
|
||||
v.clone().map(|v| v.to_owned()).collect()
|
||||
}
|
||||
None => Vec::<String>::new(),
|
||||
}
|
||||
} else {
|
||||
Vec::<String>::new()
|
||||
};
|
||||
|
||||
// Merge the two lines
|
||||
excludes.append(&mut excludes_from);
|
||||
if !&excludes.is_empty() {
|
||||
let mut builder = Vec::new();
|
||||
// Create the `Vec` of excludes
|
||||
for f in excludes {
|
||||
if matches.is_present(options::VERBOSE) {
|
||||
println!("adding {:?} to the exclude list ", &f);
|
||||
}
|
||||
match Pattern::new(&f) {
|
||||
Ok(glob) => builder.push(glob),
|
||||
Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()),
|
||||
};
|
||||
}
|
||||
Ok(builder)
|
||||
} else {
|
||||
Ok(Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[uucore::main]
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
|
@ -475,6 +574,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
one_file_system: matches.is_present(options::ONE_FILE_SYSTEM),
|
||||
dereference: matches.is_present(options::DEREFERENCE),
|
||||
inodes: matches.is_present(options::INODES),
|
||||
verbose: matches.is_present(options::VERBOSE),
|
||||
};
|
||||
|
||||
let files = match matches.value_of(options::FILE) {
|
||||
|
@ -529,8 +629,25 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
"\n"
|
||||
};
|
||||
|
||||
let excludes = get_glob_ignore(&matches)?;
|
||||
|
||||
let mut grand_total = 0;
|
||||
for path_string in files {
|
||||
'loop_file: for path_string in files {
|
||||
// Skip if we don't want to ignore anything
|
||||
if !&excludes.is_empty() {
|
||||
for pattern in &excludes {
|
||||
{
|
||||
if pattern.matches(path_string) {
|
||||
// if the directory is ignored, leave early
|
||||
if options.verbose {
|
||||
println!("{} ignored", path_string.quote());
|
||||
}
|
||||
continue 'loop_file;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let path = PathBuf::from(&path_string);
|
||||
match Stat::new(path, &options) {
|
||||
Ok(stat) => {
|
||||
|
@ -538,7 +655,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
if let Some(inode) = stat.inode {
|
||||
inodes.insert(inode);
|
||||
}
|
||||
let iter = du(stat, &options, 0, &mut inodes);
|
||||
let iter = du(stat, &options, 0, &mut inodes, &excludes);
|
||||
let (_, len) = iter.size_hint();
|
||||
let len = len.unwrap();
|
||||
for (index, stat) in iter.enumerate() {
|
||||
|
@ -763,19 +880,28 @@ pub fn uu_app<'a>() -> Command<'a> {
|
|||
.help("exclude entries smaller than SIZE if positive, \
|
||||
or entries greater than SIZE if negative")
|
||||
)
|
||||
// .arg(
|
||||
// Arg::new("")
|
||||
// .short('x')
|
||||
// .long("exclude-from")
|
||||
// .value_name("FILE")
|
||||
// .help("exclude files that match any pattern in FILE")
|
||||
// )
|
||||
// .arg(
|
||||
// Arg::new("exclude")
|
||||
// .long("exclude")
|
||||
// .value_name("PATTERN")
|
||||
// .help("exclude files that match PATTERN")
|
||||
// )
|
||||
.arg(
|
||||
Arg::new(options::VERBOSE)
|
||||
.short('v')
|
||||
.long("verbose")
|
||||
.help("verbose mode (option not present in GNU/Coreutils)")
|
||||
)
|
||||
.arg(
|
||||
Arg::new(options::EXCLUDE)
|
||||
.long(options::EXCLUDE)
|
||||
.value_name("PATTERN")
|
||||
.help("exclude files that match PATTERN")
|
||||
.multiple_occurrences(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new(options::EXCLUDE_FROM)
|
||||
.short('X')
|
||||
.long("exclude-from")
|
||||
.value_name("FILE")
|
||||
.help("exclude files that match any pattern in FILE")
|
||||
.multiple_occurrences(true)
|
||||
|
||||
)
|
||||
.arg(
|
||||
Arg::new(options::TIME)
|
||||
.long(options::TIME)
|
||||
|
|
|
@ -3,7 +3,11 @@
|
|||
// * For the full copyright and license information, please view the LICENSE
|
||||
// * file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (paths) sublink subwords
|
||||
// spell-checker:ignore (paths) sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty
|
||||
#[cfg(not(windows))]
|
||||
use regex::Regex;
|
||||
#[cfg(not(windows))]
|
||||
use std::io::Write;
|
||||
|
||||
use crate::common::util::*;
|
||||
|
||||
|
@ -602,3 +606,171 @@ fn test_du_bytes() {
|
|||
))]
|
||||
result.stdout_contains("21529\t./subdir\n");
|
||||
}
|
||||
|
||||
#[test]
fn test_du_exclude() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    fixtures.symlink_dir(SUB_DEEPER_DIR, SUB_DIR_LINKS_DEEPER_SYM_DIR);
    fixtures.mkdir_all(SUB_DIR_LINKS);

    // The deeper directory given as operand is still reported.
    let deeper = scene
        .ucmd()
        .arg("--exclude=subdir")
        .arg(SUB_DEEPER_DIR)
        .succeeds();
    deeper.stdout_contains("subdir/deeper/deeper_dir");

    // An operand that is itself excluded produces no output at all...
    let silent = scene
        .ucmd()
        .args(&["--exclude=subdir", "subdir"])
        .succeeds();
    silent.stdout_is("");

    // ...unless --verbose is given, in which case the skip is reported.
    let verbose = scene
        .ucmd()
        .args(&["--exclude=subdir", "--verbose", "subdir"])
        .succeeds();
    verbose.stdout_contains("'subdir' ignored");
}
|
||||
|
||||
#[test]
// Unix-only: the expectations below hard-code `/` as the path separator,
// and supporting `\` as well would make the test more complex.
#[cfg(not(target_os = "windows"))]
fn test_du_exclude_2() {
    const DEEPEST: &str = "azerty/xcwww/azeaze";

    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    fixtures.mkdir_all("azerty/xcwww/azeaze");

    // Baseline without any exclusion: deepest directory first, then its parents.
    let baseline = scene.ucmd().arg("azerty").succeeds();
    let re = Regex::new(r"(.*)azerty/xcwww/azeaze(.*)azerty/xcwww(.*)azerty").unwrap();
    let flattened = baseline.stdout_str().replace('\n', "");
    assert!(re.is_match(flattened.trim()));

    // (pattern, whether the deepest directory must disappear from the output)
    let cases = [
        // Exact match
        ("azeaze", true),
        // Partial match and NOT a glob
        ("azeaz", false),
        // Partial matches with various globs
        ("azea?", false),
        ("azea{z,b}", false),
        ("azea*", true),
        ("azeaz?", true),
    ];
    for &(pattern, excluded) in &cases {
        let result = scene
            .ucmd()
            .arg(format!("--exclude={}", pattern))
            .arg("azerty")
            .succeeds();
        if excluded {
            result.stdout_does_not_contain(DEEPEST);
        } else {
            result.stdout_contains(DEEPEST);
        }
    }
}
|
||||
|
||||
#[test]
// Unix-only: the expectations below hard-code `/` as the path separator,
// and supporting `\` as well would make the test more complex.
#[cfg(not(target_os = "windows"))]
fn test_du_exclude_mix() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    // Two exclude lists: a literal name and a glob.
    let mut literal_list = fixtures.make_file("file-ignore1");
    literal_list.write_all(b"azeaze").unwrap();
    let mut glob_list = fixtures.make_file("file-ignore2");
    glob_list.write_all(b"amaz?ng").unwrap();

    for dir in &[
        "azerty/xcwww/azeaze",
        "azerty/xcwww/qzerty",
        "azerty/xcwww/amazing",
    ] {
        fixtures.mkdir_all(dir);
    }

    // Sanity check without and with a plain --exclude.
    let plain = scene.ucmd().arg("azerty").succeeds();
    plain.stdout_contains("azerty/xcwww/azeaze");
    let excluded = scene
        .ucmd()
        .args(&["--exclude=azeaze", "azerty"])
        .succeeds();
    excluded.stdout_does_not_contain("azerty/xcwww/azeaze");

    // Just exclude one file name
    let result = scene
        .ucmd()
        .args(&["--exclude=qzerty", "azerty"])
        .succeeds();
    let out = result.stdout_str();
    assert!(!out.contains("qzerty"));
    assert!(out.contains("azerty"));
    assert!(out.contains("xcwww"));

    // Exclude from file
    let result = scene
        .ucmd()
        .args(&["--exclude-from=file-ignore1", "azerty"])
        .succeeds();
    let out = result.stdout_str();
    assert!(!out.contains("azeaze"));
    assert!(out.contains("qzerty"));
    assert!(out.contains("xcwww"));

    // Mix two files and string
    let result = scene
        .ucmd()
        .args(&[
            "--exclude=qzerty",
            "--exclude-from=file-ignore1",
            "--exclude-from=file-ignore2",
            "azerty",
        ])
        .succeeds();
    let out = result.stdout_str();
    assert!(!out.contains("amazing"));
    assert!(!out.contains("qzerty"));
    assert!(!out.contains("azeaze"));
    assert!(out.contains("xcwww"));
}
|
||||
|
||||
#[test]
fn test_du_exclude_several_components() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    for dir in &["a/b/c", "a/x/y", "a/u/y"] {
        fixtures.mkdir_all(dir);
    }

    // Patterns made of several path components match the full path exactly.
    let result = scene
        .ucmd()
        .args(&["--exclude=a/u", "--exclude=a/b", "a"])
        .succeeds();
    let out = result.stdout_str();
    assert!(!out.contains("a/u"));
    assert!(!out.contains("a/b"));
}
|
||||
|
||||
#[test]
fn test_du_exclude_invalid_syntax() {
    let scene = TestScenario::new(util_name!());
    let fixtures = &scene.fixtures;

    fixtures.mkdir_all("azerty/xcwww/azeaze");

    // An unterminated character class is not a valid glob and must be rejected.
    let result = scene
        .ucmd()
        .args(&["--exclude=a[ze", "azerty"])
        .fails();
    result.stderr_contains("du: Invalid exclude syntax");
}
|
||||
|
|
Loading…
Reference in a new issue