Merge pull request #6535 from sylvestre/sort

sort: improve the error mgmt with --batch-size
This commit is contained in:
Daniel Hofstetter 2024-07-05 09:45:54 +02:00 committed by GitHub
commit b774000351
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 124 additions and 11 deletions

View file

@ -74,6 +74,7 @@ microbenchmarks
microbenchmarking
multibyte
multicall
nmerge
noatime
nocache
nocreat

1
Cargo.lock generated
View file

@ -3182,6 +3182,7 @@ dependencies = [
"fnv",
"itertools 0.13.0",
"memchr",
"nix",
"rand",
"rayon",
"self_cell",

View file

@ -31,6 +31,9 @@ tempfile = { workspace = true }
unicode-width = { workspace = true }
uucore = { workspace = true, features = ["fs", "version-cmp"] }
[target.'cfg(target_os = "linux")'.dependencies]
nix = { workspace = true }
[[bin]]
name = "sort"
path = "src/main.rs"

View file

@ -7,7 +7,7 @@
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
// spell-checker:ignore (misc) HFKJFK Mbdfhn
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim
mod check;
mod chunks;
@ -23,6 +23,8 @@ use clap::{crate_version, Arg, ArgAction, Command};
use custom_str_cmp::custom_str_cmp;
use ext_sort::ext_sort;
use fnv::FnvHasher;
#[cfg(target_os = "linux")]
use nix::libc::{getrlimit, rlimit, RLIMIT_NOFILE};
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
use rand::{thread_rng, Rng};
use rayon::prelude::*;
@ -45,7 +47,7 @@ use uucore::line_ending::LineEnding;
use uucore::parse_size::{ParseSizeError, Parser};
use uucore::shortcut_value_parser::ShortcutValueParser;
use uucore::version_cmp::version_cmp;
use uucore::{format_usage, help_about, help_section, help_usage};
use uucore::{format_usage, help_about, help_section, help_usage, show_error};
use crate::tmp_dir::TmpDirWrapper;
@ -146,7 +148,9 @@ enum SortError {
CompressProgTerminatedAbnormally {
prog: String,
},
TmpDirCreationFailed,
TmpFileCreationFailed {
path: PathBuf,
},
Uft8Error {
error: Utf8Error,
},
@ -212,7 +216,9 @@ impl Display for SortError {
Self::CompressProgTerminatedAbnormally { prog } => {
write!(f, "{} terminated abnormally", prog.quote())
}
Self::TmpDirCreationFailed => write!(f, "could not create temporary directory"),
Self::TmpFileCreationFailed { path } => {
write!(f, "cannot create temporary file in '{}':", path.display())
}
Self::Uft8Error { error } => write!(f, "{error}"),
}
}
@ -1030,6 +1036,18 @@ fn make_sort_mode_arg(mode: &'static str, short: char, help: &'static str) -> Ar
arg
}
#[cfg(target_os = "linux")]
fn get_rlimit() -> UResult<usize> {
let mut limit = rlimit {
rlim_cur: 0,
rlim_max: 0,
};
match unsafe { getrlimit(RLIMIT_NOFILE, &mut limit) } {
0 => Ok(limit.rlim_cur as usize),
_ => Err(UUsageError::new(2, "Failed to fetch rlimit")),
}
}
#[uucore::main]
#[allow(clippy::cognitive_complexity)]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
@ -1158,12 +1176,36 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.map(String::from);
if let Some(n_merge) = matches.get_one::<String>(options::BATCH_SIZE) {
settings.merge_batch_size = n_merge.parse().map_err(|_| {
UUsageError::new(
2,
format!("invalid --batch-size argument {}", n_merge.quote()),
)
})?;
match n_merge.parse::<usize>() {
Ok(parsed_value) => {
if parsed_value < 2 {
show_error!("invalid --batch-size argument '{}'", n_merge);
return Err(UUsageError::new(2, "minimum --batch-size argument is '2'"));
}
settings.merge_batch_size = parsed_value;
}
Err(e) => {
let error_message = if *e.kind() == std::num::IntErrorKind::PosOverflow {
#[cfg(target_os = "linux")]
{
show_error!("--batch-size argument {} too large", n_merge.quote());
format!(
"maximum --batch-size argument with current rlimit is {}",
get_rlimit()?
)
}
#[cfg(not(target_os = "linux"))]
{
format!("--batch-size argument {} too large", n_merge.quote())
}
} else {
format!("invalid --batch-size argument {}", n_merge.quote())
};
return Err(UUsageError::new(2, error_message));
}
}
}
settings.line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));

View file

@ -46,7 +46,9 @@ impl TmpDirWrapper {
tempfile::Builder::new()
.prefix("uutils_sort")
.tempdir_in(&self.parent_path)
.map_err(|_| SortError::TmpDirCreationFailed)?,
.map_err(|_| SortError::TmpFileCreationFailed {
path: self.parent_path.clone(),
})?,
);
let path = self.temp_dir.as_ref().unwrap().path().to_owned();

View file

@ -1036,6 +1036,38 @@ fn test_merge_batches() {
.stdout_only_fixture("ext_sort.expected");
}
#[test]
fn test_batch_size_invalid() {
TestScenario::new(util_name!())
.ucmd()
.arg("--batch-size=0")
.fails()
.code_is(2)
.stderr_contains("sort: invalid --batch-size argument '0'")
.stderr_contains("sort: minimum --batch-size argument is '2'");
}
#[test]
fn test_batch_size_too_large() {
let large_batch_size = "18446744073709551616";
TestScenario::new(util_name!())
.ucmd()
.arg(format!("--batch-size={}", large_batch_size))
.fails()
.code_is(2)
.stderr_contains(format!(
"--batch-size argument '{}' too large",
large_batch_size
));
#[cfg(target_os = "linux")]
TestScenario::new(util_name!())
.ucmd()
.arg(format!("--batch-size={}", large_batch_size))
.fails()
.code_is(2)
.stderr_contains("maximum --batch-size argument with current rlimit is");
}
#[test]
fn test_merge_batch_size() {
TestScenario::new(util_name!())

View file

@ -0,0 +1,32 @@
diff --git a/tests/sort/sort-merge.pl b/tests/sort/sort-merge.pl
index 89eed0c64..c2f5aa7e5 100755
--- a/tests/sort/sort-merge.pl
+++ b/tests/sort/sort-merge.pl
@@ -43,22 +43,22 @@ my @Tests =
# check validation of --batch-size option
['nmerge-0', "-m --batch-size=0", @inputs,
{ERR=>"$prog: invalid --batch-size argument '0'\n".
- "$prog: minimum --batch-size argument is '2'\n"}, {EXIT=>2}],
+ "$prog: minimum --batch-size argument is '2'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}],
['nmerge-1', "-m --batch-size=1", @inputs,
{ERR=>"$prog: invalid --batch-size argument '1'\n".
- "$prog: minimum --batch-size argument is '2'\n"}, {EXIT=>2}],
+ "$prog: minimum --batch-size argument is '2'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}],
['nmerge-neg', "-m --batch-size=-1", @inputs,
- {ERR=>"$prog: invalid --batch-size argument '-1'\n"}, {EXIT=>2}],
+ {ERR=>"$prog: invalid --batch-size argument '-1'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}],
['nmerge-nan', "-m --batch-size=a", @inputs,
- {ERR=>"$prog: invalid --batch-size argument 'a'\n"}, {EXIT=>2}],
+ {ERR=>"$prog: invalid --batch-size argument 'a'\nTry 'sort --help' for more information.\n"}, {EXIT=>2}],
['nmerge-big', "-m --batch-size=$bigint", @inputs,
{ERR_SUBST=>'s/(current rlimit is) \d+/$1/'},
{ERR=>"$prog: --batch-size argument '$bigint' too large\n".
- "$prog: maximum --batch-size argument with current rlimit is\n"},
+ "$prog: maximum --batch-size argument with current rlimit is\nTry 'sort --help' for more information.\n"},
{EXIT=>2}],
# This should work since nmerge >= the number of input files