Merge branch 'master' of https://github.com/uutils/coreutils into uutils-master-2

This commit is contained in:
Tyler 2021-08-03 17:33:43 -07:00
commit 601c9fc620
33 changed files with 1762 additions and 521 deletions

View file

@ -13,8 +13,8 @@ env:
PROJECT_NAME: coreutils
PROJECT_DESC: "Core universal (cross-platform) utilities"
PROJECT_AUTH: "uutils"
RUST_MIN_SRV: "1.43.1" ## v1.43.0
RUST_COV_SRV: "2020-08-01" ## (~v1.47.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel
RUST_MIN_SRV: "1.47.0" ## MSRV v1.47.0
RUST_COV_SRV: "2021-05-06" ## (~v1.52.0) supported rust version for code coverage; (date required/used by 'coverage') ## !maint: refactor when code coverage support is included in the stable channel
on: [push, pull_request]
@ -249,6 +249,8 @@ jobs:
# { os, target, cargo-options, features, use-cross, toolchain }
- { os: ubuntu-latest , target: arm-unknown-linux-gnueabihf , features: feat_os_unix_gnueabihf , use-cross: use-cross }
- { os: ubuntu-latest , target: aarch64-unknown-linux-gnu , features: feat_os_unix_gnueabihf , use-cross: use-cross }
- { os: ubuntu-latest , target: x86_64-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross }
# - { os: ubuntu-latest , target: x86_64-unknown-linux-gnu , features: feat_selinux , use-cross: use-cross }
# - { os: ubuntu-18.04 , target: i586-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross } ## note: older windows platform; not required, dev-FYI only
# - { os: ubuntu-18.04 , target: i586-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross } ## note: older windows platform; not required, dev-FYI only
- { os: ubuntu-18.04 , target: i686-unknown-linux-gnu , features: feat_os_unix , use-cross: use-cross }

View file

@ -151,6 +151,9 @@ Sylvestre Ledru
T Jameson Little
Jameson
Little
Thomas Queiroz
Thomas
Queiroz
Tobias Bohumir Schottdorf
Tobias
Bohumir

215
Cargo.lock generated
View file

@ -73,6 +73,29 @@ dependencies = [
"compare",
]
[[package]]
name = "bindgen"
version = "0.59.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453c49e5950bb0eb63bb3df640e31618846c89d5b7faa54040d76e98e0134375"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"clap",
"env_logger 0.8.4",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"proc-macro2",
"quote 1.0.9",
"regex",
"rustc-hash",
"shlex",
"which",
]
[[package]]
name = "bit-set"
version = "0.5.2"
@ -94,6 +117,18 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "bitvec"
version = "0.19.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "blake2b_simd"
version = "0.5.11"
@ -149,9 +184,18 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cc"
version = "1.0.68"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787"
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
[[package]]
name = "cexpr"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db507a7679252d2276ed0dd8113c6875ec56d3089f9225b2b42c30cc1f8e5c89"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
@ -178,6 +222,17 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "clang-sys"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c"
dependencies = [
"glob 0.3.0",
"libc",
"libloading",
]
[[package]]
name = "clap"
version = "2.33.3"
@ -241,6 +296,7 @@ dependencies = [
"rand 0.7.3",
"regex",
"rlimit",
"selinux",
"sha1",
"tempfile",
"textwrap",
@ -464,9 +520,9 @@ dependencies = [
[[package]]
name = "crossbeam-deque"
version = "0.8.0"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if 1.0.0",
"crossbeam-epoch",
@ -592,6 +648,19 @@ dependencies = [
"regex",
]
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
@ -634,6 +703,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "funty"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7"
[[package]]
name = "gcd"
version = "2.0.1"
@ -747,6 +822,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "if_rust_version"
version = "1.0.0"
@ -802,12 +883,28 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3"
[[package]]
name = "libloading"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
dependencies = [
"cfg-if 1.0.0",
"winapi 0.3.9",
]
[[package]]
name = "locale"
version = "0.2.2"
@ -939,6 +1036,18 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "nom"
version = "6.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2"
dependencies = [
"bitvec",
"funty",
"memchr 2.4.0",
"version_check",
]
[[package]]
name = "ntapi"
version = "0.3.6"
@ -1104,6 +1213,12 @@ dependencies = [
"proc-macro-hack",
]
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pkg-config"
version = "0.3.19"
@ -1170,9 +1285,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.27"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
"unicode-xid 0.2.2",
]
@ -1195,7 +1310,7 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f"
dependencies = [
"env_logger",
"env_logger 0.7.1",
"log",
"rand 0.7.3",
"rand_core 0.5.1",
@ -1216,6 +1331,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "radium"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8"
[[package]]
name = "rand"
version = "0.5.6"
@ -1384,6 +1505,12 @@ dependencies = [
"redox_syscall 0.2.9",
]
[[package]]
name = "reference-counted-singleton"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef445213a92fdddc4bc69d9111156d20ffd50704a86ad82b372aab701a0d3a9a"
[[package]]
name = "regex"
version = "1.5.4"
@ -1438,6 +1565,12 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "same-file"
version = "1.0.6"
@ -1453,6 +1586,32 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "selinux"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd525eeb189eb26c8471463186bba87644e3d8a9c7ae392adaf9ec45ede574bc"
dependencies = [
"bitflags",
"libc",
"once_cell",
"reference-counted-singleton",
"selinux-sys",
"thiserror",
]
[[package]]
name = "selinux-sys"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d842d177120716580c4c6cb56dfe3c5f3a3e3dcec635091f1b2034b6c0be4c6"
dependencies = [
"bindgen",
"cc",
"dunce",
"walkdir",
]
[[package]]
name = "sha1"
version = "0.6.0"
@ -1484,6 +1643,12 @@ dependencies = [
"generic-array",
]
[[package]]
name = "shlex"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d"
[[package]]
name = "signal-hook"
version = "0.3.9"
@ -1572,15 +1737,21 @@ dependencies = [
[[package]]
name = "syn"
version = "1.0.73"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7"
checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
dependencies = [
"proc-macro2",
"quote 1.0.9",
"unicode-xid 0.2.2",
]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tempfile"
version = "3.2.0"
@ -1614,6 +1785,15 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "termcolor"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
dependencies = [
"winapi-util",
]
[[package]]
name = "termion"
version = "1.5.6"
@ -2111,6 +2291,7 @@ name = "uu_id"
version = "0.0.7"
dependencies = [
"clap",
"selinux",
"uucore",
"uucore_procs",
]
@ -2603,6 +2784,7 @@ version = "0.0.7"
dependencies = [
"clap",
"libc",
"nix 0.20.0",
"redox_syscall 0.1.57",
"uucore",
"uucore_procs",
@ -2872,6 +3054,15 @@ version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "which"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
dependencies = [
"libc",
]
[[package]]
name = "wild"
version = "2.0.4"
@ -2924,6 +3115,12 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "wyz"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214"
[[package]]
name = "xattr"
version = "0.2.2"

View file

@ -1,6 +1,8 @@
# coreutils (uutils)
# * see the repository LICENSE, README, and CONTRIBUTING files for more information
# spell-checker:ignore (libs) libselinux
[package]
name = "coreutils"
version = "0.0.7"
@ -139,6 +141,11 @@ feat_os_unix_musl = [
#
"feat_require_unix",
]
# "feat_selinux" == set of utilities providing support for SELinux Security Context if enabled with `--features feat_selinux`.
# NOTE:
# The selinux(-sys) crate requires `libselinux` headers and shared library to be accessible in the C toolchain at compile time.
# Running a uutils compiled with `feat_selinux` requires an SELinux enabled Kernel at run time.
feat_selinux = ["id/selinux", "selinux"]
## feature sets with requirements (restricting cross-platform availability)
#
# ** NOTE: these `feat_require_...` sets should be minimized as much as possible to encourage cross-platform availability of utilities
@ -230,6 +237,7 @@ clap = { version = "2.33", features = ["wrap_help"] }
lazy_static = { version="1.3" }
textwrap = { version="=0.11.0", features=["term_size"] } # !maint: [2020-05-10; rivy] unstable crate using undocumented features; pinned currently, will review
uucore = { version=">=0.0.9", package="uucore", path="src/uucore" }
selinux = { version="0.1.3", optional = true }
# * uutils
uu_test = { optional=true, version="0.0.7", package="uu_test", path="src/uu/test" }
#

View file

@ -39,7 +39,7 @@ to compile anywhere, and this is as good a way as any to try and learn it.
### Rust Version
uutils follows Rust's release channels and is tested against stable, beta and nightly.
The current oldest supported version of the Rust compiler is `1.43.1`.
The current oldest supported version of the Rust compiler is `1.47`.
On both Windows and Redox, only the nightly version is tested currently.

View file

@ -28,8 +28,8 @@ pub fn main() {
if val == "1" && key.starts_with(env_feature_prefix) {
let krate = key[env_feature_prefix.len()..].to_lowercase();
match krate.as_ref() {
"default" | "macos" | "unix" | "windows" => continue, // common/standard feature names
"nightly" | "test_unimplemented" => continue, // crate-local custom features
"default" | "macos" | "unix" | "windows" | "selinux" => continue, // common/standard feature names
"nightly" | "test_unimplemented" => continue, // crate-local custom features
"test" => continue, // over-ridden with 'uu_test' to avoid collision with rust core crate 'test'
s if s.starts_with(feature_prefix) => continue, // crate feature sets
_ => {} // util feature name

View file

@ -1 +1 @@
msrv = "1.43.1"
msrv = "1.47.0"

View file

@ -8,6 +8,8 @@
#[macro_use]
extern crate uucore;
use uucore::error::UCustomError;
use uucore::error::UResult;
#[cfg(unix)]
use uucore::fsext::statfs_fn;
use uucore::fsext::{read_fs_list, FsUsage, MountInfo};
@ -19,8 +21,10 @@ use std::cell::Cell;
use std::collections::HashMap;
use std::collections::HashSet;
use std::error::Error;
#[cfg(unix)]
use std::ffi::CString;
use std::fmt::Display;
#[cfg(unix)]
use std::mem;
@ -33,9 +37,6 @@ use std::path::Path;
static ABOUT: &str = "Show information about the file system on which each FILE resides,\n\
or all file systems by default.";
static EXIT_OK: i32 = 0;
static EXIT_ERR: i32 = 1;
static OPT_ALL: &str = "all";
static OPT_BLOCKSIZE: &str = "blocksize";
static OPT_DIRECT: &str = "direct";
@ -226,8 +227,8 @@ fn filter_mount_list(vmi: Vec<MountInfo>, paths: &[String], opt: &Options) -> Ve
/// Convert `value` to a human readable string based on `base`.
/// e.g. It returns 1G when value is 1 * 1024 * 1024 * 1024 and base is 1024.
/// Note: It returns `value` if `base` isn't positive.
fn human_readable(value: u64, base: i64) -> String {
match base {
fn human_readable(value: u64, base: i64) -> UResult<String> {
let base_str = match base {
d if d < 0 => value.to_string(),
// ref: [Binary prefix](https://en.wikipedia.org/wiki/Binary_prefix) @@ <https://archive.is/cnwmF>
@ -242,8 +243,10 @@ fn human_readable(value: u64, base: i64) -> String {
NumberPrefix::Prefixed(prefix, bytes) => format!("{:.1}{}", bytes, prefix.symbol()),
},
_ => crash!(EXIT_ERR, "Internal error: Unknown base value {}", base),
}
_ => return Err(DfError::InvalidBaseValue(base.to_string()).into()),
};
Ok(base_str)
}
fn use_size(free_size: u64, total_size: u64) -> String {
@ -256,7 +259,31 @@ fn use_size(free_size: u64, total_size: u64) -> String {
);
}
pub fn uumain(args: impl uucore::Args) -> i32 {
/// Errors produced by `df` and propagated to the caller as a `UResult` failure.
#[derive(Debug)]
enum DfError {
    /// `human_readable` was handed a base it has no formatting rule for;
    /// the payload is that base rendered as text.
    InvalidBaseValue(String),
}

impl Display for DfError {
    /// Writes the user-facing message for the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable while the enum has a single variant; adding a variant
        // turns this into a compile error, so the message cannot be forgotten.
        let DfError::InvalidBaseValue(base) = self;
        write!(f, "Internal error: Unknown base value {}", base)
    }
}

impl Error for DfError {}
impl UCustomError for DfError {
    /// Numeric code associated with this error.
    fn code(&self) -> i32 {
        // Destructure without a wildcard so that a newly added variant fails
        // to compile here until its code is chosen explicitly.
        let DfError::InvalidBaseValue(_) = self;
        1
    }
}
#[uucore_procs::gen_uumain]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let usage = get_usage();
let matches = uu_app().usage(&usage[..]).get_matches_from(args);
@ -269,7 +296,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
{
if matches.is_present(OPT_INODES) {
println!("{}: doesn't support -i option", executable!());
return EXIT_OK;
return Ok(());
}
}
@ -353,15 +380,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if opt.show_inode_instead {
print!(
"{0: >12} ",
human_readable(fs.usage.files, opt.human_readable_base)
human_readable(fs.usage.files, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(fs.usage.files - fs.usage.ffree, opt.human_readable_base)
human_readable(fs.usage.files - fs.usage.ffree, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(fs.usage.ffree, opt.human_readable_base)
human_readable(fs.usage.ffree, opt.human_readable_base)?
);
print!(
"{0: >5} ",
@ -375,15 +402,15 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let free_size = fs.usage.blocksize * fs.usage.bfree;
print!(
"{0: >12} ",
human_readable(total_size, opt.human_readable_base)
human_readable(total_size, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(total_size - free_size, opt.human_readable_base)
human_readable(total_size - free_size, opt.human_readable_base)?
);
print!(
"{0: >12} ",
human_readable(free_size, opt.human_readable_base)
human_readable(free_size, opt.human_readable_base)?
);
if cfg!(target_os = "macos") {
let used = fs.usage.blocks - fs.usage.bfree;
@ -396,7 +423,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
println!();
}
EXIT_OK
Ok(())
}
pub fn uu_app() -> App<'static, 'static> {

View file

@ -36,28 +36,86 @@ fn get_usage() -> String {
format!("{0} [OPTION]... [FILE]...", executable!())
}
fn tabstops_parse(s: String) -> Vec<usize> {
let words = s.split(',');
/// The mode to use when replacing tabs beyond the last one specified in
/// the `--tabs` argument.
enum RemainingMode {
/// Neither a `+` nor a `/` marker was encountered while parsing the
/// `--tabs` list; this is also the value used when `--tabs` is absent.
None,
/// A `/` marker was encountered while parsing the `--tabs` list.
Slash,
/// A `+` marker was encountered while parsing the `--tabs` list.
Plus,
}
let nums = words
.map(|sn| {
sn.parse::<usize>()
.unwrap_or_else(|_| crash!(1, "{}\n", "tab size contains invalid character(s)"))
})
.collect::<Vec<usize>>();
/// Report whether `c` is one of the two separator characters accepted
/// between tab stops: a space (`' '`) or a comma (`','`).
///
/// # Examples
///
/// ```rust,ignore
/// assert!(is_space_or_comma(' '));
/// assert!(is_space_or_comma(','));
/// assert!(!is_space_or_comma('a'));
/// ```
fn is_space_or_comma(c: char) -> bool {
    matches!(c, ' ' | ',')
}
if nums.iter().any(|&n| n == 0) {
crash!(1, "{}\n", "tab size cannot be 0");
/// Parse a list of tabstops from a `--tabs` argument.
///
/// This function returns both the vector of numbers appearing in the
/// comma- or space-separated list, and also an optional mode, specified
/// by either a "/" or a "+" character appearing before the final number
/// in the list. This mode defines the strategy to use for computing the
/// number of spaces to use for columns beyond the end of the tab stop
/// list specified here.
fn tabstops_parse(s: String) -> (RemainingMode, Vec<usize>) {
// Leading commas and spaces are ignored.
let s = s.trim_start_matches(is_space_or_comma);
// If there were only commas and spaces in the string, just use the
// default tabstops.
if s.is_empty() {
return (RemainingMode::None, vec![DEFAULT_TABSTOP]);
}
if let (false, _) = nums
.iter()
.fold((true, 0), |(acc, last), &n| (acc && last <= n, n))
{
crash!(1, "{}\n", "tab sizes must be ascending");
}
let mut nums = vec![];
let mut remaining_mode = RemainingMode::None;
for word in s.split(is_space_or_comma) {
let bytes = word.as_bytes();
for i in 0..bytes.len() {
match bytes[i] {
b'+' => {
remaining_mode = RemainingMode::Plus;
}
b'/' => {
remaining_mode = RemainingMode::Slash;
}
_ => {
// Parse a number from the byte sequence.
let num = from_utf8(&bytes[i..]).unwrap().parse::<usize>().unwrap();
nums
// Tab size must be positive.
if num == 0 {
crash!(1, "{}\n", "tab size cannot be 0");
}
// Tab sizes must be ascending.
if let Some(last_stop) = nums.last() {
if *last_stop >= num {
crash!(1, "tab sizes must be ascending");
}
}
// Append this tab stop to the list of all tabstops.
nums.push(num);
break;
}
}
}
}
// If no numbers could be parsed (for example, if `s` were "+,+,+"),
// then just use the default tabstops.
if nums.is_empty() {
nums = vec![DEFAULT_TABSTOP];
}
(remaining_mode, nums)
}
struct Options {
@ -66,13 +124,17 @@ struct Options {
tspaces: String,
iflag: bool,
uflag: bool,
/// Strategy for expanding tabs for columns beyond those specified
/// in `tabstops`.
remaining_mode: RemainingMode,
}
impl Options {
fn new(matches: &ArgMatches) -> Options {
let tabstops = match matches.value_of(options::TABS) {
let (remaining_mode, tabstops) = match matches.value_of(options::TABS) {
Some(s) => tabstops_parse(s.to_string()),
None => vec![DEFAULT_TABSTOP],
None => (RemainingMode::None, vec![DEFAULT_TABSTOP]),
};
let iflag = matches.is_present(options::INITIAL);
@ -102,6 +164,7 @@ impl Options {
tspaces,
iflag,
uflag,
remaining_mode,
}
}
}
@ -159,13 +222,41 @@ fn open(path: String) -> BufReader<Box<dyn Read + 'static>> {
}
}
fn next_tabstop(tabstops: &[usize], col: usize) -> usize {
if tabstops.len() == 1 {
tabstops[0] - col % tabstops[0]
} else {
match tabstops.iter().find(|&&t| t > col) {
/// Compute the number of spaces to the next tabstop.
///
/// `tabstops` is the sequence of tabstop locations.
///
/// `col` is the index of the current cursor in the line being written.
///
/// If `remaining_mode` is [`RemainingMode::Plus`], then the last entry
/// in the `tabstops` slice is interpreted as a relative number of
/// spaces, which this function will return for every input value of
/// `col` beyond the end of the second-to-last element of `tabstops`.
///
/// If `remaining_mode` is [`RemainingMode::Slash`], then the last entry
/// in the `tabstops` slice is interpreted as a tab size: for every input
/// value of `col` beyond the end of the second-to-last element of
/// `tabstops`, this function returns the number of spaces needed to
/// reach the next multiple of that last entry.
fn next_tabstop(tabstops: &[usize], col: usize, remaining_mode: &RemainingMode) -> usize {
let num_tabstops = tabstops.len();
match remaining_mode {
RemainingMode::Plus => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
Some(t) => t - col,
None => 1,
None => tabstops[num_tabstops - 1] - 1,
},
RemainingMode::Slash => match tabstops[0..num_tabstops - 1].iter().find(|&&t| t > col) {
Some(t) => t - col,
None => tabstops[num_tabstops - 1] - col % tabstops[num_tabstops - 1],
},
RemainingMode::None => {
if num_tabstops == 1 {
tabstops[0] - col % tabstops[0]
} else {
match tabstops.iter().find(|&&t| t > col) {
Some(t) => t - col,
None => 1,
}
}
}
}
}
@ -232,12 +323,16 @@ fn expand(options: Options) {
match ctype {
Tab => {
// figure out how many spaces to the next tabstop
let nts = next_tabstop(ts, col);
let nts = next_tabstop(ts, col, &options.remaining_mode);
col += nts;
// now dump out either spaces if we're expanding, or a literal tab if we're not
if init || !options.iflag {
safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes()));
if nts <= options.tspaces.len() {
safe_unwrap!(output.write_all(options.tspaces[..nts].as_bytes()));
} else {
safe_unwrap!(output.write_all(" ".repeat(nts).as_bytes()));
};
} else {
safe_unwrap!(output.write_all(&buf[byte..byte + nbytes]));
}
@ -269,3 +364,30 @@ fn expand(options: Options) {
}
}
}
#[cfg(test)]
mod tests {
    use super::{next_tabstop, RemainingMode};

    /// Tab stop list shared by every case in this module.
    const STOPS: [usize; 2] = [1, 5];

    /// Check `next_tabstop` against each `(col, expected)` pair for `mode`.
    fn check(mode: &RemainingMode, cases: &[(usize, usize)]) {
        for &(col, expected) in cases {
            assert_eq!(next_tabstop(&STOPS, col, mode), expected);
        }
    }

    #[test]
    fn test_next_tabstop_remaining_mode_none() {
        check(&RemainingMode::None, &[(0, 1), (3, 2), (6, 1)]);
    }

    #[test]
    fn test_next_tabstop_remaining_mode_plus() {
        check(&RemainingMode::Plus, &[(0, 1), (3, 4), (6, 4)]);
    }

    #[test]
    fn test_next_tabstop_remaining_mode_slash() {
        check(&RemainingMode::Slash, &[(0, 1), (3, 2), (6, 4)]);
    }
}

View file

@ -18,7 +18,11 @@ path = "src/id.rs"
clap = { version = "2.33", features = ["wrap_help"] }
uucore = { version=">=0.0.9", package="uucore", path="../../uucore", features=["entries", "process"] }
uucore_procs = { version=">=0.0.6", package="uucore_procs", path="../../uucore_procs" }
selinux = { version="0.1.3", optional = true }
[[bin]]
name = "id"
path = "src/main.rs"
[features]
feat_selinux = ["selinux"]

View file

@ -5,7 +5,10 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//
// spell-checker:ignore (ToDO) asid auditid auditinfo auid cstr egid emod euid getaudit getlogin gflag nflag pline rflag termid uflag gsflag zflag cflag
// README:
// This was originally based on BSD's `id`
// (noticeable in functionality, usage text, options text, etc.)
// and synced with:
@ -25,8 +28,10 @@
//
// * Help text based on BSD's `id` manpage and GNU's `id` manpage.
//
// spell-checker:ignore (ToDO) asid auditid auditinfo auid cstr egid emod euid getaudit getlogin gflag nflag pline rflag termid uflag gsflag zflag
// * This passes GNU's coreutils Test suite (8.32) for "tests/id/context.sh" if compiled with
// `--features feat_selinux`. It should also pass "tests/id/no-context.sh", but that depends on
// `uu_ls -Z` being implemented and therefore fails at the moment
//
#![allow(non_camel_case_types)]
#![allow(dead_code)]
@ -35,6 +40,8 @@
extern crate uucore;
use clap::{crate_version, App, Arg};
#[cfg(all(target_os = "linux", feature = "selinux"))]
use selinux;
use std::ffi::CStr;
use uucore::entries::{self, Group, Locate, Passwd};
pub use uucore::libc;
@ -50,6 +57,11 @@ macro_rules! cstr2cow {
static ABOUT: &str = "Print user and group information for each specified USER,
or (when USER omitted) for the current user.";
// Help text shown for the `--context` (`-Z`) option. Exactly one of the two
// definitions below is compiled, selected by the `selinux` cargo feature;
// the variant used without the feature tells the user the option is not enabled.
#[cfg(not(feature = "selinux"))]
static CONTEXT_HELP_TEXT: &str = "print only the security context of the process (not enabled)";
#[cfg(feature = "selinux")]
static CONTEXT_HELP_TEXT: &str = "print only the security context of the process";
mod options {
pub const OPT_AUDIT: &str = "audit"; // GNU's id does not have this
pub const OPT_CONTEXT: &str = "context";
@ -93,6 +105,8 @@ struct State {
gsflag: bool, // --groups
rflag: bool, // --real
zflag: bool, // --zero
cflag: bool, // --context
selinux_supported: bool,
ids: Option<Ids>,
// The behavior for calling GNU's `id` and calling GNU's `id $USER` is similar but different.
// * The SELinux context is only displayed without a specified user.
@ -109,6 +123,7 @@ struct State {
// 1000 10 968 975
// +++ exited with 0 +++
user_specified: bool,
exit_code: i32,
}
pub fn uumain(args: impl uucore::Args) -> i32 {
@ -132,8 +147,21 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
gsflag: matches.is_present(options::OPT_GROUPS),
rflag: matches.is_present(options::OPT_REAL_ID),
zflag: matches.is_present(options::OPT_ZERO),
cflag: matches.is_present(options::OPT_CONTEXT),
selinux_supported: {
#[cfg(feature = "selinux")]
{
selinux::kernel_support() != selinux::KernelSupport::Unsupported
}
#[cfg(not(feature = "selinux"))]
{
false
}
},
user_specified: !users.is_empty(),
ids: None,
exit_code: 0,
};
let default_format = {
@ -141,13 +169,16 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
!(state.uflag || state.gflag || state.gsflag)
};
if (state.nflag || state.rflag) && default_format {
if (state.nflag || state.rflag) && default_format && !state.cflag {
crash!(1, "cannot print only names or real IDs in default format");
}
if (state.zflag) && default_format {
if state.zflag && default_format && !state.cflag {
// NOTE: GNU test suite "id/zero.sh" needs this stderr output:
crash!(1, "option --zero not permitted in default format");
}
if state.user_specified && state.cflag {
crash!(1, "cannot print security context when user specified");
}
let delimiter = {
if state.zflag {
@ -163,7 +194,23 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
'\n'
}
};
let mut exit_code = 0;
if state.cflag {
if state.selinux_supported {
// print SELinux context and exit
#[cfg(all(target_os = "linux", feature = "selinux"))]
if let Ok(context) = selinux::SecurityContext::current(false) {
let bytes = context.as_bytes();
print!("{}{}", String::from_utf8_lossy(bytes), line_ending);
} else {
// print error because `cflag` was explicitly requested
crash!(1, "can't get process context");
}
return state.exit_code;
} else {
crash!(1, "--context (-Z) works only on an SELinux-enabled kernel");
}
}
for i in 0..=users.len() {
let possible_pw = if !state.user_specified {
@ -173,7 +220,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
Ok(p) => Some(p),
Err(_) => {
show_error!("'{}': no such user", users[i]);
exit_code = 1;
state.exit_code = 1;
if i + 1 >= users.len() {
break;
} else {
@ -187,17 +234,17 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if matches.is_present(options::OPT_PASSWORD) {
// BSD's `id` ignores all but the first specified user
pline(possible_pw.map(|v| v.uid()));
return exit_code;
return state.exit_code;
};
if matches.is_present(options::OPT_HUMAN_READABLE) {
// BSD's `id` ignores all but the first specified user
pretty(possible_pw);
return exit_code;
return state.exit_code;
}
if matches.is_present(options::OPT_AUDIT) {
// BSD's `id` ignores specified users
auditid();
return exit_code;
return state.exit_code;
}
let (uid, gid) = possible_pw.map(|p| (p.uid(), p.gid())).unwrap_or((
@ -217,7 +264,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if state.nflag {
entries::gid2grp(gid).unwrap_or_else(|_| {
show_error!("cannot find name for group ID {}", gid);
exit_code = 1;
state.exit_code = 1;
gid.to_string()
})
} else {
@ -232,7 +279,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if state.nflag {
entries::uid2usr(uid).unwrap_or_else(|_| {
show_error!("cannot find name for user ID {}", uid);
exit_code = 1;
state.exit_code = 1;
uid.to_string()
})
} else {
@ -257,7 +304,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
if state.nflag {
entries::gid2grp(id).unwrap_or_else(|_| {
show_error!("cannot find name for group ID {}", id);
exit_code = 1;
state.exit_code = 1;
id.to_string()
})
} else {
@ -276,7 +323,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
}
if default_format {
id_print(&state, groups);
id_print(&mut state, groups);
}
print!("{}", line_ending);
@ -285,7 +332,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
}
}
exit_code
state.exit_code
}
pub fn uu_app() -> App<'static, 'static> {
@ -319,6 +366,7 @@ pub fn uu_app() -> App<'static, 'static> {
Arg::with_name(options::OPT_GROUP)
.short("g")
.long(options::OPT_GROUP)
.conflicts_with(options::OPT_EFFECTIVE_USER)
.help("Display only the effective group ID as a number"),
)
.arg(
@ -328,6 +376,7 @@ pub fn uu_app() -> App<'static, 'static> {
.conflicts_with_all(&[
options::OPT_GROUP,
options::OPT_EFFECTIVE_USER,
options::OPT_CONTEXT,
options::OPT_HUMAN_READABLE,
options::OPT_PASSWORD,
options::OPT_AUDIT,
@ -379,7 +428,8 @@ pub fn uu_app() -> App<'static, 'static> {
Arg::with_name(options::OPT_CONTEXT)
.short("Z")
.long(options::OPT_CONTEXT)
.help("NotImplemented: print only the security context of the process"),
.conflicts_with_all(&[options::OPT_GROUP, options::OPT_EFFECTIVE_USER])
.help(CONTEXT_HELP_TEXT),
)
.arg(
Arg::with_name(options::ARG_USERS)
@ -499,34 +549,80 @@ fn auditid() {
println!("asid={}", auditinfo.ai_asid);
}
fn id_print(state: &State, groups: Vec<u32>) {
fn id_print(state: &mut State, groups: Vec<u32>) {
let uid = state.ids.as_ref().unwrap().uid;
let gid = state.ids.as_ref().unwrap().gid;
let euid = state.ids.as_ref().unwrap().euid;
let egid = state.ids.as_ref().unwrap().egid;
print!("uid={}({})", uid, entries::uid2usr(uid).unwrap());
print!(" gid={}({})", gid, entries::gid2grp(gid).unwrap());
print!(
"uid={}({})",
uid,
entries::uid2usr(uid).unwrap_or_else(|_| {
show_error!("cannot find name for user ID {}", uid);
state.exit_code = 1;
uid.to_string()
})
);
print!(
" gid={}({})",
gid,
entries::gid2grp(gid).unwrap_or_else(|_| {
show_error!("cannot find name for group ID {}", gid);
state.exit_code = 1;
gid.to_string()
})
);
if !state.user_specified && (euid != uid) {
print!(" euid={}({})", euid, entries::uid2usr(euid).unwrap());
print!(
" euid={}({})",
euid,
entries::uid2usr(euid).unwrap_or_else(|_| {
show_error!("cannot find name for user ID {}", euid);
state.exit_code = 1;
euid.to_string()
})
);
}
if !state.user_specified && (egid != gid) {
print!(" egid={}({})", euid, entries::gid2grp(egid).unwrap());
print!(
" egid={}({})",
euid,
entries::gid2grp(egid).unwrap_or_else(|_| {
show_error!("cannot find name for group ID {}", egid);
state.exit_code = 1;
egid.to_string()
})
);
}
print!(
" groups={}",
groups
.iter()
.map(|&gr| format!("{}({})", gr, entries::gid2grp(gr).unwrap()))
.map(|&gr| format!(
"{}({})",
gr,
entries::gid2grp(gr).unwrap_or_else(|_| {
show_error!("cannot find name for group ID {}", gr);
state.exit_code = 1;
gr.to_string()
})
))
.collect::<Vec<_>>()
.join(",")
);
// NOTE: (SELinux NotImplemented) placeholder:
// if !state.user_specified {
// // print SElinux context (does not depend on "-Z")
// print!(" context={}", get_selinux_contexts().join(":"));
// }
#[cfg(all(target_os = "linux", feature = "selinux"))]
if state.selinux_supported
&& !state.user_specified
&& std::env::var_os("POSIXLY_CORRECT").is_none()
{
// print SElinux context (does not depend on "-Z")
if let Ok(context) = selinux::SecurityContext::current(false) {
let bytes = context.as_bytes();
print!(" context={}", String::from_utf8_lossy(bytes));
}
}
}
#[cfg(not(target_os = "linux"))]

View file

@ -11,9 +11,12 @@
extern crate uucore;
use clap::{crate_version, App, Arg};
use uucore::error::{UCustomError, UResult};
use std::borrow::Cow;
use std::error::Error;
use std::ffi::OsStr;
use std::fmt::Display;
use std::fs;
use std::io::{stdin, Result};
@ -44,6 +47,51 @@ pub enum OverwriteMode {
Force,
}
/// Errors reported by `ln`. The user-facing text of each variant is produced
/// by the `Display` implementation; the payload is the value interpolated
/// into that text.
#[derive(Debug)]
enum LnError {
/// The target given for directory mode is *not* a directory (note that the
/// variant name reads the other way round); carries the displayed target path.
TargetIsDirectory(String),
/// At least one link could not be created while linking into a directory.
SomeLinksFailed,
/// Creating a single link failed; carries the text of the underlying error.
FailedToLink(String),
/// Only one operand was given where a destination is required; carries that operand.
MissingDestination(String),
/// More than two operands were given; carries the first surplus operand.
ExtraOperand(String),
/// The backup mode could not be determined; carries the complete message.
InvalidBackupMode(String),
}
impl Display for LnError {
    /// Writes the user-facing message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Messages without any interpolation around the payload.
            Self::SomeLinksFailed => f.write_str("some links failed to create"),
            Self::InvalidBackupMode(message) => f.write_str(message),
            // Messages that embed the offending operand or path.
            Self::TargetIsDirectory(target) => {
                write!(f, "target '{}' is not a directory", target)
            }
            Self::FailedToLink(reason) => write!(f, "failed to link '{}'", reason),
            Self::MissingDestination(operand) => {
                write!(f, "missing destination file operand after '{}'", operand)
            }
            Self::ExtraOperand(operand) => {
                let program = executable!();
                write!(
                    f,
                    "extra operand '{}'\nTry '{} --help' for more information.",
                    operand, program
                )
            }
        }
    }
}
impl Error for LnError {}

impl UCustomError for LnError {
    /// Exit code associated with this error: every `LnError` maps to 1.
    ///
    /// The match stays exhaustive (no `_` arm) so that adding a variant
    /// forces a decision about its exit code.
    fn code(&self) -> i32 {
        match self {
            Self::TargetIsDirectory(_)
            | Self::SomeLinksFailed
            | Self::FailedToLink(_)
            | Self::MissingDestination(_)
            | Self::ExtraOperand(_)
            | Self::InvalidBackupMode(_) => 1,
        }
    }
}
fn get_usage() -> String {
format!(
"{0} [OPTION]... [-T] TARGET LINK_NAME (1st form)
@ -86,7 +134,8 @@ mod options {
static ARG_FILES: &str = "files";
pub fn uumain(args: impl uucore::Args) -> i32 {
#[uucore_procs::gen_uumain]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let usage = get_usage();
let long_usage = get_long_usage();
@ -122,8 +171,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
);
let backup_mode = match backup_mode {
Err(err) => {
show_usage_error!("{}", err);
return 1;
return Err(LnError::InvalidBackupMode(err).into());
}
Ok(mode) => mode,
};
@ -246,7 +294,7 @@ pub fn uu_app() -> App<'static, 'static> {
)
}
fn exec(files: &[PathBuf], settings: &Settings) -> i32 {
fn exec(files: &[PathBuf], settings: &Settings) -> UResult<()> {
// Handle cases where we create links in a directory first.
if let Some(ref name) = settings.target_dir {
// 4th form: a directory is specified by -t.
@ -267,35 +315,22 @@ fn exec(files: &[PathBuf], settings: &Settings) -> i32 {
// 1st form. Now there should be only two operands, but if -T is
// specified we may have a wrong number of operands.
if files.len() == 1 {
show_error!(
"missing destination file operand after '{}'",
files[0].to_string_lossy()
);
return 1;
return Err(LnError::MissingDestination(files[0].to_string_lossy().into()).into());
}
if files.len() > 2 {
show_error!(
"extra operand '{}'\nTry '{} --help' for more information.",
files[2].display(),
executable!()
);
return 1;
return Err(LnError::ExtraOperand(files[2].display().to_string()).into());
}
assert!(!files.is_empty());
match link(&files[0], &files[1], settings) {
Ok(_) => 0,
Err(e) => {
show_error!("{}", e);
1
}
Ok(_) => Ok(()),
Err(e) => Err(LnError::FailedToLink(e.to_string()).into()),
}
}
fn link_files_in_dir(files: &[PathBuf], target_dir: &Path, settings: &Settings) -> i32 {
fn link_files_in_dir(files: &[PathBuf], target_dir: &Path, settings: &Settings) -> UResult<()> {
if !target_dir.is_dir() {
show_error!("target '{}' is not a directory", target_dir.display());
return 1;
return Err(LnError::TargetIsDirectory(target_dir.display().to_string()).into());
}
let mut all_successful = true;
@ -354,9 +389,9 @@ fn link_files_in_dir(files: &[PathBuf], target_dir: &Path, settings: &Settings)
}
}
if all_successful {
0
Ok(())
} else {
1
Err(LnError::SomeLinksFailed.into())
}
}

View file

@ -9,24 +9,33 @@
use crate::{
chunks::{self, Chunk, RecycledChunk},
compare_by, open, GlobalSettings,
compare_by, open, GlobalSettings, SortError,
};
use itertools::Itertools;
use std::{
cmp::Ordering,
ffi::OsStr,
io::Read,
iter,
sync::mpsc::{sync_channel, Receiver, SyncSender},
thread,
};
use uucore::error::UResult;
/// Check if the file at `path` is ordered.
///
/// # Returns
///
/// The code we should exit with.
pub fn check(path: &str, settings: &GlobalSettings) -> i32 {
let file = open(path);
pub fn check(path: &OsStr, settings: &GlobalSettings) -> UResult<()> {
let max_allowed_cmp = if settings.unique {
// If `unique` is enabled, the previous line must compare _less_ to the next one.
Ordering::Less
} else {
// Otherwise, the previous line must compare _less or equal_ to the next one.
Ordering::Equal
};
let file = open(path)?;
let (recycled_sender, recycled_receiver) = sync_channel(2);
let (loaded_sender, loaded_receiver) = sync_channel(2);
thread::spawn({
@ -34,7 +43,13 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 {
move || reader(file, recycled_receiver, loaded_sender, &settings)
});
for _ in 0..2 {
let _ = recycled_sender.send(RecycledChunk::new(100 * 1024));
let _ = recycled_sender.send(RecycledChunk::new(if settings.buffer_size < 100 * 1024 {
// when the buffer size is smaller than 100KiB we choose it instead of the default.
// this improves testability.
settings.buffer_size
} else {
100 * 1024
}));
}
let mut prev_chunk: Option<Chunk> = None;
@ -53,30 +68,35 @@ pub fn check(path: &str, settings: &GlobalSettings) -> i32 {
settings,
prev_chunk.line_data(),
chunk.line_data(),
) == Ordering::Greater
) > max_allowed_cmp
{
if !settings.check_silent {
println!("sort: {}:{}: disorder: {}", path, line_idx, new_first.line);
return Err(SortError::Disorder {
file: path.to_owned(),
line_number: line_idx,
line: new_first.line.to_owned(),
silent: settings.check_silent,
}
return 1;
.into());
}
let _ = recycled_sender.send(prev_chunk.recycle());
}
for (a, b) in chunk.lines().iter().tuple_windows() {
line_idx += 1;
if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) == Ordering::Greater
{
if !settings.check_silent {
println!("sort: {}:{}: disorder: {}", path, line_idx, b.line);
if compare_by(a, b, settings, chunk.line_data(), chunk.line_data()) > max_allowed_cmp {
return Err(SortError::Disorder {
file: path.to_owned(),
line_number: line_idx,
line: b.line.to_owned(),
silent: settings.check_silent,
}
return 1;
.into());
}
}
prev_chunk = Some(chunk);
}
0
Ok(())
}
/// The function running on the reader thread.
@ -85,7 +105,7 @@ fn reader(
receiver: Receiver<RecycledChunk>,
sender: SyncSender<Chunk>,
settings: &GlobalSettings,
) {
) -> UResult<()> {
let mut carry_over = vec![];
for recycled_chunk in receiver.iter() {
let should_continue = chunks::read(
@ -101,9 +121,10 @@ fn reader(
b'\n'
},
settings,
);
)?;
if !should_continue {
break;
}
}
Ok(())
}

View file

@ -14,8 +14,9 @@ use std::{
use memchr::memchr_iter;
use ouroboros::self_referencing;
use uucore::error::{UResult, USimpleError};
use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line};
use crate::{numeric_str_cmp::NumInfo, GeneralF64ParseResult, GlobalSettings, Line, SortError};
/// The chunk that is passed around between threads.
/// `lines` consist of slices into `buffer`.
@ -137,10 +138,10 @@ pub fn read<T: Read>(
max_buffer_size: Option<usize>,
carry_over: &mut Vec<u8>,
file: &mut T,
next_files: &mut impl Iterator<Item = T>,
next_files: &mut impl Iterator<Item = UResult<T>>,
separator: u8,
settings: &GlobalSettings,
) -> bool {
) -> UResult<bool> {
let RecycledChunk {
lines,
selections,
@ -159,12 +160,12 @@ pub fn read<T: Read>(
max_buffer_size,
carry_over.len(),
separator,
);
)?;
carry_over.clear();
carry_over.extend_from_slice(&buffer[read..]);
if read != 0 {
let payload = Chunk::new(buffer, |buffer| {
let payload: UResult<Chunk> = Chunk::try_new(buffer, |buffer| {
let selections = unsafe {
// SAFETY: It is safe to transmute to an empty vector of selections with shorter lifetime.
// It was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
@ -175,18 +176,19 @@ pub fn read<T: Read>(
// because it was only temporarily transmuted to a Vec<Line<'static>> to make recycling possible.
std::mem::transmute::<Vec<Line<'static>>, Vec<Line<'_>>>(lines)
};
let read = crash_if_err!(1, std::str::from_utf8(&buffer[..read]));
let read = std::str::from_utf8(&buffer[..read])
.map_err(|error| SortError::Uft8Error { error })?;
let mut line_data = LineData {
selections,
num_infos,
parsed_floats,
};
parse_lines(read, &mut lines, &mut line_data, separator, settings);
ChunkContents { lines, line_data }
Ok(ChunkContents { lines, line_data })
});
sender.send(payload).unwrap();
sender.send(payload?).unwrap();
}
should_continue
Ok(should_continue)
}
/// Split `read` into `Line`s, and add them to `lines`.
@ -242,12 +244,12 @@ fn parse_lines<'a>(
/// * Whether this function should be called again.
fn read_to_buffer<T: Read>(
file: &mut T,
next_files: &mut impl Iterator<Item = T>,
next_files: &mut impl Iterator<Item = UResult<T>>,
buffer: &mut Vec<u8>,
max_buffer_size: Option<usize>,
start_offset: usize,
separator: u8,
) -> (usize, bool) {
) -> UResult<(usize, bool)> {
let mut read_target = &mut buffer[start_offset..];
let mut last_file_target_size = read_target.len();
loop {
@ -274,7 +276,7 @@ fn read_to_buffer<T: Read>(
// We read enough lines.
let end = last_line_end.unwrap();
// We want to include the separator here, because it shouldn't be carried over.
return (end + 1, true);
return Ok((end + 1, true));
} else {
// We need to read more lines
let len = buffer.len();
@ -299,11 +301,11 @@ fn read_to_buffer<T: Read>(
if let Some(next_file) = next_files.next() {
// There is another file.
last_file_target_size = leftover_len;
*file = next_file;
*file = next_file?;
} else {
// This was the last file.
let read_len = buffer.len() - leftover_len;
return (read_len, false);
return Ok((read_len, false));
}
}
}
@ -313,7 +315,7 @@ fn read_to_buffer<T: Read>(
Err(e) if e.kind() == ErrorKind::Interrupted => {
// retry
}
Err(e) => crash!(1, "{}", e),
Err(e) => return Err(USimpleError::new(2, e.to_string())),
}
}
}

View file

@ -22,12 +22,15 @@ use std::{
};
use itertools::Itertools;
use uucore::error::UResult;
use crate::chunks::RecycledChunk;
use crate::merge::ClosedTmpFile;
use crate::merge::WriteableCompressedTmpFile;
use crate::merge::WriteablePlainTmpFile;
use crate::merge::WriteableTmpFile;
use crate::Output;
use crate::SortError;
use crate::{
chunks::{self, Chunk},
compare_by, merge, sort_by, GlobalSettings,
@ -38,7 +41,11 @@ use tempfile::TempDir;
const START_BUFFER_SIZE: usize = 8_000;
/// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result.
pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings: &GlobalSettings) {
pub fn ext_sort(
files: &mut impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
settings: &GlobalSettings,
output: Output,
) -> UResult<()> {
let (sorted_sender, sorted_receiver) = std::sync::mpsc::sync_channel(1);
let (recycled_sender, recycled_receiver) = std::sync::mpsc::sync_channel(1);
thread::spawn({
@ -51,23 +58,29 @@ pub fn ext_sort(files: &mut impl Iterator<Item = Box<dyn Read + Send>>, settings
settings,
sorted_receiver,
recycled_sender,
);
output,
)
} else {
reader_writer::<_, WriteablePlainTmpFile>(
files,
settings,
sorted_receiver,
recycled_sender,
);
output,
)
}
}
fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile + 'static>(
fn reader_writer<
F: Iterator<Item = UResult<Box<dyn Read + Send>>>,
Tmp: WriteableTmpFile + 'static,
>(
files: F,
settings: &GlobalSettings,
receiver: Receiver<Chunk>,
sender: SyncSender<Chunk>,
) {
output: Output,
) -> UResult<()> {
let separator = if settings.zero_terminated {
b'\0'
} else {
@ -81,22 +94,20 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
files,
&settings.tmp_dir,
separator,
// Heuristically chosen: Dividing by 10 seems to keep our memory usage roughly
// around settings.buffer_size as a whole.
buffer_size,
settings,
receiver,
sender,
);
)?;
match read_result {
ReadResult::WroteChunksToFile { tmp_files, tmp_dir } => {
let tmp_dir_size = tmp_files.len();
let mut merger = merge::merge_with_file_limit::<_, _, Tmp>(
let merger = merge::merge_with_file_limit::<_, _, Tmp>(
tmp_files.into_iter().map(|c| c.reopen()),
settings,
Some((tmp_dir, tmp_dir_size)),
);
merger.write_all(settings);
)?;
merger.write_all(settings, output)?;
}
ReadResult::SortedSingleChunk(chunk) => {
if settings.unique {
@ -106,9 +117,10 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
== Ordering::Equal
}),
settings,
output,
);
} else {
print_sorted(chunk.lines().iter(), settings);
print_sorted(chunk.lines().iter(), settings, output);
}
}
ReadResult::SortedTwoChunks([a, b]) => {
@ -128,15 +140,17 @@ fn reader_writer<F: Iterator<Item = Box<dyn Read + Send>>, Tmp: WriteableTmpFile
})
.map(|(line, _)| line),
settings,
output,
);
} else {
print_sorted(merged_iter.map(|(line, _)| line), settings);
print_sorted(merged_iter.map(|(line, _)| line), settings, output);
}
}
ReadResult::EmptyInput => {
// don't output anything
}
}
Ok(())
}
/// The function that is executed on the sorter thread.
@ -145,7 +159,11 @@ fn sorter(receiver: Receiver<Chunk>, sender: SyncSender<Chunk>, settings: Global
payload.with_contents_mut(|contents| {
sort_by(&mut contents.lines, &settings, &contents.line_data)
});
sender.send(payload).unwrap();
if sender.send(payload).is_err() {
// The receiver has gone away, likely because the other thread hit an error.
// We stop silently because the actual error is printed by the other thread.
return;
}
}
}
@ -165,15 +183,15 @@ enum ReadResult<I: WriteableTmpFile> {
}
/// The function that is executed on the reader/writer thread.
fn read_write_loop<I: WriteableTmpFile>(
mut files: impl Iterator<Item = Box<dyn Read + Send>>,
mut files: impl Iterator<Item = UResult<Box<dyn Read + Send>>>,
tmp_dir_parent: &Path,
separator: u8,
buffer_size: usize,
settings: &GlobalSettings,
receiver: Receiver<Chunk>,
sender: SyncSender<Chunk>,
) -> ReadResult<I> {
let mut file = files.next().unwrap();
) -> UResult<ReadResult<I>> {
let mut file = files.next().unwrap()?;
let mut carry_over = vec![];
// kick things off with two reads
@ -191,14 +209,14 @@ fn read_write_loop<I: WriteableTmpFile>(
&mut files,
separator,
settings,
);
)?;
if !should_continue {
drop(sender);
// We have already read the whole input. Since we are in our first two reads,
// this means that we can fit the whole input into memory. Bypass writing below and
// handle this case in a more straightforward way.
return if let Ok(first_chunk) = receiver.recv() {
return Ok(if let Ok(first_chunk) = receiver.recv() {
if let Ok(second_chunk) = receiver.recv() {
ReadResult::SortedTwoChunks([first_chunk, second_chunk])
} else {
@ -206,16 +224,14 @@ fn read_write_loop<I: WriteableTmpFile>(
}
} else {
ReadResult::EmptyInput
};
});
}
}
let tmp_dir = crash_if_err!(
1,
tempfile::Builder::new()
.prefix("uutils_sort")
.tempdir_in(tmp_dir_parent)
);
let tmp_dir = tempfile::Builder::new()
.prefix("uutils_sort")
.tempdir_in(tmp_dir_parent)
.map_err(|_| SortError::TmpDirCreationFailed)?;
let mut sender_option = Some(sender);
let mut file_number = 0;
@ -224,7 +240,7 @@ fn read_write_loop<I: WriteableTmpFile>(
let mut chunk = match receiver.recv() {
Ok(it) => it,
_ => {
return ReadResult::WroteChunksToFile { tmp_files, tmp_dir };
return Ok(ReadResult::WroteChunksToFile { tmp_files, tmp_dir });
}
};
@ -233,7 +249,7 @@ fn read_write_loop<I: WriteableTmpFile>(
tmp_dir.path().join(file_number.to_string()),
settings.compress_prog.as_deref(),
separator,
);
)?;
tmp_files.push(tmp_file);
file_number += 1;
@ -250,7 +266,7 @@ fn read_write_loop<I: WriteableTmpFile>(
&mut files,
separator,
settings,
);
)?;
if !should_continue {
sender_option = None;
}
@ -265,8 +281,8 @@ fn write<I: WriteableTmpFile>(
file: PathBuf,
compress_prog: Option<&str>,
separator: u8,
) -> I::Closed {
let mut tmp_file = I::create(file, compress_prog);
) -> UResult<I::Closed> {
let mut tmp_file = I::create(file, compress_prog)?;
write_lines(chunk.lines(), tmp_file.as_write(), separator);
tmp_file.finished_writing()
}

View file

@ -9,44 +9,85 @@
use std::{
cmp::Ordering,
ffi::OsString,
fs::{self, File},
io::{BufWriter, Read, Write},
iter,
path::PathBuf,
path::{Path, PathBuf},
process::{Child, ChildStdin, ChildStdout, Command, Stdio},
rc::Rc,
sync::mpsc::{channel, sync_channel, Receiver, Sender, SyncSender},
thread,
thread::{self, JoinHandle},
};
use compare::Compare;
use itertools::Itertools;
use tempfile::TempDir;
use uucore::error::UResult;
use crate::{
chunks::{self, Chunk, RecycledChunk},
compare_by, GlobalSettings,
compare_by, open, GlobalSettings, Output, SortError,
};
/// Protects input files that alias the output file.
///
/// Every entry of `files` that resolves (via `canonicalize`) to the same path
/// as `output` is replaced by the path of a single copy of that file, made in
/// a fresh temporary directory below `settings.tmp_dir`.
///
/// # Returns
///
/// `Some((dir, 1))` if a copy was made (the directory then holds exactly one
/// file, named `0`), `None` otherwise. Paths that cannot be canonicalized are
/// left untouched.
///
/// # Errors
///
/// * `SortError::TmpDirCreationFailed` if the temporary directory cannot be created.
/// * `SortError::OpenTmpFileFailed` if copying the file fails.
fn replace_output_file_in_input_files(
    files: &mut [OsString],
    settings: &GlobalSettings,
    output: Option<&str>,
) -> UResult<Option<(TempDir, usize)>> {
    // Nothing to do without an output file that resolves to a real path.
    let output_path = match output.map(|path| Path::new(path).canonicalize()) {
        Some(Ok(resolved)) => resolved,
        _ => return Ok(None),
    };

    let mut backup: Option<(TempDir, PathBuf)> = None;
    for file in files.iter_mut() {
        let input_path = match Path::new(file).canonicalize() {
            Ok(resolved) => resolved,
            Err(_) => continue,
        };
        if input_path != output_path {
            continue;
        }

        // The copy is created lazily, on the first input that aliases the output.
        if backup.is_none() {
            let tmp_dir = tempfile::Builder::new()
                .prefix("uutils_sort")
                .tempdir_in(&settings.tmp_dir)
                .map_err(|_| SortError::TmpDirCreationFailed)?;
            let copy_path = tmp_dir.path().join("0");
            std::fs::copy(input_path, &copy_path)
                .map_err(|error| SortError::OpenTmpFileFailed { error })?;
            backup = Some((tmp_dir, copy_path));
        }
        if let Some((_dir, copy_path)) = &backup {
            *file = copy_path.clone().into_os_string();
        }
    }

    // if we created a TempDir its size must be one.
    Ok(backup.map(|(dir, _copy_path)| (dir, 1)))
}
/// Merge pre-sorted input files.
///
/// If the output file is also one of the inputs, it is first replaced by a temporary copy
/// (see `replace_output_file_in_input_files`).
///
/// If the number of `files` exceeds `settings.merge_batch_size`, intermediate files will be used.
/// If `settings.compress_prog` is `Some`, intermediate files will be compressed with it.
pub fn merge<Files: ExactSizeIterator<Item = Box<dyn Read + Send>>>(
files: Files,
settings: &GlobalSettings,
) -> FileMerger {
pub fn merge<'a>(
files: &mut [OsString],
settings: &'a GlobalSettings,
output: Option<&str>,
) -> UResult<FileMerger<'a>> {
let tmp_dir = replace_output_file_in_input_files(files, settings, output)?;
// The two branches differ only in the kind of temporary file used for intermediate results.
if settings.compress_prog.is_none() {
merge_with_file_limit::<_, _, WriteablePlainTmpFile>(
files.map(|file| PlainMergeInput { inner: file }),
files
.iter()
.map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
settings,
None,
tmp_dir,
)
} else {
merge_with_file_limit::<_, _, WriteableCompressedTmpFile>(
files.map(|file| PlainMergeInput { inner: file }),
files
.iter()
.map(|file| open(file).map(|file| PlainMergeInput { inner: file })),
settings,
None,
tmp_dir,
)
}
}
@ -54,24 +95,25 @@ pub fn merge<Files: ExactSizeIterator<Item = Box<dyn Read + Send>>>(
// Merge already sorted `MergeInput`s.
pub fn merge_with_file_limit<
M: MergeInput + 'static,
F: ExactSizeIterator<Item = M>,
F: ExactSizeIterator<Item = UResult<M>>,
Tmp: WriteableTmpFile + 'static,
>(
files: F,
settings: &GlobalSettings,
tmp_dir: Option<(TempDir, usize)>,
) -> FileMerger {
) -> UResult<FileMerger> {
if files.len() > settings.merge_batch_size {
// If we did not get a tmp_dir, create one.
let (tmp_dir, mut tmp_dir_size) = tmp_dir.unwrap_or_else(|| {
(
let (tmp_dir, mut tmp_dir_size) = match tmp_dir {
Some(x) => x,
None => (
tempfile::Builder::new()
.prefix("uutils_sort")
.tempdir_in(&settings.tmp_dir)
.unwrap(),
.map_err(|_| SortError::TmpDirCreationFailed)?,
0,
)
});
),
};
let mut remaining_files = files.len();
let batches = files.chunks(settings.merge_batch_size);
let mut batches = batches.into_iter();
@ -79,14 +121,14 @@ pub fn merge_with_file_limit<
while remaining_files != 0 {
// Work around the fact that `Chunks` is not an `ExactSizeIterator`.
remaining_files = remaining_files.saturating_sub(settings.merge_batch_size);
let mut merger = merge_without_limit(batches.next().unwrap(), settings);
let merger = merge_without_limit(batches.next().unwrap(), settings)?;
let mut tmp_file = Tmp::create(
tmp_dir.path().join(tmp_dir_size.to_string()),
settings.compress_prog.as_deref(),
);
)?;
tmp_dir_size += 1;
merger.write_all_to(settings, tmp_file.as_write());
temporary_files.push(tmp_file.finished_writing());
merger.write_all_to(settings, tmp_file.as_write())?;
temporary_files.push(tmp_file.finished_writing()?);
}
assert!(batches.next().is_none());
merge_with_file_limit::<_, _, Tmp>(
@ -94,7 +136,7 @@ pub fn merge_with_file_limit<
.into_iter()
.map(Box::new(|c: Tmp::Closed| c.reopen())
as Box<
dyn FnMut(Tmp::Closed) -> <Tmp::Closed as ClosedTmpFile>::Reopened,
dyn FnMut(Tmp::Closed) -> UResult<<Tmp::Closed as ClosedTmpFile>::Reopened>,
>),
settings,
Some((tmp_dir, tmp_dir_size)),
@ -108,10 +150,10 @@ pub fn merge_with_file_limit<
///
/// It is the responsibility of the caller to ensure that `files` yields only
/// as many files as we are allowed to open concurrently.
fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = UResult<M>>>(
files: F,
settings: &GlobalSettings,
) -> FileMerger {
) -> UResult<FileMerger> {
let (request_sender, request_receiver) = channel();
let mut reader_files = Vec::with_capacity(files.size_hint().0);
let mut loaded_receivers = Vec::with_capacity(files.size_hint().0);
@ -119,7 +161,7 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
let (sender, receiver) = sync_channel(2);
loaded_receivers.push(receiver);
reader_files.push(Some(ReaderFile {
file,
file: file?,
sender,
carry_over: vec![],
}));
@ -136,7 +178,7 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
.unwrap();
}
thread::spawn({
let reader_join_handle = thread::spawn({
let settings = settings.clone();
move || {
reader(
@ -155,22 +197,25 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = M>>(
let mut mergeable_files = vec![];
for (file_number, receiver) in loaded_receivers.into_iter().enumerate() {
mergeable_files.push(MergeableFile {
current_chunk: Rc::new(receiver.recv().unwrap()),
file_number,
line_idx: 0,
receiver,
})
if let Ok(chunk) = receiver.recv() {
mergeable_files.push(MergeableFile {
current_chunk: Rc::new(chunk),
file_number,
line_idx: 0,
receiver,
})
}
}
FileMerger {
Ok(FileMerger {
heap: binary_heap_plus::BinaryHeap::from_vec_cmp(
mergeable_files,
FileComparator { settings },
),
request_sender,
prev: None,
}
reader_join_handle,
})
}
/// The struct on the reader thread representing an input file
struct ReaderFile<M: MergeInput> {
@ -185,7 +230,7 @@ fn reader(
files: &mut [Option<ReaderFile<impl MergeInput>>],
settings: &GlobalSettings,
separator: u8,
) {
) -> UResult<()> {
for (file_idx, recycled_chunk) in recycled_receiver.iter() {
if let Some(ReaderFile {
file,
@ -202,15 +247,16 @@ fn reader(
&mut iter::empty(),
separator,
settings,
);
)?;
if !should_continue {
// Remove the file from the list by replacing it with `None`.
let ReaderFile { file, .. } = files[file_idx].take().unwrap();
// Depending on the kind of the `MergeInput`, this may delete the file:
file.finished_reading();
file.finished_reading()?;
}
}
}
Ok(())
}
/// The struct on the main thread representing an input file
pub struct MergeableFile {
@ -234,17 +280,20 @@ pub struct FileMerger<'a> {
heap: binary_heap_plus::BinaryHeap<MergeableFile, FileComparator<'a>>,
request_sender: Sender<(usize, RecycledChunk)>,
prev: Option<PreviousLine>,
reader_join_handle: JoinHandle<UResult<()>>,
}
impl<'a> FileMerger<'a> {
/// Write the merged contents to the output file.
pub fn write_all(&mut self, settings: &GlobalSettings) {
let mut out = settings.out_writer();
self.write_all_to(settings, &mut out);
pub fn write_all(self, settings: &GlobalSettings, output: Output) -> UResult<()> {
let mut out = output.into_write();
self.write_all_to(settings, &mut out)
}
pub fn write_all_to(&mut self, settings: &GlobalSettings, out: &mut impl Write) {
pub fn write_all_to(mut self, settings: &GlobalSettings, out: &mut impl Write) -> UResult<()> {
while self.write_next(settings, out) {}
drop(self.request_sender);
self.reader_join_handle.join().unwrap()
}
fn write_next(&mut self, settings: &GlobalSettings, out: &mut impl Write) -> bool {
@ -328,36 +377,41 @@ impl<'a> Compare<MergeableFile> for FileComparator<'a> {
}
// Wait for the child to exit and check its exit code.
//
// Only an exit with a non-zero code is treated as a failure. An exit code of 0,
// an exit without any code (`Ok(None)`) and a failing `wait()` call (`Err(_)`)
// are all accepted.
// NOTE(review): accepting `Ok(None)` and `Err(_)` looks deliberate, but confirm.
fn assert_child_success(mut child: Child, program: &str) {
fn check_child_success(mut child: Child, program: &str) -> UResult<()> {
if !matches!(
child.wait().map(|e| e.code()),
Ok(Some(0)) | Ok(None) | Err(_)
) {
crash!(2, "'{}' terminated abnormally", program)
Err(SortError::CompressProgTerminatedAbnormally {
prog: program.to_owned(),
}
.into())
} else {
Ok(())
}
}
/// A temporary file that can be written to.
pub trait WriteableTmpFile {
pub trait WriteableTmpFile: Sized {
/// The closed form of this file, as returned by `finished_writing`.
type Closed: ClosedTmpFile;
/// The writer handed out by `as_write`.
type InnerWrite: Write;
/// Creates the temporary file at `path`.
/// `compress_prog` is only needed by implementations that compress their contents.
fn create(path: PathBuf, compress_prog: Option<&str>) -> Self;
fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult<Self>;
/// Closes the temporary file.
fn finished_writing(self) -> Self::Closed;
fn finished_writing(self) -> UResult<Self::Closed>;
/// Gives access to the writer used to fill the temporary file.
fn as_write(&mut self) -> &mut Self::InnerWrite;
}
/// A temporary file that is (temporarily) closed, but can be reopened.
pub trait ClosedTmpFile {
/// The readable merge input obtained by reopening the file.
type Reopened: MergeInput;
/// Reopens the temporary file.
fn reopen(self) -> Self::Reopened;
fn reopen(self) -> UResult<Self::Reopened>;
}
/// A pre-sorted input for merging.
pub trait MergeInput: Send {
/// The reader handed out by `as_read`.
type InnerRead: Read;
/// Cleans this `MergeInput` up.
/// Implementations may delete the backing file.
fn finished_reading(self);
fn finished_reading(self) -> UResult<()>;
/// Gives access to the reader the input's contents are read from.
fn as_read(&mut self) -> &mut Self::InnerRead;
}
@ -376,15 +430,17 @@ impl WriteableTmpFile for WriteablePlainTmpFile {
type Closed = ClosedPlainTmpFile;
type InnerWrite = BufWriter<File>;
fn create(path: PathBuf, _: Option<&str>) -> Self {
WriteablePlainTmpFile {
file: BufWriter::new(File::create(&path).unwrap()),
fn create(path: PathBuf, _: Option<&str>) -> UResult<Self> {
Ok(WriteablePlainTmpFile {
file: BufWriter::new(
File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?,
),
path,
}
})
}
fn finished_writing(self) -> Self::Closed {
ClosedPlainTmpFile { path: self.path }
fn finished_writing(self) -> UResult<Self::Closed> {
Ok(ClosedPlainTmpFile { path: self.path })
}
fn as_write(&mut self) -> &mut Self::InnerWrite {
@ -393,18 +449,22 @@ impl WriteableTmpFile for WriteablePlainTmpFile {
}
impl ClosedTmpFile for ClosedPlainTmpFile {
type Reopened = PlainTmpMergeInput;
// Reopens the file at `self.path` for reading. The path is handed on to the
// merge input, which removes the file once reading has finished.
fn reopen(self) -> Self::Reopened {
PlainTmpMergeInput {
file: File::open(&self.path).unwrap(),
fn reopen(self) -> UResult<Self::Reopened> {
Ok(PlainTmpMergeInput {
file: File::open(&self.path).map_err(|error| SortError::OpenTmpFileFailed { error })?,
path: self.path,
}
})
}
}
impl MergeInput for PlainTmpMergeInput {
type InnerRead = File;
fn finished_reading(self) {
fs::remove_file(self.path).ok();
fn finished_reading(self) -> UResult<()> {
// we ignore failures to delete the temporary file,
// because there is a race at the end of the execution and the whole
// temporary directory might already be gone.
let _ = fs::remove_file(self.path);
Ok(())
}
fn as_read(&mut self) -> &mut Self::InnerRead {
@ -432,35 +492,33 @@ impl WriteableTmpFile for WriteableCompressedTmpFile {
type Closed = ClosedCompressedTmpFile;
type InnerWrite = BufWriter<ChildStdin>;
fn create(path: PathBuf, compress_prog: Option<&str>) -> Self {
fn create(path: PathBuf, compress_prog: Option<&str>) -> UResult<Self> {
let compress_prog = compress_prog.unwrap();
let mut command = Command::new(compress_prog);
command
.stdin(Stdio::piped())
.stdout(File::create(&path).unwrap());
let mut child = crash_if_err!(
2,
command.spawn().map_err(|err| format!(
"couldn't execute compress program: errno {}",
err.raw_os_error().unwrap()
))
);
let tmp_file =
File::create(&path).map_err(|error| SortError::OpenTmpFileFailed { error })?;
command.stdin(Stdio::piped()).stdout(tmp_file);
let mut child = command
.spawn()
.map_err(|err| SortError::CompressProgExecutionFailed {
code: err.raw_os_error().unwrap(),
})?;
let child_stdin = child.stdin.take().unwrap();
WriteableCompressedTmpFile {
Ok(WriteableCompressedTmpFile {
path,
compress_prog: compress_prog.to_owned(),
child,
child_stdin: BufWriter::new(child_stdin),
}
})
}
fn finished_writing(self) -> Self::Closed {
fn finished_writing(self) -> UResult<Self::Closed> {
drop(self.child_stdin);
assert_child_success(self.child, &self.compress_prog);
ClosedCompressedTmpFile {
check_child_success(self.child, &self.compress_prog)?;
Ok(ClosedCompressedTmpFile {
path: self.path,
compress_prog: self.compress_prog,
}
})
}
fn as_write(&mut self) -> &mut Self::InnerWrite {
@ -470,33 +528,32 @@ impl WriteableTmpFile for WriteableCompressedTmpFile {
impl ClosedTmpFile for ClosedCompressedTmpFile {
type Reopened = CompressedTmpMergeInput;
fn reopen(self) -> Self::Reopened {
fn reopen(self) -> UResult<Self::Reopened> {
let mut command = Command::new(&self.compress_prog);
let file = File::open(&self.path).unwrap();
command.stdin(file).stdout(Stdio::piped()).arg("-d");
let mut child = crash_if_err!(
2,
command.spawn().map_err(|err| format!(
"couldn't execute compress program: errno {}",
err.raw_os_error().unwrap()
))
);
let mut child = command
.spawn()
.map_err(|err| SortError::CompressProgExecutionFailed {
code: err.raw_os_error().unwrap(),
})?;
let child_stdout = child.stdout.take().unwrap();
CompressedTmpMergeInput {
Ok(CompressedTmpMergeInput {
path: self.path,
compress_prog: self.compress_prog,
child,
child_stdout,
}
})
}
}
impl MergeInput for CompressedTmpMergeInput {
type InnerRead = ChildStdout;
fn finished_reading(self) {
fn finished_reading(self) -> UResult<()> {
drop(self.child_stdout);
assert_child_success(self.child, &self.compress_prog);
fs::remove_file(self.path).ok();
check_child_success(self.child, &self.compress_prog)?;
let _ = fs::remove_file(self.path);
Ok(())
}
fn as_read(&mut self) -> &mut Self::InnerRead {
@ -509,7 +566,9 @@ pub struct PlainMergeInput<R: Read + Send> {
}
impl<R: Read + Send> MergeInput for PlainMergeInput<R> {
type InnerRead = R;
fn finished_reading(self) {}
fn finished_reading(self) -> UResult<()> {
Ok(())
}
fn as_read(&mut self) -> &mut Self::InnerRead {
&mut self.inner
}

View file

@ -29,19 +29,22 @@ use custom_str_cmp::custom_str_cmp;
use ext_sort::ext_sort;
use fnv::FnvHasher;
use numeric_str_cmp::{human_numeric_str_cmp, numeric_str_cmp, NumInfo, NumInfoParseSettings};
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use rayon::prelude::*;
use std::cmp::Ordering;
use std::env;
use std::ffi::OsStr;
use std::fs::File;
use std::error::Error;
use std::ffi::{OsStr, OsString};
use std::fmt::Display;
use std::fs::{File, OpenOptions};
use std::hash::{Hash, Hasher};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::ops::Range;
use std::path::Path;
use std::path::PathBuf;
use std::str::Utf8Error;
use unicode_width::UnicodeWidthStr;
use uucore::error::{set_exit_code, UCustomError, UResult, USimpleError, UUsageError};
use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::version_cmp::version_cmp;
use uucore::InvalidEncodingHandling;
@ -121,6 +124,111 @@ const POSITIVE: char = '+';
// available memory into consideration, instead of relying on this constant only.
const DEFAULT_BUF_SIZE: usize = 1_000_000_000; // 1 GB
/// Errors that `sort` reports to the user.
///
/// The `Display` impl for this type turns each variant into the message that is
/// printed, and the `UCustomError` impl maps `Disorder` to exit code 1 and every
/// other variant to exit code 2.
#[derive(Debug)]
enum SortError {
/// The checked input is not sorted. Rendered as `file:line_number: disorder: line`,
/// or as an empty message when `silent` is set.
Disorder {
file: OsString,
line_number: usize,
line: String,
silent: bool,
},
/// A file could not be opened; rendered as `open failed: path: reason`.
/// `Output::new` produces this when the output file cannot be opened.
OpenFailed {
path: String,
error: std::io::Error,
},
/// An input file could not be opened for reading; rendered as
/// `cannot read: path: reason`. Produced by `open`.
ReadFailed {
path: String,
error: std::io::Error,
},
/// A key specification could not be parsed. `key` is the offending key text
/// and `msg` the parser's explanation; produced by `FieldSelector::parse`.
ParseKeyError {
key: String,
msg: String,
},
/// A temporary file could not be created or opened.
OpenTmpFileFailed {
error: std::io::Error,
},
/// The compress program could not be spawned. `code` is the raw OS error
/// number taken from the spawn failure.
CompressProgExecutionFailed {
code: i32,
},
/// Rendered as `'prog' terminated abnormally`.
// NOTE(review): presumably raised when the compress program exits unsuccessfully;
// the code that constructs this variant is not visible here - confirm.
CompressProgTerminatedAbnormally {
prog: String,
},
/// Rendered as `could not create temporary directory`.
TmpDirCreationFailed,
/// Wraps a `Utf8Error`, whose own message is printed unchanged.
// NOTE(review): the variant name looks like a misspelling of `Utf8Error`;
// renaming it would require updating every user of the variant.
Uft8Error {
error: Utf8Error,
},
}
impl Error for SortError {}
impl UCustomError for SortError {
    /// Exit status for this error: 1 when the input was found to be out of
    /// order, 2 for every other failure.
    fn code(&self) -> i32 {
        if matches!(self, SortError::Disorder { .. }) {
            1
        } else {
            2
        }
    }

    /// None of these errors asks for the usage text to be shown.
    fn usage(&self) -> bool {
        false
    }
}
impl Display for SortError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SortError::Disorder {
file,
line_number,
line,
silent,
} => {
if !silent {
write!(
f,
"{}:{}: disorder: {}",
file.to_string_lossy(),
line_number,
line
)
} else {
Ok(())
}
}
SortError::OpenFailed { path, error } => write!(
f,
"open failed: {}: {}",
path,
strip_errno(&error.to_string())
),
SortError::ParseKeyError { key, msg } => {
write!(f, "failed to parse key `{}`: {}", key, msg)
}
SortError::ReadFailed { path, error } => write!(
f,
"cannot read: {}: {}",
path,
strip_errno(&error.to_string())
),
SortError::OpenTmpFileFailed { error } => {
write!(
f,
"failed to open temporary file: {}",
strip_errno(&error.to_string())
)
}
SortError::CompressProgExecutionFailed { code } => {
write!(f, "couldn't execute compress program: errno {}", code)
}
SortError::CompressProgTerminatedAbnormally { prog } => {
write!(f, "'{}' terminated abnormally", prog)
}
SortError::TmpDirCreationFailed => write!(f, "could not create temporary directory"),
SortError::Uft8Error { error } => write!(f, "{}", error),
}
}
}
#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy, Debug)]
enum SortMode {
Numeric,
@ -146,6 +254,49 @@ impl SortMode {
}
}
/// Where the sorted result is written: either a named file or standard output.
pub struct Output {
// `Some((name, handle))` when writing to a named file; `None` selects stdout
// (see `into_write`).
file: Option<(String, File)>,
}
impl Output {
fn new(name: Option<&str>) -> UResult<Self> {
let file = if let Some(name) = name {
// This is different from `File::create()` because we don't truncate the output yet.
// This allows using the output file as an input file.
let file = OpenOptions::new()
.write(true)
.create(true)
.open(name)
.map_err(|e| SortError::OpenFailed {
path: name.to_owned(),
error: e,
})?;
Some((name.to_owned(), file))
} else {
None
};
Ok(Self { file })
}
fn into_write(self) -> BufWriter<Box<dyn Write>> {
BufWriter::new(match self.file {
Some((_name, file)) => {
// truncate the file
let _ = file.set_len(0);
Box::new(file)
}
None => Box::new(stdout()),
})
}
fn as_output_name(&self) -> Option<&str> {
match &self.file {
Some((name, _file)) => Some(name),
None => None,
}
}
}
#[derive(Clone)]
pub struct GlobalSettings {
mode: SortMode,
@ -156,12 +307,11 @@ pub struct GlobalSettings {
ignore_non_printing: bool,
merge: bool,
reverse: bool,
output_file: Option<String>,
stable: bool,
unique: bool,
check: bool,
check_silent: bool,
salt: String,
salt: Option<[u8; 16]>,
selectors: Vec<FieldSelector>,
separator: Option<char>,
threads: String,
@ -209,19 +359,6 @@ impl GlobalSettings {
}
}
fn out_writer(&self) -> BufWriter<Box<dyn Write>> {
match self.output_file {
Some(ref filename) => match File::create(Path::new(&filename)) {
Ok(f) => BufWriter::new(Box::new(f) as Box<dyn Write>),
Err(e) => {
show_error!("{0}: {1}", filename, e.to_string());
panic!("Could not open output file");
}
},
None => BufWriter::new(Box::new(stdout()) as Box<dyn Write>),
}
}
/// Precompute some data needed for sorting.
/// This function **must** be called before starting to sort, and `GlobalSettings` may not be altered
/// afterwards.
@ -253,12 +390,11 @@ impl Default for GlobalSettings {
ignore_non_printing: false,
merge: false,
reverse: false,
output_file: None,
stable: false,
unique: false,
check: false,
check_silent: false,
salt: String::new(),
salt: None,
selectors: vec![],
separator: None,
threads: String::new(),
@ -697,33 +833,37 @@ impl FieldSelector {
}
}
fn parse(key: &str, global_settings: &GlobalSettings) -> Self {
fn parse(key: &str, global_settings: &GlobalSettings) -> UResult<Self> {
let mut from_to = key.split(',');
let (from, from_options) = Self::split_key_options(from_to.next().unwrap());
let to = from_to.next().map(|to| Self::split_key_options(to));
let options_are_empty = from_options.is_empty() && matches!(to, None | Some((_, "")));
crash_if_err!(
2,
if options_are_empty {
// Inherit the global settings if there are no options attached to this key.
(|| {
// This would be ideal for a try block, I think. In the meantime this closure allows
// to use the `?` operator here.
Self::new(
KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
to.map(|(to, _)| {
KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
})
.transpose()?,
KeySettings::from(global_settings),
)
})()
} else {
// Do not inherit from `global_settings`, as there are options attached to this key.
Self::parse_with_options((from, from_options), to)
if options_are_empty {
// Inherit the global settings if there are no options attached to this key.
(|| {
// This would be ideal for a try block, I think. In the meantime this closure allows
// to use the `?` operator here.
Self::new(
KeyPosition::new(from, 1, global_settings.ignore_leading_blanks)?,
to.map(|(to, _)| {
KeyPosition::new(to, 0, global_settings.ignore_leading_blanks)
})
.transpose()?,
KeySettings::from(global_settings),
)
})()
} else {
// Do not inherit from `global_settings`, as there are options attached to this key.
Self::parse_with_options((from, from_options), to)
}
.map_err(|msg| {
SortError::ParseKeyError {
key: key.to_owned(),
msg,
}
.map_err(|e| format!("failed to parse key `{}`: {}", key, e))
)
.into()
})
}
fn parse_with_options(
@ -916,9 +1056,7 @@ impl FieldSelector {
fn get_usage() -> String {
format!(
"{0}
Usage:
{0} [OPTION]... [FILE]...
"{0} [OPTION]... [FILE]...
Write the sorted concatenation of all FILE(s) to standard output.
Mandatory arguments for long options are mandatory for short options too.
With no FILE, or when FILE is -, read standard input.",
@ -937,41 +1075,57 @@ fn make_sort_mode_arg<'a, 'b>(mode: &'a str, short: &'b str, help: &'b str) -> A
arg
}
pub fn uumain(args: impl uucore::Args) -> i32 {
#[uucore_procs::gen_uumain]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let args = args
.collect_str(InvalidEncodingHandling::Ignore)
.accept_any();
let usage = get_usage();
let mut settings: GlobalSettings = Default::default();
let matches = uu_app().usage(&usage[..]).get_matches_from(args);
let matches = match uu_app().usage(&usage[..]).get_matches_from_safe(args) {
Ok(t) => t,
Err(e) => {
// not all clap "Errors" are because of a failure to parse arguments.
// "--version" also causes an Error to be returned, but we should not print to stderr
// nor return with a non-zero exit code in this case (we should print to stdout and return 0).
// This logic is similar to the code in clap, but we return 2 as the exit code in case of real failure
// (clap returns 1).
if e.use_stderr() {
eprintln!("{}", e.message);
set_exit_code(2);
} else {
println!("{}", e.message);
}
return Ok(());
}
};
settings.debug = matches.is_present(options::DEBUG);
// check whether user specified a zero terminated list of files for input, otherwise read files from args
let mut files: Vec<String> = if matches.is_present(options::FILES0_FROM) {
let files0_from: Vec<String> = matches
.values_of(options::FILES0_FROM)
.map(|v| v.map(ToString::to_string).collect())
let mut files: Vec<OsString> = if matches.is_present(options::FILES0_FROM) {
let files0_from: Vec<OsString> = matches
.values_of_os(options::FILES0_FROM)
.map(|v| v.map(ToOwned::to_owned).collect())
.unwrap_or_default();
let mut files = Vec::new();
for path in &files0_from {
let reader = open(path.as_str());
let reader = open(&path)?;
let buf_reader = BufReader::new(reader);
for line in buf_reader.split(b'\0').flatten() {
files.push(
files.push(OsString::from(
std::str::from_utf8(&line)
.expect("Could not parse string from zero terminated input.")
.to_string(),
);
.expect("Could not parse string from zero terminated input."),
));
}
}
files
} else {
matches
.values_of(options::FILES)
.map(|v| v.map(ToString::to_string).collect())
.values_of_os(options::FILES)
.map(|v| v.map(ToOwned::to_owned).collect())
.unwrap_or_default()
};
@ -998,7 +1152,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
} else if matches.is_present(options::modes::RANDOM)
|| matches.value_of(options::modes::SORT) == Some("random")
{
settings.salt = get_rand_string();
settings.salt = Some(get_rand_string());
SortMode::Random
} else {
SortMode::Default
@ -1015,12 +1169,14 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
env::set_var("RAYON_NUM_THREADS", &settings.threads);
}
settings.buffer_size = matches
.value_of(options::BUF_SIZE)
.map_or(DEFAULT_BUF_SIZE, |s| {
GlobalSettings::parse_byte_count(s)
.unwrap_or_else(|e| crash!(2, "{}", format_error_message(e, s, options::BUF_SIZE)))
});
settings.buffer_size =
matches
.value_of(options::BUF_SIZE)
.map_or(Ok(DEFAULT_BUF_SIZE), |s| {
GlobalSettings::parse_byte_count(s).map_err(|e| {
USimpleError::new(2, format_error_message(e, s, options::BUF_SIZE))
})
})?;
settings.tmp_dir = matches
.value_of(options::TMP_DIR)
@ -1030,9 +1186,9 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.compress_prog = matches.value_of(options::COMPRESS_PROG).map(String::from);
if let Some(n_merge) = matches.value_of(options::BATCH_SIZE) {
settings.merge_batch_size = n_merge
.parse()
.unwrap_or_else(|_| crash!(2, "invalid --batch-size argument '{}'", n_merge));
settings.merge_batch_size = n_merge.parse().map_err(|_| {
UUsageError::new(2, format!("invalid --batch-size argument '{}'", n_merge))
})?;
}
settings.zero_terminated = matches.is_present(options::ZERO_TERMINATED);
@ -1053,32 +1209,45 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
settings.ignore_leading_blanks = matches.is_present(options::IGNORE_LEADING_BLANKS);
settings.output_file = matches.value_of(options::OUTPUT).map(String::from);
settings.reverse = matches.is_present(options::REVERSE);
settings.stable = matches.is_present(options::STABLE);
settings.unique = matches.is_present(options::UNIQUE);
if files.is_empty() {
/* if no file, default to stdin */
files.push("-".to_owned());
files.push("-".to_string().into());
} else if settings.check && files.len() != 1 {
crash!(1, "extra operand `{}' not allowed with -c", files[1])
return Err(UUsageError::new(
2,
format!(
"extra operand `{}' not allowed with -c",
files[1].to_string_lossy()
),
));
}
if let Some(arg) = matches.args.get(options::SEPARATOR) {
let separator = arg.vals[0].to_string_lossy();
let separator = separator;
let mut separator = separator.as_ref();
if separator == "\\0" {
separator = "\0";
}
if separator.len() != 1 {
crash!(1, "separator must be exactly one character long");
return Err(UUsageError::new(
2,
"separator must be exactly one character long".into(),
));
}
settings.separator = Some(separator.chars().next().unwrap())
}
if let Some(values) = matches.values_of(options::KEY) {
for value in values {
settings
.selectors
.push(FieldSelector::parse(value, &settings));
let selector = FieldSelector::parse(value, &settings)?;
if selector.settings.mode == SortMode::Random && settings.salt.is_none() {
settings.salt = Some(get_rand_string());
}
settings.selectors.push(selector);
}
}
@ -1099,9 +1268,19 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
);
}
// Verify that we can open all input files.
// It is the correct behavior to close all files afterwards,
// and to reopen them at a later point. This is different from how the output file is handled,
// probably to prevent running out of file descriptors.
for file in &files {
open(file)?;
}
let output = Output::new(matches.value_of(options::OUTPUT))?;
settings.init_precomputed();
exec(&files, &settings)
exec(&mut files, &settings, output)
}
pub fn uu_app() -> App<'static, 'static> {
@ -1112,73 +1291,57 @@ pub fn uu_app() -> App<'static, 'static> {
Arg::with_name(options::modes::SORT)
.long(options::modes::SORT)
.takes_value(true)
.possible_values(
&[
"general-numeric",
"human-numeric",
"month",
"numeric",
"version",
"random",
]
)
.conflicts_with_all(&options::modes::ALL_SORT_MODES)
)
.arg(
make_sort_mode_arg(
options::modes::HUMAN_NUMERIC,
"h",
"compare according to human readable sizes, eg 1M > 100k"
),
)
.arg(
make_sort_mode_arg(
options::modes::MONTH,
"M",
"compare according to month name abbreviation"
),
)
.arg(
make_sort_mode_arg(
options::modes::NUMERIC,
"n",
"compare according to string numerical value"
),
)
.arg(
make_sort_mode_arg(
options::modes::GENERAL_NUMERIC,
"g",
"compare according to string general numerical value"
),
)
.arg(
make_sort_mode_arg(
options::modes::VERSION,
"V",
"Sort by SemVer version number, eg 1.12.2 > 1.1.2",
),
)
.arg(
make_sort_mode_arg(
options::modes::RANDOM,
"R",
"shuffle in random order",
),
.possible_values(&[
"general-numeric",
"human-numeric",
"month",
"numeric",
"version",
"random",
])
.conflicts_with_all(&options::modes::ALL_SORT_MODES),
)
.arg(make_sort_mode_arg(
options::modes::HUMAN_NUMERIC,
"h",
"compare according to human readable sizes, eg 1M > 100k",
))
.arg(make_sort_mode_arg(
options::modes::MONTH,
"M",
"compare according to month name abbreviation",
))
.arg(make_sort_mode_arg(
options::modes::NUMERIC,
"n",
"compare according to string numerical value",
))
.arg(make_sort_mode_arg(
options::modes::GENERAL_NUMERIC,
"g",
"compare according to string general numerical value",
))
.arg(make_sort_mode_arg(
options::modes::VERSION,
"V",
"Sort by SemVer version number, eg 1.12.2 > 1.1.2",
))
.arg(make_sort_mode_arg(
options::modes::RANDOM,
"R",
"shuffle in random order",
))
.arg(
Arg::with_name(options::DICTIONARY_ORDER)
.short("d")
.long(options::DICTIONARY_ORDER)
.help("consider only blanks and alphanumeric characters")
.conflicts_with_all(
&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
]
),
.conflicts_with_all(&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
]),
)
.arg(
Arg::with_name(options::MERGE)
@ -1206,7 +1369,10 @@ pub fn uu_app() -> App<'static, 'static> {
.short("C")
.long(options::check::CHECK_SILENT)
.conflicts_with(options::OUTPUT)
.help("exit successfully if the given file is already sorted, and exit with status 1 otherwise."),
.help(
"exit successfully if the given file is already sorted,\
and exit with status 1 otherwise.",
),
)
.arg(
Arg::with_name(options::IGNORE_CASE)
@ -1219,14 +1385,12 @@ pub fn uu_app() -> App<'static, 'static> {
.short("i")
.long(options::IGNORE_NONPRINTING)
.help("ignore nonprinting characters")
.conflicts_with_all(
&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH
]
),
.conflicts_with_all(&[
options::modes::NUMERIC,
options::modes::GENERAL_NUMERIC,
options::modes::HUMAN_NUMERIC,
options::modes::MONTH,
]),
)
.arg(
Arg::with_name(options::IGNORE_LEADING_BLANKS)
@ -1275,7 +1439,8 @@ pub fn uu_app() -> App<'static, 'static> {
.short("t")
.long(options::SEPARATOR)
.help("custom separator for -k")
.takes_value(true))
.takes_value(true),
)
.arg(
Arg::with_name(options::ZERO_TERMINATED)
.short("z")
@ -1310,13 +1475,13 @@ pub fn uu_app() -> App<'static, 'static> {
.long(options::COMPRESS_PROG)
.help("compress temporary files with PROG, decompress with PROG -d")
.long_help("PROG has to take input from stdin and output to stdout")
.value_name("PROG")
.value_name("PROG"),
)
.arg(
Arg::with_name(options::BATCH_SIZE)
.long(options::BATCH_SIZE)
.help("Merge at most N_MERGE inputs at once.")
.value_name("N_MERGE")
.value_name("N_MERGE"),
)
.arg(
Arg::with_name(options::FILES0_FROM)
@ -1331,24 +1496,27 @@ pub fn uu_app() -> App<'static, 'static> {
.long(options::DEBUG)
.help("underline the parts of the line that are actually used for sorting"),
)
.arg(Arg::with_name(options::FILES).multiple(true).takes_value(true))
.arg(
Arg::with_name(options::FILES)
.multiple(true)
.takes_value(true),
)
}
fn exec(files: &[String], settings: &GlobalSettings) -> i32 {
fn exec(files: &mut [OsString], settings: &GlobalSettings, output: Output) -> UResult<()> {
if settings.merge {
let mut file_merger = merge::merge(files.iter().map(open), settings);
file_merger.write_all(settings);
let file_merger = merge::merge(files, settings, output.as_output_name())?;
file_merger.write_all(settings, output)
} else if settings.check {
if files.len() > 1 {
crash!(1, "only one file allowed with -c");
Err(UUsageError::new(2, "only one file allowed with -c".into()))
} else {
check::check(files.first().unwrap(), settings)
}
return check::check(files.first().unwrap(), settings);
} else {
let mut lines = files.iter().map(open);
ext_sort(&mut lines, settings);
ext_sort(&mut lines, settings, output)
}
0
}
fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_data: &LineData<'a>) {
@ -1387,7 +1555,22 @@ fn compare_by<'a>(
let settings = &selector.settings;
let cmp: Ordering = match settings.mode {
SortMode::Random => random_shuffle(a_str, b_str, &global_settings.salt),
SortMode::Random => {
// check if the two strings are equal
if custom_str_cmp(
a_str,
b_str,
settings.ignore_non_printing,
settings.dictionary_order,
settings.ignore_case,
) == Ordering::Equal
{
Ordering::Equal
} else {
// Only if they are not equal compare by the hash
random_shuffle(a_str, b_str, &global_settings.salt.unwrap())
}
}
SortMode::Numeric => {
let a_num_info = &a_line_data.num_infos
[a.index * global_settings.precomputed.num_infos_per_line + num_info_index];
@ -1536,12 +1719,8 @@ fn general_numeric_compare(a: &GeneralF64ParseResult, b: &GeneralF64ParseResult)
a.partial_cmp(b).unwrap()
}
fn get_rand_string() -> String {
thread_rng()
.sample_iter(&Alphanumeric)
.take(16)
.map(char::from)
.collect::<String>()
fn get_rand_string() -> [u8; 16] {
thread_rng().sample(rand::distributions::Standard)
}
fn get_hash<T: Hash>(t: &T) -> u64 {
@ -1550,10 +1729,9 @@ fn get_hash<T: Hash>(t: &T) -> u64 {
s.finish()
}
fn random_shuffle(a: &str, b: &str, salt: &str) -> Ordering {
let da = get_hash(&[a, salt].concat());
let db = get_hash(&[b, salt].concat());
fn random_shuffle(a: &str, b: &str, salt: &[u8]) -> Ordering {
let da = get_hash(&(a, salt));
let db = get_hash(&(b, salt));
da.cmp(&db)
}
@ -1618,26 +1796,38 @@ fn month_compare(a: &str, b: &str) -> Ordering {
}
}
fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(iter: T, settings: &GlobalSettings) {
let mut writer = settings.out_writer();
fn print_sorted<'a, T: Iterator<Item = &'a Line<'a>>>(
iter: T,
settings: &GlobalSettings,
output: Output,
) {
let mut writer = output.into_write();
for line in iter {
line.print(&mut writer, settings);
}
}
// from cat.rs
fn open(path: impl AsRef<OsStr>) -> Box<dyn Read + Send> {
/// Removes the trailing " (os error XX)" part from an io error message.
///
/// Everything from the first occurrence of " (os error " onwards is cut off;
/// a message without that marker is returned unchanged.
fn strip_errno(err: &str) -> &str {
    match err.find(" (os error ") {
        Some(suffix_start) => &err[..suffix_start],
        None => err,
    }
}
fn open(path: impl AsRef<OsStr>) -> UResult<Box<dyn Read + Send>> {
let path = path.as_ref();
if path == "-" {
let stdin = stdin();
return Box::new(stdin) as Box<dyn Read + Send>;
return Ok(Box::new(stdin) as Box<dyn Read + Send>);
}
match File::open(Path::new(path)) {
Ok(f) => Box::new(f) as Box<dyn Read + Send>,
Err(e) => {
crash!(2, "cannot read: {0:?}: {1}", path, e);
let path = Path::new(path);
match File::open(path) {
Ok(f) => Ok(Box::new(f) as Box<dyn Read + Send>),
Err(error) => Err(SortError::ReadFailed {
path: path.to_string_lossy().to_string(),
error,
}
.into()),
}
}

View file

@ -436,6 +436,7 @@ impl Stater {
'f' => tokens.push(Token::Char('\x0C')),
'n' => tokens.push(Token::Char('\n')),
'r' => tokens.push(Token::Char('\r')),
't' => tokens.push(Token::Char('\t')),
'v' => tokens.push(Token::Char('\x0B')),
c => {
show_warning!("unrecognized escape '\\{}'", c);

View file

@ -35,15 +35,11 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let before = matches.is_present(options::BEFORE);
let regex = matches.is_present(options::REGEX);
let separator = match matches.value_of(options::SEPARATOR) {
Some(m) => {
if m.is_empty() {
crash!(1, "separator cannot be empty")
} else {
m.to_owned()
}
}
None => "\n".to_owned(),
let raw_separator = matches.value_of(options::SEPARATOR).unwrap_or("\n");
let separator = if raw_separator.is_empty() {
"\0"
} else {
raw_separator
};
let files: Vec<String> = match matches.values_of(options::FILE) {
@ -51,7 +47,7 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
None => vec!["-".to_owned()],
};
tac(files, before, regex, &separator[..])
tac(files, before, regex, separator)
}
pub fn uu_app() -> App<'static, 'static> {
@ -97,7 +93,7 @@ fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
let path = Path::new(filename);
if path.is_dir() || path.metadata().is_err() {
if path.is_dir() {
show_error!("dir: read error: Invalid argument");
show_error!("{}: read error: Invalid argument", filename);
} else {
show_error!(
"failed to open '{}' for reading: No such file or directory",
@ -139,9 +135,16 @@ fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
i += 1;
}
}
// If the file contains no line separators, then simply write
// the contents of the file directly to stdout.
if offsets.is_empty() {
out.write_all(&data)
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
return exit_code;
}
// if there isn't a separator at the end of the file, fake it
if offsets.is_empty() || *offsets.last().unwrap() < data.len() - slen {
if *offsets.last().unwrap() < data.len() - slen {
offsets.push(data.len());
}

View file

@ -24,6 +24,10 @@ winapi = { version="0.3", features=["fileapi", "handleapi", "processthreadsapi",
[target.'cfg(target_os = "redox")'.dependencies]
redox_syscall = "0.1"
[target.'cfg(unix)'.dependencies]
nix = "0.20"
libc = "0.2"
[[bin]]
name = "tail"
path = "src/main.rs"

View file

@ -11,7 +11,7 @@
### Others
- [ ] The current implementation does not handle `-` as an alias for stdin.
- [ ] The current implementation doesn't follow stdin on non-Unix platforms.
## Possible optimizations

View file

@ -2,13 +2,14 @@
* This file is part of the uutils coreutils package.
*
* (c) Alexander Batischev <eual.jp@gmail.com>
* (c) Thomas Queiroz <thomasqueirozb@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
#[cfg(unix)]
pub use self::unix::{supports_pid_checks, Pid, ProcessChecker};
pub use self::unix::{stdin_is_pipe_or_fifo, supports_pid_checks, Pid, ProcessChecker};
#[cfg(windows)]
pub use self::windows::{supports_pid_checks, Pid, ProcessChecker};

View file

@ -2,6 +2,7 @@
* This file is part of the uutils coreutils package.
*
* (c) Alexander Batischev <eual.jp@gmail.com>
* (c) Thomas Queiroz <thomasqueirozb@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
@ -9,7 +10,13 @@
// spell-checker:ignore (ToDO) errno EPERM ENOSYS
use std::io::Error;
use std::io::{stdin, Error};
use std::os::unix::prelude::AsRawFd;
use nix::sys::stat::fstat;
use libc::{S_IFIFO, S_IFSOCK};
pub type Pid = libc::pid_t;
@ -40,3 +47,16 @@ pub fn supports_pid_checks(pid: self::Pid) -> bool {
fn get_errno() -> i32 {
Error::last_os_error().raw_os_error().unwrap()
}
pub fn stdin_is_pipe_or_fifo() -> bool {
let fd = stdin().lock().as_raw_fd();
fd >= 0 // GNU tail checks fd >= 0
&& match fstat(fd) {
Ok(stat) => {
let mode = stat.st_mode;
// NOTE: This is probably not the most correct way to check this
(mode & S_IFIFO != 0) || (mode & S_IFSOCK != 0)
}
Err(err) => panic!("{}", err),
}
}

View file

@ -2,6 +2,7 @@
// *
// * (c) Morten Olsen Lysgaard <morten@lysgaard.no>
// * (c) Alexander Batischev <eual.jp@gmail.com>
// * (c) Thomas Queiroz <thomasqueirozb@gmail.com>
// *
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
@ -29,6 +30,9 @@ use std::time::Duration;
use uucore::parse_size::{parse_size, ParseSizeError};
use uucore::ringbuffer::RingBuffer;
#[cfg(unix)]
use crate::platform::stdin_is_pipe_or_fifo;
pub mod options {
pub mod verbosity {
pub static QUIET: &str = "quiet";
@ -130,25 +134,56 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
let files: Vec<String> = matches
.values_of(options::ARG_FILES)
.map(|v| v.map(ToString::to_string).collect())
.unwrap_or_default();
.unwrap_or_else(|| vec![String::from("-")]);
if files.is_empty() {
let mut buffer = BufReader::new(stdin());
unbounded_tail(&mut buffer, &settings);
} else {
let multiple = files.len() > 1;
let mut first_header = true;
let mut readers = Vec::new();
let multiple = files.len() > 1;
let mut first_header = true;
let mut readers: Vec<(Box<dyn BufRead>, &String)> = Vec::new();
for filename in &files {
if (multiple || verbose) && !quiet {
if !first_header {
println!();
}
#[cfg(unix)]
let stdin_string = String::from("standard input");
for filename in &files {
let use_stdin = filename.as_str() == "-";
if (multiple || verbose) && !quiet {
if !first_header {
println!();
}
if use_stdin {
println!("==> standard input <==");
} else {
println!("==> {} <==", filename);
}
first_header = false;
}
first_header = false;
if use_stdin {
let mut reader = BufReader::new(stdin());
unbounded_tail(&mut reader, &settings);
// Don't follow stdin since there are no checks for pipes/FIFOs
//
// FIXME windows has GetFileType which can determine if the file is a pipe/FIFO
// so this check can also be performed
#[cfg(unix)]
{
/*
POSIX specification regarding tail -f
If the input file is a regular file or if the file operand specifies a FIFO, do not
terminate after the last line of the input file has been copied, but read and copy
further bytes from the input file when they become available. If no file operand is
specified and standard input is a pipe or FIFO, the -f option shall be ignored. If
the input file is not a FIFO, pipe, or regular file, it is unspecified whether or
not the -f option shall be ignored.
*/
if settings.follow && !stdin_is_pipe_or_fifo() {
readers.push((Box::new(reader), &stdin_string));
}
}
} else {
let path = Path::new(filename);
if path.is_dir() {
continue;
@ -158,20 +193,20 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
bounded_tail(&mut file, &settings);
if settings.follow {
let reader = BufReader::new(file);
readers.push(reader);
readers.push((Box::new(reader), filename));
}
} else {
let mut reader = BufReader::new(file);
unbounded_tail(&mut reader, &settings);
if settings.follow {
readers.push(reader);
readers.push((Box::new(reader), filename));
}
}
}
}
if settings.follow {
follow(&mut readers[..], &files[..], &settings);
}
if settings.follow {
follow(&mut readers[..], &settings);
}
0
@ -248,8 +283,12 @@ pub fn uu_app() -> App<'static, 'static> {
)
}
fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings: &Settings) {
fn follow<T: BufRead>(readers: &mut [(T, &String)], settings: &Settings) {
assert!(settings.follow);
if readers.is_empty() {
return;
}
let mut last = readers.len() - 1;
let mut read_some = false;
let mut process = platform::ProcessChecker::new(settings.pid);
@ -260,7 +299,7 @@ fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings:
let pid_is_dead = !read_some && settings.pid != 0 && process.is_dead();
read_some = false;
for (i, reader) in readers.iter_mut().enumerate() {
for (i, (reader, filename)) in readers.iter_mut().enumerate() {
// Print all new content since the last pass
loop {
let mut datum = String::new();
@ -269,7 +308,7 @@ fn follow<T: Read>(readers: &mut [BufReader<T>], filenames: &[String], settings:
Ok(_) => {
read_some = true;
if i != last {
println!("\n==> {} <==", filenames[i]);
println!("\n==> {} <==", filename);
last = i;
}
print!("{}", datum);

View file

@ -251,7 +251,7 @@ impl Display for UError {
///
/// A crate like [`quick_error`](https://crates.io/crates/quick-error) might
/// also be used, but will still require an `impl` for the `code` method.
pub trait UCustomError: Error {
pub trait UCustomError: Error + Send {
/// Error code of a custom error.
///
/// Set a return value for each variant of an enum-type to associate an

View file

@ -1,4 +1,5 @@
use crate::common::util::*;
// spell-checker:ignore (ToDO) taaaa tbbbb tcccc
#[test]
fn test_with_tab() {
@ -53,3 +54,140 @@ fn test_with_multiple_files() {
.stdout_contains(" return")
.stdout_contains(" ");
}
#[test]
fn test_tabs_space_separated_list() {
new_ucmd!()
.args(&["--tabs", "3 6 9"])
.pipe_in("a\tb\tc\td\te")
.succeeds()
.stdout_is("a b c d e");
}
#[test]
fn test_tabs_mixed_style_list() {
new_ucmd!()
.args(&["--tabs", ", 3,6 9"])
.pipe_in("a\tb\tc\td\te")
.succeeds()
.stdout_is("a b c d e");
}
#[test]
fn test_tabs_empty_string() {
new_ucmd!()
.args(&["--tabs", ""])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_comma_only() {
new_ucmd!()
.args(&["--tabs", ","])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_space_only() {
new_ucmd!()
.args(&["--tabs", " "])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_slash() {
new_ucmd!()
.args(&["--tabs", "/"])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_plus() {
new_ucmd!()
.args(&["--tabs", "+"])
.pipe_in("a\tb\tc")
.succeeds()
.stdout_is("a b c");
}
#[test]
fn test_tabs_trailing_slash() {
new_ucmd!()
.arg("--tabs=1,/5")
.pipe_in("\ta\tb\tc")
.succeeds()
// 0 1
// 01234567890
.stdout_is(" a b c");
}
#[test]
fn test_tabs_trailing_slash_long_columns() {
new_ucmd!()
.arg("--tabs=1,/3")
.pipe_in("\taaaa\tbbbb\tcccc")
.succeeds()
// 0 1
// 01234567890123456
.stdout_is(" aaaa bbbb cccc");
}
// Tab list whose last entry carries the `+` prefix (`1,+5`), short columns.
#[test]
fn test_tabs_trailing_plus() {
    let tab_option = "--tabs=1,+5";
    let input = "\ta\tb\tc";
    // 0 1
    // 012345678901
    let expected = " a b c";

    let result = new_ucmd!().arg(tab_option).pipe_in(input).succeeds();
    result.stdout_is(expected);
}
// Tab list whose last entry carries the `+` prefix (`1,+3`), with four-character columns.
#[test]
fn test_tabs_trailing_plus_long_columns() {
    let tab_option = "--tabs=1,+3";
    let input = "\taaaa\tbbbb\tcccc";
    // 0 1
    // 012345678901234567
    let expected = " aaaa bbbb cccc";

    let result = new_ucmd!().arg(tab_option).pipe_in(input).succeeds();
    result.stdout_is(expected);
}
// A tab list with a repeated position (`1,1`) is rejected with an "ascending" diagnostic.
#[test]
fn test_tabs_must_be_ascending() {
    let tab_option = "--tabs=1,1";
    let expected_message = "tab sizes must be ascending";

    let result = new_ucmd!().arg(tab_option).fails();
    result.stderr_contains(expected_message);
}
// When several trailing specifiers precede the number, only the last one
// before the number is used.
#[test]
fn test_tabs_keep_last_trailing_specifier() {
    let tab_option = "--tabs=1,+/+/5";
    let input = "\ta\tb\tc";
    // 0 1
    // 01234567890
    let expected = " a b c";

    let result = new_ucmd!().arg(tab_option).pipe_in(input).succeeds();
    result.stdout_is(expected);
}
// A comma-separated list made only of `+` and `/` specifiers (no numbers) is accepted.
#[test]
fn test_tabs_comma_separated_no_numbers() {
    let tab_option = "--tabs=+,/,+,/";
    let input = "\ta\tb\tc";
    let expected = " a b c";

    let result = new_ucmd!().arg(tab_option).pipe_in(input).succeeds();
    result.stdout_is(expected);
}

View file

@ -17,9 +17,9 @@ fn test_id_no_specified_user() {
let exp_result = unwrap_or_return!(expected_result(&ts, &[]));
let mut _exp_stdout = exp_result.stdout_str().to_string();
#[cfg(target_os = "linux")]
#[cfg(not(feature = "feat_selinux"))]
{
// NOTE: (SELinux NotImplemented) strip 'context' part from exp_stdout:
// NOTE: strip 'context' part from exp_stdout if selinux not enabled:
// example:
// uid=1001(runner) gid=121(docker) groups=121(docker),4(adm),101(systemd-journal) \
// context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
@ -363,3 +363,88 @@ fn test_id_zero() {
}
}
}
// Exercises the context flag (`-Z` / `--context`) on its own and combined with
// `-z`/`--zero`, `--name`/`--real` and `--user`/`--group`/`--groups`.
// Successful combinations are compared against `expected_result` for the same
// arguments; combinations with `--user`/`--group`/`--groups` are only required to fail.
// Compiled only when the `feat_selinux` feature is enabled.
#[test]
#[cfg(feature = "feat_selinux")]
fn test_id_context() {
use selinux::{self, KernelSupport};
// Nothing to check when the kernel reports no SELinux support: return early.
if selinux::kernel_support() == KernelSupport::Unsupported {
println!("test skipped: Kernel has no support for SElinux context",);
return;
}
let ts = TestScenario::new(util_name!());
for c_flag in &["-Z", "--context"] {
// Context flag alone.
// NOTE(review): `unwrap_or_return!` presumably returns from the test when no
// expected result is available — confirm against the macro definition.
ts.ucmd()
.args(&[c_flag])
.succeeds()
.stdout_only(unwrap_or_return!(expected_result(&ts, &[c_flag])).stdout_str());
for &z_flag in &["-z", "--zero"] {
// Context flag + zero flag.
let args = [c_flag, z_flag];
ts.ucmd()
.args(&args)
.succeeds()
.stdout_only(unwrap_or_return!(expected_result(&ts, &args)).stdout_str());
for &opt1 in &["--name", "--real"] {
// id: cannot print only names or real IDs in default format
let args = [opt1, c_flag];
ts.ucmd()
.args(&args)
.succeeds()
.stdout_only(unwrap_or_return!(expected_result(&ts, &args)).stdout_str());
// Same combination with the zero flag appended.
let args = [opt1, c_flag, z_flag];
ts.ucmd()
.args(&args)
.succeeds()
.stdout_only(unwrap_or_return!(expected_result(&ts, &args)).stdout_str());
for &opt2 in &["--user", "--group", "--groups"] {
// u/g/G n/r z Z
// for now, we print clap's standard response for "conflicts_with" instead of:
// id: cannot print "only" of more than one choice
let args = [opt2, c_flag, opt1];
// Only failure is asserted; the output comparison below is disabled.
let _result = ts.ucmd().args(&args).fails();
// let exp_result = unwrap_or_return!(expected_result(&args));
// result
// .stdout_is(exp_result.stdout_str())
// .stderr_is(exp_result.stderr_str())
// .code_is(exp_result.code());
}
}
for &opt2 in &["--user", "--group", "--groups"] {
// u/g/G z Z
// for now, we print clap's standard response for "conflicts_with" instead of:
// id: cannot print "only" of more than one choice
let args = [opt2, c_flag];
// Only failure is asserted; the output comparison below is disabled.
let _result = ts.ucmd().args(&args).fails();
// let exp_result = unwrap_or_return!(expected_result(&args));
// result
// .stdout_is(exp_result.stdout_str())
// .stderr_is(exp_result.stderr_str())
// .code_is(exp_result.code());
}
}
}
}
// With `POSIXLY_CORRECT` set, the output must not contain a `context=` part.
// On Linux builds with `feat_selinux`, a plain run is additionally expected to
// print `context=` when the kernel supports SELinux.
#[test]
#[cfg(unix)]
fn test_id_no_specified_user_posixly() {
    // gnu/tests/id/no-context.sh
    let scenario = TestScenario::new(util_name!());

    let posix_run = scenario.ucmd().env("POSIXLY_CORRECT", "1").run();
    assert!(!posix_run.stdout_str().contains("context="));
    // Success of the run is only enforced outside of CI.
    if !is_ci() {
        posix_run.success();
    }

    #[cfg(all(target_os = "linux", feature = "feat_selinux"))]
    {
        use selinux::{self, KernelSupport};
        if selinux::kernel_support() != KernelSupport::Unsupported {
            let default_run = scenario.ucmd().succeeds();
            assert!(default_run.stdout_str().contains("context="));
        } else {
            println!("test skipped: Kernel has no support for SElinux context",);
        }
    }
}

View file

@ -181,7 +181,7 @@ fn test_check_zero_terminated_failure() {
.arg("-c")
.arg("zero-terminated.txt")
.fails()
.stdout_is("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n");
.stderr_only("sort: zero-terminated.txt:2: disorder: ../../fixtures/du\n");
}
#[test]
@ -220,32 +220,29 @@ fn test_random_shuffle_contains_all_lines() {
#[test]
fn test_random_shuffle_two_runs_not_the_same() {
// check to verify that two random shuffles are not equal; this has the
// potential to fail in the very unlikely event that the random order is the same
// as the starting order, or if both random sorts end up having the same order.
const FILE: &str = "default_unsorted_ints.expected";
let (at, _ucmd) = at_and_ucmd!();
let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
let expected = at.read(FILE);
let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
for arg in &["-R", "-k1,1R"] {
// check to verify that two random shuffles are not equal; this has the
// potential to fail in the very unlikely event that the random order is the same
// as the starting order, or if both random sorts end up having the same order.
const FILE: &str = "default_unsorted_ints.expected";
let (at, _ucmd) = at_and_ucmd!();
let result = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str();
let expected = at.read(FILE);
let unexpected = new_ucmd!().arg(arg).arg(FILE).run().stdout_move_str();
assert_ne!(result, expected);
assert_ne!(result, unexpected);
assert_ne!(result, expected);
assert_ne!(result, unexpected);
}
}
#[test]
fn test_random_shuffle_contains_two_runs_not_the_same() {
// check to verify that two random shuffles are not equal; this has the
// potential to fail in the unlikely event that random order is the same
// as the starting order, or if both random sorts end up having the same order.
const FILE: &str = "default_unsorted_ints.expected";
let (at, _ucmd) = at_and_ucmd!();
let result = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
let expected = at.read(FILE);
let unexpected = new_ucmd!().arg("-R").arg(FILE).run().stdout_move_str();
assert_ne!(result, expected);
assert_ne!(result, unexpected);
fn test_random_ignore_case() {
let input = "ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n";
new_ucmd!()
.args(&["-fR"])
.pipe_in(input)
.succeeds()
.stdout_is(input);
}
#[test]
@ -774,14 +771,15 @@ fn test_check() {
new_ucmd!()
.arg(diagnose_arg)
.arg("check_fail.txt")
.arg("--buffer-size=10b")
.fails()
.stdout_is("sort: check_fail.txt:6: disorder: 5\n");
.stderr_only("sort: check_fail.txt:6: disorder: 5\n");
new_ucmd!()
.arg(diagnose_arg)
.arg("multiple_files.expected")
.succeeds()
.stdout_is("");
.stderr_is("");
}
}
@ -796,6 +794,18 @@ fn test_check_silent() {
}
}
// `-c -u` on input with a repeated line must fail with exit code 1 and report the disorder.
#[test]
fn test_check_unique() {
    // Due to a clap bug the combination "-cu" does not work. "-c -u" works.
    // See https://github.com/clap-rs/clap/issues/2624
    let args = ["-c", "-u"];
    let duplicated_lines = "A\nA\n";
    let expected_err = "sort: -:2: disorder: A";

    new_ucmd!()
        .args(&args)
        .pipe_in(duplicated_lines)
        .fails()
        .code_is(1)
        .stderr_only(expected_err);
}
#[test]
fn test_dictionary_and_nonprinting_conflicts() {
let conflicting_args = ["n", "h", "g", "M"];
@ -839,9 +849,9 @@ fn test_nonexistent_file() {
.status_code(2)
.stderr_only(
#[cfg(not(windows))]
"sort: cannot read: \"nonexistent.txt\": No such file or directory (os error 2)",
"sort: cannot read: nonexistent.txt: No such file or directory",
#[cfg(windows)]
"sort: cannot read: \"nonexistent.txt\": The system cannot find the file specified. (os error 2)",
"sort: cannot read: nonexistent.txt: The system cannot find the file specified.",
);
}
@ -883,6 +893,29 @@ fn test_compress() {
.stdout_only_fixture("ext_sort.expected");
}
// Merge (`-m --unique`) of the interleaved fixtures, each listed twice, with a
// compress program, `-S 10` and `--batch-size=2`; output must match the
// merged fixture. Linux only.
#[test]
#[cfg(target_os = "linux")]
fn test_compress_merge() {
    let first = "merge_ints_interleaved_1.txt";
    let second = "merge_ints_interleaved_2.txt";
    let third = "merge_ints_interleaved_3.txt";
    let args = [
        "--compress-program",
        "gzip",
        "-S",
        "10",
        "--batch-size=2",
        "-m",
        "--unique",
        first,
        second,
        third,
        third,
        second,
        first,
    ];

    let result = new_ucmd!().args(&args).succeeds();
    result.stdout_only_fixture("merge_ints_interleaved.expected");
}
#[test]
fn test_compress_fail() {
TestScenario::new(util_name!())
@ -959,3 +992,102 @@ fn test_key_takes_one_arg() {
.succeeds()
.stdout_is_fixture("keys_open_ended.expected");
}
// `-o` pointing into a missing directory must fail with status 2 and an
// "open failed" message, both with empty and with non-empty stdin.
#[test]
fn test_verifies_out_file() {
    // The OS-specific part of the message differs between Windows and other platforms.
    #[cfg(not(windows))]
    let expected_err =
        "sort: open failed: nonexistent_dir/nonexistent_file: No such file or directory";
    #[cfg(windows)]
    let expected_err =
        "sort: open failed: nonexistent_dir/nonexistent_file: The system cannot find the path specified.";

    for &input in &["" /* no input */, "some input"] {
        new_ucmd!()
            .args(&["-o", "nonexistent_dir/nonexistent_file"])
            .pipe_in(input)
            .ignore_stdin_write_error()
            .fails()
            .status_code(2)
            .stderr_only(expected_err);
    }
}
// With an invalid key (`-k 0`) plus unusable output and input paths, the
// reported error is the key-parsing one.
#[test]
fn test_verifies_files_after_keys() {
    let args = [
        "-o",
        "nonexistent_dir/nonexistent_file",
        "-k",
        "0",
        "nonexistent_dir/input_file",
    ];

    let result = new_ucmd!().args(&args).fails();
    result
        .status_code(2)
        .stderr_contains("failed to parse key");
}
// A missing input file listed after `/dev/random` must still produce the
// "cannot read" error with status 2.
#[test]
#[cfg(unix)]
fn test_verifies_input_files() {
    let args = ["/dev/random", "nonexistent_file"];
    let expected_err = "sort: cannot read: nonexistent_file: No such file or directory";

    new_ucmd!()
        .args(&args)
        .fails()
        .status_code(2)
        .stderr_is(expected_err);
}
// `-t "\0"` (backslash-zero spelled out) selects NUL as the field separator for `-k`.
#[test]
fn test_separator_null() {
    let args = ["-k1,1", "-k3,3", "-t", "\\0"];
    let input = "z\0a\0b\nz\0b\0a\na\0z\0z\n";
    let expected = "a\0z\0z\nz\0b\0a\nz\0a\0b\n";

    let result = new_ucmd!().args(&args).pipe_in(input).succeeds();
    result.stdout_only(expected);
}
// Merging a file with itself into itself (`-m -u -o file file file file`)
// must leave the file's content unchanged.
#[test]
fn test_output_is_input() {
    let content = "a\nb\nc\n";
    let name = "file";
    let (at, mut cmd) = at_and_ucmd!();

    at.touch(name);
    at.append(name, content);

    let args = ["-m", "-u", "-o", name, name, name, name];
    cmd.args(&args).succeeds();

    assert_eq!(at.read(name), content);
}
// Writing the output to `/dev/null` via `-o` must succeed.
#[test]
#[cfg(unix)]
fn test_output_device() {
    let args = ["-o", "/dev/null"];
    new_ucmd!().args(&args).pipe_in("input").succeeds();
}
// Merging the `empty.txt` fixture succeeds and prints nothing on either stream.
#[test]
fn test_merge_empty_input() {
    let args = ["-m", "empty.txt"];

    let result = new_ucmd!().args(&args).succeeds();
    result.no_stderr().no_stdout();
}
// `--version` must succeed and mention the utility name on stdout.
#[test]
fn test_no_error_for_version() {
    let result = new_ucmd!().arg("--version").succeeds();
    result.stdout_contains("sort");
}
// An unknown option must fail with status 2 and name the offending option on stderr.
#[test]
fn test_wrong_args_exit_code() {
    let unknown_option = "--misspelled";

    let result = new_ucmd!().arg(unknown_option).fails();
    result.status_code(2).stderr_contains(unknown_option);
}

View file

@ -64,7 +64,7 @@ mod test_generate_tokens {
#[test]
fn printf_format() {
let s = "%-# 15a\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.-23w\\x12\\167\\132\\112\\n";
let s = "%-# 15a\\t\\r\\\"\\\\\\a\\b\\e\\f\\v%+020.-23w\\x12\\167\\132\\112\\n";
let expected = vec![
Token::Directive {
flag: F_LEFT | F_ALTER | F_SPACE,
@ -72,6 +72,7 @@ mod test_generate_tokens {
precision: -1,
format: 'a',
},
Token::Char('\t'),
Token::Char('\r'),
Token::Char('"'),
Token::Char('\\'),

View file

@ -66,5 +66,19 @@ fn test_invalid_input() {
.ucmd()
.arg("a")
.fails()
.stderr_contains("dir: read error: Invalid argument");
.stderr_contains("a: read error: Invalid argument");
}
// Input without any line separator is passed through unchanged.
#[test]
fn test_no_line_separators() {
    let input = "a";
    let expected = "a";

    let result = new_ucmd!().pipe_in(input).succeeds();
    result.stdout_is(expected);
}
// An empty `-s` argument makes NUL-terminated records swap order in the output.
#[test]
fn test_null_separator() {
    let args = ["-s", ""];
    let input = "a\0b\0";
    let expected = "b\0a\0";

    let result = new_ucmd!().args(&args).pipe_in(input).succeeds();
    result.stdout_is(expected);
}

View file

@ -23,6 +23,15 @@ fn test_stdin_default() {
.stdout_is_fixture("foobar_stdin_default.expected");
}
// Passing `-` as the file operand reads the piped fixture from stdin and
// must give the same output as the default-stdin case.
#[test]
fn test_stdin_explicit() {
    let expected_fixture = "foobar_stdin_default.expected";

    let result = new_ucmd!().pipe_in_fixture(FOOBAR_TXT).arg("-").run();
    result.stdout_is_fixture(expected_fixture);
}
#[test]
fn test_single_default() {
new_ucmd!()

View file

@ -591,28 +591,40 @@ impl AtPath {
}
}
pub fn hard_link(&self, src: &str, dst: &str) {
pub fn hard_link(&self, original: &str, link: &str) {
log_info(
"hard_link",
&format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)),
&format!(
"{},{}",
self.plus_as_string(original),
self.plus_as_string(link)
),
);
hard_link(&self.plus(src), &self.plus(dst)).unwrap();
hard_link(&self.plus(original), &self.plus(link)).unwrap();
}
pub fn symlink_file(&self, src: &str, dst: &str) {
pub fn symlink_file(&self, original: &str, link: &str) {
log_info(
"symlink",
&format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)),
&format!(
"{},{}",
self.plus_as_string(original),
self.plus_as_string(link)
),
);
symlink_file(&self.plus(src), &self.plus(dst)).unwrap();
symlink_file(&self.plus(original), &self.plus(link)).unwrap();
}
pub fn symlink_dir(&self, src: &str, dst: &str) {
pub fn symlink_dir(&self, original: &str, link: &str) {
log_info(
"symlink",
&format!("{},{}", self.plus_as_string(src), self.plus_as_string(dst)),
&format!(
"{},{}",
self.plus_as_string(original),
self.plus_as_string(link)
),
);
symlink_dir(&self.plus(src), &self.plus(dst)).unwrap();
symlink_dir(&self.plus(original), &self.plus(link)).unwrap();
}
pub fn is_symlink(&self, path: &str) -> bool {

0
tests/fixtures/sort/empty.txt vendored Normal file
View file