mirror of
https://github.com/sharkdp/fd
synced 2024-09-30 04:55:28 +00:00
Merge branch 'master' into dir_alias
This commit is contained in:
commit
c591106b86
2
.github/workflows/CICD.yml
vendored
2
.github/workflows/CICD.yml
vendored
|
@ -209,7 +209,7 @@ jobs:
|
|||
|
||||
DPKG_BASENAME=${{ needs.crate_metadata.outputs.name }}
|
||||
DPKG_CONFLICTS=${{ needs.crate_metadata.outputs.name }}-musl
|
||||
case ${{ matrix.job.target }} in *-musl) DPKG_BASENAME=${{ needs.crate_metadata.outputs.name }}-musl ; DPKG_CONFLICTS=${{ needs.crate_metadata.outputs.name }} ;; esac;
|
||||
case ${{ matrix.job.target }} in *-musl*) DPKG_BASENAME=${{ needs.crate_metadata.outputs.name }}-musl ; DPKG_CONFLICTS=${{ needs.crate_metadata.outputs.name }} ;; esac;
|
||||
DPKG_VERSION=${{ needs.crate_metadata.outputs.version }}
|
||||
|
||||
unset DPKG_ARCH
|
||||
|
|
46
CHANGELOG.md
46
CHANGELOG.md
|
@ -2,21 +2,55 @@
|
|||
|
||||
## Features
|
||||
|
||||
|
||||
## Bugfixes
|
||||
|
||||
- Respect NO_COLOR environment variable with `--list-details` option. (#1455)
|
||||
|
||||
|
||||
## Changes
|
||||
|
||||
|
||||
## Other
|
||||
|
||||
|
||||
|
||||
|
||||
# v9.0.0
|
||||
|
||||
## Performance
|
||||
|
||||
- Performance has been *significantly improved*, both due to optimizations in the underlying `ignore`
|
||||
crate (#1429), and in `fd` itself (#1422, #1408, #1362) - @tavianator.
|
||||
[Benchmarks results](https://gist.github.com/tavianator/32edbe052f33ef60570cf5456b59de81) show gains
|
||||
of 6-8x for full traversals of smaller directories (100k files) and up to 13x for larger directories (1M files).
|
||||
|
||||
- The default number of threads is now constrained to be at most 64. This should improve startup time on
|
||||
systems with many CPU cores. (#1203, #1410, #1412, #1431) - @tmccombs and @tavianator
|
||||
|
||||
- New flushing behavior when writing output to stdout, providing better performance for TTY and non-TTY
|
||||
use cases, see #1452 and #1313 (@tavianator).
|
||||
|
||||
## Features
|
||||
|
||||
- Support character and block device file types, see #1213 and #1336 (@cgzones)
|
||||
- Breaking: `.git/` is now ignored by default when using `--hidden` / `-H`, use `--no-ignore` / `-I` or
|
||||
`--no-ignore-vcs` to override, see #1387 and #1396 (@skoriop)
|
||||
|
||||
|
||||
## Bugfixes
|
||||
|
||||
- Fix `NO_COLOR` support, see #1421 (@acuteenvy)
|
||||
|
||||
## Changes
|
||||
|
||||
- The default number of threads is now constrained to be at most 16. This should improve startup time on
|
||||
systems with many CPU cores. (#1203)
|
||||
|
||||
## Other
|
||||
|
||||
- Fixed documentation typos, see #1409 (@marcospb19)
|
||||
|
||||
## Thanks
|
||||
|
||||
Special thanks to @tavianator for his incredible work on performance in the `ignore` crate and `fd` itself.
|
||||
|
||||
|
||||
|
||||
# v8.7.1
|
||||
|
||||
## Bugfixes
|
||||
|
|
22
Cargo.lock
generated
22
Cargo.lock
generated
|
@ -154,9 +154,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.4.7"
|
||||
version = "4.4.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b"
|
||||
checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
|
@ -164,9 +164,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.4.7"
|
||||
version = "4.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663"
|
||||
checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
|
@ -313,7 +313,7 @@ checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
|
|||
|
||||
[[package]]
|
||||
name = "fd-find"
|
||||
version = "8.7.1"
|
||||
version = "9.0.0"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"anyhow",
|
||||
|
@ -465,9 +465,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.149"
|
||||
version = "0.2.150"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
|
||||
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
|
@ -483,9 +483,9 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
|||
|
||||
[[package]]
|
||||
name = "lscolors"
|
||||
version = "0.15.0"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf7015a04103ad78abb77e4b79ed151e767922d1cfde5f62640471c629a2320d"
|
||||
checksum = "ab0b209ec3976527806024406fe765474b9a1750a0ed4b8f0372364741f50e7b"
|
||||
dependencies = [
|
||||
"nu-ansi-term",
|
||||
]
|
||||
|
@ -749,9 +749,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "test-case"
|
||||
version = "3.2.1"
|
||||
version = "3.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8f1e820b7f1d95a0cdbf97a5df9de10e1be731983ab943e56703ac1b8e9d425"
|
||||
checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8"
|
||||
dependencies = [
|
||||
"test-case-macros",
|
||||
]
|
||||
|
|
|
@ -16,7 +16,7 @@ license = "MIT OR Apache-2.0"
|
|||
name = "fd-find"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/sharkdp/fd"
|
||||
version = "8.7.1"
|
||||
version = "9.0.0"
|
||||
edition= "2021"
|
||||
rust-version = "1.70.0"
|
||||
|
||||
|
@ -51,7 +51,7 @@ clap_complete = {version = "4.4.4", optional = true}
|
|||
faccess = "0.2.4"
|
||||
|
||||
[dependencies.clap]
|
||||
version = "4.4.7"
|
||||
version = "4.4.10"
|
||||
features = ["suggestions", "color", "wrap_help", "cargo", "derive"]
|
||||
|
||||
[dependencies.chrono]
|
||||
|
@ -60,7 +60,7 @@ default-features = false
|
|||
features = ["std", "clock"]
|
||||
|
||||
[dependencies.lscolors]
|
||||
version = "0.15"
|
||||
version = "0.16"
|
||||
default-features = false
|
||||
features = ["nu-ansi-term"]
|
||||
|
||||
|
@ -80,7 +80,7 @@ jemallocator = {version = "0.5.4", optional = true}
|
|||
diff = "0.1"
|
||||
tempfile = "3.8"
|
||||
filetime = "0.2"
|
||||
test-case = "3.1"
|
||||
test-case = "3.3"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
|
|
95
README.md
95
README.md
|
@ -143,7 +143,7 @@ target/debug/deps/libnum_cpus-f5ce7ef99006aa05.rlib
|
|||
```
|
||||
|
||||
To really search *all* files and directories, simply combine the hidden and ignore features to show
|
||||
everything (`-HI`).
|
||||
everything (`-HI`) or use `-u`/`--unrestricted`.
|
||||
|
||||
### Matching the full path
|
||||
By default, *fd* only matches the filename of each file. However, using the `--full-path` or `-p` option,
|
||||
|
@ -261,7 +261,9 @@ To make exclude-patterns like these permanent, you can create a `.fdignore` file
|
|||
/mnt/external-drive
|
||||
*.bak
|
||||
```
|
||||
Note: `fd` also supports `.ignore` files that are used by other programs such as `rg` or `ag`.
|
||||
|
||||
> [!NOTE]
|
||||
> `fd` also supports `.ignore` files that are used by other programs such as `rg` or `ag`.
|
||||
|
||||
If you want `fd` to ignore these patterns globally, you can put them in `fd`'s global ignore file.
|
||||
This is usually located in `~/.config/fd/ignore` in macOS or Linux, and `%APPDATA%\fd\ignore` in
|
||||
|
@ -284,7 +286,8 @@ option:
|
|||
If you also want to remove a certain class of directories, you can use the same technique. You will
|
||||
have to use `rm`s `--recursive`/`-r` flag to remove directories.
|
||||
|
||||
Note: there are scenarios where using `fd … -X rm -r` can cause race conditions: if you have a
|
||||
> [!NOTE]
|
||||
> There are scenarios where using `fd … -X rm -r` can cause race conditions: if you have a
|
||||
path like `…/foo/bar/foo/…` and want to remove all directories named `foo`, you can end up in a
|
||||
situation where the outer `foo` directory is removed first, leading to (harmless) *"'foo/bar/foo':
|
||||
No such file or directory"* errors in the `rm` call.
|
||||
|
@ -331,64 +334,57 @@ Options:
|
|||
|
||||
## Benchmark
|
||||
|
||||
Let's search my home folder for files that end in `[0-9].jpg`. It contains ~190.000
|
||||
subdirectories and about a million files. For averaging and statistical analysis, I'm using
|
||||
Let's search my home folder for files that end in `[0-9].jpg`. It contains ~750.000
|
||||
subdirectories and about a 4 million files. For averaging and statistical analysis, I'm using
|
||||
[hyperfine](https://github.com/sharkdp/hyperfine). The following benchmarks are performed
|
||||
with a "warm"/pre-filled disk-cache (results for a "cold" disk-cache show the same trends).
|
||||
|
||||
Let's start with `find`:
|
||||
```
|
||||
Benchmark #1: find ~ -iregex '.*[0-9]\.jpg$'
|
||||
|
||||
Time (mean ± σ): 7.236 s ± 0.090 s
|
||||
|
||||
Range (min … max): 7.133 s … 7.385 s
|
||||
Benchmark 1: find ~ -iregex '.*[0-9]\.jpg$'
|
||||
Time (mean ± σ): 19.922 s ± 0.109 s
|
||||
Range (min … max): 19.765 s … 20.065 s
|
||||
```
|
||||
|
||||
`find` is much faster if it does not need to perform a regular-expression search:
|
||||
```
|
||||
Benchmark #2: find ~ -iname '*[0-9].jpg'
|
||||
|
||||
Time (mean ± σ): 3.914 s ± 0.027 s
|
||||
|
||||
Range (min … max): 3.876 s … 3.964 s
|
||||
Benchmark 2: find ~ -iname '*[0-9].jpg'
|
||||
Time (mean ± σ): 11.226 s ± 0.104 s
|
||||
Range (min … max): 11.119 s … 11.466 s
|
||||
```
|
||||
|
||||
Now let's try the same for `fd`. Note that `fd` *always* performs a regular expression
|
||||
search. The options `--hidden` and `--no-ignore` are needed for a fair comparison,
|
||||
otherwise `fd` does not have to traverse hidden folders and ignored paths (see below):
|
||||
Now let's try the same for `fd`. Note that `fd` performs a regular expression
|
||||
search by default. The options `-u`/`--unrestricted` option is needed here for
|
||||
a fair comparison. Otherwise `fd` does not have to traverse hidden folders and
|
||||
ignored paths (see below):
|
||||
```
|
||||
Benchmark #3: fd -HI '.*[0-9]\.jpg$' ~
|
||||
|
||||
Time (mean ± σ): 811.6 ms ± 26.9 ms
|
||||
|
||||
Range (min … max): 786.0 ms … 870.7 ms
|
||||
Benchmark 3: fd -u '[0-9]\.jpg$' ~
|
||||
Time (mean ± σ): 854.8 ms ± 10.0 ms
|
||||
Range (min … max): 839.2 ms … 868.9 ms
|
||||
```
|
||||
For this particular example, `fd` is approximately nine times faster than `find -iregex`
|
||||
and about five times faster than `find -iname`. By the way, both tools found the exact
|
||||
same 20880 files :smile:.
|
||||
For this particular example, `fd` is approximately **23 times faster** than `find -iregex`
|
||||
and about **13 times faster** than `find -iname`. By the way, both tools found the exact
|
||||
same 546 files :smile:.
|
||||
|
||||
Finally, let's run `fd` without `--hidden` and `--no-ignore` (this can lead to different
|
||||
search results, of course). If *fd* does not have to traverse the hidden and git-ignored
|
||||
folders, it is almost an order of magnitude faster:
|
||||
```
|
||||
Benchmark #4: fd '[0-9]\.jpg$' ~
|
||||
|
||||
Time (mean ± σ): 123.7 ms ± 6.0 ms
|
||||
|
||||
Range (min … max): 118.8 ms … 140.0 ms
|
||||
```
|
||||
|
||||
**Note**: This is *one particular* benchmark on *one particular* machine. While I have
|
||||
performed quite a lot of different tests (and found consistent results), things might
|
||||
be different for you! I encourage everyone to try it out on their own. See
|
||||
**Note**: This is *one particular* benchmark on *one particular* machine. While we have
|
||||
performed a lot of different tests (and found consistent results), things might
|
||||
be different for you! We encourage everyone to try it out on their own. See
|
||||
[this repository](https://github.com/sharkdp/fd-benchmarks) for all necessary scripts.
|
||||
|
||||
Concerning *fd*'s speed, the main credit goes to the `regex` and `ignore` crates that are also used
|
||||
in [ripgrep](https://github.com/BurntSushi/ripgrep) (check it out!).
|
||||
Concerning *fd*'s speed, a lot of credit goes to the `regex` and `ignore` crates that are
|
||||
also used in [ripgrep](https://github.com/BurntSushi/ripgrep) (check it out!).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### `fd` does not find my file!
|
||||
|
||||
Remember that `fd` ignores hidden directories and files by default. It also ignores patterns
|
||||
from `.gitignore` files. If you want to make sure to find absolutely every possible file, always
|
||||
use the options `-u`/`--unrestricted` option (or `-HI` to enable hidden and ignored files):
|
||||
``` bash
|
||||
> fd -u …
|
||||
```
|
||||
|
||||
### Colorized output
|
||||
|
||||
`fd` can colorize files by extension, just like `ls`. In order for this to work, the environment
|
||||
|
@ -402,15 +398,6 @@ for alternative, more complete (or more colorful) variants, see [here](https://g
|
|||
|
||||
`fd` also honors the [`NO_COLOR`](https://no-color.org/) environment variable.
|
||||
|
||||
### `fd` does not find my file!
|
||||
|
||||
Remember that `fd` ignores hidden directories and files by default. It also ignores patterns
|
||||
from `.gitignore` files. If you want to make sure to find absolutely every possible file, always
|
||||
use the options `-H` and `-I` to disable these two features:
|
||||
``` bash
|
||||
> fd -HI …
|
||||
```
|
||||
|
||||
### `fd` doesn't seem to interpret my regex pattern correctly
|
||||
|
||||
A lot of special regex characters (like `[]`, `^`, `$`, ..) are also special characters in your
|
||||
|
@ -543,7 +530,7 @@ Make sure that `$HOME/.local/bin` is in your `$PATH`.
|
|||
If you use an older version of Ubuntu, you can download the latest `.deb` package from the
|
||||
[release page](https://github.com/sharkdp/fd/releases) and install it via:
|
||||
``` bash
|
||||
sudo dpkg -i fd_8.7.1_amd64.deb # adapt version number and architecture
|
||||
sudo dpkg -i fd_9.0.0_amd64.deb # adapt version number and architecture
|
||||
```
|
||||
|
||||
### On Debian
|
||||
|
@ -677,7 +664,7 @@ With Rust's package manager [cargo](https://github.com/rust-lang/cargo), you can
|
|||
```
|
||||
cargo install fd-find
|
||||
```
|
||||
Note that rust version *1.64.0* or later is required.
|
||||
Note that rust version *1.70.0* or later is required.
|
||||
|
||||
`make` is also needed for the build.
|
||||
|
||||
|
@ -708,8 +695,6 @@ cargo install --path .
|
|||
|
||||
## License
|
||||
|
||||
Copyright (c) 2017-2021 The fd developers
|
||||
|
||||
`fd` is distributed under the terms of both the MIT License and the Apache License 2.0.
|
||||
|
||||
See the [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) files for license details.
|
||||
|
|
2
doc/release-checklist.md
vendored
2
doc/release-checklist.md
vendored
|
@ -9,7 +9,7 @@ necessary changes for the upcoming release.
|
|||
- [ ] Update version in `Cargo.toml`. Run `cargo build` to update `Cargo.lock`.
|
||||
Make sure to `git add` the `Cargo.lock` changes as well.
|
||||
- [ ] Find the current min. supported Rust version by running
|
||||
`grep '^\s*MIN_SUPPORTED_RUST_VERSION' .github/workflows/CICD.yml`.
|
||||
`grep rust-version Cargo.toml`.
|
||||
- [ ] Update the `fd` version and the min. supported Rust version in `README.md`.
|
||||
- [ ] Update `CHANGELOG.md`. Change the heading of the *"Upcoming release"* section
|
||||
to the version of this release.
|
||||
|
|
35
src/cli.rs
35
src/cli.rs
|
@ -715,24 +715,14 @@ impl Opts {
|
|||
fn default_num_threads() -> NonZeroUsize {
|
||||
// If we can't get the amount of parallelism for some reason, then
|
||||
// default to a single thread, because that is safe.
|
||||
// Note that the minimum value for a NonZeroUsize is 1.
|
||||
// Unfortunately, we can't do `NonZeroUsize::new(1).unwrap()`
|
||||
// in a const context.
|
||||
const FALLBACK_PARALLELISM: NonZeroUsize = NonZeroUsize::MIN;
|
||||
// As the number of threads increases, the startup time suffers from
|
||||
// initializing the threads, and we get diminishing returns from additional
|
||||
// parallelism. So set a maximum number of threads to use by default.
|
||||
//
|
||||
// This value is based on some empirical observations, but the ideal value
|
||||
// probably depends on the exact hardware in use.
|
||||
//
|
||||
// Safety: The literal "20" is known not to be zero.
|
||||
const MAX_DEFAULT_THREADS: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(20) };
|
||||
let fallback = NonZeroUsize::MIN;
|
||||
// To limit startup overhead on massively parallel machines, don't use more
|
||||
// than 64 threads.
|
||||
let limit = NonZeroUsize::new(64).unwrap();
|
||||
|
||||
std::cmp::min(
|
||||
std::thread::available_parallelism().unwrap_or(FALLBACK_PARALLELISM),
|
||||
MAX_DEFAULT_THREADS,
|
||||
)
|
||||
std::thread::available_parallelism()
|
||||
.unwrap_or(fallback)
|
||||
.min(limit)
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
|
||||
|
@ -768,17 +758,6 @@ pub enum ColorWhen {
|
|||
Never,
|
||||
}
|
||||
|
||||
impl ColorWhen {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
use ColorWhen::*;
|
||||
match *self {
|
||||
Auto => "auto",
|
||||
Never => "never",
|
||||
Always => "always",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// there isn't a derive api for getting grouped values yet,
|
||||
// so we have to use hand-rolled parsing for exec and exec-batch
|
||||
pub struct Exec {
|
||||
|
|
|
@ -8,11 +8,13 @@ use lscolors::{Colorable, LsColors, Style};
|
|||
use crate::config::Config;
|
||||
use crate::filesystem::strip_current_dir;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum DirEntryInner {
|
||||
Normal(ignore::DirEntry),
|
||||
BrokenSymlink(PathBuf),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DirEntry {
|
||||
inner: DirEntryInner,
|
||||
metadata: OnceCell<Option<Metadata>>,
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
use std::sync::Mutex;
|
||||
|
||||
use crossbeam_channel::Receiver;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::dir_entry::DirEntry;
|
||||
use crate::error::print_error;
|
||||
use crate::exit_codes::{merge_exitcodes, ExitCode};
|
||||
use crate::walk::WorkerResult;
|
||||
|
@ -14,7 +11,7 @@ use super::CommandSet;
|
|||
/// generate a command with the supplied command template. The generated command will then
|
||||
/// be executed, and this process will continue until the receiver's sender has closed.
|
||||
pub fn job(
|
||||
rx: Receiver<WorkerResult>,
|
||||
results: impl IntoIterator<Item = WorkerResult>,
|
||||
cmd: &CommandSet,
|
||||
out_perm: &Mutex<()>,
|
||||
config: &Config,
|
||||
|
@ -22,35 +19,39 @@ pub fn job(
|
|||
// Output should be buffered when only running a single thread
|
||||
let buffer_output: bool = config.threads > 1;
|
||||
|
||||
let mut results: Vec<ExitCode> = Vec::new();
|
||||
loop {
|
||||
let mut ret = ExitCode::Success;
|
||||
for result in results {
|
||||
// Obtain the next result from the receiver, else if the channel
|
||||
// has closed, exit from the loop
|
||||
let dir_entry: DirEntry = match rx.recv() {
|
||||
Ok(WorkerResult::Entry(dir_entry)) => dir_entry,
|
||||
Ok(WorkerResult::Error(err)) => {
|
||||
let dir_entry = match result {
|
||||
WorkerResult::Entry(dir_entry) => dir_entry,
|
||||
WorkerResult::Error(err) => {
|
||||
if config.show_filesystem_errors {
|
||||
print_error(err.to_string());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Err(_) => break,
|
||||
};
|
||||
|
||||
// Generate a command, execute it and store its exit code.
|
||||
results.push(cmd.execute(
|
||||
let code = cmd.execute(
|
||||
dir_entry.stripped_path(config),
|
||||
config.path_separator.as_deref(),
|
||||
out_perm,
|
||||
buffer_output,
|
||||
))
|
||||
);
|
||||
ret = merge_exitcodes([ret, code]);
|
||||
}
|
||||
// Returns error in case of any error.
|
||||
merge_exitcodes(results)
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn batch(rx: Receiver<WorkerResult>, cmd: &CommandSet, config: &Config) -> ExitCode {
|
||||
let paths = rx
|
||||
pub fn batch(
|
||||
results: impl IntoIterator<Item = WorkerResult>,
|
||||
cmd: &CommandSet,
|
||||
config: &Config,
|
||||
) -> ExitCode {
|
||||
let paths = results
|
||||
.into_iter()
|
||||
.filter_map(|worker_result| match worker_result {
|
||||
WorkerResult::Entry(dir_entry) => Some(dir_entry.into_stripped_path(config)),
|
||||
|
|
10
src/main.rs
10
src/main.rs
|
@ -325,18 +325,22 @@ fn extract_command(opts: &mut Opts, colored_output: bool) -> Result<Option<Comma
|
|||
if !opts.list_details {
|
||||
return None;
|
||||
}
|
||||
let color_arg = format!("--color={}", opts.color.as_str());
|
||||
|
||||
let res = determine_ls_command(&color_arg, colored_output)
|
||||
let res = determine_ls_command(colored_output)
|
||||
.map(|cmd| CommandSet::new_batch([cmd]).unwrap());
|
||||
Some(res)
|
||||
})
|
||||
.transpose()
|
||||
}
|
||||
|
||||
fn determine_ls_command(color_arg: &str, colored_output: bool) -> Result<Vec<&str>> {
|
||||
fn determine_ls_command(colored_output: bool) -> Result<Vec<&'static str>> {
|
||||
#[allow(unused)]
|
||||
let gnu_ls = |command_name| {
|
||||
let color_arg = if colored_output {
|
||||
"--color=always"
|
||||
} else {
|
||||
"--color=never"
|
||||
};
|
||||
// Note: we use short options here (instead of --long-options) to support more
|
||||
// platforms (like BusyBox).
|
||||
vec![
|
||||
|
|
168
src/walk.rs
168
src/walk.rs
|
@ -4,12 +4,12 @@ use std::io::{self, Write};
|
|||
use std::mem;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use crossbeam_channel::{bounded, Receiver, RecvTimeoutError, Sender};
|
||||
use crossbeam_channel::{bounded, Receiver, RecvTimeoutError, SendError, Sender};
|
||||
use etcetera::BaseStrategy;
|
||||
use ignore::overrides::{Override, OverrideBuilder};
|
||||
use ignore::{self, WalkBuilder, WalkParallel, WalkState};
|
||||
|
@ -36,6 +36,7 @@ enum ReceiverMode {
|
|||
|
||||
/// The Worker threads can result in a valid entry having PathBuf or an error.
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug)]
|
||||
pub enum WorkerResult {
|
||||
// Errors should be rare, so it's probably better to allow large_enum_variant than
|
||||
// to box the Entry variant
|
||||
|
@ -43,6 +44,83 @@ pub enum WorkerResult {
|
|||
Error(ignore::Error),
|
||||
}
|
||||
|
||||
/// A batch of WorkerResults to send over a channel.
|
||||
#[derive(Clone)]
|
||||
struct Batch {
|
||||
items: Arc<Mutex<Option<Vec<WorkerResult>>>>,
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
items: Arc::new(Mutex::new(Some(vec![]))),
|
||||
}
|
||||
}
|
||||
|
||||
fn lock(&self) -> MutexGuard<'_, Option<Vec<WorkerResult>>> {
|
||||
self.items.lock().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for Batch {
|
||||
type Item = WorkerResult;
|
||||
type IntoIter = std::vec::IntoIter<WorkerResult>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.lock().take().unwrap().into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper that sends batches of items at once over a channel.
|
||||
struct BatchSender {
|
||||
batch: Batch,
|
||||
tx: Sender<Batch>,
|
||||
limit: usize,
|
||||
}
|
||||
|
||||
impl BatchSender {
|
||||
fn new(tx: Sender<Batch>, limit: usize) -> Self {
|
||||
Self {
|
||||
batch: Batch::new(),
|
||||
tx,
|
||||
limit,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if we need to flush a batch.
|
||||
fn needs_flush(&self, batch: Option<&Vec<WorkerResult>>) -> bool {
|
||||
match batch {
|
||||
// Limit the batch size to provide some backpressure
|
||||
Some(vec) => vec.len() >= self.limit,
|
||||
// Batch was already taken by the receiver, so make a new one
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an item to a batch.
|
||||
fn send(&mut self, item: WorkerResult) -> Result<(), SendError<()>> {
|
||||
let mut batch = self.batch.lock();
|
||||
|
||||
if self.needs_flush(batch.as_ref()) {
|
||||
drop(batch);
|
||||
self.batch = Batch::new();
|
||||
batch = self.batch.lock();
|
||||
}
|
||||
|
||||
let items = batch.as_mut().unwrap();
|
||||
items.push(item);
|
||||
|
||||
if items.len() == 1 {
|
||||
// New batch, send it over the channel
|
||||
self.tx
|
||||
.send(self.batch.clone())
|
||||
.map_err(|_| SendError(()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Maximum size of the output buffer before flushing results to the console
|
||||
const MAX_BUFFER_LENGTH: usize = 1000;
|
||||
/// Default duration until output buffering switches to streaming.
|
||||
|
@ -57,7 +135,7 @@ struct ReceiverBuffer<'a, W> {
|
|||
/// The ^C notifier.
|
||||
interrupt_flag: &'a AtomicBool,
|
||||
/// Receiver for worker results.
|
||||
rx: Receiver<WorkerResult>,
|
||||
rx: Receiver<Batch>,
|
||||
/// Standard output.
|
||||
stdout: W,
|
||||
/// The current buffer mode.
|
||||
|
@ -72,7 +150,7 @@ struct ReceiverBuffer<'a, W> {
|
|||
|
||||
impl<'a, W: Write> ReceiverBuffer<'a, W> {
|
||||
/// Create a new receiver buffer.
|
||||
fn new(state: &'a WorkerState, rx: Receiver<WorkerResult>, stdout: W) -> Self {
|
||||
fn new(state: &'a WorkerState, rx: Receiver<Batch>, stdout: W) -> Self {
|
||||
let config = &state.config;
|
||||
let quit_flag = state.quit_flag.as_ref();
|
||||
let interrupt_flag = state.interrupt_flag.as_ref();
|
||||
|
@ -103,7 +181,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> {
|
|||
}
|
||||
|
||||
/// Receive the next worker result.
|
||||
fn recv(&self) -> Result<WorkerResult, RecvTimeoutError> {
|
||||
fn recv(&self) -> Result<Batch, RecvTimeoutError> {
|
||||
match self.mode {
|
||||
ReceiverMode::Buffering => {
|
||||
// Wait at most until we should switch to streaming
|
||||
|
@ -119,34 +197,44 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> {
|
|||
/// Wait for a result or state change.
|
||||
fn poll(&mut self) -> Result<(), ExitCode> {
|
||||
match self.recv() {
|
||||
Ok(WorkerResult::Entry(dir_entry)) => {
|
||||
if self.config.quiet {
|
||||
return Err(ExitCode::HasResults(true));
|
||||
}
|
||||
Ok(batch) => {
|
||||
for result in batch {
|
||||
match result {
|
||||
WorkerResult::Entry(dir_entry) => {
|
||||
if self.config.quiet {
|
||||
return Err(ExitCode::HasResults(true));
|
||||
}
|
||||
|
||||
match self.mode {
|
||||
ReceiverMode::Buffering => {
|
||||
self.buffer.push(dir_entry);
|
||||
if self.buffer.len() > MAX_BUFFER_LENGTH {
|
||||
self.stream()?;
|
||||
match self.mode {
|
||||
ReceiverMode::Buffering => {
|
||||
self.buffer.push(dir_entry);
|
||||
if self.buffer.len() > MAX_BUFFER_LENGTH {
|
||||
self.stream()?;
|
||||
}
|
||||
}
|
||||
ReceiverMode::Streaming => {
|
||||
self.print(&dir_entry)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.num_results += 1;
|
||||
if let Some(max_results) = self.config.max_results {
|
||||
if self.num_results >= max_results {
|
||||
return self.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
WorkerResult::Error(err) => {
|
||||
if self.config.show_filesystem_errors {
|
||||
print_error(err.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
ReceiverMode::Streaming => {
|
||||
self.print(&dir_entry)?;
|
||||
self.flush()?;
|
||||
}
|
||||
}
|
||||
|
||||
self.num_results += 1;
|
||||
if let Some(max_results) = self.config.max_results {
|
||||
if self.num_results >= max_results {
|
||||
return self.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(WorkerResult::Error(err)) => {
|
||||
if self.config.show_filesystem_errors {
|
||||
print_error(err.to_string());
|
||||
// If we don't have another batch ready, flush before waiting
|
||||
if self.mode == ReceiverMode::Streaming && self.rx.is_empty() {
|
||||
self.flush()?;
|
||||
}
|
||||
}
|
||||
Err(RecvTimeoutError::Timeout) => {
|
||||
|
@ -201,7 +289,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> {
|
|||
|
||||
/// Flush stdout if necessary.
|
||||
fn flush(&mut self) -> Result<(), ExitCode> {
|
||||
if self.config.interactive_terminal && self.stdout.flush().is_err() {
|
||||
if self.stdout.flush().is_err() {
|
||||
// Probably a broken pipe. Exit gracefully.
|
||||
return Err(ExitCode::GeneralError);
|
||||
}
|
||||
|
@ -319,13 +407,13 @@ impl WorkerState {
|
|||
|
||||
/// Run the receiver work, either on this thread or a pool of background
|
||||
/// threads (for --exec).
|
||||
fn receive(&self, rx: Receiver<WorkerResult>) -> ExitCode {
|
||||
fn receive(&self, rx: Receiver<Batch>) -> ExitCode {
|
||||
let config = &self.config;
|
||||
|
||||
// This will be set to `Some` if the `--exec` argument was supplied.
|
||||
if let Some(ref cmd) = config.command {
|
||||
if cmd.in_batch_mode() {
|
||||
exec::batch(rx, cmd, &config)
|
||||
exec::batch(rx.into_iter().flatten(), cmd, &config)
|
||||
} else {
|
||||
let out_perm = Mutex::new(());
|
||||
|
||||
|
@ -337,7 +425,8 @@ impl WorkerState {
|
|||
let rx = rx.clone();
|
||||
|
||||
// Spawn a job thread that will listen for and execute inputs.
|
||||
let handle = scope.spawn(|| exec::job(rx, cmd, &out_perm, &config));
|
||||
let handle = scope
|
||||
.spawn(|| exec::job(rx.into_iter().flatten(), cmd, &out_perm, &config));
|
||||
|
||||
// Push the handle of the spawned thread into the vector for later joining.
|
||||
handles.push(handle);
|
||||
|
@ -355,12 +444,20 @@ impl WorkerState {
|
|||
}
|
||||
|
||||
/// Spawn the sender threads.
|
||||
fn spawn_senders(&self, walker: WalkParallel, tx: Sender<WorkerResult>) {
|
||||
fn spawn_senders(&self, walker: WalkParallel, tx: Sender<Batch>) {
|
||||
walker.run(|| {
|
||||
let patterns = &self.patterns;
|
||||
let config = &self.config;
|
||||
let quit_flag = self.quit_flag.as_ref();
|
||||
let tx = tx.clone();
|
||||
|
||||
let mut limit = 0x100;
|
||||
if let Some(cmd) = &config.command {
|
||||
if !cmd.in_batch_mode() && config.threads > 1 {
|
||||
// Evenly distribute work between multiple receivers
|
||||
limit = 1;
|
||||
}
|
||||
}
|
||||
let mut tx = BatchSender::new(tx.clone(), limit);
|
||||
|
||||
Box::new(move |entry| {
|
||||
if quit_flag.load(Ordering::Relaxed) {
|
||||
|
@ -545,8 +642,7 @@ impl WorkerState {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
// Channel capacity was chosen empircally to perform similarly to an unbounded channel
|
||||
let (tx, rx) = bounded(0x4000 * config.threads);
|
||||
let (tx, rx) = bounded(2 * config.threads);
|
||||
|
||||
let exit_code = thread::scope(|scope| {
|
||||
// Spawn the receiver thread(s)
|
||||
|
|
Loading…
Reference in a new issue