cp: fix the result of inodes are not the same when preserve links is flagged (#5064)

Should fix:
```
rm -rf a b c
touch a
ln -s a b
mkdir c
./target/debug/coreutils cp --preserve=links -R -H a b c
a_inode=$(ls -i c/a|sed 's,c/.*,,')
b_inode=$(ls -i c/b|sed 's,c/.*,,')
echo "$a_inode" = "$b_inode"
```
This commit is contained in:
tommady 2023-09-24 16:53:27 +08:00 committed by GitHub
parent e2e42ac265
commit bd0fb817a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 296 additions and 118 deletions

View file

@ -8,7 +8,7 @@
//! See the [`copy_directory`] function for more information.
#[cfg(windows)]
use std::borrow::Cow;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::env;
use std::fs;
use std::io;
@ -24,8 +24,8 @@ use uucore::uio_error;
use walkdir::{DirEntry, WalkDir};
use crate::{
aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, preserve_hardlinks,
CopyResult, Error, Options,
aligned_ancestors, context_for, copy_attributes, copy_file, copy_link, CopyResult, Error,
Options,
};
/// Ensure a Windows path starts with a `\\?`.
@ -200,7 +200,7 @@ fn copy_direntry(
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
preserve_hard_links: bool,
hard_links: &mut Vec<(String, u64)>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
) -> CopyResult<()> {
let Entry {
source_absolute,
@ -240,30 +240,27 @@ fn copy_direntry(
// If the source is not a directory, then we need to copy the file.
if !source_absolute.is_dir() {
if preserve_hard_links {
let dest = local_to_target.as_path().to_path_buf();
let found_hard_link = preserve_hardlinks(hard_links, &source_absolute, &dest)?;
if !found_hard_link {
match copy_file(
progress_bar,
&source_absolute,
local_to_target.as_path(),
options,
symlinked_files,
false,
) {
Ok(_) => Ok(()),
Err(err) => {
if source_absolute.is_symlink() {
// silent the error with a symlink
// In case we do --archive, we might copy the symlink
// before the file itself
Ok(())
} else {
Err(err)
}
match copy_file(
progress_bar,
&source_absolute,
local_to_target.as_path(),
options,
symlinked_files,
copied_files,
false,
) {
Ok(_) => Ok(()),
Err(err) => {
if source_absolute.is_symlink() {
// silent the error with a symlink
// In case we do --archive, we might copy the symlink
// before the file itself
Ok(())
} else {
Err(err)
}
}?;
}
}
}?;
} else {
// At this point, `path` is just a plain old file.
// Terminate this function immediately if there is any
@ -277,6 +274,7 @@ fn copy_direntry(
local_to_target.as_path(),
options,
symlinked_files,
copied_files,
false,
) {
Ok(_) => {}
@ -310,6 +308,7 @@ pub(crate) fn copy_directory(
target: &Path,
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
source_in_command_line: bool,
) -> CopyResult<()> {
if !options.recursive {
@ -324,6 +323,7 @@ pub(crate) fn copy_directory(
target,
options,
symlinked_files,
copied_files,
source_in_command_line,
);
}
@ -372,7 +372,6 @@ pub(crate) fn copy_directory(
};
let target = tmp.as_path();
let mut hard_links: Vec<(String, u64)> = vec![];
let preserve_hard_links = options.preserve_hard_links();
// Collect some paths here that are invariant during the traversal
@ -397,7 +396,7 @@ pub(crate) fn copy_directory(
options,
symlinked_files,
preserve_hard_links,
&mut hard_links,
copied_files,
)?;
}
// Print an error message, but continue traversing the directory.

View file

@ -9,7 +9,7 @@
use quick_error::quick_error;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::env;
#[cfg(not(windows))]
use std::ffi::CString;
@ -1106,67 +1106,6 @@ fn parse_path_args(
Ok((paths, target))
}
/// Get the inode information for a file.
fn get_inode(file_info: &FileInformation) -> u64 {
#[cfg(unix)]
let result = file_info.inode();
#[cfg(windows)]
let result = file_info.file_index();
result
}
#[cfg(target_os = "redox")]
fn preserve_hardlinks(
hard_links: &mut Vec<(String, u64)>,
source: &std::path::Path,
dest: &std::path::Path,
found_hard_link: &mut bool,
) -> CopyResult<()> {
// Redox does not currently support hard links
Ok(())
}
/// Hard link a pair of files if needed _and_ record if this pair is a new hard link.
#[cfg(not(target_os = "redox"))]
fn preserve_hardlinks(
hard_links: &mut Vec<(String, u64)>,
source: &std::path::Path,
dest: &std::path::Path,
) -> CopyResult<bool> {
let info = FileInformation::from_path(source, false)
.context(format!("cannot stat {}", source.quote()))?;
let inode = get_inode(&info);
let nlinks = info.number_of_links();
let mut found_hard_link = false;
#[allow(clippy::explicit_iter_loop)]
for (link, link_inode) in hard_links.iter() {
if *link_inode == inode {
// Consider the following files:
//
// * `src/f` - a regular file
// * `src/link` - a hard link to `src/f`
// * `dest/src/f` - a different regular file
//
// In this scenario, if we do `cp -a src/ dest/`, it is
// possible that the order of traversal causes `src/link`
// to get copied first (to `dest/src/link`). In that case,
// in order to make sure `dest/src/link` is a hard link to
// `dest/src/f` and `dest/src/f` has the contents of
// `src/f`, we delete the existing file to allow the hard
// linking.
if file_or_link_exists(dest) && file_or_link_exists(Path::new(link)) {
std::fs::remove_file(dest)?;
}
std::fs::hard_link(link, dest).unwrap();
found_hard_link = true;
}
}
if !found_hard_link && nlinks > 1 {
hard_links.push((dest.to_str().unwrap().to_string(), inode));
}
Ok(found_hard_link)
}
/// When handling errors, we don't always want to show them to the user. This function handles that.
fn show_error_if_needed(error: &Error) {
match error {
@ -1195,14 +1134,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult
let target_type = TargetType::determine(sources, target);
verify_target_type(target, &target_type)?;
let preserve_hard_links = options.preserve_hard_links();
let mut hard_links: Vec<(String, u64)> = vec![];
let mut non_fatal_errors = false;
let mut seen_sources = HashSet::with_capacity(sources.len());
let mut symlinked_files = HashSet::new();
// to remember the copied files for further usage.
// the FileInformation implemented the Hash trait by using
// 1. inode number
// 2. device number
// the combination of a file's inode number and device number is unique throughout all the file systems.
//
// key is the source file's information and the value is the destination filepath.
let mut copied_files: HashMap<FileInformation, PathBuf> = HashMap::with_capacity(sources.len());
let progress_bar = if options.progress_bar {
let pb = ProgressBar::new(disk_usage(sources, options.recursive)?)
.with_style(
@ -1222,28 +1166,19 @@ pub fn copy(sources: &[PathBuf], target: &Path, options: &Options) -> CopyResult
if seen_sources.contains(source) {
// FIXME: compare sources by the actual file they point to, not their path. (e.g. dir/file == dir/../dir/file in most cases)
show_warning!("source {} specified more than once", source.quote());
} else {
let found_hard_link = if preserve_hard_links && !source.is_dir() {
let dest = construct_dest_path(source, target, &target_type, options)?;
preserve_hardlinks(&mut hard_links, source, &dest)?
} else {
false
};
if !found_hard_link {
if let Err(error) = copy_source(
&progress_bar,
source,
target,
&target_type,
options,
&mut symlinked_files,
) {
show_error_if_needed(&error);
non_fatal_errors = true;
}
}
seen_sources.insert(source);
} else if let Err(error) = copy_source(
&progress_bar,
source,
target,
&target_type,
options,
&mut symlinked_files,
&mut copied_files,
) {
show_error_if_needed(&error);
non_fatal_errors = true;
}
seen_sources.insert(source);
}
if let Some(pb) = progress_bar {
@ -1295,11 +1230,20 @@ fn copy_source(
target_type: &TargetType,
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
) -> CopyResult<()> {
let source_path = Path::new(&source);
if source_path.is_dir() {
// Copy as directory
copy_directory(progress_bar, source, target, options, symlinked_files, true)
copy_directory(
progress_bar,
source,
target,
options,
symlinked_files,
copied_files,
true,
)
} else {
// Copy as file
let dest = construct_dest_path(source_path, target, target_type, options)?;
@ -1309,6 +1253,7 @@ fn copy_source(
dest.as_path(),
options,
symlinked_files,
copied_files,
true,
);
if options.parents {
@ -1570,6 +1515,24 @@ fn handle_existing_dest(
OverwriteMode::Clobber(ClobberMode::RemoveDestination) => {
fs::remove_file(dest)?;
}
OverwriteMode::Clobber(ClobberMode::Standard) => {
// Consider the following files:
//
// * `src/f` - a regular file
// * `src/link` - a hard link to `src/f`
// * `dest/src/f` - a different regular file
//
// In this scenario, if we do `cp -a src/ dest/`, it is
// possible that the order of traversal causes `src/link`
// to get copied first (to `dest/src/link`). In that case,
// in order to make sure `dest/src/link` is a hard link to
// `dest/src/f` and `dest/src/f` has the contents of
// `src/f`, we delete the existing file to allow the hard
// linking.
if options.preserve_hard_links() {
fs::remove_file(dest)?;
}
}
_ => (),
};
@ -1643,6 +1606,7 @@ fn copy_file(
dest: &Path,
options: &Options,
symlinked_files: &mut HashSet<FileInformation>,
copied_files: &mut HashMap<FileInformation, PathBuf>,
source_in_command_line: bool,
) -> CopyResult<()> {
if (options.update == UpdateMode::ReplaceIfOlder || options.update == UpdateMode::ReplaceNone)
@ -1686,6 +1650,19 @@ fn copy_file(
handle_existing_dest(source, dest, options, source_in_command_line)?;
}
if options.preserve_hard_links() {
// if we encounter a matching device/inode pair in the source tree
// we can arrange to create a hard link between the corresponding names
// in the destination tree.
if let Some(new_source) = copied_files.get(
&FileInformation::from_path(source, options.dereference(source_in_command_line))
.context(format!("cannot stat {}", source.quote()))?,
) {
std::fs::hard_link(new_source, dest)?;
return Ok(());
};
}
if options.verbose {
if let Some(pb) = progress_bar {
// Suspend (hide) the progress bar so the println won't overlap with the progress bar.
@ -1873,6 +1850,11 @@ fn copy_file(
copy_attributes(source, dest, &options.attributes)?;
copied_files.insert(
FileInformation::from_path(source, options.dereference(source_in_command_line))?,
dest.to_path_buf(),
);
if let Some(progress_bar) = progress_bar {
progress_bar.inc(fs::metadata(source)?.len());
}

View file

@ -11,7 +11,7 @@ use std::fs::set_permissions;
#[cfg(not(windows))]
use std::os::unix::fs;
#[cfg(all(unix, not(target_os = "freebsd")))]
#[cfg(unix)]
use std::os::unix::fs::MetadataExt;
#[cfg(all(unix, not(target_os = "freebsd")))]
use std::os::unix::fs::PermissionsExt;
@ -1353,6 +1353,203 @@ fn test_cp_preserve_xattr_fails_on_android() {
.fails();
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_1() {
let (at, mut ucmd) = at_and_ucmd!();
at.touch("a");
at.hard_link("a", "b");
at.mkdir("c");
ucmd.arg("-d").arg("a").arg("b").arg("c").succeeds();
assert!(at.dir_exists("c"));
assert!(at.plus("c").join("a").exists());
assert!(at.plus("c").join("b").exists());
#[cfg(unix)]
{
let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap();
let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap();
assert_eq!(metadata_a.ino(), metadata_b.ino());
}
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_2() {
let (at, mut ucmd) = at_and_ucmd!();
at.touch("a");
at.symlink_file("a", "b");
at.mkdir("c");
ucmd.arg("--preserve=links")
.arg("-R")
.arg("-H")
.arg("a")
.arg("b")
.arg("c")
.succeeds();
assert!(at.dir_exists("c"));
assert!(at.plus("c").join("a").exists());
assert!(at.plus("c").join("b").exists());
#[cfg(unix)]
{
let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap();
let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap();
assert_eq!(metadata_a.ino(), metadata_b.ino());
}
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_3() {
let (at, mut ucmd) = at_and_ucmd!();
at.mkdir("d");
at.touch("d/a");
at.symlink_file("d/a", "d/b");
ucmd.arg("--preserve=links")
.arg("-R")
.arg("-L")
.arg("d")
.arg("c")
.succeeds();
assert!(at.dir_exists("c"));
assert!(at.plus("c").join("a").exists());
assert!(at.plus("c").join("b").exists());
#[cfg(unix)]
{
let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap();
let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap();
assert_eq!(metadata_a.ino(), metadata_b.ino());
}
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_4() {
let (at, mut ucmd) = at_and_ucmd!();
at.mkdir("d");
at.touch("d/a");
at.hard_link("d/a", "d/b");
ucmd.arg("--preserve=links")
.arg("-R")
.arg("-L")
.arg("d")
.arg("c")
.succeeds();
assert!(at.dir_exists("c"));
assert!(at.plus("c").join("a").exists());
assert!(at.plus("c").join("b").exists());
#[cfg(unix)]
{
let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap();
let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap();
assert_eq!(metadata_a.ino(), metadata_b.ino());
}
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_5() {
let (at, mut ucmd) = at_and_ucmd!();
at.mkdir("d");
at.touch("d/a");
at.hard_link("d/a", "d/b");
ucmd.arg("-dR")
.arg("--no-preserve=links")
.arg("d")
.arg("c")
.succeeds();
assert!(at.dir_exists("c"));
assert!(at.plus("c").join("a").exists());
assert!(at.plus("c").join("b").exists());
#[cfg(unix)]
{
let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap();
let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap();
assert_eq!(metadata_a.ino(), metadata_b.ino());
}
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_6() {
let (at, mut ucmd) = at_and_ucmd!();
at.touch("a");
at.hard_link("a", "b");
at.mkdir("c");
ucmd.arg("-d").arg("a").arg("b").arg("c").succeeds();
assert!(at.dir_exists("c"));
assert!(at.plus("c").join("a").exists());
assert!(at.plus("c").join("b").exists());
#[cfg(unix)]
{
let metadata_a = std::fs::metadata(at.subdir.join("c").join("a")).unwrap();
let metadata_b = std::fs::metadata(at.subdir.join("c").join("b")).unwrap();
assert_eq!(metadata_a.ino(), metadata_b.ino());
}
}
#[test]
// android platform will causing stderr = cp: Permission denied (os error 13)
#[cfg(not(target_os = "android"))]
fn test_cp_preserve_links_case_7() {
let (at, mut ucmd) = at_and_ucmd!();
at.mkdir("src");
at.touch("src/f");
at.hard_link("src/f", "src/g");
at.mkdir("dest");
at.touch("dest/g");
ucmd.arg("-n")
.arg("--preserve=links")
.arg("src/f")
.arg("src/g")
.arg("dest")
.fails()
.stderr_contains("not replacing");
();
assert!(at.dir_exists("dest"));
assert!(at.plus("dest").join("f").exists());
assert!(at.plus("dest").join("g").exists());
}
#[test]
// For now, disable the test on Windows. Symlinks aren't well support on Windows.
// It works on Unix for now and it works locally when run from a powershell