Match GNU semantics for missing EOF

While the rust coreutils semantics were arguably more correct,
they were different than the gnu split semantics when handling a
file without a trailing EOF. This patch addresses that difference
and allows passing one more GNU test suite.
This commit is contained in:
Andrew Baptist 2022-10-07 17:46:49 -04:00
parent 97dd4824e5
commit 4922d34177
3 changed files with 45 additions and 5 deletions

View file

@ -37,8 +37,9 @@ static OPT_HEX_SUFFIXES: &str = "hex-suffixes";
static OPT_SUFFIX_LENGTH: &str = "suffix-length";
static OPT_DEFAULT_SUFFIX_LENGTH: &str = "0";
static OPT_VERBOSE: &str = "verbose";
//The ---io-blksize parameter is consumed and ignored.
//The ---io and ---io-blksize parameters are consumed and ignored.
//The parameter is included to make GNU coreutils tests pass.
static OPT_IO: &str = "-io";
static OPT_IO_BLKSIZE: &str = "-io-blksize";
static OPT_ELIDE_EMPTY_FILES: &str = "elide-empty-files";
@ -159,6 +160,13 @@ pub fn uu_app<'a>() -> Command<'a> {
.long(OPT_VERBOSE)
.help("print a diagnostic just before each output file is opened"),
)
.arg(
Arg::new(OPT_IO)
.long(OPT_IO)
.alias(OPT_IO)
.takes_value(true)
.hide(true),
)
.arg(
Arg::new(OPT_IO_BLKSIZE)
.long(OPT_IO_BLKSIZE)
@ -922,10 +930,22 @@ impl<'a> Write for LineBytesChunkWriter<'a> {
// then move on to the next chunk if necessary.
None => {
let end = self.num_bytes_remaining_in_current_chunk;
let num_bytes_written = self.inner.write(&buf[..end.min(buf.len())])?;
self.num_bytes_remaining_in_current_chunk -= num_bytes_written;
total_bytes_written += num_bytes_written;
buf = &buf[num_bytes_written..];
// This is ugly but here to match GNU behavior. If the input
// doesn't end with a \n, pretend that it does for handling
// the second to last segment chunk. See `line-bytes.sh`.
if end == buf.len()
&& self.num_bytes_remaining_in_current_chunk
< self.chunk_size.try_into().unwrap()
&& buf[buf.len() - 1] != b'\n'
{
self.num_bytes_remaining_in_current_chunk = 0;
} else {
let num_bytes_written = self.inner.write(&buf[..end.min(buf.len())])?;
self.num_bytes_remaining_in_current_chunk -= num_bytes_written;
total_bytes_written += num_bytes_written;
buf = &buf[num_bytes_written..];
}
}
// If there is a newline character and the line

View file

@ -659,6 +659,22 @@ fn test_line_bytes_no_empty_file() {
assert!(!at.plus("xak").exists());
}
#[test]
fn test_line_bytes_no_eof() {
let (at, mut ucmd) = at_and_ucmd!();
ucmd.args(&["-C", "3"])
.pipe_in("1\n2222\n3\n4")
.succeeds()
.no_stdout()
.no_stderr();
assert_eq!(at.read("xaa"), "1\n");
assert_eq!(at.read("xab"), "222");
assert_eq!(at.read("xac"), "2\n");
assert_eq!(at.read("xad"), "3\n");
assert_eq!(at.read("xae"), "4");
assert!(!at.plus("xaf").exists());
}
#[test]
fn test_guard_input() {
let ts = TestScenario::new(util_name!());

4
tests/fixtures/split/noeof.txt vendored Normal file
View file

@ -0,0 +1,4 @@
1
2222
3
4