tac: correct behavior of -b option (#2523)

* tac: correct behavior of -b option

Correct the behavior of `tac -b` to match that of GNU coreutils
`tac`. Specifically, this changes `tac -b` to assume *leading* line
separators instead of the default *trailing* line separators.

Before this commit, the (incorrect) behavior was

    $ printf "/abc/def" | tac -b -s "/"
    def/abc/

After this commit, the behavior is

    $ printf "/abc/def" | tac -b -s "/"
    /def/abc

Fixes #2262.

* fixup! tac: correct behavior of -b option

* fixup! tac: correct behavior of -b option

Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
This commit is contained in:
jfinkels 2021-08-22 15:01:17 -04:00 committed by GitHub
parent 114c9a409c
commit 4ef35d4a96
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 96 additions and 63 deletions

View file

@ -5,13 +5,13 @@
// * For the full copyright and license information, please view the LICENSE
// * file that was distributed with this source code.
// spell-checker:ignore (ToDO) sbytes slen
// spell-checker:ignore (ToDO) sbytes slen dlen
#[macro_use]
extern crate uucore;
use clap::{crate_version, App, Arg};
use std::io::{stdin, stdout, BufReader, Read, Stdout, Write};
use std::io::{stdin, stdout, BufReader, Read, Write};
use std::{fs::File, path::Path};
use uucore::InvalidEncodingHandling;
@ -80,12 +80,49 @@ pub fn uu_app() -> App<'static, 'static> {
.arg(Arg::with_name(options::FILE).hidden(true).multiple(true))
}
fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
let mut exit_code = 0;
fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> {
let mut out = stdout();
// Convert the line separator to a byte sequence.
let sbytes = separator.as_bytes();
let slen = sbytes.len();
// If there are more characters in the separator than in the data,
// we can't possibly split the data on the separator. Write the
// entire buffer to stdout.
let dlen = data.len();
if dlen < slen {
return out.write_all(data);
}
// Iterate over each byte in the buffer in reverse. When we find a
// line separator, write the line to stdout.
//
// The `before` flag controls whether the line separator appears at
// the end of the line (as in "abc\ndef\n") or at the beginning of
// the line (as in "/abc/def").
let mut following_line_start = data.len();
for i in (0..dlen - slen + 1).rev() {
if &data[i..i + slen] == sbytes {
if before {
out.write_all(&data[i..following_line_start])?;
following_line_start = i;
} else {
out.write_all(&data[i + slen..following_line_start])?;
following_line_start = i + slen;
}
}
}
// After the loop terminates, write whatever bytes are remaining at
// the beginning of the buffer.
out.write_all(&data[0..following_line_start])?;
Ok(())
}
fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
let mut exit_code = 0;
for filename in &filenames {
let mut file = BufReader::new(if filename == "-" {
Box::new(stdin()) as Box<dyn Read>
@ -120,63 +157,8 @@ fn tac(filenames: Vec<String>, before: bool, _: bool, separator: &str) -> i32 {
continue;
};
// find offsets in string of all separators
let mut offsets = Vec::new();
let mut i = 0;
loop {
if i + slen > data.len() {
break;
}
if &data[i..i + slen] == sbytes {
offsets.push(i);
i += slen;
} else {
i += 1;
}
}
// If the file contains no line separators, then simply write
// the contents of the file directly to stdout.
if offsets.is_empty() {
out.write_all(&data)
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
return exit_code;
}
// if there isn't a separator at the end of the file, fake it
if *offsets.last().unwrap() < data.len() - slen {
offsets.push(data.len());
}
let mut prev = *offsets.last().unwrap();
let mut start = true;
for off in offsets.iter().rev().skip(1) {
// correctly handle case of no final separator in file
if start && prev == data.len() {
show_line(&mut out, &[], &data[*off + slen..prev], before);
start = false;
} else {
show_line(&mut out, sbytes, &data[*off + slen..prev], before);
}
prev = *off;
}
show_line(&mut out, sbytes, &data[0..prev], before);
buffer_tac(&data, before, separator)
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
}
exit_code
}
fn show_line(out: &mut Stdout, sep: &[u8], dat: &[u8], before: bool) {
if before {
out.write_all(sep)
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
}
out.write_all(dat)
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
if !before {
out.write_all(sep)
.unwrap_or_else(|e| crash!(1, "failed to write to stdout: {}", e));
}
}

View file

@ -1,3 +1,4 @@
// spell-checker:ignore axxbxx bxxaxx
use crate::common::util::*;
#[test]
@ -23,7 +24,7 @@ fn test_stdin_non_newline_separator_before() {
.args(&["-b", "-s", ":"])
.pipe_in("100:200:300:400:500")
.run()
.stdout_is("500:400:300:200:100");
.stdout_is(":500:400:300:200100");
}
#[test]
@ -74,6 +75,56 @@ fn test_no_line_separators() {
new_ucmd!().pipe_in("a").succeeds().stdout_is("a");
}
#[test]
fn test_before_trailing_separator_no_leading_separator() {
new_ucmd!()
.arg("-b")
.pipe_in("a\nb\n")
.succeeds()
.stdout_is("\n\nba");
}
#[test]
fn test_before_trailing_separator_and_leading_separator() {
new_ucmd!()
.arg("-b")
.pipe_in("\na\nb\n")
.succeeds()
.stdout_is("\n\nb\na");
}
#[test]
fn test_before_leading_separator_no_trailing_separator() {
new_ucmd!()
.arg("-b")
.pipe_in("\na\nb")
.succeeds()
.stdout_is("\nb\na");
}
#[test]
fn test_before_no_separator() {
new_ucmd!()
.arg("-b")
.pipe_in("ab")
.succeeds()
.stdout_is("ab");
}
#[test]
fn test_before_empty_file() {
new_ucmd!().arg("-b").pipe_in("").succeeds().stdout_is("");
}
#[test]
fn test_multi_char_separator() {
new_ucmd!()
.args(&["-s", "xx"])
.pipe_in("axxbxx")
.succeeds()
.stdout_is("bxxaxx");
}
#[test]
fn test_null_separator() {
new_ucmd!()

View file

@ -1 +1 @@
97:89:83:79:73:71:67:61:59:53:47:43:41:37:31:29:23:19:17:13:11:7:5:3:2
:97:89:83:79:73:71:67:61:59:53:47:43:41:37:31:29:23:19:17:13:11:7:5:32