[FEATURE] specify output field delimiter (#8)

Add output_field_separator option

Add output_field_separator tests

Change structopt req to 0.3

Separate negative choices into a function

Prevent printing a trailing output_field_separator

Change OFS to Option<String> with a default value of " "

Reorder arguments to write_bytes to parallel print_choice

Print output_separator in main loop if applicable

Add `cargo test` to Makefile

Add write_separator function
This commit is contained in:
Ryan Geary 2020-04-04 22:29:51 -04:00
parent d40c9d5234
commit 7243843d2f
10 changed files with 188 additions and 46 deletions

View file

@ -7,6 +7,6 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
structopt = "0.3.0"
structopt = "0.3"
regex = "1"
lazy_static = "1"

View file

@ -8,6 +8,7 @@ flamegraph_commit: release-debug
.PHONY: test
test:
cargo test
test/e2e_test.sh
bench: release

View file

@ -23,11 +23,11 @@ impl Choice {
}
}
pub fn print_choice<WriterType: Write>(
pub fn print_choice<W: Write>(
&self,
line: &String,
config: &Config,
handle: &mut BufWriter<WriterType>,
handle: &mut BufWriter<W>,
) {
let mut line_iter = config
.separator
@ -51,59 +51,87 @@ impl Choice {
}
}
let mut iter = stack.iter().rev().peekable();
loop {
match stack.pop() {
Some(s) => Choice::write_bytes(handle, s.as_bytes()),
match iter.next() {
Some(s) => {
Choice::write_bytes(s.as_bytes(), config, handle, iter.peek().is_some())
}
None => break,
}
}
} else if self.has_negative_index() {
let vec = line_iter.collect::<Vec<&str>>();
let start = if self.start >= 0 {
self.start.try_into().unwrap()
} else {
vec.len()
.checked_sub(self.start.abs().try_into().unwrap())
.unwrap()
};
let end = if self.end >= 0 {
self.end.try_into().unwrap()
} else {
vec.len()
.checked_sub(self.end.abs().try_into().unwrap())
.unwrap()
};
if end > start {
for word in vec[start..=std::cmp::min(end, vec.len() - 1)].iter() {
Choice::write_bytes(handle, word.as_bytes());
}
} else if self.start < 0 {
for word in vec[end..=std::cmp::min(start, vec.len() - 1)].iter().rev() {
Choice::write_bytes(handle, word.as_bytes());
}
}
self.print_negative_choice(vec, config, handle);
} else {
if self.start > 0 {
line_iter.nth((self.start - 1).try_into().unwrap());
}
for i in 0..=(self.end - self.start) {
match line_iter.next() {
Some(s) => Choice::write_bytes(handle, s.as_bytes()),
let mut peek_line_iter = line_iter.peekable();
for i in self.start..=self.end {
match peek_line_iter.next() {
Some(s) => Choice::write_bytes(
s.as_bytes(),
config,
handle,
peek_line_iter.peek().is_some() && i != self.end,
),
None => break,
};
if self.end <= self.start + i {
break;
}
}
}
}
fn write_bytes<WriterType: Write>(handle: &mut BufWriter<WriterType>, b: &[u8]) {
/// Prints the fields selected by a choice in which at least one bound is
/// negative, i.e. counted back from the end of the line.
///
/// * `vec` - the fields of the current line, in input order.
/// * `config` - supplies the output separator used by `write_bytes`.
/// * `handle` - buffered writer receiving the output.
///
/// A negative bound `-n` resolves to index `vec.len() - n`. Both bounds are
/// inclusive. When the resolved end is not before the resolved start, the
/// fields are printed in input order; otherwise, if `start` was negative, they
/// are printed in reverse order. The separator is requested after every field
/// except the last one printed.
///
/// A negative bound that reaches before the first field no longer panics: it
/// is clamped to the first field, unless that leaves nothing to select, in
/// which case nothing is printed. A line without fields prints nothing.
fn print_negative_choice<W: Write>(
    &self,
    vec: Vec<&str>,
    config: &Config,
    handle: &mut BufWriter<W>,
) {
    // Nothing to print for a line without fields; returning early also keeps
    // `vec.len() - 1` below from underflowing.
    if vec.is_empty() {
        return;
    }
    let last = vec.len() - 1;

    // `None` means the negative bound points before the first field.
    let start: Option<usize> = if self.start >= 0 {
        Some(self.start.try_into().unwrap())
    } else {
        vec.len()
            .checked_sub(self.start.abs().try_into().unwrap())
    };
    let end: Option<usize> = if self.end >= 0 {
        Some(self.end.try_into().unwrap())
    } else {
        vec.len().checked_sub(self.end.abs().try_into().unwrap())
    };

    let (start, end) = match (start, end) {
        // The whole range lies before the first field.
        (None, None) => return,
        // A forward range that ends before the first field selects nothing.
        (Some(_), None) if self.start >= 0 => return,
        // Otherwise clamp the out-of-range bound to the first field.
        (start, end) => (start.unwrap_or(0), end.unwrap_or(0)),
    };

    if end >= start {
        // Forward range; `>=` so that a range resolving to a single field
        // (e.g. `0:-3` on three fields) still prints that field.
        if start > last {
            return;
        }
        let stop = std::cmp::min(end, last);
        for word in vec[start..stop].iter() {
            Choice::write_bytes(word.as_bytes(), config, handle, true);
        }
        // The final field is written without a trailing separator.
        Choice::write_bytes(vec[stop].as_bytes(), config, handle, false);
    } else if self.start < 0 {
        // Reversed range: walk from `start` down to `end + 1`, then finish
        // with `end` so no separator follows the last field.
        for word in vec[end + 1..=std::cmp::min(start, last)]
            .iter()
            .rev()
        {
            Choice::write_bytes(word.as_bytes(), config, handle, true);
        }
        Choice::write_bytes(vec[end].as_bytes(), config, handle, false);
    }
}
fn write_bytes<WriterType: Write>(
b: &[u8],
config: &Config,
handle: &mut BufWriter<WriterType>,
print_separator: bool,
) {
let num_bytes_written = match handle.write(b) {
Ok(x) => x,
Err(e) => {
@ -111,11 +139,15 @@ impl Choice {
0
}
};
if num_bytes_written > 0 {
match handle.write(b" ") {
Ok(_) => (),
Err(e) => eprintln!("Failed to write to output: {}", e),
}
if num_bytes_written > 0 && print_separator {
Choice::write_separator(config, handle);
};
}
/// Writes the configured output field separator (`config.output_separator`)
/// to `handle`.
///
/// A write failure is reported on stderr and otherwise ignored, so the caller
/// keeps processing.
pub fn write_separator<W: Write>(config: &Config, handle: &mut BufWriter<W>) {
    // `write_all` keeps going until every separator byte has been accepted; a
    // bare `write` is allowed to return after a partial write.
    if let Err(e) = handle.write_all(&config.output_separator) {
        eprintln!("Failed to write to output: {}", e);
    }
}
@ -564,6 +596,72 @@ mod tests {
config.opt.choice[0].print_choice(&String::from("a:b::c:::d"), &config, &mut handle);
assert_eq!(String::from("d"), MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_1_to_3_with_output_field_separator() {
    // Range 1:3 with `-o #`: the selected fields are joined by `#`.
    let args = vec!["choose", "1:3", "-o", "#"];
    let config = Config::from_iter(args);
    let mut handle = BufWriter::new(MockStdout::new());
    let line = String::from("a b c d");

    config.opt.choice[0].print_choice(&line, &config, &mut handle);

    let expected = String::from("b#c#d");
    assert_eq!(expected, MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_1_and_3_with_output_field_separator() {
    // Two separate choices: the test writes the separator between them itself.
    let args = vec!["choose", "1", "3", "-o", "#"];
    let config = Config::from_iter(args);
    let mut handle = BufWriter::new(MockStdout::new());
    let line = String::from("a b c d");

    config.opt.choice[0].print_choice(&line, &config, &mut handle);
    handle.write(&config.output_separator).unwrap();
    config.opt.choice[1].print_choice(&line, &config, &mut handle);

    let expected = String::from("b#d");
    assert_eq!(expected, MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_2_to_4_with_output_field_separator() {
    // Range 2:4 with `-o %` on a longer sentence.
    let args = vec!["choose", "2:4", "-o", "%"];
    let config = Config::from_iter(args);
    let mut handle = BufWriter::new(MockStdout::new());
    let line = String::from("Lorem ipsum dolor sit amet, consectetur");

    config.opt.choice[0].print_choice(&line, &config, &mut handle);

    let expected = String::from("dolor%sit%amet,");
    assert_eq!(expected, MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_3_to_1_with_output_field_separator() {
    // Reversed range 3:1 with `-o #`: fields come out in reverse order.
    let args = vec!["choose", "3:1", "-o", "#"];
    let config = Config::from_iter(args);
    let mut handle = BufWriter::new(MockStdout::new());
    let line = String::from("a b c d");

    config.opt.choice[0].print_choice(&line, &config, &mut handle);

    let expected = String::from("d#c#b");
    assert_eq!(expected, MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_0_to_neg_2_with_output_field_separator() {
    // Range 0:-2 with `-o #`: the negative end counts back from the last field.
    let args = vec!["choose", "0:-2", "-o", "#"];
    let config = Config::from_iter(args);
    let mut handle = BufWriter::new(MockStdout::new());
    let line = String::from("a b c d");

    config.opt.choice[0].print_choice(&line, &config, &mut handle);

    let expected = String::from("a#b#c");
    assert_eq!(expected, MockStdout::str_from_buf_writer(handle));
}
#[test]
fn print_0_to_2_with_empty_output_field_separator() {
    // An empty `-o` value concatenates the selected fields directly.
    let args = vec!["choose", "0:2", "-o", ""];
    let config = Config::from_iter(args);
    let mut handle = BufWriter::new(MockStdout::new());
    let line = String::from("a b c d");

    config.opt.choice[0].print_choice(&line, &config, &mut handle);

    let expected = String::from("abc");
    assert_eq!(expected, MockStdout::str_from_buf_writer(handle));
}
}
mod is_reverse_range_tests {

View file

@ -12,6 +12,7 @@ lazy_static! {
/// Settings shared by the choice-printing code: the parsed options plus the
/// input and output separators derived from them.
pub struct Config {
    /// Parsed command-line options.
    pub opt: Opt,
    /// Regular expression for the input field separator.
    /// NOTE(review): presumably used to split each input line into fields — confirm in `print_choice`.
    pub separator: Regex,
    /// Bytes written between output fields; a single space (0x20) when no
    /// output field separator option was given.
    pub output_separator: Box<[u8]>,
}
impl Config {
@ -50,7 +51,16 @@ impl Config {
}
};
Config { opt, separator }
let output_separator = match opt.output_field_separator.clone() {
Some(s) => s.into_boxed_str().into_boxed_bytes(),
None => Box::new([0x20; 1]),
};
Config {
opt,
separator,
output_separator,
}
}
pub fn parse_choice(src: &str) -> Result<Choice, ParseIntError> {
@ -91,6 +101,10 @@ impl Config {
return Ok(Choice::new(start, end));
}
/// Turns the raw output-field-separator argument into an owned `String`,
/// copying it unchanged (an empty separator stays empty).
pub fn parse_output_field_separator(src: &str) -> String {
    src.to_owned()
}
}
#[cfg(test)]

View file

@ -39,8 +39,12 @@ fn main() {
while let Some(line) = reader.read_line(&mut buffer) {
match line {
Ok(l) => {
for choice in &config.opt.choice {
let choice_iter = &mut config.opt.choice.iter().peekable();
while let Some(choice) = choice_iter.next() {
choice.print_choice(&l, &config, &mut handle);
if choice_iter.peek().is_some() {
choice::Choice::write_separator(&config, &mut handle);
}
}
match handle.write(b"\n") {
Ok(_) => (),

View file

@ -12,6 +12,10 @@ pub struct Opt {
#[structopt(short, long)]
pub field_separator: Option<String>,
/// Specify output field separator
#[structopt(short, long, parse(from_str = Config::parse_output_field_separator))]
pub output_field_separator: Option<String>,
/// Use non-greedy field separators
#[structopt(short, long)]
pub non_greedy: bool,

6
test/choose_1:3of%.txt Normal file
View file

@ -0,0 +1,6 @@
ipsum%dolor%sit
ut%labore%et
exercitation%ullamco%laboris
aute%irure%dolor
nulla%pariatur.%Excepteur
qui%officia%deserunt

6
test/choose_1_3of%.txt Normal file
View file

@ -0,0 +1,6 @@
ipsum%sit
ut%et
exercitation%laboris
aute%dolor
nulla%Excepteur
qui%deserunt

6
test/choose_1_3of.txt Normal file
View file

@ -0,0 +1,6 @@
ipsumsit
utet
exercitationlaboris
autedolor
nullaExcepteur
quideserunt

View file

@ -14,6 +14,9 @@ diff -w <(cargo run -- 9 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir
diff -w <(cargo run -- 12 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_12.txt")
diff -w <(cargo run -- 4:2 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_4:2.txt")
diff -w <(cargo run -- -4:-2 -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_-4:-2.txt")
diff -w <(cargo run -- 1:3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1:3of%.txt")
diff -w <(cargo run -- 1 3 -o % -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of%.txt")
diff -w <(cargo run -- 1 3 -o '' -i ${test_dir}/lorem.txt 2>/dev/null) <(cat "${test_dir}/choose_1_3of.txt")
# add tests for different delimiters
# add tests using piping