mirror of
https://github.com/uutils/coreutils
synced 2024-10-06 16:09:08 +00:00
csplit: refresh of the previous PR
This commit is contained in:
parent
da362ced71
commit
89bf7a726e
|
@ -37,6 +37,7 @@ feat_common_core = [
|
|||
"cksum",
|
||||
"comm",
|
||||
"cp",
|
||||
"csplit",
|
||||
"cut",
|
||||
"date",
|
||||
"df",
|
||||
|
@ -241,6 +242,7 @@ chroot = { optional=true, version="0.0.1", package="uu_chroot", path="src/uu/c
|
|||
cksum = { optional=true, version="0.0.1", package="uu_cksum", path="src/uu/cksum" }
|
||||
comm = { optional=true, version="0.0.1", package="uu_comm", path="src/uu/comm" }
|
||||
cp = { optional=true, version="0.0.1", package="uu_cp", path="src/uu/cp" }
|
||||
csplit = { optional=true, version="0.0.1", package="uu_csplit", path="src/uu/csplit" }
|
||||
cut = { optional=true, version="0.0.1", package="uu_cut", path="src/uu/cut" }
|
||||
date = { optional=true, version="0.0.1", package="uu_date", path="src/uu/date" }
|
||||
df = { optional=true, version="0.0.1", package="uu_df", path="src/uu/df" }
|
||||
|
@ -332,6 +334,7 @@ pin_winapi-util = { version="0.1.2, < 0.1.3", package="winapi-util" } ## winapi-
|
|||
[dev-dependencies]
|
||||
conv = "0.3"
|
||||
filetime = "0.2"
|
||||
glob = "0.3.0"
|
||||
libc = "0.2"
|
||||
rand = "0.7"
|
||||
regex = "1.0"
|
||||
|
|
|
@ -53,6 +53,7 @@ PROGS := \
|
|||
cksum \
|
||||
comm \
|
||||
cp \
|
||||
csplit \
|
||||
cut \
|
||||
df \
|
||||
dircolors \
|
||||
|
@ -160,6 +161,7 @@ TEST_PROGS := \
|
|||
cksum \
|
||||
comm \
|
||||
cp \
|
||||
csplit \
|
||||
cut \
|
||||
dircolors \
|
||||
dirname \
|
||||
|
|
27
src/uu/csplit/Cargo.toml
Normal file
27
src/uu/csplit/Cargo.toml
Normal file
|
@ -0,0 +1,27 @@
|
|||
[package]
|
||||
name = "uu_csplit"
|
||||
version = "0.0.1"
|
||||
authors = ["uutils developers"]
|
||||
license = "MIT"
|
||||
description = "csplit ~ (uutils) Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ..., and output byte counts of each piece to standard output"
|
||||
|
||||
homepage = "https://github.com/uutils/coreutils"
|
||||
repository = "https://github.com/uutils/coreutils/tree/master/src/uu/csplit"
|
||||
keywords = ["coreutils", "uutils", "cross-platform", "cli", "utility"]
|
||||
categories = ["command-line-utilities"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
path = "src/csplit.rs"
|
||||
|
||||
[dependencies]
|
||||
getopts = "0.2.17"
|
||||
failure = "0.1.1"
|
||||
failure_derive = "0.1.1"
|
||||
regex = "1.0.0"
|
||||
glob = "0.2.11"
|
||||
uucore = { version=">=0.0.4", package="uucore", path="../../uucore", features=["entries", "fs"] }
|
||||
|
||||
[[bin]]
|
||||
name = "csplit"
|
||||
path = "src/main.rs"
|
760
src/uu/csplit/src/csplit.rs
Normal file
760
src/uu/csplit/src/csplit.rs
Normal file
|
@ -0,0 +1,760 @@
|
|||
#![crate_name = "uu_csplit"]
|
||||
|
||||
#[macro_use]
|
||||
extern crate failure;
|
||||
#[macro_use]
|
||||
extern crate uucore;
|
||||
extern crate getopts;
|
||||
extern crate regex;
|
||||
use std::{fs::{File, remove_file}, io::{BufRead, BufWriter, Write}};
|
||||
use std::io::{self, BufReader};
|
||||
use getopts::Matches;
|
||||
use regex::Regex;
|
||||
/*
|
||||
mod split_name;
|
||||
mod patterns;
|
||||
*/
|
||||
mod splitname;
|
||||
mod patterns;
|
||||
mod csplitError;
|
||||
|
||||
use crate::splitname::SplitName;
|
||||
use crate::csplitError::CsplitError;
|
||||
//mod split_name;
|
||||
|
||||
|
||||
//mod csplit;
|
||||
|
||||
// Texts handed to the `app!` usage/help generator.
static SYNTAX: &str = "[OPTION]... FILE PATTERN...";
static SUMMARY: &str = "split a file into sections determined by context lines";
static LONG_HELP: &str = "Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ..., and output byte counts of each piece to standard output.";

// Long names of the command line options.
static SUFFIX_FORMAT_OPT: &str = "suffix-format";
static SUPPRESS_MATCHED_OPT: &str = "suppress-matched";
static DIGITS_OPT: &str = "digits";
static PREFIX_OPT: &str = "prefix";
static KEEP_FILES_OPT: &str = "keep-files";
static QUIET_OPT: &str = "quiet";
static ELIDE_EMPTY_FILES_OPT: &str = "elide-empty-files";
|
||||
|
||||
/// Command line options for csplit.
|
||||
pub struct CsplitOptions {
|
||||
split_name: crate::SplitName,
|
||||
keep_files: bool,
|
||||
quiet: bool,
|
||||
elide_empty_files: bool,
|
||||
suppress_matched: bool,
|
||||
}
|
||||
|
||||
impl CsplitOptions {
|
||||
fn new(matches: &Matches) -> CsplitOptions {
|
||||
let keep_files = matches.opt_present(KEEP_FILES_OPT);
|
||||
let quiet = matches.opt_present(QUIET_OPT);
|
||||
let elide_empty_files = matches.opt_present(ELIDE_EMPTY_FILES_OPT);
|
||||
let suppress_matched = matches.opt_present(SUPPRESS_MATCHED_OPT);
|
||||
|
||||
CsplitOptions {
|
||||
split_name: crash_if_err!(
|
||||
1,
|
||||
SplitName::new(
|
||||
matches.opt_str(PREFIX_OPT),
|
||||
matches.opt_str(SUFFIX_FORMAT_OPT),
|
||||
matches.opt_str(DIGITS_OPT)
|
||||
)
|
||||
),
|
||||
keep_files,
|
||||
quiet,
|
||||
elide_empty_files,
|
||||
suppress_matched,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits a file into severals according to the command line patterns.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - [`io::Error`] if there is some problem reading/writing from/to a file.
|
||||
/// - [`::CsplitError::LineOutOfRange`] if the linenum pattern is larger than the number of input
|
||||
/// lines.
|
||||
/// - [`::CsplitError::LineOutOfRangeOnRepetition`], like previous but after applying the pattern
|
||||
/// more than once.
|
||||
/// - [`::CsplitError::MatchNotFound`] if no line matched a regular expression.
|
||||
/// - [`::CsplitError::MatchNotFoundOnRepetition`], like previous but after applying the pattern
|
||||
/// more than once.
|
||||
pub fn csplit<T>(
|
||||
options: &CsplitOptions,
|
||||
patterns: Vec<patterns::Pattern>,
|
||||
input: T,
|
||||
) -> Result<(), CsplitError>
|
||||
where
|
||||
T: BufRead,
|
||||
{
|
||||
let mut input_iter = InputSplitter::new(input.lines().enumerate());
|
||||
let mut split_writer = SplitWriter::new(&options)?;
|
||||
let ret = do_csplit(&mut split_writer, patterns, &mut input_iter);
|
||||
|
||||
// consume the rest
|
||||
input_iter.rewind_buffer();
|
||||
if let Some((_, line)) = input_iter.next() {
|
||||
split_writer.new_writer()?;
|
||||
split_writer.writeln(line?)?;
|
||||
for (_, line) in input_iter {
|
||||
split_writer.writeln(line?)?;
|
||||
}
|
||||
split_writer.finish_split()?;
|
||||
}
|
||||
// delete files on error by default
|
||||
if ret.is_err() && !options.keep_files {
|
||||
split_writer.delete_all_splits()?;
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
fn do_csplit<I>(
|
||||
split_writer: &mut SplitWriter,
|
||||
patterns: Vec<patterns::Pattern>,
|
||||
input_iter: &mut InputSplitter<I>,
|
||||
) -> Result<(), CsplitError>
|
||||
where
|
||||
I: Iterator<Item = (usize, io::Result<String>)>,
|
||||
{
|
||||
// split the file based on patterns
|
||||
for pattern in patterns.into_iter() {
|
||||
let pattern_as_str = pattern.to_string();
|
||||
let is_skip = if let patterns::Pattern::SkipToMatch(_, _, _) = pattern {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
match pattern {
|
||||
patterns::Pattern::UpToLine(n, ex) => {
|
||||
let mut up_to_line = n;
|
||||
for (_, ith) in ex.iter() {
|
||||
split_writer.new_writer()?;
|
||||
match split_writer.do_to_line(&pattern_as_str, up_to_line, input_iter) {
|
||||
// the error happened when applying the pattern more than once
|
||||
Err(CsplitError::LineOutOfRange(_)) if ith != 1 => {
|
||||
return Err(CsplitError::LineOutOfRangeOnRepetition(
|
||||
pattern_as_str.to_string(),
|
||||
ith - 1,
|
||||
));
|
||||
}
|
||||
Err(err) => return Err(err),
|
||||
// continue the splitting process
|
||||
Ok(()) => (),
|
||||
}
|
||||
up_to_line += n;
|
||||
}
|
||||
}
|
||||
patterns::Pattern::UpToMatch(regex, offset, ex)
|
||||
| patterns::Pattern::SkipToMatch(regex, offset, ex) => {
|
||||
for (max, ith) in ex.iter() {
|
||||
if is_skip {
|
||||
// when skipping a part of the input, no writer is created
|
||||
split_writer.as_dev_null();
|
||||
} else {
|
||||
split_writer.new_writer()?;
|
||||
}
|
||||
match (
|
||||
split_writer.do_to_match(&pattern_as_str, ®ex, offset, input_iter),
|
||||
max,
|
||||
) {
|
||||
// in case of ::pattern::ExecutePattern::Always, then it's fine not to find a
|
||||
// matching line
|
||||
(Err(CsplitError::MatchNotFound(_)), None) => {
|
||||
return Ok(());
|
||||
}
|
||||
// the error happened when applying the pattern more than once
|
||||
(Err(CsplitError::MatchNotFound(_)), Some(m)) if m != 1 && ith != 1 => {
|
||||
return Err(CsplitError::MatchNotFoundOnRepetition(
|
||||
pattern_as_str.to_string(),
|
||||
ith - 1,
|
||||
));
|
||||
}
|
||||
(Err(err), _) => return Err(err),
|
||||
// continue the splitting process
|
||||
(Ok(()), _) => (),
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a portion of the input file into a split which filename is based on an incrementing
|
||||
/// counter.
|
||||
struct SplitWriter<'a> {
|
||||
/// the options set through the command line
|
||||
options: &'a CsplitOptions,
|
||||
/// a split counter
|
||||
counter: usize,
|
||||
/// the writer to the current split
|
||||
current_writer: Option<BufWriter<File>>,
|
||||
/// the size in bytes of the current split
|
||||
size: usize,
|
||||
/// flag to indicate that no content should be written to a split
|
||||
dev_null: bool,
|
||||
}
|
||||
|
||||
impl<'a> Drop for SplitWriter<'a> {
|
||||
fn drop(&mut self) {
|
||||
if self.options.elide_empty_files && self.size == 0 {
|
||||
let file_name = self.options.split_name.get(self.counter);
|
||||
remove_file(file_name).expect("Failed to elide split");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> SplitWriter<'a> {
|
||||
fn new(options: &CsplitOptions) -> io::Result<SplitWriter> {
|
||||
Ok(SplitWriter {
|
||||
options,
|
||||
counter: 0,
|
||||
current_writer: None,
|
||||
size: 0,
|
||||
dev_null: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a new split and returns its filename.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// The creation of the split file may fail with some [`io::Error`].
|
||||
fn new_writer(&mut self) -> io::Result<()> {
|
||||
let file_name = self.options.split_name.get(self.counter);
|
||||
let file = File::create(&file_name)?;
|
||||
self.current_writer = Some(BufWriter::new(file));
|
||||
self.counter += 1;
|
||||
self.size = 0;
|
||||
self.dev_null = false;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// The current split will not keep any of the read input lines.
|
||||
fn as_dev_null(&mut self) {
|
||||
self.dev_null = true;
|
||||
}
|
||||
|
||||
/// Writes the line to the current split, appending a newline character.
|
||||
/// If [`dev_null`] is true, then the line is discarded.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Some [`io::Error`] may occur when attempting to write the line.
|
||||
fn writeln(&mut self, line: String) -> io::Result<()> {
|
||||
if !self.dev_null {
|
||||
match self.current_writer {
|
||||
Some(ref mut current_writer) => {
|
||||
let bytes = line.as_bytes();
|
||||
current_writer.write_all(bytes)?;
|
||||
current_writer.write(b"\n")?;
|
||||
self.size += bytes.len() + 1;
|
||||
}
|
||||
None => panic!("trying to write to a split that was not created"),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform some operations after completing a split, i.e., either remove it
|
||||
/// if the [`::ELIDE_EMPTY_FILES_OPT`] option is enabled, or print how much bytes were written
|
||||
/// to it if [`::QUIET_OPT`] is disabled.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Some [`io::Error`] if the split could not be removed in case it should be elided.
|
||||
fn finish_split(&mut self) -> io::Result<()> {
|
||||
if !self.dev_null {
|
||||
if self.options.elide_empty_files && self.size == 0 {
|
||||
self.counter -= 1;
|
||||
} else if !self.options.quiet {
|
||||
println!("{}", self.size);
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
/// Removes all the split files that were created.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an [`io::Error`] if there was a problem removing a split.
|
||||
fn delete_all_splits(&self) -> io::Result<()> {
|
||||
let mut ret = Ok(());
|
||||
for ith in 0..self.counter {
|
||||
let file_name = self.options.split_name.get(ith);
|
||||
if let Err(err) = remove_file(file_name) {
|
||||
ret = Err(err);
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
/// Split the input stream up to the line number `n`.
|
||||
///
|
||||
/// If the line number `n` is smaller than the current position in the input, then an empty
|
||||
/// split is created.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// In addition to errors reading/writing from/to a file, if the line number
|
||||
/// `n` is greater than the total available lines, then a
|
||||
/// [`::CsplitError::LineOutOfRange`] error is returned.
|
||||
fn do_to_line<I>(
|
||||
&mut self,
|
||||
pattern_as_str: &str,
|
||||
n: usize,
|
||||
input_iter: &mut InputSplitter<I>,
|
||||
) -> Result<(), CsplitError>
|
||||
where
|
||||
I: Iterator<Item = (usize, io::Result<String>)>,
|
||||
{
|
||||
input_iter.rewind_buffer();
|
||||
input_iter.set_size_of_buffer(1);
|
||||
|
||||
let mut ret = Err(CsplitError::LineOutOfRange(pattern_as_str.to_string()));
|
||||
while let Some((ln, line)) = input_iter.next() {
|
||||
let l = line?;
|
||||
if ln + 1 > n {
|
||||
if input_iter.add_line_to_buffer(ln, l).is_some() {
|
||||
panic!("the buffer is big enough to contain 1 line");
|
||||
}
|
||||
ret = Ok(());
|
||||
break;
|
||||
} else if ln + 1 == n {
|
||||
if !self.options.suppress_matched {
|
||||
if input_iter.add_line_to_buffer(ln, l).is_some() {
|
||||
panic!("the buffer is big enough to contain 1 line");
|
||||
}
|
||||
}
|
||||
ret = Ok(());
|
||||
break;
|
||||
}
|
||||
self.writeln(l)?;
|
||||
}
|
||||
self.finish_split()?;
|
||||
ret
|
||||
}
|
||||
|
||||
/// Read lines up to the line matching a [`Regex`]. With a non-zero offset,
|
||||
/// the block of relevant lines can be extended (if positive), or reduced
|
||||
/// (if negative).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// In addition to errors reading/writing from/to a file, the following errors may be returned:
|
||||
/// - if no line matched, an [`::CsplitError::MatchNotFound`].
|
||||
/// - if there are not enough lines to accomodate the offset, an
|
||||
/// [`::CsplitError::LineOutOfRange`].
|
||||
fn do_to_match<I>(
|
||||
&mut self,
|
||||
pattern_as_str: &str,
|
||||
regex: &Regex,
|
||||
mut offset: i32,
|
||||
input_iter: &mut InputSplitter<I>,
|
||||
) -> Result<(), CsplitError>
|
||||
where
|
||||
I: Iterator<Item = (usize, io::Result<String>)>,
|
||||
{
|
||||
if offset >= 0 {
|
||||
// The offset is zero or positive, no need for a buffer on the lines read.
|
||||
// NOTE: drain the buffer of input_iter, no match should be done within.
|
||||
for line in input_iter.drain_buffer() {
|
||||
self.writeln(line)?;
|
||||
}
|
||||
// retain the matching line
|
||||
input_iter.set_size_of_buffer(1);
|
||||
|
||||
while let Some((ln, line)) = input_iter.next() {
|
||||
let l = line?;
|
||||
if regex.is_match(&l) {
|
||||
match (self.options.suppress_matched, offset) {
|
||||
// no offset, add the line to the next split
|
||||
(false, 0) => {
|
||||
if input_iter.add_line_to_buffer(ln, l).is_some() {
|
||||
panic!("the buffer is big enough to contain 1 line");
|
||||
}
|
||||
}
|
||||
// a positive offset, some more lines need to be added to the current split
|
||||
(false, _) => self.writeln(l)?,
|
||||
_ => (),
|
||||
};
|
||||
offset -= 1;
|
||||
|
||||
// write the extra lines required by the offset
|
||||
while offset > 0 {
|
||||
match input_iter.next() {
|
||||
Some((_, line)) => {
|
||||
self.writeln(line?)?;
|
||||
}
|
||||
None => {
|
||||
self.finish_split()?;
|
||||
return Err(CsplitError::LineOutOfRange(
|
||||
pattern_as_str.to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
offset -= 1;
|
||||
}
|
||||
self.finish_split()?;
|
||||
return Ok(());
|
||||
}
|
||||
self.writeln(l)?;
|
||||
}
|
||||
} else {
|
||||
// With a negative offset we use a buffer to keep the lines within the offset.
|
||||
// NOTE: do not drain the buffer of input_iter, in case of an LineOutOfRange error
|
||||
// but do not rewind it either since no match should be done within.
|
||||
// The consequence is that the buffer may already be full with lines from a previous
|
||||
// split, which is taken care of when calling `shrink_buffer_to_size`.
|
||||
let offset_usize = -offset as usize;
|
||||
input_iter.set_size_of_buffer(offset_usize);
|
||||
while let Some((ln, line)) = input_iter.next() {
|
||||
let l = line?;
|
||||
if regex.is_match(&l) {
|
||||
for line in input_iter.shrink_buffer_to_size() {
|
||||
self.writeln(line)?;
|
||||
}
|
||||
if !self.options.suppress_matched {
|
||||
// add 1 to the buffer size to make place for the matched line
|
||||
input_iter.set_size_of_buffer(offset_usize + 1);
|
||||
if input_iter.add_line_to_buffer(ln, l).is_some() {
|
||||
panic!("should be big enough to hold every lines");
|
||||
}
|
||||
}
|
||||
self.finish_split()?;
|
||||
if input_iter.buffer_len() < offset_usize {
|
||||
return Err(CsplitError::LineOutOfRange(pattern_as_str.to_string()));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
if let Some(line) = input_iter.add_line_to_buffer(ln, l) {
|
||||
self.writeln(line)?;
|
||||
}
|
||||
}
|
||||
// no match, drain the buffer into the current split
|
||||
for line in input_iter.drain_buffer() {
|
||||
self.writeln(line)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.finish_split()?;
|
||||
Err(CsplitError::MatchNotFound(pattern_as_str.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator which can output items from a buffer filled externally.
/// This is used to pass matching lines to the next split and to support patterns with a
/// negative offset.
struct InputSplitter<I>
where
    I: Iterator<Item = (usize, io::Result<String>)>,
{
    /// the wrapped iterator over the numbered input lines
    iter: I,
    /// the lines put aside; a deque since lines are removed from (and, while rewinding,
    /// inserted at) the front, which is O(n) on a `Vec`
    buffer: std::collections::VecDeque<<I as Iterator>::Item>,
    /// the number of elements the buffer may hold
    size: usize,
    /// flag to indicate content off the buffer should be returned instead of off the wrapped
    /// iterator
    rewind: bool,
}

impl<I> InputSplitter<I>
where
    I: Iterator<Item = (usize, io::Result<String>)>,
{
    /// Wraps `iter` with an empty buffer of size 1, not rewinding.
    fn new(iter: I) -> InputSplitter<I> {
        InputSplitter {
            iter,
            buffer: std::collections::VecDeque::new(),
            rewind: false,
            size: 1,
        }
    }

    /// Rewind the iteration by outputting the buffer's content before reading any further from
    /// the wrapped iterator.
    fn rewind_buffer(&mut self) {
        self.rewind = true;
    }

    /// Shrink the buffer so that its length is equal to the set size, returning an iterator for
    /// the elements that were too much (the oldest ones).
    fn shrink_buffer_to_size<'a>(&'a mut self) -> impl Iterator<Item = String> + 'a {
        let shrink_offset = self.buffer.len().saturating_sub(self.size);
        self.buffer
            .drain(..shrink_offset)
            .map(|(_, line)| line.expect("only successfully read lines are buffered"))
    }

    /// Drain the content of the buffer.
    fn drain_buffer<'a>(&'a mut self) -> impl Iterator<Item = String> + 'a {
        self.buffer
            .drain(..)
            .map(|(_, line)| line.expect("only successfully read lines are buffered"))
    }

    /// Set the maximum number of lines to keep.
    fn set_size_of_buffer(&mut self, size: usize) {
        self.size = size;
    }

    /// Add a line to the buffer.
    ///
    /// While rewinding, the line is put back at the front of the buffer, whatever its size, and
    /// `None` is returned. Otherwise the line is appended; if the buffer already holds `size`
    /// elements, its head is removed first and returned. A buffer of size zero cannot hold
    /// anything, so the line itself is handed back.
    fn add_line_to_buffer(&mut self, ln: usize, line: String) -> Option<String> {
        if self.rewind {
            self.buffer.push_front((ln, Ok(line)));
            None
        } else if self.buffer.len() >= self.size {
            match self.buffer.pop_front() {
                Some((_, head_line)) => {
                    self.buffer.push_back((ln, Ok(line)));
                    Some(head_line.expect("only successfully read lines are buffered"))
                }
                // empty buffer with a size of zero
                None => Some(line),
            }
        } else {
            self.buffer.push_back((ln, Ok(line)));
            None
        }
    }

    /// Returns the number of lines stored in the buffer
    fn buffer_len(&self) -> usize {
        self.buffer.len()
    }
}

impl<I> Iterator for InputSplitter<I>
where
    I: Iterator<Item = (usize, io::Result<String>)>,
{
    type Item = <I as Iterator>::Item;

    /// Returns the head of the buffer while rewinding; once the buffer is exhausted the rewind
    /// stops and items come from the wrapped iterator again.
    fn next(&mut self) -> Option<Self::Item> {
        if self.rewind {
            if let Some(item) = self.buffer.pop_front() {
                return Some(item);
            }
            self.rewind = false;
        }
        self.iter.next()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// An input splitter over the four lines "aaa", "bbb", "ccc" and "ddd".
    fn splitter_over_four_lines(
    ) -> InputSplitter<impl Iterator<Item = (usize, std::io::Result<String>)>> {
        let lines: Vec<std::io::Result<String>> = vec![
            Ok(String::from("aaa")),
            Ok(String::from("bbb")),
            Ok(String::from("ccc")),
            Ok(String::from("ddd")),
        ];
        InputSplitter::new(lines.into_iter().enumerate())
    }

    /// Reads the next item, checks its line number and content, and returns the line.
    fn expect_next<I>(splitter: &mut InputSplitter<I>, ln: usize, content: &str) -> String
    where
        I: Iterator<Item = (usize, std::io::Result<String>)>,
    {
        match splitter.next() {
            Some((n, Ok(line))) if n == ln => {
                assert_eq!(line, String::from(content));
                line
            }
            item => panic!("wrong item: {:?}", item),
        }
    }

    #[test]
    fn input_splitter() {
        let mut input_splitter = splitter_over_four_lines();

        input_splitter.set_size_of_buffer(2);
        assert_eq!(input_splitter.buffer_len(), 0);

        let line = expect_next(&mut input_splitter, 0, "aaa");
        assert_eq!(input_splitter.add_line_to_buffer(0, line), None);
        assert_eq!(input_splitter.buffer_len(), 1);

        let line = expect_next(&mut input_splitter, 1, "bbb");
        assert_eq!(input_splitter.add_line_to_buffer(1, line), None);
        assert_eq!(input_splitter.buffer_len(), 2);

        // the buffer is full, its head is pushed out
        let line = expect_next(&mut input_splitter, 2, "ccc");
        assert_eq!(
            input_splitter.add_line_to_buffer(2, line),
            Some(String::from("aaa"))
        );
        assert_eq!(input_splitter.buffer_len(), 2);

        input_splitter.rewind_buffer();

        expect_next(&mut input_splitter, 1, "bbb");
        assert_eq!(input_splitter.buffer_len(), 1);

        expect_next(&mut input_splitter, 2, "ccc");
        assert_eq!(input_splitter.buffer_len(), 0);

        expect_next(&mut input_splitter, 3, "ddd");
        assert_eq!(input_splitter.buffer_len(), 0);

        assert!(input_splitter.next().is_none());
    }

    #[test]
    fn input_splitter_interrupt_rewind() {
        let mut input_splitter = splitter_over_four_lines();

        input_splitter.set_size_of_buffer(3);
        assert_eq!(input_splitter.buffer_len(), 0);

        let line = expect_next(&mut input_splitter, 0, "aaa");
        assert_eq!(input_splitter.add_line_to_buffer(0, line), None);
        assert_eq!(input_splitter.buffer_len(), 1);

        let line = expect_next(&mut input_splitter, 1, "bbb");
        assert_eq!(input_splitter.add_line_to_buffer(1, line), None);
        assert_eq!(input_splitter.buffer_len(), 2);

        let line = expect_next(&mut input_splitter, 2, "ccc");
        assert_eq!(input_splitter.add_line_to_buffer(2, line), None);
        assert_eq!(input_splitter.buffer_len(), 3);

        input_splitter.rewind_buffer();

        // a line added back while rewinding is the next one to be returned
        let line = expect_next(&mut input_splitter, 0, "aaa");
        assert_eq!(input_splitter.add_line_to_buffer(0, line), None);
        assert_eq!(input_splitter.buffer_len(), 3);

        expect_next(&mut input_splitter, 0, "aaa");
        assert_eq!(input_splitter.buffer_len(), 2);

        expect_next(&mut input_splitter, 1, "bbb");
        assert_eq!(input_splitter.buffer_len(), 1);

        expect_next(&mut input_splitter, 2, "ccc");
        assert_eq!(input_splitter.buffer_len(), 0);

        expect_next(&mut input_splitter, 3, "ddd");
        assert_eq!(input_splitter.buffer_len(), 0);

        assert!(input_splitter.next().is_none());
    }
}
|
||||
|
||||
pub fn uumain(args: impl uucore::Args) -> i32 {
|
||||
let args = args.collect_str();
|
||||
|
||||
let matches = app!(SYNTAX, SUMMARY, LONG_HELP)
|
||||
.optopt(
|
||||
"b",
|
||||
SUFFIX_FORMAT_OPT,
|
||||
"use sprintf FORMAT instead of %02d",
|
||||
"FORMAT",
|
||||
)
|
||||
.optopt("f", PREFIX_OPT, "use PREFIX instead of 'xx'", "PREFIX")
|
||||
.optflag("k", KEEP_FILES_OPT, "do not remove output files on errors")
|
||||
.optflag(
|
||||
"",
|
||||
SUPPRESS_MATCHED_OPT,
|
||||
"suppress the lines matching PATTERN",
|
||||
)
|
||||
.optopt(
|
||||
"n",
|
||||
DIGITS_OPT,
|
||||
"use specified number of digits instead of 2",
|
||||
"DIGITS",
|
||||
)
|
||||
.optflag("s", QUIET_OPT, "do not print counts of output file sizes")
|
||||
.optflag("z", ELIDE_EMPTY_FILES_OPT, "remove empty output files")
|
||||
.parse(args);
|
||||
|
||||
// check for mandatory arguments
|
||||
if matches.free.is_empty() {
|
||||
show_error!("missing operand");
|
||||
exit!(1);
|
||||
}
|
||||
if matches.free.len() == 1 {
|
||||
show_error!("missing operand after '{}'", matches.free[0]);
|
||||
exit!(1);
|
||||
}
|
||||
// get the patterns to split on
|
||||
let patterns = return_if_err!(1, patterns::get_patterns(&matches.free[1..]));
|
||||
// get the file to split
|
||||
let file_name: &str = &matches.free[0];
|
||||
let options = CsplitOptions::new(&matches);
|
||||
if file_name == "-" {
|
||||
let stdin = io::stdin();
|
||||
crash_if_err!(1, csplit(&options, patterns, stdin.lock()));
|
||||
} else {
|
||||
let file = return_if_err!(1, File::open(file_name));
|
||||
let file_metadata = return_if_err!(1, file.metadata());
|
||||
if !file_metadata.is_file() {
|
||||
crash!(1, "'{}' is not a regular file", file_name);
|
||||
}
|
||||
crash_if_err!(1, csplit(&options, patterns, BufReader::new(file)));
|
||||
};
|
||||
0
|
||||
}
|
34
src/uu/csplit/src/csplitError.rs
Normal file
34
src/uu/csplit/src/csplitError.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
use std::io;
|
||||
|
||||
/// Errors thrown by the csplit command
|
||||
#[derive(Debug, Fail)]
|
||||
pub enum CsplitError {
|
||||
#[fail(display = "IO error: {}", _0)]
|
||||
IoError(io::Error),
|
||||
#[fail(display = "'{}': line number out of range", _0)]
|
||||
LineOutOfRange(String),
|
||||
#[fail(display = "'{}': line number out of range on repetition {}", _0, _1)]
|
||||
LineOutOfRangeOnRepetition(String, usize),
|
||||
#[fail(display = "'{}': match not found", _0)]
|
||||
MatchNotFound(String),
|
||||
#[fail(display = "'{}': match not found on repetition {}", _0, _1)]
|
||||
MatchNotFoundOnRepetition(String, usize),
|
||||
#[fail(display = "line number must be greater than zero")]
|
||||
LineNumberIsZero,
|
||||
#[fail(display = "line number '{}' is smaller than preceding line number, {}", _0, _1)]
|
||||
LineNumberSmallerThanPrevious(usize, usize),
|
||||
#[fail(display = "invalid pattern: {}", _0)]
|
||||
InvalidPattern(String),
|
||||
#[fail(display = "invalid number: '{}'", _0)]
|
||||
InvalidNumber(String),
|
||||
#[fail(display = "incorrect conversion specification in suffix")]
|
||||
SuffixFormatIncorrect,
|
||||
#[fail(display = "too many % conversion specifications in suffix")]
|
||||
SuffixFormatTooManyPercents,
|
||||
}
|
||||
|
||||
impl From<io::Error> for CsplitError {
|
||||
fn from(error: io::Error) -> Self {
|
||||
CsplitError::IoError(error)
|
||||
}
|
||||
}
|
2
src/uu/csplit/src/main.rs
Normal file
2
src/uu/csplit/src/main.rs
Normal file
|
@ -0,0 +1,2 @@
|
|||
|
||||
uucore_procs::main!(uu_csplit); // spell-checker:ignore procs uucore
|
353
src/uu/csplit/src/patterns.rs
Normal file
353
src/uu/csplit/src/patterns.rs
Normal file
|
@ -0,0 +1,353 @@
|
|||
use regex::Regex;
|
||||
use crate::csplitError::CsplitError;
|
||||
|
||||
/// The definition of a pattern to match on a line.
|
||||
#[derive(Debug)]
|
||||
pub enum Pattern {
|
||||
/// Copy the file's content to a split up to, not including, the given line number. The number
|
||||
/// of times the pattern is executed is detailed in [`ExecutePattern`].
|
||||
UpToLine(usize, ExecutePattern),
|
||||
/// Copy the file's content to a split up to, not including, the line matching the regex. The
|
||||
/// integer is an offset relative to the matched line of what to include (if positive) or
|
||||
/// to exclude (if negative). The number of times the pattern is executed is detailed in
|
||||
/// [`ExecutePattern`].
|
||||
UpToMatch(Regex, i32, ExecutePattern),
|
||||
/// Skip the file's content up to, not including, the line matching the regex. The integer
|
||||
/// is an offset relative to the matched line of what to include (if positive) or to exclude
|
||||
/// (if negative). The number of times the pattern is executed is detailed in [`ExecutePattern`].
|
||||
SkipToMatch(Regex, i32, ExecutePattern),
|
||||
}
|
||||
|
||||
impl ToString for Pattern {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Pattern::UpToLine(n, _) => n.to_string(),
|
||||
Pattern::UpToMatch(regex, 0, _) => format!("/{}/", regex.as_str()),
|
||||
Pattern::UpToMatch(regex, offset, _) => format!("/{}/{:+}", regex.as_str(), offset),
|
||||
Pattern::SkipToMatch(regex, 0, _) => format!("%{}%", regex.as_str()),
|
||||
Pattern::SkipToMatch(regex, offset, _) => format!("%{}%{:+}", regex.as_str(), offset),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// How many times a pattern is to be applied.
#[derive(Debug)]
pub enum ExecutePattern {
    /// Apply the pattern as many times as possible
    Always,
    /// Apply the pattern a fixed number of times
    Times(usize),
}

impl ExecutePattern {
    /// Returns an iterator over the repetitions of the pattern.
    pub fn iter(&self) -> ExecutePatternIter {
        let max = match *self {
            ExecutePattern::Always => None,
            ExecutePattern::Times(n) => Some(n),
        };
        ExecutePatternIter::new(max)
    }
}

/// Iterator over the repetitions of a pattern. Each item pairs the maximum number of
/// repetitions (`None` when unlimited) with the 1-based index of the current repetition.
pub struct ExecutePatternIter {
    max: Option<usize>,
    cur: usize,
}

impl ExecutePatternIter {
    /// Starts before the first repetition.
    fn new(max: Option<usize>) -> ExecutePatternIter {
        ExecutePatternIter { max, cur: 0 }
    }
}

impl Iterator for ExecutePatternIter {
    type Item = (Option<usize>, usize);

    fn next(&mut self) -> Option<(Option<usize>, usize)> {
        // with a limit, stop once it has been reached; without one, count forever
        if self.max == Some(self.cur) {
            return None;
        }
        self.cur += 1;
        Some((self.max, self.cur))
    }
}
|
||||
|
||||
/// Parses the definitions of patterns given on the command line into a list of [`Pattern`]s.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// If a pattern is incorrect, a [`::CsplitError::InvalidPattern`] error is returned, which may be
|
||||
/// due to, e.g.,:
|
||||
/// - an invalid regular expression;
|
||||
/// - an invalid number for, e.g., the offset.
|
||||
pub fn get_patterns(args: &[String]) -> Result<Vec<Pattern>, CsplitError> {
|
||||
let patterns = extract_patterns(args)?;
|
||||
validate_line_numbers(&patterns)?;
|
||||
Ok(patterns)
|
||||
}
|
||||
|
||||
fn extract_patterns(args: &[String]) -> Result<Vec<Pattern>, CsplitError> {
|
||||
let mut patterns = Vec::with_capacity(args.len());
|
||||
let to_match_reg =
|
||||
Regex::new(r"^(/(?P<UPTO>.+)/|%(?P<SKIPTO>.+)%)(?P<OFFSET>[\+-]\d+)?$").unwrap();
|
||||
let execute_ntimes_reg = Regex::new(r"^\{(?P<TIMES>\d+)|\*\}$").unwrap();
|
||||
let mut iter = args.iter().peekable();
|
||||
|
||||
while let Some(arg) = iter.next() {
|
||||
// get the number of times a pattern is repeated, which is at least once plus whatever is
|
||||
// in the quantifier.
|
||||
let execute_ntimes = match iter.peek() {
|
||||
None => ExecutePattern::Times(1),
|
||||
Some(&next_item) => {
|
||||
match execute_ntimes_reg.captures(next_item) {
|
||||
None => ExecutePattern::Times(1),
|
||||
Some(r) => {
|
||||
// skip the next item
|
||||
iter.next();
|
||||
if let Some(times) = r.name("TIMES") {
|
||||
ExecutePattern::Times(times.as_str().parse::<usize>().unwrap() + 1)
|
||||
} else {
|
||||
ExecutePattern::Always
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// get the pattern definition
|
||||
if let Some(captures) = to_match_reg.captures(arg) {
|
||||
let offset = match captures.name("OFFSET") {
|
||||
None => 0,
|
||||
Some(m) => m.as_str().parse().unwrap(),
|
||||
};
|
||||
if let Some(up_to_match) = captures.name("UPTO") {
|
||||
let pattern = match Regex::new(up_to_match.as_str()) {
|
||||
Err(_) => {
|
||||
return Err(CsplitError::InvalidPattern(arg.to_string()));
|
||||
}
|
||||
Ok(reg) => reg,
|
||||
};
|
||||
patterns.push(Pattern::UpToMatch(pattern, offset, execute_ntimes));
|
||||
} else if let Some(skip_to_match) = captures.name("SKIPTO") {
|
||||
let pattern = match Regex::new(skip_to_match.as_str()) {
|
||||
Err(_) => {
|
||||
return Err(CsplitError::InvalidPattern(arg.to_string()));
|
||||
}
|
||||
Ok(reg) => reg,
|
||||
};
|
||||
patterns.push(Pattern::SkipToMatch(pattern, offset, execute_ntimes));
|
||||
}
|
||||
} else if let Some(line_number) = arg.parse::<usize>().ok() {
|
||||
patterns.push(Pattern::UpToLine(line_number, execute_ntimes));
|
||||
} else {
|
||||
return Err(CsplitError::InvalidPattern(arg.to_string()));
|
||||
}
|
||||
}
|
||||
Ok(patterns)
|
||||
}
|
||||
|
||||
/// Asserts the line numbers are in increasing order, starting at 1.
|
||||
fn validate_line_numbers(patterns: &[Pattern]) -> Result<(), CsplitError> {
|
||||
patterns
|
||||
.iter()
|
||||
.filter_map(|pattern| match pattern {
|
||||
Pattern::UpToLine(line_number, _) => Some(line_number),
|
||||
_ => None,
|
||||
})
|
||||
.try_fold(0, |prev_ln, ¤t_ln| match (prev_ln, current_ln) {
|
||||
// a line number cannot be zero
|
||||
(_, 0) => Err(CsplitError::LineNumberIsZero),
|
||||
// two consecutifs numbers should not be equal
|
||||
(n, m) if n == m => {
|
||||
show_warning!("line number '{}' is the same as preceding line number", n);
|
||||
Ok(n)
|
||||
}
|
||||
// a number cannot be greater than the one that follows
|
||||
(n, m) if n > m => Err(CsplitError::LineNumberSmallerThanPrevious(m, n)),
|
||||
(_, m) => Ok(m),
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned argument list expected by `get_patterns` from string literals.
    fn to_args(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Tells whether two repetition specifications are identical; written by hand so that the
    /// tests do not require `ExecutePattern` to implement `PartialEq`.
    fn same_repetition(actual: &ExecutePattern, expected: &ExecutePattern) -> bool {
        match (actual, expected) {
            (ExecutePattern::Always, ExecutePattern::Always) => true,
            (ExecutePattern::Times(a), ExecutePattern::Times(b)) => a == b,
            _ => false,
        }
    }

    #[test]
    fn bad_pattern() {
        assert!(get_patterns(&to_args(&["bad"])).is_err());
    }

    #[test]
    fn up_to_line_pattern() {
        let patterns = get_patterns(&to_args(&["24", "42", "{*}", "50", "{4}"])).unwrap();
        // `{4}` means four additional executions, hence five in total
        let expected: [(usize, ExecutePattern); 3] = [
            (24, ExecutePattern::Times(1)),
            (42, ExecutePattern::Always),
            (50, ExecutePattern::Times(5)),
        ];
        assert_eq!(patterns.len(), expected.len());
        for (pattern, (line, repetition)) in patterns.iter().zip(expected.iter()) {
            match pattern {
                Pattern::UpToLine(n, times) => {
                    assert_eq!(n, line);
                    assert!(
                        same_repetition(times, repetition),
                        "expected {:?}, got {:?}",
                        repetition,
                        times
                    );
                }
                _ => panic!("expected UpToLine pattern"),
            }
        }
    }

    #[test]
    fn up_to_match_pattern() {
        let input = to_args(&[
            "/test1.*end$/",
            "/test2.*end$/",
            "{*}",
            "/test3.*end$/",
            "{4}",
            "/test4.*end$/+3",
            "/test5.*end$/-3",
        ]);
        let patterns = get_patterns(input.as_slice()).unwrap();
        let expected: [(&str, i32, ExecutePattern); 5] = [
            ("test1.*end$", 0, ExecutePattern::Times(1)),
            ("test2.*end$", 0, ExecutePattern::Always),
            ("test3.*end$", 0, ExecutePattern::Times(5)),
            ("test4.*end$", 3, ExecutePattern::Times(1)),
            ("test5.*end$", -3, ExecutePattern::Times(1)),
        ];
        assert_eq!(patterns.len(), expected.len());
        for (pattern, (regex, offset, repetition)) in patterns.iter().zip(expected.iter()) {
            match pattern {
                Pattern::UpToMatch(reg, off, times) => {
                    assert_eq!(reg.as_str(), *regex);
                    assert_eq!(off, offset);
                    assert!(
                        same_repetition(times, repetition),
                        "expected {:?}, got {:?}",
                        repetition,
                        times
                    );
                }
                _ => panic!("expected UpToMatch pattern"),
            }
        }
    }

    #[test]
    fn skip_to_match_pattern() {
        let input = to_args(&[
            "%test1.*end$%",
            "%test2.*end$%",
            "{*}",
            "%test3.*end$%",
            "{4}",
            "%test4.*end$%+3",
            "%test5.*end$%-3",
        ]);
        let patterns = get_patterns(input.as_slice()).unwrap();
        let expected: [(&str, i32, ExecutePattern); 5] = [
            ("test1.*end$", 0, ExecutePattern::Times(1)),
            ("test2.*end$", 0, ExecutePattern::Always),
            ("test3.*end$", 0, ExecutePattern::Times(5)),
            ("test4.*end$", 3, ExecutePattern::Times(1)),
            ("test5.*end$", -3, ExecutePattern::Times(1)),
        ];
        assert_eq!(patterns.len(), expected.len());
        for (pattern, (regex, offset, repetition)) in patterns.iter().zip(expected.iter()) {
            match pattern {
                Pattern::SkipToMatch(reg, off, times) => {
                    assert_eq!(reg.as_str(), *regex);
                    assert_eq!(off, offset);
                    assert!(
                        same_repetition(times, repetition),
                        "expected {:?}, got {:?}",
                        repetition,
                        times
                    );
                }
                _ => panic!("expected SkipToMatch pattern"),
            }
        }
    }

    #[test]
    fn line_number_zero() {
        let patterns = vec![Pattern::UpToLine(0, ExecutePattern::Times(1))];
        match validate_line_numbers(&patterns) {
            Err(CsplitError::LineNumberIsZero) => (),
            _ => panic!("expected LineNumberIsZero error"),
        }
    }

    #[test]
    fn line_number_smaller_than_previous() {
        match get_patterns(&to_args(&["10", "5"])) {
            Err(CsplitError::LineNumberSmallerThanPrevious(5, 10)) => (),
            _ => panic!("expected LineNumberSmallerThanPrevious error"),
        }
    }

    #[test]
    fn line_number_smaller_than_previous_separate() {
        // the regex pattern in between does not reset the line-number ordering
        match get_patterns(&to_args(&["10", "/20/", "5"])) {
            Err(CsplitError::LineNumberSmallerThanPrevious(5, 10)) => (),
            _ => panic!("expected LineNumberSmallerThanPrevious error"),
        }
    }

    #[test]
    fn line_number_zero_separate() {
        match get_patterns(&to_args(&["10", "/20/", "0"])) {
            Err(CsplitError::LineNumberIsZero) => (),
            _ => panic!("expected LineNumberIsZero error"),
        }
    }
}
|
397
src/uu/csplit/src/splitname.rs
Normal file
397
src/uu/csplit/src/splitname.rs
Normal file
|
@ -0,0 +1,397 @@
|
|||
use regex::Regex;
|
||||
|
||||
//mod csplit;
|
||||
use crate::CsplitError;
|
||||
|
||||
/// Produces the filename of each split, honoring an optional user-defined suffix format.
pub struct SplitName {
    /// Maps the number of a split to its filename
    fn_split_name: Box<dyn Fn(usize) -> String>,
}
|
||||
|
||||
impl SplitName {
|
||||
/// Creates a new SplitName with the given user-defined options:
|
||||
/// - `prefix_opt` specifies a prefix for all splits.
|
||||
/// - `format_opt` specifies a custom format for the suffix part of the filename, using the
|
||||
/// `sprintf` format notation.
|
||||
/// - `n_digits_opt` defines the width of the split number.
|
||||
///
|
||||
/// # Caveats
|
||||
///
|
||||
/// If `prefix_opt` and `format_opt` are defined, and the `format_opt` has some string appearing
|
||||
/// before the conversion pattern (e.g., "here-%05d"), then it is appended to the passed prefix
|
||||
/// via `prefix_opt`.
|
||||
///
|
||||
/// If `n_digits_opt` and `format_opt` are defined, then width defined in `format_opt` is
|
||||
/// taken.
|
||||
pub fn new(
|
||||
prefix_opt: Option<String>,
|
||||
format_opt: Option<String>,
|
||||
n_digits_opt: Option<String>,
|
||||
) -> Result<SplitName, CsplitError> {
|
||||
// get the prefix
|
||||
let prefix = prefix_opt.unwrap_or("xx".to_string());
|
||||
// the width for the split offset
|
||||
let n_digits = match n_digits_opt {
|
||||
None => 2,
|
||||
Some(opt) => match opt.parse::<usize>() {
|
||||
Ok(digits) => digits,
|
||||
Err(_) => return Err(CsplitError::InvalidNumber(opt)),
|
||||
},
|
||||
};
|
||||
// translate the custom format into a function
|
||||
let fn_split_name: Box<dyn Fn(usize) -> String> = match format_opt {
|
||||
None => Box::new(move |n: usize| -> String {
|
||||
format!("{}{:0width$}", prefix, n, width = n_digits)
|
||||
}),
|
||||
Some(custom) => {
|
||||
let spec = Regex::new(
|
||||
r"(?P<ALL>%(?P<FLAG>[0#-])(?P<WIDTH>\d+)?(?P<TYPE>[diuoxX]))",
|
||||
).unwrap();
|
||||
let mut captures_iter = spec.captures_iter(&custom);
|
||||
let custom_fn: Box<dyn Fn(usize) -> String> = match captures_iter.next() {
|
||||
Some(captures) => {
|
||||
let all = captures.name("ALL").unwrap();
|
||||
let before = custom[0..all.start()].to_owned();
|
||||
let after = custom[all.end()..].to_owned();
|
||||
let n_digits = match captures.name("WIDTH") {
|
||||
None => 0,
|
||||
Some(m) => m.as_str().parse::<usize>().unwrap(),
|
||||
};
|
||||
match (captures.name("FLAG"), captures.name("TYPE")) {
|
||||
(Some(ref f), Some(ref t)) => {
|
||||
match (f.as_str(), t.as_str()) {
|
||||
/*
|
||||
* zero padding
|
||||
*/
|
||||
|
||||
// decimal
|
||||
("0", "d") | ("0", "i") | ("0", "u") => {
|
||||
Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:0width$}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
})
|
||||
}
|
||||
// octal
|
||||
("0", "o") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:0width$o}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
// lower hexadecimal
|
||||
("0", "x") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:0width$x}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
// upper hexadecimal
|
||||
("0", "X") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:0width$X}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
|
||||
/*
|
||||
* Alternate form
|
||||
*/
|
||||
|
||||
// octal
|
||||
("#", "o") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:>#width$o}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
// lower hexadecimal
|
||||
("#", "x") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:>#width$x}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
// upper hexadecimal
|
||||
("#", "X") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:>#width$X}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
|
||||
/*
|
||||
* Left adjusted
|
||||
*/
|
||||
|
||||
// decimal
|
||||
("-", "d") | ("-", "i") | ("-", "u") => {
|
||||
Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:<#width$}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
})
|
||||
}
|
||||
// octal
|
||||
("-", "o") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:<#width$o}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
// lower hexadecimal
|
||||
("-", "x") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:<#width$x}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
// upper hexadecimal
|
||||
("-", "X") => Box::new(move |n: usize| -> String {
|
||||
format!(
|
||||
"{}{}{:<#width$X}{}",
|
||||
prefix,
|
||||
before,
|
||||
n,
|
||||
after,
|
||||
width = n_digits
|
||||
)
|
||||
}),
|
||||
|
||||
_ => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
}
|
||||
}
|
||||
_ => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
}
|
||||
}
|
||||
None => return Err(CsplitError::SuffixFormatIncorrect),
|
||||
};
|
||||
|
||||
// there cannot be more than one format pattern
|
||||
if captures_iter.next().is_some() {
|
||||
return Err(CsplitError::SuffixFormatTooManyPercents);
|
||||
}
|
||||
custom_fn
|
||||
}
|
||||
};
|
||||
|
||||
Ok(SplitName { fn_split_name })
|
||||
}
|
||||
|
||||
/// Returns the filename of the i-th split.
|
||||
pub fn get(&self, n: usize) -> String {
|
||||
(self.fn_split_name)(n)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the name of split `n` when only a custom suffix format is given.
    fn name_with_format(format: &str, n: usize) -> String {
        SplitName::new(None, Some(String::from(format)), None)
            .unwrap()
            .get(n)
    }

    /// Returns `count` spaces; padding is spelled out this way so that the expected width stays
    /// explicit and cannot be altered by whitespace normalization.
    fn pad(count: usize) -> String {
        " ".repeat(count)
    }

    #[test]
    fn invalid_number() {
        match SplitName::new(None, None, Some(String::from("bad"))) {
            Err(CsplitError::InvalidNumber(_)) => (),
            _ => panic!("should fail with InvalidNumber"),
        };
    }

    #[test]
    fn invalid_suffix_format1() {
        match SplitName::new(None, Some(String::from("no conversion string")), None) {
            Err(CsplitError::SuffixFormatIncorrect) => (),
            _ => panic!("should fail with SuffixFormatIncorrect"),
        };
    }

    #[test]
    fn invalid_suffix_format2() {
        match SplitName::new(None, Some(String::from("%042a")), None) {
            Err(CsplitError::SuffixFormatIncorrect) => (),
            _ => panic!("should fail with SuffixFormatIncorrect"),
        };
    }

    #[test]
    fn default_formatter() {
        let split_name = SplitName::new(None, None, None).unwrap();
        assert_eq!(split_name.get(2), "xx02");
    }

    #[test]
    fn default_formatter_with_prefix() {
        let split_name = SplitName::new(Some(String::from("aaa")), None, None).unwrap();
        assert_eq!(split_name.get(2), "aaa02");
    }

    #[test]
    fn default_formatter_with_width() {
        let split_name = SplitName::new(None, None, Some(String::from("5"))).unwrap();
        assert_eq!(split_name.get(2), "xx00002");
    }

    #[test]
    fn zero_padding_decimal1() {
        assert_eq!(name_with_format("cst-%03d-", 2), "xxcst-002-");
    }

    #[test]
    fn zero_padding_decimal2() {
        let split_name = SplitName::new(
            Some(String::from("pre-")),
            Some(String::from("cst-%03d-post")),
            None,
        )
        .unwrap();
        assert_eq!(split_name.get(2), "pre-cst-002-post");
    }

    #[test]
    fn zero_padding_decimal3() {
        // the width of the custom format wins over the number of digits
        let split_name = SplitName::new(
            None,
            Some(String::from("cst-%03d-")),
            Some(String::from("42")),
        )
        .unwrap();
        assert_eq!(split_name.get(2), "xxcst-002-");
    }

    #[test]
    fn zero_padding_decimal4() {
        assert_eq!(name_with_format("cst-%03i-", 2), "xxcst-002-");
    }

    #[test]
    fn zero_padding_decimal5() {
        assert_eq!(name_with_format("cst-%03u-", 2), "xxcst-002-");
    }

    #[test]
    fn zero_padding_octal() {
        assert_eq!(name_with_format("cst-%03o-", 42), "xxcst-052-");
    }

    #[test]
    fn zero_padding_lower_hexa() {
        assert_eq!(name_with_format("cst-%03x-", 42), "xxcst-02a-");
    }

    #[test]
    fn zero_padding_upper_hexa() {
        assert_eq!(name_with_format("cst-%03X-", 42), "xxcst-02A-");
    }

    #[test]
    fn alternate_form_octal() {
        // right adjusted in a field of 10: 6 spaces followed by the 4 characters of "0o52"
        assert_eq!(
            name_with_format("cst-%#10o-", 42),
            format!("xxcst-{}0o52-", pad(6))
        );
    }

    #[test]
    fn alternate_form_lower_hexa() {
        assert_eq!(
            name_with_format("cst-%#10x-", 42),
            format!("xxcst-{}0x2a-", pad(6))
        );
    }

    #[test]
    fn alternate_form_upper_hexa() {
        assert_eq!(
            name_with_format("cst-%#10X-", 42),
            format!("xxcst-{}0x2A-", pad(6))
        );
    }

    #[test]
    fn left_adjusted_decimal1() {
        // left adjusted in a field of 10: "42" followed by 8 spaces
        assert_eq!(
            name_with_format("cst-%-10d-", 42),
            format!("xxcst-42{}-", pad(8))
        );
    }

    #[test]
    fn left_adjusted_decimal2() {
        assert_eq!(
            name_with_format("cst-%-10i-", 42),
            format!("xxcst-42{}-", pad(8))
        );
    }

    #[test]
    fn left_adjusted_decimal3() {
        assert_eq!(
            name_with_format("cst-%-10u-", 42),
            format!("xxcst-42{}-", pad(8))
        );
    }

    #[test]
    fn left_adjusted_octal() {
        assert_eq!(
            name_with_format("cst-%-10o-", 42),
            format!("xxcst-0o52{}-", pad(6))
        );
    }

    #[test]
    fn left_adjusted_lower_hexa() {
        assert_eq!(
            name_with_format("cst-%-10x-", 42),
            format!("xxcst-0x2a{}-", pad(6))
        );
    }

    #[test]
    fn left_adjusted_upper_hexa() {
        assert_eq!(
            name_with_format("cst-%-10X-", 42),
            format!("xxcst-0x2A{}-", pad(6))
        );
    }

    #[test]
    fn too_many_percent() {
        match SplitName::new(None, Some(String::from("%02d-%-3x")), None) {
            Err(CsplitError::SuffixFormatTooManyPercents) => (),
            _ => panic!("should fail with SuffixFormatTooManyPercents"),
        };
    }
}
|
1335
tests/by-util/test_csplit.rs
Normal file
1335
tests/by-util/test_csplit.rs
Normal file
File diff suppressed because it is too large
Load diff
50
tests/fixtures/csplit/numbers50.txt
vendored
Normal file
50
tests/fixtures/csplit/numbers50.txt
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12
|
||||
13
|
||||
14
|
||||
15
|
||||
16
|
||||
17
|
||||
18
|
||||
19
|
||||
20
|
||||
21
|
||||
22
|
||||
23
|
||||
24
|
||||
25
|
||||
26
|
||||
27
|
||||
28
|
||||
29
|
||||
30
|
||||
31
|
||||
32
|
||||
33
|
||||
34
|
||||
35
|
||||
36
|
||||
37
|
||||
38
|
||||
39
|
||||
40
|
||||
41
|
||||
42
|
||||
43
|
||||
44
|
||||
45
|
||||
46
|
||||
47
|
||||
48
|
||||
49
|
||||
50
|
Loading…
Reference in a new issue