1
0
mirror of https://github.com/uutils/coreutils synced 2024-07-09 04:06:02 +00:00

Merge pull request #234 from polyphemus/cut

Implement cut - implement #165
This commit is contained in:
Arcterus 2014-06-27 09:30:53 -07:00
commit 8fd455f8e5
6 changed files with 647 additions and 1 deletions

View File

@ -31,6 +31,10 @@ path = "comm/comm.rs"
name = "cp"
path = "cp/cp.rs"
[[bin]]
name = "cut"
path = "cut/cut.rs"
[[bin]]
name = "dirname"
path = "dirname/dirname.rs"

View File

@ -13,6 +13,7 @@ PROGS := \
cksum \
comm \
cp \
cut \
dirname \
echo \
env \

View File

@ -114,7 +114,6 @@ To do
- copy
- cp (not much done)
- csplit
- cut
- date
- dd
- df

528
cut/cut.rs Normal file
View File

@ -0,0 +1,528 @@
#![crate_id(name="cut", vers="1.0.0", author="Rolf Morel")]
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
#![feature(macro_rules)]
extern crate getopts;
extern crate libc;
use std::os;
use std::io::{File, BufferedWriter, BufferedReader, stdin, print};
use getopts::{optopt, optflag, getopts, usage};
use ranges::Range;
#[path = "../common/util.rs"]
mod util;
mod ranges;
// Program name printed by `--version`.
static NAME: &'static str = "cut";
// Version string printed by `--version`.
static VERSION: &'static str = "1.0.0";
/// Options used by the byte (`-b`) and character (`-c`) modes.
struct Options {
// Value of `--output-delimiter`, if given; written between selected ranges.
out_delim: Option<String>,
}
/// Options used by the field (`-f`) mode.
struct FieldOptions {
// Input field delimiter (`-d`, TAB when not given).
delimiter: String, // one char long, String because of UTF8 representation
// Value of `--output-delimiter`, if given; `None` means the input delimiter
// is reproduced in the output.
out_delimeter: Option<String>,
// Set by `-s`: lines that contain no delimiter are not printed.
only_delimited: bool,
}
/// What is being selected from each line, together with the selected ranges
/// and the options that apply to that kind of selection.
enum Mode {
// `-b LIST`: select bytes.
Bytes(Vec<Range>, Options),
// `-c LIST`: select characters.
Characters(Vec<Range>, Options),
// `-f LIST`: select delimiter-separated fields.
Fields(Vec<Range>, FieldOptions),
}
/// Parses the comma-separated `list` into ranges and, when `complement` is
/// set, replaces them with the ranges that were *not* selected.
///
/// A parse failure is returned unchanged as the `Err` message.
fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
    let parsed = Range::from_list(list);
    if !complement {
        return parsed;
    }
    parsed.map(|selected| ranges::complement(&selected))
}
/// Writes the bytes selected by `ranges` from every line of `reader` to
/// stdout.
///
/// `ranges` holds 1-based, inclusive byte positions and is expected to be
/// sorted and non-overlapping, as produced by `Range::from_list`. When
/// `opts.out_delim` is set, that string is written between consecutive
/// selected ranges.
///
/// The line's trailing newline is never treated as a selectable byte: a range
/// that starts at or beyond it selects nothing (and therefore emits no output
/// delimiter), and every output line is terminated by exactly one `\n`.
///
/// Always returns 0. A read error other than end-of-file, or any write error,
/// panics.
fn cut_bytes<T: Reader>(mut reader: BufferedReader<T>,
                        ranges: &Vec<Range>,
                        opts: &Options) -> int {
    let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
    let (use_delim, out_delim) = match opts.out_delim.clone() {
        Some(delim) => (true, delim),
        None => (false, "".to_str())
    };

    loop {
        let line = match reader.read_until(b'\n') {
            Ok(line) => line,
            Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
            _ => fail!(),
        };

        // Number of content bytes, i.e. the line length without its newline
        // (the last line of the input may lack one).
        let line_len = if line.len() > 0 && *line.get(line.len() - 1) == b'\n' {
            line.len() - 1
        } else {
            line.len()
        };
        let mut print_delim = false;

        for &Range { low: low, high: high } in ranges.iter() {
            // Ranges are sorted, so once one starts past the content every
            // later one does too.
            if low > line_len { break; }

            if use_delim {
                // The delimiter separates ranges: skip it before the first.
                if print_delim {
                    out.write_str(out_delim.as_slice()).unwrap();
                }
                print_delim = true;
            }

            // Clamp open-ended or overlong ranges to the end of the content.
            let end = if high < line_len { high } else { line_len };
            out.write(line.slice(low - 1, end)).unwrap();
        }

        out.write(&[b'\n']).unwrap();
    }

    0
}
/// Writes the characters selected by `ranges` from every line of `reader` to
/// stdout.
///
/// `ranges` holds 1-based, inclusive character positions and is expected to
/// be sorted and non-overlapping, as produced by `Range::from_list`. Adjacent
/// ranges (for example `1-2,3-4`) are supported. When `opts.out_delim` is set,
/// that string is written between consecutive selected ranges.
///
/// The line's trailing newline is never treated as a selectable character,
/// and every output line is terminated by exactly one `\n`.
///
/// Always returns 0. A read error other than end-of-file, or any write error,
/// panics.
fn cut_characters<T: Reader>(mut reader: BufferedReader<T>,
                             ranges: &Vec<Range>,
                             opts: &Options) -> int {
    let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
    let (use_delim, out_delim) = match opts.out_delim.clone() {
        Some(delim) => (true, delim),
        None => (false, "".to_str())
    };

    loop {
        let line = match reader.read_line() {
            Ok(line) => line,
            Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
            _ => fail!(),
        };

        let bytes = line.as_bytes();
        // Byte length of the line without its newline (the last line of the
        // input may lack one).
        let content_len = if bytes.len() > 0 && bytes[bytes.len() - 1] == b'\n' {
            bytes.len() - 1
        } else {
            bytes.len()
        };

        // Number of characters already pulled from `char_indices`.
        let mut char_pos = 0;
        // Byte index of the last character pulled, i.e. of character number
        // `char_pos`; only meaningful once `char_pos > 0`.
        let mut pending_idx = 0;
        let mut char_indices = line.as_slice().char_indices();
        let mut print_delim = false;

        for &Range { low: low, high: high } in ranges.iter() {
            // Finding the end of the previous range already consumed the
            // character right after it. If this range starts exactly there,
            // reuse its index instead of advancing the iterator (which would
            // underflow `low - char_pos - 1`).
            let low_idx = if char_pos > 0 && low == char_pos {
                pending_idx
            } else {
                match char_indices.nth(low - char_pos - 1) {
                    Some((low_idx, _)) => low_idx,
                    None => break
                }
            };
            // The range starts at the newline: nothing left to select.
            if low_idx >= content_len { break; }

            if use_delim {
                // The delimiter separates ranges: skip it before the first.
                if print_delim {
                    out.write_str(out_delim.as_slice()).unwrap();
                }
                print_delim = true;
            }

            // `nth(high - low)` yields the character just past the range, so
            // its byte index is the exclusive end of the segment.
            match char_indices.nth(high - low) {
                Some((high_idx, _)) => {
                    out.write(bytes.slice(low_idx, high_idx)).unwrap();
                    pending_idx = high_idx;
                    char_pos = high + 1;
                }
                None => {
                    // The range runs to (or past) the end of the line.
                    out.write(bytes.slice(low_idx, content_len)).unwrap();
                    break
                }
            }
        }

        out.write(&[b'\n']).unwrap();
    }

    0
}
/// Iterator over the occurrences of `needle` in `haystack`.
///
/// Each item is a `(start, end)` pair of byte offsets into `haystack`, where
/// `end` is one past the last byte of the match. Matches do not overlap: the
/// search resumes at `end`.
#[deriving(Clone)]
struct Searcher<'a> {
haystack: &'a [u8],
needle: &'a [u8],
// Offset in `haystack` at which the next search starts.
position: uint
}
impl<'a> Searcher<'a> {
/// Creates a searcher positioned at the start of `haystack`.
fn new(haystack: &'a [u8], needle: &'a [u8]) -> Searcher<'a> {
Searcher {
haystack: haystack,
needle: needle,
position: 0
}
}
}
impl<'a> Iterator<(uint, uint)> for Searcher<'a> {
/// Returns the offsets of the next match, or `None` when no further match
/// exists.
fn next(&mut self) -> Option<(uint, uint)> {
// Single-byte needle: a plain byte scan, without slicing a window at
// every position.
if self.needle.len() == 1 {
for offset in range(self.position, self.haystack.len()) {
if self.haystack[offset] == self.needle[0] {
self.position = offset + 1;
return Some((offset, offset + 1));
}
}
// Nothing found: park at the end so later calls return `None` at once.
self.position = self.haystack.len();
return None;
}
// General case: compare the needle-sized window at each position while
// such a window still fits in the haystack.
while self.position + self.needle.len() <= self.haystack.len() {
if self.haystack.slice(self.position,
self.position + self.needle.len()) == self.needle {
let match_pos = self.position;
// Resume after the match so matches never overlap.
self.position += self.needle.len();
return Some((match_pos, match_pos + self.needle.len()));
} else {
self.position += 1;
}
}
None
}
}
/// Field mode with an explicit output delimiter: for every line read from
/// `reader`, writes the fields selected by `ranges` to stdout, joined by
/// `out_delim` rather than by the input delimiter `delim`.
///
/// A line that contains no delimiter is written unchanged, unless
/// `only_delimited` is set, in which case it is skipped.
///
/// Always returns 0. A read error other than end-of-file, or any write error,
/// panics.
fn cut_fields_delimiter<T: Reader>(mut reader: BufferedReader<T>,
ranges: &Vec<Range>,
delim: &String,
only_delimited: bool,
out_delim: &String) -> int {
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
'newline: loop {
let line = match reader.read_until(b'\n') {
Ok(line) => line,
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
_ => fail!(),
};
// Number of the field that begins at `low_idx`; used to work out how many
// delimiters must be skipped to reach the start of the next range.
let mut fields_pos = 1;
// Byte offset in `line` at which the current field begins.
let mut low_idx = 0;
let mut delim_search = Searcher::new(line.as_slice(),
delim.as_bytes()).peekable();
// Becomes true once a field has been written, so that `out_delim` is
// only written between fields.
let mut print_delim = false;
// No delimiter anywhere on this line.
if delim_search.peek().is_none() {
if ! only_delimited {
out.write(line.as_slice()).unwrap();
// The last line of the input may lack a newline; add one.
if *line.get(line.len() - 1) != b'\n' {
out.write([b'\n']).unwrap();
}
}
continue
}
for &Range { low: low, high: high } in ranges.iter() {
// Skip ahead to the delimiter that precedes field `low`, unless the
// searcher is already positioned at that field.
if low - fields_pos > 0 {
low_idx = match delim_search.nth(low - fields_pos - 1) {
Some((_, beyond_delim)) => beyond_delim,
None => break
};
}
// One iteration per field in the range.
for _ in range(0, high - low + 1) {
if print_delim {
out.write_str(out_delim.as_slice()).unwrap();
}
match delim_search.next() {
// The field ends at the next delimiter.
Some((high_idx, next_low_idx)) => {
let segment = line.slice(low_idx, high_idx);
out.write(segment).unwrap();
print_delim = true;
low_idx = next_low_idx;
fields_pos = high + 1;
}
// No further delimiter: the rest of the line is the last field.
None => {
let segment = line.slice(low_idx, line.len());
out.write(segment).unwrap();
// The segment already carried the line's newline, so skip the
// one written after the range loop.
if *line.get(line.len() - 1) == b'\n' {
continue 'newline
}
break
}
}
}
}
out.write(&[b'\n']).unwrap();
}
0
}
/// Field mode: for every line read from `reader`, writes the fields selected
/// by `ranges` to stdout.
///
/// When `opts.out_delimeter` is set the work is handed to
/// `cut_fields_delimiter`. Otherwise the selected fields are written with the
/// input delimiter between them, by copying contiguous slices of the line.
///
/// A line that contains no delimiter is written unchanged, unless
/// `opts.only_delimited` is set, in which case it is skipped.
///
/// Always returns 0 on this path. A read error other than end-of-file, or any
/// write error, panics.
fn cut_fields<T: Reader>(mut reader: BufferedReader<T>,
ranges: &Vec<Range>,
opts: &FieldOptions) -> int {
match opts.out_delimeter {
Some(ref delim) => {
return cut_fields_delimiter(reader, ranges, &opts.delimiter,
opts.only_delimited, delim);
}
None => ()
}
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
'newline: loop {
let line = match reader.read_until(b'\n') {
Ok(line) => line,
Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break,
_ => fail!(),
};
// Number of the field that begins at `low_idx`; used to work out how many
// delimiters must be skipped to reach the start of the next range.
let mut fields_pos = 1;
// Byte offset in `line` at which the next segment to write begins.
let mut low_idx = 0;
let mut delim_search = Searcher::new(line.as_slice(),
opts.delimiter.as_bytes()).peekable();
// Becomes true once a segment has been written.
let mut print_delim = false;
// No delimiter anywhere on this line.
if delim_search.peek().is_none() {
if ! opts.only_delimited {
out.write(line.as_slice()).unwrap();
// The last line of the input may lack a newline; add one.
if *line.get(line.len() - 1) != b'\n' {
out.write([b'\n']).unwrap();
}
}
continue
}
for &Range { low: low, high: high } in ranges.iter() {
// Skip ahead to the delimiter that precedes field `low`, unless the
// searcher is already positioned at that field.
if low - fields_pos > 0 {
low_idx = match delim_search.nth(low - fields_pos - 1) {
Some((_, beyond_delim)) => beyond_delim,
None => break
};
}
// Something was already written: move the start back over the
// delimiter in front of this field, so the delimiter is copied along
// with the segment and separates it from the previous output.
if print_delim {
if low_idx >= opts.delimiter.as_bytes().len() {
low_idx -= opts.delimiter.as_bytes().len();
}
}
// Find the delimiter that ends field `high`; the fields of the range
// are contiguous in the line, so they are written as one slice.
match delim_search.nth(high - low) {
Some((high_idx, next_low_idx)) => {
let segment = line.slice(low_idx, high_idx);
out.write(segment).unwrap();
print_delim = true;
low_idx = next_low_idx;
fields_pos = high + 1;
}
// The range reaches past the last delimiter: write the rest of the
// line.
None => {
let segment = line.slice(low_idx, line.len());
out.write(segment).unwrap();
// The segment already carried the line's newline, so skip the
// one written after the range loop.
if *line.get(line.len() - 1) == b'\n' {
continue 'newline
}
break
}
}
}
out.write(&[b'\n']).unwrap();
}
0
}
/// Runs the selected `mode` over each of `filenames` in order and returns the
/// process exit code.
///
/// An empty list, or the name `-`, means standard input; standard input is
/// processed at most once even if `-` is listed several times. A file that
/// does not exist or cannot be opened is reported on stderr and skipped, and
/// makes the returned exit code non-zero; the remaining files are still
/// processed.
fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
    let mut stdin_read = false;
    let mut exit_code = 0;

    if filenames.len() == 0 { filenames.push("-".to_str()); }

    for filename in filenames.iter() {
        if filename.as_slice() == "-" {
            if stdin_read { continue; }

            exit_code |= match mode {
                Bytes(ref ranges, ref opts) => {
                    cut_bytes(stdin(), ranges, opts)
                }
                Characters(ref ranges, ref opts) => {
                    cut_characters(stdin(), ranges, opts)
                }
                Fields(ref ranges, ref opts) => {
                    cut_fields(stdin(), ranges, opts)
                }
            };

            stdin_read = true;
        } else {
            let path = Path::new(filename.as_slice());

            if ! path.exists() {
                show_error!("{}: No such file or directory", filename);
                // A skipped input is a failure of the whole run.
                exit_code |= 1;
                continue;
            }

            let buf_file = match File::open(&path) {
                Ok(file) => BufferedReader::new(file),
                Err(e) => {
                    show_error!("{}: {}", filename, e.desc);
                    // A skipped input is a failure of the whole run.
                    exit_code |= 1;
                    continue
                }
            };

            exit_code |= match mode {
                Bytes(ref ranges, ref opts) => cut_bytes(buf_file, ranges, opts),
                Characters(ref ranges, ref opts) => {
                    cut_characters(buf_file, ranges, opts)
                }
                Fields(ref ranges, ref opts) => {
                    cut_fields(buf_file, ranges, opts)
                }
            };
        }
    }

    exit_code
}
/// Binary entry point: runs `uumain` on the process arguments and records its
/// result as the process exit status.
#[allow(dead_code)]
fn main() {
    let status = uumain(os::args());
    os::set_exit_status(status);
}
/// Entry point of the `cut` utility. `args` is the full argument vector,
/// including the program name as its first element.
///
/// Returns 0 after printing the help or version text, 1 when the options
/// cannot be parsed or do not describe exactly one valid selection, and
/// otherwise the result of `cut_files`.
pub fn uumain(args: Vec<String>) -> int {
let opts = [
optopt("b", "bytes", "select only these bytes", "LIST"),
optopt("c", "characters", "select only these characters", "LIST"),
optopt("d", "delimiter", "use DELIM instead of TAB for field delimiter", "DELIM"),
optopt("f", "fields", "select only these fields; also print any line that contains no delimiter character, unless the -s option is specified", "LIST"),
optflag("n", "", "(ignored)"),
optflag("", "complement", "complement the set of selected bytes, characters or fields"),
optflag("s", "only-delimited", "do not print lines not containing delimiters"),
optopt("", "output-delimiter", "use STRING as the output delimiter the default is to use the input delimiter", "STRING"),
optflag("", "help", "display this help and exit"),
optflag("", "version", "output version information and exit"),
];
// Parse everything after the program name.
let matches = match getopts(args.tail(), opts) {
Ok(m) => m,
Err(f) => {
show_error!("Invalid options\n{}", f)
return 1;
}
};
if matches.opt_present("help") {
println!("Usage:");
println!(" {0} OPTION... [FILE]...", args.get(0));
println!("");
print(usage("Print selected parts of lines from each FILE to standard output.", opts).as_slice());
println!("");
println!("Use one, and only one of -b, -c or -f. Each LIST is made up of one");
println!("range, or many ranges separated by commas. Selected input is written");
println!("in the same order that it is read, and is written exactly once.");
println!("Each range is one of:");
println!("");
println!(" N N'th byte, character or field, counted from 1");
println!(" N- from N'th byte, character or field, to end of line");
println!(" N-M from N'th to M'th (included) byte, character or field");
println!(" -M from first to M'th (included) byte, character or field");
println!("");
println!("With no FILE, or when FILE is -, read standard input.");
return 0;
}
if matches.opt_present("version") {
println!("{} {}", NAME, VERSION);
return 0;
}
let complement = matches.opt_present("complement");
// Exactly one of -b, -c and -f must be given. Each valid combination is
// turned into a `Mode`; every other combination becomes an error message.
let mode_parse = match (matches.opt_str("bytes"),
matches.opt_str("characters"),
matches.opt_str("fields")) {
(Some(byte_ranges), None, None) => {
list_to_ranges(byte_ranges.as_slice(), complement).map(|ranges|
Bytes(ranges,
Options { out_delim: matches.opt_str("output-delimiter") })
)
}
(None, Some(char_ranges), None) => {
list_to_ranges(char_ranges.as_slice(), complement).map(|ranges|
Characters(ranges,
Options { out_delim: matches.opt_str("output-delimiter") })
)
}
(None, None, Some(field_ranges)) => {
list_to_ranges(field_ranges.as_slice(), complement).and_then(|ranges|
{
let out_delim = matches.opt_str("output-delimiter");
let only_delimited = matches.opt_present("only-delimited");
match matches.opt_str("delimiter") {
Some(delim) => {
// The length is counted in characters, not bytes, so a single
// multi-byte character is accepted as delimiter.
if delim.as_slice().char_len() != 1 {
Err("the delimiter must be a single character".to_str())
} else {
Ok(Fields(ranges,
FieldOptions {
delimiter: delim,
out_delimeter: out_delim,
only_delimited: only_delimited
}))
}
}
// No -d given: fields are separated by TAB.
None => Ok(Fields(ranges,
FieldOptions {
delimiter: "\t".to_str(),
out_delimeter: out_delim,
only_delimited: only_delimited
}))
}
}
)
}
// The single-option cases were handled above, so reaching this arm with
// any option present means that more than one was given.
(ref b, ref c, ref f) if b.is_some() || c.is_some() || f.is_some() => {
Err("only one type of list may be specified".to_str())
}
_ => Err("you must specify a list of bytes, characters, or fields".to_str())
};
match mode_parse {
// The free arguments are the input file names.
Ok(mode) => cut_files(matches.free, mode),
Err(err_msg) => {
show_error!("{}\n\
Try '{} --help' for more information",
err_msg, args.get(0));
1
}
}
}

112
cut/ranges.rs Normal file
View File

@ -0,0 +1,112 @@
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use std;
/// An inclusive range of positions, counted from 1.
///
/// The derived ordering compares `low` first and then `high`, which is the
/// order `from_list` sorts by before merging.
#[deriving(PartialEq,Eq,PartialOrd,Ord,Show)]
pub struct Range {
// First selected position.
pub low: uint,
// Last selected position; `uint::MAX` is used for an open-ended range.
pub high: uint,
}
/// Parses a single list item. The accepted forms are `N`, `N-`, `-M` and
/// `N-M`; position 0 is rejected, as is `N-M` with N greater than M.
impl std::from_str::FromStr for Range {
/// Returns `None` when `s` is not one of the accepted forms.
fn from_str(s: &str) -> Option<Range> {
use std::uint::MAX;
// NOTE(review): presumably splits at the first '-' only, giving at most two
// parts — confirm against the `splitn` of this Rust version.
let mut parts = s.splitn('-', 1);
match (parts.next(), parts.next()) {
// `N`: a single position.
(Some(nm), None) => {
from_str::<uint>(nm).filtered(|nm| *nm > 0)
.map(|nm| Range { low: nm, high: nm })
}
// `N-`: from N onwards, with no upper bound. A lone `-` also lands here
// and is rejected because the empty `n` does not parse.
(Some(n), Some(m)) if m.len() == 0 => {
from_str::<uint>(n).filtered(|low| *low > 0)
.map(|low| Range { low: low, high: MAX })
}
// `-M`: from the first position up to M.
(Some(n), Some(m)) if n.len() == 0 => {
from_str::<uint>(m).filtered(|high| *high >= 1)
.map(|high| Range { low: 1, high: high })
}
// `N-M`: both bounds given.
(Some(n), Some(m)) => {
match (from_str::<uint>(n), from_str::<uint>(m)) {
(Some(low), Some(high)) if low > 0 && low <= high => {
Some(Range { low: low, high: high })
}
_ => None
}
}
// The code treats a split that yields no first part as impossible.
_ => unreachable!()
}
}
}
impl Range {
pub fn from_list(list: &str) -> Result<Vec<Range>, String> {
use std::cmp::max;
let mut ranges = vec!();
for item in list.split(',') {
match from_str::<Range>(item) {
Some(range_item) => ranges.push(range_item),
None => return Err(format!("range '{}' was invalid", item))
}
}
ranges.sort();
// merge overlapping ranges
for i in range(0, ranges.len()) {
let j = i + 1;
while j < ranges.len() && ranges.get(j).low <= ranges.get(i).high {
let j_high = ranges.remove(j).unwrap().high;
ranges.get_mut(i).high = max(ranges.get(i).high, j_high);
}
}
Ok(ranges)
}
}
/// Returns the ranges covering every position from 1 to `uint::MAX` that is
/// *not* covered by `ranges`.
///
/// `ranges` must be sorted and non-overlapping, as produced by
/// `Range::from_list`. An empty `ranges` yields the single range
/// `1..uint::MAX`.
pub fn complement(ranges: &Vec<Range>) -> Vec<Range> {
    use std::uint;

    let mut complements = Vec::with_capacity(ranges.len() + 1);
    // Lowest position not yet known to be covered by `ranges`.
    let mut next_low = 1;
    // Set once a range reaches `uint::MAX`, so nothing lies beyond it.
    let mut covered_to_end = false;

    for selected in ranges.iter() {
        // Gap between the previous range (or position 1) and this one.
        if selected.low > next_low {
            complements.push(Range { low: next_low, high: selected.low - 1 });
        }

        if selected.high == uint::MAX {
            // Also avoids overflowing `high + 1` below.
            covered_to_end = true;
            break;
        }

        next_low = selected.high + 1;
    }

    // Everything after the last range, up to the largest position.
    if !covered_to_end {
        complements.push(Range { low: next_low, high: uint::MAX });
    }

    complements
}

View File

@ -18,6 +18,7 @@ extern crate chroot;
extern crate cksum;
extern crate comm;
extern crate cp;
extern crate cut;
extern crate dirname;
extern crate du;
extern crate echo;
@ -80,6 +81,7 @@ fn util_map() -> HashMap<&str, fn(Vec<String>) -> int> {
map.insert("cksum", cksum::uumain);
map.insert("comm", comm::uumain);
map.insert("cp", cp::uumain);
map.insert("cut", cut::uumain);
map.insert("dirname", dirname::uumain);
map.insert("du", du::uumain);
map.insert("echo", echo::uumain);