From 49a733a864c17a61cf6865365dd5c9b2cd6db4c9 Mon Sep 17 00:00:00 2001
From: Virgile Andreani
Date: Mon, 28 Jul 2014 02:11:49 +0200
Subject: [PATCH] Implement unexpand

---
 Cargo.toml               |   4 +
 Makefile                 |   2 +
 README.md                |   1 -
 src/unexpand/unexpand.rs | 255 +++++++++++++++++++++++++++++++++++++++
 test/unexpand.rs         | 109 +++++++++++++++++
 5 files changed, 370 insertions(+), 1 deletion(-)
 create mode 100644 src/unexpand/unexpand.rs
 create mode 100644 test/unexpand.rs

diff --git a/Cargo.toml b/Cargo.toml
index f30e6f22d..6cd3e60aa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -223,6 +223,10 @@ path = "tty/tty.rs"
 name = "uname"
 path = "uname/uname.rs"
 
+[[bin]]
+name = "unexpand"
+path = "unexpand/unexpand.rs"
+
 [[bin]]
 name = "uniq"
 path = "uniq/uniq.rs"
diff --git a/Makefile b/Makefile
index d73c49e4b..1f3e3a1ad 100644
--- a/Makefile
+++ b/Makefile
@@ -76,6 +76,7 @@ PROGS := \
   true \
   truncate \
   tsort \
+  unexpand \
   unlink \
   uniq \
   wc \
@@ -133,6 +134,7 @@ TEST_PROGS := \
   seq \
   tr \
   truncate \
+  unexpand
 
 TEST ?= $(TEST_PROGS)
 
diff --git a/README.md b/README.md
index afb3d40e0..065018207 100644
--- a/README.md
+++ b/README.md
@@ -164,7 +164,6 @@ To do
 - stty
 - tail (not all features implemented)
 - test (not all features implemented)
-- unexpand
 - uniq (in progress)
 - who
 
diff --git a/src/unexpand/unexpand.rs b/src/unexpand/unexpand.rs
new file mode 100644
index 000000000..495ebcb69
--- /dev/null
+++ b/src/unexpand/unexpand.rs
@@ -0,0 +1,255 @@
+#![crate_name = "unexpand"]
+
+/*
+ * This file is part of the uutils coreutils package.
+ *
+ * (c) Virgile Andreani
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+#![feature(macro_rules)]
+
+extern crate getopts;
+extern crate libc;
+
+use std::io;
+use std::from_str;
+
+#[path = "../common/util.rs"]
+mod util;
+
+static NAME: &'static str = "unexpand";
+static VERSION: &'static str = "0.0.1";
+
+static DEFAULT_TABSTOP: uint = 8;
+
+/// Parses a comma-separated list of tab stops such as "4" or "2,4,8".
+///
+/// Calls `crash!` when an entry is not a number, when an entry is 0, or
+/// when an entry is smaller than the one before it.
+fn tabstops_parse(s: String) -> Vec<uint> {
+    let words = s.as_slice().split(',').collect::<Vec<&str>>();
+
+    let nums = words.move_iter()
+        .map(|sn| from_str::from_str::<uint>(sn)
+            .unwrap_or_else(
+                || crash!(1, "{}\n", "tab size contains invalid character(s)"))
+            )
+        .collect::<Vec<uint>>();
+
+    if nums.iter().any(|&n| n == 0) {
+        crash!(1, "{}\n", "tab size cannot be 0");
+    }
+
+    match nums.iter().fold((true, 0), |(acc, last), &n| (acc && last <= n, n)) {
+        (false, _) => crash!(1, "{}\n", "tab sizes must be ascending"),
+        _ => {}
+    }
+
+    nums
+}
+
+/// Settings resolved from the command line.
+struct Options {
+    files: Vec<String>,
+    tabstops: Vec<uint>,
+    aflag: bool
+}
+
+impl Options {
+    /// Builds the options from parsed arguments: `-t` selects the tab stops
+    /// (default 8), `--all` or `--tabs` turn on conversion of non-leading
+    /// blanks unless `--first-only` is given, and "-" is used when no file is named.
+    fn new(matches: getopts::Matches) -> Options {
+        let tabstops = match matches.opt_str("t") {
+            None => vec!(DEFAULT_TABSTOP),
+            Some(s) => tabstops_parse(s)
+        };
+
+        let aflag = (matches.opt_present("all") || matches.opt_present("tabs"))
+                    && !matches.opt_present("first-only");
+
+        let files =
+            if matches.free.is_empty() {
+                vec!("-".to_string())
+            } else {
+                matches.free
+            };
+
+        Options { files: files, tabstops: tabstops, aflag: aflag }
+    }
+}
+
+/// Entry point: parses `args` (skipping the first element), handles
+/// `--help` and `-V`, and otherwise runs `unexpand`. Returns 0 whenever it
+/// returns.
+pub fn uumain(args: Vec<String>) -> int {
+    let opts = [
+        getopts::optflag("a", "all", "convert all blanks, instead of just initial blanks"),
+        getopts::optflag("", "first-only", "convert only leading sequences of blanks (overrides -a)"),
+        // NOTE(review): -t/--tabs is declared twice, presumably so that the
+        // usage text shows both the N and the LIST form.
+        getopts::optopt("t", "tabs", "have tabs N characters apart instead of 8 (enables -a)", "N"),
+        getopts::optopt("t", "tabs", "use comma separated LIST of tab positions (enables -a)", "LIST"),
+        getopts::optflag("h", "help", "display this help and exit"),
+        getopts::optflag("V", "version", "output version information and exit"),
+    ];
+
+    let matches = match getopts::getopts(args.tail(), opts) {
+        Ok(m) => m,
+        Err(f) => crash!(1, "{}", f)
+    };
+
+    if matches.opt_present("help") {
+        println!("Usage: {:s} [OPTION]... [FILE]...", NAME);
+        io::print(getopts::usage(
+            "Convert blanks in each FILE to tabs, writing to standard output.\n\
+            With no FILE, or when FILE is -, read standard input.", opts).as_slice());
+        return 0;
+    }
+
+    if matches.opt_present("V") {
+        println!("{} v{}", NAME, VERSION);
+        return 0;
+    }
+
+    unexpand(Options::new(matches));
+
+    return 0;
+}
+
+/// Opens `path` for buffered reading; "-" selects standard input.
+/// NOTE(review): every open failure is reported as "No such file or
+/// directory", whatever the underlying error was.
+fn open(path: String) -> io::BufferedReader<Box<Reader>> {
+    let mut file_buf;
+    if path.as_slice() == "-" {
+        io::BufferedReader::new(box io::stdio::stdin_raw() as Box<Reader>)
+    } else {
+        file_buf = match io::File::open(&Path::new(path.as_slice())) {
+            Ok(a) => a,
+            _ => crash!(1, "{}: {}\n", path, "No such file or directory")
+        };
+        io::BufferedReader::new(box file_buf as Box<Reader>)
+    }
+}
+
+/// Tells whether `col` is a tab stop: a multiple of the single tab size, or
+/// an exact member of the tab stop list.
+fn is_tabstop(tabstops: &[uint], col: uint) -> bool {
+    match tabstops {
+        [tabstop] => col % tabstop == 0,
+        tabstops => tabstops.bsearch(|&e| e.cmp(&col)).is_some()
+    }
+}
+
+/// Returns the distance from `col` to the next tab stop, or `None` when
+/// `col` is at or past the last stop of a list.
+fn to_next_stop(tabstops: &[uint], col: uint) -> Option<uint> {
+    match tabstops {
+        [tabstop] => Some(tabstop - col % tabstop),
+        tabstops => tabstops.iter().skip_while(|&t| *t <= col).next()
+            .map(|&tabstop| tabstop - col % tabstop)
+    }
+}
+
+/// Writes the pending run of `nspaces` blanks that ends at column `col`,
+/// using a tab for every tab stop crossed and spaces for the remainder.
+/// A single blank is only turned into a tab when `init` is set.
+fn unexpandspan(output: &mut io::LineBufferedWriter<io::stdio::StdWriter>,
+                tabstops: &[uint], nspaces: uint, col: uint, init: bool) {
+    let mut cur = col - nspaces;
+    if nspaces > 1 || init {
+        loop {
+            match to_next_stop(tabstops, cur) {
+                Some(to_next) if cur + to_next <= col => {
+                    safe_write!(output, "{:c}", '\t');
+                    cur += to_next;
+                }
+                _ => break
+            }
+        }
+    }
+    safe_write!(output, "{:1$s}", "", col - cur);
+}
+
+/// Copies every input file to standard output, replacing runs of blanks
+/// with tabs. `col` is the current column, `nspaces` the length of the
+/// pending blank run, and `init` stays true from the start of a line until
+/// the backspace arm or the catch-all arm runs.
+fn unexpand(options: Options) {
+    let mut output = io::stdout();
+    let ts = options.tabstops.as_slice();
+
+    for file in options.files.move_iter() {
+        let mut col = 0;
+        let mut nspaces = 0;
+        let mut init = true;
+        for c in open(file).chars() {
+            match c {
+                Ok(' ') => {
+                    if init || options.aflag {
+                        nspaces += 1;
+                    } else {
+                        nspaces = 0;
+                        safe_write!(output, "{:c}", ' ');
+                    }
+                    col += 1;
+                }
+                Ok('\t') if nspaces > 0 => {
+                    if is_tabstop(ts, col) {
+                        nspaces = 0;
+                        col += 1;
+                        safe_write!(output, "{:c}", '\t');
+                    }
+                    match to_next_stop(ts, col) {
+                        Some(to_next) => {
+                            nspaces += to_next;
+                            col += to_next;
+                        }
+                        None => {
+                            col += 1;
+                            unexpandspan(&mut output, ts, nspaces, col, init);
+                            nspaces = 0;
+                            safe_write!(output, "{:c}", '\t');
+                        }
+                    }
+                }
+                Ok('\x08') => { // '\b'
+                    if init || options.aflag {
+                        unexpandspan(&mut output, ts, nspaces, col, init)
+                    }
+                    nspaces = 0;
+                    if col > 0 { col -= 1; }
+                    init = false;
+                    safe_write!(output, "{:c}", '\x08');
+                }
+                Ok('\n') => {
+                    if init || options.aflag {
+                        unexpandspan(&mut output, ts, nspaces, col, init)
+                    }
+                    nspaces = 0;
+                    col = 0;
+                    init = true;
+                    safe_write!(output, "{:c}", '\n');
+                }
+                Ok(c) => {
+                    if init || options.aflag {
+                        unexpandspan(&mut output, ts, nspaces, col, init)
+                    }
+                    nspaces = 0;
+                    col += 1;
+                    init = false;
+                    safe_write!(output, "{:c}", c);
+                }
+                Err(_) => break
+            }
+        }
+        if init || options.aflag {
+            unexpandspan(&mut output, ts, nspaces, col, init)
+        }
+    }
+}
+
diff --git a/test/unexpand.rs b/test/unexpand.rs
new file mode 100644
index 000000000..98c30669c
--- /dev/null
+++ b/test/unexpand.rs
@@ -0,0 +1,109 @@
+use std::io::process::Command;
+
+static PROGNAME: &'static str = "./unexpand";
+
+// NOTE(review): runs of spaces inside the string literals below look
+// collapsed (unexpand_init_0 expects different output for identical-looking
+// input lines) -- confirm the literals against the upstream commit.
+
+// Spawns PROGNAME with `args`, writes `input` to its stdin and returns the
+// `output` field of the finished process.
+fn run(input: &str, args: &[&'static str]) -> Vec<u8> {
+    let mut process = Command::new(PROGNAME).args(args).spawn().unwrap();
+
+    process.stdin.take_unwrap().write_str(input).unwrap();
+
+    let po = match process.wait_with_output() {
+        Ok(p) => p,
+        Err(err) => fail!("{}", err),
+    };
+    po.output
+}
+
+#[test]
+fn unexpand_init_0() {
+    let out = run(" 1\n 2\n 3\n 4\n", ["-t4"]);
+    assert_eq!(out.as_slice(), b" 1\n 2\n 3\n\t4\n");
+}
+
+#[test]
+fn unexpand_init_1() {
+    let out = run(" 5\n 6\n 7\n 8\n", ["-t4"]);
+    assert_eq!(out.as_slice(), b"\t 5\n\t 6\n\t 7\n\t\t8\n");
+}
+
+#[test]
+fn unexpand_init_list_0() {
+    let out = run(" 1\n 2\n 3\n 4\n", ["-t2,4"]);
+    assert_eq!(out.as_slice(), b" 1\n\t2\n\t 3\n\t\t4\n");
+}
+
+#[test]
+fn unexpand_init_list_1() {
+    // Once the list is exhausted, spaces are not converted anymore
+    let out = run(" 5\n 6\n 7\n 8\n", ["-t2,4"]);
+    assert_eq!(out.as_slice(), b"\t\t 5\n\t\t 6\n\t\t 7\n\t\t 8\n");
+}
+
+#[test]
+fn unexpand_aflag_0() {
+    let out = run("e E\nf F\ng G\nh H\n", []);
+    assert_eq!(out.as_slice(), b"e E\nf F\ng G\nh H\n");
+}
+
+#[test]
+fn unexpand_aflag_1() {
+    let out = run("e E\nf F\ng G\nh H\n", ["-a"]);
+    assert_eq!(out.as_slice(), b"e E\nf F\ng\tG\nh\t H\n");
+}
+
+#[test]
+fn unexpand_aflag_2() {
+    let out = run("e E\nf F\ng G\nh H\n", ["-t8"]);
+    assert_eq!(out.as_slice(), b"e E\nf F\ng\tG\nh\t H\n");
+}
+
+#[test]
+fn unexpand_first_only_0() {
+    let out = run(" A B", ["-t3"]);
+    assert_eq!(out.as_slice(), b"\t\t A\t B");
+}
+
+#[test]
+fn unexpand_first_only_1() {
+    let out = run(" A B", ["-t3", "--first-only"]);
+    assert_eq!(out.as_slice(), b"\t\t A B");
+}
+
+#[test]
+fn unexpand_trailing_space_0() { // evil
+    // Individual spaces before fields starting with non blanks should not be
+    // converted, unless they are at the beginning of the line.
+    let out = run("123 \t1\n123 1\n123 \n123 ", ["-t4"]);
+    assert_eq!(out.as_slice(), b"123\t\t1\n123 1\n123 \n123 ");
+}
+
+#[test]
+fn unexpand_trailing_space_1() { // super evil
+    let out = run(" abc d e f g ", ["-t1"]);
+    assert_eq!(out.as_slice(), b"\tabc d e\t\tf\t\tg ");
+}
+
+#[test]
+fn unexpand_spaces_follow_tabs_0() {
+    // The two first spaces can be included into the first tab.
+    let out = run(" \t\t A", []);
+    assert_eq!(out.as_slice(), b"\t\t A");
+}
+
+#[test]
+fn unexpand_spaces_follow_tabs_1() { // evil
+    // Explanation of what is going on here:
+    // 'a' -> 'a' // first tabstop (1)
+    // ' \t' -> '\t' // second tabstop (4)
+    // ' ' -> '\t' // third tabstop (5)
+    // ' B \t' -> ' B \t' // after the list is exhausted, nothing must change
+    let out = run("a \t B \t", ["-t1,4,5"]);
+    assert_eq!(out.as_slice(), b"a\t\t B \t");
+}
+