join: implement basic functionality

Add a basic implementation of join, along with some tests. The supported
options are: -1, -2, -j, -a, -i.
This commit is contained in:
Konstantin Pospelov 2017-12-14 00:02:42 +03:00
parent 372dda9dfa
commit b33ce67d91
19 changed files with 560 additions and 0 deletions

9
Cargo.lock generated
View file

@ -35,6 +35,7 @@ dependencies = [
"hostname 0.0.1",
"id 0.0.1",
"install 0.0.1",
"join 0.0.1",
"kill 0.0.1",
"lazy_static 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
@ -588,6 +589,14 @@ dependencies = [
"either 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "join"
version = "0.0.1"
dependencies = [
"getopts 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
"uucore 0.0.1",
]
[[package]]
name = "kernel32-sys"
version = "0.2.2"

View file

@ -69,6 +69,7 @@ fuchsia = [
generic = [
"cat",
"hashsum",
"join",
"more",
"ln",
"ls",
@ -178,6 +179,7 @@ hostid = { optional=true, path="src/hostid" }
hostname = { optional=true, path="src/hostname" }
id = { optional=true, path="src/id" }
install = { optional=true, path="src/install" }
join = { optional=true, path="src/join" }
kill = { optional=true, path="src/kill" }
link = { optional=true, path="src/link" }
ln = { optional=true, path="src/ln" }

View file

@ -61,6 +61,7 @@ PROGS := \
fold \
hashsum \
head \
join \
link \
ln \
ls \

16
src/join/Cargo.toml Normal file
View file

@ -0,0 +1,16 @@
[package]
name = "join"
version = "0.0.1"
authors = []
[lib]
name = "uu_join"
path = "join.rs"
[dependencies]
getopts = "0.2.14"
uucore = { path="../uucore" }
[[bin]]
name = "join"
path = "main.rs"

374
src/join/join.rs Normal file
View file

@ -0,0 +1,374 @@
#![crate_name = "uu_join"]
/*
* This file is part of the uutils coreutils package.
*
* (c) Konstantin Pospelov <kupospelov@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
extern crate getopts;
#[macro_use]
extern crate uucore;
use std::fs::File;
use std::io::{BufRead, BufReader, Lines, Read, stdin};
use std::cmp::Ordering;
static NAME: &'static str = "join";
static VERSION: &'static str = env!("CARGO_PKG_VERSION");
/// Selects which input file, if any, has its unpairable lines printed.
/// `uumain` derives the value from the `-a` option.
#[derive(PartialEq)]
enum FileNum {
/// `-a` was not given.
None,
/// `-a 1` was given.
File1,
/// `-a 2` was given.
File2,
}
/// Options collected from the command line by `uumain` and consumed by `exec`.
struct Settings {
/// Zero-based index of the join field in the first file.
key1: usize,
/// Zero-based index of the join field in the second file.
key2: usize,
/// Which file's unpairable lines are printed in addition to joined lines.
print_unpaired: FileNum,
/// Whether join fields are compared without regard to case.
ignore_case: bool,
}
impl Default for Settings {
    /// Settings used when no option is given: join on the first field of
    /// both files, compare case-sensitively, print no unpairable lines.
    fn default() -> Self {
        // Field indices are zero-based, so 0 selects the first field.
        let first_field = 0;
        Settings {
            ignore_case: false,
            print_unpaired: FileNum::None,
            key1: first_field,
            key2: first_field,
        }
    }
}
/// One input line, split into its whitespace-separated fields.
struct Line {
/// The fields, in the order they appeared on the line.
fields: Vec<String>,
}
impl Line {
    /// Split `string` on runs of whitespace and keep each piece as a field.
    fn new(string: String) -> Line {
        let mut fields = Vec::new();
        for piece in string.split_whitespace() {
            fields.push(piece.to_owned());
        }
        Line { fields: fields }
    }

    /// Return the field at the zero-based `index`, or an empty string when
    /// the line has fewer fields than that.
    fn get_field(&self, index: usize) -> &str {
        match self.fields.get(index) {
            Some(field) => field.as_str(),
            None => "",
        }
    }

    /// Call `f` on every field in order, skipping the one at `index`.
    fn foreach_except<F>(&self, index: usize, f: &F)
    where
        F: Fn(&String),
    {
        let others = self.fields
            .iter()
            .enumerate()
            .filter(|&(position, _)| position != index);
        for (_, field) in others {
            f(field);
        }
    }
}
/// Reading state for one of the two inputs being joined.
struct State {
/// Zero-based index of the join field for this input.
key: usize,
/// Whether unpairable lines of this input are printed.
print_unpaired: bool,
/// Line iterator over the input (a file or standard input, see `State::new`).
lines: Lines<BufReader<Box<Read>>>,
/// The current line, followed after `extend` by the further lines that share
/// its join key. Empty once the input is exhausted.
seq: Vec<Line>,
}
impl State {
fn new(name: &str, key: usize, print_unpaired: bool) -> State {
let f: Box<Read> = if name == "-" {
Box::new(stdin()) as Box<Read>
} else {
match File::open(name) {
Ok(file) => Box::new(file) as Box<Read>,
Err(err) => crash!(1, "{}: {}", name, err),
}
};
State {
key: key,
print_unpaired: print_unpaired,
lines: BufReader::new(f).lines(),
seq: Vec::new(),
}
}
/// Compare the key fields of the two current lines.
fn compare(&self, other: &State, ignore_case: bool) -> Ordering {
let key1 = self.seq[0].get_field(self.key);
let key2 = other.seq[0].get_field(other.key);
compare(key1, key2, ignore_case)
}
/// Skip the current unpaired line.
fn skip_line(&mut self) {
if self.print_unpaired {
self.print_unpaired_line(&self.seq[0]);
}
self.next_line();
}
/// Move to the next line, if any.
fn next_line(&mut self) {
match self.read_line() {
Some(line) => {
self.seq[0] = line;
}
None => {
self.seq.clear();
}
}
}
/// Keep reading line sequence until the key does not change, return
/// the first line whose key differs.
fn extend(&mut self, ignore_case: bool) -> Option<Line> {
while let Some(line) = self.read_line() {
let diff = compare(
self.seq[0].get_field(self.key),
line.get_field(self.key),
ignore_case,
);
if diff == Ordering::Equal {
self.seq.push(line);
} else {
return Some(line);
}
}
return None;
}
/// Combine two line sequences.
fn combine(&self, other: &State) {
let key = self.seq[0].get_field(self.key);
for line1 in &self.seq {
for line2 in &other.seq {
print!("{}", key);
line1.foreach_except(self.key, &print_field);
line2.foreach_except(other.key, &print_field);
println!();
}
}
}
/// Reset with the next line.
fn reset(&mut self, next_line: Option<Line>) {
self.seq.clear();
if let Some(line) = next_line {
self.seq.push(line);
}
}
fn has_line(&self) -> bool {
!self.seq.is_empty()
}
fn initialize(&mut self) {
if let Some(line) = self.read_line() {
self.seq.push(line);
}
}
fn finalize(&mut self) {
if self.has_line() && self.print_unpaired {
self.print_unpaired_line(&self.seq[0]);
while let Some(line) = self.read_line() {
self.print_unpaired_line(&line);
}
}
}
fn read_line(&mut self) -> Option<Line> {
match self.lines.next() {
Some(value) => Some(Line::new(value.expect("error reading file"))),
None => None,
}
}
fn print_unpaired_line(&self, line: &Line) {
print!("{}", line.get_field(self.key));
line.foreach_except(self.key, &print_field);
println!();
}
}
/// Entry point of the `join` utility: parse `args`, validate them and run the
/// join over the two named inputs.
///
/// `args[0]` is skipped when parsing options. Returns 0 after printing the
/// help text, otherwise the value returned by `exec`. Every usage error is
/// reported with `crash!(1, ...)`.
pub fn uumain(args: Vec<String>) -> i32 {
let mut settings: Settings = Default::default();
let mut opts = getopts::Options::new();
opts.optflag("h", "help", "display this help and exit");
opts.optopt(
"a",
"",
"also print unpairable lines from file FILENUM, where FILENUM is 1 or 2, corresponding to FILE1 or FILE2",
"FILENUM"
);
opts.optflag(
"i",
"ignore-case",
"ignore differences in case when comparing fields",
);
opts.optopt("j", "", "equivalent to '-1 FIELD -2 FIELD'", "FIELD");
opts.optopt("1", "", "join on this FIELD of file 1", "FIELD");
opts.optopt("2", "", "join on this FIELD of file 2", "FIELD");
let matches = match opts.parse(&args[1..]) {
Ok(m) => m,
Err(f) => crash!(1, "Invalid options\n{}", f),
};
// Help takes precedence over every other option and operand check.
if matches.opt_present("help") {
let msg = format!(
"{0} {1}
Usage:
{0} [OPTION]... FILE1 FILE2
For each pair of input lines with identical join fields, write a line to
standard output. The default join field is the first, delimited by blanks.",
NAME,
VERSION
);
print!("{}", opts.usage(&msg));
return 0;
}
// Field numbers are validated here as positive integers; they stay one-based
// until `get_field_number` converts them below.
let keys = parse_field_number(matches.opt_str("j"));
let key1 = parse_field_number(matches.opt_str("1"));
let key2 = parse_field_number(matches.opt_str("2"));
// Only "1" and "2" are accepted as the argument of `-a`.
settings.print_unpaired = match matches.opt_str("a") {
Some(value) => {
match &value[..] {
"1" => FileNum::File1,
"2" => FileNum::File2,
value => crash!(1, "invalid file number: {}", value),
}
}
None => FileNum::None,
};
settings.ignore_case = matches.opt_present("ignore-case");
// `-j` combined with a different `-1`/`-2` value is rejected inside
// `get_field_number`.
settings.key1 = get_field_number(keys, key1);
settings.key2 = get_field_number(keys, key2);
// Exactly two file operands are required.
let files = matches.free;
let file_count = files.len();
if file_count < 1 {
crash!(1, "missing operand");
} else if file_count < 2 {
crash!(1, "missing operand after '{}'", files[0]);
} else if file_count > 2 {
crash!(1, "extra operand '{}'", files[2]);
}
// "-" names standard input (see `State::new`); it can feed only one side.
if files[0] == "-" && files[1] == "-" {
crash!(1, "both files cannot be standard input");
}
exec(files, &settings)
}
/// Join the two inputs named by `files[0]` and `files[1]` according to
/// `settings`, printing the result to standard output.
///
/// The inputs are walked in lockstep like a merge: the side with the smaller
/// current key is advanced, and when the keys are equal every line sharing
/// that key on one side is paired with every such line on the other.
/// Always returns 0.
fn exec(files: Vec<String>, settings: &Settings) -> i32 {
    let show_unpaired1 = settings.print_unpaired == FileNum::File1;
    let show_unpaired2 = settings.print_unpaired == FileNum::File2;

    let mut left = State::new(&files[0], settings.key1, show_unpaired1);
    let mut right = State::new(&files[1], settings.key2, show_unpaired2);

    left.initialize();
    right.initialize();

    while left.has_line() && right.has_line() {
        let ordering = left.compare(&right, settings.ignore_case);

        if ordering == Ordering::Less {
            left.skip_line();
        } else if ordering == Ordering::Greater {
            right.skip_line();
        } else {
            // Gather the full run of equal keys on both sides before
            // printing; each `extend` hands back the line that ended the run.
            let pending_left = left.extend(settings.ignore_case);
            let pending_right = right.extend(settings.ignore_case);

            left.combine(&right);

            left.reset(pending_left);
            right.reset(pending_right);
        }
    }

    // One side is exhausted; whatever remains on the other is unpaired.
    left.finalize();
    right.finalize();

    0
}
/// Resolve the join field for one file from the shared `-j` value (`keys`)
/// and the file-specific `-1`/`-2` value (`key`), both one-based.
///
/// Returns the zero-based field index, defaulting to 0 when neither value is
/// given. If both are given and differ, the conflict is reported through
/// `crash!` with status 1.
fn get_field_number(keys: Option<usize>, key: Option<usize>) -> usize {
    match (keys, key) {
        (Some(shared), Some(specific)) if shared != specific => {
            crash!(1, "incompatible join fields {}, {}", shared, specific)
        }
        (Some(shared), _) => shared - 1,
        (None, Some(specific)) => specific - 1,
        (None, None) => 0,
    }
}
/// Parse an optional field argument as a positive integer.
///
/// `None` stays `None`. Text that is not a number, or is zero, is reported
/// through `crash!` with status 1.
fn parse_field_number(value: Option<String>) -> Option<usize> {
    value.map(|text| match text.parse::<usize>() {
        Ok(number) if number > 0 => number,
        _ => crash!(1, "invalid field number: '{}'", text),
    })
}
/// Order two join fields, optionally ignoring case.
///
/// With `ignore_case` set, both fields are lowercased before comparison;
/// otherwise they are compared as they are.
fn compare(field1: &str, field2: &str, ignore_case: bool) -> Ordering {
    if !ignore_case {
        return field1.cmp(field2);
    }

    let folded1 = field1.to_lowercase();
    let folded2 = field2.to_lowercase();
    folded1.cmp(&folded2)
}
/// Print `field` to standard output, preceded by a single space.
fn print_field(field: &String) {
    print!(" {}", field);
}

5
src/join/main.rs Normal file
View file

@ -0,0 +1,5 @@
extern crate uu_join;
/// Binary entry point: pass the command-line arguments to `uu_join::uumain`
/// and exit with the status it returns.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let status = uu_join::uumain(args);
    std::process::exit(status);
}

4
tests/fixtures/join/capitalized.txt vendored Normal file
View file

@ -0,0 +1,4 @@
A 1
B 2
C 4
D 8

View file

@ -0,0 +1,3 @@
A 1 2 f
B 2 3 g
C 4 4 h

5
tests/fixtures/join/default.expected vendored Normal file
View file

@ -0,0 +1,5 @@
1 a
2 b
3 c
5 e
8 h

View file

@ -0,0 +1,6 @@
2 b a f
3 c b g
4 d c h
5 e f i
6 f g j
7 g h k

View file

@ -0,0 +1,5 @@
c 3 2 1 cd
d 4 3 2 de
e 5 5 3 ef
f 6 7 4 fg
g 7 11 5 gh

0
tests/fixtures/join/empty.txt vendored Normal file
View file

5
tests/fixtures/join/fields_1.txt vendored Normal file
View file

@ -0,0 +1,5 @@
1
2
3
5
8

9
tests/fixtures/join/fields_2.txt vendored Normal file
View file

@ -0,0 +1,9 @@
1 a
2 b
3 c
4 d
5 e
6 f
7 g
8 h
9 i

6
tests/fixtures/join/fields_3.txt vendored Normal file
View file

@ -0,0 +1,6 @@
a 2 f
b 3 g
c 4 h
f 5 i
g 6 j
h 7 k

5
tests/fixtures/join/fields_4.txt vendored Normal file
View file

@ -0,0 +1,5 @@
2 c 1 cd
3 d 2 de
5 e 3 ef
7 f 4 fg
11 g 5 gh

View file

@ -0,0 +1,9 @@
1 a
2 a f b
3 b g c
4 c h d
5 f i e
6 g j f
7 h k g
8 h
9 i

95
tests/test_join.rs Normal file
View file

@ -0,0 +1,95 @@
use common::util::*;
#[test]
fn empty_files() {
    // An empty file on either side, or on both, is expected to give a
    // successful run with empty output.
    let operand_pairs = [
        ("empty.txt", "empty.txt"),
        ("empty.txt", "fields_1.txt"),
        ("fields_1.txt", "empty.txt"),
    ];

    for &(first, second) in operand_pairs.iter() {
        new_ucmd!()
            .arg(first)
            .arg(second)
            .succeeds().stdout_only("");
    }
}
// Joins the first field of fields_1.txt with the second field of
// fields_2.txt (`-2 2`). The expected output is empty.
#[test]
fn empty_intersection() {
new_ucmd!()
.arg("fields_1.txt")
.arg("fields_2.txt")
.arg("-2")
.arg("2")
.succeeds().stdout_only("");
}
// Runs join with no options, so both files are joined on their first field,
// and compares the output with the default.expected fixture.
#[test]
fn default_arguments() {
new_ucmd!()
.arg("fields_1.txt")
.arg("fields_2.txt")
.succeeds().stdout_only_fixture("default.expected");
}
// Joins both files on their second field, first with `-j 2` and then with the
// equivalent `-1 2 -2 2`. Both runs are compared with the same fixture.
#[test]
fn different_fields() {
new_ucmd!()
.arg("fields_2.txt")
.arg("fields_4.txt")
.arg("-j")
.arg("2")
.succeeds().stdout_only_fixture("different_fields.expected");
new_ucmd!()
.arg("fields_2.txt")
.arg("fields_4.txt")
.arg("-1")
.arg("2")
.arg("-2")
.arg("2")
.succeeds().stdout_only_fixture("different_fields.expected");
}
// Joins the first field of fields_2.txt with the second field of
// fields_3.txt (`-2 2`) and compares the output with different_field.expected.
#[test]
fn different_field() {
new_ucmd!()
.arg("fields_2.txt")
.arg("fields_3.txt")
.arg("-2")
.arg("2")
.succeeds().stdout_only_fixture("different_field.expected");
}
// Exercises `-a`, which also prints the unpairable lines of the chosen file.
#[test]
fn unpaired_lines() {
// First run: `-a 1` with both files joined on their first field. The expected
// output is the first input file itself, fields_2.txt.
new_ucmd!()
.arg("fields_2.txt")
.arg("fields_3.txt")
.arg("-a")
.arg("1")
.succeeds().stdout_only_fixture("fields_2.txt");
// Second run: the second field of fields_3.txt is joined with the first field
// of fields_2.txt, and `-a 2` adds the unpairable lines of fields_2.txt.
new_ucmd!()
.arg("fields_3.txt")
.arg("fields_2.txt")
.arg("-1")
.arg("2")
.arg("-a")
.arg("2")
.succeeds().stdout_only_fixture("unpaired_lines.expected");
}
// Runs join with `-i` on capitalized.txt and fields_3.txt and compares the
// output with case_insensitive.expected.
#[test]
fn case_insensitive() {
new_ucmd!()
.arg("capitalized.txt")
.arg("fields_3.txt")
.arg("-i")
.succeeds().stdout_only_fixture("case_insensitive.expected");
}

View file

@ -58,6 +58,7 @@ generic! {
"fold", test_fold;
"hashsum", test_hashsum;
"head", test_head;
"join", test_join;
"link", test_link;
"ln", test_ln;
"ls", test_ls;