diff --git a/Cargo.lock b/Cargo.lock index e0a5a45bd..db6789500 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,6 +48,7 @@ dependencies = [ "paste 0.0.1", "primal 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "printenv 0.0.1", + "printf 0.0.1", "ptx 0.0.1", "pwd 0.0.1", "rand 0.3.12 (registry+https://github.com/rust-lang/crates.io-index)", @@ -387,6 +388,11 @@ dependencies = [ "uucore 0.0.1", ] +[[package]] +name = "itertools" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "kernel32-sys" version = "0.2.1" @@ -615,6 +621,14 @@ dependencies = [ "uucore 0.0.1", ] +[[package]] +name = "printf" +version = "0.0.1" +dependencies = [ + "itertools 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", + "uucore 0.0.1", +] + [[package]] name = "ptx" version = "0.0.1" diff --git a/Cargo.toml b/Cargo.toml index 530e2139b..0fa2cac40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ generic = [ "od", "paste", "printenv", + "printf", "ptx", "pwd", "readlink", @@ -129,6 +130,7 @@ nproc = { optional=true, path="src/nproc" } od = { optional=true, path="src/od" } paste = { optional=true, path="src/paste" } printenv = { optional=true, path="src/printenv" } +printf = { optional=true, path="src/printf" } ptx = { optional=true, path="src/ptx" } pwd = { optional=true, path="src/pwd" } readlink = { optional=true, path="src/readlink" } diff --git a/Makefile b/Makefile index 8c7cd2a6d..9a580c379 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,7 @@ PROGS := \ od \ paste \ printenv \ + printf \ ptx \ pwd \ readlink \ @@ -149,6 +150,7 @@ TEST_PROGS := \ mv \ nl \ paste \ + printf \ ptx \ pwd \ readlink \ diff --git a/src/printf/Cargo.toml b/src/printf/Cargo.toml new file mode 100644 index 000000000..a2e1dfdad --- /dev/null +++ b/src/printf/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "printf" +version = "0.0.1" +authors = ["Nathan Ross"] + +[lib] +name = "uu_printf" +path = "printf.rs" + 
+[dependencies] +"itertools" = "*" +uucore = { path="../uucore" } + +[[bin]] +name = "printf" +path = "main.rs" diff --git a/src/printf/cli.rs b/src/printf/cli.rs new file mode 100644 index 000000000..27a788e52 --- /dev/null +++ b/src/printf/cli.rs @@ -0,0 +1,46 @@ +//! stdio convenience fns + +use std::io::{stderr, stdout, Write}; +use std::env; + +/// Exit status reported on success. +pub static EXIT_OK: i32 = 0; +/// Exit status reported on failure. +pub static EXIT_ERR: i32 = 1; + +/// Writes `msg` to stderr, prefixed with the current executable's path +/// (the prefix is empty when that path cannot be determined). +pub fn err_msg(msg:&str) { + let exe_path = match env::current_exe() { + Ok(p) => p.to_string_lossy().into_owned(), + _ => String::from("") + }; + // best effort: a diagnostic helper must not panic when stderr + // itself cannot be written to + let _ = writeln!(&mut stderr(),"{}: {}", exe_path, msg); +} + +// by default stdout only flushes +// to console when a newline is passed. +// the helpers below flush after every write; errors returned by the +// explicit write/flush calls are deliberately ignored (best effort). + +/// Prints one character and flushes stdout. +pub fn flush_char(c: &char) { + print!("{}", c); + let _ = stdout().flush(); +} + +/// Prints a string slice and flushes stdout. +pub fn flush_str(s: &str) { + print!("{}", s); + let _ = stdout().flush(); +} + +/// Writes raw bytes to stdout and flushes. +pub fn flush_bytes(bslice: &[u8]) { + // `write` may accept only part of the buffer; `write_all` keeps + // writing until every byte is out, so output is never cut short + let _ = stdout().write_all(bslice); + let _ = stdout().flush(); +} diff --git a/src/printf/main.rs b/src/printf/main.rs new file mode 100644 index 000000000..073aa250d --- /dev/null +++ b/src/printf/main.rs @@ -0,0 +1,5 @@ +extern crate uu_printf; + +fn main() { + std::process::exit(uu_printf::uumain(std::env::args().collect())); +} diff --git a/src/printf/memo.rs b/src/printf/memo.rs new file mode 100644 index 000000000..aa831ca7f --- /dev/null +++ b/src/printf/memo.rs @@ -0,0 +1,84 @@ +//! Memo runner of printf +//! Takes a format string and arguments +//! 1. tokenizes format string into tokens, consuming +//! any subst. arguments along the way. +//! 2. feeds remaining arguments into function +//! that prints tokens. 
+ +use std::iter::Peekable; +use std::slice::Iter; +use itertools::PutBackN; +use cli; +use tokenize::token::{Token, Tokenizer}; +use tokenize::unescaped_text::UnescapedText; +use tokenize::sub::Sub; + +pub struct Memo { + tokens: Vec>, +} + +fn warn_excess_args(first_arg : &str) { + cli::err_msg(&format!("warning: ignoring excess arguments, starting with '{}'", + first_arg)); +} + +impl Memo { + pub fn new( + pf_string: &String, + pf_args_it: &mut Peekable> + ) -> Memo { + let mut pm = Memo { tokens: Vec::new() }; + let mut tmp_token : Option>; + let mut it = PutBackN::new(pf_string.chars()); + let mut has_sub = false; + loop { + tmp_token = UnescapedText::from_it(&mut it, pf_args_it); + match tmp_token { + Some(x) => pm.tokens.push(x), + None => {} + } + tmp_token = Sub::from_it(&mut it, pf_args_it); + match tmp_token { + Some(x) => { + if ! has_sub { has_sub = true; } + pm.tokens.push(x); + }, + None => {} + } + if let Some(x) = it.next() { + it.put_back(x); + } else { break; } + } + if ! 
has_sub { + let mut drain= false; + if let Some(first_arg) = pf_args_it.peek() { + warn_excess_args(first_arg); + drain = true; + } + if drain { + loop { + //drain remaining args; + if pf_args_it.next().is_none() { + break; + } + } + } + } + pm + } + pub fn apply(&self, pf_args_it: &mut Peekable>) { + for tkn in self.tokens.iter() { + tkn.print(pf_args_it); + } + } + pub fn run_all(pf_string: &String, pf_args: &[String]) { + let mut arg_it = pf_args.iter().peekable(); + let pm = Memo::new(pf_string, &mut arg_it); + loop { + if arg_it.peek().is_none() { + break; + } + pm.apply(&mut arg_it); + } + } +} diff --git a/src/printf/mod.rs b/src/printf/mod.rs new file mode 100644 index 000000000..de04b0859 --- /dev/null +++ b/src/printf/mod.rs @@ -0,0 +1,4 @@ +mod cli; +mod memo; +mod tokenize; + diff --git a/src/printf/printf.rs b/src/printf/printf.rs new file mode 100644 index 000000000..f7a732df6 --- /dev/null +++ b/src/printf/printf.rs @@ -0,0 +1,288 @@ +#![crate_name = "uu_printf"] + +#![allow(dead_code)] + +extern crate itertools; + +mod cli; +mod memo; +mod tokenize; + +#[macro_use] +extern crate uucore; + +static NAME: &'static str = "printf"; +static VERSION: &'static str = "0.0.1"; +static SHORT_USAGE: &'static str = "printf: usage: printf [-v var] format [arguments]"; +static LONGHELP_LEAD: &'static str = "printf + + USAGE: printf FORMATSTRING [ARGUMENT]... + + basic anonymous string templating: + + prints format string at least once, repeating as long as there are remaining arguments + output prints escaped literals in the format string as character literals + output replaces anonymous fields with the next unused argument, formatted according to the field. 
+ +Options: + --help display this help and exit + --version output version information and exit + +"; +// detailed help text; uumain prints it right after LONGHELP_LEAD for --help +static LONGHELP_BODY: &'static str = " + Prints the FORMATSTRING, replacing escaped character sequences with character literals + and substitution field sequences with passed arguments + + literally, with the exception of the below + escaped character sequences, and the substitution sequences described further down. + + ESCAPE SEQUENCES + + The following escape sequences, organized here in alphabetical order, + will print the corresponding character literal: + + \" double quote + + \\\\ backslash + + \\a alert (BEL) + + \\b backspace + + \\c End-of-Input + + \\e escape + + \\f form feed + + \\n new line + + \\r carriage return + + \\t horizontal tab + + \\v vertical tab + + \\NNN byte with value expressed in octal value NNN (1 to 3 digits) + values greater than 256 will be treated + + \\xHH byte with value expressed in hexadecimal value HH (1 to 2 digits) + + \\uHHHH Unicode (ISO/IEC 10646) character with value expressed in hexadecimal value HHHH (4 digits) + + \\UHHHHHHHH Unicode character with value expressed in hexadecimal value HHHHHHHH (8 digits) + + %% a single % + + SUBSTITUTIONS + + SUBSTITUTION QUICK REFERENCE + + Fields + + %s - string + %b - string parsed for literals + second parameter is max length + + %c - char + no second parameter + + %i or %d - 64-bit integer + %u - 64 bit unsigned integer + %x or %X - 64-bit unsigned integer as hex + %o - 64-bit unsigned integer as octal + second parameter is min-width, integer + output below that width is padded with leading zeroes + + %f or %F - decimal floating point value + %e or %E - scientific notation floating point value + %g or %G - shorter of specially interpreted decimal or SciNote floating point value. 
+ second parameter is + -max places after decimal point for floating point output + -max number of significant digits for scientific notation output + + parameterizing fields + + examples: + + printf '%4.3i' 7 + has a first parameter of 4 + and a second parameter of 3 + will result in ' 007' + + printf '%.1s' abcde + has no first parameter + and a second parameter of 1 + will result in 'a' + + printf '%4c' q + has a first parameter of 4 + and no second parameter + will result in ' q' + + The first parameter of a field is the minimum width to pad the output to + if the output is less than the absolute value of this width, + it will be padded with leading spaces, or, if the argument is negative, + with trailing spaces. the default is zero. + + The second parameter of a field is particular to the output field type. + defaults can be found in the full substitution help below + + special prefixes to numeric arguments + 0 (e.g. 010) - interpret argument as octal (integer output fields only) + 0x (e.g. 0xABC) - interpret argument as hex (numeric output fields only) + \' (e.g. \'a) - interpret argument as a character constant + + HOW TO USE SUBSTITUTIONS + + Substitutions are used to pass additional argument(s) into the FORMAT string, to be formatted a + particular way. E.g. 
+ + printf 'the letter %X comes before the letter %X' 10 11 + + will print + + 'the letter A comes before the letter B' + + because the substitution field %X means + 'take an integer argument and write it as a hexadecimal number' + + Passing more arguments than are in the format string will cause the format string to be + repeated for the remaining substitutions + + printf 'it is %i F in %s \\n' 22 Portland 25 Boston 27 'New York' + + will print + + 'it is 22 F in Portland + it is 25 F in Boston + it is 27 F in New York + ' + If a format string is printed but there are fewer arguments remaining + than there are substitution fields, substitution fields without + an argument will default to empty strings, or for numeric fields + the value 0 + + AVAILABLE SUBSTITUTIONS + + This program, like GNU coreutils printf, + interprets a modified subset of the POSIX C printf spec, + a quick reference to substitutions is below. + + STRING SUBSTITUTIONS + All string fields have a 'max width' parameter + %.3s means 'print no more than three characters of the original input' + + %s - string + + %b - escaped string - the string will be checked for any escaped literals from + the escaped literal list above, and translated to literal characters. + e.g. \\n will be transformed into a newline character. + + One special rule about %b mode is that octal literals are interpreted differently + In arguments passed to %b, octal-interpreted literals must be in the form of \\0NNN instead of \\NNN + (Although, for legacy reasons, octal literals in the form of \\NNN will still be interpreted and not throw a warning, you will have problems if you use this for a literal whose code begins with zero, as it will be viewed as in \\0NNN form.) + + CHAR SUBSTITUTIONS + The character field does not have a secondary parameter. 
+ + %c - a single character + + INTEGER SUBSTITUTIONS + All integer fields have a 'pad with zero' parameter + %.4i means an integer which if it is less than 4 digits in length, + is padded with leading zeros until it is 4 digits in length. + + %d or %i - 64-bit integer + + %u - 64 bit unsigned integer + + %x or %X - 64 bit unsigned integer printed in Hexadecimal (base 16) + %X instead of %x means to use uppercase letters for 'a' through 'f' + + %o - 64 bit unsigned integer printed in octal (base 8) + + FLOATING POINT SUBSTITUTIONS + + All floating point fields have a 'max decimal places / max significant digits' parameter + %.10f means a decimal floating point with 10 decimal places past 0 + %.10e means a scientific notation number with 10 significant digits + %.10g means the same behavior for decimal and Sci. Note, respectively, and provides the shorter of each's output. + + Like with GNU coreutils, the value after the decimal point in these outputs is parsed as a double first before being rendered to text. For both implementations do not expect meaningful precision past the 18th decimal place. When using a number of decimal places that is 18 or higher, you can expect variation in output between GNU coreutils printf and this printf at the 18th decimal place of +/- 1 + + %f - floating point value presented in decimal, truncated and displayed to 6 decimal places by default. + There is not past-double behavior parity with Coreutils printf, values are not estimated or adjusted beyond input values. + + %e or %E - floating point value presented in scientific notation + 7 significant digits by default + %E means to use an uppercase E for the exponent. 
+ + %g or %G - floating point value presented in the shorter of decimal and scientific notation + behaves differently from %f and %E, please see posix printf spec for full details, + some examples of different behavior: + + Sci Note has 6 significant digits by default + Trailing zeroes are removed + Instead of being truncated, digit after last is rounded + + Like other behavior in this utility, the design choices of floating point + behavior in this utility are selected to reproduce exactly + the behavior of GNU coreutils' printf from an inputs and outputs standpoint. + + USING PARAMETERS + Most substitution fields can be parameterized using up to 2 numbers that can + be passed to the field, between the % sign and the field letter. + + The 1st parameter always indicates the minimum width of output, it is useful for creating + columnar output. Any output that would be less than this minimum width is padded with + leading spaces + The 2nd parameter is preceded by a dot. + You do not have to use parameters + + SPECIAL FORMS OF INPUT + For numeric input, the following additional forms of input are accepted besides decimal: + + Octal (only with integer): if the argument begins with a 0 the following characters + will be interpreted as octal (base 8) for integer fields + + Hexadecimal: if the argument begins with 0x the following characters + will be interpreted as hex (base 16) for any numeric fields + for float fields, hexadecimal input results in a precision + limit (in converting input past the decimal point) of 10^-15 + + Character Constant: if the argument begins with a single quote character, the first byte + of the next character will be interpreted as an 8-bit unsigned integer. If there are + additional bytes, they will throw an error (unless the environment variable POSIXLY_CORRECT is set) + +WRITTEN BY : + Nathan E. Ross, et al. 
for the uutils project + +MORE INFO : + https://github.com/uutils/coreutils + +COPYRIGHT : + Copyright 2015 uutils project. + Licensed under the MIT License, please see LICENSE file for details + +"; + +/// Entry point of the printf utility. +/// +/// `args` is the full argument vector: program name, then the format +/// string (or `--help` / `--version`), then the values to substitute. +/// Returns the process exit status. +pub fn uumain(args: Vec<String>) -> i32 { + // argv[0] may be missing; fall back to NAME instead of panicking + // on an out-of-range index + let location = args.get(0).map(|s| &s[..]).unwrap_or(NAME); + let formatstr = match args.get(1) { + Some(s) => s, + None => { + // usage errors go to stderr so they never end up in + // captured or piped output + let msg = format!("{0}: missing operand\nTry '{0} --help' for more information.\n", + location); + // best effort: nothing useful can be done if stderr is closed + let _ = std::io::Write::write_all(&mut std::io::stderr(), msg.as_bytes()); + return cli::EXIT_ERR; + } + }; + + if formatstr == "--help" { + print!("{} {}", LONGHELP_LEAD, LONGHELP_BODY); + } else if formatstr == "--version" { + println!("{} {}", NAME, VERSION); + } else { + let printf_args = &args[2..]; + memo::Memo::run_all(formatstr, printf_args); + } + cli::EXIT_OK +} diff --git a/src/printf/tokenize/mod.rs b/src/printf/tokenize/mod.rs new file mode 100644 index 000000000..0570b7489 --- /dev/null +++ b/src/printf/tokenize/mod.rs @@ -0,0 +1,4 @@ +pub mod token; +pub mod sub; +pub mod unescaped_text; +mod num_format; diff --git a/src/printf/tokenize/num_format/format_field.rs b/src/printf/tokenize/num_format/format_field.rs new file mode 100644 index 000000000..65a69c35b --- /dev/null +++ b/src/printf/tokenize/num_format/format_field.rs @@ -0,0 +1,42 @@ +//! Primitives used by Sub Tokenizer +//! 
and num_format modules +#[derive(Clone)] +pub enum FieldType { + Strf, + Floatf, + Scif, + Decf, + Intf, + Charf, +} + +/* +#[allow(non_camel_case_types)] +pub enum FChar { + d, + e, + E, + i, + f, + F, + g, + G, + u, + x, + X, + o +} + */ + +// a Sub Tokens' fields are stored +// as a single object so they can be more simply +// passed by ref to num_format in a Sub method +#[derive(Clone)] +pub struct FormatField<'a> { + pub min_width: Option, + pub second_field: Option, + pub field_char: & 'a char, + pub field_type: & 'a FieldType, + pub orig : & 'a String +} + diff --git a/src/printf/tokenize/num_format/formatter.rs b/src/printf/tokenize/num_format/formatter.rs new file mode 100644 index 000000000..9df3778a5 --- /dev/null +++ b/src/printf/tokenize/num_format/formatter.rs @@ -0,0 +1,73 @@ +//! Primitives used by num_format and sub_modules. +//! never dealt with above (e.g. Sub Tokenizer never uses these) +use std::str::Chars; +use itertools::PutBackN; +use cli; +use super::format_field::FormatField; + +// contains the rough ingredients to final +// output for a number, organized together +// to allow for easy generalization of output manipulation +// (e.g. 
max number of digits after decimal) +pub struct FormatPrimitive { + pub prefix: Option, + pub pre_decimal: Option, + pub post_decimal: Option, + pub suffix: Option +} + +impl Default for FormatPrimitive { + fn default() -> FormatPrimitive { + FormatPrimitive { + prefix: None, + pre_decimal: None, + post_decimal: None, + suffix: None + } + } +} + +#[derive(Clone)] +#[derive(PartialEq)] +pub enum Base { + Ten=10, + Hex=16, + Octal=8, +} + +// information from the beginning of a numeric argument +// the precedes the beginning of a numeric value +pub struct InPrefix { + pub radix_in : Base, + pub sign : i8, + pub offset : usize +} + +pub trait Formatter { + // return a FormatPrimitive for + // particular field char(s), given the argument + // string and prefix information (sign, radix) + fn get_primitive( + &self, + field: &FormatField, + inprefix: &InPrefix, + str_in: &str + ) -> Option; + // return a string from a formatprimitive, + // given information about the field + fn primitive_to_str( + &self, + prim: &FormatPrimitive, + field: FormatField) -> String; +} +pub fn get_it_at(offset: usize, + str_in: &str) -> PutBackN { + PutBackN::new(str_in[offset..].chars()) +} + +// TODO: put this somewhere better +pub fn warn_incomplete_conv(pf_arg: &str) { + //important: keep println here not print + cli::err_msg(&format!("{}: value not completely converted", + pf_arg)) +} diff --git a/src/printf/tokenize/num_format/formatters/base_conv.rs b/src/printf/tokenize/num_format/formatters/base_conv.rs new file mode 100644 index 000000000..706bdf6cd --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/base_conv.rs @@ -0,0 +1,273 @@ +pub fn arrnum_int_mult( + arrnum : &Vec, + basenum : u8, + base_ten_int_fact : u8 + ) -> Vec { + let mut carry : u16 = 0; + let mut rem : u16; + let mut new_amount : u16; + let fact : u16 = base_ten_int_fact as u16; + let base : u16 = basenum as u16; + + let mut ret_rev : Vec = Vec::new(); + let mut it = arrnum.iter().rev(); + loop { + let i = 
it.next(); + match i { + Some(u) => { + new_amount = ((u.clone() as u16)*fact) + carry; + rem = new_amount % base; + carry = (new_amount - rem) / base; + ret_rev.push(rem as u8) + }, + None => { + while carry != 0 { + rem = carry % base; + carry = (carry - rem) / base; + ret_rev.push(rem as u8); + } + break; + } + } + } + let ret : Vec = + ret_rev.iter().rev().map(|x| x.clone()).collect(); + ret +} + +pub struct Remainder { + position : usize, + replace : Option +} + +pub struct DivOut { + quotient : u8, + remainder: Remainder +} + +pub fn arrnum_int_div( + arrnum : &Vec, + basenum : u8, + base_ten_int_divisor : u8, + rem_in : Remainder + ) -> DivOut { + + let mut rem_out = Remainder { + position: rem_in.position, + replace : None + }; + + let mut bufferval : u16 = 0; + let base : u16 = basenum as u16; + let divisor : u16 = base_ten_int_divisor as u16; + + let mut quotient = 0; + let mut u_cur : Option<&u8> = Some(match rem_in.replace { + Some(ref u) => { u } + None => { &arrnum[rem_in.position] } + }); + + let str_f = &arrnum[rem_in.position+1..]; + let mut it_f = str_f.iter(); + loop { + match u_cur { + Some(u) => { + bufferval += u.clone() as u16; + if bufferval > divisor { + while bufferval >= divisor { + quotient+=1; + bufferval -= divisor; + } + if bufferval == 0 { + rem_out.position +=1; + } else { + rem_out.replace = Some(bufferval as u8); + } + break; + } else { + bufferval *= base; + } + }, + None => { + break; + } + } + u_cur = it_f.next().clone(); + rem_out.position+=1; + } + DivOut { quotient: quotient, remainder: rem_out } +} + +pub fn arrnum_int_add( + arrnum : &Vec, + basenum : u8, + base_ten_int_term : u8 + ) -> Vec { + let mut carry : u16 = base_ten_int_term as u16; + let mut rem : u16; + let mut new_amount : u16; + let base : u16 = basenum as u16; + + let mut ret_rev : Vec = Vec::new(); + let mut it = arrnum.iter().rev(); + loop { + let i = it.next(); + match i { + Some(u) => { + new_amount = (u.clone() as u16) + carry; + rem = new_amount % base; 
+ carry = (new_amount - rem) / base; + ret_rev.push(rem as u8) + }, + None => { + while carry != 0 { + rem = carry % base; + carry = (carry - rem) / base; + ret_rev.push(rem as u8); + } + break; + } + } + } + let ret : Vec = + ret_rev.iter().rev().map(|x| x.clone()).collect(); + ret +} + +pub fn base_conv_vec( + src : &Vec, + radix_src : u8, + radix_dest : u8 + ) -> Vec { + let mut result : Vec = Vec::new(); + result.push(0); + for i in src { + result = arrnum_int_mult(&result, + radix_dest, radix_src); + result = arrnum_int_add( + &result, + radix_dest, + i.clone() + ); + } + result +} + + +pub fn base_conv_float( + src : &Vec, + radix_src : u8, + radix_dest : u8 + ) -> f64 { + //it would require a lot of addl code + // to implement this for arbitrary string input. + //until then, the below operates as an outline + // of how it would work. + let mut result : Vec = Vec::new(); + result.push(0); + let mut factor : f64 = radix_dest as f64; + let radix_src_float : f64 = radix_src as f64; + let mut i = 0; + let mut r :f64 = 0 as f64; + factor /= 10.; + for u in src { + if i > 15 { break; } + i+=1; + factor /= radix_src_float; + r += factor * (u.clone() as f64) + } + r +} + +pub fn str_to_arrnum( + src: &str, + radix_def_src : &RadixDef + ) -> Vec { + let mut intermed_in : Vec = Vec::new(); + for c in src.chars() { + match radix_def_src.from_char::<>(c) { + Some(u) => { intermed_in.push(u); } + None => {} //todo err msg on incorrect + } + } + intermed_in +} + +pub fn arrnum_to_str( + src: &Vec, + radix_def_dest : &RadixDef + ) -> String { + let mut str_out = String::new(); + for u in src.iter() { + match radix_def_dest.from_u8(u.clone()) { + Some(c) => { + str_out.push(c); + } + None => {} //todo + } + } + str_out +} + +#[allow(unused_variables)] +pub fn base_conv_str( + src: &str, + radix_def_src : &RadixDef, + radix_def_dest : &RadixDef + ) -> String { + let intermed_in : Vec = + str_to_arrnum(src, radix_def_src); + let intermed_out = base_conv_vec( + &intermed_in, + 
radix_def_src.get_max(), + radix_def_dest.get_max(), + ); + arrnum_to_str(&intermed_out, radix_def_dest) +} + +pub trait RadixDef { + fn get_max (&self) -> u8; + fn from_char (&self, x:char) -> Option; + fn from_u8 (&self, x:u8) -> Option; +} +pub struct RadixTen; + +const ZERO_ASC : u8 = '0' as u8; +const UPPER_A_ASC : u8 = 'A' as u8; +const LOWER_A_ASC : u8 = 'a' as u8; + +impl RadixDef for RadixTen { + fn get_max(&self) -> u8 { 10 } + fn from_char (&self, c:char) -> Option { + match c { + '0'...'9' => Some(c as u8 - ZERO_ASC), + _ => None + } + } + fn from_u8 (&self, u:u8) -> Option { + match u { + 0...9 => Some((ZERO_ASC + u) as char), + _ => None + } + } +} +pub struct RadixHex; +impl RadixDef for RadixHex { + fn get_max(&self) -> u8 { 16 } + fn from_char (&self, c:char) -> Option { + match c { + '0'...'9' => Some(c as u8 - ZERO_ASC), + 'A'...'F' => Some(c as u8 +10 - UPPER_A_ASC), + 'a'...'f' => Some(c as u8 +10 - LOWER_A_ASC), + _ => None + } + } + fn from_u8 (&self, u:u8) -> Option { + match u { + 0...9 => Some((ZERO_ASC + u) as char), + 10...15 => Some((UPPER_A_ASC + (u-10)) as char), + _ => None + } + } +} + diff --git a/src/printf/tokenize/num_format/formatters/decf.rs b/src/printf/tokenize/num_format/formatters/decf.rs new file mode 100644 index 000000000..6a0a98816 --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/decf.rs @@ -0,0 +1,84 @@ +//! 
formatter for %g %G decimal subs +use super::super::format_field::FormatField; +use super::super::formatter::{InPrefix,FormatPrimitive,Formatter}; +use super::float_common::{FloatAnalysis, + get_primitive_dec, + primitive_to_str_common}; + +fn get_len_fprim( + fprim : &FormatPrimitive + ) -> usize { + let mut len = 0; + if let Some(ref s) = fprim.prefix { len += s.len(); } + if let Some(ref s) = fprim.pre_decimal { len += s.len(); } + if let Some(ref s) = fprim.post_decimal { len += s.len(); } + if let Some(ref s) = fprim.suffix { len += s.len(); } + len +} + +pub struct Decf { + as_num : f64 +} +impl Decf { + pub fn new() -> Decf { + Decf { as_num: 0.0 } + } +} +impl Formatter for Decf { + fn get_primitive( + &self, + field : &FormatField, + inprefix : &InPrefix, + str_in : &str + ) -> Option { + let second_field = field.second_field.unwrap_or(6)+1; + let analysis = FloatAnalysis::analyze( + str_in, + inprefix, + Some(second_field as usize+1), + None + ); + let mut f_sci = get_primitive_dec( + inprefix, + &str_in[inprefix.offset..], + &analysis, + second_field as usize, + Some(*field.field_char == 'G')); + //strip trailing zeroes + match f_sci.post_decimal.clone() { + Some(ref post_dec) => { + let mut i = post_dec.len(); + { + let mut it = post_dec.chars(); + while let Some(c) = it.next_back() { + if c != '0' { break; } + i-=1; + } + } + if i != post_dec.len() { + f_sci.post_decimal = + Some(String::from(&post_dec[0..i])); + } + } + None => {} + } + let f_fl = get_primitive_dec( + inprefix, + &str_in[inprefix.offset..], + &analysis, + second_field as usize, + None); + Some(if get_len_fprim(&f_fl) >= get_len_fprim(&f_sci) { + f_sci + } else { f_fl }) + } + fn primitive_to_str( + &self, + prim: &FormatPrimitive, + field: FormatField) -> String { + primitive_to_str_common( + prim, + &field + ) + } +} diff --git a/src/printf/tokenize/num_format/formatters/float_common.rs b/src/printf/tokenize/num_format/formatters/float_common.rs new file mode 100644 index 
000000000..b17f21228 --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/float_common.rs @@ -0,0 +1,331 @@ +use super::super::format_field::{FormatField}; +use super::super::formatter::{InPrefix,Base,FormatPrimitive,warn_incomplete_conv,get_it_at}; +use super::base_conv; +use super::base_conv::{RadixDef}; + +// if the memory, copy, and comparison cost of chars +// becomes an issue, we can always operate in vec here +// rather than just at de_hex + +pub struct FloatAnalysis { + pub len_important: usize, + //none means no decimal point. + pub decimal_pos: Option, + pub follow: Option +} +impl FloatAnalysis { + pub fn analyze( + str_in: &str, + inprefix: &InPrefix, + max_sd_opt: Option, + max_after_dec_opt: Option, + ) -> FloatAnalysis { + // this fn assumes + // the input string + // has no leading spaces or 0s + let mut str_it = get_it_at(inprefix.offset, str_in); + let mut ret = FloatAnalysis { + len_important: 0, + decimal_pos: None, + follow: None + }; + let mut i=0; + while let Some(c) = str_it.next() { match c{ + e @ '0'...'9' | e @ 'A'...'F' | e @ 'a'...'f' => { + match inprefix.radix_in { + Base::Ten => { + match e { + '0'...'9' => {}, + _ => { + warn_incomplete_conv(str_in); + break; + } + } + } + _ => {} + } + if let Some(max_sd) = max_sd_opt { + if i == max_sd { + //follow is used in cases of %g + //where the character right after the last + //sd is considered is rounded affecting + //the previous digit in 1/2 of instances + ret.follow = Some(e); + } else if ret.decimal_pos.is_some() && i > max_sd { + break; + } + } + if let Some(p) = ret.decimal_pos { + if let Some(max_after_dec) = max_after_dec_opt { + if (i-1) - p == max_after_dec { + break + } + } + } + }, + '.' 
=> { + if ret.decimal_pos.is_none() { + ret.decimal_pos = Some(i); + } else { + warn_incomplete_conv(str_in); + break; + } + } + _ => { + // any other character ends the number; the diagnostic goes + // to stderr through warn_incomplete_conv, stdout stays clean + warn_incomplete_conv(str_in); + break; + } + }; i+=1; } + ret.len_important = i; + ret + } +} + +// converts a hex digit string to decimal digits: +// digits before the point go through base_conv_str, digits after it +// through base_conv_float, whose formatted value has its first two +// characters (presumably the leading "0.") dropped +fn de_hex( + src: &str, + before_decimal: bool + ) -> String { + let rten = base_conv::RadixTen; + let rhex = base_conv::RadixHex; + if before_decimal { + base_conv::base_conv_str(src, &rhex, &rten) + } else { + let as_arrnum_hex =base_conv::str_to_arrnum(src, &rhex); + let s = format!("{}", base_conv::base_conv_float( + &as_arrnum_hex, + rhex.get_max(), + rten.get_max() + )); + if s.len() > 2 { + String::from(&s[2..]) + } else { + //zero + s + } + } +} + +// takes a string in, +// truncates to a position, +// bumps the last digit up one, +// and if the digit was nine +// propagate to the next, etc. +fn _round_str_from( + in_str : &str, + position : usize + ) -> (String, bool) { + + let mut it=in_str[0..position].chars(); + let mut rev = String::new(); + let mut i = position; + let mut finished_in_dec=false; + while let Some(c)=it.next_back() { + i-=1; + match c { + '9' => { rev.push('0'); } + e @ _ => { + rev.push( + ((e as u8)+1) as char); + finished_in_dec = true; + break; + } + } + } + let mut fwd = String::from(&in_str[0..i]); + for ch in rev.chars().rev() { + fwd.push(ch); + } + (fwd, finished_in_dec) +} + +// rounds at `position` within after_dec: when the digit there is +// 5-9 the digits before it are bumped via _round_str_from, carrying +// into before_dec if the bump ran off the front of after_dec; +// any other digit leaves both strings untouched here +fn round_terminal_digit( + before_dec: String, + after_dec: String, + position: usize + ) -> (String, String) { + + if position < after_dec.len() { + let digit_at_pos:char; + { + digit_at_pos=(&after_dec[position..position+1]) + .chars().next().expect(""); + } + match digit_at_pos { + '5'...'9' => { + let (new_after_dec, finished_in_dec) = + _round_str_from(&after_dec, position); + if finished_in_dec { + return (before_dec, new_after_dec) + } else { + let (new_before_dec, _) = + _round_str_from(&before_dec, + before_dec.len()); + return (new_before_dec, new_after_dec) + } + //TODO + }, + _ =>{ } + } + } 
+ (before_dec, after_dec) +} + +pub fn get_primitive_dec( + inprefix : &InPrefix, + str_in : &str, + analysis : &FloatAnalysis, + last_dec_place : usize, + sci_mode : Option + ) -> FormatPrimitive { + let mut f : FormatPrimitive = Default::default(); + + //add negative sign section + if inprefix.sign == -1 { + f.prefix = Some(String::from("-")); + } + + // assign the digits before and after the decimal points + // to separate slices. If no digits after decimal point, + // assign 0 + let (mut first_segment_raw, second_segment_raw) = + match analysis.decimal_pos { + Some(pos) => { + (&str_in[..pos], &str_in[pos+1..]) + }, + None => { (&str_in[..], "0") } + }; + if first_segment_raw.len() == 0 { + first_segment_raw = "0"; + } + // convert to string, de_hexifying if input is in hex. + let (first_segment, second_segment) = + match inprefix.radix_in { + Base::Hex => { + (de_hex(first_segment_raw, true), + de_hex(second_segment_raw, false)) + } + _ => { + (String::from(first_segment_raw), + String::from(second_segment_raw)) + } + }; + let (pre_dec_unrounded, post_dec_unrounded, mantissa) = + if sci_mode.is_some() { + if first_segment.len() > 1 { + let mut post_dec = String::from(&first_segment[1..]); + post_dec.push_str(&second_segment); + (String::from(&first_segment[0..1]), + post_dec, + first_segment.len() as isize -1) + } else { + match first_segment.chars().next() { + Some('0') => { + let mut it = second_segment.chars().enumerate(); + let mut m : isize = 0; + let mut pre = String::from("0"); + let mut post = String::from("0"); + while let Some((i,c)) = it.next() { match c { + '0' => {} + _ => { + m=((i as isize)+1) * -1; + pre = String::from( + &second_segment[i..i+1]); + post = String::from( + &second_segment[i+1..]); + break; + } + } } + (pre, post, m) + }, + Some(_) => { + (first_segment, second_segment, 0) + }, + None => { + panic!( + "float_common: no chars in first segment."); + } + } + } + } else { + (first_segment, second_segment, 0) + }; + + let 
(pre_dec_draft, post_dec_draft) = + round_terminal_digit(pre_dec_unrounded, + post_dec_unrounded, + last_dec_place-1); + + f.pre_decimal=Some(pre_dec_draft); + f.post_decimal=Some(post_dec_draft); + if let Some(capitalized) = sci_mode { + let si_ind = if capitalized { 'E' } else { 'e' }; + f.suffix=Some(if mantissa >=0 { + format!("{}+{:02}", si_ind, mantissa) + } else { + //negative sign is considered in format!s + // leading zeroes + format!("{}{:03}", si_ind, mantissa) + }); + } + + f +} + +pub fn primitive_to_str_common( + prim: &FormatPrimitive, + field: &FormatField + ) -> String { + let mut final_str = String::new(); + match prim.prefix { + Some(ref prefix) => { + final_str.push_str(&prefix); + }, + None => {} + } + match prim.pre_decimal { + Some(ref pre_decimal) => { + final_str.push_str(&pre_decimal); + } + None => { + panic!("error, format primitives provided to int, will, incidentally under correct behavior, always have a pre_dec value."); + } + } + let decimal_places = field.second_field.unwrap_or(6); + match prim.post_decimal { + Some(ref post_decimal) => { + if post_decimal.len() > 0 && decimal_places > 0 { + final_str.push('.'); + let len_avail=post_decimal.len() as u32; + + if decimal_places >= len_avail { + //println!("dec {}, len avail {}", decimal_places, len_avail); + final_str.push_str(post_decimal); + + if *field.field_char != 'g' && + *field.field_char != 'G' { + let diff = decimal_places - len_avail; + for _ in 0..diff { + final_str.push('0'); + } + } + } else { + //println!("printing to only {}", decimal_places); + final_str.push_str( + &post_decimal[0..decimal_places as usize]); + } + } + } + None => { + panic!("error, format primitives provided to int, will, incidentally under correct behavior, always have a pre_dec value."); + } + } + match prim.suffix { + Some(ref suffix) => { + final_str.push_str(suffix); + }, + None => {} + } + + final_str +} diff --git a/src/printf/tokenize/num_format/formatters/floatf.rs 
b/src/printf/tokenize/num_format/formatters/floatf.rs new file mode 100644 index 000000000..7fb2a8fbf --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/floatf.rs @@ -0,0 +1,48 @@ +//! formatter for %f %F common-notation floating-point subs +use super::super::format_field::FormatField; +use super::super::formatter::{InPrefix,FormatPrimitive,Formatter}; +use super::float_common::{FloatAnalysis, + get_primitive_dec, + primitive_to_str_common}; + +pub struct Floatf { + as_num : f64 +} +impl Floatf { + pub fn new() -> Floatf { + Floatf { as_num: 0.0 } + } +} +impl Formatter for Floatf { + fn get_primitive( + &self, + field : &FormatField, + inprefix : &InPrefix, + str_in : &str + ) -> Option { + let second_field = field.second_field.unwrap_or(6)+1; + let analysis = FloatAnalysis::analyze( + &str_in, + inprefix, + None, + Some(second_field as usize) + ); + let f = get_primitive_dec( + inprefix, + &str_in[inprefix.offset..], + &analysis, + second_field as usize, + None); + Some(f) + } + fn primitive_to_str( + &self, + prim: &FormatPrimitive, + field: FormatField) -> String { + primitive_to_str_common( + prim, + &field + ) + } +} + diff --git a/src/printf/tokenize/num_format/formatters/intf.rs b/src/printf/tokenize/num_format/formatters/intf.rs new file mode 100644 index 000000000..f7e8564fa --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/intf.rs @@ -0,0 +1,297 @@ +//! formatter for unsigned and signed int subs +//! unsigned ints: %X %x (hex u64) %o (octal u64) %u (base ten u64) +//! 
signed ints: %i %d (both base ten i64) +use std::u64; +use std::i64; +use super::super::format_field::FormatField; +use super::super::formatter::{InPrefix,FormatPrimitive,Base,Formatter,warn_incomplete_conv,get_it_at}; + +pub struct Intf { + a : u32 +} + +// see the Intf::analyze() function below +struct IntAnalysis { + check_past_max : bool, + past_max : bool, + is_zero: bool, + len_digits: u8 +} + +impl Intf { + pub fn new() -> Intf { + Intf { a:0 } + } + // take a ref to argument string, and basic information + // about prefix (offset, radix, sign), and analyze string + // to gain the IntAnalysis information above + // check_past_max: true if the number *may* be above max, + // but we don't know either way. One of several reasons + // we may have to parse as int. + // past_max: true if the object is past max, false if not + // in the future we should probably combine these into an + // Option + // is_zero: true if number is zero, false otherwise + // len_digits: length of digits used to create the int + // important, for example, if we run into a non-valid character + fn analyze( + str_in: &str, + signed_out: bool, + inprefix: &InPrefix + ) -> IntAnalysis { + // the maximum number of digits we could conceivably + // have before the decimal point without exceeding the + // max + let mut str_it = get_it_at(inprefix.offset, str_in); + let max_sd_in = + if signed_out { + match inprefix.radix_in { + Base::Ten => 19, + Base::Octal => 21, + Base::Hex => 16 + } + } else { + match inprefix.radix_in { + Base::Ten => 20, + Base::Octal => 22, + Base::Hex => 16 + } + }; + let mut ret = IntAnalysis { + check_past_max: false, + past_max: false, + is_zero: false, + len_digits : 0 + }; + + // todo turn this to a while let now that we know + // no special behavior on EOI break + loop { + let c_opt = str_it.next(); + if let Some(c) = c_opt { match c { + '0'...'9' | 'a'...'f' | 'A'...'F' => { + if ret.len_digits == 0 && c == '0' { + ret.is_zero = true; + } else if ret.is_zero { + 
ret.is_zero = false; + } + ret.len_digits += 1; + if ret.len_digits == max_sd_in { + if let Some(next_ch) = str_it.next() { + match next_ch { + '0'...'9' => { + ret.past_max = true; + } + _ => { + // force conversion + // to check if its above max. + // todo: spin out convert + // into fn, call it here to try + // read val, on Ok() + // save val for reuse later + // that way on same-base in and out + // we don't needlessly convert int + // to str, we can just copy it over. + ret.check_past_max = true; + str_it.put_back(next_ch); + } + } + if ret.past_max { break; } + } else { ret.check_past_max = true; } + } + } + _ => { + warn_incomplete_conv(str_in); + break; + } + } } else { + //breaks on EOL + break; + } + } + ret + } + // get a FormatPrimitive of the maximum value for the field char + // and given sign + fn get_max( + fchar : char, + sign : i8 + ) -> FormatPrimitive { + let mut fmt_prim : FormatPrimitive = Default::default(); + fmt_prim.pre_decimal = Some(String::from(match fchar { + 'd' | 'i' => match sign { + 1 => "9223372036854775807", + _ => { + fmt_prim.prefix = Some(String::from("-")); + "9223372036854775808" + } + }, + 'x' | 'X' => "ffffffffffffffff", + 'o' => "1777777777777777777777", + 'u' | _ => "18446744073709551615" + })); + fmt_prim + } + // conv_from_segment contract: + // 1. takes + // - a string that begins with a non-zero digit, and proceeds + // with zero or more following digits until the end of the string + // - a radix to interpret those digits as + // - a char that communicates: + // whether to interpret+output the string as an i64 or u64 + // what radix to write the parsed number as. + // 2. parses it as a rust integral type + // 3. 
outputs FormatPrimitive with: + // - if the string falls within bounds: + // number parsed and written in the correct radix + // - if the string falls outside bounds: + // for i64 output, the int minimum or int max (depending on sign) + // for u64 output, the u64 max in the output radix + fn conv_from_segment( + segment : &str, + radix_in : Base, + fchar : char, + sign : i8, + ) -> + FormatPrimitive + { + match fchar { + 'i' | 'd' => { + match i64::from_str_radix(segment, radix_in as u32) { + Ok(i) => { + let mut fmt_prim : FormatPrimitive = + Default::default(); + if sign == -1 { + fmt_prim.prefix = Some(String::from("-")); + } + fmt_prim.pre_decimal = + Some(format!("{}", i)); + fmt_prim + } + Err(_) => Intf::get_max(fchar, sign) + } + }, + _ => { + match u64::from_str_radix(segment, radix_in as u32) { + Ok(u) => { + let mut fmt_prim : FormatPrimitive = + Default::default(); + let u_f = + if sign == -1 { u64::MAX - (u -1) + } else { u }; + fmt_prim.pre_decimal = Some(match fchar { + 'X' => format!("{:X}", u_f), + 'x' => format!("{:x}", u_f), + 'o' => format!("{:o}", u_f), + _ => format!("{}", u_f) + }); + fmt_prim + } + Err(_) => Intf::get_max(fchar, sign) + } + } + } + } +} +impl Formatter for Intf { + fn get_primitive( + &self, + field : &FormatField, + inprefix : &InPrefix, + str_in : &str + ) -> Option { + + let begin = inprefix.offset; + + //get information about the string. see Intf::Analyze + // def above. + let convert_hints = Intf::analyze(str_in, + *field.field_char == 'i' || *field.field_char == 'd', + inprefix); + //We always will have a formatprimitive to return + Some(if convert_hints.len_digits == 0 || convert_hints.is_zero { + //if non-digit or end is reached before a non-zero digit + let mut fmt_prim : FormatPrimitive = Default::default(); + fmt_prim.pre_decimal=Some(String::from("0")); + fmt_prim + } else if ! 
convert_hints.past_max { + //if the number is or may be below the bounds limit + let radix_out = match *field.field_char { + 'd' | 'i' | 'u' => Base::Ten, + 'x' | 'X' => Base::Hex, + 'o' | _ => Base::Octal + }; + let radix_mismatch = ! radix_out.eq(&inprefix.radix_in); + let decr_from_max :bool = inprefix.sign == -1 && + *field.field_char !='i'; + let end = begin + convert_hints.len_digits as usize; + + // convert to int if any one of these is true: + // - number of digits in int indicates it may be past max + // - we're subtracting from the max + // - we're converting the base + if convert_hints.check_past_max + || decr_from_max || radix_mismatch { + //radix of in and out is the same. + let segment = String::from(&str_in[begin..end]); + let m = Intf::conv_from_segment( + &segment, + inprefix.radix_in.clone(), + *field.field_char, + inprefix.sign); + m + } else { + //otherwise just do a straight string copy. + let mut fmt_prim : FormatPrimitive = Default::default(); + + // this is here and not earlier because + // zero doesn't get a sign, and conv_from_segment + // creates its format primitive separately + if inprefix.sign == -1 && *field.field_char == 'i' { + + fmt_prim.prefix = Some(String::from("-")); + } + fmt_prim.pre_decimal = Some(String::from + (&str_in[begin..end])); + fmt_prim + } + } else { + Intf::get_max(*field.field_char, inprefix.sign) + }) + + } + fn primitive_to_str( + &self, + prim: &FormatPrimitive, + field: FormatField) -> String { + let mut finalstr : String = String::new(); + match prim.prefix { + Some(ref prefix) => { + finalstr.push_str(&prefix); + }, + None => {} + } + //integral second fields is zero-padded minimum-width + //which gets handled before general minimum-width + match prim.pre_decimal { + Some(ref pre_decimal) => { + match field.second_field { + Some(min) => { + let mut i = min; + let len = pre_decimal.len() as u32; + while i > len { + finalstr.push('0'); + i -= 1; + } + } + None => {} + } + finalstr.push_str(&pre_decimal); + } 
+ None => { + panic!("error, format primitives provided to int, will, incidentally under correct behavior, always have a pre_dec value."); + } + } + finalstr + } +} diff --git a/src/printf/tokenize/num_format/formatters/mod.rs b/src/printf/tokenize/num_format/formatters/mod.rs new file mode 100644 index 000000000..243a19263 --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/mod.rs @@ -0,0 +1,6 @@ +pub mod intf; +pub mod floatf; +pub mod scif; +pub mod decf; +mod float_common; +mod base_conv; diff --git a/src/printf/tokenize/num_format/formatters/scif.rs b/src/printf/tokenize/num_format/formatters/scif.rs new file mode 100644 index 000000000..3755894c6 --- /dev/null +++ b/src/printf/tokenize/num_format/formatters/scif.rs @@ -0,0 +1,47 @@ +//! formatter for %e %E scientific notation subs +use super::super::format_field::FormatField; +use super::super::formatter::{InPrefix,FormatPrimitive,Formatter}; +use super::float_common::{FloatAnalysis, + get_primitive_dec, + primitive_to_str_common}; + +pub struct Scif { + as_num : f64 +} +impl Scif { + pub fn new() -> Scif { + Scif { as_num: 0.0 } + } +} +impl Formatter for Scif { + fn get_primitive( + &self, + field : &FormatField, + inprefix : &InPrefix, + str_in : &str + ) -> Option { + let second_field = field.second_field.unwrap_or(6)+1; + let analysis = FloatAnalysis::analyze( + str_in, + inprefix, + Some(second_field as usize+1), + None + ); + let f = get_primitive_dec( + inprefix, + &str_in[inprefix.offset..], + &analysis, + second_field as usize, + Some(*field.field_char == 'E')); + Some(f) + } + fn primitive_to_str( + &self, + prim: &FormatPrimitive, + field: FormatField) -> String { + primitive_to_str_common( + prim, + &field + ) + } +} diff --git a/src/printf/tokenize/num_format/mod.rs b/src/printf/tokenize/num_format/mod.rs new file mode 100644 index 000000000..d40cf92de --- /dev/null +++ b/src/printf/tokenize/num_format/mod.rs @@ -0,0 +1,4 @@ +pub mod format_field; +mod formatter; +mod formatters; +pub 
mod num_format; diff --git a/src/printf/tokenize/num_format/num_format.rs b/src/printf/tokenize/num_format/num_format.rs new file mode 100644 index 000000000..126b14584 --- /dev/null +++ b/src/printf/tokenize/num_format/num_format.rs @@ -0,0 +1,269 @@ +//! handles creating printed output for numeric substitutions + +use std::env; +use std::vec::Vec; +use cli; +use super::format_field::{FormatField, FieldType}; +use super::formatter::{Formatter, FormatPrimitive, InPrefix, Base}; +use super::formatters::intf::Intf; +use super::formatters::floatf::Floatf; +use super::formatters::scif::Scif; +use super::formatters::decf::Decf; + +pub fn warn_expected_numeric(pf_arg: &String) { + //important: keep println here not print + cli::err_msg(&format!("{}: expected a numeric value", pf_arg)); +} + +// when character costant arguments have excess characters +// issue a warning when POSIXLY_CORRECT is not set +fn warn_char_constant_ign(remaining_bytes: Vec) { + match env::var("POSIXLY_CORRECT") { + Ok(_) => {} + Err(e) => { + match e { + env::VarError::NotPresent => { + cli::err_msg(&format!("warning: {:?}: character(s) following character constant have been ignored", &*remaining_bytes)); + }, + _ => {} + } + } + } +} + +// this function looks at the first few +// characters of an argument and returns a value if we can learn +// a value from that (e.g. no argument? return 0, char constant? 
ret value) +fn get_provided( + str_in_opt : Option<&String> + ) -> Option { + const C_S_QUOTE: u8=39; + const C_D_QUOTE: u8=34; + match str_in_opt { + Some(str_in) => { + let mut byte_it = str_in.bytes(); + if let Some(qchar) = byte_it.next() { + match qchar { + C_S_QUOTE | C_D_QUOTE => { + return Some(match byte_it.next() { + Some(second_byte) => { + let mut ignored : Vec = Vec::new(); + while let Some(cont)=byte_it.next() { + ignored.push(cont); + } + if ignored.len() > 0 { + warn_char_constant_ign(ignored); + } + second_byte as u8 + }, + //no byte after quote + None => { + let so_far = + (qchar as u8 as char).to_string(); + warn_expected_numeric(&so_far); + 0 as u8 + } + }); + }, + //first byte is not quote + _ => { return None; } + //no first byte + } + } else { Some(0 as u8) } + } + None =>{ Some(0) } + } +} + +// takes a string and returns +// a sign, +// a base, +// and an offset for index after all +// initial spacing, sign, base prefix, and leading zeroes +fn get_inprefix( + str_in : &String, + field_type : &FieldType + ) -> InPrefix { + let mut str_it = str_in.chars(); + let mut ret = InPrefix { radix_in: Base::Ten, sign: 1, offset: 0 }; + let mut topchar = str_it.next().clone(); + //skip spaces and ensure topchar is the first non-space char + // (or None if none exists) + loop { + match topchar + { + Some(' ')=>{ret.offset+=1; topchar=str_it.next();}, + _=>{ break; } + } + } + //parse sign + match topchar { + Some('+')=>{ ret.offset+=1; topchar=str_it.next(); } + Some('-')=>{ ret.sign = -1; ret.offset+=1; topchar=str_it.next(); } + _=>{} + } + // we want to exit with offset being + // the index of the first non-zero + // digit before the decimal point or + // if there is none, the zero before the + // decimal point, or, if there is none, + // the decimal point. + + // while we are determining the offset + // we will ensure as a convention + // the offset is always on the first character + // that we are yet unsure if it is the + // final offset. 
If the zero could be before + // a decimal point we don't move past the zero. + let mut is_hex = false; + if Some('0') == topchar { + if let Some(base) = str_it.next() { + // lead zeroes can only exist in + // octal and hex base + let mut do_clean_lead_zeroes = false; + match base { + 'x' | 'X' => { + is_hex = true; + ret.offset += 2; + ret.radix_in = Base::Hex; + do_clean_lead_zeroes = true; + }, + e @ '0'...'9' => { + ret.offset+=1; + match *field_type { + FieldType::Intf => { + ret.radix_in = Base::Octal; + }, + _ => {} + } + if e == '0' { + do_clean_lead_zeroes = true; + } + } + _=>{} + } + if do_clean_lead_zeroes { + let mut first = true; + while let Some(ch_zero) = str_it.next() { + // see notes on offset above: + // this is why the offset for octals and decimals + // that reach this branch is 1 even though + // they have already eaten the characters '00' + // this is also why when hex encounters its + // first zero it does not move its offset + // forward because it does not know for sure + // that it's current offset (of that zero) + // is not the final offset, + // whereas at that point octal knows its + // current offset is not the final offset. + match ch_zero { + '0' => { + if !(is_hex && first) { ret.offset+=1; } + }, + //if decimal, keep last zero if one exists + //(it's possible for last zero to + // not exist at this branch if we're in hex input) + '.' => { + break + }, + //other digit, etc. 
+ _ => { + if !(is_hex && first) { ret.offset+=1; } + break + } + } + if first { first = false; } + + } + } + } + } + ret +} + +// this is the function a Sub's print will delegate to +// if it is a numeric field, passing the field details +// and an iterator to the argument +pub fn num_format( + field: &FormatField, + in_str_opt: Option<&String> + ) -> Option { + + + let fchar = field.field_char.clone(); + + // num format mainly operates by further delegating to one of + // several Formatter structs depending on the field + // see formatter.rs for more details + + // to do switch to static dispatch + let fmtr : Box = match *field.field_type { + FieldType::Intf => Box::new(Intf::new()), + FieldType::Floatf => Box::new(Floatf::new()), + FieldType::Scif => Box::new(Scif::new()), + FieldType::Decf => Box::new(Decf::new()), + _ => { panic!("asked to do num format with non-num fieldtype"); } + }; + let prim_opt= + // if we can get an assumed value from looking at the first + // few characters, use that value to create the FormatPrimitive + if let Some(provided_num) = get_provided(in_str_opt) { + let mut tmp : FormatPrimitive = Default::default(); + match fchar { + 'u' | 'i' | 'd' => { + tmp.pre_decimal = Some( + format!("{}", provided_num)); + }, + 'x' | 'X' => { + tmp.pre_decimal = Some( + format!("{:x}", provided_num)); + }, + 'o' => { + tmp.pre_decimal = Some( + format!("{:o}", provided_num)); + }, + 'e' | 'E' | 'g' | 'G' => { + let as_str = format!("{}", provided_num); + let inprefix = get_inprefix( + &as_str, + &field.field_type + ); + tmp=fmtr.get_primitive(field, &inprefix, &as_str) + .expect("err during default provided num"); + }, + _ => { + tmp.pre_decimal = Some( + format!("{}", provided_num)); + tmp.post_decimal = Some(String::from("0")); + } + } + Some(tmp) + } else { + // otherwise we'll interpret the argument as a number + // using the appropriate Formatter + let in_str = in_str_opt.expect( + "please send the devs this message: + \n get_provided is failing 
to ret as Some(0) on no str "); + // first get information about the beginning of the + // numeric argument that would be useful for + // any formatter (int or float) + let inprefix = get_inprefix( + in_str, + &field.field_type + ); + // then get the FormatPrimitive from the Formatter + fmtr.get_primitive(field, &inprefix, in_str) + }; + // if we have a formatPrimitive, print its results + // according to the field-char appropriate Formatter + if let Some(prim) = prim_opt { + Some( + fmtr.primitive_to_str( + &prim, + field.clone() + ) + ) + } else { + None + } +} diff --git a/src/printf/tokenize/sub.rs b/src/printf/tokenize/sub.rs new file mode 100644 index 000000000..045219082 --- /dev/null +++ b/src/printf/tokenize/sub.rs @@ -0,0 +1,425 @@ +//! Sub is a token that represents a +//! segment of the format string that is a substitution +//! it is created by Sub's implementation of the Tokenizer trait +//! Subs which have numeric field chars make use of the num_format +//! submodule +use std::slice::Iter; +use std::iter::Peekable; +use std::str::Chars; +use std::process::exit; +use cli; +use itertools::PutBackN; +use super::token; +use super::unescaped_text::UnescapedText; +use super::num_format::format_field::{FormatField, FieldType}; +use super::num_format::num_format; +//use std::collections::HashSet; + +fn err_conv(sofar: &String) { + cli::err_msg(&format!("%{}: invalid conversion specification", sofar)); + exit(cli::EXIT_ERR); +} + +fn convert_asterisk_arg_int(asterisk_arg : &String) -> isize { + // this is a costly way to parse the + // args used for asterisk values into integers + // from various bases. 
Actually doing it correctly + // (going through the pipeline to intf, but returning + // the integer instead of writing it to string and then + // back) is on the refactoring TODO + let field_type = FieldType::Intf; + let field_char = 'i'; + let field_info = FormatField{ + min_width: Some(0), + second_field: Some(0), + orig: asterisk_arg, + field_type: &field_type, + field_char: &field_char + }; + num_format::num_format( + &field_info, + Some(asterisk_arg) + ).unwrap().parse::().unwrap() +} + +pub enum CanAsterisk { + Fixed(T), + Asterisk +} + +// Sub is a tokenizer which creates tokens +// for substitution segments of a format string +pub struct Sub { + min_width: CanAsterisk>, + second_field: CanAsterisk>, + field_char: char, + field_type: FieldType, + orig: String +} +impl Sub { + pub fn new(min_width: CanAsterisk>, + second_field: CanAsterisk>, + field_char: char, + orig: String) -> Sub { + // for more dry printing, field characters are grouped + // in initialization of token. + let field_type = match field_char { + 's' | 'b' => FieldType::Strf, + 'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf, + 'f' | 'F' => FieldType::Floatf, + 'e' | 'E' => FieldType::Scif, + 'g' | 'G' => FieldType::Decf, + 'c' => FieldType::Charf, + _ => { + //should be unreachable. 
+ println!("Invalid fieldtype"); + exit(cli::EXIT_ERR); + } + }; + Sub { + min_width: min_width, + second_field: second_field, + field_char: field_char, + field_type: field_type, + orig: orig + } + } + +} + +struct SubParser { + min_width_tmp : Option, + min_width_is_asterisk: bool, + past_decimal : bool, + second_field_tmp : Option, + second_field_is_asterisk : bool, + specifiers_found : bool, + field_char : Option, + text_so_far : String +} + +impl SubParser { + fn new() -> SubParser { + SubParser { + min_width_tmp : None, + min_width_is_asterisk : false, + past_decimal : false, + second_field_tmp : None, + second_field_is_asterisk : false, + specifiers_found : false, + field_char : None, + text_so_far : String::new() + } + } + fn from_it(it: &mut PutBackN, + args: &mut Peekable>) + -> Option> { + let mut parser = SubParser::new(); + if parser.sub_vals_retrieved(it) { + let t: Box = SubParser::build_token(parser); + t.print(args); + Some(t) + } else { + None + } + } + fn build_token(parser : SubParser) -> Box { + //not a self method so as to allow move of subparser vals. + //return new Sub struct as token + let t: Box = Box::new( + Sub::new( + if parser.min_width_is_asterisk { + CanAsterisk::Asterisk + } else { + CanAsterisk::Fixed(parser.min_width_tmp.map(|x| x.parse::().unwrap())) + }, + if parser.second_field_is_asterisk { + CanAsterisk::Asterisk + } else { + CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::().unwrap())) + }, + parser.field_char.unwrap(), + parser.text_so_far + ) + ); + t + } + fn sub_vals_retrieved(&mut self, + it: &mut PutBackN) + -> bool { + + if !SubParser::successfully_eat_prefix(it, &mut self.text_so_far) { + return false; + } + // this fn in particular is much longer than it needs to be + //.could get a lot + // of code savings just by cleaning it up. shouldn't use a regex + // though, as we want to mimic the original behavior of printing + // the field as interpreted up until the error in the field. 
+ + let mut legal_fields=vec!['b', 'c', 'd', 'e', 'E', + 'f', 'g', 'G', 'i', 'o', + 's', 'u', 'x', 'X']; + let mut specifiers=vec!['h', 'j', 'l', 'L', 't', 'z']; + legal_fields.sort(); + specifiers.sort(); + + // divide substitution from %([0-9]+)?(.[0-9+])?([a-zA-Z]) + // into min_width, second_field, field_char + while let Some(ch) = it.next() { + self.text_so_far.push(ch); + match ch as char { + '-' | '*' | '0' ... '9' => { + if ! self.past_decimal { + if self.min_width_is_asterisk + || self.specifiers_found { + err_conv(&self.text_so_far); + } + if self.min_width_tmp.is_none() { + self.min_width_tmp=Some(String::new()); + } + match self.min_width_tmp.as_mut() { + Some(x) => { + if (ch == '-' || ch == '*') && x.len() > 0 { + err_conv(&self.text_so_far); + } + if ch == '*' { + self.min_width_is_asterisk = true; + } + x.push(ch); + } + None => { panic!("should be unreachable"); } + } + } else { + //second field should never have a + // negative value + if self.second_field_is_asterisk + || ch == '-' + || self.specifiers_found { + err_conv(&self.text_so_far); + } + if self.second_field_tmp.is_none() { + self.second_field_tmp=Some(String::new()); + } + match self.second_field_tmp.as_mut() { + Some(x) => { + if ch == '*' && x.len() > 0 { + err_conv(&self.text_so_far); + } + if ch == '*' { + self.second_field_is_asterisk = true; + } + x.push(ch); + } + None => { panic!("should be unreachable"); } + } + } + }, + '.' => { + if ! self.past_decimal { + self.past_decimal = true; + } else { + err_conv(&self.text_so_far); + } + }, + x if legal_fields.binary_search(&x).is_ok() => { + self.field_char=Some(ch); + self.text_so_far.push(ch); + break + } + x if specifiers.binary_search(&x).is_ok() => { + if ! self.past_decimal { + self.past_decimal = true; + } + if ! self.specifiers_found { + self.specifiers_found = true; + } + } + _ => { + err_conv(&self.text_so_far); + } + } + } + if ! 
self.field_char.is_some() + { err_conv(&self.text_so_far); } + let field_char_retrieved = self.field_char.unwrap(); + if self.past_decimal && self.second_field_tmp.is_none() { + self.second_field_tmp = Some(String::from("0")); + } + self.validate_field_params(field_char_retrieved); + // if the dot is provided without a second field + // printf interprets it as 0. + match self.second_field_tmp.as_mut() { + Some(x) => { + if x.len() == 0 { + self.min_width_tmp = Some(String::from("0")); + } + } + _ => {} + } + + true + } + fn successfully_eat_prefix(it: &mut PutBackN, + text_so_far : &mut String ) -> bool { + //get next two chars, + // if they're '%%' we're not tokenizing it + // else put chars back + let preface = it.next(); + let n_ch = it.next(); + if preface == Some('%') && + n_ch != Some('%') { + match n_ch { + Some(x) => { + it.put_back(x); + true + } + None => { + text_so_far.push('%'); + err_conv(&text_so_far); + false + } + } + } else { + n_ch.map(|x| it.put_back(x)); + preface.map(|x| it.put_back(x)); + false + } + } + fn validate_field_params(&self, field_char : char) { + //check for illegal combinations here when possible vs + // on each application so we check less per application + // to do: move these checks to Sub::new + if (field_char == 's' && + self.min_width_tmp == Some(String::from("0"))) || + (field_char == 'c' && + (self.min_width_tmp == Some(String::from("0")) || self.past_decimal)) || + (field_char == 'b' && + (self.min_width_tmp.is_some() || self.past_decimal || + self.second_field_tmp.is_some())) { + err_conv(&self.text_so_far); + } + } +} + + + +impl token::Tokenizer for Sub { + fn from_it(it: &mut PutBackN, + args: &mut Peekable>) + -> Option> { + SubParser::from_it(it, args) + } +} +impl token::Token for Sub { + fn print(&self, pf_args_it: &mut Peekable>) { + let field = FormatField { + min_width: match self.min_width { + CanAsterisk::Fixed(x) => x, + CanAsterisk::Asterisk => { + match pf_args_it.next() { + //temporary, use intf.rs 
instead + Some(x) => Some(convert_asterisk_arg_int(x)), + None => Some(0) + } + } + }, + second_field: match self.second_field { + CanAsterisk::Fixed(x) => x, + CanAsterisk::Asterisk => { + match pf_args_it.next() { + //temporary, use intf.rs instead + Some(x) => { + let result = convert_asterisk_arg_int(x); + if result < 0 { + None + } else { + Some(result as u32) + } + }, + None => Some(0) + } + } + }, + field_char: &self.field_char, + field_type: &self.field_type, + orig: &self.orig, + }; + let pf_arg = pf_args_it.next(); + + // minimum width is handled independently of actual + // field char + let pre_min_width_opt : Option = match *field.field_type { + // if %s just return arg + // if %b use UnescapedText module's unescaping-fn + // if %c return first char of arg + FieldType::Strf | FieldType::Charf => { + match pf_arg { + Some(arg_string) => { + match *field.field_char { + 's' => { + Some(match field.second_field { + Some(max) =>{ + String::from( + &arg_string[..max as usize]) + } + None => { + arg_string.clone() + } + }) + } + 'b' => { + let mut a_it=PutBackN::new( + arg_string.chars()); + UnescapedText::from_it_core( + &mut a_it, true); + None + } + //for 'c': get iter of string vals, + //get opt of first val + //and map it to opt + 'c' | _ => arg_string.chars().next().map( + |x| x.to_string()) + } + }, + None => None + } + }, + _ => { + // non string/char fields are delegated to num_format + num_format::num_format(&field, pf_arg) + } + }; + match pre_min_width_opt { + // if have a string, print it, ensuring minimum width is met. + Some(pre_min_width) => { + print!("{}", match field.min_width { + Some(min_width) => { + let diff : isize = min_width.abs() as isize - + pre_min_width.len() as isize; + if diff > 0 { + let mut final_str = String::new(); + // definitely more efficient ways + // to do this. + let pad_before = min_width > 0; + if ! 
pad_before { + final_str.push_str(&pre_min_width); + } + for _ in 0..diff { + final_str.push(' '); + } + if pad_before { + final_str.push_str(&pre_min_width); + } + final_str + } else { + pre_min_width + } + } + None => { pre_min_width } + }); + } + None => {} + } + } +} diff --git a/src/printf/tokenize/token.rs b/src/printf/tokenize/token.rs new file mode 100644 index 000000000..c1d2df4d7 --- /dev/null +++ b/src/printf/tokenize/token.rs @@ -0,0 +1,32 @@ +//! Traits and enums dealing with Tokenization of printf Format String +#[allow(unused_must_use)] + +use std::iter::Peekable; +use std::str::Chars; +use std::slice::Iter; +use itertools::PutBackN; + +// A token object is an object that can print the expected output +// of a contiguous segment of the format string, and +// requires at most 1 argusegment +pub trait Token { + fn print(&self, args: &mut Peekable>); +} + +// A tokenizer object is an object that takes an iterator +// at a position in a format string, and sees whether +// it can return a token of a type it knows how to produce +// if so, return the token, move the iterator past the +// format string text the token repsresents, and if an +// argument is used move the argument iter forward one + +// creating token of a format string segment should also cause +// printing of that token's value. Essentially tokenizing +// a whole format string will print the format string and consume +// a number of arguments equal to the number of argument-using tokens + +pub trait Tokenizer { + fn from_it(it: &mut PutBackN, + args: &mut Peekable>) + -> Option>; +} diff --git a/src/printf/tokenize/unescaped_text.rs b/src/printf/tokenize/unescaped_text.rs new file mode 100644 index 000000000..b23a659ef --- /dev/null +++ b/src/printf/tokenize/unescaped_text.rs @@ -0,0 +1,268 @@ +//! UnescapedText is a tokenizer impl +//! for tokenizing character literals, +//! and escaped character literals (of allowed escapes), +//! 
into an unescaped text byte array + +use std::iter::Peekable; +use std::slice::Iter; +use std::str::Chars; +use std::char::from_u32; +use std::process::exit; +use cli; +use itertools::PutBackN; +use super::token; + +pub struct UnescapedText(Vec); +impl UnescapedText { + fn new() -> UnescapedText { + UnescapedText(Vec::new()) + } + //take an iterator to the format string + //consume between min and max chars + //and return it as a base-X number + fn base_to_u32( + min_chars: u8, + max_chars: u8, + base : u32, + it: &mut PutBackN + ) -> u32 { + let mut retval : u32 = 0; + let mut found=0; + while found < max_chars { + //if end of input break + let nc = it.next(); + match nc { + Some(digit) => { + //if end of hexchars break + match digit.to_digit(base) { + Some(d) => { + found += 1; + retval *= base; + retval += d; + } + None => { + it.put_back(digit); + break + } + } + }, + None => { + break; + } + } + } + if found < min_chars { + //only ever expected for hex + println!("missing hexadecimal number in escape"); //todo stderr + exit(cli::EXIT_ERR); + } + retval + } + // validates against valid + // IEC 10646 vals - these values + // are pinned against the more popular + // printf so as to not disrupt when + // dropped-in as a replacement. 
+    fn validate_iec(val: u32, eight_word: bool) { // exits with EXIT_ERR if val is not an allowed \u / \U code point
+        let mut preface = 'u'; // escape letter echoed in the error message
+        let mut leading_zeros = 4; // hex digits shown: 4 for \u, 8 for \U
+        if eight_word {
+            preface = 'U';
+            leading_zeros = 8;
+        }
+        let err_msg = format!("invalid universal character name {0}{1:02$x}",
+                              preface,
+                              val,
+                              leading_zeros);
+        if (val <= 0x9f && (val != 0x24 &&   // up to U+009F only '$',
+                            val != 0x40 &&   // '@' and
+                            val != 0x60)) || // '`' may be named this way
+            (val >= 0xd800 && val <= 0xdfff) { // surrogates are never valid
+            cli::err_msg(&err_msg);
+            exit(cli::EXIT_ERR);
+        }
+    }
+    // Given an iterator positioned just after a backslash ('\\'),
+    // consume the rest of the escape sequence, echo the unescaped
+    // result to stdout, and append its bytes to `byte_vec`.
+    // `subs_mode` (set for %b arguments) changes how a leading '0'
+    // of an octal escape is treated.
+    fn handle_escaped(
+        byte_vec: &mut Vec<u8>,
+        it: &mut PutBackN<Chars>,
+        subs_mode: bool
+    ) {
+        let ch = match it.next() {
+            Some(x) => x,
+            None => '\\' // a trailing lone backslash is emitted literally
+        };
+        match ch {
+            '0'...'9' | 'x' => {
+                let min_len = 1;
+                let mut max_len = 2;
+                let mut base = 16;
+                let ignore = false; // see the note in the octal arm below
+                match ch {
+                    'x' => {},
+                    e @ '0'...'9' => {
+                        max_len = 3; base = 8;
+                        // in practice, gnu coreutils printf
+                        // interprets octals without a
+                        // leading zero in %b
+                        // but it only skips leading zeros
+                        // in %b mode.
+                        // if we ever want to match gnu coreutil
+                        // printf's docs instead of its behavior
+                        // we'd set this to true.
+                        //if subs_mode && e != '0'
+                        //  { ignore = true; }
+                        if !subs_mode || e != '0'
+                        { it.put_back(ch); } // the digit is part of the number
+                    }
+                    _ => {}
+                }
+                if !ignore {
+                    let val = (UnescapedText::base_to_u32(min_len,
+                                                          max_len,
+                                                          base,
+                                                          it) % 256) as u8;
+                    byte_vec.push(val);
+                    let bvec = [val];
+                    cli::flush_bytes(&bvec);
+                } else {
+                    byte_vec.push(ch as u8);
+                }
+            },
+            e => {
+                //only for hex and octal
+                //is byte encoding specified.
+                //otherwise, why not leave the door open
+                //for other encodings unless it turns out
+                //a bottleneck.
+                let mut s = String::new();
+                let ch = match e {
+                    '\\' => '\\',
+                    '"' => '"',
+                    'n' => '\n',
+                    'r' => '\r',
+                    't' => '\t',
+                    //bell
+                    'a' => '\x07',
+                    //backspace
+                    'b' => '\x08',
+                    //vertical tab
+                    'v' => '\x0B',
+                    //form feed
+                    'f' => '\x0C',
+                    //escape character
+                    'e' => '\x1B',
+                    'c' => { exit(cli::EXIT_OK) }, // \c stops all further output
+                    'u' | 'U' => {
+                        let len = match e {
+                            'u' => 4,
+                            'U' | _ => 8
+                        };
+                        let val = UnescapedText::base_to_u32(len,
+                                                            len,
+                                                            16,
+                                                            it);
+                        UnescapedText::validate_iec(val, e == 'U');
+                        if let Some(c) = from_u32(val) {
+                            c
+                        } else { '-' } // not a valid char: print a placeholder
+                    },
+                    _ => {
+                        s.push('\\'); // unknown escape: keep the backslash too
+                        ch
+                    }
+                };
+                s.push(ch);
+                cli::flush_str(&s);
+                byte_vec.extend(s.bytes());
+            }
+        };
+
+    }
+
+    // Take an iterator over a string and return a token wrapping a
+    // Vec<u8> of its unescaped bytes, or None if nothing was left to
+    // read. Stops (putting the chars back) at a substitution symbol
+    // ('%[^%]') unless subs_mode is set, i.e. called through %b.
+    pub fn from_it_core(it: &mut PutBackN<Chars>,
+                        subs_mode: bool) -> Option<Box<token::Token>> {
+        let mut addchar = false;
+        let mut new_text = UnescapedText::new();
+        let mut tmp_str = String::new();
+        {
+            let new_vec: &mut Vec<u8> = &mut (new_text.0);
+            while let Some(ch) = it.next() {
+                addchar = true; // at least one char was consumed
+                match ch {
+                    x if x != '\\' && x != '%' => {
+                        // lazy branch eval
+                        // remember this fn could be called
+                        // many times in a single exec through %b
+                        cli::flush_char(&ch);
+                        tmp_str.push(ch);
+                    }
+                    '\\' => {
+                        // the literal may be a literal bytecode
+                        // and not valid utf-8. Str only supports
+                        // valid utf-8.
+                        // if we find the unnecessary drain
+                        // on non hex or octal escapes is costly
+                        // then we can make it faster/more complex
+                        // with as-necessary draining.
+                        if tmp_str.len() > 0 {
+                            new_vec.extend(tmp_str.bytes());
+                            tmp_str = String::new();
+                        }
+                        UnescapedText::handle_escaped(new_vec,
+                                                      it,
+                                                      subs_mode)
+                    }
+                    x if x == '%' && !subs_mode => {
+                        if let Some(follow) = it.next() {
+                            if follow == '%' {
+                                cli::flush_char(&ch); // "%%" prints one '%'
+                                tmp_str.push(ch);
+                            } else {
+                                it.put_back(follow);
+                                it.put_back(ch);
+                                break
+                            }
+                        } else {
+                            it.put_back(ch);
+                            break
+                        }
+                    }
+                    _ => {
+                        cli::flush_char(&ch); // '%' is literal in subs_mode
+                        tmp_str.push(ch);
+                    }
+                }
+            }
+            if tmp_str.len() > 0 {
+                new_vec.extend(tmp_str.bytes());
+            }
+        }
+        match addchar {
+            true => Some(Box::new(new_text)),
+            false => None
+        }
+    }
+}
+#[allow(unused_variables)]
+impl token::Tokenizer for UnescapedText {
+    fn from_it(it: &mut PutBackN<Chars>,
+               args: &mut Peekable<Iter<String>>)
+               -> Option<Box<token::Token>> {
+        UnescapedText::from_it_core(it, false)
+    }
+}
+#[allow(unused_variables)]
+impl token::Token for UnescapedText {
+    fn print(&self, pf_args_it: &mut Peekable<Iter<String>>) {
+        cli::flush_bytes(&self.0[..]);
+    }
+}
diff --git a/tests/printf.rs b/tests/printf.rs
new file mode 100644
index 000000000..a8d6d4f45
--- /dev/null
+++ b/tests/printf.rs
@@ -0,0 +1,235 @@
+#[macro_use]
+mod common;
+
+use common::util::*;
+
+static UTIL_NAME: &'static str = "printf";
+
+fn expect_stdout(input: Vec<&str>, expected: &str) {
+    let (_, mut ucmd) = testing(UTIL_NAME);
+    let results = ucmd.args(&input).run();
+    //assert_empty_stderr!(result);
+    //assert!(result.success);
+    assert_eq!(expected,results.stdout);
+}
+
+#[test]
+fn basic_literal() { expect_stdout(
+    vec!("hello world"), "hello world"); }
+
+#[test]
+fn escaped_tab() { expect_stdout(
+    vec!("hello\\t world"), "hello\t world"); }
+
+#[test]
+fn escaped_newline() { expect_stdout(
+    vec!("hello\\n world"), "hello\n world"); }
+
+#[test]
+fn escaped_slash() { expect_stdout(
+    vec!("hello\\\\ world"), "hello\\ world"); }
+
+#[test]
+fn escaped_hex() { expect_stdout(
+    vec!("\\x41"), "A"); }
+
+#[test]
+fn escaped_octal() { expect_stdout(
+    vec!("\\101"), "A"); }
+
+#[test]
+fn escaped_unicode_fourdigit() { expect_stdout(
+    vec!("\\u0125"), "ĥ"); }
+
+#[test]
+fn escaped_unicode_eightdigit() { expect_stdout(
+    vec!("\\U00000125"), "ĥ"); }
+
+#[test]
+fn escaped_percent_sign() { expect_stdout(
+    vec!("hello%% world"), "hello% world"); }
+
+#[test]
+fn escaped_unrecognized() { expect_stdout(
+    vec!("c\\d"), "c\\d"); }
+
+#[test]
+fn sub_string() { expect_stdout(
+    vec!("hello %s", "world"), "hello world"); }
+
+#[test]
+fn sub_multifield() { expect_stdout(
+    vec!("%s %s", "hello", "world"), "hello world"); }
+
+#[test]
+fn sub_repeat_formatstr() { expect_stdout(
+    vec!("%s.", "hello", "world"), "hello.world."); }
+
+#[test]
+fn sub_string_ignore_escapes() { expect_stdout(
+    vec!("hello %s", "\\tworld"), "hello \\tworld"); }
+
+#[test]
+fn sub_bstring_handle_escapes() { expect_stdout(
+    vec!("hello %b", "\\tworld"), "hello \tworld"); }
+
+#[test]
+fn sub_bstring_ignore_subs() { expect_stdout(
+    vec!("hello %b", "world %% %i"), "hello world %% %i"); }
+
+#[test]
+fn sub_char() { expect_stdout(
+    vec!("the letter %c", "A"), "the letter A"); }
+
+#[test]
+fn sub_num_int() { expect_stdout(
+    vec!("twenty is %i", "20"), "twenty is 20"); }
+
+#[test]
+fn sub_num_int_minwidth() { expect_stdout(
+    vec!("twenty is %1i", "20"), "twenty is 20"); }
+
+#[test]
+fn sub_num_int_neg() { expect_stdout(
+    vec!("neg. twenty is %i", "-20"), "neg. twenty is -20"); }
+
+#[test]
+fn sub_num_int_oct_in() { expect_stdout(
+    vec!("twenty is %i", "024"), "twenty is 20"); }
+
+#[test]
+fn sub_num_int_oct_in_neg() { expect_stdout(
+    vec!("neg. twenty is %i", "-024"), "neg. twenty is -20"); }
+
+#[test]
+fn sub_num_int_hex_in() { expect_stdout(
+    vec!("twenty is %i", "0x14"), "twenty is 20"); }
+
+#[test]
+fn sub_num_int_hex_in_neg() { expect_stdout(
+    vec!("neg. twenty is %i", "-0x14"), "neg. twenty is -20"); }
+
+#[test]
+fn sub_num_int_charconst_in() { expect_stdout(
+    vec!("ninetyseven is %i", "'a"), "ninetyseven is 97"); }
+
+#[test]
+fn sub_num_uint() { expect_stdout(
+    vec!("twenty is %u", "20"), "twenty is 20"); }
+
+#[test]
+fn sub_num_octal() { expect_stdout(
+    vec!("twenty in octal is %o", "20"), "twenty in octal is 24"); }
+
+#[test]
+fn sub_num_hex_lower() { expect_stdout(
+    vec!("thirty in hex is %x", "30"), "thirty in hex is 1e"); }
+
+#[test]
+fn sub_num_hex_upper() { expect_stdout(
+    vec!("thirty in hex is %X", "30"), "thirty in hex is 1E"); }
+
+#[test]
+fn sub_num_float() { expect_stdout(
+    vec!("twenty is %f", "20"), "twenty is 20.000000"); }
+
+#[test]
+fn sub_num_float_round() { expect_stdout(
+    vec!("two is %f", "1.9999995"), "two is 2.000000"); }
+
+#[test]
+fn sub_num_sci_lower() { expect_stdout(
+    vec!("twenty is %e", "20"), "twenty is 2.000000e+01"); }
+
+#[test]
+fn sub_num_sci_upper() { expect_stdout(
+    vec!("twenty is %E", "20"), "twenty is 2.000000E+01"); }
+
+#[test]
+fn sub_num_sci_trunc() { expect_stdout(
+    vec!("pi is ~ %e", "3.1415926535"), "pi is ~ 3.141593e+00"); }
+
+#[test]
+fn sub_num_dec_trunc() { expect_stdout(
+    vec!("pi is ~ %g", "3.1415926535"), "pi is ~ 3.141593"); }
+
+#[test]
+fn sub_minwidth() { expect_stdout(
+    vec!("hello %7s", "world"), "hello   world"); }
+
+#[test]
+fn sub_minwidth_negative() { expect_stdout(
+    vec!("hello %-7s", "world"), "hello world  "); }
+
+#[test]
+fn sub_str_max_chars_input() { expect_stdout(
+    vec!("hello %7.2s", "world"), "hello      wo"); }
+
+#[test]
+fn sub_int_decimal() { expect_stdout(
+    vec!("%0.i", "11"), "11"); }
+
+#[test]
+fn sub_int_leading_zeroes() { expect_stdout(
+    vec!("%.4i", "11"), "0011"); }
+
+#[test]
+fn sub_int_leading_zeroes_prio() { expect_stdout(
+    vec!("%5.4i", "11"), " 0011"); }
+
+#[test]
+fn sub_float_dec_places() { expect_stdout(
+    vec!("pi is ~ %.11f", "3.1415926535"), "pi is ~ 3.14159265350"); }
+
+#[test]
+fn sub_float_hex_in() { expect_stdout(
+    vec!("%f", "0xF1.1F"), "241.121094"); }
+
+#[test]
+fn sub_float_no_octal_in() { expect_stdout(
+    vec!("%f", "077"), "77.000000"); }
+
+#[test]
+fn sub_any_asterisk_firstparam() { expect_stdout(
+    vec!("%*i", "3", "11", "4", "12"), " 11  12");
+}
+
+#[test]
+fn sub_any_asterisk_second_param() { expect_stdout(
+    vec!("%.*i", "3", "11", "4", "12"), "0110012");
+}
+
+#[test]
+fn sub_any_asterisk_both_params() { expect_stdout(
+    vec!("%*.*i", "4", "3", "11", "5", "4", "12"), " 011 0012");
+}
+
+#[test]
+fn sub_any_asterisk_octal_arg() { expect_stdout(
+    vec!("%.*i", "011", "12345678"), "012345678");
+}
+
+#[test]
+fn sub_any_asterisk_hex_arg() { expect_stdout(
+    vec!("%.*i", "0xA", "123456789"), "0123456789");
+}
+
+#[test]
+fn sub_any_specifiers_no_params() { expect_stdout(
+    vec!("%ztlhLji", "3"), "3");
+}
+
+#[test]
+fn sub_any_specifiers_after_first_param() { expect_stdout(
+    vec!("%0ztlhLji", "3"), "3");
+}
+
+#[test]
+fn sub_any_specifiers_after_period() { expect_stdout(
+    vec!("%0.ztlhLji", "3"), "3");
+}
+
+#[test]
+fn sub_any_specifiers_after_second_param() { expect_stdout(
+    vec!("%0.0ztlhLji", "3"), "3");
+}