printf: add (spare C99 hex floats)

This commit is contained in:
Nathan Ross 2015-12-24 01:11:00 -05:00
parent 1760f2937b
commit 0892ad3cde
25 changed files with 2887 additions and 0 deletions

Cargo.lock generated
View file

@ -48,6 +48,7 @@ dependencies = [
"paste 0.0.1",
"primal 0.2.3 (registry+",
"printenv 0.0.1",
"printf 0.0.1",
"ptx 0.0.1",
"pwd 0.0.1",
"rand 0.3.12 (registry+",
@ -387,6 +388,11 @@ dependencies = [
"uucore 0.0.1",
name = "itertools"
version = "0.4.7"
source = "registry+"
name = "kernel32-sys"
version = "0.2.1"
@ -615,6 +621,14 @@ dependencies = [
"uucore 0.0.1",
name = "printf"
version = "0.0.1"
dependencies = [
"itertools 0.4.7 (registry+",
"uucore 0.0.1",
name = "ptx"
version = "0.0.1"

View file

@ -56,6 +56,7 @@ generic = [
@ -129,6 +130,7 @@ nproc = { optional=true, path="src/nproc" }
od = { optional=true, path="src/od" }
paste = { optional=true, path="src/paste" }
printenv = { optional=true, path="src/printenv" }
printf = { optional=true, path="src/printf" }
ptx = { optional=true, path="src/ptx" }
pwd = { optional=true, path="src/pwd" }
readlink = { optional=true, path="src/readlink" }

View file

@ -65,6 +65,7 @@ PROGS := \
od \
paste \
printenv \
printf \
ptx \
pwd \
readlink \
@ -149,6 +150,7 @@ TEST_PROGS := \
mv \
nl \
paste \
printf \
ptx \
pwd \
readlink \

src/printf/Cargo.toml Normal file
View file

@ -0,0 +1,16 @@
name = "printf"
version = "0.0.1"
authors = ["Nathan Ross"]
name = "uu_printf"
path = ""
"itertools" = "*"
uucore = { path="../uucore" }
name = "printf"
path = ""

src/printf/ Normal file
View file

@ -0,0 +1,34 @@
//! stdio convenience fns
use std::io::{stderr, stdout, Write};
use std::env;
pub static EXIT_OK: i32 = 0;
pub static EXIT_ERR: i32 = 1;
/// Writes `msg` to stderr as a single line, prefixed with the path of the
/// running executable and a colon.
///
/// If the executable path cannot be determined, the prefix is empty.
///
/// # Panics
/// Panics if the write to stderr fails.
pub fn err_msg(msg: &str) {
    // lossy conversion: a non-UTF-8 path must not prevent the message
    // from being reported
    let exe_path = env::current_exe()
        .map(|p| p.to_string_lossy().into_owned())
        .unwrap_or_default();
    writeln!(&mut stderr(), "{}: {}", exe_path, msg).unwrap();
}
// by default stdout only flushes
// to console when a newline is passed.
pub fn flush_char(c: &char) {
print!("{}", c);
pub fn flush_str(s: &str) {
print!("{}", s);
pub fn flush_bytes(bslice: &[u8]) {

src/printf/ Normal file
View file

@ -0,0 +1,5 @@
extern crate uu_printf;
fn main() {

src/printf/ Normal file
View file

@ -0,0 +1,84 @@
//! Memo runner of printf
//! Takes a format string and arguments
//! 1. tokenizes format string into tokens, consuming
//! any subst. arguments along the way.
//! 2. feeds remaining arguments into function
//! that prints tokens.
use std::iter::Peekable;
use std::slice::Iter;
use itertools::PutBackN;
use cli;
use tokenize::token::{Token, Tokenizer};
use tokenize::unescaped_text::UnescapedText;
use tokenize::sub::Sub;
pub struct Memo {
tokens: Vec<Box<Token>>,
fn warn_excess_args(first_arg : &str) {
cli::err_msg(&format!("warning: ignoring excess arguments, starting with '{}'",
impl Memo {
pub fn new(
pf_string: &String,
pf_args_it: &mut Peekable<Iter<String>>
) -> Memo {
let mut pm = Memo { tokens: Vec::new() };
let mut tmp_token : Option<Box<Token>>;
let mut it = PutBackN::new(pf_string.chars());
let mut has_sub = false;
loop {
tmp_token = UnescapedText::from_it(&mut it, pf_args_it);
match tmp_token {
Some(x) => pm.tokens.push(x),
None => {}
tmp_token = Sub::from_it(&mut it, pf_args_it);
match tmp_token {
Some(x) => {
if ! has_sub { has_sub = true; }
None => {}
if let Some(x) = {
} else { break; }
if ! has_sub {
let mut drain= false;
if let Some(first_arg) = pf_args_it.peek() {
drain = true;
if drain {
loop {
//drain remaining args;
if {
pub fn apply(&self, pf_args_it: &mut Peekable<Iter<String>>) {
for tkn in self.tokens.iter() {
pub fn run_all(pf_string: &String, pf_args: &[String]) {
let mut arg_it = pf_args.iter().peekable();
let pm = Memo::new(pf_string, &mut arg_it);
loop {
if arg_it.peek().is_none() {
pm.apply(&mut arg_it);

src/printf/ Normal file
View file

@ -0,0 +1,4 @@
mod cli;
mod memo;
mod tokenize;

src/printf/ Normal file
View file

@ -0,0 +1,288 @@
#![crate_name = "uu_printf"]
extern crate itertools;
mod cli;
mod memo;
mod tokenize;
extern crate uucore;
static NAME: &'static str = "printf";
static VERSION: &'static str = "0.0.1";
static SHORT_USAGE: &'static str = "printf: usage: printf [-v var] format [arguments]";
static LONGHELP_LEAD: &'static str = "printf
basic anonymous string templating:
prints format string at least once, repeating as long as there are remaining arguments
output prints escaped literals in the format string as character literals
output replaces anonymous fields with the next unused argument, formatted according to the field.
--help display this help and exit
--version output version information and exit
static LONGHELP_BODY: &'static str = "
Prints the FORMAT string, replacing escaped character sequences with character literals
and substitution field sequences with passed arguments
literally, with the exception of the below
escaped character sequences, and the substitution sequences described further down.
The following escape sequences, organized here in alphabetical order,
will print the corresponding character literal:
\" double quote
\\\\ backslash
\\a alert (BEL)
\\b backspace
\\c End-of-Input
\\e escape
\\f form feed
\\n new line
\\r carriage return
\\t horizontal tab
\\v vertical tab
\\NNN byte with value expressed in octal value NNN (1 to 3 digits)
values greater than 256 will be treated
\\xHH byte with value expressed in hexadecimal value NN (1 to 2 digits)
\\uHHHH Unicode (IEC 10646) character with value expressed in hexadecimal value HHHH (4 digits)
\\UHHHHHHHH Unicode character with value expressed in hexadecimal value HHHHHHHH (8 digits)
%% a single %
%s - string
%b - string parsed for literals
second parameter is max length
%c - char
no second parameter
%i or %d - 64-bit integer
%u - 64 bit unsigned integer
%x or %X - 64-bit unsigned integer as hex
%o - 64-bit unsigned integer as octal
second parameter is min-width, integer
output below that width is padded with leading zeroes
%f or %F - decimal floating point value
%e or %E - scientific notation floating point value
%g or %G - shorter of specially interpreted decimal or SciNote floating point value.
second parameter is
-max places after decimal point for floating point output
-max number of significant digits for scientific notation output
parameterizing fields
printf '%4.3i' 7
has a first parameter of 4
and a second parameter of 3
will result in ' 007'
printf '%.1s' abcde
has no first parameter
and a second parameter of 1
will result in 'a'
printf '%4c' q
has a first parameter of 4
and no second parameter
will result in ' q'
The first parameter of a field is the minimum width to pad the output to
if the output is less than the absolute value of this width,
it will be padded with leading spaces, or, if the width is negative,
with trailing spaces. The default is zero.
The second parameter of a field is particular to the output field type.
defaults can be found in the full substitution help below
special prefixes to numeric arguments
0 (e.g. 010) - interpret argument as octal (integer output fields only)
0x (e.g. 0xABC) - interpret argument as hex (numeric output fields only)
\' (e.g. \'a) - interpret argument as a character constant
Substitutions are used to pass additional argument(s) into the FORMAT string, to be formatted a
particular way. E.g.
printf 'the letter %X comes before the letter %X' 10 11
will print
'the letter A comes before the letter B'
because the substitution field %X means
'take an integer argument and write it as a hexadecimal number'
Passing more arguments than are in the format string will cause the format string to be
repeated for the remaining substitutions
printf 'it is %i F in %s \n' 22 Portland 25 Boston 27 'New York'
will print
'it is 22 F in Portland
it is 25 F in Boston
it is 27 F in New York
If a format string is printed but there are less arguments remaining
than there are substitution fields, substitution fields without
an argument will default to empty strings, or for numeric fields
the value 0
This program, like GNU coreutils printf,
interprets a modified subset of the POSIX C printf spec,
a quick reference to substitutions is below.
All string fields have a 'max width' parameter
%.3s means 'print no more than three characters of the original input'
%s - string
%b - escaped string - the string will be checked for any escaped literals from
the escaped literal list above, and translate them to literal characters.
e.g. \\n will be transformed into a newline character.
One special rule about %b mode is that octal literals are interpreted differently
In arguments passed by %b, octal-interpreted literals must be in the form of \\0NNN instead of \\NNN
(Although, for legacy reasons, octal literals in the form of \\NNN will still be interpreted and not throw a warning, you will have problems if you use this for a literal whose code begins with zero, as it will be viewed as in \\0NNN form.)
The character field does not have a secondary parameter.
%c - a single character
All integer fields have a 'pad with zero' parameter
%.4i means an integer which if it is less than 4 digits in length,
is padded with leading zeros until it is 4 digits in length.
%d or %i - 64-bit integer
%u - 64 bit unsigned integer
%x or %X - 64 bit unsigned integer printed in Hexadecimal (base 16)
%X instead of %x means to use uppercase letters for 'a' through 'f'
%o - 64 bit unsigned integer printed in octal (base 8)
All floating point fields have a 'max decimal places / max significant digits' parameter
%.10f means a decimal floating point with 10 decimal places past 0
%.10e means a scientific notation number with 10 significant digits
%.10g means the same behavior for decimal and Sci. Note, respectively, and provides the shorter of each's output.
Like with GNU coreutils, the value after the decimal point is these outputs is parsed as a double first before being rendered to text. For both implementations do not expect meaningful precision past the 18th decimal place. When using a number of decimal places that is 18 or higher, you can expect variation in output between GNU coreutils printf and this printf at the 18th decimal place of +/- 1
%f - floating point value presented in decimal, truncated and displayed to 6 decimal places by default.
There is not past-double behavior parity with Coreutils printf, values are not estimated or adjusted beyond input values.
%e or %E - floating point value presented in scientific notation
7 significant digits by default
%E means to use uppercase E for the exponent.
%g or %G - floating point value presented in the shorter of decimal and scientific notation
behaves differently from %f and %E, please see posix printf spec for full details,
some examples of different behavior:
Sci Note has 6 significant digits by default
Trailing zeroes are removed
Instead of being truncated, digit after last is rounded
Like other behavior in this utility, the design choices of floating point
behavior in this utility is selected to reproduce in exact
the behavior of GNU coreutils' printf from an inputs and outputs standpoint.
Most substitution fields can be parameterized using up to 2 numbers that can
be passed to the field, between the % sign and the field letter.
The 1st parameter always indicates the minimum width of output, it is useful for creating
columnar output. Any output that would be less than this minimum width is padded with
leading spaces
The 2nd parameter is preceded by a dot.
You do not have to use parameters
For numeric input, the following additional forms of input are accepted besides decimal:
Octal (only with integer): if the argument begins with a 0 the following characters
will be interpreted as octal (base 8) for integer fields
Hexadecimal: if the argument begins with 0x the following characters
will be interpreted as hex (base 16) for any numeric fields
for float fields, hexadecimal input results in a precision
limit (in converting input past the decimal point) of 10^-15
Character Constant: if the argument begins with a single quote character, the first byte
of the next character will be interpreted as an 8-bit unsigned integer. If there are
additional bytes, they will throw an error (unless the environment variable POSIXLY_CORRECT is set)
Nathan E. Ross, et al. for the uutils project
Copyright 2015 uutils project.
Licensed under the MIT License, please see LICENSE file for details
pub fn uumain(args: Vec<String>) -> i32 {
let location = &args[0];
if args.len() <= 1 {
println!("{0}: missing operand\nTry '{0} --help' for more information.",
return 1;
let ref formatstr = args[1];
if formatstr == "--help" {
} else if formatstr == "--version" {
println!("{} {}", NAME, VERSION);
} else {
let printf_args = &args[2..];
memo::Memo::run_all(formatstr, printf_args);
return 0;

View file

@ -0,0 +1,4 @@
pub mod token;
pub mod sub;
pub mod unescaped_text;
mod num_format;

View file

@ -0,0 +1,42 @@
//! Primitives used by Sub Tokenizer
//! and num_format modules
pub enum FieldType {
pub enum FChar {
// A Sub token's fields are stored
// as a single object so they can be more simply
// passed by ref to num_format in a Sub method.
pub struct FormatField<'a> {
// minimum output width (the field's first parameter), if one was given;
// signed, so a negative width can be represented
pub min_width: Option<isize>,
// the field's second parameter (the number after the dot), if one was
// given; how it is used depends on the kind of field
pub second_field: Option<u32>,
// the conversion character of the field (formatters compare it
// against chars such as 'i', 'd' and 'G')
pub field_char: & 'a char,
// the kind of field, as a FieldType
pub field_type: & 'a FieldType,
// NOTE(review): presumably the original text of the field as written
// in the format string -- confirm against the Sub tokenizer
pub orig : & 'a String

View file

@ -0,0 +1,73 @@
//! Primitives used by num_format and sub_modules.
//! never dealt with above (e.g. Sub Tokenizer never uses these)
use std::str::Chars;
use itertools::PutBackN;
use cli;
use super::format_field::FormatField;
/// The rough ingredients of the final output for a number, kept as
/// separate pieces to allow for easy generalization of output
/// manipulation (e.g. max number of digits after the decimal point).
///
/// Every piece is optional; the `Default` value has all pieces unset.
#[derive(Debug, Default)]
pub struct FormatPrimitive {
    /// text placed before the digits (e.g. a "-" sign)
    pub prefix: Option<String>,
    /// digits before the decimal point
    pub pre_decimal: Option<String>,
    /// digits after the decimal point
    pub post_decimal: Option<String>,
    /// text placed after the digits (e.g. a scientific-notation exponent)
    pub suffix: Option<String>,
}
pub enum Base {
// information from the beginning of a numeric argument
// that precedes the beginning of a numeric value
pub struct InPrefix {
// radix the digits of the argument are to be interpreted in
pub radix_in : Base,
// sign of the argument; formatters test for -1 to detect a negative value
pub sign : i8,
// index into the argument string at which formatters start
// reading the digits
pub offset : usize
pub trait Formatter {
// return a FormatPrimitive for
// particular field char(s), given the argument
// string and prefix information (sign, radix)
fn get_primitive(
field: &FormatField,
inprefix: &InPrefix,
str_in: &str
) -> Option<FormatPrimitive>;
// return a string from a formatprimitive,
// given information about the field
fn primitive_to_str(
prim: &FormatPrimitive,
field: FormatField) -> String;
pub fn get_it_at(offset: usize,
str_in: &str) -> PutBackN<Chars> {
// TODO: put this somewhere better
pub fn warn_incomplete_conv(pf_arg: &str) {
//important: keep println here not print
cli::err_msg(&format!("{}: value not completely converted",

View file

@ -0,0 +1,273 @@
/// Multiplies an array-number by a small integer factor.
///
/// `arrnum` holds the digits of a non-negative number in base `basenum`,
/// most significant digit first. Returns the digits of
/// `arrnum * base_ten_int_fact` in the same base and digit order; the
/// result grows by as many leading digits as the final carry requires.
/// An empty `arrnum` yields an empty result.
///
/// Every digit is expected to be smaller than `basenum`, and `basenum`
/// is expected to be at least 2.
///
/// # Panics
/// Panics if `basenum` is 0 (division by zero).
pub fn arrnum_int_mult(
    arrnum : &Vec<u8>,
    basenum : u8,
    base_ten_int_fact : u8
) -> Vec<u8> {
    let fact = u16::from(base_ten_int_fact);
    let base = u16::from(basenum);
    let mut carry : u16 = 0;

    // digits are produced least significant first and reversed at the end
    let mut ret : Vec<u8> = Vec::with_capacity(arrnum.len() + 2);
    for &digit in arrnum.iter().rev() {
        // with digit < base the carry never exceeds 255, so this
        // stays within u16
        let amount = u16::from(digit) * fact + carry;
        ret.push((amount % base) as u8);
        carry = amount / base;
    }
    // whatever is still carried becomes new leading digits
    while carry != 0 {
        ret.push((carry % base) as u8);
        carry /= base;
    }
    ret.reverse();
    ret
}
pub struct Remainder {
position : usize,
replace : Option<u8>
pub struct DivOut {
quotient : u8,
remainder: Remainder
pub fn arrnum_int_div(
arrnum : &Vec<u8>,
basenum : u8,
base_ten_int_divisor : u8,
rem_in : Remainder
) -> DivOut {
let mut rem_out = Remainder {
position: rem_in.position,
replace : None
let mut bufferval : u16 = 0;
let base : u16 = basenum as u16;
let divisor : u16 = base_ten_int_divisor as u16;
let mut quotient = 0;
let mut u_cur : Option<&u8> = Some(match rem_in.replace {
Some(ref u) => { u }
None => { &arrnum[rem_in.position] }
let str_f = &arrnum[rem_in.position+1..];
let mut it_f = str_f.iter();
loop {
match u_cur {
Some(u) => {
bufferval += u.clone() as u16;
if bufferval > divisor {
while bufferval >= divisor {
bufferval -= divisor;
if bufferval == 0 {
rem_out.position +=1;
} else {
rem_out.replace = Some(bufferval as u8);
} else {
bufferval *= base;
None => {
u_cur =;
DivOut { quotient: quotient, remainder: rem_out }
/// Adds a small integer term to an array-number.
///
/// `arrnum` holds the digits of a non-negative number in base `basenum`,
/// most significant digit first. Returns the digits of
/// `arrnum + base_ten_int_term` in the same base and digit order. An
/// empty `arrnum` is treated as zero, so the result is just the digits
/// of the term (empty when the term is 0).
///
/// `basenum` is expected to be at least 2.
///
/// # Panics
/// Panics if `basenum` is 0 (division by zero).
pub fn arrnum_int_add(
    arrnum : &Vec<u8>,
    basenum : u8,
    base_ten_int_term : u8
) -> Vec<u8> {
    let base = u16::from(basenum);
    // the term enters the sum as the initial carry
    let mut carry = u16::from(base_ten_int_term);

    // digits are produced least significant first and reversed at the end
    let mut ret : Vec<u8> = Vec::with_capacity(arrnum.len() + 1);
    for &digit in arrnum.iter().rev() {
        let amount = u16::from(digit) + carry;
        ret.push((amount % base) as u8);
        carry = amount / base;
    }
    // whatever is still carried becomes new leading digits
    while carry != 0 {
        ret.push((carry % base) as u8);
        carry /= base;
    }
    ret.reverse();
    ret
}
pub fn base_conv_vec(
src : &Vec<u8>,
radix_src : u8,
radix_dest : u8
) -> Vec<u8> {
let mut result : Vec<u8> = Vec::new();
for i in src {
result = arrnum_int_mult(&result,
radix_dest, radix_src);
result = arrnum_int_add(
pub fn base_conv_float(
src : &Vec<u8>,
radix_src : u8,
radix_dest : u8
) -> f64 {
//it would require a lot of addl code
// to implement this for arbitrary string input.
//until then, the below operates as an outline
// of how it would work.
let mut result : Vec<u8> = Vec::new();
let mut factor : f64 = radix_dest as f64;
let radix_src_float : f64 = radix_src as f64;
let mut i = 0;
let mut r :f64 = 0 as f64;
factor /= 10.;
for u in src {
if i > 15 { break; }
factor /= radix_src_float;
r += factor * (u.clone() as f64)
pub fn str_to_arrnum(
src: &str,
radix_def_src : &RadixDef
) -> Vec<u8> {
let mut intermed_in : Vec<u8> = Vec::new();
for c in src.chars() {
match radix_def_src.from_char::<>(c) {
Some(u) => { intermed_in.push(u); }
None => {} //todo err msg on incorrect
pub fn arrnum_to_str(
src: &Vec<u8>,
radix_def_dest : &RadixDef
) -> String {
let mut str_out = String::new();
for u in src.iter() {
match radix_def_dest.from_u8(u.clone()) {
Some(c) => {
None => {} //todo
pub fn base_conv_str(
src: &str,
radix_def_src : &RadixDef,
radix_def_dest : &RadixDef
) -> String {
let intermed_in : Vec<u8> =
str_to_arrnum(src, radix_def_src);
let intermed_out = base_conv_vec(
arrnum_to_str(&intermed_out, radix_def_dest)
/// Describes a radix: how many digit values it has and how a single
/// digit maps between its character form and its numeric value.
pub trait RadixDef {
    /// The radix itself, i.e. the number of distinct digit values.
    fn get_max(&self) -> u8;
    /// Numeric value of the digit character `x`, or `None` if `x` is
    /// not a digit of this radix.
    fn from_char(&self, x: char) -> Option<u8>;
    /// Digit character for the numeric value `x`, or `None` if `x` is
    /// not below the radix.
    fn from_u8(&self, x: u8) -> Option<char>;
}

/// Base 10, using the digits '0' through '9'.
pub struct RadixTen;

const ZERO_ASC: u8 = b'0';
const UPPER_A_ASC: u8 = b'A';
const LOWER_A_ASC: u8 = b'a';

impl RadixDef for RadixTen {
    fn get_max(&self) -> u8 {
        10
    }
    fn from_char(&self, c: char) -> Option<u8> {
        match c {
            '0'..='9' => Some(c as u8 - ZERO_ASC),
            _ => None,
        }
    }
    fn from_u8(&self, u: u8) -> Option<char> {
        match u {
            0..=9 => Some((ZERO_ASC + u) as char),
            _ => None,
        }
    }
}

/// Base 16. Digit characters are accepted in either case;
/// values 10 through 15 are written as uppercase 'A' through 'F'.
pub struct RadixHex;

impl RadixDef for RadixHex {
    fn get_max(&self) -> u8 {
        16
    }
    fn from_char(&self, c: char) -> Option<u8> {
        match c {
            '0'..='9' => Some(c as u8 - ZERO_ASC),
            'A'..='F' => Some(c as u8 + 10 - UPPER_A_ASC),
            'a'..='f' => Some(c as u8 + 10 - LOWER_A_ASC),
            _ => None,
        }
    }
    fn from_u8(&self, u: u8) -> Option<char> {
        match u {
            0..=9 => Some((ZERO_ASC + u) as char),
            10..=15 => Some((UPPER_A_ASC + (u - 10)) as char),
            _ => None,
        }
    }
}

View file

@ -0,0 +1,84 @@
//! formatter for %g %G decimal subs
use super::super::format_field::FormatField;
use super::super::formatter::{InPrefix,FormatPrimitive,Formatter};
use super::float_common::{FloatAnalysis,
fn get_len_fprim(
fprim : &FormatPrimitive
) -> usize {
let mut len = 0;
if let Some(ref s) = fprim.prefix { len += s.len(); }
if let Some(ref s) = fprim.pre_decimal { len += s.len(); }
if let Some(ref s) = fprim.post_decimal { len += s.len(); }
if let Some(ref s) = fprim.suffix { len += s.len(); }
pub struct Decf {
as_num : f64
impl Decf {
pub fn new() -> Decf {
Decf { as_num: 0.0 }
impl Formatter for Decf {
fn get_primitive(
field : &FormatField,
inprefix : &InPrefix,
str_in : &str
) -> Option<FormatPrimitive> {
let second_field = field.second_field.unwrap_or(6)+1;
let analysis = FloatAnalysis::analyze(
Some(second_field as usize+1),
let mut f_sci = get_primitive_dec(
second_field as usize,
Some(*field.field_char == 'G'));
//strip trailing zeroes
match f_sci.post_decimal.clone() {
Some(ref post_dec) => {
let mut i = post_dec.len();
let mut it = post_dec.chars();
while let Some(c) = it.next_back() {
if c != '0' { break; }
if i != post_dec.len() {
f_sci.post_decimal =
None => {}
let f_fl = get_primitive_dec(
second_field as usize,
Some(if get_len_fprim(&f_fl) >= get_len_fprim(&f_sci) {
} else { f_fl })
fn primitive_to_str(
prim: &FormatPrimitive,
field: FormatField) -> String {

View file

@ -0,0 +1,331 @@
use super::super::format_field::{FormatField};
use super::super::formatter::{InPrefix,Base,FormatPrimitive,warn_incomplete_conv,get_it_at};
use super::base_conv;
use super::base_conv::{RadixDef};
// if the memory, copy, and comparison cost of chars
// becomes an issue, we can always operate in vec<u8> here
// rather than just at de_hex
pub struct FloatAnalysis {
pub len_important: usize,
//none means no decimal point.
pub decimal_pos: Option<usize>,
pub follow: Option<char>
impl FloatAnalysis {
pub fn analyze(
str_in: &str,
inprefix: &InPrefix,
max_sd_opt: Option<usize>,
max_after_dec_opt: Option<usize>,
) -> FloatAnalysis {
// this fn assumes
// the input string
// has no leading spaces or 0s
let mut str_it = get_it_at(inprefix.offset, str_in);
let mut ret = FloatAnalysis {
len_important: 0,
decimal_pos: None,
follow: None
let mut i=0;
while let Some(c) = { match c{
e @ '0'...'9' | e @ 'A'...'F' | e @ 'a'...'f' => {
match inprefix.radix_in {
Base::Ten => {
match e {
'0'...'9' => {},
_ => {
_ => {}
if let Some(max_sd) = max_sd_opt {
if i == max_sd {
//follow is used in cases of %g
//where the character right after the last
//sd is considered is rounded affecting
//the previous digit in 1/2 of instances
ret.follow = Some(e);
} else if ret.decimal_pos.is_some() && i > max_sd {
if let Some(p) = ret.decimal_pos {
if let Some(max_after_dec) = max_after_dec_opt {
if (i-1) - p == max_after_dec {
'.' => {
if ret.decimal_pos.is_none() {
ret.decimal_pos = Some(i);
} else {
_ => {
}; i+=1; }
ret.len_important = i;
fn de_hex(
src: &str,
before_decimal: bool
) -> String {
let rten = base_conv::RadixTen;
let rhex = base_conv::RadixHex;
if before_decimal {
base_conv::base_conv_str(src, &rhex, &rten)
} else {
let as_arrnum_hex =base_conv::str_to_arrnum(src, &rhex);
let s = format!("{}", base_conv::base_conv_float(
if s.len() > 2 {
} else {
// takes a string in,
// truncates to a position,
// bumps the last digit up one,
// and if the digit was nine
// propagate to the next, etc.
fn _round_str_from(
in_str : &str,
position : usize
) -> (String, bool) {
let mut it=in_str[0..position].chars();
let mut rev = String::new();
let mut i = position;
let mut finished_in_dec=false;
while let Some(c)=it.next_back() {
match c {
'9' => { rev.push('0'); }
e @ _ => {
((e as u8)+1) as char);
finished_in_dec = true;
let mut fwd = String::from(&in_str[0..i]);
for ch in rev.chars().rev() {
(fwd, finished_in_dec)
fn round_terminal_digit(
before_dec: String,
after_dec: String,
position: usize
) -> (String, String) {
if position < after_dec.len() {
let digit_at_pos:char;
match digit_at_pos {
'5'...'9' => {
let (new_after_dec, finished_in_dec) =
_round_str_from(&after_dec, position);
if finished_in_dec {
return (before_dec, new_after_dec)
} else {
let (new_before_dec, _) =
return (new_before_dec, new_after_dec)
_ =>{ }
(before_dec, after_dec)
pub fn get_primitive_dec(
inprefix : &InPrefix,
str_in : &str,
analysis : &FloatAnalysis,
last_dec_place : usize,
sci_mode : Option<bool>
) -> FormatPrimitive {
let mut f : FormatPrimitive = Default::default();
//add negative sign section
if inprefix.sign == -1 {
f.prefix = Some(String::from("-"));
// assign the digits before and after the decimal points
// to separate slices. If no digits after decimal point,
// assign 0
let (mut first_segment_raw, second_segment_raw) =
match analysis.decimal_pos {
Some(pos) => {
(&str_in[..pos], &str_in[pos+1..])
None => { (&str_in[..], "0") }
if first_segment_raw.len() == 0 {
first_segment_raw = "0";
// convert to string, de_hexifying if input is in hex.
let (first_segment, second_segment) =
match inprefix.radix_in {
Base::Hex => {
(de_hex(first_segment_raw, true),
de_hex(second_segment_raw, false))
_ => {
let (pre_dec_unrounded, post_dec_unrounded, mantissa) =
if sci_mode.is_some() {
if first_segment.len() > 1 {
let mut post_dec = String::from(&first_segment[1..]);
first_segment.len() as isize -1)
} else {
match first_segment.chars().next() {
Some('0') => {
let mut it = second_segment.chars().enumerate();
let mut m : isize = 0;
let mut pre = String::from("0");
let mut post = String::from("0");
while let Some((i,c)) = { match c {
'0' => {}
_ => {
m=((i as isize)+1) * -1;
pre = String::from(
post = String::from(
} }
(pre, post, m)
Some(_) => {
(first_segment, second_segment, 0)
None => {
"float_common: no chars in first segment.");
} else {
(first_segment, second_segment, 0)
let (pre_dec_draft, post_dec_draft) =
if let Some(capitalized) = sci_mode {
let si_ind = if capitalized { 'E' } else { 'e' };
f.suffix=Some(if mantissa >=0 {
format!("{}+{:02}", si_ind, mantissa)
} else {
//negative sign is considered in format!s
// leading zeroes
format!("{}{:03}", si_ind, mantissa)
pub fn primitive_to_str_common(
prim: &FormatPrimitive,
field: &FormatField
) -> String {
let mut final_str = String::new();
match prim.prefix {
Some(ref prefix) => {
None => {}
match prim.pre_decimal {
Some(ref pre_decimal) => {
None => {
panic!("error, format primitives provided to int, will, incidentally under correct behavior, always have a pre_dec value.");
let decimal_places = field.second_field.unwrap_or(6);
match prim.post_decimal {
Some(ref post_decimal) => {
if post_decimal.len() > 0 && decimal_places > 0 {
let len_avail=post_decimal.len() as u32;
if decimal_places >= len_avail {
//println!("dec {}, len avail {}", decimal_places, len_avail);
if *field.field_char != 'g' &&
*field.field_char != 'G' {
let diff = decimal_places - len_avail;
for _ in 0..diff {
} else {
//println!("printing to only {}", decimal_places);
&post_decimal[0..decimal_places as usize]);
None => {
panic!("error, format primitives provided to int, will, incidentally under correct behavior, always have a pre_dec value.");
match prim.suffix {
Some(ref suffix) => {
None => {}

View file

@ -0,0 +1,48 @@
//! formatter for %f %F common-notation floating-point subs
use super::super::format_field::FormatField;
use super::super::formatter::{InPrefix,FormatPrimitive,Formatter};
use super::float_common::{FloatAnalysis,
pub struct Floatf {
as_num : f64
impl Floatf {
pub fn new() -> Floatf {
Floatf { as_num: 0.0 }
impl Formatter for Floatf {
fn get_primitive(
field : &FormatField,
inprefix : &InPrefix,
str_in : &str
) -> Option<FormatPrimitive> {
let second_field = field.second_field.unwrap_or(6)+1;
let analysis = FloatAnalysis::analyze(
Some(second_field as usize)
let f = get_primitive_dec(
second_field as usize,
fn primitive_to_str(
prim: &FormatPrimitive,
field: FormatField) -> String {

View file

@ -0,0 +1,297 @@
//! formatter for unsigned and signed int subs
//! unsigned ints: %X %x (hex u64) %o (octal u64) %u (base ten u64)
//! signed ints: %i %d (both base ten i64)
use std::u64;
use std::i64;
use super::super::format_field::FormatField;
use super::super::formatter::{InPrefix,FormatPrimitive,Base,Formatter,warn_incomplete_conv,get_it_at};
pub struct Intf {
a : u32
// see the Intf::analyze() function below
struct IntAnalysis {
check_past_max : bool,
past_max : bool,
is_zero: bool,
len_digits: u8
impl Intf {
pub fn new() -> Intf {
Intf { a:0 }
// take a ref to argument string, and basic information
// about prefix (offset, radix, sign), and analyze string
// to gain the IntAnalysis information above
// check_past_max: true if the number *may* be above max,
// but we don't know either way. One of several reasons
// we may have to parse as int.
// past_max: true if the object is past max, false if not
// in the future we should probably combine these into an
// Option<bool>
// is_zero: true if number is zero, false otherwise
// len_digits: length of digits used to create the int
// important, for example, if we run into a non-valid character
fn analyze(
str_in: &str,
signed_out: bool,
inprefix: &InPrefix
) -> IntAnalysis {
// the maximum number of digits we could conceivably
// have before the decimal point without exceeding the
// max
let mut str_it = get_it_at(inprefix.offset, str_in);
let max_sd_in =
if signed_out {
match inprefix.radix_in {
Base::Ten => 19,
Base::Octal => 21,
Base::Hex => 16
} else {
match inprefix.radix_in {
Base::Ten => 20,
Base::Octal => 22,
Base::Hex => 16
let mut ret = IntAnalysis {
check_past_max: false,
past_max: false,
is_zero: false,
len_digits : 0
// todo turn this to a while let now that we know
// no special behavior on EOI break
loop {
let c_opt =;
if let Some(c) = c_opt { match c {
'0'...'9' | 'a'...'f' | 'A'...'F' => {
if ret.len_digits == 0 && c == '0' {
ret.is_zero = true;
} else if ret.is_zero {
ret.is_zero = false;
ret.len_digits += 1;
if ret.len_digits == max_sd_in {
if let Some(next_ch) = {
match next_ch {
'0'...'9' => {
ret.past_max = true;
_ => {
// force conversion
// to check if its above max.
// todo: spin out convert
// into fn, call it here to try
// read val, on Ok()
// save val for reuse later
// that way on same-base in and out
// we don't needlessly convert int
// to str, we can just copy it over.
ret.check_past_max = true;
if ret.past_max { break; }
} else { ret.check_past_max = true; }
_ => {
} } else {
//breaks on EOL
// get a FormatPrimitive of the maximum value for the field char
// and given sign
fn get_max(
fchar : char,
sign : i8
) -> FormatPrimitive {
let mut fmt_prim : FormatPrimitive = Default::default();
fmt_prim.pre_decimal = Some(String::from(match fchar {
'd' | 'i' => match sign {
1 => "9223372036854775807",
_ => {
fmt_prim.prefix = Some(String::from("-"));
'x' | 'X' => "ffffffffffffffff",
'o' => "1777777777777777777777",
'u' | _ => "18446744073709551615"
// conv_from_segment contract:
// 1. takes
// - a string that begins with a non-zero digit, and proceeds
// with zero or more following digits until the end of the string
// - a radix to interpret those digits as
// - a char that communicates:
// whether to interpret+output the string as an i64 or u64
// what radix to write the parsed number as.
// 2. parses it as a rust integral type
// 3. outputs FormatPrimitive with:
// - if the string falls within bounds:
// number parsed and written in the correct radix
// - if the string falls outside bounds:
// for i64 output, the int minimum or int max (depending on sign)
// for u64 output, the u64 max in the output radix
fn conv_from_segment(
segment : &str,
radix_in : Base,
fchar : char,
sign : i8,
) ->
match fchar {
'i' | 'd' => {
match i64::from_str_radix(segment, radix_in as u32) {
Ok(i) => {
let mut fmt_prim : FormatPrimitive =
if sign == -1 {
fmt_prim.prefix = Some(String::from("-"));
fmt_prim.pre_decimal =
Some(format!("{}", i));
Err(_) => Intf::get_max(fchar, sign)
_ => {
match u64::from_str_radix(segment, radix_in as u32) {
Ok(u) => {
let mut fmt_prim : FormatPrimitive =
let u_f =
if sign == -1 { u64::MAX - (u -1)
} else { u };
fmt_prim.pre_decimal = Some(match fchar {
'X' => format!("{:X}", u_f),
'x' => format!("{:x}", u_f),
'o' => format!("{:o}", u_f),
_ => format!("{}", u_f)
Err(_) => Intf::get_max(fchar, sign)
impl Formatter for Intf {
fn get_primitive(
field : &FormatField,
inprefix : &InPrefix,
str_in : &str
) -> Option<FormatPrimitive> {
let begin = inprefix.offset;
//get information about the string. see Intf::Analyze
// def above.
let convert_hints = Intf::analyze(str_in,
*field.field_char == 'i' || *field.field_char == 'd',
//We always will have a formatprimitive to return
Some(if convert_hints.len_digits == 0 || convert_hints.is_zero {
//if non-digit or end is reached before a non-zero digit
let mut fmt_prim : FormatPrimitive = Default::default();
} else if ! convert_hints.past_max {
//if the number is or may be below the bounds limit
let radix_out = match *field.field_char {
'd' | 'i' | 'u' => Base::Ten,
'x' | 'X' => Base::Hex,
'o' | _ => Base::Octal
let radix_mismatch = ! radix_out.eq(&inprefix.radix_in);
let decr_from_max :bool = inprefix.sign == -1 &&
*field.field_char !='i';
let end = begin + convert_hints.len_digits as usize;
// convert to int if any one of these is true:
// - number of digits in int indicates it may be past max
// - we're subtracting from the max
// - we're converting the base
if convert_hints.check_past_max
|| decr_from_max || radix_mismatch {
//radix of in and out is the same.
let segment = String::from(&str_in[begin..end]);
let m = Intf::conv_from_segment(
} else {
//otherwise just do a straight string copy.
let mut fmt_prim : FormatPrimitive = Default::default();
// this is here and not earlier because
// zero doesn't get a sign, and conv_from_segment
// creates its format primitive separately
if inprefix.sign == -1 && *field.field_char == 'i' {
fmt_prim.prefix = Some(String::from("-"));
fmt_prim.pre_decimal = Some(String::from
} else {
Intf::get_max(*field.field_char, inprefix.sign)
fn primitive_to_str(
prim: &FormatPrimitive,
field: FormatField) -> String {
let mut finalstr : String = String::new();
match prim.prefix {
Some(ref prefix) => {
None => {}
//for integers, the second field is a zero-padded minimum width,
//which gets handled before the general minimum width
match prim.pre_decimal {
Some(ref pre_decimal) => {
match field.second_field {
Some(min) => {
let mut i = min;
let len = pre_decimal.len() as u32;
while i > len {
i -= 1;
None => {}
None => {
panic!("error, format primitives provided to int, will, incidentally under correct behavior, always have a pre_dec value.");

View file

@ -0,0 +1,6 @@
// Submodules holding the Formatter implementations that num_format
// dispatches to, one public module per numeric field type.
// Intf: integer fields (d, i, u, o, x, X)
pub mod intf;
// Floatf: f / F fields
pub mod floatf;
// Scif: e / E scientific-notation fields
pub mod scif;
// Decf: g / G fields
pub mod decf;
// private helpers shared by the float formatters (scif imports
// FloatAnalysis from here)
mod float_common;
// private helper module; presumably base-conversion routines — TODO confirm
mod base_conv;

View file

@ -0,0 +1,47 @@
//! formatter for %e %E scientific notation subs
use super::super::format_field::FormatField;
use super::super::formatter::{InPrefix,FormatPrimitive,Formatter};
use super::float_common::{FloatAnalysis,
pub struct Scif {
as_num : f64
impl Scif {
pub fn new() -> Scif {
Scif { as_num: 0.0 }
impl Formatter for Scif {
fn get_primitive(
field : &FormatField,
inprefix : &InPrefix,
str_in : &str
) -> Option<FormatPrimitive> {
let second_field = field.second_field.unwrap_or(6)+1;
let analysis = FloatAnalysis::analyze(
Some(second_field as usize+1),
let f = get_primitive_dec(
second_field as usize,
Some(*field.field_char == 'E'));
fn primitive_to_str(
prim: &FormatPrimitive,
field: FormatField) -> String {

View file

@ -0,0 +1,4 @@
// FormatField and FieldType: the field details handed to num_format
pub mod format_field;
// the Formatter trait plus FormatPrimitive, InPrefix and Base
mod formatter;
// the per-field-type Formatter implementations (Intf, Floatf, Scif, Decf)
mod formatters;
// creates the printed output for numeric substitutions
pub mod num_format;

View file

@ -0,0 +1,269 @@
//! handles creating printed output for numeric substitutions
use std::env;
use std::vec::Vec;
use cli;
use super::format_field::{FormatField, FieldType};
use super::formatter::{Formatter, FormatPrimitive, InPrefix, Base};
use super::formatters::intf::Intf;
use super::formatters::floatf::Floatf;
use super::formatters::scif::Scif;
use super::formatters::decf::Decf;
pub fn warn_expected_numeric(pf_arg: &String) {
//important: keep println here not print
cli::err_msg(&format!("{}: expected a numeric value", pf_arg));
// when character constant arguments have excess characters
// issue a warning when POSIXLY_CORRECT is not set
fn warn_char_constant_ign(remaining_bytes: Vec<u8>) {
match env::var("POSIXLY_CORRECT") {
Ok(_) => {}
Err(e) => {
match e {
env::VarError::NotPresent => {
cli::err_msg(&format!("warning: {:?}: character(s) following character constant have been ignored", &*remaining_bytes));
_ => {}
// this function looks at the first few
// characters of an argument and returns a value if we can learn
// a value from that (e.g. no argument? return 0, char constant? ret value)
fn get_provided(
str_in_opt : Option<&String>
) -> Option<u8> {
const C_S_QUOTE: u8=39;
const C_D_QUOTE: u8=34;
match str_in_opt {
Some(str_in) => {
let mut byte_it = str_in.bytes();
if let Some(qchar) = {
match qchar {
return Some(match {
Some(second_byte) => {
let mut ignored : Vec<u8> = Vec::new();
while let Some(cont) {
if ignored.len() > 0 {
second_byte as u8
//no byte after quote
None => {
let so_far =
(qchar as u8 as char).to_string();
0 as u8
//first byte is not quote
_ => { return None; }
//no first byte
} else { Some(0 as u8) }
None =>{ Some(0) }
// takes a string and returns
// a sign,
// a base,
// and an offset for index after all
// initial spacing, sign, base prefix, and leading zeroes
fn get_inprefix(
str_in : &String,
field_type : &FieldType
) -> InPrefix {
let mut str_it = str_in.chars();
let mut ret = InPrefix { radix_in: Base::Ten, sign: 1, offset: 0 };
let mut topchar =;
//skip spaces and ensure topchar is the first non-space char
// (or None if none exists)
loop {
match topchar
Some(' ')=>{ret.offset+=1;;},
_=>{ break; }
//parse sign
match topchar {
Some('+')=>{ ret.offset+=1;; }
Some('-')=>{ ret.sign = -1; ret.offset+=1;; }
// we want to exit with offset being
// the index of the first non-zero
// digit before the decimal point or
// if there is none, the zero before the
// decimal point, or, if there is none,
// the decimal point.
// while we are determining the offset
// we will ensure as a convention
// the offset is always on the first character
// that we are yet unsure if it is the
// final offset. If the zero could be before
// a decimal point we don't move past the zero.
let mut is_hex = false;
if Some('0') == topchar {
if let Some(base) = {
// lead zeroes can only exist in
// octal and hex base
let mut do_clean_lead_zeroes = false;
match base {
'x' | 'X' => {
is_hex = true;
ret.offset += 2;
ret.radix_in = Base::Hex;
do_clean_lead_zeroes = true;
e @ '0'...'9' => {
match *field_type {
FieldType::Intf => {
ret.radix_in = Base::Octal;
_ => {}
if e == '0' {
do_clean_lead_zeroes = true;
if do_clean_lead_zeroes {
let mut first = true;
while let Some(ch_zero) = {
// see notes on offset above:
// this is why the offset for octals and decimals
// that reach this branch is 1 even though
// they have already eaten the characters '00'
// this is also why when hex encounters its
// first zero it does not move its offset
// forward because it does not know for sure
// that its current offset (of that zero)
// is not the final offset,
// whereas at that point octal knows its
// current offset is not the final offset.
match ch_zero {
'0' => {
if !(is_hex && first) { ret.offset+=1; }
//if decimal, keep last zero if one exists
//(it's possible for last zero to
// not exist at this branch if we're in hex input)
'.' => {
//other digit, etc.
_ => {
if !(is_hex && first) { ret.offset+=1; }
if first { first = false; }
// this is the function a Sub's print will delegate to
// if it is a numeric field, passing the field details
// and an iterator to the argument
pub fn num_format(
field: &FormatField,
in_str_opt: Option<&String>
) -> Option<String> {
let fchar = field.field_char.clone();
// num format mainly operates by further delegating to one of
// several Formatter structs depending on the field
// see for more details
// to do switch to static dispatch
let fmtr : Box<Formatter> = match *field.field_type {
FieldType::Intf => Box::new(Intf::new()),
FieldType::Floatf => Box::new(Floatf::new()),
FieldType::Scif => Box::new(Scif::new()),
FieldType::Decf => Box::new(Decf::new()),
_ => { panic!("asked to do num format with non-num fieldtype"); }
let prim_opt=
// if we can get an assumed value from looking at the first
// few characters, use that value to create the FormatPrimitive
if let Some(provided_num) = get_provided(in_str_opt) {
let mut tmp : FormatPrimitive = Default::default();
match fchar {
'u' | 'i' | 'd' => {
tmp.pre_decimal = Some(
format!("{}", provided_num));
'x' | 'X' => {
tmp.pre_decimal = Some(
format!("{:x}", provided_num));
'o' => {
tmp.pre_decimal = Some(
format!("{:o}", provided_num));
'e' | 'E' | 'g' | 'G' => {
let as_str = format!("{}", provided_num);
let inprefix = get_inprefix(
tmp=fmtr.get_primitive(field, &inprefix, &as_str)
.expect("err during default provided num");
_ => {
tmp.pre_decimal = Some(
format!("{}", provided_num));
tmp.post_decimal = Some(String::from("0"));
} else {
// otherwise we'll interpret the argument as a number
// using the appropriate Formatter
let in_str = in_str_opt.expect(
"please send the devs this message:
\n get_provided is failing to ret as Some(0) on no str ");
// first get information about the beginning of the
// numeric argument that would be useful for
// any formatter (int or float)
let inprefix = get_inprefix(
// then get the FormatPrimitive from the Formatter
fmtr.get_primitive(field, &inprefix, in_str)
// if we have a formatPrimitive, print its results
// according to the field-char appropriate Formatter
if let Some(prim) = prim_opt {
} else {

src/printf/tokenize/ Normal file
View file

@ -0,0 +1,425 @@
//! Sub is a token that represents a
//! segment of the format string that is a substitution
//! it is created by Sub's implementation of the Tokenizer trait
//! Subs which have numeric field chars make use of the num_format
//! submodule
use std::slice::Iter;
use std::iter::Peekable;
use std::str::Chars;
use std::process::exit;
use cli;
use itertools::PutBackN;
use super::token;
use super::unescaped_text::UnescapedText;
use super::num_format::format_field::{FormatField, FieldType};
use super::num_format::num_format;
//use std::collections::HashSet;
fn err_conv(sofar: &String) {
cli::err_msg(&format!("%{}: invalid conversion specification", sofar));
fn convert_asterisk_arg_int(asterisk_arg : &String) -> isize {
// this is a costly way to parse the
// args used for asterisk values into integers
// from various bases. Actually doing it correctly
// (going through the pipeline to intf, but returning
// the integer instead of writing it to string and then
// back) is on the refactoring TODO
let field_type = FieldType::Intf;
let field_char = 'i';
let field_info = FormatField{
min_width: Some(0),
second_field: Some(0),
orig: asterisk_arg,
field_type: &field_type,
field_char: &field_char
pub enum CanAsterisk<T> {
// Sub is a tokenizer which creates tokens
// for substitution segments of a format string
pub struct Sub {
min_width: CanAsterisk<Option<isize>>,
second_field: CanAsterisk<Option<u32>>,
field_char: char,
field_type: FieldType,
orig: String
impl Sub {
pub fn new(min_width: CanAsterisk<Option<isize>>,
second_field: CanAsterisk<Option<u32>>,
field_char: char,
orig: String) -> Sub {
// for more dry printing, field characters are grouped
// in initialization of token.
let field_type = match field_char {
's' | 'b' => FieldType::Strf,
'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf,
'f' | 'F' => FieldType::Floatf,
'e' | 'E' => FieldType::Scif,
'g' | 'G' => FieldType::Decf,
'c' => FieldType::Charf,
_ => {
//should be unreachable.
println!("Invalid fieldtype");
Sub {
min_width: min_width,
second_field: second_field,
field_char: field_char,
field_type: field_type,
orig: orig
struct SubParser {
min_width_tmp : Option<String>,
min_width_is_asterisk: bool,
past_decimal : bool,
second_field_tmp : Option<String>,
second_field_is_asterisk : bool,
specifiers_found : bool,
field_char : Option<char>,
text_so_far : String
impl SubParser {
fn new() -> SubParser {
SubParser {
min_width_tmp : None,
min_width_is_asterisk : false,
past_decimal : false,
second_field_tmp : None,
second_field_is_asterisk : false,
specifiers_found : false,
field_char : None,
text_so_far : String::new()
fn from_it(it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>)
-> Option<Box<token::Token>> {
let mut parser = SubParser::new();
if parser.sub_vals_retrieved(it) {
let t: Box<token::Token> = SubParser::build_token(parser);
} else {
fn build_token(parser : SubParser) -> Box<token::Token> {
//not a self method so as to allow move of subparser vals.
//return new Sub struct as token
let t: Box<token::Token> = Box::new(
if parser.min_width_is_asterisk {
} else {
CanAsterisk::Fixed(|x| x.parse::<isize>().unwrap()))
if parser.second_field_is_asterisk {
} else {
CanAsterisk::Fixed(|x| x.parse::<u32>().unwrap()))
fn sub_vals_retrieved(&mut self,
it: &mut PutBackN<Chars>)
-> bool {
if !SubParser::successfully_eat_prefix(it, &mut self.text_so_far) {
return false;
// this fn in particular is much longer than it needs to be;
// we could get a lot of code savings just by cleaning it up.
// it shouldn't use a regex, though, as we want to mimic the
// original behavior of printing the field as interpreted up
// until the error in the field.
let mut legal_fields=vec!['b', 'c', 'd', 'e', 'E',
'f', 'g', 'G', 'i', 'o',
's', 'u', 'x', 'X'];
let mut specifiers=vec!['h', 'j', 'l', 'L', 't', 'z'];
// divide substitution from %([0-9]+)?(\.[0-9]+)?([a-zA-Z])
// into min_width, second_field, field_char
while let Some(ch) = {
match ch as char {
'-' | '*' | '0' ... '9' => {
if ! self.past_decimal {
if self.min_width_is_asterisk
|| self.specifiers_found {
if self.min_width_tmp.is_none() {
match self.min_width_tmp.as_mut() {
Some(x) => {
if (ch == '-' || ch == '*') && x.len() > 0 {
if ch == '*' {
self.min_width_is_asterisk = true;
None => { panic!("should be unreachable"); }
} else {
//second field should never have a
// negative value
if self.second_field_is_asterisk
|| ch == '-'
|| self.specifiers_found {
if self.second_field_tmp.is_none() {
match self.second_field_tmp.as_mut() {
Some(x) => {
if ch == '*' && x.len() > 0 {
if ch == '*' {
self.second_field_is_asterisk = true;
None => { panic!("should be unreachable"); }
'.' => {
if ! self.past_decimal {
self.past_decimal = true;
} else {
x if legal_fields.binary_search(&x).is_ok() => {
x if specifiers.binary_search(&x).is_ok() => {
if ! self.past_decimal {
self.past_decimal = true;
if ! self.specifiers_found {
self.specifiers_found = true;
_ => {
if ! self.field_char.is_some()
{ err_conv(&self.text_so_far); }
let field_char_retrieved = self.field_char.unwrap();
if self.past_decimal && self.second_field_tmp.is_none() {
self.second_field_tmp = Some(String::from("0"));
// if the dot is provided without a second field
// printf interprets it as 0.
match self.second_field_tmp.as_mut() {
Some(x) => {
if x.len() == 0 {
self.min_width_tmp = Some(String::from("0"));
_ => {}
fn successfully_eat_prefix(it: &mut PutBackN<Chars>,
text_so_far : &mut String ) -> bool {
//get next two chars,
// if they're '%%' we're not tokenizing it
// else put chars back
let preface =;
let n_ch =;
if preface == Some('%') &&
n_ch != Some('%') {
match n_ch {
Some(x) => {
None => {
} else {|x| it.put_back(x));|x| it.put_back(x));
fn validate_field_params(&self, field_char : char) {
//check for illegal combinations here when possible vs
// on each application so we check less per application
// to do: move these checks to Sub::new
if (field_char == 's' &&
self.min_width_tmp == Some(String::from("0"))) ||
(field_char == 'c' &&
(self.min_width_tmp == Some(String::from("0")) || self.past_decimal)) ||
(field_char == 'b' &&
(self.min_width_tmp.is_some() || self.past_decimal ||
self.second_field_tmp.is_some())) {
impl token::Tokenizer for Sub {
fn from_it(it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>)
-> Option<Box<token::Token>> {
SubParser::from_it(it, args)
impl token::Token for Sub {
fn print(&self, pf_args_it: &mut Peekable<Iter<String>>) {
let field = FormatField {
min_width: match self.min_width {
CanAsterisk::Fixed(x) => x,
CanAsterisk::Asterisk => {
match {
//temporary, use instead
Some(x) => Some(convert_asterisk_arg_int(x)),
None => Some(0)
second_field: match self.second_field {
CanAsterisk::Fixed(x) => x,
CanAsterisk::Asterisk => {
match {
//temporary, use instead
Some(x) => {
let result = convert_asterisk_arg_int(x);
if result < 0 {
} else {
Some(result as u32)
None => Some(0)
field_char: &self.field_char,
field_type: &self.field_type,
orig: &self.orig,
let pf_arg =;
// minimum width is handled independently of actual
// field char
let pre_min_width_opt : Option<String> = match *field.field_type {
// if %s just return arg
// if %b use UnescapedText module's unescaping-fn
// if %c return first char of arg
FieldType::Strf | FieldType::Charf => {
match pf_arg {
Some(arg_string) => {
match *field.field_char {
's' => {
Some(match field.second_field {
Some(max) =>{
&arg_string[..max as usize])
None => {
'b' => {
let mut a_it=PutBackN::new(
&mut a_it, true);
//for 'c': get iter of string vals,
//get opt<char> of first val
//and map it to opt<String>
'c' | _ => arg_string.chars().next().map(
|x| x.to_string())
None => None
_ => {
// non string/char fields are delegated to num_format
num_format::num_format(&field, pf_arg)
match pre_min_width_opt {
// if have a string, print it, ensuring minimum width is met.
Some(pre_min_width) => {
print!("{}", match field.min_width {
Some(min_width) => {
let diff : isize = min_width.abs() as isize -
pre_min_width.len() as isize;
if diff > 0 {
let mut final_str = String::new();
// definitely more efficient ways
// to do this.
let pad_before = min_width > 0;
if ! pad_before {
for _ in 0..diff {
final_str.push(' ');
if pad_before {
} else {
None => { pre_min_width }
None => {}

View file

@ -0,0 +1,32 @@
//! Traits and enums dealing with Tokenization of printf Format String
use std::iter::Peekable;
use std::str::Chars;
use std::slice::Iter;
use itertools::PutBackN;
// A token object is an object that can print the expected output
// of a contiguous segment of the format string, and
// requires at most 1 argument
pub trait Token {
fn print(&self, args: &mut Peekable<Iter<String>>);
// A tokenizer object is an object that takes an iterator
// at a position in a format string, and sees whether
// it can return a token of a type it knows how to produce
// if so, return the token, move the iterator past the
// format string text the token represents, and if an
// argument is used move the argument iter forward one
// creating token of a format string segment should also cause
// printing of that token's value. Essentially tokenizing
// a whole format string will print the format string and consume
// a number of arguments equal to the number of argument-using tokens
pub trait Tokenizer {
fn from_it(it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>)
-> Option<Box<Token>>;

View file

@ -0,0 +1,268 @@
//! UnescapedText is a tokenizer impl
//! for tokenizing character literals,
//! and escaped character literals (of allowed escapes),
//! into an unescaped text byte array
use std::iter::Peekable;
use std::slice::Iter;
use std::str::Chars;
use std::char::from_u32;
use std::process::exit;
use cli;
use itertools::PutBackN;
use super::token;
pub struct UnescapedText(Vec<u8>);
impl UnescapedText {
fn new() -> UnescapedText {
//take an iterator to the format string
//consume between min and max chars
//and return it as a base-X number
fn base_to_u32(
min_chars: u8,
max_chars: u8,
base : u32,
it: &mut PutBackN<Chars>
) -> u32 {
let mut retval : u32 = 0;
let mut found=0;
while found < max_chars {
//if end of input break
let nc =;
match nc {
Some(digit) => {
//if end of hexchars break
match digit.to_digit(base) {
Some(d) => {
found += 1;
retval *= base;
retval += d;
None => {
None => {
if found < min_chars {
//only ever expected for hex
println!("missing hexadecimal number in escape"); //todo stderr
// validates against valid
// IEC 10646 vals - these values
// are pinned against the more popular
// printf so as to not disrupt when
// dropped-in as a replacement.
fn validate_iec(val: u32, eight_word: bool) {
let mut preface = 'u';
let mut leading_zeros= 4;
if eight_word {
let err_msg=format!("invalid universal character name {0}{1:02$x}",
if (val < 159 && (val != 36 &&
val != 64 &&
val != 96)) ||
(val > 55296 && val < 57343) {
println!("{}", err_msg);//todo stderr
// pass an iterator that succeeds a '\\',
// and process the remaining character
// adding the unescaped bytes
// to the passed byte_vec
// in subs_mode change octal behavior
fn handle_escaped(
byte_vec: &mut Vec<u8>,
it: &mut PutBackN<Chars>,
subs_mode: bool
) {
let ch = match {
Some(x) => x,
None => '\\'
match ch {
'0'...'9' | 'x' => {
let min_len = 1;
let mut max_len = 2;
let mut base = 16;
let ignore = false;
match ch {
'x'=>{ },
e @ '0'...'9' => {
max_len=3; base =8;
// in practice, gnu coreutils printf
// interprets octals without a
// leading zero in %b
// but it only skips leading zeros
// in %b mode.
// if we ever want to match gnu coreutil
// printf's docs instead of its behavior
// we'd set this to true.
//if subs_mode && e != '0'
// { ignore = true; }
if ! subs_mode || e != '0'
{ it.put_back(ch); }
_ =>{}
if ! ignore {
let val = (UnescapedText::base_to_u32(min_len,
it) % 256) as u8;
let bvec = [val];
} else {
byte_vec.push(ch as u8);
e @ _ => {
//only for hex and octal
//is byte encoding specified.
//otherwise, why not leave the door open
//for other encodings unless it turns out
//a bottleneck.
let mut s = String::new();
let ch = match e {
'\\' => '\\',
'"' => '"',
'n' => '\n',
'r' => '\r',
't' => '\t',
'a' => '\x07',
'b' => '\x08',
//vertical tab
'v' => '\x0B',
//form feed
'f' => '\x0C',
//escape character
'e' => '\x1B',
'c' => { exit(cli::EXIT_OK) },
'u' | 'U' => {
let len = match e {
'u' => 4,
'U' | _ => 8
let val = UnescapedText::base_to_u32(len,
UnescapedText::validate_iec(val, false);
if let Some(c) = from_u32(val) {
} else { '-' }
_ => {
// take an iterator to a string,
// and return a wrapper around a Vec<u8> of unescaped bytes
// break on encounter of sub symbol ('%[^%]') unless called
// through %b subst.
pub fn from_it_core(it: &mut PutBackN<Chars>,
subs_mode: bool) -> Option<Box<token::Token>> {
let mut addchar = false;
let mut new_text = UnescapedText::new();
let mut tmp_str = String::new();
let mut new_vec : &mut Vec<u8> = &mut (new_text.0);
while let Some(ch) = {
if ! addchar { addchar = true; }
match ch as char {
x if x != '\\' && x != '%' => {
// lazy branch eval
// remember this fn could be called
// many times in a single exec through %b
'\\' => {
// the literal may be a literal bytecode
// and not valid utf-8. Str only supports
// valid utf-8.
// if we find the unnecessary drain
// on non hex or octal escapes is costly
// then we can make it faster/more complex
// with as-necessary draining.
if tmp_str.len() > 0 {
tmp_str = String::new();
x if x == '%' && !subs_mode => {
if let Some(follow) = {
if follow == '%' {
} else {
} else {
_ => {
if tmp_str.len() > 0 {
match addchar {
true => Some(Box::new(new_text)),
false => None
impl token::Tokenizer for UnescapedText {
fn from_it(it: &mut PutBackN<Chars>,
args: &mut Peekable<Iter<String>>)
-> Option<Box<token::Token>> {
UnescapedText::from_it_core(it, false)
impl token::Token for UnescapedText {
fn print(&self, pf_args_it: &mut Peekable<Iter<String>>) {

tests/ Normal file
View file

@ -0,0 +1,235 @@
mod common;
use common::util::*;
static UTIL_NAME: &'static str = "printf";
fn expect_stdout(input: Vec<&str>, expected: &str) {
let (_, mut ucmd) = testing(UTIL_NAME);
let results = ucmd.args(&input).run();
// --- literal text and escape sequences in the format string ---
// Each case passes the format string (and no further arguments) to
// expect_stdout along with the expected standard output.

// plain text is echoed unchanged
fn basic_literal() { expect_stdout(
vec!("hello world"), "hello world"); }
// backslash-t is unescaped to a tab character
fn escaped_tab() { expect_stdout(
vec!("hello\\t world"), "hello\t world"); }
// backslash-n is unescaped to a newline
fn escaped_newline() { expect_stdout(
vec!("hello\\n world"), "hello\n world"); }
// a doubled backslash yields a single backslash
fn escaped_slash() { expect_stdout(
vec!("hello\\\\ world"), "hello\\ world"); }
// \xHH hex byte escape
fn escaped_hex() { expect_stdout(
vec!("\\x41"), "A"); }
// \NNN octal byte escape
fn escaped_octal() { expect_stdout(
vec!("\\101"), "A"); }
// \uHHHH four-digit universal character name
fn escaped_unicode_fourdigit() { expect_stdout(
vec!("\\u0125"), "ĥ"); }
// \UHHHHHHHH eight-digit universal character name
fn escaped_unicode_eightdigit() { expect_stdout(
vec!("\\U00000125"), "ĥ"); }
// %% prints a single percent sign
fn escaped_percent_sign() { expect_stdout(
vec!("hello%% world"), "hello% world"); }
// an unrecognized escape is printed as written, backslash included
fn escaped_unrecognized() { expect_stdout(
vec!("c\\d"), "c\\d"); }
// --- string and character substitutions (%s, %b, %c) ---

// %s is replaced by the next argument
fn sub_string() { expect_stdout(
vec!("hello %s", "world"), "hello world"); }
// several substitutions consume one argument each, in order
fn sub_multifield() { expect_stdout(
vec!("%s %s", "hello", "world"), "hello world"); }
// more arguments than substitutions in the format string
// NOTE(review): the expected output is empty here although the test name
// suggests the format string is re-applied to the extra argument —
// confirm this literal is intact
fn sub_repeat_formatstr() { expect_stdout(
vec!("%s.", "hello", "world"), ""); }
// %s does not interpret backslash escapes in its argument
fn sub_string_ignore_escapes() { expect_stdout(
vec!("hello %s", "\\tworld"), "hello \\tworld"); }
// %b does interpret backslash escapes in its argument
fn sub_bstring_handle_escapes() { expect_stdout(
vec!("hello %b", "\\tworld"), "hello \tworld"); }
// %b leaves '%' sequences in its argument untouched
fn sub_bstring_ignore_subs() { expect_stdout(
vec!("hello %b", "world %% %i"), "hello world %% %i"); }
// %c prints the first character of its argument
fn sub_char() { expect_stdout(
vec!("the letter %c", "A"), "the letter A"); }
// --- integer substitutions (%i, %u, %o, %x, %X) ---

// decimal input, decimal output
fn sub_num_int() { expect_stdout(
vec!("twenty is %i", "20"), "twenty is 20"); }
// a minimum width smaller than the number adds no padding
fn sub_num_int_minwidth() { expect_stdout(
vec!("twenty is %1i", "20"), "twenty is 20"); }
// negative decimal input keeps its sign
fn sub_num_int_neg() { expect_stdout(
vec!("neg. twenty is %i", "-20"), "neg. twenty is -20"); }
// a leading zero makes an integer argument octal
fn sub_num_int_oct_in() { expect_stdout(
vec!("twenty is %i", "024"), "twenty is 20"); }
// negative octal input
fn sub_num_int_oct_in_neg() { expect_stdout(
vec!("neg. twenty is %i", "-024"), "neg. twenty is -20"); }
// a 0x prefix makes the argument hexadecimal
fn sub_num_int_hex_in() { expect_stdout(
vec!("twenty is %i", "0x14"), "twenty is 20"); }
// negative hexadecimal input
fn sub_num_int_hex_in_neg() { expect_stdout(
vec!("neg. twenty is %i", "-0x14"), "neg. twenty is -20"); }
// a leading quote yields the byte value of the following character
fn sub_num_int_charconst_in() { expect_stdout(
vec!("ninetyseven is %i", "'a"), "ninetyseven is 97"); }
// unsigned decimal output
fn sub_num_uint() { expect_stdout(
vec!("twenty is %u", "20"), "twenty is 20"); }
// octal output
fn sub_num_octal() { expect_stdout(
vec!("twenty in octal is %o", "20"), "twenty in octal is 24"); }
// lower-case hexadecimal output
fn sub_num_hex_lower() { expect_stdout(
vec!("thirty in hex is %x", "30"), "thirty in hex is 1e"); }
// upper-case hexadecimal output
fn sub_num_hex_upper() { expect_stdout(
vec!("thirty in hex is %X", "30"), "thirty in hex is 1E"); }
// --- floating-point substitutions (%f, %e, %E, %g) ---

// %f prints six decimal places when no second field is given
fn sub_num_float() { expect_stdout(
vec!("twenty is %f", "20"), "twenty is 20.000000"); }
// %f rounds at the last printed decimal place
fn sub_num_float_round() { expect_stdout(
vec!("two is %f", "1.9999995"), "two is 2.000000"); }
// %e prints scientific notation with a lower-case exponent marker
fn sub_num_sci_lower() { expect_stdout(
vec!("twenty is %e", "20"), "twenty is 2.000000e+01"); }
// %E prints scientific notation with an upper-case exponent marker
fn sub_num_sci_upper() { expect_stdout(
vec!("twenty is %E", "20"), "twenty is 2.000000E+01"); }
// %e shortens a long mantissa to six decimal places
fn sub_num_sci_trunc() { expect_stdout(
vec!("pi is ~ %e", "3.1415926535"), "pi is ~ 3.141593e+00"); }
// %g shortens a long decimal input
fn sub_num_dec_trunc() { expect_stdout(
vec!("pi is ~ %g", "3.1415926535"), "pi is ~ 3.141593"); }
// --- minimum-width and second-field (precision) handling ---
// NOTE(review): several expected strings below contain fewer spaces than
// the requested width implies (e.g. %7s of a five-character argument);
// verify that runs of whitespace in these literals have not been collapsed.

// positive minimum width on a string
fn sub_minwidth() { expect_stdout(
vec!("hello %7s", "world"), "hello world"); }
// negative minimum width on a string (padding goes after the text)
fn sub_minwidth_negative() { expect_stdout(
vec!("hello %-7s", "world"), "hello world "); }
// the second field of %s caps the number of characters printed
fn sub_str_max_chars_input() { expect_stdout(
vec!("hello %7.2s", "world"), "hello wo"); }
// a '.' with no digits after it on an integer field
fn sub_int_decimal() { expect_stdout(
vec!("%0.i", "11"), "11"); }
// the second field of an integer is a zero-padded minimum width
fn sub_int_leading_zeroes() { expect_stdout(
vec!("%.4i", "11"), "0011"); }
// zero padding from the second field is applied before the general width
fn sub_int_leading_zeroes_prio() { expect_stdout(
vec!("%5.4i", "11"), " 0011"); }
// the second field of %f sets the number of decimal places
fn sub_float_dec_places() { expect_stdout(
vec!("pi is ~ %.11f", "3.1415926535"), "pi is ~ 3.14159265350"); }
// hexadecimal input with a fractional part is accepted by %f
fn sub_float_hex_in() { expect_stdout(
vec!("%f", "0xF1.1F"), "241.121094"); }
// a leading zero does not make a float argument octal
fn sub_float_no_octal_in() { expect_stdout(
vec!("%f", "077"), "77.000000"); }
fn sub_any_asterisk_firstparam() { expect_stdout(
vec!("%*i", "3", "11", "4", "12"), " 11 12");
fn sub_any_asterisk_second_param() { expect_stdout(
vec!("%.*i", "3", "11", "4", "12"), "0110012");
fn sub_any_asterisk_both_params() { expect_stdout(
vec!("%*.*i", "4", "3", "11", "5", "4", "12"), " 011 0012");
fn sub_any_asterisk_octal_arg() { expect_stdout(
vec!("%.*i", "011", "12345678"), "012345678");
fn sub_any_asterisk_hex_arg() { expect_stdout(
vec!("%.*i", "0xA", "123456789"), "0123456789");
fn sub_any_specifiers_no_params() { expect_stdout(
vec!("%ztlhLji", "3"), "3");
fn sub_any_specifiers_after_first_param() { expect_stdout(
vec!("%0ztlhLji", "3"), "3");
fn sub_any_specifiers_after_period() { expect_stdout(
vec!("%0.ztlhLji", "3"), "3");
fn sub_any_specifiers_after_second_param() { expect_stdout(
vec!("%0.0ztlhLji", "3"), "3");