expr: support arbitrary precision integers (#2271)

* expr: support arbitrary precision integers

Instead of i64s we now use BigInts for integer operations. This means
that no input or result can be out of range.
The representation of boolean flags (the 0/1 results of comparisons) was
changed from i64 to u8 to make their intent clearer.

* expr: allow big numbers as arguments as well

Also adds some tests.

* expr: use num-traits to check bigints for 0 and 1

* expr: remove obsolete refs

Match ergonomics make the explicit `ref` bindings and `*` dereferences unnecessary.

* formatting

Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
This commit is contained in:
Michael Debertol 2021-05-29 23:25:23 +02:00 committed by GitHub
parent 6e1a68ce9a
commit d821719c67
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 128 additions and 84 deletions

13
Cargo.lock generated
View file

@ -859,6 +859,17 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "num-bigint"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e0d047c1062aa51e256408c560894e5251f08925980e53cf1aa5bd00eec6512"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.44"
@ -1812,6 +1823,8 @@ name = "uu_expr"
version = "0.0.6"
dependencies = [
"libc",
"num-bigint",
"num-traits",
"onig",
"uucore",
"uucore_procs",

View file

@ -16,6 +16,8 @@ path = "src/expr.rs"
[dependencies]
libc = "0.2.42"
num-bigint = "0.4.0"
num-traits = "0.2.14"
onig = "~4.3.2"
uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }

View file

@ -12,6 +12,8 @@
// spell-checker:ignore (ToDO) binop binops ints paren prec
use num_bigint::BigInt;
use num_traits::{One, Zero};
use onig::{Regex, RegexOptions, Syntax};
use crate::tokens::Token;
@ -39,20 +41,17 @@ impl AstNode {
for _ in 0..depth {
print!("\t",);
}
match *self {
AstNode::Leaf {
ref token_idx,
ref value,
} => println!(
match self {
AstNode::Leaf { token_idx, value } => println!(
"Leaf( {} ) at #{} ( evaluate -> {:?} )",
value,
token_idx,
self.evaluate()
),
AstNode::Node {
ref token_idx,
ref op_type,
ref operands,
token_idx,
op_type,
operands,
} => {
println!(
"Node( {} ) at #{} (evaluate -> {:?})",
@ -81,36 +80,33 @@ impl AstNode {
})
}
pub fn evaluate(&self) -> Result<String, String> {
match *self {
AstNode::Leaf { ref value, .. } => Ok(value.clone()),
AstNode::Node { ref op_type, .. } => match self.operand_values() {
match self {
AstNode::Leaf { value, .. } => Ok(value.clone()),
AstNode::Node { op_type, .. } => match self.operand_values() {
Err(reason) => Err(reason),
Ok(operand_values) => match op_type.as_ref() {
"+" => infix_operator_two_ints(
|a: i64, b: i64| checked_binop(|| a.checked_add(b), "+"),
&operand_values,
),
"-" => infix_operator_two_ints(
|a: i64, b: i64| checked_binop(|| a.checked_sub(b), "-"),
&operand_values,
),
"*" => infix_operator_two_ints(
|a: i64, b: i64| checked_binop(|| a.checked_mul(b), "*"),
&operand_values,
),
"+" => {
infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a + b), &operand_values)
}
"-" => {
infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a - b), &operand_values)
}
"*" => {
infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a * b), &operand_values)
}
"/" => infix_operator_two_ints(
|a: i64, b: i64| {
if b == 0 {
|a: BigInt, b: BigInt| {
if b.is_zero() {
Err("division by zero".to_owned())
} else {
checked_binop(|| a.checked_div(b), "/")
Ok(a / b)
}
},
&operand_values,
),
"%" => infix_operator_two_ints(
|a: i64, b: i64| {
if b == 0 {
|a: BigInt, b: BigInt| {
if b.is_zero() {
Err("division by zero".to_owned())
} else {
Ok(a % b)
@ -119,32 +115,32 @@ impl AstNode {
&operand_values,
),
"=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok(bool_as_int(a == b)),
|a: BigInt, b: BigInt| Ok(bool_as_int(a == b)),
|a: &String, b: &String| Ok(bool_as_string(a == b)),
&operand_values,
),
"!=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok(bool_as_int(a != b)),
|a: BigInt, b: BigInt| Ok(bool_as_int(a != b)),
|a: &String, b: &String| Ok(bool_as_string(a != b)),
&operand_values,
),
"<" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok(bool_as_int(a < b)),
|a: BigInt, b: BigInt| Ok(bool_as_int(a < b)),
|a: &String, b: &String| Ok(bool_as_string(a < b)),
&operand_values,
),
">" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok(bool_as_int(a > b)),
|a: BigInt, b: BigInt| Ok(bool_as_int(a > b)),
|a: &String, b: &String| Ok(bool_as_string(a > b)),
&operand_values,
),
"<=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok(bool_as_int(a <= b)),
|a: BigInt, b: BigInt| Ok(bool_as_int(a <= b)),
|a: &String, b: &String| Ok(bool_as_string(a <= b)),
&operand_values,
),
">=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok(bool_as_int(a >= b)),
|a: BigInt, b: BigInt| Ok(bool_as_int(a >= b)),
|a: &String, b: &String| Ok(bool_as_string(a >= b)),
&operand_values,
),
@ -161,7 +157,7 @@ impl AstNode {
}
}
pub fn operand_values(&self) -> Result<Vec<String>, String> {
if let AstNode::Node { ref operands, .. } = *self {
if let AstNode::Node { operands, .. } = self {
let mut out = Vec::with_capacity(operands.len());
for operand in operands {
match operand.evaluate() {
@ -217,9 +213,9 @@ fn maybe_dump_ast(result: &Result<Box<AstNode>, String>) {
if let Ok(debug_var) = env::var("EXPR_DEBUG_AST") {
if debug_var == "1" {
println!("EXPR_DEBUG_AST");
match *result {
Ok(ref ast) => ast.debug_dump(),
Err(ref reason) => println!("\terr: {:?}", reason),
match result {
Ok(ast) => ast.debug_dump(),
Err(reason) => println!("\terr: {:?}", reason),
}
}
}
@ -304,7 +300,7 @@ fn push_token_to_either_stack(
out_stack: &mut TokenStack,
op_stack: &mut TokenStack,
) -> Result<(), String> {
let result = match *token {
let result = match token {
Token::Value { .. } => {
out_stack.push((token_idx, token.clone()));
Ok(())
@ -420,24 +416,14 @@ fn move_till_match_paren(
}
}
fn checked_binop<F: Fn() -> Option<T>, T>(cb: F, op: &str) -> Result<T, String> {
match cb() {
Some(v) => Ok(v),
None => Err(format!("{}: Numerical result out of range", op)),
}
}
fn infix_operator_two_ints<F>(f: F, values: &[String]) -> Result<String, String>
where
F: Fn(i64, i64) -> Result<i64, String>,
F: Fn(BigInt, BigInt) -> Result<BigInt, String>,
{
assert!(values.len() == 2);
if let Ok(left) = values[0].parse::<i64>() {
if let Ok(right) = values[1].parse::<i64>() {
return match f(left, right) {
Ok(result) => Ok(result.to_string()),
Err(reason) => Err(reason),
};
if let Ok(left) = values[0].parse::<BigInt>() {
if let Ok(right) = values[1].parse::<BigInt>() {
return f(left, right).map(|big_int| big_int.to_string());
}
}
Err("Expected an integer operand".to_string())
@ -449,13 +435,14 @@ fn infix_operator_two_ints_or_two_strings<FI, FS>(
values: &[String],
) -> Result<String, String>
where
FI: Fn(i64, i64) -> Result<i64, String>,
FI: Fn(BigInt, BigInt) -> Result<u8, String>,
FS: Fn(&String, &String) -> Result<String, String>,
{
assert!(values.len() == 2);
if let (Some(a_int), Some(b_int)) =
(values[0].parse::<i64>().ok(), values[1].parse::<i64>().ok())
{
if let (Some(a_int), Some(b_int)) = (
values[0].parse::<BigInt>().ok(),
values[1].parse::<BigInt>().ok(),
) {
match fi(a_int, b_int) {
Ok(result) => Ok(result.to_string()),
Err(reason) => Err(reason),
@ -541,7 +528,7 @@ fn prefix_operator_substr(values: &[String]) -> String {
subj.chars().skip(idx).take(len).collect()
}
fn bool_as_int(b: bool) -> i64 {
fn bool_as_int(b: bool) -> u8 {
if b {
1
} else {
@ -559,8 +546,8 @@ fn value_as_bool(s: &str) -> bool {
if s.is_empty() {
return false;
}
match s.parse::<i64>() {
Ok(n) => n != 0,
match s.parse::<BigInt>() {
// Any non-zero integer is truthy, matching the previous `n != 0` check.
// `is_one()` would wrongly treat values such as 2 or -1 as false.
Ok(n) => !n.is_zero(),
Err(_) => true,
}
}

View file

@ -18,6 +18,8 @@
// spell-checker:ignore (ToDO) paren
use num_bigint::BigInt;
#[derive(Debug, Clone)]
pub enum Token {
Value {
@ -51,14 +53,14 @@ impl Token {
}
fn is_infix_plus(&self) -> bool {
match *self {
Token::InfixOp { ref value, .. } => value == "+",
match self {
Token::InfixOp { value, .. } => value == "+",
_ => false,
}
}
fn is_a_number(&self) -> bool {
match *self {
Token::Value { ref value, .. } => value.parse::<i64>().is_ok(),
match self {
Token::Value { value, .. } => value.parse::<BigInt>().is_ok(),
_ => false,
}
}
@ -142,7 +144,7 @@ fn push_token_if_not_escaped(acc: &mut Vec<(usize, Token)>, tok_idx: usize, toke
// Smells heuristics... :(
let prev_is_plus = match acc.last() {
None => false,
Some(ref t) => t.1.is_infix_plus(),
Some(t) => t.1.is_infix_plus(),
};
let should_use_as_escaped = if prev_is_plus && acc.len() >= 2 {
let pre_prev = &acc[acc.len() - 2];

View file

@ -2,55 +2,95 @@ use crate::common::util::*;
#[test]
fn test_simple_arithmetic() {
new_ucmd!().args(&["1", "+", "1"]).run().stdout_is("2\n");
new_ucmd!()
.args(&["1", "+", "1"])
.succeeds()
.stdout_only("2\n");
new_ucmd!().args(&["1", "-", "1"]).run().stdout_is("0\n");
new_ucmd!()
.args(&["1", "-", "1"])
.fails()
.status_code(1)
.stdout_only("0\n");
new_ucmd!().args(&["3", "*", "2"]).run().stdout_is("6\n");
new_ucmd!()
.args(&["3", "*", "2"])
.succeeds()
.stdout_only("6\n");
new_ucmd!().args(&["4", "/", "2"]).run().stdout_is("2\n");
new_ucmd!()
.args(&["4", "/", "2"])
.succeeds()
.stdout_only("2\n");
}
#[test]
fn test_complex_arithmetic() {
let run = new_ucmd!()
new_ucmd!()
.args(&["9223372036854775807", "+", "9223372036854775807"])
.run();
run.stdout_is("");
run.stderr_is("expr: +: Numerical result out of range");
.succeeds()
.stdout_only("18446744073709551614\n");
let run = new_ucmd!().args(&["9", "/", "0"]).run();
run.stdout_is("");
run.stderr_is("expr: division by zero");
new_ucmd!()
.args(&[
"92233720368547758076549841651981984981498415651",
"%",
"922337203685",
])
.succeeds()
.stdout_only("533691697086\n");
new_ucmd!()
.args(&[
"92233720368547758076549841651981984981498415651",
"*",
"922337203685",
])
.succeeds()
.stdout_only("85070591730190566808700855121818604965830915152801178873935\n");
new_ucmd!()
.args(&[
"92233720368547758076549841651981984981498415651",
"-",
"922337203685",
])
.succeeds()
.stdout_only("92233720368547758076549841651981984059161211966\n");
new_ucmd!()
.args(&["9", "/", "0"])
.fails()
.stderr_only("expr: division by zero\n");
}
#[test]
fn test_parenthesis() {
new_ucmd!()
.args(&["(", "1", "+", "1", ")", "*", "2"])
.run()
.stdout_is("4\n");
.succeeds()
.stdout_only("4\n");
}
#[test]
fn test_or() {
new_ucmd!()
.args(&["0", "|", "foo"])
.run()
.stdout_is("foo\n");
.succeeds()
.stdout_only("foo\n");
new_ucmd!()
.args(&["foo", "|", "bar"])
.run()
.stdout_is("foo\n");
.succeeds()
.stdout_only("foo\n");
}
#[test]
fn test_and() {
new_ucmd!()
.args(&["foo", "&", "1"])
.run()
.stdout_is("foo\n");
.succeeds()
.stdout_only("foo\n");
new_ucmd!().args(&["", "&", "1"]).run().stdout_is("0\n");
}