expr: support arbitrary precision integers (#2271)

* expr: support arbitrary precision integers

  Instead of i64s we now use BigInts for integer operations. This means that no result or input can be out of range.

  The representation of integer flags was changed from i64 to u8 to make their intention clearer.

* expr: allow big numbers as arguments as well

  Also adds some tests

* expr: use num-traits to check bigints for 0 and 1

* expr: remove obsolete refs

  match ergonomics made these avoidable.

* formatting

Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
2024-07-23 10:54:14 +00:00 · 2021-05-29 23:25:23 +02:00 · 2021-05-29 23:25:23 +02:00 · d821719c67
parent 6e1a68ce9a
commit d821719c67
5 changed files with 128 additions and 84 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -859,6 +859,17 @@ version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"

+[[package]]
+name = "num-bigint"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e0d047c1062aa51e256408c560894e5251f08925980e53cf1aa5bd00eec6512"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-integer"
 version = "0.1.44"
@ -1812,6 +1823,8 @@ name = "uu_expr"
 version = "0.0.6"
 dependencies = [
 "libc",
+ "num-bigint",
+ "num-traits",
 "onig",
 "uucore",
 "uucore_procs",
--- a/src/uu/expr/Cargo.toml
+++ b/src/uu/expr/Cargo.toml
@ -16,6 +16,8 @@ path = "src/expr.rs"

 [dependencies]
 libc = "0.2.42"
+num-bigint = "0.4.0"
+num-traits = "0.2.14"
 onig = "~4.3.2"
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
--- a/src/uu/expr/src/syntax_tree.rs
+++ b/src/uu/expr/src/syntax_tree.rs
@ -12,6 +12,8 @@

 // spell-checker:ignore (ToDO) binop binops ints paren prec

+use num_bigint::BigInt;
+use num_traits::{One, Zero};
 use onig::{Regex, RegexOptions, Syntax};

 use crate::tokens::Token;
@ -39,20 +41,17 @@ impl AstNode {
        for _ in 0..depth {
            print!("\t",);
        }
-        match *self {
-            AstNode::Leaf {
-                ref token_idx,
-                ref value,
-            } => println!(
+        match self {
+            AstNode::Leaf { token_idx, value } => println!(
                "Leaf( {} ) at #{} ( evaluate -> {:?} )",
                value,
                token_idx,
                self.evaluate()
            ),
            AstNode::Node {
-                ref token_idx,
-                ref op_type,
-                ref operands,
+                token_idx,
+                op_type,
+                operands,
            } => {
                println!(
                    "Node( {} ) at #{} (evaluate -> {:?})",
@ -81,36 +80,33 @@ impl AstNode {
        })
    }
    pub fn evaluate(&self) -> Result<String, String> {
-        match *self {
-            AstNode::Leaf { ref value, .. } => Ok(value.clone()),
-            AstNode::Node { ref op_type, .. } => match self.operand_values() {
+        match self {
+            AstNode::Leaf { value, .. } => Ok(value.clone()),
+            AstNode::Node { op_type, .. } => match self.operand_values() {
                Err(reason) => Err(reason),
                Ok(operand_values) => match op_type.as_ref() {
-                    "+" => infix_operator_two_ints(
-                        |a: i64, b: i64| checked_binop(|| a.checked_add(b), "+"),
-                        &operand_values,
-                    ),
-                    "-" => infix_operator_two_ints(
-                        |a: i64, b: i64| checked_binop(|| a.checked_sub(b), "-"),
-                        &operand_values,
-                    ),
-                    "*" => infix_operator_two_ints(
-                        |a: i64, b: i64| checked_binop(|| a.checked_mul(b), "*"),
-                        &operand_values,
-                    ),
+                    "+" => {
+                        infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a + b), &operand_values)
+                    }
+                    "-" => {
+                        infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a - b), &operand_values)
+                    }
+                    "*" => {
+                        infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a * b), &operand_values)
+                    }
                    "/" => infix_operator_two_ints(
-                        |a: i64, b: i64| {
-                            if b == 0 {
+                        |a: BigInt, b: BigInt| {
+                            if b.is_zero() {
                                Err("division by zero".to_owned())
                            } else {
-                                checked_binop(|| a.checked_div(b), "/")
+                                Ok(a / b)
                            }
                        },
                        &operand_values,
                    ),
                    "%" => infix_operator_two_ints(
-                        |a: i64, b: i64| {
-                            if b == 0 {
+                        |a: BigInt, b: BigInt| {
+                            if b.is_zero() {
                                Err("division by zero".to_owned())
                            } else {
                                Ok(a % b)
@ -119,32 +115,32 @@ impl AstNode {
                        &operand_values,
                    ),
                    "=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a == b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a == b)),
                        |a: &String, b: &String| Ok(bool_as_string(a == b)),
                        &operand_values,
                    ),
                    "!=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a != b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a != b)),
                        |a: &String, b: &String| Ok(bool_as_string(a != b)),
                        &operand_values,
                    ),
                    "<" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a < b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a < b)),
                        |a: &String, b: &String| Ok(bool_as_string(a < b)),
                        &operand_values,
                    ),
                    ">" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a > b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a > b)),
                        |a: &String, b: &String| Ok(bool_as_string(a > b)),
                        &operand_values,
                    ),
                    "<=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a <= b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a <= b)),
                        |a: &String, b: &String| Ok(bool_as_string(a <= b)),
                        &operand_values,
                    ),
                    ">=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a >= b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a >= b)),
                        |a: &String, b: &String| Ok(bool_as_string(a >= b)),
                        &operand_values,
                    ),
@ -161,7 +157,7 @@ impl AstNode {
        }
    }
    pub fn operand_values(&self) -> Result<Vec<String>, String> {
-        if let AstNode::Node { ref operands, .. } = *self {
+        if let AstNode::Node { operands, .. } = self {
            let mut out = Vec::with_capacity(operands.len());
            for operand in operands {
                match operand.evaluate() {
@ -217,9 +213,9 @@ fn maybe_dump_ast(result: &Result<Box<AstNode>, String>) {
    if let Ok(debug_var) = env::var("EXPR_DEBUG_AST") {
        if debug_var == "1" {
            println!("EXPR_DEBUG_AST");
-            match *result {
-                Ok(ref ast) => ast.debug_dump(),
-                Err(ref reason) => println!("\terr: {:?}", reason),
+            match result {
+                Ok(ast) => ast.debug_dump(),
+                Err(reason) => println!("\terr: {:?}", reason),
            }
        }
    }
@ -304,7 +300,7 @@ fn push_token_to_either_stack(
    out_stack: &mut TokenStack,
    op_stack: &mut TokenStack,
 ) -> Result<(), String> {
-    let result = match *token {
+    let result = match token {
        Token::Value { .. } => {
            out_stack.push((token_idx, token.clone()));
            Ok(())
@ -420,24 +416,14 @@ fn move_till_match_paren(
    }
 }

-fn checked_binop<F: Fn() -> Option<T>, T>(cb: F, op: &str) -> Result<T, String> {
-    match cb() {
-        Some(v) => Ok(v),
-        None => Err(format!("{}: Numerical result out of range", op)),
-    }
-}
-
 fn infix_operator_two_ints<F>(f: F, values: &[String]) -> Result<String, String>
 where
-    F: Fn(i64, i64) -> Result<i64, String>,
+    F: Fn(BigInt, BigInt) -> Result<BigInt, String>,
 {
    assert!(values.len() == 2);
-    if let Ok(left) = values[0].parse::<i64>() {
-        if let Ok(right) = values[1].parse::<i64>() {
-            return match f(left, right) {
-                Ok(result) => Ok(result.to_string()),
-                Err(reason) => Err(reason),
-            };
+    if let Ok(left) = values[0].parse::<BigInt>() {
+        if let Ok(right) = values[1].parse::<BigInt>() {
+            return f(left, right).map(|big_int| big_int.to_string());
        }
    }
    Err("Expected an integer operand".to_string())
@ -449,13 +435,14 @@ fn infix_operator_two_ints_or_two_strings<FI, FS>(
    values: &[String],
 ) -> Result<String, String>
 where
-    FI: Fn(i64, i64) -> Result<i64, String>,
+    FI: Fn(BigInt, BigInt) -> Result<u8, String>,
    FS: Fn(&String, &String) -> Result<String, String>,
 {
    assert!(values.len() == 2);
-    if let (Some(a_int), Some(b_int)) =
-        (values[0].parse::<i64>().ok(), values[1].parse::<i64>().ok())
-    {
+    if let (Some(a_int), Some(b_int)) = (
+        values[0].parse::<BigInt>().ok(),
+        values[1].parse::<BigInt>().ok(),
+    ) {
        match fi(a_int, b_int) {
            Ok(result) => Ok(result.to_string()),
            Err(reason) => Err(reason),
@ -541,7 +528,7 @@ fn prefix_operator_substr(values: &[String]) -> String {
    subj.chars().skip(idx).take(len).collect()
 }

-fn bool_as_int(b: bool) -> i64 {
+fn bool_as_int(b: bool) -> u8 {
    if b {
        1
    } else {
@ -559,8 +546,8 @@ fn value_as_bool(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
-    match s.parse::<i64>() {
-        Ok(n) => n != 0,
+    match s.parse::<BigInt>() {
+        Ok(n) => !n.is_zero(), // any non-zero integer is truthy, same as the old `n != 0`
        Err(_) => true,
    }
 }
--- a/src/uu/expr/src/tokens.rs
+++ b/src/uu/expr/src/tokens.rs
@ -18,6 +18,8 @@

 // spell-checker:ignore (ToDO) paren

+use num_bigint::BigInt;
+
 #[derive(Debug, Clone)]
 pub enum Token {
    Value {
@ -51,14 +53,14 @@ impl Token {
    }

    fn is_infix_plus(&self) -> bool {
-        match *self {
-            Token::InfixOp { ref value, .. } => value == "+",
+        match self {
+            Token::InfixOp { value, .. } => value == "+",
            _ => false,
        }
    }
    fn is_a_number(&self) -> bool {
-        match *self {
-            Token::Value { ref value, .. } => value.parse::<i64>().is_ok(),
+        match self {
+            Token::Value { value, .. } => value.parse::<BigInt>().is_ok(),
            _ => false,
        }
    }
@ -142,7 +144,7 @@ fn push_token_if_not_escaped(acc: &mut Vec<(usize, Token)>, tok_idx: usize, toke
    // Smells heuristics... :(
    let prev_is_plus = match acc.last() {
        None => false,
-        Some(ref t) => t.1.is_infix_plus(),
+        Some(t) => t.1.is_infix_plus(),
    };
    let should_use_as_escaped = if prev_is_plus && acc.len() >= 2 {
        let pre_prev = &acc[acc.len() - 2];
--- a/tests/by-util/test_expr.rs
+++ b/tests/by-util/test_expr.rs
@ -2,55 +2,95 @@ use crate::common::util::*;

 #[test]
 fn test_simple_arithmetic() {
-    new_ucmd!().args(&["1", "+", "1"]).run().stdout_is("2\n");
+    new_ucmd!()
+        .args(&["1", "+", "1"])
+        .succeeds()
+        .stdout_only("2\n");

-    new_ucmd!().args(&["1", "-", "1"]).run().stdout_is("0\n");
+    new_ucmd!()
+        .args(&["1", "-", "1"])
+        .fails()
+        .status_code(1)
+        .stdout_only("0\n");

-    new_ucmd!().args(&["3", "*", "2"]).run().stdout_is("6\n");
+    new_ucmd!()
+        .args(&["3", "*", "2"])
+        .succeeds()
+        .stdout_only("6\n");

-    new_ucmd!().args(&["4", "/", "2"]).run().stdout_is("2\n");
+    new_ucmd!()
+        .args(&["4", "/", "2"])
+        .succeeds()
+        .stdout_only("2\n");
 }

 #[test]
 fn test_complex_arithmetic() {
-    let run = new_ucmd!()
+    new_ucmd!()
        .args(&["9223372036854775807", "+", "9223372036854775807"])
-        .run();
-    run.stdout_is("");
-    run.stderr_is("expr: +: Numerical result out of range");
+        .succeeds()
+        .stdout_only("18446744073709551614\n");

-    let run = new_ucmd!().args(&["9", "/", "0"]).run();
-    run.stdout_is("");
-    run.stderr_is("expr: division by zero");
+    new_ucmd!()
+        .args(&[
+            "92233720368547758076549841651981984981498415651",
+            "%",
+            "922337203685",
+        ])
+        .succeeds()
+        .stdout_only("533691697086\n");
+
+    new_ucmd!()
+        .args(&[
+            "92233720368547758076549841651981984981498415651",
+            "*",
+            "922337203685",
+        ])
+        .succeeds()
+        .stdout_only("85070591730190566808700855121818604965830915152801178873935\n");
+
+    new_ucmd!()
+        .args(&[
+            "92233720368547758076549841651981984981498415651",
+            "-",
+            "922337203685",
+        ])
+        .succeeds()
+        .stdout_only("92233720368547758076549841651981984059161211966\n");
+
+    new_ucmd!()
+        .args(&["9", "/", "0"])
+        .fails()
+        .stderr_only("expr: division by zero\n");
 }

 #[test]
 fn test_parenthesis() {
    new_ucmd!()
        .args(&["(", "1", "+", "1", ")", "*", "2"])
-        .run()
-        .stdout_is("4\n");
+        .succeeds()
+        .stdout_only("4\n");
 }

 #[test]
 fn test_or() {
    new_ucmd!()
        .args(&["0", "|", "foo"])
-        .run()
-        .stdout_is("foo\n");
+        .succeeds()
+        .stdout_only("foo\n");

    new_ucmd!()
        .args(&["foo", "|", "bar"])
-        .run()
-        .stdout_is("foo\n");
+        .succeeds()
+        .stdout_only("foo\n");
 }

 #[test]
 fn test_and() {
    new_ucmd!()
        .args(&["foo", "&", "1"])
-        .run()
-        .stdout_is("foo\n");
+        .succeeds()
+        .stdout_only("foo\n");

    new_ucmd!().args(&["", "&", "1"]).run().stdout_is("0\n");
 }