Auto merge of #99322 - GKFX:const-int-parse, r=Mark-Simulacrum

Make {integer}::from_str_radix constant

This commit makes FromStr on integers constant so that `const x: u32 = "23".parse();` works. More practical use-case is with environment variables at build time as discussed in https://github.com/rust-lang/rfcs/issues/1907.

Tracking issue #59133.

ACP: https://github.com/rust-lang/libs-team/issues/74
This commit is contained in:
bors 2024-03-30 19:56:58 +00:00
commit 8df7e723ea
10 changed files with 248 additions and 175 deletions

View file

@ -137,6 +137,7 @@
#![feature(const_heap)]
#![feature(const_hint_assert_unchecked)]
#![feature(const_index_range_slice_index)]
#![feature(const_int_from_str)]
#![feature(const_intrinsic_copy)]
#![feature(const_intrinsic_forget)]
#![feature(const_ipv4)]

View file

@ -113,8 +113,9 @@ pub enum IntErrorKind {
impl ParseIntError {
/// Outputs the detailed cause of parsing an integer failing.
#[must_use]
#[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")]
#[stable(feature = "int_error_matching", since = "1.55.0")]
pub fn kind(&self) -> &IntErrorKind {
pub const fn kind(&self) -> &IntErrorKind {
&self.kind
}
}

View file

@ -60,32 +60,6 @@ macro_rules! int_impl {
#[stable(feature = "int_bits_const", since = "1.53.0")]
pub const BITS: u32 = <$UnsignedT>::BITS;
/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` or `-` sign followed by digits.
/// Leading and trailing whitespace represent an error. Digits are a subset of these characters,
/// depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($SelfT), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
from_str_radix(src, radix)
}
/// Returns the number of ones in the binary representation of `self`.
///
/// # Examples

View file

@ -6,7 +6,6 @@
use crate::hint;
use crate::intrinsics;
use crate::mem;
use crate::ops::{Add, Mul, Sub};
use crate::str::FromStr;
// Used because the `?` operator is not allowed in a const context.
@ -1386,51 +1385,19 @@ pub enum FpCategory {
Normal,
}
#[doc(hidden)]
trait FromStrRadixHelper:
PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
{
const MIN: Self;
fn from_u32(u: u32) -> Self;
fn checked_mul(&self, other: u32) -> Option<Self>;
fn checked_sub(&self, other: u32) -> Option<Self>;
fn checked_add(&self, other: u32) -> Option<Self>;
}
macro_rules! from_str_radix_int_impl {
($($t:ty)*) => {$(
#[stable(feature = "rust1", since = "1.0.0")]
impl FromStr for $t {
type Err = ParseIntError;
fn from_str(src: &str) -> Result<Self, ParseIntError> {
from_str_radix(src, 10)
<$t>::from_str_radix(src, 10)
}
}
)*}
}
from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 }
macro_rules! impl_helper_for {
($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
const MIN: Self = Self::MIN;
#[inline]
fn from_u32(u: u32) -> Self { u as Self }
#[inline]
fn checked_mul(&self, other: u32) -> Option<Self> {
Self::checked_mul(*self, other as Self)
}
#[inline]
fn checked_sub(&self, other: u32) -> Option<Self> {
Self::checked_sub(*self, other as Self)
}
#[inline]
fn checked_add(&self, other: u32) -> Option<Self> {
Self::checked_add(*self, other as Self)
}
})*)
}
impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
/// Determines if a string of text of that length of that radix could be guaranteed to be
/// stored in the given type T.
/// Note that if the radix is known to the compiler, it is just the check of digits.len that
@ -1438,92 +1405,198 @@ fn checked_add(&self, other: u32) -> Option<Self> {
#[doc(hidden)]
#[inline(always)]
#[unstable(issue = "none", feature = "std_internals")]
pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
pub const fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
}
fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
use self::IntErrorKind::*;
use self::ParseIntError as PIE;
assert!(
(2..=36).contains(&radix),
"from_str_radix_int: must lie in the range `[2, 36]` - found {}",
radix
);
if src.is_empty() {
return Err(PIE { kind: Empty });
}
let is_signed_ty = T::from_u32(0) > T::MIN;
// all valid digits are ascii, so we will just iterate over the utf8 bytes
// and cast them to chars. .to_digit() will safely return None for anything
// other than a valid ascii digit for the given radix, including the first-byte
// of multi-byte sequences
let src = src.as_bytes();
let (is_positive, digits) = match src[0] {
b'+' | b'-' if src[1..].is_empty() => {
return Err(PIE { kind: InvalidDigit });
}
b'+' => (true, &src[1..]),
b'-' if is_signed_ty => (false, &src[1..]),
_ => (true, src),
};
let mut result = T::from_u32(0);
if can_not_overflow::<T>(radix, is_signed_ty, digits) {
// If the len of the str is short compared to the range of the type
// we are parsing into, then we can be certain that an overflow will not occur.
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
// above is a faster (conservative) approximation of this.
//
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
macro_rules! run_unchecked_loop {
($unchecked_additive_op:expr) => {
for &c in digits {
result = result * T::from_u32(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = $unchecked_additive_op(result, T::from_u32(x));
}
};
}
if is_positive {
run_unchecked_loop!(<T as core::ops::Add>::add)
} else {
run_unchecked_loop!(<T as core::ops::Sub>::sub)
};
} else {
macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:expr) => {
for &c in digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = mul.ok_or_else($overflow_err)?;
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
}
};
}
if is_positive {
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
} else {
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
};
}
Ok(result)
#[track_caller]
const fn from_str_radix_panic_ct(_radix: u32) -> ! {
panic!("from_str_radix_int: must lie in the range `[2, 36]`");
}
#[track_caller]
fn from_str_radix_panic_rt(radix: u32) -> ! {
panic!("from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix);
}
#[cfg_attr(not(feature = "panic_immediate_abort"), inline(never))]
#[cfg_attr(feature = "panic_immediate_abort", inline)]
#[cold]
#[track_caller]
const fn from_str_radix_assert(radix: u32) {
if 2 > radix || radix > 36 {
// The only difference between these two functions is their panic message.
intrinsics::const_eval_select((radix,), from_str_radix_panic_ct, from_str_radix_panic_rt);
}
}
macro_rules! from_str_radix {
($($int_ty:ty)+) => {$(
impl $int_ty {
/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` sign
/// followed by digits.
/// Leading and trailing whitespace represent an error.
/// Digits are a subset of these characters, depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($int_ty), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")]
pub const fn from_str_radix(src: &str, radix: u32) -> Result<$int_ty, ParseIntError> {
use self::IntErrorKind::*;
use self::ParseIntError as PIE;
from_str_radix_assert(radix);
if src.is_empty() {
return Err(PIE { kind: Empty });
}
#[allow(unused_comparisons)]
let is_signed_ty = 0 > <$int_ty>::MIN;
// all valid digits are ascii, so we will just iterate over the utf8 bytes
// and cast them to chars. .to_digit() will safely return None for anything
// other than a valid ascii digit for the given radix, including the first-byte
// of multi-byte sequences
let src = src.as_bytes();
let (is_positive, mut digits) = match src {
[b'+' | b'-'] => {
return Err(PIE { kind: InvalidDigit });
}
[b'+', rest @ ..] => (true, rest),
[b'-', rest @ ..] if is_signed_ty => (false, rest),
_ => (true, src),
};
let mut result = 0;
macro_rules! unwrap_or_PIE {
($option:expr, $kind:ident) => {
match $option {
Some(value) => value,
None => return Err(PIE { kind: $kind }),
}
};
}
if can_not_overflow::<$int_ty>(radix, is_signed_ty, digits) {
// If the len of the str is short compared to the range of the type
// we are parsing into, then we can be certain that an overflow will not occur.
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
// above is a faster (conservative) approximation of this.
//
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
macro_rules! run_unchecked_loop {
($unchecked_additive_op:tt) => {{
while let [c, rest @ ..] = digits {
result = result * (radix as $int_ty);
let x = unwrap_or_PIE!((*c as char).to_digit(radix), InvalidDigit);
result = result $unchecked_additive_op (x as $int_ty);
digits = rest;
}
}};
}
if is_positive {
run_unchecked_loop!(+)
} else {
run_unchecked_loop!(-)
};
} else {
macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:ident) => {{
while let [c, rest @ ..] = digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix as $int_ty);
let x = unwrap_or_PIE!((*c as char).to_digit(radix), InvalidDigit) as $int_ty;
result = unwrap_or_PIE!(mul, $overflow_err);
result = unwrap_or_PIE!(<$int_ty>::$checked_additive_op(result, x), $overflow_err);
digits = rest;
}
}};
}
if is_positive {
run_checked_loop!(checked_add, PosOverflow)
} else {
run_checked_loop!(checked_sub, NegOverflow)
};
}
Ok(result)
}
}
)+}
}
from_str_radix! { i8 u8 i16 u16 i32 u32 i64 u64 i128 u128 }
// Re-use the relevant implementation of from_str_radix for isize and usize to avoid outputting two
// identical functions.
macro_rules! from_str_radix_size_impl {
($($t:ident $size:ty),*) => {$(
impl $size {
/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` sign
/// followed by digits.
/// Leading and trailing whitespace represent an error.
/// Digits are a subset of these characters, depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($size), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_int_from_str", issue = "59133")]
pub const fn from_str_radix(src: &str, radix: u32) -> Result<$size, ParseIntError> {
match <$t>::from_str_radix(src, radix) {
Ok(x) => Ok(x as $size),
Err(e) => Err(e),
}
}
})*}
}
#[cfg(target_pointer_width = "16")]
from_str_radix_size_impl! { i16 isize, u16 usize }
#[cfg(target_pointer_width = "32")]
from_str_radix_size_impl! { i32 isize, u32 usize }
#[cfg(target_pointer_width = "64")]
from_str_radix_size_impl! { i64 isize, u64 usize }

View file

@ -11,7 +11,6 @@
use crate::str::FromStr;
use crate::ub_checks;
use super::from_str_radix;
use super::{IntErrorKind, ParseIntError};
/// A marker trait for primitive types which can be zero.
@ -804,7 +803,7 @@ pub const fn saturating_pow(self, other: u32) -> Self {
impl FromStr for $Ty {
type Err = ParseIntError;
fn from_str(src: &str) -> Result<Self, Self::Err> {
Self::new(from_str_radix(src, 10)?)
Self::new(<$Int>::from_str_radix(src, 10)?)
.ok_or(ParseIntError {
kind: IntErrorKind::Zero
})

View file

@ -58,33 +58,6 @@ macro_rules! uint_impl {
#[stable(feature = "int_bits_const", since = "1.53.0")]
pub const BITS: u32 = Self::MAX.count_ones();
/// Converts a string slice in a given base to an integer.
///
/// The string is expected to be an optional `+` sign
/// followed by digits.
/// Leading and trailing whitespace represent an error.
/// Digits are a subset of these characters, depending on `radix`:
///
/// * `0-9`
/// * `a-z`
/// * `A-Z`
///
/// # Panics
///
/// This function panics if `radix` is not in the range from 2 to 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
#[doc = concat!("assert_eq!(", stringify!($SelfT), "::from_str_radix(\"A\", 16), Ok(10));")]
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
from_str_radix(src, radix)
}
/// Returns the number of ones in the binary representation of `self`.
///
/// # Examples

View file

@ -16,6 +16,7 @@
#![feature(const_hash)]
#![feature(const_heap)]
#![feature(const_intrinsic_copy)]
#![feature(const_int_from_str)]
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_nonnull_new)]
#![feature(const_pointer_is_aligned)]

View file

@ -214,6 +214,16 @@ fn test_infallible_try_from_int_error() {
assert!(func(0).is_ok());
}
const _TEST_CONST_PARSE: () = {
let Ok(-0x8000) = i16::from_str_radix("-8000", 16) else { panic!() };
let Ok(12345) = u64::from_str_radix("12345", 10) else { panic!() };
if let Err(e) = i8::from_str_radix("+", 10) {
let IntErrorKind::InvalidDigit = e.kind() else { panic!() };
} else {
panic!()
}
};
macro_rules! test_impl_from {
($fn_name:ident, bool, $target: ty) => {
#[test]

View file

@ -0,0 +1,10 @@
#![feature(const_int_from_str)]
const _OK: () = match i32::from_str_radix("-1234", 10) {
Ok(x) => assert!(x == -1234),
Err(_) => panic!(),
};
const _TOO_LOW: () = { u64::from_str_radix("12345ABCD", 1); };
const _TOO_HIGH: () = { u64::from_str_radix("12345ABCD", 37); };
fn main () {}

View file

@ -0,0 +1,31 @@
error[E0080]: evaluation of constant value failed
--> $SRC_DIR/core/src/num/mod.rs:LL:COL
|
= note: the evaluated program panicked at 'from_str_radix_int: must lie in the range `[2, 36]`', $SRC_DIR/core/src/num/mod.rs:LL:COL
|
note: inside `core::num::<impl u64>::from_str_radix`
--> $SRC_DIR/core/src/num/mod.rs:LL:COL
note: inside `_TOO_LOW`
--> $DIR/parse_ints.rs:7:24
|
LL | const _TOO_LOW: () = { u64::from_str_radix("12345ABCD", 1); };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
= note: this error originates in the macro `from_str_radix` (in Nightly builds, run with -Z macro-backtrace for more info)
error[E0080]: evaluation of constant value failed
--> $SRC_DIR/core/src/num/mod.rs:LL:COL
|
= note: the evaluated program panicked at 'from_str_radix_int: must lie in the range `[2, 36]`', $SRC_DIR/core/src/num/mod.rs:LL:COL
|
note: inside `core::num::<impl u64>::from_str_radix`
--> $SRC_DIR/core/src/num/mod.rs:LL:COL
note: inside `_TOO_HIGH`
--> $DIR/parse_ints.rs:8:25
|
LL | const _TOO_HIGH: () = { u64::from_str_radix("12345ABCD", 37); };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
= note: this error originates in the macro `from_str_radix` (in Nightly builds, run with -Z macro-backtrace for more info)
error: aborting due to 2 previous errors
For more information about this error, try `rustc --explain E0080`.