Merge pull request #4356 from howjmay/cksum-a

cksum: implement -a
This commit is contained in:
Terts Diepraam 2023-02-23 12:18:21 +01:00 committed by GitHub
commit 3554565c82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 866 additions and 426 deletions

27
Cargo.lock generated
View file

@ -2101,6 +2101,15 @@ dependencies = [
"autocfg",
]
[[package]]
name = "sm3"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f943a7c5e3089f2bd046221d1e9f4fa59396bf0fe966360983649683086215da"
dependencies = [
"digest",
]
[[package]]
name = "smallvec"
version = "1.10.0"
@ -2439,6 +2448,7 @@ name = "uu_cksum"
version = "0.0.17"
dependencies = [
"clap",
"hex",
"uucore",
]
@ -2643,17 +2653,10 @@ dependencies = [
name = "uu_hashsum"
version = "0.0.17"
dependencies = [
"blake2b_simd",
"blake3",
"clap",
"digest",
"hex",
"md-5",
"memchr",
"regex",
"sha1",
"sha2",
"sha3",
"uucore",
]
@ -3347,17 +3350,27 @@ dependencies = [
name = "uucore"
version = "0.0.17"
dependencies = [
"blake2b_simd",
"blake3",
"clap",
"data-encoding",
"data-encoding-macro",
"digest",
"dns-lookup",
"dunce",
"glob",
"hex",
"itertools",
"libc",
"md-5",
"memchr",
"nix",
"once_cell",
"os_display",
"sha1",
"sha2",
"sha3",
"sm3",
"thiserror",
"time",
"uucore_procs",

View file

@ -332,6 +332,16 @@ windows-sys = { version="0.42.0", default-features=false }
xattr = "0.2.3"
zip = { version = "0.6.3", default_features=false, features=["deflate"] }
hex = "0.4.3"
md-5 = "0.10.5"
sha1 = "0.10.1"
sha2 = "0.10.2"
sha3 = "0.10.6"
blake2b_simd = "1.0.1"
blake3 = "1.3.2"
sm3 = "0.4.1"
digest = "0.10.6"
uucore = { version=">=0.0.17", package="uucore", path="src/uucore" }
uucore_procs = { version=">=0.0.17", package="uucore_procs", path="src/uucore_procs" }
uu_ls = { version=">=0.0.17", path="src/uu/ls" }

View file

@ -16,7 +16,8 @@ path = "src/cksum.rs"
[dependencies]
clap = { workspace=true }
uucore = { workspace=true }
uucore = { workspace=true, features=["sum"] }
hex = { workspace=true }
[[bin]]
name = "cksum"

View file

@ -5,134 +5,244 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (ToDO) fname
// spell-checker:ignore (ToDO) fname, algo
use clap::{crate_version, Arg, Command};
use hex::encode;
use std::ffi::OsStr;
use std::fs::File;
use std::io::{self, stdin, BufReader, Read};
use std::iter;
use std::path::Path;
use uucore::display::Quotable;
use uucore::error::{FromIo, UResult};
use uucore::{format_usage, show};
// NOTE: CRC_TABLE_LEN *must* be <= 256 as we cast 0..CRC_TABLE_LEN to u8
const CRC_TABLE_LEN: usize = 256;
const CRC_TABLE: [u32; CRC_TABLE_LEN] = generate_crc_table();
use uucore::{
error::{FromIo, UResult},
format_usage,
sum::{
div_ceil, Blake2b, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha512, Sm3,
BSD, CRC, SYSV,
},
};
const USAGE: &str = "{} [OPTIONS] [FILE]...";
const ABOUT: &str = "Print CRC and size for each file";
const fn generate_crc_table() -> [u32; CRC_TABLE_LEN] {
let mut table = [0; CRC_TABLE_LEN];
const ALGORITHM_OPTIONS_SYSV: &str = "sysv";
const ALGORITHM_OPTIONS_BSD: &str = "bsd";
const ALGORITHM_OPTIONS_CRC: &str = "crc";
const ALGORITHM_OPTIONS_MD5: &str = "md5";
const ALGORITHM_OPTIONS_SHA1: &str = "sha1";
const ALGORITHM_OPTIONS_SHA224: &str = "sha224";
const ALGORITHM_OPTIONS_SHA256: &str = "sha256";
const ALGORITHM_OPTIONS_SHA384: &str = "sha384";
const ALGORITHM_OPTIONS_SHA512: &str = "sha512";
const ALGORITHM_OPTIONS_BLAKE2B: &str = "blake2b";
const ALGORITHM_OPTIONS_SM3: &str = "sm3";
let mut i = 0;
while i < CRC_TABLE_LEN {
table[i] = crc_entry(i as u8);
/// Maps the value of `--algorithm` to the digest that implements it.
///
/// Returns `(name, digest, bits)` where
///
/// * `name` is the matching `ALGORITHM_OPTIONS_*` constant,
/// * `digest` is a freshly constructed, boxed [`Digest`],
/// * `bits` ends up in `Options::output_bits`. For `sysv` and `bsd` it is
///   the divisor `cksum` passes to `div_ceil` together with the byte count;
///   for every algorithm it is also forwarded to `digest_read`, which only
///   consults it when the digest itself reports an output size of 0.
///
/// # Panics
///
/// Panics on a name that is not one of the `ALGORITHM_OPTIONS_*` constants.
/// `uu_app` restricts the accepted values, so this is not expected to be
/// reachable from the command line.
fn detect_algo(program: &str) -> (&'static str, Box<dyn Digest + 'static>, usize) {
    match program {
        ALGORITHM_OPTIONS_SYSV => (
            ALGORITHM_OPTIONS_SYSV,
            Box::new(SYSV::new()) as Box<dyn Digest>,
            512,
        ),
        ALGORITHM_OPTIONS_BSD => (
            ALGORITHM_OPTIONS_BSD,
            Box::new(BSD::new()) as Box<dyn Digest>,
            1024,
        ),
        ALGORITHM_OPTIONS_CRC => (
            ALGORITHM_OPTIONS_CRC,
            Box::new(CRC::new()) as Box<dyn Digest>,
            256,
        ),
        ALGORITHM_OPTIONS_MD5 => (
            ALGORITHM_OPTIONS_MD5,
            Box::new(Md5::new()) as Box<dyn Digest>,
            128,
        ),
        ALGORITHM_OPTIONS_SHA1 => (
            ALGORITHM_OPTIONS_SHA1,
            Box::new(Sha1::new()) as Box<dyn Digest>,
            160,
        ),
        ALGORITHM_OPTIONS_SHA224 => (
            ALGORITHM_OPTIONS_SHA224,
            Box::new(Sha224::new()) as Box<dyn Digest>,
            224,
        ),
        ALGORITHM_OPTIONS_SHA256 => (
            ALGORITHM_OPTIONS_SHA256,
            Box::new(Sha256::new()) as Box<dyn Digest>,
            256,
        ),
        ALGORITHM_OPTIONS_SHA384 => (
            ALGORITHM_OPTIONS_SHA384,
            Box::new(Sha384::new()) as Box<dyn Digest>,
            384,
        ),
        ALGORITHM_OPTIONS_SHA512 => (
            ALGORITHM_OPTIONS_SHA512,
            Box::new(Sha512::new()) as Box<dyn Digest>,
            512,
        ),
        ALGORITHM_OPTIONS_BLAKE2B => (
            ALGORITHM_OPTIONS_BLAKE2B,
            Box::new(Blake2b::new()) as Box<dyn Digest>,
            512,
        ),
        // SM3 produces a 256-bit digest (the `Sm3` wrapper reports the same).
        ALGORITHM_OPTIONS_SM3 => (
            ALGORITHM_OPTIONS_SM3,
            Box::new(Sm3::new()) as Box<dyn Digest>,
            256,
        ),
        _ => unreachable!("unknown algorithm: clap should have prevented this case"),
    }
}
i += 1;
/// Checksum settings handed to `cksum`; `uumain` fills it from the tuple
/// returned by `detect_algo`.
struct Options {
// One of the `ALGORITHM_OPTIONS_*` names; `cksum` matches on it to pick the output format.
algo_name: &'static str,
// Digest implementation that receives the bytes of every input.
digest: Box<dyn Digest + 'static>,
// Third element of the `detect_algo` tuple. `cksum` uses it as the `div_ceil`
// divisor for `sysv`/`bsd` and forwards it to `digest_read`.
output_bits: usize,
}
/// Computes and prints the checksum of every input named in `files`.
///
/// The name `-` selects standard input. A path that is a directory is read
/// as an empty input. One line is printed per input; its layout depends on
/// the selected algorithm and on whether the input was standard input.
///
/// # Arguments
///
/// * `options` - the selected algorithm, its digest and the associated size value
/// * `files` - an iterator over the input names, as `OsStr`
///
/// # Errors
///
/// Returns as soon as an input cannot be opened or read; inputs after the
/// failing one are not processed.
#[allow(clippy::cognitive_complexity)]
fn cksum<'a, I>(mut options: Options, files: I) -> UResult<()>
where
I: Iterator<Item = &'a OsStr>,
{
for filename in files {
let filename = Path::new(filename);
// Declared outside the `if` so the handle outlives the boxed reader built from it.
let stdin_buf;
let file_buf;
let not_file = filename == OsStr::new("-");
let mut file = BufReader::new(if not_file {
stdin_buf = stdin();
Box::new(stdin_buf) as Box<dyn Read>
} else if filename.is_dir() {
// A directory is treated as an empty input instead of an error.
Box::new(BufReader::new(io::empty())) as Box<dyn Read>
} else {
// NOTE(review): `to_str().unwrap()` panics while building the error context
// if the path is not valid UTF-8; `to_string_lossy()` would avoid that.
// NOTE(review): `?` stops the whole run at the first file that fails to open.
file_buf =
File::open(filename).map_err_context(|| filename.to_str().unwrap().to_string())?;
Box::new(file_buf) as Box<dyn Read>
});
// `sum` is the digest rendered as text, `sz` the number of bytes read.
let (sum, sz) = digest_read(&mut options.digest, &mut file, options.output_bits)
.map_err_context(|| "failed to read input".to_string())?;
// The BSD checksum output is 5 digit integer
let bsd_width = 5;
// The `parse::<u16>().unwrap()` calls below assume that the sysv/bsd digests
// return a decimal number that fits in 16 bits.
match (options.algo_name, not_file) {
(ALGORITHM_OPTIONS_SYSV, true) => println!(
"{} {}",
sum.parse::<u16>().unwrap(),
div_ceil(sz, options.output_bits)
),
(ALGORITHM_OPTIONS_SYSV, false) => println!(
"{} {} {}",
sum.parse::<u16>().unwrap(),
div_ceil(sz, options.output_bits),
filename.display()
),
(ALGORITHM_OPTIONS_BSD, true) => println!(
"{:0bsd_width$} {:bsd_width$}",
sum.parse::<u16>().unwrap(),
div_ceil(sz, options.output_bits)
),
(ALGORITHM_OPTIONS_BSD, false) => println!(
"{:0bsd_width$} {:bsd_width$} {}",
sum.parse::<u16>().unwrap(),
div_ceil(sz, options.output_bits),
filename.display()
),
// Every other algorithm prints the digest text followed by the byte count.
(_, true) => println!("{sum} {sz}"),
(_, false) => println!("{sum} {sz} {}", filename.display()),
}
}
table
Ok(())
}
const fn crc_entry(input: u8) -> u32 {
let mut crc = (input as u32) << 24;
fn digest_read<T: Read>(
digest: &mut Box<dyn Digest>,
reader: &mut BufReader<T>,
output_bits: usize,
) -> io::Result<(String, usize)> {
digest.reset();
let mut i = 0;
while i < 8 {
let if_condition = crc & 0x8000_0000;
let if_body = (crc << 1) ^ 0x04c1_1db7;
let else_body = crc << 1;
// Read bytes from `reader` and write those bytes to `digest`.
//
// `DigestWriter` is created in binary mode here (second argument
// `true`), so the bytes are passed to `digest` unchanged on every
// operating system.
//
// Only in text mode on Windows does `DigestWriter` replace "\r\n"
// with "\n"; there a trailing "\r" is held back until the next
// write, and `finalize()` forces it out when it was the last
// character read. In binary mode nothing is ever held back, so the
// `finalize()` call below has nothing to flush and is kept only for
// symmetry with the text-mode use of `DigestWriter`.
let mut digest_writer = DigestWriter::new(digest, true);
let output_size = std::io::copy(reader, &mut digest_writer)? as usize;
digest_writer.finalize();
// NOTE: i feel like this is easier to understand than emulating an if statement in bitwise
// ops
let condition_table = [else_body, if_body];
crc = condition_table[(if_condition != 0) as usize];
i += 1;
}
crc
}
#[inline]
fn crc_update(crc: u32, input: u8) -> u32 {
(crc << 8) ^ CRC_TABLE[((crc >> 24) as usize ^ input as usize) & 0xFF]
}
#[inline]
fn crc_final(mut crc: u32, mut length: usize) -> u32 {
while length != 0 {
crc = crc_update(crc, length as u8);
length >>= 8;
}
!crc
}
fn init_byte_array() -> Vec<u8> {
vec![0; 1024 * 1024]
}
#[inline]
fn cksum(fname: &str) -> io::Result<(u32, usize)> {
let mut crc = 0u32;
let mut size = 0usize;
let mut rd: Box<dyn Read> = match fname {
"-" => Box::new(stdin()),
_ => {
let p = Path::new(fname);
// Directories should not give an error, but should be interpreted
// as empty files to match GNU semantics.
if p.is_dir() {
Box::new(BufReader::new(io::empty())) as Box<dyn Read>
} else {
Box::new(BufReader::new(File::open(p)?)) as Box<dyn Read>
}
}
};
let mut bytes = init_byte_array();
loop {
let num_bytes = rd.read(&mut bytes)?;
if num_bytes == 0 {
return Ok((crc_final(crc, size), size));
}
for &b in bytes[..num_bytes].iter() {
crc = crc_update(crc, b);
}
size += num_bytes;
if digest.output_bits() > 0 {
Ok((digest.result_str(), output_size))
} else {
// Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016)
let mut bytes = Vec::new();
bytes.resize((output_bits + 7) / 8, 0);
digest.hash_finalize(&mut bytes);
Ok((encode(bytes), output_size))
}
}
/// Identifiers of the command-line arguments, shared between `uu_app`
/// (which declares them) and `uumain` (which looks them up).
mod options {
pub static FILE: &str = "file";
pub static ALGORITHM: &str = "algorithm";
}
/// Description of the values accepted by `--algorithm`; `uu_app` passes it
/// to `after_help`.
const ALGORITHM_HELP_DESC: &str =
"DIGEST determines the digest algorithm and default output format:\n\
\n\
-a=sysv: (equivalent to sum -s)\n\
-a=bsd: (equivalent to sum -r)\n\
-a=crc: (equivalent to cksum)\n\
-a=md5: (equivalent to md5sum)\n\
-a=sha1: (equivalent to sha1sum)\n\
-a=sha224: (equivalent to sha224sum)\n\
-a=sha256: (equivalent to sha256sum)\n\
-a=sha384: (equivalent to sha384sum)\n\
-a=sha512: (equivalent to sha512sum)\n\
-a=blake2b: (equivalent to b2sum)\n\
-a=sm3: (only available through cksum)\n";
/// Entry point of the utility: parses the command line, selects the
/// checksum algorithm and runs `cksum` over the FILE operands.
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let args = args.collect_ignore();
let matches = uu_app().try_get_matches_from(args)?;
let files: Vec<String> = match matches.get_many::<String>(options::FILE) {
Some(v) => v.clone().map(|v| v.to_owned()).collect(),
None => vec![],
// Use the algorithm named with `--algorithm`/`-a`; default to `crc`.
let algo_name: &str = match matches.get_one::<String>(options::ALGORITHM) {
Some(v) => v,
None => ALGORITHM_OPTIONS_CRC,
};
if files.is_empty() {
let (crc, size) = cksum("-")?;
println!("{crc} {size}");
return Ok(());
}
let (name, algo, bits) = detect_algo(algo_name);
let opts = Options {
algo_name: name,
digest: algo,
output_bits: bits,
};
// With no FILE operand, checksum standard input (spelled `-`).
match matches.get_many::<String>(options::FILE) {
Some(files) => cksum(opts, files.map(OsStr::new))?,
None => cksum(opts, iter::once(OsStr::new("-")))?,
};
for fname in &files {
match cksum(fname.as_ref()).map_err_context(|| format!("{}", fname.maybe_quote())) {
Ok((crc, size)) => println!("{crc} {size} {fname}"),
Err(err) => show!(err),
};
}
Ok(())
}
@ -148,4 +258,25 @@ pub fn uu_app() -> Command {
.action(clap::ArgAction::Append)
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::ALGORITHM)
.long(options::ALGORITHM)
.short('a')
.help("select the digest type to use. See DIGEST below")
.value_name("ALGORITHM")
.value_parser([
ALGORITHM_OPTIONS_SYSV,
ALGORITHM_OPTIONS_BSD,
ALGORITHM_OPTIONS_CRC,
ALGORITHM_OPTIONS_MD5,
ALGORITHM_OPTIONS_SHA1,
ALGORITHM_OPTIONS_SHA224,
ALGORITHM_OPTIONS_SHA256,
ALGORITHM_OPTIONS_SHA384,
ALGORITHM_OPTIONS_SHA512,
ALGORITHM_OPTIONS_BLAKE2B,
ALGORITHM_OPTIONS_SM3,
]),
)
.after_help(ALGORITHM_HELP_DESC)
}

View file

@ -15,18 +15,11 @@ edition = "2021"
path = "src/hashsum.rs"
[dependencies]
digest = "0.10.6"
clap = { workspace=true }
hex = "0.4.3"
memchr = { workspace=true }
md-5 = "0.10.5"
regex = { workspace=true }
sha1 = "0.10.1"
sha2 = "0.10.2"
sha3 = "0.10.6"
blake2b_simd = "1.0.1"
blake3 = "1.3.2"
uucore = { workspace=true }
memchr = { workspace=true }
regex = { workspace=true }
hex = { workspace=true }
[[bin]]
name = "hashsum"

View file

@ -1,287 +0,0 @@
// spell-checker:ignore memmem
//! Implementations of digest functions, like md5 and sha1.
//!
//! The [`Digest`] trait represents the interface for providing inputs
//! to these digest functions and accessing the resulting hash. The
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
//! implements the [`Write`] trait, for use in situations where calling
//! [`write`] would be useful.
use std::io::Write;
use hex::encode;
#[cfg(windows)]
use memchr::memmem;
/// Uniform interface over the hash functions implemented in this file.
pub trait Digest {
    /// Creates a hasher in its initial state.
    fn new() -> Self
    where
        Self: Sized;

    /// Feeds `input` into the running hash.
    fn input(&mut self, input: &[u8]);

    /// Writes the digest of everything fed so far into `out`.
    fn result(&mut self, out: &mut [u8]);

    /// Puts the hasher back into its initial state.
    fn reset(&mut self);

    /// Size of the digest in bits. The variable-output implementations in
    /// this file report 0.
    fn output_bits(&self) -> usize;

    /// Size of the digest in bytes; a partial byte is rounded up.
    fn output_bytes(&self) -> usize {
        let bits = self.output_bits();
        (bits + 7) / 8
    }

    /// Finalizes the hash and returns it hex-encoded.
    fn result_str(&mut self) -> String {
        let len = self.output_bytes();
        let mut digest_bytes = vec![0u8; len];
        self.result(&mut digest_bytes);
        encode(digest_bytes)
    }
}
/// BLAKE2b through the `blake2b_simd` crate.
impl Digest for blake2b_simd::State {
    fn new() -> Self {
        blake2b_simd::State::new()
    }

    fn input(&mut self, input: &[u8]) {
        blake2b_simd::State::update(self, input);
    }

    /// Copies the hash into `out`, which must be exactly as long as the hash.
    fn result(&mut self, out: &mut [u8]) {
        out.copy_from_slice(self.finalize().as_bytes());
    }

    fn reset(&mut self) {
        *self = blake2b_simd::State::new();
    }

    fn output_bits(&self) -> usize {
        512
    }
}
/// BLAKE3 through the `blake3` crate.
impl Digest for blake3::Hasher {
    fn new() -> Self {
        blake3::Hasher::new()
    }

    fn input(&mut self, input: &[u8]) {
        blake3::Hasher::update(self, input);
    }

    /// Copies the hash into `out`, which must be exactly as long as the hash.
    fn result(&mut self, out: &mut [u8]) {
        out.copy_from_slice(self.finalize().as_bytes());
    }

    fn reset(&mut self) {
        *self = blake3::Hasher::new();
    }

    fn output_bits(&self) -> usize {
        256
    }
}
// Generates a `Digest` impl for a fixed-output hasher from the RustCrypto
// family (md5, sha1, sha2, sha3). `$bits` is the digest size reported by
// `output_bits`.
macro_rules! impl_digest_common {
    ($hasher: ty, $bits: expr) => {
        impl Digest for $hasher {
            fn new() -> Self {
                Self::default()
            }

            fn input(&mut self, input: &[u8]) {
                digest::Digest::update(self, input);
            }

            // Writes the digest into `out` and leaves the hasher reset.
            fn result(&mut self, out: &mut [u8]) {
                digest::Digest::finalize_into_reset(self, out.into());
            }

            fn reset(&mut self) {
                *self = Self::default();
            }

            fn output_bits(&self) -> usize {
                $bits
            }
        }
    };
}
// Generates a `Digest` impl for an extendable-output hasher. The output
// length is chosen by the caller through the length of `out`, so
// `output_bits` reports 0.
macro_rules! impl_digest_shake {
    ($hasher: ty) => {
        impl Digest for $hasher {
            fn new() -> Self {
                Self::default()
            }

            fn input(&mut self, input: &[u8]) {
                digest::Update::update(self, input);
            }

            // Fills all of `out` with output bytes and leaves the hasher reset.
            fn result(&mut self, out: &mut [u8]) {
                digest::ExtendableOutputReset::finalize_xof_reset_into(self, out);
            }

            fn reset(&mut self) {
                *self = Self::default();
            }

            fn output_bits(&self) -> usize {
                0
            }
        }
    };
}
// Fixed-output hashers: the second argument is the digest size in bits.
impl_digest_common!(md5::Md5, 128);
impl_digest_common!(sha1::Sha1, 160);
impl_digest_common!(sha2::Sha224, 224);
impl_digest_common!(sha2::Sha256, 256);
impl_digest_common!(sha2::Sha384, 384);
impl_digest_common!(sha2::Sha512, 512);
impl_digest_common!(sha3::Sha3_224, 224);
impl_digest_common!(sha3::Sha3_256, 256);
impl_digest_common!(sha3::Sha3_384, 384);
impl_digest_common!(sha3::Sha3_512, 512);
// Variable-output hashers: `output_bits` is 0 for these.
impl_digest_shake!(sha3::Shake128);
impl_digest_shake!(sha3::Shake256);
/// A struct that writes to a digest.
///
/// This struct wraps a [`Digest`] and provides a [`Write`]
/// implementation that passes input bytes directly to the
/// [`Digest::input`].
///
/// On Windows, if `binary` is `false`, then the [`write`]
/// implementation replaces instances of "\r\n" with "\n" before passing
/// the input bytes to the [`digest`].
pub struct DigestWriter<'a> {
digest: &'a mut Box<dyn Digest>,
/// Whether to write to the digest in binary mode or text mode on Windows.
///
/// If this is `false`, then instances of "\r\n" are replaced with
/// "\n" before passing input bytes to the [`digest`].
#[allow(dead_code)]
binary: bool,
/// Whether the previous
#[allow(dead_code)]
was_last_character_carriage_return: bool,
// TODO These are dead code only on non-Windows operating systems.
// It might be better to use a `#[cfg(windows)]` guard here.
}
impl<'a> DigestWriter<'a> {
pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> DigestWriter {
let was_last_character_carriage_return = false;
DigestWriter {
digest,
binary,
was_last_character_carriage_return,
}
}
pub fn finalize(&mut self) -> bool {
if self.was_last_character_carriage_return {
self.digest.input(&[b'\r']);
true
} else {
false
}
}
}
impl<'a> Write for DigestWriter<'a> {
#[cfg(not(windows))]
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.digest.input(buf);
Ok(buf.len())
}
#[cfg(windows)]
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if self.binary {
self.digest.input(buf);
return Ok(buf.len());
}
// The remaining code handles Windows text mode, where we must
// replace each occurrence of "\r\n" with "\n".
//
// First, if the last character written was "\r" and the first
// character in the current buffer to write is not "\n", then we
// need to write the "\r" that we buffered from the previous
// call to `write()`.
let n = buf.len();
if self.was_last_character_carriage_return && n > 0 && buf[0] != b'\n' {
self.digest.input(&[b'\r']);
}
// Next, find all occurrences of "\r\n", inputting the slice
// just before the "\n" in the previous instance of "\r\n" and
// the beginning of this "\r\n".
let mut i_prev = 0;
for i in memmem::find_iter(buf, b"\r\n") {
self.digest.input(&buf[i_prev..i]);
i_prev = i + 1;
}
// Finally, check whether the last character is "\r". If so,
// buffer it until we know that the next character is not "\n",
// which can only be known on the next call to `write()`.
//
// This all assumes that `write()` will be called on adjacent
// blocks of the input.
if n > 0 && buf[n - 1] == b'\r' {
self.was_last_character_carriage_return = true;
self.digest.input(&buf[i_prev..n - 1]);
} else {
self.was_last_character_carriage_return = false;
self.digest.input(&buf[i_prev..n]);
}
// Even though we dropped a "\r" for each "\r\n" we found, we
// still report the number of bytes written as `n`. This is
// because the meaning of the returned number is supposed to be
// the number of bytes consumed by the writer, so that if the
// calling code were calling `write()` in a loop, it would know
// where the next contiguous slice of the buffer starts.
Ok(n)
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
#[cfg(test)]
mod tests {
    /// A "\r\n" sequence must be replaced with "\n" even when the "\r" is
    /// the last byte of one block and the "\n" the first byte of the next
    /// block, when reading in blocks.
    #[cfg(windows)]
    #[test]
    fn test_crlf_across_blocks() {
        use std::io::Write;

        use crate::digest::Digest;
        use crate::digest::DigestWriter;

        // Hashes `blocks` with MD5 in text mode, one `write_all` per block.
        fn md5_in_text_mode(blocks: &[&[u8]]) -> String {
            let mut digest = Box::new(md5::Md5::new()) as Box<dyn Digest>;
            let mut writer = DigestWriter::new(&mut digest, false);
            for block in blocks {
                writer.write_all(block).unwrap();
            }
            writer.finalize();
            digest.result_str()
        }

        // Writing "\r" in one call to `write()`, and then "\n" in another.
        let split_crlf: [&[u8]; 2] = [&[b'\r'], &[b'\n']];
        let result_crlf = md5_in_text_mode(&split_crlf);

        // We expect "\r\n" to be replaced with "\n" in text mode on Windows.
        let lone_lf: [&[u8]; 1] = [&[b'\n']];
        let result_lf = md5_in_text_mode(&lone_lf);

        assert_eq!(result_crlf, result_lf);
    }
}

View file

@ -9,21 +9,12 @@
// spell-checker:ignore (ToDO) algo, algoname, regexes, nread, nonames
mod digest;
use self::digest::Digest;
use self::digest::DigestWriter;
use clap::builder::ValueParser;
use clap::crate_version;
use clap::ArgAction;
use clap::{Arg, ArgMatches, Command};
use hex::encode;
use md5::Md5;
use regex::Regex;
use sha1::Sha1;
use sha2::{Sha224, Sha256, Sha384, Sha512};
use sha3::{Sha3_224, Sha3_256, Sha3_384, Sha3_512, Shake128, Shake256};
use std::cmp::Ordering;
use std::error::Error;
use std::ffi::{OsStr, OsString};
@ -32,10 +23,12 @@ use std::io::{self, stdin, BufRead, BufReader, Read};
use std::iter;
use std::num::ParseIntError;
use std::path::Path;
use uucore::crash;
use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult};
use uucore::show_warning;
use uucore::sum::{
Blake2b, Blake3, Digest, DigestWriter, Md5, Sha1, Sha224, Sha256, Sha384, Sha3_224, Sha3_256,
Sha3_384, Sha3_512, Sha512, Shake128, Shake256,
};
use uucore::{crash, display::Quotable, show_warning};
const NAME: &str = "hashsum";
@ -68,16 +61,8 @@ fn detect_algo(
"sha256sum" => ("SHA256", Box::new(Sha256::new()) as Box<dyn Digest>, 256),
"sha384sum" => ("SHA384", Box::new(Sha384::new()) as Box<dyn Digest>, 384),
"sha512sum" => ("SHA512", Box::new(Sha512::new()) as Box<dyn Digest>, 512),
"b2sum" => (
"BLAKE2",
Box::new(blake2b_simd::State::new()) as Box<dyn Digest>,
512,
),
"b3sum" => (
"BLAKE3",
Box::new(blake3::Hasher::new()) as Box<dyn Digest>,
256,
),
"b2sum" => ("BLAKE2", Box::new(Blake2b::new()) as Box<dyn Digest>, 512),
"b3sum" => ("BLAKE3", Box::new(Blake3::new()) as Box<dyn Digest>, 256),
"sha3sum" => match matches.get_one::<usize>("bits") {
Some(224) => (
"SHA3-224",
@ -170,10 +155,10 @@ fn detect_algo(
set_or_crash("SHA512", Box::new(Sha512::new()), 512);
}
if matches.get_flag("b2sum") {
set_or_crash("BLAKE2", Box::new(blake2b_simd::State::new()), 512);
set_or_crash("BLAKE2", Box::new(Blake2b::new()), 512);
}
if matches.get_flag("b3sum") {
set_or_crash("BLAKE3", Box::new(blake3::Hasher::new()), 256);
set_or_crash("BLAKE3", Box::new(Blake3::new()), 256);
}
if matches.get_flag("sha3") {
match matches.get_one::<usize>("bits") {
@ -680,7 +665,7 @@ fn digest_reader<T: Read>(
// Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016)
let mut bytes = Vec::new();
bytes.resize((output_bits + 7) / 8, 0);
digest.result(&mut bytes);
digest.hash_finalize(&mut bytes);
Ok(encode(bytes))
}
}

View file

@ -36,6 +36,17 @@ libc = { version="0.2.137", optional=true }
once_cell = { workspace=true }
os_display = "0.1.3"
digest = { workspace=true }
hex = { workspace=true }
memchr = { workspace=true }
md-5 = { workspace=true }
sha1 = { workspace=true }
sha2 = { workspace=true }
sha3 = { workspace=true }
blake2b_simd = { workspace=true }
blake3 = { workspace=true }
sm3 = { workspace=true }
[target.'cfg(unix)'.dependencies]
walkdir = { workspace=true, optional=true }
nix = { workspace=true, features = ["fs", "uio", "zerocopy"] }
@ -66,3 +77,4 @@ utf8 = []
utmpx = ["time", "time/macros", "libc", "dns-lookup"]
wide = []
pipes = []
sum = []

View file

@ -12,6 +12,8 @@ pub mod lines;
pub mod memo;
#[cfg(feature = "ringbuffer")]
pub mod ringbuffer;
#[cfg(feature = "sum")]
pub mod sum;
#[cfg(feature = "memo")]
mod tokenize;

View file

@ -0,0 +1,494 @@
// This file is part of the uutils coreutils package.
//
// (c) Yuan YangHao <yuanyanghau@gmail.com>
//
// For the full copyright and license information, please view the LICENSE file
// that was distributed with this source code.
// spell-checker:ignore memmem algo
//! Implementations of digest functions, like md5 and sha1.
//!
//! The [`Digest`] trait represents the interface for providing inputs
//! to these digest functions and accessing the resulting hash. The
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
//! implements the [`Write`] trait, for use in situations where calling
//! [`write`] would be useful.
use std::io::Write;
use hex::encode;
#[cfg(windows)]
use memchr::memmem;
/// Uniform interface over the checksum and hash functions implemented in
/// this file.
pub trait Digest {
    /// Creates a hasher in its initial state.
    fn new() -> Self
    where
        Self: Sized;

    /// Feeds `input` into the running hash.
    fn hash_update(&mut self, input: &[u8]);

    /// Writes the digest of everything fed so far into `out`.
    fn hash_finalize(&mut self, out: &mut [u8]);

    /// Puts the hasher back into its initial state.
    fn reset(&mut self);

    /// Size of the digest in bits. The variable-output implementations in
    /// this file report 0.
    fn output_bits(&self) -> usize;

    /// Size of the digest in bytes; a partial byte is rounded up.
    fn output_bytes(&self) -> usize {
        let bits = self.output_bits();
        (bits + 7) / 8
    }

    /// Finalizes the hash and returns it as text. The default
    /// implementation hex-encodes `output_bytes()` bytes of digest.
    fn result_str(&mut self) -> String {
        let len = self.output_bytes();
        let mut digest_bytes = vec![0u8; len];
        self.hash_finalize(&mut digest_bytes);
        encode(digest_bytes)
    }
}
/// [`Digest`] adapter around `blake2b_simd::State`.
pub struct Blake2b(blake2b_simd::State);

impl Digest for Blake2b {
    fn new() -> Self {
        let state = blake2b_simd::State::new();
        Self(state)
    }

    fn hash_update(&mut self, input: &[u8]) {
        let state = &mut self.0;
        state.update(input);
    }

    /// Copies the hash into `out`, which must be exactly as long as the hash.
    fn hash_finalize(&mut self, out: &mut [u8]) {
        out.copy_from_slice(self.0.finalize().as_bytes());
    }

    fn reset(&mut self) {
        self.0 = blake2b_simd::State::new();
    }

    fn output_bits(&self) -> usize {
        512
    }
}
/// [`Digest`] adapter around `blake3::Hasher`.
pub struct Blake3(blake3::Hasher);

impl Digest for Blake3 {
    fn new() -> Self {
        let hasher = blake3::Hasher::new();
        Self(hasher)
    }

    fn hash_update(&mut self, input: &[u8]) {
        let hasher = &mut self.0;
        hasher.update(input);
    }

    /// Copies the hash into `out`, which must be exactly as long as the hash.
    fn hash_finalize(&mut self, out: &mut [u8]) {
        out.copy_from_slice(self.0.finalize().as_bytes());
    }

    fn reset(&mut self) {
        self.0 = blake3::Hasher::new();
    }

    fn output_bits(&self) -> usize {
        256
    }
}
/// [`Digest`] adapter around the hasher of the `sm3` crate.
pub struct Sm3(sm3::Sm3);

impl Digest for Sm3 {
    fn new() -> Self {
        let hasher = <sm3::Sm3 as sm3::Digest>::new();
        Self(hasher)
    }

    fn hash_update(&mut self, input: &[u8]) {
        let hasher = &mut self.0;
        <sm3::Sm3 as sm3::Digest>::update(hasher, input);
    }

    /// Copies the hash into `out`, which must be exactly as long as the hash.
    fn hash_finalize(&mut self, out: &mut [u8]) {
        // `finalize` takes the hasher by value, so finalize a copy of it.
        let snapshot = self.0.clone();
        let hash = <sm3::Sm3 as sm3::Digest>::finalize(snapshot);
        out.copy_from_slice(&hash);
    }

    fn reset(&mut self) {
        self.0 = <sm3::Sm3 as sm3::Digest>::new();
    }

    fn output_bits(&self) -> usize {
        256
    }
}
// NOTE: CRC_TABLE_LEN *must* be <= 256 as we cast 0..CRC_TABLE_LEN to u8
const CRC_TABLE_LEN: usize = 256;

// Per-byte lookup table of the CRC. It is computed at compile time so that
// creating or resetting a `CRC` does not rebuild it.
static CRC_TABLE: [u32; CRC_TABLE_LEN] = CRC::generate_crc_table();

/// The 32-bit CRC that `cksum` offers as its `crc` algorithm.
///
/// The input bytes are followed by the input length (least significant byte
/// first, as many bytes as needed) and the result is bitwise complemented.
pub struct CRC {
    /// CRC over the bytes seen so far, without the length suffix.
    state: u32,
    /// Number of bytes seen so far.
    size: usize,
}

impl CRC {
    /// Builds the lookup table: entry `i` is the CRC contribution of byte `i`.
    const fn generate_crc_table() -> [u32; CRC_TABLE_LEN] {
        let mut table = [0u32; CRC_TABLE_LEN];
        let mut i = 0;
        while i < CRC_TABLE_LEN {
            table[i] = Self::crc_entry(i as u8);
            i += 1;
        }
        table
    }

    /// Runs one byte through eight shift/xor rounds with the polynomial
    /// `0x04c1_1db7`.
    const fn crc_entry(input: u8) -> u32 {
        let mut crc = (input as u32) << 24;
        let mut round = 0;
        while round < 8 {
            crc = if crc & 0x8000_0000 != 0 {
                (crc << 1) ^ 0x04c1_1db7
            } else {
                crc << 1
            };
            round += 1;
        }
        crc
    }

    /// Returns `crc` advanced by one input byte.
    fn step(crc: u32, input: u8) -> u32 {
        (crc << 8) ^ CRC_TABLE[((crc >> 24) as usize ^ input as usize) & 0xFF]
    }

    fn update(&mut self, input: u8) {
        self.state = Self::step(self.state, input);
    }

    /// Computes the final checksum without modifying the running state, so
    /// it can be requested more than once.
    fn finalized(&self) -> u32 {
        let mut crc = self.state;
        let mut sz = self.size;
        while sz != 0 {
            // Append the length one byte at a time, low byte first.
            crc = Self::step(crc, sz as u8);
            sz >>= 8;
        }
        !crc
    }
}

impl Digest for CRC {
    fn new() -> Self {
        Self { state: 0, size: 0 }
    }

    fn hash_update(&mut self, input: &[u8]) {
        for &byte in input {
            self.update(byte);
        }
        self.size += input.len();
    }

    /// Writes the checksum into `out` in native byte order.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly 4 bytes long.
    fn hash_finalize(&mut self, out: &mut [u8]) {
        out.copy_from_slice(&self.finalized().to_ne_bytes());
    }

    /// Returns the checksum as a decimal number.
    fn result_str(&mut self) -> String {
        self.finalized().to_string()
    }

    fn reset(&mut self) {
        *self = Self::new();
    }

    // 32 bits, matching the 4 bytes written by `hash_finalize`.
    fn output_bits(&self) -> usize {
        32
    }
}
// This can be replaced with usize::div_ceil once it is stabilized.
/// Divides `a` by `b` and rounds the quotient up to the next integer.
///
/// The quotient and remainder are combined instead of computing
/// `(a + b - 1) / b`, so the result is correct for every `a`, including
/// values close to `usize::MAX` where the addition would overflow.
///
/// # Panics
///
/// Panics if `b` is zero.
pub fn div_ceil(a: usize, b: usize) -> usize {
    a / b + usize::from(a % b != 0)
}
/// The 16-bit rotating checksum that `cksum` offers as its `bsd` algorithm.
pub struct BSD {
    state: u16,
}

impl Digest for BSD {
    fn new() -> Self {
        Self { state: 0 }
    }

    fn hash_update(&mut self, input: &[u8]) {
        for &byte in input {
            // Rotate the sum right by one bit, then add the byte modulo 2^16.
            self.state = self.state.rotate_right(1).wrapping_add(u16::from(byte));
        }
    }

    /// Writes the checksum into `out` in native byte order.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly 2 bytes long.
    fn hash_finalize(&mut self, out: &mut [u8]) {
        out.copy_from_slice(&self.state.to_ne_bytes());
    }

    /// Returns the checksum as a decimal number.
    fn result_str(&mut self) -> String {
        self.state.to_string()
    }

    fn reset(&mut self) {
        *self = Self::new();
    }

    // 16 bits, matching the 2 bytes written by `hash_finalize`.
    fn output_bits(&self) -> usize {
        16
    }
}
/// The 16-bit checksum that `cksum` offers as its `sysv` algorithm: the sum
/// of all bytes, folded from 32 to 16 bits.
pub struct SYSV {
    /// Sum of all bytes seen so far, modulo 2^32.
    state: u32,
}

impl SYSV {
    /// Folds the running sum to 16 bits without modifying it, so more input
    /// can still be added after a result has been requested.
    fn folded(&self) -> u16 {
        let once = (self.state & 0xffff) + (self.state >> 16);
        let twice = (once & 0xffff) + (once >> 16);
        // After two folds the value is at most 0xffff, so nothing is cut off.
        twice as u16
    }
}

impl Digest for SYSV {
    fn new() -> Self {
        Self { state: 0 }
    }

    fn hash_update(&mut self, input: &[u8]) {
        for &byte in input {
            self.state = self.state.wrapping_add(u32::from(byte));
        }
    }

    /// Writes the checksum into `out` in native byte order.
    ///
    /// # Panics
    ///
    /// Panics if `out` is not exactly 2 bytes long.
    fn hash_finalize(&mut self, out: &mut [u8]) {
        out.copy_from_slice(&self.folded().to_ne_bytes());
    }

    /// Returns the checksum as a decimal number.
    fn result_str(&mut self) -> String {
        self.folded().to_string()
    }

    fn reset(&mut self) {
        *self = Self::new();
    }

    // 16 bits, matching the 2 bytes written by `hash_finalize`.
    fn output_bits(&self) -> usize {
        16
    }
}
// Generates a `Digest` impl for a tuple struct that wraps a fixed-output
// hasher from the RustCrypto family (md5, sha1, sha2, sha3). `$bits` is the
// digest size reported by `output_bits`.
macro_rules! impl_digest_common {
    ($wrapper: ty, $bits: expr) => {
        impl Digest for $wrapper {
            fn new() -> Self {
                Self(Default::default())
            }

            fn hash_update(&mut self, input: &[u8]) {
                let hasher = &mut self.0;
                digest::Digest::update(hasher, input);
            }

            // Writes the digest into `out` and leaves the wrapped hasher reset.
            fn hash_finalize(&mut self, out: &mut [u8]) {
                let hasher = &mut self.0;
                digest::Digest::finalize_into_reset(hasher, out.into());
            }

            fn reset(&mut self) {
                self.0 = Default::default();
            }

            fn output_bits(&self) -> usize {
                $bits
            }
        }
    };
}
// Generates a `Digest` impl for a tuple struct that wraps an
// extendable-output hasher. The output length is chosen by the caller
// through the length of `out`, so `output_bits` reports 0.
macro_rules! impl_digest_shake {
    ($wrapper: ty) => {
        impl Digest for $wrapper {
            fn new() -> Self {
                Self(Default::default())
            }

            fn hash_update(&mut self, input: &[u8]) {
                let hasher = &mut self.0;
                digest::Update::update(hasher, input);
            }

            // Fills all of `out` and leaves the wrapped hasher reset.
            fn hash_finalize(&mut self, out: &mut [u8]) {
                let hasher = &mut self.0;
                digest::ExtendableOutputReset::finalize_xof_reset_into(hasher, out);
            }

            fn reset(&mut self) {
                self.0 = Default::default();
            }

            fn output_bits(&self) -> usize {
                0
            }
        }
    };
}
// Tuple-struct wrappers around the third-party hashers. The macro
// invocations below give each wrapper its `Digest` impl; the number passed
// to `impl_digest_common!` is the digest size in bits.
pub struct Md5(md5::Md5);
pub struct Sha1(sha1::Sha1);
pub struct Sha224(sha2::Sha224);
pub struct Sha256(sha2::Sha256);
pub struct Sha384(sha2::Sha384);
pub struct Sha512(sha2::Sha512);
impl_digest_common!(Md5, 128);
impl_digest_common!(Sha1, 160);
impl_digest_common!(Sha224, 224);
impl_digest_common!(Sha256, 256);
impl_digest_common!(Sha384, 384);
impl_digest_common!(Sha512, 512);
// SHA-3 with fixed output sizes.
pub struct Sha3_224(sha3::Sha3_224);
pub struct Sha3_256(sha3::Sha3_256);
pub struct Sha3_384(sha3::Sha3_384);
pub struct Sha3_512(sha3::Sha3_512);
impl_digest_common!(Sha3_224, 224);
impl_digest_common!(Sha3_256, 256);
impl_digest_common!(Sha3_384, 384);
impl_digest_common!(Sha3_512, 512);
// SHAKE: variable output size, so `output_bits` is 0 for these.
pub struct Shake128(sha3::Shake128);
pub struct Shake256(sha3::Shake256);
impl_digest_shake!(Shake128);
impl_digest_shake!(Shake256);
/// A struct that writes to a digest.
///
/// This struct wraps a [`Digest`] and provides a [`Write`]
/// implementation that passes input bytes directly to the
/// [`Digest::hash_update`].
///
/// On Windows, if `binary` is `false`, then the [`write`]
/// implementation replaces instances of "\r\n" with "\n" before passing
/// the input bytes to the [`digest`].
pub struct DigestWriter<'a> {
digest: &'a mut Box<dyn Digest>,
/// Whether to write to the digest in binary mode or text mode on Windows.
///
/// If this is `false`, then instances of "\r\n" are replaced with
/// "\n" before passing input bytes to the [`digest`].
#[allow(dead_code)]
binary: bool,
/// Whether the previous
#[allow(dead_code)]
was_last_character_carriage_return: bool,
// TODO These are dead code only on non-Windows operating systems.
// It might be better to use a `#[cfg(windows)]` guard here.
}
impl<'a> DigestWriter<'a> {
pub fn new(digest: &'a mut Box<dyn Digest>, binary: bool) -> DigestWriter {
let was_last_character_carriage_return = false;
DigestWriter {
digest,
binary,
was_last_character_carriage_return,
}
}
pub fn finalize(&mut self) -> bool {
if self.was_last_character_carriage_return {
self.digest.hash_update(&[b'\r']);
true
} else {
false
}
}
}
impl<'a> Write for DigestWriter<'a> {
    /// Passes `buf` to the digest unchanged and reports all of it as
    /// consumed.
    #[cfg(not(windows))]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.digest.hash_update(buf);
        Ok(buf.len())
    }

    /// Passes `buf` to the digest, replacing each "\r\n" with "\n" when the
    /// writer is in text mode.
    ///
    /// In binary mode the bytes are forwarded unchanged. In either mode the
    /// whole buffer is reported as consumed.
    #[cfg(windows)]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        if self.binary {
            self.digest.hash_update(buf);
            return Ok(buf.len());
        }

        // The remaining code handles Windows text mode, where we must
        // replace each occurrence of "\r\n" with "\n".
        let n = buf.len();

        // An empty write tells us nothing about the byte that follows a
        // held-back "\r". Return before touching any state, otherwise the
        // pending flag would be cleared below and that "\r" would be lost.
        if n == 0 {
            return Ok(0);
        }

        // First, if the last character written was "\r" and the first
        // character in the current buffer to write is not "\n", then we
        // need to write the "\r" that we buffered from the previous
        // call to `write()`.
        if self.was_last_character_carriage_return && buf[0] != b'\n' {
            self.digest.hash_update(b"\r");
        }

        // Next, find all occurrences of "\r\n", inputting the slice
        // between the "\n" of the previous "\r\n" and the "\r" of this
        // one. Resuming at `i + 1` skips the "\r" but keeps the "\n".
        let mut i_prev = 0;
        for i in memmem::find_iter(buf, b"\r\n") {
            self.digest.hash_update(&buf[i_prev..i]);
            i_prev = i + 1;
        }

        // Finally, check whether the last character is "\r". If so,
        // buffer it until we know that the next character is not "\n",
        // which can only be known on the next call to `write()`.
        //
        // This all assumes that `write()` will be called on adjacent
        // blocks of the input.
        let ends_with_cr = buf[n - 1] == b'\r';
        let tail_end = if ends_with_cr { n - 1 } else { n };
        self.was_last_character_carriage_return = ends_with_cr;
        self.digest.hash_update(&buf[i_prev..tail_end]);

        // Even though we dropped a "\r" for each "\r\n" we found, we
        // still report the number of bytes written as `n`. This is
        // because the meaning of the returned number is supposed to be
        // the number of bytes consumed by the writer, so that if the
        // calling code were calling `write()` in a loop, it would know
        // where the next contiguous slice of the buffer starts.
        Ok(n)
    }

    /// Nothing is buffered on the I/O side, so flushing always succeeds.
    /// A held-back "\r" is only emitted by `finalize()`.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    /// Test for replacing a "\r\n" sequence with "\n" when the "\r" is
    /// at the end of one block and the "\n" is at the beginning of the
    /// next block, when reading in blocks.
    #[cfg(windows)]
    #[test]
    fn test_crlf_across_blocks() {
        use std::io::Write;

        // Both items are reachable from the parent module: `DigestWriter`
        // is defined there and names `Digest` unqualified.
        use super::Digest;
        use super::DigestWriter;

        // Writing "\r" in one call to `write()`, and then "\n" in another.
        let mut digest = Box::new(md5::Md5::new()) as Box<dyn Digest>;
        let mut writer_crlf = DigestWriter::new(&mut digest, false);
        writer_crlf.write_all(b"\r").unwrap();
        writer_crlf.write_all(b"\n").unwrap();
        // `DigestWriter` exposes `finalize()`, which flushes a pending "\r".
        writer_crlf.finalize();
        let result_crlf = digest.result_str();

        // We expect "\r\n" to be replaced with "\n" in text mode on Windows.
        let mut digest = Box::new(md5::Md5::new()) as Box<dyn Digest>;
        let mut writer_lf = DigestWriter::new(&mut digest, false);
        writer_lf.write_all(b"\n").unwrap();
        writer_lf.finalize();
        let result_lf = digest.result_str();

        assert_eq!(result_crlf, result_lf);
    }
}

View file

@ -46,6 +46,8 @@ pub use crate::features::lines;
pub use crate::features::memo;
#[cfg(feature = "ringbuffer")]
pub use crate::features::ringbuffer;
#[cfg(feature = "sum")]
pub use crate::features::sum;
// * (platform-specific) feature-gated modules
// ** non-windows (i.e. Unix + Fuchsia)

View file

@ -114,3 +114,79 @@ fn test_stdin_larger_than_128_bytes() {
assert_eq!(cksum, 945_881_979);
assert_eq!(bytes_cnt, 2058);
}
/// Runs `cksum -a=sha1` on a single file argument and checks the exact
/// line written to stdout.
#[test]
fn test_sha1_single_file() {
    let expected_stdout = "ab1dd0bae1d8883a3d18a66de6afbd28252cfbef 772 lorem_ipsum.txt\n";

    new_ucmd!()
        .arg("-a=sha1")
        .arg("lorem_ipsum.txt")
        .succeeds()
        .stdout_is(expected_stdout);
}
/// Runs `cksum -a=sm3` on a single file argument and checks the exact
/// line written to stdout.
#[test]
fn test_sm3_single_file() {
    let expected_stdout =
        "6d296b805d060bfed22808df308dbb9b4317794dd4ed6740a10770a782699bc2 772 lorem_ipsum.txt\n";

    new_ucmd!()
        .arg("-a=sm3")
        .arg("lorem_ipsum.txt")
        .succeeds()
        .stdout_is(expected_stdout);
}
/// Runs `cksum -a=bsd` on a single file argument and compares stdout with
/// the recorded fixture.
#[test]
fn test_bsd_single_file() {
    let expected_fixture = "bsd_single_file.expected";

    new_ucmd!()
        .arg("-a=bsd")
        .arg("lorem_ipsum.txt")
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
/// Runs `cksum -a=bsd` on two file arguments and compares stdout with the
/// recorded fixture.
#[test]
fn test_bsd_multiple_files() {
    let expected_fixture = "bsd_multiple_files.expected";

    new_ucmd!()
        .arg("-a=bsd")
        .arg("lorem_ipsum.txt")
        .arg("alice_in_wonderland.txt")
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
/// Runs `cksum -a=bsd` with the input file piped in on stdin and compares
/// stdout with the recorded fixture.
#[test]
fn test_bsd_stdin() {
    let expected_fixture = "bsd_stdin.expected";

    new_ucmd!()
        .arg("-a=bsd")
        .pipe_in_fixture("lorem_ipsum.txt")
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
/// Runs `cksum -a=sysv` on a single file argument and compares stdout with
/// the recorded fixture.
#[test]
fn test_sysv_single_file() {
    let expected_fixture = "sysv_single_file.expected";

    new_ucmd!()
        .arg("-a=sysv")
        .arg("lorem_ipsum.txt")
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
/// Runs `cksum -a=sysv` on two file arguments and compares stdout with the
/// recorded fixture.
#[test]
fn test_sysv_multiple_files() {
    let expected_fixture = "sysv_multiple_files.expected";

    new_ucmd!()
        .arg("-a=sysv")
        .arg("lorem_ipsum.txt")
        .arg("alice_in_wonderland.txt")
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}
/// Runs `cksum -a=sysv` with the input file piped in on stdin and compares
/// stdout with the recorded fixture.
#[test]
fn test_sysv_stdin() {
    let expected_fixture = "sysv_stdin.expected";

    new_ucmd!()
        .arg("-a=sysv")
        .pipe_in_fixture("lorem_ipsum.txt")
        .succeeds()
        .stdout_only_fixture(expected_fixture);
}

View file

@ -0,0 +1,2 @@
08109 1 lorem_ipsum.txt
01814 1 alice_in_wonderland.txt

View file

@ -0,0 +1 @@
08109 1 lorem_ipsum.txt

View file

@ -0,0 +1 @@
08109 1

View file

@ -0,0 +1,2 @@
6985 2 lorem_ipsum.txt
27441 1 alice_in_wonderland.txt

View file

@ -0,0 +1 @@
6985 2 lorem_ipsum.txt

View file

@ -0,0 +1 @@
6985 2