Move display::Quotable into its own crate

The standalone version has a number of bugfixes compared to the old version.
This commit is contained in:
Jan Verbeek 2021-10-13 17:29:03 +02:00
parent 2e12316ae1
commit 77e1570ea0
3 changed files with 15 additions and 492 deletions

Cargo.lock generated
View file

@ -1291,6 +1291,15 @@ dependencies = [
name = "os_display"
version = "0.1.2"
source = "registry+"
checksum = "748cc1d0dc55247316a5bedd8dc8c5478c8a0c2e2001176b38ce7c0ed732c7a5"
dependencies = [
name = "ouroboros"
version = "0.10.1"
@ -2102,9 +2111,9 @@ checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"
name = "unicode-width"
version = "0.1.8"
version = "0.1.9"
source = "registry+"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
name = "unicode-xid"
@ -3276,6 +3285,7 @@ dependencies = [
"nix 0.20.0",

View file

@ -30,6 +30,7 @@ data-encoding-macro = { version="0.1.12", optional=true }
z85 = { version="3.0.3", optional=true }
libc = { version="0.2.15", optional=true }
once_cell = "1.8.0"
os_display = "0.1.0"
walkdir = { version="2.3.2", optional=true }

View file

@ -19,378 +19,16 @@
/// println_verbatim(path)?; // Prints "foo/bar.baz"
/// # Ok::<(), std::io::Error>(())
/// ```
// spell-checker:ignore Fbar
use std::borrow::Cow;
use std::ffi::OsStr;
#[cfg(any(unix, target_os = "wasi", windows))]
use std::fmt::Write as FmtWrite;
use std::fmt::{self, Display, Formatter};
use std::io::{self, Write as IoWrite};
use std::os::unix::ffi::OsStrExt;
#[cfg(target_os = "wasi")]
use std::os::wasi::ffi::OsStrExt;
#[cfg(any(unix, target_os = "wasi"))]
use std::str::from_utf8;
/// An extension trait for displaying filenames to users.
pub trait Quotable {
/// Returns an object that implements [`Display`] for printing filenames with
/// proper quoting and escaping for the platform.
/// On Unix this corresponds to sh/bash syntax; on Windows, PowerShell syntax
/// is used.
/// # Examples
/// ```
/// use std::path::Path;
/// use uucore::display::Quotable;
/// let path = Path::new("foo/bar.baz");
/// println!("Found file {}", path.quote()); // Prints "Found file 'foo/bar.baz'"
/// ```
fn quote(&self) -> Quoted<'_>;
/// Like `quote()`, but don't actually add quotes unless necessary because of
/// whitespace or special characters.
/// # Examples
/// ```
/// use std::path::Path;
/// use uucore::display::Quotable;
/// use uucore::show_error;
/// let foo = Path::new("foo/bar.baz");
/// let bar = Path::new("foo bar");
/// show_error!("{}: Not found", foo.maybe_quote()); // Prints "util: foo/bar.baz: Not found"
/// show_error!("{}: Not found", bar.maybe_quote()); // Prints "util: 'foo bar': Not found"
/// ```
fn maybe_quote(&self) -> Quoted<'_> {
let mut quoted = self.quote();
quoted.force_quote = false;
macro_rules! impl_as_ref {
($type: ty) => {
impl Quotable for $type {
fn quote(&self) -> Quoted<'_> {
impl_as_ref!(&'_ str);
impl_as_ref!(&'_ std::path::Path);
impl_as_ref!(&'_ std::ffi::OsStr);
// Cow<'_, str> does not implement AsRef<OsStr> and this is unlikely to be fixed
// for backward compatibility reasons. Otherwise we'd use a blanket impl.
impl Quotable for Cow<'_, str> {
fn quote(&self) -> Quoted<'_> {
let text: &str = self.as_ref();
impl Quotable for Cow<'_, std::path::Path> {
fn quote(&self) -> Quoted<'_> {
let text: &std::path::Path = self.as_ref();
/// A wrapper around [`OsStr`] for printing paths with quoting and escaping applied.
#[derive(Debug, Copy, Clone)]
pub struct Quoted<'a> {
text: &'a OsStr,
force_quote: bool,
impl<'a> Quoted<'a> {
fn new(text: &'a OsStr) -> Self {
Quoted {
force_quote: true,
impl Display for Quoted<'_> {
#[cfg(any(windows, unix, target_os = "wasi"))]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
// On Unix we emulate sh syntax. On Windows we emulate PowerShell syntax.
// They're just similar enough to share some code.
/// Characters with special meaning outside quotes.
// I don't know why % is in there, and GNU doesn't quote it either.
// {} were used in a version elsewhere but seem unnecessary, GNU doesn't
// quote them. They're used in function definitions but not in a way we
// have to worry about.
#[cfg(any(unix, target_os = "wasi"))]
const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\\\"'*?[]=";
// FIXME: I'm not a PowerShell wizard and don't know if this is correct.
// I just copied the Unix version, removed \, and added ,{} based on
// experimentation.
// I have noticed that ~?*[] only get expanded in some contexts, so watch
// out for that if doing your own tests.
// Get-ChildItem seems unwilling to quote anything so it doesn't help.
// There's the additional wrinkle that Windows has stricter requirements
// for filenames: I've been testing using a Linux build of PowerShell, but
// this code doesn't even compile on Linux.
const SPECIAL_SHELL_CHARS: &[u8] = b"|&;<>()$`\"'*?[]=,{}";
/// Characters with a special meaning at the beginning of a name.
// ~ expands a home directory.
// # starts a comment.
// ! is a common extension for expanding the shell history.
#[cfg(any(unix, target_os = "wasi"))]
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '!'];
// Same deal as before, this is possibly incomplete.
// A single stand-alone exclamation mark seems to have some special meaning.
const SPECIAL_SHELL_CHARS_START: &[char] = &['~', '#', '@', '!'];
/// Characters that are interpreted specially in a double-quoted string.
#[cfg(any(unix, target_os = "wasi"))]
const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$', b'\\'];
const DOUBLE_UNSAFE: &[u8] = &[b'"', b'`', b'$'];
let text = match self.text.to_str() {
None => return write_escaped(f, self.text),
Some(text) => text,
let mut is_single_safe = true;
let mut is_double_safe = true;
let mut requires_quote = self.force_quote;
if let Some(first) = text.chars().next() {
if SPECIAL_SHELL_CHARS_START.contains(&first) {
requires_quote = true;
// Unlike in Unix, quoting an argument may stop it
// from being recognized as an option. I like that very much.
// But we don't want to quote "-" because that's a common
// special argument and PowerShell doesn't mind it.
if first == '-' && text.len() > 1 {
requires_quote = true;
} else {
// Empty string
requires_quote = true;
for ch in text.chars() {
if ch.is_ascii() {
let ch = ch as u8;
if ch == b'\'' {
is_single_safe = false;
if DOUBLE_UNSAFE.contains(&ch) {
is_double_safe = false;
if !requires_quote && SPECIAL_SHELL_CHARS.contains(&ch) {
requires_quote = true;
if ch.is_ascii_control() {
return write_escaped(f, self.text);
if !requires_quote && ch.is_whitespace() {
// This includes unicode whitespace.
// We maybe don't have to escape it, we don't escape other lookalike
// characters either, but it's confusing if it goes unquoted.
requires_quote = true;
if !requires_quote {
return f.write_str(text);
} else if is_single_safe {
return write_simple(f, text, '\'');
} else if is_double_safe {
return write_simple(f, text, '\"');
} else {
return write_single_escaped(f, text);
fn write_simple(f: &mut Formatter<'_>, text: &str, quote: char) -> fmt::Result {
#[cfg(any(unix, target_os = "wasi"))]
fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result {
let mut iter = text.split('\'');
if let Some(chunk) = {
if !chunk.is_empty() {
write_simple(f, chunk, '\'')?;
for chunk in iter {
if !chunk.is_empty() {
write_simple(f, chunk, '\'')?;
/// Write using the `$'...'` quoting syntax with backslash escapes (known as ANSI-C quoting in bash).
/// Supported by these shells:
/// - bash
/// - zsh
/// - busybox sh
/// - mksh
/// Not supported by these:
/// - fish
/// - dash
/// - tcsh
#[cfg(any(unix, target_os = "wasi"))]
fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result {
for chunk in from_utf8_iter(text.as_bytes()) {
match chunk {
Ok(chunk) => {
for ch in chunk.chars() {
match ch {
'\n' => f.write_str("\\n")?,
'\t' => f.write_str("\\t")?,
'\r' => f.write_str("\\r")?,
// We could do \b, \f, \v, etc., but those are
// rare enough to be confusing.
// \0 doesn't work consistently because of the
// octal \nnn syntax, and null bytes can't appear
// in filenames anyway.
ch if ch.is_ascii_control() => write!(f, "\\x{:02X}", ch as u8)?,
'\\' | '\'' => {
// '?' and '"' can also be escaped this way
// but AFAICT there's no reason to do so
ch => {
Err(unit) => write!(f, "\\x{:02X}", unit)?,
fn write_single_escaped(f: &mut Formatter<'_>, text: &str) -> fmt::Result {
// Quotes in PowerShell can be escaped by doubling them
let mut iter = text.split('\'');
if let Some(chunk) = {
for chunk in iter {
fn write_escaped(f: &mut Formatter<'_>, text: &OsStr) -> fmt::Result {
// ` takes the role of \ since \ is already used as the path separator.
// Things are UTF-16-oriented, so we escape code units as "`u{1234}".
use std::char::decode_utf16;
use std::os::windows::ffi::OsStrExt;
for ch in decode_utf16(text.encode_wide()) {
match ch {
Ok(ch) => match ch {
'\0' => f.write_str("`0")?,
'\r' => f.write_str("`r")?,
'\n' => f.write_str("`n")?,
'\t' => f.write_str("`t")?,
ch if ch.is_ascii_control() => write!(f, "`u{{{:04X}}}", ch as u8)?,
'`' => f.write_str("``")?,
'$' => f.write_str("`$")?,
'"' => f.write_str("\"\"")?,
ch => f.write_char(ch)?,
Err(err) => write!(f, "`u{{{:04X}}}", err.unpaired_surrogate())?,
#[cfg(not(any(unix, target_os = "wasi", windows)))]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
// As a fallback, we use Rust's own escaping rules.
// This is reasonably sane and very easy to implement.
// We use single quotes because that's hardcoded in a lot of tests.
let text = self.text.to_string_lossy();
if self.force_quote || !text.chars().all(|ch| ch.is_alphanumeric() || ch == '.') {
write!(f, "'{}'", text.escape_debug())
} else {
#[cfg(any(unix, target_os = "wasi"))]
fn from_utf8_iter(mut bytes: &[u8]) -> impl Iterator<Item = Result<&str, u8>> {
std::iter::from_fn(move || {
if bytes.is_empty() {
return None;
match from_utf8(bytes) {
Ok(text) => {
bytes = &[];
Err(err) if err.valid_up_to() == 0 => {
let res = bytes[0];
bytes = &bytes[1..];
Err(err) => {
let (valid, rest) = bytes.split_at(err.valid_up_to());
bytes = rest;
// These used to be defined here, but they live in their own crate now.
pub use os_display::{Quotable, Quoted};
/// Print a path (or `OsStr`-like object) directly to stdout, with a trailing newline,
/// without losing any information if its encoding is invalid.
@ -429,129 +67,3 @@ pub fn print_verbatim<S: AsRef<OsStr>>(text: S) -> io::Result<()> {
write!(stdout, "{}", std::path::Path::new(text.as_ref()).display())
mod tests {
use super::*;
fn verify_quote(cases: &[(impl Quotable, &str)]) {
for (case, expected) in cases {
assert_eq!(case.quote().to_string(), *expected);
fn verify_maybe(cases: &[(impl Quotable, &str)]) {
for (case, expected) in cases {
assert_eq!(case.maybe_quote().to_string(), *expected);
/// This should hold on any platform, or else other tests fail.
fn test_basic() {
("foo", "'foo'"),
("", "''"),
("foo/bar.baz", "'foo/bar.baz'"),
("foo", "foo"),
("", "''"),
("foo bar", "'foo bar'"),
("$foo", "'$foo'"),
("-", "-"),
#[cfg(any(unix, target_os = "wasi", windows))]
fn test_common() {
("a#b", "a#b"),
("#ab", "'#ab'"),
("a~b", "a~b"),
("!", "'!'"),
#[cfg(any(unix, target_os = "wasi"))]
fn test_unix() {
("can't", r#""can't""#),
(r#"can'"t"#, r#"'can'\''"t'"#),
(r#"can'$t"#, r#"'can'\''$t'"#),
("foo\nb\ta\r\\\0`r", r#"$'foo\nb\ta\r\\\x00`r'"#),
("foo\x02", r#"$'foo\x02'"#),
(r#"'$''"#, r#"\''$'\'\'"#),
verify_quote(&[(OsStr::from_bytes(b"foo\xFF"), r#"$'foo\xFF'"#)]);
("-x", "-x"),
("a,b", "a,b"),
("a\\b", "'a\\b'"),
("}", ("}")),
fn test_windows() {
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;
(r#"foo\bar"#, r#"'foo\bar'"#),
("can't", r#""can't""#),
(r#"can'"t"#, r#"'can''"t'"#),
(r#"can'$t"#, r#"'can''$t'"#),
("foo\nb\ta\r\\\0`r", r#""foo`nb`ta`r\`0``r""#),
("foo\x02", r#""foo`u{0002}""#),
(r#"'$''"#, r#"'''$'''''"#),
OsString::from_wide(&[b'x' as u16, 0xD800]),
("-x", "'-x'"),
("a,b", "'a,b'"),
("a\\b", "a\\b"),
("}", "'}'"),
#[cfg(any(unix, target_os = "wasi"))]
fn test_utf8_iter() {
type ByteStr = &'static [u8];
type Chunk = Result<&'static str, u8>;
const CASES: &[(ByteStr, &[Chunk])] = &[
(b"", &[]),
(b"hello", &[Ok("hello")]),
// Immediately invalid
(b"\xFF", &[Err(b'\xFF')]),
// Incomplete UTF-8
(b"\xC2", &[Err(b'\xC2')]),
(b"\xF4\x8F", &[Err(b'\xF4'), Err(b'\x8F')]),
(b"\xFF\xFF", &[Err(b'\xFF'), Err(b'\xFF')]),
(b"hello\xC2", &[Ok("hello"), Err(b'\xC2')]),
(b"\xFFhello", &[Err(b'\xFF'), Ok("hello")]),
(b"\xFF\xC2hello", &[Err(b'\xFF'), Err(b'\xC2'), Ok("hello")]),
(b"foo\xFFbar", &[Ok("foo"), Err(b'\xFF'), Ok("bar")]),
&[Ok("foo"), Err(b'\xF4'), Err(b'\x8F'), Ok("bar")],
&[Ok("foo"), Err(b'\xFF'), Err(b'\xC2'), Ok("bar")],
for &(case, expected) in CASES {