Auto merge of #25416 - kballard:ffi-cstr-to-str-convenience, r=alexcrichton

This was motivated by http://www.evanmiller.org/a-taste-of-rust.html.

A common problem when working with FFI is converting from raw
C strings into `&str` or `String`. Right now you're required to say
something like

    let cstr = unsafe { CStr::from_ptr(ptr) };
    let result = str::from_utf8(cstr.to_bytes());

This is slightly awkward, and is not particularly intuitive for people
who haven't used the ffi module before. We can do a bit better by
providing some convenience methods on CStr:

    fn to_str(&self) -> Result<&str, str::Utf8Error>
    fn to_string_lossy(&self) -> Cow<str>

This will make it immediately apparent to new users of CStr how to get a
string from a raw C string, so they can say:

    let s = unsafe { CStr::from_ptr(ptr).to_string_lossy() };
This commit is contained in:
bors 2015-05-23 11:12:02 +00:00
commit 4c2ebc3947

View file

@ -10,6 +10,7 @@
#![unstable(feature = "std_misc")]
use borrow::Cow;
use convert::{Into, From};
use cmp::{PartialEq, Eq, PartialOrd, Ord, Ordering};
use error::Error;
@ -22,6 +23,7 @@
use option::Option::{self, Some, None};
use result::Result::{self, Ok, Err};
use slice;
use str;
use string::String;
use vec::Vec;
@ -113,6 +115,26 @@ pub struct CString {
/// work(&s);
/// }
/// ```
///
/// Converting a foreign C string into a Rust `String`
///
/// ```no_run
/// # #![feature(libc,cstr_to_str)]
/// extern crate libc;
/// use std::ffi::CStr;
///
/// extern { fn my_string() -> *const libc::c_char; }
///
/// fn my_string_safe() -> String {
/// unsafe {
/// CStr::from_ptr(my_string()).to_string_lossy().into_owned()
/// }
/// }
///
/// fn main() {
/// println!("string: {}", my_string_safe());
/// }
/// ```
#[derive(Hash)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct CStr {
@ -327,6 +349,39 @@ pub fn to_bytes(&self) -> &[u8] {
pub fn to_bytes_with_nul(&self) -> &[u8] {
// Reinterpret the stored `[libc::c_char]` slice as a `[u8]` slice in place;
// only the reference type changes, the underlying data is not copied.
// NOTE(review): this relies on `libc::c_char` having the same size and
// alignment as `u8` — confirm for every supported target.
unsafe { mem::transmute::<&[libc::c_char], &[u8]>(&self.inner) }
}
/// Borrows this `CStr` as a `&str`, provided its contents are valid UTF-8.
///
/// The bytes returned by `to_bytes` are handed to `str::from_utf8`: on
/// success the resulting `&str` is returned, otherwise the `Utf8Error`
/// reported by the validation is returned.
///
/// > **Note**: Today the UTF-8 check runs on a slice obtained through a
/// > 0-cost cast. The definition is planned to change so that every call
/// > performs the length calculation in addition to the UTF-8 check.
#[unstable(feature = "cstr_to_str", reason = "recently added")]
pub fn to_str(&self) -> Result<&str, str::Utf8Error> {
    // NB: Once the length check moves out of from_ptr() and into .to_bytes(), it may be
    // worth fusing the UTF-8 validation into the length scan instead of running it as a
    // separate pass afterwards.
    let bytes = self.to_bytes();
    str::from_utf8(bytes)
}
/// Produces a `Cow<str>` holding the contents of this `CStr`.
///
/// The bytes returned by `to_bytes` are passed to
/// `String::from_utf8_lossy`, so every invalid UTF-8 sequence is replaced
/// with `U+FFFD REPLACEMENT CHARACTER`. Input that is already valid UTF-8
/// comes back borrowed; input that needed replacements comes back as an
/// owned `String`.
///
/// Calculating the length of the string normally requires a linear amount
/// of work.
///
/// > **Note**: Today the conversion runs on a slice obtained through a
/// > 0-cost cast. The definition is planned to change so that every call
/// > performs the length calculation in addition to the UTF-8 check.
#[unstable(feature = "cstr_to_str", reason = "recently added")]
pub fn to_string_lossy(&self) -> Cow<str> {
    let bytes = self.to_bytes();
    String::from_utf8_lossy(bytes)
}
}
#[stable(feature = "rust1", since = "1.0.0")]
@ -355,6 +410,7 @@ mod tests {
use prelude::v1::*;
use super::*;
use libc;
use borrow::Cow::{Borrowed, Owned};
#[test]
fn c_to_rust() {
@ -404,4 +460,20 @@ fn borrowed() {
assert_eq!(s.to_bytes_with_nul(), b"12\0");
}
}
#[test]
fn to_str() {
    // Well-formed input: "123" followed by the three-byte UTF-8 encoding of
    // U+2026 (E2 80 A6) and the terminating nul.
    let valid = b"123\xE2\x80\xA6\0";
    let valid_ptr = valid.as_ptr() as *const libc::c_char;

    let strict = unsafe { CStr::from_ptr(valid_ptr).to_str() };
    assert_eq!(strict, Ok("123…"));
    // Valid UTF-8 must be handed back without allocating.
    let lossy = unsafe { CStr::from_ptr(valid_ptr).to_string_lossy() };
    assert_eq!(lossy, Borrowed("123…"));

    // Malformed input: the multi-byte sequence is cut off after its lead byte.
    let truncated = b"123\xE2\0";
    let truncated_ptr = truncated.as_ptr() as *const libc::c_char;

    let strict = unsafe { CStr::from_ptr(truncated_ptr).to_str() };
    assert!(strict.is_err());
    // The lossy conversion substitutes U+FFFD and therefore has to own its data.
    let lossy = unsafe { CStr::from_ptr(truncated_ptr).to_string_lossy() };
    assert_eq!(lossy, Owned::<str>(format!("123\u{FFFD}")));
}
}