Auto merge of #72813 - RalfJung:rollup-4ko6q8j, r=RalfJung

Rollup of 5 pull requests Successful merges: - #72683 (from_u32_unchecked: check validity, and fix UB in Wtf8) - #72715 (Account for trailing comma when suggesting `where` clauses) - #72745 (generalize Borrow<[T]> for Interned<'tcx, List<T>>) - #72749 (Update stdarch submodule to latest head) - #72781 (Use `LocalDefId` instead of `NodeId` in `resolve_str_path_error`) Failed merges: r? @ghost
2024-07-08 19:47:03 +00:00 · 2020-05-31 13:39:05 +00:00 · 2020-05-31 13:39:05 +00:00 · f6072cab13
commit f6072cab13
parent 4b1f86adbe cbc73dc263
14 changed files with 137 additions and 145 deletions
--- a/src/libcore/char/convert.rs
+++ b/src/libcore/char/convert.rs
@ -99,7 +99,7 @@ pub fn from_u32(i: u32) -> Option<char> {
 #[inline]
 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
 pub unsafe fn from_u32_unchecked(i: u32) -> char {
-    transmute(i)
+    if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { transmute(i) }
 }

 #[stable(feature = "char_convert", since = "1.13.0")]
@ -218,7 +218,7 @@ fn try_from(i: u32) -> Result<Self, Self::Error> {
            Err(CharTryFromError(()))
        } else {
            // SAFETY: checked that it's a legal unicode value
-            Ok(unsafe { from_u32_unchecked(i) })
+            Ok(unsafe { transmute(i) })
        }
    }
 }
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@ -593,16 +593,7 @@ pub fn escape_default(self) -> EscapeDefault {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[inline]
    pub fn len_utf8(self) -> usize {
-        let code = self as u32;
-        if code < MAX_ONE_B {
-            1
-        } else if code < MAX_TWO_B {
-            2
-        } else if code < MAX_THREE_B {
-            3
-        } else {
-            4
-        }
+        len_utf8(self as u32)
    }

    /// Returns the number of 16-bit code units this `char` would need if
@ -670,36 +661,8 @@ pub fn len_utf16(self) -> usize {
    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
    #[inline]
    pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
-        let code = self as u32;
-        let len = self.len_utf8();
-        match (len, &mut dst[..]) {
-            (1, [a, ..]) => {
-                *a = code as u8;
-            }
-            (2, [a, b, ..]) => {
-                *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-                *b = (code & 0x3F) as u8 | TAG_CONT;
-            }
-            (3, [a, b, c, ..]) => {
-                *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-                *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *c = (code & 0x3F) as u8 | TAG_CONT;
-            }
-            (4, [a, b, c, d, ..]) => {
-                *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-                *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-                *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-                *d = (code & 0x3F) as u8 | TAG_CONT;
-            }
-            _ => panic!(
-                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-                len,
-                code,
-                dst.len(),
-            ),
-        };
-        // SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
-        unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
+        // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
+        unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
    }

    /// Encodes this character as UTF-16 into the provided `u16` buffer,
@ -739,28 +702,7 @@ pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
    #[stable(feature = "unicode_encode_char", since = "1.15.0")]
    #[inline]
    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
-        let mut code = self as u32;
-        // SAFETY: each arm checks whether there are enough bits to write into
-        unsafe {
-            if (code & 0xFFFF) == code && !dst.is_empty() {
-                // The BMP falls through (assuming non-surrogate, as it should)
-                *dst.get_unchecked_mut(0) = code as u16;
-                slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
-            } else if dst.len() >= 2 {
-                // Supplementary planes break into surrogates.
-                code -= 0x1_0000;
-                *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
-                *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
-                slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
-            } else {
-                panic!(
-                    "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
-                    from_u32_unchecked(code).len_utf16(),
-                    code,
-                    dst.len(),
-                )
-            }
-        }
+        encode_utf16_raw(self as u32, dst)
    }

    /// Returns `true` if this `char` has the `Alphabetic` property.
@ -1673,3 +1615,100 @@ pub const fn is_ascii_control(&self) -> bool {
        }
    }
 }
+
+#[inline]
+fn len_utf8(code: u32) -> usize {
+    if code < MAX_ONE_B {
+        1
+    } else if code < MAX_TWO_B {
+        2
+    } else if code < MAX_THREE_B {
+        3
+    } else {
+        4
+    }
+}
+
+/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
+/// and then returns the subslice of the buffer that contains the encoded character.
+///
+/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
+/// (Creating a `char` in the surrogate range is UB.)
+/// The result is valid [generalized UTF-8] but not valid UTF-8.
+///
+/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
+///
+/// # Panics
+///
+/// Panics if the buffer is not large enough.
+/// A buffer of length four is large enough to encode any `char`.
+#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+#[doc(hidden)]
+#[inline]
+pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
+    let len = len_utf8(code);
+    match (len, &mut dst[..]) {
+        (1, [a, ..]) => {
+            *a = code as u8;
+        }
+        (2, [a, b, ..]) => {
+            *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+            *b = (code & 0x3F) as u8 | TAG_CONT;
+        }
+        (3, [a, b, c, ..]) => {
+            *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+            *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+            *c = (code & 0x3F) as u8 | TAG_CONT;
+        }
+        (4, [a, b, c, d, ..]) => {
+            *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+            *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+            *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+            *d = (code & 0x3F) as u8 | TAG_CONT;
+        }
+        _ => panic!(
+            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
+            len,
+            code,
+            dst.len(),
+        ),
+    };
+    &mut dst[..len]
+}
+
+/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
+/// and then returns the subslice of the buffer that contains the encoded character.
+///
+/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
+/// (Creating a `char` in the surrogate range is UB.)
+///
+/// # Panics
+///
+/// Panics if the buffer is not large enough.
+/// A buffer of length 2 is large enough to encode any `char`.
+#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+#[doc(hidden)]
+#[inline]
+pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
+    // SAFETY: each arm checks whether there are enough bits to write into
+    unsafe {
+        if (code & 0xFFFF) == code && !dst.is_empty() {
+            // The BMP falls through
+            *dst.get_unchecked_mut(0) = code as u16;
+            slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
+        } else if dst.len() >= 2 {
+            // Supplementary planes break into surrogates.
+            code -= 0x1_0000;
+            *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
+            *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
+            slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
+        } else {
+            panic!(
+                "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
+                from_u32_unchecked(code).len_utf16(),
+                code,
+                dst.len(),
+            )
+        }
+    }
+}
--- a/src/libcore/char/mod.rs
+++ b/src/libcore/char/mod.rs
@ -37,6 +37,12 @@
 #[stable(feature = "unicode_version", since = "1.45.0")]
 pub use crate::unicode::UNICODE_VERSION;

+// perma-unstable re-exports
+#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+pub use self::methods::encode_utf16_raw;
+#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
+pub use self::methods::encode_utf8_raw;
+
 use crate::fmt::{self, Write};
 use crate::iter::FusedIterator;

--- a/src/librustc_hir/hir.rs
+++ b/src/librustc_hir/hir.rs
@ -524,6 +524,13 @@ pub fn span(&self) -> Option<Span> {
    pub fn span_for_predicates_or_empty_place(&self) -> Span {
        self.span
    }
+
+    /// `Span` where further predicates would be suggested, accounting for trailing commas, like
+    ///  in `fn foo<T>(t: T) where T: Foo,` so we don't suggest two trailing commas.
+    pub fn tail_span_for_suggestion(&self) -> Span {
+        let end = self.span_for_predicates_or_empty_place().shrink_to_hi();
+        self.predicates.last().map(|p| p.span()).unwrap_or(end).shrink_to_hi().to(end)
+    }
 }

 /// A single predicate in a where-clause.
--- a/src/librustc_middle/ty/context.rs
+++ b/src/librustc_middle/ty/context.rs
@ -1971,32 +1971,8 @@ fn hash<H: Hasher>(&self, s: &mut H) {
    }
 }

-impl<'tcx> Borrow<[Ty<'tcx>]> for Interned<'tcx, List<Ty<'tcx>>> {
-    fn borrow<'a>(&'a self) -> &'a [Ty<'tcx>] {
-        &self.0[..]
-    }
-}
-
-impl<'tcx> Borrow<[CanonicalVarInfo]> for Interned<'tcx, List<CanonicalVarInfo>> {
-    fn borrow(&self) -> &[CanonicalVarInfo] {
-        &self.0[..]
-    }
-}
-
-impl<'tcx> Borrow<[GenericArg<'tcx>]> for Interned<'tcx, InternalSubsts<'tcx>> {
-    fn borrow<'a>(&'a self) -> &'a [GenericArg<'tcx>] {
-        &self.0[..]
-    }
-}
-
-impl<'tcx> Borrow<[ProjectionKind]> for Interned<'tcx, List<ProjectionKind>> {
-    fn borrow(&self) -> &[ProjectionKind] {
-        &self.0[..]
-    }
-}
-
-impl<'tcx> Borrow<[PlaceElem<'tcx>]> for Interned<'tcx, List<PlaceElem<'tcx>>> {
-    fn borrow(&self) -> &[PlaceElem<'tcx>] {
+impl<'tcx, T> Borrow<[T]> for Interned<'tcx, List<T>> {
+    fn borrow<'a>(&'a self) -> &'a [T] {
        &self.0[..]
    }
 }
@ -2007,34 +1983,12 @@ fn borrow(&self) -> &RegionKind {
    }
 }

-impl<'tcx> Borrow<[ExistentialPredicate<'tcx>]>
-    for Interned<'tcx, List<ExistentialPredicate<'tcx>>>
-{
-    fn borrow<'a>(&'a self) -> &'a [ExistentialPredicate<'tcx>] {
-        &self.0[..]
-    }
-}
-
-impl<'tcx> Borrow<[Predicate<'tcx>]> for Interned<'tcx, List<Predicate<'tcx>>> {
-    fn borrow<'a>(&'a self) -> &'a [Predicate<'tcx>] {
-        &self.0[..]
-    }
-}
-
 impl<'tcx> Borrow<Const<'tcx>> for Interned<'tcx, Const<'tcx>> {
    fn borrow<'a>(&'a self) -> &'a Const<'tcx> {
        &self.0
    }
 }

-impl<'tcx> Borrow<[traits::ChalkEnvironmentClause<'tcx>]>
-    for Interned<'tcx, List<traits::ChalkEnvironmentClause<'tcx>>>
-{
-    fn borrow<'a>(&'a self) -> &'a [traits::ChalkEnvironmentClause<'tcx>] {
-        &self.0[..]
-    }
-}
-
 impl<'tcx> Borrow<PredicateKind<'tcx>> for Interned<'tcx, PredicateKind<'tcx>> {
    fn borrow<'a>(&'a self) -> &'a PredicateKind<'tcx> {
        &self.0
--- a/src/librustc_middle/ty/diagnostics.rs
+++ b/src/librustc_middle/ty/diagnostics.rs
@ -7,7 +7,6 @@
 use rustc_hir as hir;
 use rustc_hir::def_id::DefId;
 use rustc_hir::{QPath, TyKind, WhereBoundPredicate, WherePredicate};
-use rustc_span::{BytePos, Span};

 impl<'tcx> TyS<'tcx> {
    /// Similar to `TyS::is_primitive`, but also considers inferred numeric values to be primitive.
@ -221,24 +220,11 @@ pub fn suggest_constraining_type_param(
            }
        }

-        let where_clause_span = generics.where_clause.span_for_predicates_or_empty_place();
-        // Account for `fn foo<T>(t: T) where T: Foo,` so we don't suggest two trailing commas.
-        let mut trailing_comma = false;
-        if let Ok(snippet) = tcx.sess.source_map().span_to_snippet(where_clause_span) {
-            trailing_comma = snippet.ends_with(',');
-        }
-        let where_clause_span = if trailing_comma {
-            let hi = where_clause_span.hi();
-            Span::new(hi - BytePos(1), hi, where_clause_span.ctxt())
-        } else {
-            where_clause_span.shrink_to_hi()
-        };
-
        match &param_spans[..] {
            &[&param_span] => suggest_restrict(param_span.shrink_to_hi()),
            _ => {
                err.span_suggestion_verbose(
-                    where_clause_span,
+                    generics.where_clause.tail_span_for_suggestion(),
                    &msg_restrict_type_further,
                    format!(", {}: {}", param_name, constraint),
                    Applicability::MachineApplicable,
--- a/src/librustc_resolve/lib.rs
+++ b/src/librustc_resolve/lib.rs
@ -2902,7 +2902,7 @@ pub fn resolve_str_path_error(
        span: Span,
        path_str: &str,
        ns: Namespace,
-        module_id: NodeId,
+        module_id: LocalDefId,
    ) -> Result<(ast::Path, Res), ()> {
        let path = if path_str.starts_with("::") {
            ast::Path {
@ -2922,10 +2922,7 @@ pub fn resolve_str_path_error(
                    .collect(),
            }
        };
-        let module = self.block_map.get(&module_id).copied().unwrap_or_else(|| {
-            let def_id = self.definitions.local_def_id(module_id);
-            self.module_map.get(&def_id).copied().unwrap_or(self.graph_root)
-        });
+        let module = self.module_map.get(&module_id).copied().unwrap_or(self.graph_root);
        let parent_scope = &ParentScope::module(module);
        let res = self.resolve_ast_path(&path, ns, parent_scope).map_err(|_| ())?;
        Ok((path, res))
--- a/src/librustc_trait_selection/traits/error_reporting/suggestions.rs
+++ b/src/librustc_trait_selection/traits/error_reporting/suggestions.rs
@ -170,7 +170,7 @@ fn suggest_await_before_try(

 fn predicate_constraint(generics: &hir::Generics<'_>, pred: String) -> (Span, String) {
    (
-        generics.where_clause.span_for_predicates_or_empty_place().shrink_to_hi(),
+        generics.where_clause.tail_span_for_suggestion(),
        format!(
            "{} {}",
            if !generics.where_clause.predicates.is_empty() { "," } else { " where" },
--- a/src/librustdoc/core.rs
+++ b/src/librustdoc/core.rs
@ -1,4 +1,3 @@
-use rustc_ast::ast::CRATE_NODE_ID;
 use rustc_attr as attr;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_data_structures::sync::{self, Lrc};
@ -7,7 +6,7 @@
 use rustc_errors::json::JsonEmitter;
 use rustc_feature::UnstableFeatures;
 use rustc_hir::def::Namespace::TypeNS;
-use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LOCAL_CRATE};
+use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
 use rustc_hir::HirId;
 use rustc_interface::interface;
 use rustc_middle::middle::cstore::CrateStore;
@ -390,7 +389,12 @@ pub fn run_core(options: RustdocOptions) -> (clean::Crate, RenderInfo, RenderOpt
                resolver.borrow_mut().access(|resolver| {
                    for extern_name in &extern_names {
                        resolver
-                            .resolve_str_path_error(DUMMY_SP, extern_name, TypeNS, CRATE_NODE_ID)
+                            .resolve_str_path_error(
+                                DUMMY_SP,
+                                extern_name,
+                                TypeNS,
+                                LocalDefId { local_def_index: CRATE_DEF_INDEX },
+                            )
                            .unwrap_or_else(|()| {
                                panic!("Unable to resolve external crate {}", extern_name)
                            });
--- a/src/librustdoc/passes/collect_intra_doc_links.rs
+++ b/src/librustdoc/passes/collect_intra_doc_links.rs
@ -8,7 +8,7 @@
    Namespace::{self, *},
    PerNS, Res,
 };
-use rustc_hir::def_id::DefId;
+use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_middle::ty;
 use rustc_resolve::ParentScope;
 use rustc_session::lint;
@ -61,7 +61,7 @@ fn variant_field(
        &self,
        path_str: &str,
        current_item: &Option<String>,
-        module_id: rustc_ast::ast::NodeId,
+        module_id: LocalDefId,
    ) -> Result<(Res, Option<String>), ErrorKind> {
        let cx = self.cx;

@ -137,7 +137,7 @@ fn resolve(

        // In case we're in a module, try to resolve the relative path.
        if let Some(module_id) = parent_id.or(self.mod_ids.last().cloned()) {
-            let module_id = cx.tcx.hir().hir_id_to_node_id(module_id);
+            let module_id = cx.tcx.hir().local_def_id(module_id);
            let result = cx.enter_resolver(|resolver| {
                resolver.resolve_str_path_error(DUMMY_SP, &path_str, ns, module_id)
            });
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@ -247,6 +247,7 @@
 #![feature(cfg_target_has_atomic)]
 #![feature(cfg_target_thread_local)]
 #![feature(char_error_internals)]
+#![feature(char_internals)]
 #![feature(clamp)]
 #![feature(concat_idents)]
 #![feature(const_cstr_unchecked)]
--- a/src/libstd/sys_common/wtf8.rs
+++ b/src/libstd/sys_common/wtf8.rs
@ -201,9 +201,8 @@ pub fn from_wide(v: &[u16]) -> Wtf8Buf {
    /// Copied from String::push
    /// This does **not** include the WTF-8 concatenation check.
    fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
-        let c = unsafe { char::from_u32_unchecked(code_point.value) };
        let mut bytes = [0; 4];
-        let bytes = c.encode_utf8(&mut bytes).as_bytes();
+        let bytes = char::encode_utf8_raw(code_point.value, &mut bytes);
        self.bytes.extend_from_slice(bytes)
    }

@ -840,8 +839,7 @@ fn next(&mut self) -> Option<u16> {

        let mut buf = [0; 2];
        self.code_points.next().map(|code_point| {
-            let c = unsafe { char::from_u32_unchecked(code_point.value) };
-            let n = c.encode_utf16(&mut buf).len();
+            let n = char::encode_utf16_raw(code_point.value, &mut buf).len();
            if n == 2 {
                self.extra = buf[1];
            }
--- a/src/stdarch
+++ b/src/stdarch
@ -1 +1 @@
-Subproject commit ec6fccd34c30003a7ebf4e7a9dfe4e31f5b76e1b
+Subproject commit 45340c0e2fdadf2f131ef43cb683b5cafab0ff15
--- a/src/test/ui/bound-suggestions.rs
+++ b/src/test/ui/bound-suggestions.rs
@ -19,7 +19,7 @@ fn test_one_bound<T: Sized>(t: T) {
 }

 #[allow(dead_code)]
-fn test_no_bounds_where<X, Y>(x: X, y: Y) where X: std::fmt::Debug {
+fn test_no_bounds_where<X, Y>(x: X, y: Y) where X: std::fmt::Debug, {
    println!("{:?} {:?}", x, y);
    //~^ ERROR doesn't implement
 }