From 646e8e729168d95dbb65ee2d27cd00d2f0ece846 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 22 Feb 2024 08:12:09 -0800 Subject: [PATCH] rustc: Fix wasm64 metadata object files It looks like LLD will detect object files being either 32 or 64-bit depending on any memory present. LLD will additionally reject 32-bit objects during a 64-bit link. Previously metadata objects did not have any memories in them which led LLD to conclude they were 32-bit objects which broke 64-bit targets for wasm. This commit fixes this by ensuring that for 64-bit targets there's a memory object present to get LLD to detect it's a 64-bit target. Additionally this commit moves away from a hand-crafted wasm encoder to the `wasm-encoder` crate on crates.io as the complexity grows for the generated object file. Closes #121460 --- Cargo.lock | 10 +++ compiler/rustc_codegen_ssa/Cargo.toml | 1 + compiler/rustc_codegen_ssa/src/back/link.rs | 9 +- .../rustc_codegen_ssa/src/back/metadata.rs | 85 +++++++++---------- src/tools/tidy/src/deps.rs | 3 + 5 files changed, 59 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b19c4a721e..5bd56d74cb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3649,6 +3649,7 @@ dependencies = [ "thin-vec", "thorin-dwp", "tracing", + "wasm-encoder", "windows", ] @@ -6104,6 +6105,15 @@ version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" +[[package]] +name = "wasm-encoder" +version = "0.200.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e3fb0c8fbddd78aa6095b850dfeedbc7506cf5f81e633f69cf8f2333ab84b9" +dependencies = [ + "leb128", +] + [[package]] name = "wasmparser" version = "0.118.2" diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index 781c54bdef8..9359df5de6a 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ b/compiler/rustc_codegen_ssa/Cargo.toml @@ 
-38,6 +38,7 @@ tempfile = "3.2" thin-vec = "0.2.12" thorin-dwp = "0.7" tracing = "0.1" +wasm-encoder = "0.200.0" # tidy-alphabetical-end [target.'cfg(unix)'.dependencies] diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 1ad0dec0640..8c90f2d91f0 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -315,8 +315,11 @@ fn link_rlib<'a>( let trailing_metadata = match flavor { RlibFlavor::Normal => { - let (metadata, metadata_position) = - create_wrapper_file(sess, b".rmeta".to_vec(), codegen_results.metadata.raw_data()); + let (metadata, metadata_position) = create_wrapper_file( + sess, + ".rmeta".to_string(), + codegen_results.metadata.raw_data(), + ); let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME); match metadata_position { MetadataPosition::First => { @@ -384,7 +387,7 @@ fn link_rlib<'a>( let path = find_native_static_library(filename.as_str(), true, &lib_search_paths, sess); let src = read(path) .map_err(|e| sess.dcx().emit_fatal(errors::ReadFileError { message: e }))?; - let (data, _) = create_wrapper_file(sess, b".bundled_lib".to_vec(), &src); + let (data, _) = create_wrapper_file(sess, ".bundled_lib".to_string(), &src); let wrapper_file = emit_wrapper_file(sess, &data, tmpdir, filename.as_str()); packed_bundled_libs.push(wrapper_file); } else { diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index c6b04431fab..158b8fb8727 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -1,5 +1,6 @@ //! 
Reading of the rustc metadata for rlibs and dylibs +use std::borrow::Cow; use std::fs::File; use std::io::Write; use std::path::Path; @@ -15,7 +16,6 @@ use rustc_metadata::creader::MetadataLoader; use rustc_metadata::fs::METADATA_FILENAME; use rustc_metadata::EncodedMetadata; -use rustc_serialize::leb128; use rustc_session::Session; use rustc_span::sym; use rustc_target::abi::Endian; @@ -434,12 +434,15 @@ pub enum MetadataPosition { /// automatically removed from the final output. pub fn create_wrapper_file( sess: &Session, - section_name: Vec<u8>, + section_name: String, data: &[u8], ) -> (Vec<u8>, MetadataPosition) { let Some(mut file) = create_object_file(sess) else { if sess.target.is_like_wasm { - return (create_metadata_file_for_wasm(data, &section_name), MetadataPosition::First); + return ( + create_metadata_file_for_wasm(sess, data, &section_name), + MetadataPosition::First, + ); } // Targets using this branch don't have support implemented here yet or @@ -452,7 +455,7 @@ pub fn create_wrapper_file( } else { file.add_section( file.segment_name(StandardSegment::Debug).to_vec(), - section_name, + section_name.into_bytes(), SectionKind::Debug, ) }; @@ -524,7 +527,7 @@ pub fn create_compressed_metadata_file( let Some(mut file) = create_object_file(sess) else { if sess.target.is_like_wasm { - return create_metadata_file_for_wasm(&packed_metadata, b".rustc"); + return create_metadata_file_for_wasm(sess, &packed_metadata, ".rustc"); } return packed_metadata.to_vec(); }; @@ -624,51 +627,41 @@ pub fn create_compressed_metadata_file_for_xcoff( /// `data`. /// /// NB: the `object` crate does not yet have support for writing the wasm -/// object file format. The format is simple enough that for now an extra crate -/// from crates.io (such as `wasm-encoder`). The file format is: +/// object file format. In lieu of that the `wasm-encoder` crate is used to +/// build a wasm file by hand. 
/// -/// * 4-byte header "\0asm" -/// * 4-byte version number - 1u32 in little-endian format -/// * concatenated sections, which for this object is always "custom sections" -/// -/// Custom sections are then defined by: -/// * 1-byte section identifier - 0 for a custom section -/// * leb-encoded section length (size of the contents beneath this bullet) -/// * leb-encoded custom section name length -/// * custom section name -/// * section contents -/// -/// One custom section, `linking`, is added here in accordance with +/// The wasm object file format is defined at /// <https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md> -/// which is required to inform LLD that this is an object file but it should -/// otherwise basically ignore it if it otherwise looks at it. The linking -/// section currently is defined by a single version byte (2) and then further -/// sections, but we have no more sections, so it's just the byte "2". +/// and mainly consists of a `linking` custom section. In this case the custom +/// section there is empty except for a version marker indicating what format +/// it's in. /// -/// The next custom section is the one we're interested in. -pub fn create_metadata_file_for_wasm(data: &[u8], section_name: &[u8]) -> Vec<u8> { - let mut bytes = b"\0asm\x01\0\0\0".to_vec(); +/// The main purpose of this is to contain a custom section with `section_name`, +/// which is then appended after `linking`. +/// +/// As a further detail the object needs to have a 64-bit memory if `wasm64` is +/// the target or otherwise it's interpreted as a 32-bit object which is +/// incompatible with 64-bit ones. 
+pub fn create_metadata_file_for_wasm(sess: &Session, data: &[u8], section_name: &str) -> Vec<u8> { + assert!(sess.target.is_like_wasm); + let mut module = wasm_encoder::Module::new(); + let mut imports = wasm_encoder::ImportSection::new(); - let mut append_custom_section = |section_name: &[u8], data: &[u8]| { - let mut section_name_len = [0; leb128::max_leb128_len::<usize>()]; - let off = leb128::write_usize_leb128(&mut section_name_len, section_name.len()); - let section_name_len = &section_name_len[..off]; - - let mut section_len = [0; leb128::max_leb128_len::<usize>()]; - let off = leb128::write_usize_leb128( - &mut section_len, - data.len() + section_name_len.len() + section_name.len(), + if sess.target.pointer_width == 64 { + imports.import( + "env", + "__linear_memory", + wasm_encoder::MemoryType { minimum: 0, maximum: None, memory64: true, shared: false }, ); - let section_len = &section_len[..off]; + } - bytes.push(0u8); - bytes.extend_from_slice(section_len); - bytes.extend_from_slice(section_name_len); - bytes.extend_from_slice(section_name); - bytes.extend_from_slice(data); - }; - - append_custom_section(b"linking", &[2]); - append_custom_section(section_name, data); - bytes + if imports.len() > 0 { + module.section(&imports); + } + module.section(&wasm_encoder::CustomSection { + name: "linking".into(), + data: Cow::Borrowed(&[2]), + }); + module.section(&wasm_encoder::CustomSection { name: section_name.into(), data: data.into() }); + module.finish() } diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index cff219285dc..79bc380c1e9 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -92,6 +92,7 @@ ("ryu", "Apache-2.0 OR BSL-1.0"), // BSL is not acceptble, but we use it under Apache-2.0 // cargo/... 
(because of serde) ("self_cell", "Apache-2.0"), // rustc (fluent translations) ("snap", "BSD-3-Clause"), // rustc + ("wasm-encoder", "Apache-2.0 WITH LLVM-exception"), // rustc ("wasmparser", "Apache-2.0 WITH LLVM-exception"), // rustc // tidy-alphabetical-end ]; @@ -267,6 +268,7 @@ "jemalloc-sys", "jobserver", "lazy_static", + "leb128", "libc", "libloading", "linux-raw-sys", @@ -380,6 +382,7 @@ "valuable", "version_check", "wasi", + "wasm-encoder", "wasmparser", "winapi", "winapi-i686-pc-windows-gnu",