Add a footer in FileEncoder and check for it in MemDecoder

Ben Kimock 2024-05-03 21:17:57 -04:00
parent 39e02f1bd1
commit 95150d7246
16 changed files with 115 additions and 52 deletions
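In outline: FileEncoder::finish now writes a fixed byte string, FOOTER = b"rust-end-file", as the last bytes of every artifact, and MemDecoder::new becomes fallible, rejecting any input that does not end with that footer. Truncated or corrupt files are therefore caught once, up front, instead of failing somewhere in the middle of decoding. A minimal standalone sketch of the scheme, with illustrative free functions rather than the real rustc_serialize types:

const FOOTER: &[u8] = b"rust-end-file";

// Encoder side: the footer is the very last thing written to the file.
fn finish(mut payload: Vec<u8>) -> Vec<u8> {
    payload.extend_from_slice(FOOTER);
    payload
}

// Decoder side: construction fails unless the footer is present, so a
// truncated file is rejected before any field is decoded.
fn new(data: &[u8]) -> Option<&[u8]> {
    data.strip_suffix(FOOTER)
}

fn main() {
    let file = finish(b"serialized data".to_vec());
    assert!(new(&file).is_some());
    assert!(new(&file[..file.len() - 1]).is_none()); // truncation is caught
}

Each caller of MemDecoder::new then decides how to surface the failure, as the diffs below show: a hard error for .rlink files and crate metadata, a warning plus rebuild for incremental artifacts.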

View file

@ -195,6 +195,7 @@ pub enum CodegenErrors {
EmptyVersionNumber,
EncodingVersionMismatch { version_array: String, rlink_version: u32 },
RustcVersionMismatch { rustc_version: String },
CorruptFile,
}
pub fn provide(providers: &mut Providers) {
@ -265,7 +266,9 @@ pub fn deserialize_rlink(
});
}
let mut decoder = MemDecoder::new(&data[4..], 0);
let Some(mut decoder) = MemDecoder::new(&data[4..], 0) else {
return Err(CodegenErrors::CorruptFile);
};
let rustc_version = decoder.read_str();
if rustc_version != sess.cfg_version {
return Err(CodegenErrors::RustcVersionMismatch {

View file

@ -10,6 +10,8 @@ driver_impl_ice_path_error = the ICE couldn't be written to `{$path}`: {$error}
driver_impl_ice_path_error_env = the environment variable `RUSTC_ICE` is set to `{$env_var}`
driver_impl_ice_version = rustc {$version} running on {$triple}
driver_impl_rlink_corrupt_file = corrupt metadata encountered in `{$file}`
driver_impl_rlink_empty_version_number = The input does not contain version number
driver_impl_rlink_encoding_version_mismatch = .rlink file was produced with encoding version `{$version_array}`, but the current version is `{$rlink_version}`

View file

@ -96,7 +96,7 @@ pub(super) fn install() {}
use crate::session_diagnostics::{
RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch,
RLinkWrongFileType, RlinkNotAFile, RlinkUnableToRead,
RLinkWrongFileType, RlinkCorruptFile, RlinkNotAFile, RlinkUnableToRead,
};
rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
@ -645,8 +645,7 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) {
match err {
CodegenErrors::WrongFileType => dcx.emit_fatal(RLinkWrongFileType),
CodegenErrors::EmptyVersionNumber => dcx.emit_fatal(RLinkEmptyVersionNumber),
CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => sess
.dcx()
CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => dcx
.emit_fatal(RLinkEncodingVersionMismatch { version_array, rlink_version }),
CodegenErrors::RustcVersionMismatch { rustc_version } => {
dcx.emit_fatal(RLinkRustcVersionMismatch {
@ -654,6 +653,9 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) {
current_version: sess.cfg_version,
})
}
CodegenErrors::CorruptFile => {
dcx.emit_fatal(RlinkCorruptFile { file: file.display().to_string() });
}
};
}
};

View file

@ -32,6 +32,12 @@ pub(crate) struct RLinkRustcVersionMismatch<'a> {
#[diag(driver_impl_rlink_no_a_file)]
pub(crate) struct RlinkNotAFile;
#[derive(Diagnostic)]
#[diag(driver_impl_rlink_corrupt_file)]
pub(crate) struct RlinkCorruptFile {
pub file: String,
}
#[derive(Diagnostic)]
#[diag(driver_impl_ice)]
pub(crate) struct Ice;

View file

@ -21,6 +21,8 @@ incremental_cargo_help_2 =
incremental_copy_workproduct_to_cache =
error copying object file `{$from}` to incremental directory as `{$to}`: {$err}
incremental_corrupt_file = corrupt incremental compilation artifact found at `{$path}`. This file will automatically be ignored and deleted. If you see this message repeatedly or can provoke it without manually manipulating the compiler's artifacts, please file an issue. The incremental compilation system relies on hardlinks and filesystem locks behaving correctly, and may not deal well with OS crashes, so whatever information you can provide about your filesystem or other state may be very relevant.
incremental_create_dep_graph = failed to create dependency graph at `{$path}`: {$err}
incremental_create_incr_comp_dir =

View file

@ -306,3 +306,9 @@ pub struct DeleteWorkProduct<'a> {
pub path: &'a Path,
pub err: std::io::Error,
}
#[derive(Diagnostic)]
#[diag(incremental_corrupt_file)]
pub struct CorruptFile<'a> {
pub path: &'a Path,
}

View file

@ -115,7 +115,12 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr
if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result {
// Decode the list of work_products
let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos);
let Some(mut work_product_decoder) =
MemDecoder::new(&work_products_data[..], start_pos)
else {
sess.dcx().emit_warn(errors::CorruptFile { path: &work_products_path });
return LoadResult::DataOutOfDate;
};
let work_products: Vec<SerializedWorkProduct> =
Decodable::decode(&mut work_product_decoder);
@ -145,7 +150,10 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr
LoadResult::DataOutOfDate => LoadResult::DataOutOfDate,
LoadResult::LoadDepGraph(path, err) => LoadResult::LoadDepGraph(path, err),
LoadResult::Ok { data: (bytes, start_pos) } => {
let mut decoder = MemDecoder::new(&bytes, start_pos);
let Some(mut decoder) = MemDecoder::new(&bytes, start_pos) else {
sess.dcx().emit_warn(errors::CorruptFile { path: &path });
return LoadResult::DataOutOfDate;
};
let prev_commandline_args_hash = u64::decode(&mut decoder);
if prev_commandline_args_hash != expected_hash {
@ -181,9 +189,14 @@ pub fn load_query_result_cache(sess: &Session) -> Option<OnDiskCache<'_>> {
let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache");
match load_data(&query_cache_path(sess), sess) {
let path = query_cache_path(sess);
match load_data(&path, sess) {
LoadResult::Ok { data: (bytes, start_pos) } => {
Some(OnDiskCache::new(sess, bytes, start_pos))
let cache = OnDiskCache::new(sess, bytes, start_pos).unwrap_or_else(|| {
sess.dcx().emit_warn(errors::CorruptFile { path: &path });
OnDiskCache::new_empty(sess.source_map())
});
Some(cache)
}
_ => Some(OnDiskCache::new_empty(sess.source_map())),
}
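The recovery strategy in this file is worth calling out: a missing footer in an incremental artifact is handled like any other stale artifact, with a warning and a rebuild (LoadResult::DataOutOfDate or an empty OnDiskCache) rather than an abort. A hedged sketch of that shape, using illustrative names rather than rustc's:

const FOOTER: &[u8] = b"rust-end-file";

enum LoadResult<'a> {
    Ok { data: &'a [u8] },
    DataOutOfDate,
}

fn load_artifact(bytes: &[u8]) -> LoadResult<'_> {
    match bytes.strip_suffix(FOOTER) {
        // Footer present: hand the payload to the real decoding path.
        Some(payload) => LoadResult::Ok { data: payload },
        // Footer missing: warn and fall back to a full rebuild.
        None => {
            eprintln!("warning: corrupt incremental compilation artifact, ignoring it");
            LoadResult::DataOutOfDate
        }
    }
}

fn main() {
    let mut good = b"cached data".to_vec();
    good.extend_from_slice(FOOTER);
    match load_artifact(&good) {
        LoadResult::Ok { data } => assert_eq!(data, b"cached data"),
        LoadResult::DataOutOfDate => unreachable!(),
    }
    assert!(matches!(load_artifact(b"truncated bytes"), LoadResult::DataOutOfDate));
}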

View file

@ -853,7 +853,12 @@ fn get_metadata_section<'p>(
slice_owned(mmap, Deref::deref)
}
};
let blob = MetadataBlob(raw_bytes);
let Some(blob) = MetadataBlob::new(raw_bytes) else {
return Err(MetadataError::LoadFailure(format!(
"corrupt metadata encountered in {}",
filename.display()
)));
};
match blob.check_compatibility(cfg_version) {
Ok(()) => Ok(blob),
Err(None) => Err(MetadataError::LoadFailure(format!(

View file

@ -40,10 +40,9 @@
mod cstore_impl;
/// A reference to the raw binary version of crate metadata.
/// A `MetadataBlob` internally is just a reference counted pointer to
/// the actual data, so cloning it is cheap.
#[derive(Clone)]
pub(crate) struct MetadataBlob(pub(crate) OwnedSlice);
/// This struct applies [`MemDecoder`]'s validation when constructed
/// so that later constructions are guaranteed to succeed.
pub(crate) struct MetadataBlob(OwnedSlice);
impl std::ops::Deref for MetadataBlob {
type Target = [u8];
@ -54,6 +53,16 @@ fn deref(&self) -> &[u8] {
}
}
impl MetadataBlob {
pub fn new(slice: OwnedSlice) -> Option<Self> {
if MemDecoder::new(&*slice, 0).is_some() { Some(Self(slice)) } else { None }
}
pub fn bytes(&self) -> &OwnedSlice {
&self.0
}
}
/// A map from external crate numbers (as decoded from some crate file) to
/// local crate numbers (as generated during this session). Each external
/// crate may refer to types in other external crates, and each has their
@ -165,7 +174,14 @@ fn tcx(self) -> Option<TyCtxt<'tcx>> {
fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> {
let tcx = self.tcx();
DecodeContext {
opaque: MemDecoder::new(self.blob(), pos),
// FIXME: This unwrap should never panic because we check that it won't when creating
// `MetadataBlob`. Ideally we'd just have a `MetadataDecoder` and hand out subslices of
// it as we do elsewhere in the compiler using `MetadataDecoder::split_at`. But we own
// the data for the decoder so holding onto the `MemDecoder` too would make us a
// self-referential struct which is downright goofy because `MetadataBlob` is already
// self-referential. Probably `MemDecoder` should contain an `OwnedSlice`, but that
// demands a significant refactoring due to our crate graph.
opaque: MemDecoder::new(self.blob(), pos).unwrap(),
cdata: self.cdata(),
blob: self.blob(),
sess: self.sess().or(tcx.map(|tcx| tcx.sess)),
@ -393,7 +409,7 @@ fn with_position<F, R>(&mut self, pos: usize, f: F) -> R
where
F: FnOnce(&mut Self) -> R,
{
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
let new_opaque = self.opaque.split_at(pos);
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode);
let r = f(self);
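Two ideas from this file generalize: MetadataBlob::new runs the decoder's validation once at construction so that the later MemDecoder::new(...).unwrap() on the same blob is justified, and with_position now derives sub-decoders via split_at instead of re-validating. A small sketch of the validate-once pattern under assumed names (the real blob wraps an OwnedSlice, not a Vec):

const FOOTER: &[u8] = b"rust-end-file";

struct Blob(Vec<u8>);

impl Blob {
    fn new(bytes: Vec<u8>) -> Option<Blob> {
        // Same check the decoder performs; doing it here means later
        // decoder constructions on this blob cannot fail.
        if bytes.strip_suffix(FOOTER).is_some() { Some(Blob(bytes)) } else { None }
    }

    fn decoder_at(&self, pos: usize) -> &[u8] {
        // Guaranteed not to panic: `new` already proved the footer is present.
        let payload = self.0.strip_suffix(FOOTER).unwrap();
        &payload[pos..]
    }
}

fn main() {
    let blob = Blob::new(b"payloadrust-end-file".to_vec()).unwrap();
    assert_eq!(blob.decoder_at(3), b"load");
}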

View file

@ -48,7 +48,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMapRef<'static>
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> {
let len = d.read_usize();
let pos = d.position();
let o = d.blob().clone().0.slice(|blob| &blob[pos..pos + len]);
let o = d.blob().bytes().clone().slice(|blob| &blob[pos..pos + len]);
// Although we already have the data we need via the `OwnedSlice`, we still need
// to advance the `DecodeContext`'s position so it's in a valid state after

View file

@ -154,24 +154,22 @@ fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId {
impl<'sess> OnDiskCache<'sess> {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
debug_assert!(sess.opts.incremental.is_some());
pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Option<Self> {
assert!(sess.opts.incremental.is_some());
// Wrap in a scope so we can borrow `data`.
let footer: Footer = {
let mut decoder = MemDecoder::new(&data, start_pos);
let mut decoder = MemDecoder::new(&data, start_pos)?;
// Decode the *position* of the footer, which can be found in the
// last 8 bytes of the file.
let footer_pos = decoder
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
IntEncodedWithFixedSize::decode(decoder).0 as usize
});
// Decode the file footer, which contains all the lookup tables, etc.
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER))
};
// Decode the *position* of the footer, which can be found in the
// last 8 bytes of the file.
let footer_pos = decoder
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
IntEncodedWithFixedSize::decode(decoder).0 as usize
});
// Decode the file footer, which contains all the lookup tables, etc.
let footer: Footer =
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER));
Self {
Some(Self {
serialized_data: RwLock::new(Some(data)),
file_index_to_stable_id: footer.file_index_to_stable_id,
file_index_to_file: Default::default(),
@ -184,7 +182,7 @@ pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
expn_data: footer.expn_data,
foreign_expn_data: footer.foreign_expn_data,
hygiene_context: Default::default(),
}
})
}
pub fn new_empty(source_map: &'sess SourceMap) -> Self {
@ -437,7 +435,8 @@ fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T
let serialized_data = self.serialized_data.read();
let mut decoder = CacheDecoder {
tcx,
opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize())
.unwrap(),
source_map: self.source_map,
file_index_to_file: &self.file_index_to_file,
file_index_to_stable_id: &self.file_index_to_stable_id,
@ -558,7 +557,7 @@ fn with_position<F, R>(&mut self, pos: usize, f: F) -> R
{
debug_assert!(pos < self.opaque.len());
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
let new_opaque = self.opaque.split_at(pos);
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
let r = f(self);
self.opaque = old_opaque;
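For context on OnDiskCache::new above: the query result cache already carries its own logical footer (the Footer struct holding the lookup tables) whose position is written in the last 8 bytes of the payload; this commit only makes the constructor fallible when the new end-of-file footer is absent. A sketch of reading that trailing position, assuming the fixed-size integer is a little-endian u64 (the real code goes through IntEncodedWithFixedSize, and `payload` here stands for the data after MemDecoder::new has stripped the end-of-file footer):

fn footer_pos(payload: &[u8]) -> usize {
    // The final 8 bytes hold the offset of the serialized Footer table.
    let tail: [u8; 8] = payload[payload.len() - 8..].try_into().unwrap();
    u64::from_le_bytes(tail) as usize
}

fn main() {
    let mut payload = vec![0xAB_u8; 32];
    payload[24..].copy_from_slice(&24_u64.to_le_bytes());
    assert_eq!(footer_pos(&payload), 24);
}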

View file

@ -182,15 +182,13 @@ impl SerializedDepGraph {
pub fn decode<D: Deps>(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> {
// The last 16 bytes are the node count and edge count.
debug!("position: {:?}", d.position());
let (node_count, edge_count, graph_size) =
d.with_position(d.len() - 3 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
let (node_count, edge_count) =
d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
debug!("position: {:?}", d.position());
let node_count = IntEncodedWithFixedSize::decode(d).0 as usize;
let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize;
let graph_size = IntEncodedWithFixedSize::decode(d).0 as usize;
(node_count, edge_count, graph_size)
(node_count, edge_count)
});
assert_eq!(d.len(), graph_size);
debug!("position: {:?}", d.position());
debug!(?node_count, ?edge_count);
@ -606,8 +604,6 @@ fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult {
debug!("position: {:?}", encoder.position());
IntEncodedWithFixedSize(node_count).encode(&mut encoder);
IntEncodedWithFixedSize(edge_count).encode(&mut encoder);
let graph_size = encoder.position() + IntEncodedWithFixedSize::ENCODED_SIZE;
IntEncodedWithFixedSize(graph_size as u64).encode(&mut encoder);
debug!("position: {:?}", encoder.position());
// Drop the encoder so that nothing is written after the counts.
let result = encoder.finish();
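The dep-graph change above drops the trailing graph_size word, presumably because a truncated file is now rejected when the MemDecoder is constructed, which makes the explicit assert_eq!(d.len(), graph_size) self-check redundant. The node and edge counts remain the last two fixed-size integers of the payload. A sketch of reading them, assuming little-endian u64s as with IntEncodedWithFixedSize (illustrative, not rustc's decoder):

fn read_counts(payload: &[u8]) -> (u64, u64) {
    // The last 16 bytes of the (footer-stripped) payload are the two counts.
    let tail = &payload[payload.len() - 16..];
    let node_count = u64::from_le_bytes(tail[..8].try_into().unwrap());
    let edge_count = u64::from_le_bytes(tail[8..].try_into().unwrap());
    (node_count, edge_count)
}

fn main() {
    let mut payload = vec![0_u8; 16];
    payload[..8].copy_from_slice(&5_u64.to_le_bytes());
    payload[8..].copy_from_slice(&9_u64.to_le_bytes());
    assert_eq!(read_counts(&payload), (5, 9));
}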

View file

@ -17,6 +17,8 @@
pub type FileEncodeResult = Result<usize, (PathBuf, io::Error)>;
const FOOTER: &[u8] = b"rust-end-file";
/// The size of the buffer in `FileEncoder`.
const BUF_SIZE: usize = 8192;
@ -181,6 +183,7 @@ fn panic_invalid_write<const N: usize>(written: usize) {
}
pub fn finish(&mut self) -> FileEncodeResult {
self.write_all(FOOTER);
self.flush();
#[cfg(debug_assertions)]
{
@ -261,15 +264,18 @@ pub struct MemDecoder<'a> {
impl<'a> MemDecoder<'a> {
#[inline]
pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
pub fn new(data: &'a [u8], position: usize) -> Option<MemDecoder<'a>> {
let data = data.strip_suffix(FOOTER)?;
let Range { start, end } = data.as_ptr_range();
MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }
Some(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData })
}
#[inline]
pub fn data(&self) -> &'a [u8] {
// SAFETY: This recovers the original slice, only using members we never modify.
unsafe { std::slice::from_raw_parts(self.start, self.len()) }
pub fn split_at(&self, position: usize) -> MemDecoder<'a> {
assert!(position <= self.len());
// SAFETY: We checked above that this offset is within the original slice
let current = unsafe { self.start.add(position) };
MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData }
}
#[inline]
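Note the shape change in this file: data() (which reconstructed the original slice) is replaced by split_at, so sub-decoders are created by offsetting into the already-validated, footer-stripped region rather than by calling new again on recovered bytes, which would now fail to find the footer that has already been removed. A simplified sketch of that idea with an index-based struct instead of the real pointer-based MemDecoder:

struct Decoder<'a> {
    data: &'a [u8], // footer already stripped by the constructor
    pos: usize,
}

impl<'a> Decoder<'a> {
    fn split_at(&self, pos: usize) -> Decoder<'a> {
        // Same bounds, new cursor; no re-validation needed.
        assert!(pos <= self.data.len());
        Decoder { data: self.data, pos }
    }
}

fn main() {
    let d = Decoder { data: b"abcdef", pos: 0 };
    let sub = d.split_at(4);
    assert_eq!(&sub.data[sub.pos..], b"ef");
}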

View file

@ -1,4 +1,5 @@
use rustc_serialize::leb128::*;
use rustc_serialize::opaque::MemDecoder;
use rustc_serialize::Decoder;
macro_rules! impl_test_unsigned_leb128 {
@ -25,13 +26,15 @@ fn $test_name() {
let n = $write_fn_name(&mut buf, x);
stream.extend(&buf[..n]);
}
let stream_end = stream.len();
stream.extend(b"rust-end-file");
let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
let mut decoder = MemDecoder::new(&stream, 0).unwrap();
for &expected in &values {
let actual = $read_fn_name(&mut decoder);
assert_eq!(expected, actual);
}
assert_eq!(stream.len(), decoder.position());
assert_eq!(stream_end, decoder.position());
}
};
}
@ -72,13 +75,15 @@ fn $test_name() {
let n = $write_fn_name(&mut buf, x);
stream.extend(&buf[..n]);
}
let stream_end = stream.len();
stream.extend(b"rust-end-file");
let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
let mut decoder = MemDecoder::new(&stream, 0).unwrap();
for &expected in &values {
let actual = $read_fn_name(&mut decoder);
assert_eq!(expected, actual);
}
assert_eq!(stream.len(), decoder.position());
assert_eq!(stream_end, decoder.position());
}
};
}
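The test updates above capture a subtle consequence: because MemDecoder::new strips the footer, the decoder's length and final position refer to the payload only, so the assertions compare against stream_end (the length recorded before the footer was appended) rather than stream.len(). A tiny self-contained illustration of the same arithmetic:

const FOOTER: &[u8] = b"rust-end-file";

fn main() {
    let mut stream = vec![1_u8, 2, 3];
    let stream_end = stream.len();
    stream.extend_from_slice(FOOTER);
    // What the decoder actually exposes excludes the footer bytes, so a full
    // read finishes at `stream_end`, not `stream.len()`.
    let payload = stream.strip_suffix(FOOTER).unwrap();
    assert_eq!(payload.len(), stream_end);
    assert_eq!(stream.len(), stream_end + FOOTER.len());
}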

View file

@ -42,7 +42,7 @@ fn check_round_trip<
encoder.finish().unwrap();
let data = fs::read(&tmpfile).unwrap();
let mut decoder = MemDecoder::new(&data[..], 0);
let mut decoder = MemDecoder::new(&data[..], 0).unwrap();
for value in values {
let decoded = Decodable::decode(&mut decoder);
assert_eq!(value, decoded);

View file

@ -344,7 +344,9 @@ pub(crate) fn load_call_locations(
Ok(bytes) => bytes,
Err(e) => dcx.fatal(format!("failed to load examples: {e}")),
};
let mut decoder = MemDecoder::new(&bytes, 0);
let Some(mut decoder) = MemDecoder::new(&bytes, 0) else {
dcx.fatal(format!("Corrupt metadata encountered in {path}"))
};
let calls = AllCallLocations::decode(&mut decoder);
for (function, fn_calls) in calls.into_iter() {