Auto merge of #98851 - klensy:encode_symbols, r=cjgillot

rustc_metadata: dedupe strings to prevent multiple copies in rmeta/query cache from bloating file size

r? `@cjgillot`

Encodes strings in the rmeta/query cache so that duplicates are stored as offsets to their first occurrence, reducing file size.
bors 2022-08-18 23:53:22 +00:00
commit 71ecf5d359
5 changed files with 113 additions and 3 deletions
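
To make the scheme concrete, here is a minimal, standalone sketch of the same idea. This is not the rustc code: the `Writer`/`Reader` types, the fixed-width length/offset encoding, and the byte layout are simplifying assumptions; rustc uses its opaque `Encoder`/`Decoder` and interns the result as a `Symbol`.

```rust
use std::collections::HashMap;

const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;

#[derive(Default)]
struct Writer {
    buf: Vec<u8>,
    // First position at which each distinct string's payload was written.
    table: HashMap<String, usize>,
}

impl Writer {
    fn write_symbol(&mut self, s: &str) {
        if let Some(&pos) = self.table.get(s) {
            // Already emitted once: write a back-reference to the first copy.
            self.buf.push(SYMBOL_OFFSET);
            self.buf.extend_from_slice(&(pos as u64).to_le_bytes());
        } else {
            // First occurrence: write the string inline and remember where.
            self.buf.push(SYMBOL_STR);
            let pos = self.buf.len(); // payload position, just past the tag
            self.table.insert(s.to_owned(), pos);
            self.buf.extend_from_slice(&(s.len() as u64).to_le_bytes());
            self.buf.extend_from_slice(s.as_bytes());
        }
    }
}

struct Reader<'a> {
    buf: &'a [u8],
    pos: usize,
}

impl<'a> Reader<'a> {
    fn read_u64(&mut self) -> u64 {
        let bytes: [u8; 8] = self.buf[self.pos..self.pos + 8].try_into().unwrap();
        self.pos += 8;
        u64::from_le_bytes(bytes)
    }

    fn read_str(&mut self) -> &'a str {
        let len = self.read_u64() as usize;
        let s = std::str::from_utf8(&self.buf[self.pos..self.pos + len]).unwrap();
        self.pos += len;
        s
    }

    fn read_symbol(&mut self) -> &'a str {
        let tag = self.buf[self.pos];
        self.pos += 1;
        match tag {
            SYMBOL_STR => self.read_str(),
            SYMBOL_OFFSET => {
                // Jump to the first copy, read it, then restore the position,
                // mirroring the set_position/read_str/set_position sequence in the patch.
                let target = self.read_u64() as usize;
                let saved = self.pos;
                self.pos = target;
                let s = self.read_str();
                self.pos = saved;
                s
            }
            _ => unreachable!(),
        }
    }
}

fn main() {
    let mut w = Writer::default();
    for s in ["clone", "iter", "clone", "clone"] {
        w.write_symbol(s);
    }
    let mut r = Reader { buf: &w.buf[..], pos: 0 };
    let decoded: Vec<&str> = (0..4).map(|_| r.read_symbol()).collect();
    assert_eq!(decoded, ["clone", "iter", "clone", "clone"]);
    // The two repeats cost a tag plus an offset instead of the full string.
    println!("{} bytes for {:?}", w.buf.len(), decoded);
}
```

The patch below follows the same shape: the per-encoder `symbol_table` map lives only for the duration of encoding and is never written to disk; everything the decoder needs is carried by the `SYMBOL_STR`/`SYMBOL_OFFSET` tags and the recorded offsets.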

View file

@@ -637,6 +637,35 @@ fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
}
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol {
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
let tag = d.read_u8();
match tag {
SYMBOL_STR => {
let s = d.read_str();
Symbol::intern(s)
}
SYMBOL_OFFSET => {
// read str offset
let pos = d.read_usize();
let old_pos = d.opaque.position();
// move to str offset and read
d.opaque.set_position(pos);
let s = d.read_str();
let sym = Symbol::intern(s);
// restore position
d.opaque.set_position(old_pos);
sym
}
_ => unreachable!(),
}
}
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for &'tcx [ty::abstract_const::Node<'tcx>] {
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
ty::codec::RefDecodable::decode(d)

View file

@@ -39,6 +39,7 @@
};
use rustc_target::abi::VariantIdx;
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::hash::Hash;
use std::io::{Read, Seek, Write};
use std::iter;
@@ -75,6 +76,7 @@ pub(super) struct EncodeContext<'a, 'tcx> {
required_source_files: Option<GrowableBitSet<usize>>,
is_proc_macro: bool,
hygiene_ctxt: &'a HygieneEncodeContext,
symbol_table: FxHashMap<Symbol, usize>,
}
/// If the current crate is a proc-macro, returns early with `Lazy::empty()`.
@@ -307,6 +309,24 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
}
}
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol {
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
match s.symbol_table.entry(*self) {
Entry::Vacant(o) => {
s.opaque.emit_u8(SYMBOL_STR);
let pos = s.opaque.position();
o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
}
}
}
}
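Note that `pos` is captured only after the `SYMBOL_STR` tag byte has been emitted, so the offset recorded in `symbol_table` points directly at the string payload; the decoder's `SYMBOL_OFFSET` arm can therefore `set_position(pos)` and immediately `read_str()` without skipping a tag. A first occurrence costs one extra tag byte compared to the old blanket encoding, while every repeat shrinks to a tag plus an offset.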
impl<'a, 'tcx> TyEncoder for EncodeContext<'a, 'tcx> {
const CLEAR_CROSS_CRATE: bool = true;
@@ -2259,6 +2279,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
required_source_files,
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
hygiene_ctxt: &hygiene_ctxt,
symbol_table: Default::default(),
};
// Encode the rustc version string in a predictable location.

View file

@@ -445,6 +445,10 @@ struct GeneratorData<'tcx> {
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
const TAG_PARTIAL_SPAN: u8 = 2;
// Tags for encoding Symbols
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
pub fn provide(providers: &mut Providers) {
encoder::provide(providers);
decoder::provide(providers);

View file

@@ -22,8 +22,9 @@
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
};
use rustc_span::source_map::{SourceMap, StableSourceFileId};
use rustc_span::CachingSourceMapView;
use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, SourceFile, Span};
use rustc_span::{CachingSourceMapView, Symbol};
use std::collections::hash_map::Entry;
use std::io;
use std::mem;
@@ -38,6 +39,10 @@
const TAG_SYNTAX_CONTEXT: u8 = 0;
const TAG_EXPN_DATA: u8 = 1;
// Tags for encoding Symbols
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
/// Provides an interface to incremental compilation data cached from the
/// previous compilation session. This data will eventually include the results
/// of a few selected queries (like `typeck` and `mir_optimized`) and
@@ -254,6 +259,7 @@ fn serialize<'tcx>(&self, tcx: TyCtxt<'tcx>, encoder: FileEncoder) -> FileEncode
source_map: CachingSourceMapView::new(tcx.sess.source_map()),
file_to_file_index,
hygiene_context: &hygiene_encode_context,
symbol_table: Default::default(),
};
// Encode query results.
@@ -714,6 +720,36 @@ fn decode(decoder: &mut CacheDecoder<'a, 'tcx>) -> Self {
}
}
// copy&paste impl from rustc_metadata
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
let tag = d.read_u8();
match tag {
SYMBOL_STR => {
let s = d.read_str();
Symbol::intern(s)
}
SYMBOL_OFFSET => {
// read str offset
let pos = d.read_usize();
let old_pos = d.opaque.position();
// move to str offset and read
d.opaque.set_position(pos);
let s = d.read_str();
let sym = Symbol::intern(s);
// restore position
d.opaque.set_position(old_pos);
sym
}
_ => unreachable!(),
}
}
}
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for CrateNum {
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
let stable_id = StableCrateId::decode(d);
@@ -815,6 +851,7 @@ pub struct CacheEncoder<'a, 'tcx> {
source_map: CachingSourceMapView<'tcx>,
file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
hygiene_context: &'a HygieneEncodeContext,
symbol_table: FxHashMap<Symbol, usize>,
}
impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
@@ -899,6 +936,25 @@ fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
}
}
// copy&paste impl from rustc_metadata
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
match s.symbol_table.entry(*self) {
Entry::Vacant(o) => {
s.encoder.emit_u8(SYMBOL_STR);
let pos = s.encoder.position();
o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
}
}
}
}
impl<'a, 'tcx> TyEncoder for CacheEncoder<'a, 'tcx> {
type I = TyCtxt<'tcx>;
const CLEAR_CROSS_CRATE: bool = false;

View file

@@ -1852,14 +1852,14 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
}
impl<S: Encoder> Encodable<S> for Symbol {
-fn encode(&self, s: &mut S) {
+default fn encode(&self, s: &mut S) {
s.emit_str(self.as_str());
}
}
impl<D: Decoder> Decodable<D> for Symbol {
#[inline]
-fn decode(d: &mut D) -> Symbol {
+default fn decode(d: &mut D) -> Symbol {
Symbol::intern(&d.read_str())
}
}
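
The two `default fn`s above are what make the new crate-local impls legal: by becoming specializable, the blanket `Encodable`/`Decodable` impls for `Symbol` in rustc_span can be overridden by the `EncodeContext`/`CacheEncoder` and `DecodeContext`/`CacheDecoder` impls added earlier in this commit, while every other encoder keeps the plain string encoding. A minimal sketch of that pattern (hypothetical trait and type names; assumes a nightly compiler, since rustc's own crates enable `min_specialization`):

```rust
// Sketch only: `Encode`, `Sym`, and the encoder types below are made up.
#![feature(min_specialization)]

trait Encode<S> {
    fn encode(&self, s: &mut S);
}

struct Sym(&'static str);

struct PlainEncoder; // any other encoder keeps the old behaviour
struct DedupEncoder; // stands in for EncodeContext / CacheEncoder

// Blanket impl: marked `default` so a more specific impl may override it.
impl<S> Encode<S> for Sym {
    default fn encode(&self, _s: &mut S) {
        println!("emit {:?} inline", self.0);
    }
}

// Specializing impl for the deduplicating encoder only.
impl Encode<DedupEncoder> for Sym {
    fn encode(&self, _s: &mut DedupEncoder) {
        println!("emit a tag, then {:?} or an offset to its first copy", self.0);
    }
}

fn main() {
    Sym("clone").encode(&mut PlainEncoder);
    Sym("clone").encode(&mut DedupEncoder);
}
```

Without the `default` keyword on the blanket impl's methods, the more specific impls would be rejected: an impl may only override items explicitly marked `default` in the impl it specializes.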