Initialize LLVM time trace profiler on each code generation thread

In https://reviews.llvm.org/D71059 LLVM 11, the time trace profiler was
extended to support multiple threads.

`timeTraceProfilerInitialize` creates a thread local profiler instance.
When a thread finishes `timeTraceProfilerFinishThread` moves a thread
local instance into a global collection of instances. Finally when all
codegen work is complete `timeTraceProfilerWrite` writes data from the
current thread local instance and the instances in global collection
of instances.

Previously, the profiler was intialized on a single thread only. Since
this thread performs no code generation on its own, the resulting
profile was empty.

Update LLVM codegen to initialize & finish time trace profiler on each
code generation thread.
This commit is contained in:
Tomasz Miąsko 2021-11-05 00:00:00 +00:00
parent d22dd65835
commit 5a09e12135
6 changed files with 128 additions and 56 deletions

View file

@ -76,6 +76,27 @@ pub mod llvm {
#[derive(Clone)]
pub struct LlvmCodegenBackend(());
struct TimeTraceProfiler {
enabled: bool,
}
impl TimeTraceProfiler {
fn new(enabled: bool) -> Self {
if enabled {
unsafe { llvm::LLVMTimeTraceProfilerInitialize() }
}
TimeTraceProfiler { enabled }
}
}
impl Drop for TimeTraceProfiler {
fn drop(&mut self) {
if self.enabled {
unsafe { llvm::LLVMTimeTraceProfilerFinishThread() }
}
}
}
impl ExtraBackendMethods for LlvmCodegenBackend {
fn new_metadata(&self, tcx: TyCtxt<'_>, mod_name: &str) -> ModuleLlvm {
ModuleLlvm::new_metadata(tcx, mod_name)
@ -119,6 +140,34 @@ fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
llvm_util::tune_cpu(sess)
}
fn spawn_thread<F, T>(time_trace: bool, f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::spawn(move || {
let _profiler = TimeTraceProfiler::new(time_trace);
f()
})
}
fn spawn_named_thread<F, T>(
time_trace: bool,
name: String,
f: F,
) -> std::io::Result<std::thread::JoinHandle<T>>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::Builder::new().name(name).spawn(move || {
let _profiler = TimeTraceProfiler::new(time_trace);
f()
})
}
}
impl WriteBackendMethods for LlvmCodegenBackend {

View file

@ -1737,6 +1737,8 @@ pub fn LLVMRustBuildAtomicFence(
pub fn LLVMTimeTraceProfilerInitialize();
pub fn LLVMTimeTraceProfilerFinishThread();
pub fn LLVMTimeTraceProfilerFinish(FileName: *const c_char);
pub fn LLVMAddAnalysisPasses(T: &'a TargetMachine, PM: &PassManager<'a>);

View file

@ -113,11 +113,6 @@ fn llvm_arg_to_arg_name(full_arg: &str) -> &str {
}
if sess.opts.debugging_opts.llvm_time_trace {
// time-trace is not thread safe and running it in parallel will cause seg faults.
if !sess.opts.debugging_opts.no_parallel_llvm {
bug!("`-Z llvm-time-trace` requires `-Z no-parallel-llvm")
}
llvm::LLVMTimeTraceProfilerInitialize();
}

View file

@ -310,6 +310,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
pub no_landing_pads: bool,
pub save_temps: bool,
pub fewer_names: bool,
pub time_trace: bool,
pub exported_symbols: Option<Arc<ExportedSymbols>>,
pub opts: Arc<config::Options>,
pub crate_types: Vec<CrateType>,
@ -1039,6 +1040,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
no_landing_pads: sess.panic_strategy() == PanicStrategy::Abort,
fewer_names: sess.fewer_names(),
save_temps: sess.opts.cg.save_temps,
time_trace: sess.opts.debugging_opts.llvm_time_trace,
opts: Arc::new(sess.opts.clone()),
prof: sess.prof.clone(),
exported_symbols,
@ -1198,7 +1200,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
// Each LLVM module is automatically sent back to the coordinator for LTO if
// necessary. There's already optimizations in place to avoid sending work
// back to the coordinator if LTO isn't requested.
return thread::spawn(move || {
return B::spawn_thread(cgcx.time_trace, move || {
let mut worker_id_counter = 0;
let mut free_worker_ids = Vec::new();
let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| {
@ -1615,9 +1617,7 @@ fn maybe_start_llvm_timer<'a>(
pub struct WorkerFatalError;
fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>) {
let builder = thread::Builder::new().name(work.short_description());
builder
.spawn(move || {
B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
// Set up a destructor which will fire off a message that we're done as
// we exit.
struct Bomb<B: ExtraBackendMethods> {

View file

@ -142,4 +142,26 @@ fn target_machine_factory(
) -> TargetMachineFactoryFn<Self>;
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
fn spawn_thread<F, T>(_time_trace: bool, f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::spawn(f)
}
fn spawn_named_thread<F, T>(
_time_trace: bool,
name: String,
f: F,
) -> std::io::Result<std::thread::JoinHandle<T>>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::Builder::new().name(name).spawn(f)
}
}

View file

@ -75,6 +75,10 @@ extern "C" void LLVMTimeTraceProfilerInitialize() {
/* ProcName */ "rustc");
}
extern "C" void LLVMTimeTraceProfilerFinishThread() {
timeTraceProfilerFinishThread();
}
extern "C" void LLVMTimeTraceProfilerFinish(const char* FileName) {
StringRef FN(FileName);
std::error_code EC;