std: refactor the TLS implementation

As discovered by Mara in #110897, our TLS implementation is a total mess. In the past months, I have simplified the actual macros and their expansions, but the majority of the complexity comes from the platform-specific support code needed to create keys and register destructors. In keeping with #117276, I have therefore moved all of the `thread_local_key`/`thread_local_dtor` modules to the `thread_local` module in `sys` and merged them into a new structure, so that future porters of `std` can simply mix-and-match the existing code instead of having to copy the same (bad) implementation everywhere. The new structure should become obvious when looking at `sys/thread_local/mod.rs`.
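For context, the code being reorganized here is the platform backend behind the stable `thread_local!` macro; nothing about the public API changes. A minimal usage example of that API (shown purely for orientation, not part of this diff):

```rust
use std::cell::Cell;

thread_local! {
    // Each thread lazily gets its own independent copy of this value.
    static COUNTER: Cell<u32> = Cell::new(0);
}

fn main() {
    COUNTER.with(|c| c.set(c.get() + 1));

    std::thread::spawn(|| {
        // A freshly spawned thread sees the initial value, not the main
        // thread's increment.
        COUNTER.with(|c| assert_eq!(c.get(), 0));
    })
    .join()
    .unwrap();

    COUNTER.with(|c| assert_eq!(c.get(), 1));
}
```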

Unfortunately, the documentation changes associated with the refactoring have made this PR rather large. That said, this contains no functional changes except for two small ones:
* the key-based destructor fallback now shares the list implementation used by macOS and others, so it stores its destructor list in a `#[thread_local]` static instead of inside the key itself. This removes one layer of indirection and drastically simplifies the code (a short sketch of the shared pattern follows this list).
* I've switched ZKVM (tier 3) over to the same implementation as WebAssembly, since its previous implementation was just a worse version of that one.
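The shared list-based fallback mentioned in the first bullet boils down to the pattern sketched below. This is illustrative only, not the actual `std` code: the real implementation lives in `sys/thread_local/destructors/list.rs` and uses a `#[thread_local]` static directly, whereas this standalone version substitutes the stable `thread_local!` macro, and `register`, `run` and `drop_box_u32` are invented names for the example.

```rust
use std::cell::RefCell;

thread_local! {
    // (data pointer, destructor) pairs registered on the current thread.
    static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> =
        RefCell::new(Vec::new());
}

/// Registers `dtor` to be run with argument `t` when the thread exits.
unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
    DTORS.with(|dtors| dtors.borrow_mut().push((t, dtor)));
}

/// Drains the list, running each destructor. Destructors may register new
/// entries, so keep popping until the list stays empty.
unsafe fn run() {
    loop {
        // Pop inside `with` so the `RefCell` borrow ends before the
        // destructor runs (it might want to register more entries).
        let next = DTORS.with(|dtors| dtors.borrow_mut().pop());
        match next {
            Some((t, dtor)) => unsafe { dtor(t) },
            None => break,
        }
    }
}

unsafe extern "C" fn drop_box_u32(p: *mut u8) {
    drop(unsafe { Box::from_raw(p.cast::<u32>()) });
}

fn main() {
    let data = Box::into_raw(Box::new(42u32)).cast::<u8>();
    unsafe {
        register(data, drop_box_u32);
        // In `std`, the platform-specific `guard` arranges for this to be
        // called at thread exit; here we call it directly.
        run();
    }
}
```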

Please let me know if I can make this easier to review! I know these large PRs aren't optimal, but I couldn't think of any good intermediate steps.

@rustbot label +A-thread-locals
joboet 2024-06-15 17:47:35 +02:00
parent d2ad293851
commit f3facf1175
50 changed files with 720 additions and 930 deletions

View file

@ -33,9 +33,6 @@
pub mod process;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
#[path = "../unsupported/thread_local_key.rs"]
pub mod thread_local_key;
pub mod time;
use crate::io::ErrorKind;
@ -98,7 +95,6 @@ pub unsafe fn cleanup() {}
argv: *const *const c_char,
env: *const *const c_char,
) -> ! {
use thread_local_dtor::run_dtors;
extern "C" {
fn main(argc: isize, argv: *const *const c_char) -> i32;
}
@ -108,7 +104,7 @@ pub unsafe fn cleanup() {}
let result = main(argc as isize, argv);
run_dtors();
crate::sys::thread_local::destructors::run();
hermit_abi::exit(result);
}

View file

@ -1,7 +1,6 @@
#![allow(dead_code)]
use super::hermit_abi;
use super::thread_local_dtor::run_dtors;
use crate::ffi::CStr;
use crate::io;
use crate::mem;
@ -50,7 +49,7 @@ extern "C" fn thread_start(main: usize) {
Box::from_raw(ptr::with_exposed_provenance::<Box<dyn FnOnce()>>(main).cast_mut())();
// run all destructors
run_dtors();
crate::sys::thread_local::destructors::run();
}
}
}

View file

@ -1,29 +0,0 @@
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]
// Simplify dtor registration by using a list of destructors.
// This solution works like the macOS implementation and doesn't
// need additional OS support.
use crate::cell::RefCell;
#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
match DTORS.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}
}
// Every thread calls this function to run through all registered destructors.
pub unsafe fn run_dtors() {
let mut list = DTORS.take();
while !list.is_empty() {
for (ptr, dtor) in list {
dtor(ptr);
}
list = DTORS.take();
}
}

View file

@ -14,7 +14,6 @@
num::NonZero,
ptr::NonNull,
sync::atomic::{AtomicUsize, Ordering},
sys::thread_local_dtor::run_dtors,
time::Duration,
};
@ -116,7 +115,7 @@ pub unsafe fn new(stack: usize, p: Box<dyn FnOnce()>) -> io::Result<Thread> {
// Run TLS destructors now because they are not
// called automatically for terminated tasks.
unsafe { run_dtors() };
unsafe { crate::sys::thread_local::destructors::run() };
let old_lifecycle = inner
.lifecycle

View file

@ -26,7 +26,6 @@
pub mod process;
pub mod stdio;
pub mod thread;
pub mod thread_local_key;
pub mod thread_parking;
pub mod time;
pub mod waitqueue;

View file

@ -33,8 +33,6 @@ pub mod itron {
pub mod process;
pub mod stdio;
pub use self::itron::thread;
pub mod thread_local_dtor;
pub mod thread_local_key;
pub use self::itron::thread_parking;
pub mod time;

View file

@ -1,43 +0,0 @@
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]
// Simplify dtor registration by using a list of destructors.
use super::{abi, itron::task};
use crate::cell::{Cell, RefCell};
#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);
#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
if !REGISTERED.get() {
let tid = task::current_task_id_aborting();
// Register `tls_dtor` to make sure the TLS destructors are called
// for tasks created by other means than `std::thread`
unsafe { abi::SOLID_TLS_AddDestructor(tid as i32, tls_dtor) };
REGISTERED.set(true);
}
match DTORS.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}
}
pub unsafe fn run_dtors() {
let mut list = DTORS.take();
while !list.is_empty() {
for (ptr, dtor) in list {
unsafe { dtor(ptr) };
}
list = DTORS.take();
}
}
unsafe extern "C" fn tls_dtor(_unused: *mut u8) {
unsafe { run_dtors() };
}

View file

@ -1,21 +0,0 @@
pub type Key = usize;
#[inline]
pub unsafe fn create(_dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
panic!("should not be used on the solid target");
}
#[inline]
pub unsafe fn set(_key: Key, _value: *mut u8) {
panic!("should not be used on the solid target");
}
#[inline]
pub unsafe fn get(_key: Key) -> *mut u8 {
panic!("should not be used on the solid target");
}
#[inline]
pub unsafe fn destroy(_key: Key) {
panic!("should not be used on the solid target");
}

View file

@ -27,9 +27,6 @@
mod rand;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
#[path = "../unix/thread_local_key.rs"]
pub mod thread_local_key;
#[allow(non_upper_case_globals)]
#[path = "../unix/time.rs"]
pub mod time;

View file

@ -1,4 +0,0 @@
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::sys_common::thread_local_dtor::register_dtor_fallback;
register_dtor_fallback(t, dtor);
}

View file

@ -28,8 +28,6 @@
pub mod process;
pub mod stdio;
pub mod thread;
#[path = "../unsupported/thread_local_key.rs"]
pub mod thread_local_key;
pub mod time;
mod helpers;

View file

@ -31,8 +31,6 @@
pub mod stack_overflow;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
pub mod thread_local_key;
pub mod thread_parking;
pub mod time;

View file

@ -1,126 +0,0 @@
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]
//! Provides thread-local destructors without an associated "key", which
//! can be more efficient.
// Since what appears to be glibc 2.18 this symbol has been shipped which
// GCC and clang both use to invoke destructors in thread_local globals, so
// let's do the same!
//
// Note, however, that we run on lots of older Linuxes, as well as
// cross-compiling from a newer Linux to an older one, so we also keep a
// fallback implementation around.
#[cfg(any(
target_os = "linux",
target_os = "android",
target_os = "fuchsia",
target_os = "redox",
target_os = "hurd",
target_os = "netbsd",
target_os = "dragonfly"
))]
// FIXME: The Rust compiler currently omits weak function definitions (i.e.,
// __cxa_thread_atexit_impl) and their metadata from LLVM IR.
#[no_sanitize(cfi, kcfi)]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::mem;
use crate::sys_common::thread_local_dtor::register_dtor_fallback;
/// This is necessary because the __cxa_thread_atexit_impl implementation
/// std links to by default may be a C or C++ implementation that was not
/// compiled using the Clang integer normalization option.
#[cfg(sanitizer_cfi_normalize_integers)]
use core::ffi::c_int;
#[cfg(not(sanitizer_cfi_normalize_integers))]
#[cfi_encoding = "i"]
#[repr(transparent)]
pub struct c_int(#[allow(dead_code)] pub libc::c_int);
extern "C" {
#[linkage = "extern_weak"]
static __dso_handle: *mut u8;
#[linkage = "extern_weak"]
static __cxa_thread_atexit_impl: Option<
extern "C" fn(
unsafe extern "C" fn(*mut libc::c_void),
*mut libc::c_void,
*mut libc::c_void,
) -> c_int,
>;
}
if let Some(f) = __cxa_thread_atexit_impl {
unsafe {
f(
mem::transmute::<
unsafe extern "C" fn(*mut u8),
unsafe extern "C" fn(*mut libc::c_void),
>(dtor),
t.cast(),
core::ptr::addr_of!(__dso_handle) as *mut _,
);
}
return;
}
register_dtor_fallback(t, dtor);
}
// This implementation is very similar to register_dtor_fallback in
// sys_common/thread_local.rs. The main difference is that we want to hook into
// macOS's analog of the above linux function, _tlv_atexit. OSX will run the
// registered dtors before any TLS slots get freed, and when the main thread
// exits.
//
// Unfortunately, calling _tlv_atexit while tls dtors are running is UB. The
// workaround below is to register, via _tlv_atexit, a custom DTOR list once per
// thread. thread_local dtors are pushed to the DTOR list without calling
// _tlv_atexit.
#[cfg(target_vendor = "apple")]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::cell::{Cell, RefCell};
use crate::ptr;
#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);
#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
if !REGISTERED.get() {
_tlv_atexit(run_dtors, ptr::null_mut());
REGISTERED.set(true);
}
extern "C" {
fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8);
}
match DTORS.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}
unsafe extern "C" fn run_dtors(_: *mut u8) {
let mut list = DTORS.take();
while !list.is_empty() {
for (ptr, dtor) in list {
dtor(ptr);
}
list = DTORS.take();
}
}
}
#[cfg(any(
target_os = "vxworks",
target_os = "horizon",
target_os = "emscripten",
target_os = "aix",
target_os = "freebsd",
))]
#[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g. wasm32-unknown-emscripten)
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::sys_common::thread_local_dtor::register_dtor_fallback;
register_dtor_fallback(t, dtor);
}

View file

@ -1,29 +0,0 @@
#![allow(dead_code)] // not used on all platforms
use crate::mem;
pub type Key = libc::pthread_key_t;
#[inline]
pub unsafe fn create(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
let mut key = 0;
assert_eq!(libc::pthread_key_create(&mut key, mem::transmute(dtor)), 0);
key
}
#[inline]
pub unsafe fn set(key: Key, value: *mut u8) {
let r = libc::pthread_setspecific(key, value as *mut _);
debug_assert_eq!(r, 0);
}
#[inline]
pub unsafe fn get(key: Key) -> *mut u8 {
libc::pthread_getspecific(key) as *mut u8
}
#[inline]
pub unsafe fn destroy(key: Key) {
let r = libc::pthread_key_delete(key);
debug_assert_eq!(r, 0);
}

View file

@ -11,9 +11,6 @@
pub mod process;
pub mod stdio;
pub mod thread;
#[cfg(target_thread_local)]
pub mod thread_local_dtor;
pub mod thread_local_key;
pub mod time;
mod common;

View file

@ -1,10 +0,0 @@
#![unstable(feature = "thread_local_internals", issue = "none")]
#[cfg_attr(target_family = "wasm", allow(unused))] // unused on wasm32-unknown-unknown
pub unsafe fn register_dtor(_t: *mut u8, _dtor: unsafe extern "C" fn(*mut u8)) {
// FIXME: right now there is no concept of "thread exit", but this is likely
// going to show up at some point in the form of an exported symbol that the
// wasm runtime is going to be expected to call. For now we basically just
// ignore the arguments, but if such a function starts to exist it will
// likely look like the OSX implementation in `unix/fast_thread_local.rs`
}

View file

@ -1,21 +0,0 @@
pub type Key = usize;
#[inline]
pub unsafe fn create(_dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
panic!("should not be used on this target");
}
#[inline]
pub unsafe fn set(_key: Key, _value: *mut u8) {
panic!("should not be used on this target");
}
#[inline]
pub unsafe fn get(_key: Key) -> *mut u8 {
panic!("should not be used on this target");
}
#[inline]
pub unsafe fn destroy(_key: Key) {
panic!("should not be used on this target");
}

View file

@ -33,10 +33,6 @@
pub mod process;
pub mod stdio;
pub mod thread;
#[path = "../unsupported/thread_local_dtor.rs"]
pub mod thread_local_dtor;
#[path = "../unsupported/thread_local_key.rs"]
pub mod thread_local_key;
pub mod time;
#[path = "../unsupported/common.rs"]

View file

@ -34,10 +34,6 @@
pub mod stdio;
#[path = "../wasi/thread.rs"]
pub mod thread;
#[path = "../unsupported/thread_local_dtor.rs"]
pub mod thread_local_dtor;
#[path = "../unsupported/thread_local_key.rs"]
pub mod thread_local_key;
#[path = "../wasi/time.rs"]
pub mod time;

View file

@ -34,10 +34,6 @@
pub mod process;
#[path = "../unsupported/stdio.rs"]
pub mod stdio;
#[path = "../unsupported/thread_local_dtor.rs"]
pub mod thread_local_dtor;
#[path = "../unsupported/thread_local_key.rs"]
pub mod thread_local_key;
#[path = "../unsupported/time.rs"]
pub mod time;

View file

@ -54,6 +54,7 @@
pub const CONDITION_VARIABLE_INIT: CONDITION_VARIABLE = CONDITION_VARIABLE { Ptr: ptr::null_mut() };
#[cfg(target_vendor = "win7")]
pub const SRWLOCK_INIT: SRWLOCK = SRWLOCK { Ptr: ptr::null_mut() };
#[cfg(not(target_thread_local))]
pub const INIT_ONCE_STATIC_INIT: INIT_ONCE = INIT_ONCE { Ptr: ptr::null_mut() };
// Some windows_sys types have different signs than the types we use.

View file

@ -31,8 +31,6 @@
pub mod rand;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
pub mod thread_local_key;
pub mod time;
cfg_if::cfg_if! {
if #[cfg(not(target_vendor = "uwp"))] {

View file

@ -1,7 +0,0 @@
//! Implements thread-local destructors that are not associated with any
//! particular data.
#![unstable(feature = "thread_local_internals", issue = "none")]
#![cfg(target_thread_local)]
pub use super::thread_local_key::register_keyless_dtor as register_dtor;

View file

@ -1,351 +0,0 @@
use crate::cell::UnsafeCell;
use crate::ptr;
use crate::sync::atomic::{
AtomicPtr, AtomicU32,
Ordering::{AcqRel, Acquire, Relaxed, Release},
};
use crate::sys::c;
#[cfg(test)]
mod tests;
// Using a per-thread list avoids the problems in synchronizing global state.
#[thread_local]
#[cfg(target_thread_local)]
static DESTRUCTORS: crate::cell::RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> =
crate::cell::RefCell::new(Vec::new());
// Ensure this can never be inlined because otherwise this may break in dylibs.
// See #44391.
#[inline(never)]
#[cfg(target_thread_local)]
pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
dtors_used();
match DESTRUCTORS.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}
}
#[inline(never)] // See comment above
#[cfg(target_thread_local)]
/// Runs destructors. This should not be called until thread exit.
unsafe fn run_keyless_dtors() {
// Drop all the destructors.
//
// Note: While this is potentially an infinite loop, it *should* be
// the case that this loop always terminates because we provide the
// guarantee that a TLS key cannot be set after it is flagged for
// destruction.
loop {
// Use a let-else binding to ensure the `RefCell` guard is dropped
// immediately. Otherwise, a panic would occur if a TLS destructor
// tries to access the list.
let Some((ptr, dtor)) = DESTRUCTORS.borrow_mut().pop() else {
break;
};
(dtor)(ptr);
}
// We're done so free the memory.
DESTRUCTORS.replace(Vec::new());
}
type Key = c::DWORD;
type Dtor = unsafe extern "C" fn(*mut u8);
// Turns out, like pretty much everything, Windows is pretty close to the
// functionality that Unix provides, but slightly different! In the case of
// TLS, Windows does not provide an API to provide a destructor for a TLS
// variable. This ends up being pretty crucial to this implementation, so we
// need a way around this.
//
// The solution here ended up being a little obscure, but fear not, the
// internet has informed me [1][2] that this solution is not unique (no way
// I could have thought of it as well!). The key idea is to insert some hook
// somewhere to run arbitrary code on thread termination. With this in place
// we'll be able to run anything we like, including all TLS destructors!
//
// To accomplish this feat, we perform a number of steps, all contained
// within this module:
//
// * All TLS destructors are tracked by *us*, not the Windows runtime. This
// means that we have a global list of destructors for each TLS key that
// we know about.
// * When a thread exits, we run over the entire list and run dtors for all
// non-null keys. This attempts to match Unix semantics in this regard.
//
// For more details and nitty-gritty, see the code sections below!
//
// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
pub struct StaticKey {
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
/// is not a valid key value, this allows us to use zero as sentinel value
/// without risking overflow.
key: AtomicU32,
dtor: Option<Dtor>,
next: AtomicPtr<StaticKey>,
/// Currently, destructors cannot be unregistered, so we cannot use racy
/// initialization for keys. Instead, we need to synchronize initialization.
/// Use the Windows-provided `Once` since it does not require TLS.
once: UnsafeCell<c::INIT_ONCE>,
}
impl StaticKey {
#[inline]
pub const fn new(dtor: Option<Dtor>) -> StaticKey {
StaticKey {
key: AtomicU32::new(0),
dtor,
next: AtomicPtr::new(ptr::null_mut()),
once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
}
}
#[inline]
pub unsafe fn set(&'static self, val: *mut u8) {
let r = c::TlsSetValue(self.key(), val.cast());
debug_assert_eq!(r, c::TRUE);
}
#[inline]
pub unsafe fn get(&'static self) -> *mut u8 {
c::TlsGetValue(self.key()).cast()
}
#[inline]
unsafe fn key(&'static self) -> Key {
match self.key.load(Acquire) {
0 => self.init(),
key => key - 1,
}
}
#[cold]
unsafe fn init(&'static self) -> Key {
if self.dtor.is_some() {
dtors_used();
let mut pending = c::FALSE;
let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());
assert_eq!(r, c::TRUE);
if pending == c::FALSE {
// Some other thread initialized the key, load it.
self.key.load(Relaxed) - 1
} else {
let key = c::TlsAlloc();
if key == c::TLS_OUT_OF_INDEXES {
// Wakeup the waiting threads before panicking to avoid deadlock.
c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut());
panic!("out of TLS indexes");
}
register_dtor(self);
// Release-storing the key needs to be the last thing we do.
// This is because in `fn key()`, other threads will do an acquire load of the key,
// and if that sees this write then it will entirely bypass the `InitOnce`. We thus
// need to establish synchronization through `key`. In particular that acquire load
// must happen-after the register_dtor above, to ensure the dtor actually runs!
self.key.store(key + 1, Release);
let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut());
debug_assert_eq!(r, c::TRUE);
key
}
} else {
// If there is no destructor to clean up, we can use racy initialization.
let key = c::TlsAlloc();
assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");
match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
Ok(_) => key,
Err(new) => {
// Some other thread completed initialization first, so destroy
// our key and use theirs.
let r = c::TlsFree(key);
debug_assert_eq!(r, c::TRUE);
new - 1
}
}
}
}
}
unsafe impl Send for StaticKey {}
unsafe impl Sync for StaticKey {}
// -------------------------------------------------------------------------
// Dtor registration
//
// Windows has no native support for running destructors so we manage our own
// list of destructors to keep track of how to destroy keys. We then install a
// callback later to get invoked whenever a thread exits, running all
// appropriate destructors.
//
// Currently unregistration from this list is not supported. A destructor can be
// registered but cannot be unregistered. There's various simplifying reasons
// for doing this, the big ones being:
//
// 1. Currently we don't even support deallocating TLS keys, so normal operation
// doesn't need to deallocate a destructor.
// 2. There is no point in time where we know we can unregister a destructor
// because it could always be getting run by some remote thread.
//
// Typically processes have a statically known set of TLS keys which is pretty
// small, and we'd want to keep this memory alive for the whole process anyway
// really.
static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());
/// Should only be called once per key, otherwise loops or breaks may occur in
/// the linked list.
unsafe fn register_dtor(key: &'static StaticKey) {
// Ensure this is never run when native thread locals are available.
assert_eq!(false, cfg!(target_thread_local));
let this = <*const StaticKey>::cast_mut(key);
// Use acquire ordering to pass along the changes done by the previously
// registered keys when we store the new head with release ordering.
let mut head = DTORS.load(Acquire);
loop {
key.next.store(head, Relaxed);
match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
Ok(_) => break,
Err(new) => head = new,
}
}
}
// -------------------------------------------------------------------------
// Where the Magic (TM) Happens
//
// If you're looking at this code, and wondering "what is this doing?",
// you're not alone! I'll try to break this down step by step:
//
// # What's up with CRT$XLB?
//
// For anything about TLS destructors to work on Windows, we have to be able
// to run *something* when a thread exits. To do so, we place a very special
// static in a very special location. If this is encoded in just the right
// way, the kernel's loader is apparently nice enough to run some function
// of ours whenever a thread exits! How nice of the kernel!
//
// Lots of detailed information can be found in source [1] above, but the
// gist of it is that this is leveraging a feature of Microsoft's PE format
// (executable format) which is not actually used by any compilers today.
// This apparently translates to any callbacks in the ".CRT$XLB" section
// being run on certain events.
//
// So after all that, we use the compiler's #[link_section] feature to place
// a callback pointer into the magic section so it ends up being called.
//
// # What's up with this callback?
//
// The callback specified receives a number of parameters from... someone!
// (the kernel? the runtime? I'm not quite sure!) There are a few events that
// this gets invoked for, but we're currently only interested in when a
// thread or a process "detaches" (exits). The process part happens for the
// last thread and the thread part happens for any normal thread.
//
// # Ok, what's up with running all these destructors?
//
// This will likely need to be improved over time, but this function
// attempts a "poor man's" destructor callback system. Once we've got a list
// of what to run, we iterate over all keys, check their values, and then run
// destructors if the values turn out to be non null (setting them to null just
// beforehand). We do this a few times in a loop to basically match Unix
// semantics. If we don't reach a fixed point after a short while then we just
// inevitably leak something most likely.
//
// # The article mentions weird stuff about "/INCLUDE"?
//
// It sure does! Specifically we're talking about this quote:
//
// The Microsoft run-time library facilitates this process by defining a
// memory image of the TLS Directory and giving it the special name
// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
// linker looks for this memory image and uses the data there to create the
// TLS Directory. Other compilers that support TLS and work with the
// Microsoft linker must use this same technique.
//
// Basically what this means is that if we want support for our TLS
// destructors/our hook being called then we need to make sure the linker does
// not omit this symbol. Otherwise it will omit it and our callback won't be
// wired up.
//
// We don't actually use the `/INCLUDE` linker flag here like the article
// mentions because the Rust compiler doesn't propagate linker flags, but
// instead we use a shim function which performs a volatile 1-byte load from
// the address of the symbol to ensure it sticks around.
#[link_section = ".CRT$XLB"]
#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section`
pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
on_tls_callback;
fn dtors_used() {
// we don't want LLVM eliminating p_thread_callback when destructors are used.
// when the symbol makes it to the linker the linker will take over
unsafe { crate::intrinsics::volatile_load(&p_thread_callback) };
}
unsafe extern "system" fn on_tls_callback(_h: c::LPVOID, dwReason: c::DWORD, _pv: c::LPVOID) {
if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
#[cfg(not(target_thread_local))]
run_dtors();
#[cfg(target_thread_local)]
run_keyless_dtors();
}
// See comments above for what this is doing. Note that we don't need this
// trickery on GNU windows, just on MSVC.
#[cfg(all(target_env = "msvc", not(target_thread_local)))]
{
extern "C" {
static _tls_used: u8;
}
crate::intrinsics::volatile_load(&_tls_used);
}
}
#[cfg(not(target_thread_local))]
unsafe fn run_dtors() {
for _ in 0..5 {
let mut any_run = false;
// Use acquire ordering to observe key initialization.
let mut cur = DTORS.load(Acquire);
while !cur.is_null() {
let pre_key = (*cur).key.load(Acquire);
let dtor = (*cur).dtor.unwrap();
cur = (*cur).next.load(Relaxed);
// In StaticKey::init, we register the dtor before setting `key`.
// So if one thread's `run_dtors` races with another thread executing `init` on the same
// `StaticKey`, we can encounter a key of 0 here. That means this key was never
// initialized in this thread so we can safely skip it.
if pre_key == 0 {
continue;
}
// If this is non-zero, then via the `Acquire` load above we synchronized with
// everything relevant for this key. (It's not clear that this is needed, since the
// release-acquire pair on DTORS also establishes synchronization, but better safe than
// sorry.)
let key = pre_key - 1;
let ptr = c::TlsGetValue(key);
if !ptr.is_null() {
c::TlsSetValue(key, ptr::null_mut());
dtor(ptr as *mut _);
any_run = true;
}
}
if !any_run {
break;
}
}
}

View file

@ -17,7 +17,6 @@
pub mod process;
pub mod stdio;
pub mod thread;
pub mod thread_local_key;
pub mod time;
#[path = "../unsupported/common.rs"]

View file

@ -81,7 +81,7 @@ extern "C" fn thread_start(
// Destroy TLS, which will free the TLS page and call the destructor for
// any thread local storage (if any).
unsafe {
crate::sys::thread_local_key::destroy_tls();
crate::sys::thread_local::key::destroy_tls();
}
// Deallocate the stack memory, along with the guard pages. Afterwards,

View file

@ -25,7 +25,6 @@
#[path = "../unsupported/process.rs"]
pub mod process;
pub mod stdio;
pub mod thread_local_key;
#[path = "../unsupported/time.rs"]
pub mod time;

View file

@ -1,23 +0,0 @@
use crate::alloc::{alloc, Layout};
pub type Key = usize;
#[inline]
pub unsafe fn create(_dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
alloc(Layout::new::<*mut u8>()) as _
}
#[inline]
pub unsafe fn set(key: Key, value: *mut u8) {
let key: *mut *mut u8 = core::ptr::with_exposed_provenance_mut(key);
*key = value;
}
#[inline]
pub unsafe fn get(key: Key) -> *mut u8 {
let key: *mut *mut u8 = core::ptr::with_exposed_provenance_mut(key);
*key
}
#[inline]
pub unsafe fn destroy(_key: Key) {}

View file

@ -0,0 +1,58 @@
//! Destructor registration for Linux-like systems.
//!
//! Since what appears to be version 2.18, glibc has shipped the
//! `__cxa_thread_atexit_impl` symbol which GCC and clang both use to invoke
//! destructors in C++ thread_local globals. This function does exactly what
//! we want: it schedules a callback which will be run at thread exit with the
//! provided argument.
//!
//! Unfortunately, our minimum supported glibc version (at the time of writing)
//! is 2.17, so we can only link this symbol weakly and need to use the
//! [`list`](super::list) destructor implementation as fallback.
use crate::mem::transmute;
// FIXME: The Rust compiler currently omits weak function definitions (i.e.,
// __cxa_thread_atexit_impl) and their metadata from LLVM IR.
#[no_sanitize(cfi, kcfi)]
pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
/// This is necessary because the __cxa_thread_atexit_impl implementation
/// std links to by default may be a C or C++ implementation that was not
/// compiled using the Clang integer normalization option.
#[cfg(sanitizer_cfi_normalize_integers)]
use core::ffi::c_int;
#[cfg(not(sanitizer_cfi_normalize_integers))]
#[cfi_encoding = "i"]
#[repr(transparent)]
#[allow(non_camel_case_types)]
pub struct c_int(#[allow(dead_code)] pub libc::c_int);
extern "C" {
#[linkage = "extern_weak"]
static __dso_handle: *mut u8;
#[linkage = "extern_weak"]
static __cxa_thread_atexit_impl: Option<
extern "C" fn(
unsafe extern "C" fn(*mut libc::c_void),
*mut libc::c_void,
*mut libc::c_void,
) -> c_int,
>;
}
if let Some(f) = unsafe { __cxa_thread_atexit_impl } {
unsafe {
f(
transmute::<unsafe extern "C" fn(*mut u8), unsafe extern "C" fn(*mut libc::c_void)>(
dtor,
),
t.cast(),
core::ptr::addr_of!(__dso_handle) as *mut _,
);
}
} else {
unsafe {
super::list::register(t, dtor);
}
}
}

View file

@ -0,0 +1,44 @@
use crate::cell::RefCell;
use crate::sys::thread_local::guard;
#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
let Ok(mut dtors) = DTORS.try_borrow_mut() else {
// This point can only be reached if the global allocator calls this
// function again.
// FIXME: maybe use the system allocator instead?
rtabort!("the global allocator may not use TLS with destructors");
};
guard::enable();
dtors.push((t, dtor));
}
/// The [`guard`] module contains platform-specific functions which will run this
/// function on thread exit if [`guard::enable`] has been called.
///
/// # Safety
///
/// May only be run on thread exit to guarantee that there are no live references
/// to TLS variables while they are destroyed.
pub unsafe fn run() {
loop {
let mut dtors = DTORS.borrow_mut();
match dtors.pop() {
Some((t, dtor)) => {
drop(dtors);
unsafe {
dtor(t);
}
}
None => {
// Free the list memory.
*dtors = Vec::new();
break;
}
}
}
}

View file

@ -0,0 +1,31 @@
//! macOS allows registering destructors through _tlv_atexit. But since calling
//! it while TLS destructors are running is UB, we still need to keep our own
//! list of destructors.
use crate::cell::Cell;
use crate::ptr;
use crate::sys::thread_local::destructors;
pub fn enable() {
#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);
extern "C" {
fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8);
}
if !REGISTERED.replace(true) {
// SAFETY: Calling _tlv_atexit while TLS destructors are running is UB.
// But as run_dtors is only called after being registered, this point
// cannot be reached from it.
unsafe {
_tlv_atexit(run_dtors, ptr::null_mut());
}
}
unsafe extern "C" fn run_dtors(_: *mut u8) {
unsafe {
destructors::run();
}
}
}

View file

@ -0,0 +1,23 @@
//! A lot of UNIX platforms don't have a way to register TLS destructors.
//! Instead, we use one TLS key to register a callback which will iterate
//! through the destructor list on thread exit.
use crate::ptr;
use crate::sys::thread_local::destructors;
use crate::sys::thread_local::key::StaticKey;
pub fn enable() {
static DTORS: StaticKey = StaticKey::new(Some(run));
// Setting the key value to something other than NULL will result in the
// destructor being run at thread exit.
unsafe {
DTORS.set(ptr::without_provenance_mut(1));
}
unsafe extern "C" fn run(_: *mut u8) {
unsafe {
destructors::run();
}
}
}
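As a standalone illustration of the trick this guard relies on: a single `pthread` key with a non-null value is enough to get a callback at thread exit. The sketch below uses the `libc` crate directly rather than `std`'s `StaticKey` wrapper, and `at_thread_exit` is an invented destructor name; in `std`, that callback is where `destructors::run()` gets invoked.

```rust
use std::ffi::c_void;
use std::thread;

unsafe extern "C" fn at_thread_exit(_arg: *mut c_void) {
    // Stand-in for `destructors::run()`.
    println!("thread exiting: running TLS destructors");
}

fn main() {
    let mut key: libc::pthread_key_t = 0;
    // Create one key whose destructor fires at thread exit.
    assert_eq!(unsafe { libc::pthread_key_create(&mut key, Some(at_thread_exit)) }, 0);

    thread::spawn(move || {
        // Setting any non-null value schedules `at_thread_exit` for this thread.
        unsafe { libc::pthread_setspecific(key, 1 as *const c_void) };
    })
    .join()
    .unwrap();
}
```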

View file

@ -0,0 +1,23 @@
//! SOLID, just like macOS, has an API to register TLS destructors. But since
//! it does not allow specifying an argument to that function, and will not run
//! destructors for terminated tasks, we still keep our own list.
use crate::cell::Cell;
use crate::sys::pal::{abi, itron::task};
use crate::sys::thread_local::destructors;
pub fn enable() {
#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);
if !REGISTERED.replace(true) {
let tid = task::current_task_id_aborting();
// Register `tls_dtor` to make sure the TLS destructors are called
// for tasks created by other means than `std::thread`
unsafe { abi::SOLID_TLS_AddDestructor(tid as i32, tls_dtor) };
}
unsafe extern "C" fn tls_dtor(_unused: *mut u8) {
unsafe { destructors::run() };
}
}

View file

@ -0,0 +1,103 @@
//! Support for Windows TLS destructors.
//!
//! Unfortunately, Windows does not provide a nice API to provide a destructor
//! for a TLS variable. Thus, the solution here ended up being a little more
//! obscure, but fear not, the internet has informed me [1][2] that this solution
//! is not unique (no way I could have thought of it as well!). The key idea is
//! to insert some hook somewhere to run arbitrary code on thread termination.
//! With this in place we'll be able to run anything we like, including all
//! TLS destructors!
//!
//! In order to realize this, all TLS destructors are tracked by *us*, not the
//! Windows runtime. This means that we have a global list of destructors for
//! each TLS key or variable that we know about.
//!
//! # What's up with CRT$XLB?
//!
//! For anything about TLS destructors to work on Windows, we have to be able
//! to run *something* when a thread exits. To do so, we place a very special
//! static in a very special location. If this is encoded in just the right
//! way, the kernel's loader is apparently nice enough to run some function
//! of ours whenever a thread exits! How nice of the kernel!
//!
//! Lots of detailed information can be found in source [1] above, but the
//! gist of it is that this is leveraging a feature of Microsoft's PE format
//! (executable format) which is not actually used by any compilers today.
//! This apparently translates to any callbacks in the ".CRT$XLB" section
//! being run on certain events.
//!
//! So after all that, we use the compiler's #[link_section] feature to place
//! a callback pointer into the magic section so it ends up being called.
//!
//! # What's up with this callback?
//!
//! The callback specified receives a number of parameters from... someone!
//! (the kernel? the runtime? I'm not quite sure!) There are a few events that
//! this gets invoked for, but we're currently only interested in when a
//! thread or a process "detaches" (exits). The process part happens for the
//! last thread and the thread part happens for any normal thread.
//!
//! # The article mentions weird stuff about "/INCLUDE"?
//!
//! It sure does! Specifically we're talking about this quote:
//!
//! ```quote
//! The Microsoft run-time library facilitates this process by defining a
//! memory image of the TLS Directory and giving it the special name
//! “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
//! linker looks for this memory image and uses the data there to create the
//! TLS Directory. Other compilers that support TLS and work with the
//! Microsoft linker must use this same technique.
//! ```
//!
//! Basically what this means is that if we want support for our TLS
//! destructors/our hook being called then we need to make sure the linker does
//! not omit this symbol. Otherwise it will omit it and our callback won't be
//! wired up.
//!
//! We don't actually use the `/INCLUDE` linker flag here like the article
//! mentions because the Rust compiler doesn't propagate linker flags, but
//! instead we use a shim function which performs a volatile 1-byte load from
//! the address of the symbol to ensure it sticks around.
//!
//! [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
//! [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
use crate::ptr;
use crate::sys::c;
pub fn enable() {
// When destructors are used, we don't want LLVM eliminating CALLBACK for any
// reason. Once the symbol makes it to the linker, it will do the rest.
unsafe { ptr::from_ref(&CALLBACK).read_volatile() };
}
#[link_section = ".CRT$XLB"]
#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section`
pub static CALLBACK: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) = tls_callback;
unsafe extern "system" fn tls_callback(_h: c::LPVOID, dw_reason: c::DWORD, _pv: c::LPVOID) {
// See comments above for what this is doing. Note that we don't need this
// trickery on GNU windows, just on MSVC.
#[cfg(all(target_env = "msvc", not(target_thread_local)))]
{
extern "C" {
static _tls_used: u8;
}
unsafe {
ptr::from_ref(&_tls_used).read_volatile();
}
}
if dw_reason == c::DLL_THREAD_DETACH || dw_reason == c::DLL_PROCESS_DETACH {
#[cfg(target_thread_local)]
unsafe {
super::super::destructors::run();
}
#[cfg(not(target_thread_local))]
unsafe {
super::super::key::run_dtors();
}
}
}

View file

@ -1,61 +1,16 @@
//! OS-based thread local storage for non-Windows systems
//! An implementation of `const`-creatable TLS keys for non-Windows platforms.
//!
//! This module provides an implementation of OS-based thread local storage,
//! using the native OS-provided facilities (think `TlsAlloc` or
//! `pthread_setspecific`). The interface of this differs from the other types
//! of thread-local-storage provided in this crate in that OS-based TLS can only
//! get/set pointer-sized data, possibly with an associated destructor.
//! Most OSs without native TLS will provide a library-based way to create TLS
//! storage. For each TLS variable, we create a key, which can then be used to
//! reference an entry in a thread-local table. This then associates each key
//! with a pointer which we can get and set to store our data.
//!
//! This module also provides two flavors of TLS. One is intended for static
//! initialization, and does not contain a `Drop` implementation to deallocate
//! the OS-TLS key. The other is a type which does implement `Drop` and hence
//! has a safe interface.
//!
//! Windows doesn't use this module at all; `sys::pal::windows::thread_local_key`
//! gets imported in its stead.
//!
//! # Usage
//!
//! This module should likely not be used directly unless other primitives are
//! being built on. Types such as `thread_local::spawn::Key` are likely much
//! more useful in practice than this OS-based version which likely requires
//! unsafe code to interoperate with.
//!
//! # Examples
//!
//! Using a dynamically allocated TLS key. Note that this key can be shared
//! among many threads via an `Arc`.
//!
//! ```ignore (cannot-doctest-private-modules)
//! let key = Key::new(None);
//! assert!(key.get().is_null());
//! key.set(1 as *mut u8);
//! assert!(!key.get().is_null());
//!
//! drop(key); // deallocate this TLS slot.
//! ```
//!
//! Sometimes a statically allocated key is either required or easier to work
//! with, however.
//!
//! ```ignore (cannot-doctest-private-modules)
//! static KEY: StaticKey = INIT;
//!
//! unsafe {
//! assert!(KEY.get().is_null());
//! KEY.set(1 as *mut u8);
//! }
//! ```
#![allow(non_camel_case_types)]
#![unstable(feature = "thread_local_internals", issue = "none")]
#![allow(dead_code)]
#[cfg(test)]
mod tests;
//! Unfortunately, none of these platforms allows creating the key at compile-time,
//! which means we need a way to lazily create keys (`StaticKey`). Instead of
//! a blocking API like `OnceLock`, we use racy initialization, which should
//! be more lightweight and avoid circular dependencies with the rest of `std`.
use crate::sync::atomic::{self, AtomicUsize, Ordering};
use crate::sys::thread_local_key as imp;
/// A type for TLS keys that are statically allocated.
///
@ -90,11 +45,6 @@ pub struct StaticKey {
dtor: Option<unsafe extern "C" fn(*mut u8)>,
}
/// Constant initialization value for static TLS keys.
///
/// This value specifies no destructor by default.
pub const INIT: StaticKey = StaticKey::new(None);
// Define a sentinel value that is likely not to be returned
// as a TLS key.
#[cfg(not(target_os = "nto"))]
@ -117,7 +67,7 @@ pub const fn new(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> StaticKey {
/// been allocated.
#[inline]
pub unsafe fn get(&self) -> *mut u8 {
imp::get(self.key())
unsafe { super::get(self.key()) }
}
/// Sets this TLS key to a new value.
@ -126,18 +76,18 @@ pub unsafe fn get(&self) -> *mut u8 {
/// been allocated.
#[inline]
pub unsafe fn set(&self, val: *mut u8) {
imp::set(self.key(), val)
unsafe { super::set(self.key(), val) }
}
#[inline]
unsafe fn key(&self) -> imp::Key {
fn key(&self) -> super::Key {
match self.key.load(Ordering::Acquire) {
KEY_SENTVAL => self.lazy_init() as imp::Key,
n => n as imp::Key,
KEY_SENTVAL => self.lazy_init() as super::Key,
n => n as super::Key,
}
}
unsafe fn lazy_init(&self) -> usize {
fn lazy_init(&self) -> usize {
// POSIX allows the key created here to be KEY_SENTVAL, but the compare_exchange
// below relies on using KEY_SENTVAL as a sentinel value to check who won the
// race to set the shared TLS key. As far as I know, there is no
@ -147,12 +97,14 @@ unsafe fn lazy_init(&self) -> usize {
// value of KEY_SENTVAL, but with some gyrations to make sure we have a non-KEY_SENTVAL
// value returned from the creation routine.
// FIXME: this is clearly a hack, and should be cleaned up.
let key1 = imp::create(self.dtor);
let key1 = super::create(self.dtor);
let key = if key1 as usize != KEY_SENTVAL {
key1
} else {
let key2 = imp::create(self.dtor);
imp::destroy(key1);
let key2 = super::create(self.dtor);
unsafe {
super::destroy(key1);
}
key2
};
rtassert!(key as usize != KEY_SENTVAL);
@ -165,10 +117,10 @@ unsafe fn lazy_init(&self) -> usize {
// The CAS succeeded, so we've created the actual key
Ok(_) => key as usize,
// If someone beat us to the punch, use their key instead
Err(n) => {
imp::destroy(key);
Err(n) => unsafe {
super::destroy(key);
n
}
},
}
}
}
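The racy key initialization that `lazy_init` performs above can be boiled down to the following standalone sketch. Here `create`, `destroy` and `SENTINEL` are placeholders for the OS key functions and sentinel value, and the real code additionally handles the corner case where the OS hands back a key equal to the sentinel.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

const SENTINEL: usize = 0;
static KEY: AtomicUsize = AtomicUsize::new(SENTINEL);

// Placeholders for the OS-specific key functions (e.g. `pthread_key_create`).
// This fake `create` never returns the sentinel.
fn create() -> usize {
    static NEXT: AtomicUsize = AtomicUsize::new(1);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
fn destroy(_key: usize) {}

fn key() -> usize {
    match KEY.load(Ordering::Acquire) {
        SENTINEL => lazy_init(),
        key => key,
    }
}

fn lazy_init() -> usize {
    // Create a key optimistically; only one thread's key ends up being used.
    let candidate = create();
    match KEY.compare_exchange(SENTINEL, candidate, Ordering::AcqRel, Ordering::Acquire) {
        // We won the race: everyone will now see our key.
        Ok(_) => candidate,
        // Another thread beat us to it: free ours and use theirs.
        Err(theirs) => {
            destroy(candidate);
            theirs
        }
    }
}

fn main() {
    let threads: Vec<_> = (0..4).map(|_| std::thread::spawn(key)).collect();
    let first = key();
    for t in threads {
        assert_eq!(t.join().unwrap(), first);
    }
}
```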

View file

@ -1,9 +1,9 @@
use super::abi::tls::{Key as AbiKey, Tls};
use crate::sys::pal::abi::tls::{Key as AbiKey, Tls};
pub type Key = usize;
#[inline]
pub unsafe fn create(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
pub fn create(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
Tls::create(dtor).as_usize()
}

View file

@ -1,7 +1,3 @@
// This file only tests the thread local key fallback.
// Windows targets with native thread local support do not use this.
#![cfg(not(target_thread_local))]
use super::StaticKey;
use crate::ptr;
@ -27,7 +23,7 @@ fn destructors() {
use crate::thread;
unsafe extern "C" fn destruct(ptr: *mut u8) {
drop(Arc::from_raw(ptr as *const ()));
drop(unsafe { Arc::from_raw(ptr as *const ()) });
}
static KEY: StaticKey = StaticKey::new(Some(destruct));

View file

@ -0,0 +1,27 @@
use crate::mem;
pub type Key = libc::pthread_key_t;
#[inline]
pub fn create(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
let mut key = 0;
assert_eq!(unsafe { libc::pthread_key_create(&mut key, mem::transmute(dtor)) }, 0);
key
}
#[inline]
pub unsafe fn set(key: Key, value: *mut u8) {
let r = unsafe { libc::pthread_setspecific(key, value as *mut _) };
debug_assert_eq!(r, 0);
}
#[inline]
pub unsafe fn get(key: Key) -> *mut u8 {
unsafe { libc::pthread_getspecific(key) as *mut u8 }
}
#[inline]
pub unsafe fn destroy(key: Key) {
let r = unsafe { libc::pthread_key_delete(key) };
debug_assert_eq!(r, 0);
}

View file

@ -0,0 +1,206 @@
//! Implementation of `StaticKey` for Windows.
//!
//! Windows has no native support for running destructors so we manage our own
//! list of destructors to keep track of how to destroy keys. We then install a
//! callback later to get invoked whenever a thread exits, running all
//! appropriate destructors (see the [`guard`](guard) module documentation).
//!
//! This will likely need to be improved over time, but this module attempts a
//! "poor man's" destructor callback system. Once we've got a list of what to
//! run, we iterate over all keys, check their values, and then run destructors
//! if the values turn out to be non null (setting them to null just beforehand).
//! We do this a few times in a loop to basically match Unix semantics. If we
//! don't reach a fixed point after a short while then we just inevitably leak
//! something.
//!
//! The list is implemented as an atomic single-linked list of `StaticKey`s and
//! does not support unregistration. Unfortunately, this means that we cannot
//! use racy initialization for creating the keys in `StaticKey`, as that could
//! result in destructors being missed. Hence, we synchronize the creation of
//! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s
//! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself).
//! For keys without destructors, racy initialization suffices.
// FIXME: investigate using a fixed-size array instead, as the maximum number
// of keys is [limited to 1088](https://learn.microsoft.com/en-us/windows/win32/ProcThread/thread-local-storage).
use crate::cell::UnsafeCell;
use crate::ptr;
use crate::sync::atomic::{
AtomicPtr, AtomicU32,
Ordering::{AcqRel, Acquire, Relaxed, Release},
};
use crate::sys::c;
use crate::sys::thread_local::guard;
type Key = c::DWORD;
type Dtor = unsafe extern "C" fn(*mut u8);
pub struct StaticKey {
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
/// is not a valid key value, this allows us to use zero as sentinel value
/// without risking overflow.
key: AtomicU32,
dtor: Option<Dtor>,
next: AtomicPtr<StaticKey>,
/// Currently, destructors cannot be unregistered, so we cannot use racy
/// initialization for keys. Instead, we need to synchronize initialization.
/// Use the Windows-provided `Once` since it does not require TLS.
once: UnsafeCell<c::INIT_ONCE>,
}
impl StaticKey {
#[inline]
pub const fn new(dtor: Option<Dtor>) -> StaticKey {
StaticKey {
key: AtomicU32::new(0),
dtor,
next: AtomicPtr::new(ptr::null_mut()),
once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
}
}
#[inline]
pub unsafe fn set(&'static self, val: *mut u8) {
let r = unsafe { c::TlsSetValue(self.key(), val.cast()) };
debug_assert_eq!(r, c::TRUE);
}
#[inline]
pub unsafe fn get(&'static self) -> *mut u8 {
unsafe { c::TlsGetValue(self.key()).cast() }
}
#[inline]
fn key(&'static self) -> Key {
match self.key.load(Acquire) {
0 => unsafe { self.init() },
key => key - 1,
}
}
#[cold]
unsafe fn init(&'static self) -> Key {
if self.dtor.is_some() {
let mut pending = c::FALSE;
let r = unsafe {
c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut())
};
assert_eq!(r, c::TRUE);
if pending == c::FALSE {
// Some other thread initialized the key, load it.
self.key.load(Relaxed) - 1
} else {
let key = unsafe { c::TlsAlloc() };
if key == c::TLS_OUT_OF_INDEXES {
// Wakeup the waiting threads before panicking to avoid deadlock.
unsafe {
c::InitOnceComplete(
self.once.get(),
c::INIT_ONCE_INIT_FAILED,
ptr::null_mut(),
);
}
panic!("out of TLS indexes");
}
unsafe {
register_dtor(self);
}
// Release-storing the key needs to be the last thing we do.
// This is because in `fn key()`, other threads will do an acquire load of the key,
// and if that sees this write then it will entirely bypass the `InitOnce`. We thus
// need to establish synchronization through `key`. In particular that acquire load
// must happen-after the register_dtor above, to ensure the dtor actually runs!
self.key.store(key + 1, Release);
let r = unsafe { c::InitOnceComplete(self.once.get(), 0, ptr::null_mut()) };
debug_assert_eq!(r, c::TRUE);
key
}
} else {
// If there is no destructor to clean up, we can use racy initialization.
let key = unsafe { c::TlsAlloc() };
assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");
match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
Ok(_) => key,
Err(new) => unsafe {
// Some other thread completed initialization first, so destroy
// our key and use theirs.
let r = c::TlsFree(key);
debug_assert_eq!(r, c::TRUE);
new - 1
},
}
}
}
}
unsafe impl Send for StaticKey {}
unsafe impl Sync for StaticKey {}
static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());
/// Should only be called once per key, otherwise loops or breaks may occur in
/// the linked list.
unsafe fn register_dtor(key: &'static StaticKey) {
guard::enable();
let this = <*const StaticKey>::cast_mut(key);
// Use acquire ordering to pass along the changes done by the previously
// registered keys when we store the new head with release ordering.
let mut head = DTORS.load(Acquire);
loop {
key.next.store(head, Relaxed);
match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
Ok(_) => break,
Err(new) => head = new,
}
}
}
/// This will and must only be run by the destructor callback in [`guard`].
pub unsafe fn run_dtors() {
for _ in 0..5 {
let mut any_run = false;
// Use acquire ordering to observe key initialization.
let mut cur = DTORS.load(Acquire);
while !cur.is_null() {
let pre_key = unsafe { (*cur).key.load(Acquire) };
let dtor = unsafe { (*cur).dtor.unwrap() };
cur = unsafe { (*cur).next.load(Relaxed) };
// In StaticKey::init, we register the dtor before setting `key`.
// So if one thread's `run_dtors` races with another thread executing `init` on the same
// `StaticKey`, we can encounter a key of 0 here. That means this key was never
// initialized in this thread so we can safely skip it.
if pre_key == 0 {
continue;
}
// If this is non-zero, then via the `Acquire` load above we synchronized with
// everything relevant for this key. (It's not clear that this is needed, since the
// release-acquire pair on DTORS also establishes synchronization, but better safe than
// sorry.)
let key = pre_key - 1;
let ptr = unsafe { c::TlsGetValue(key) };
if !ptr.is_null() {
unsafe {
c::TlsSetValue(key, ptr::null_mut());
dtor(ptr as *mut _);
any_run = true;
}
}
}
if !any_run {
break;
}
}
}

View file

@ -1,3 +1,41 @@
//! Thread Local Storage
//!
//! Currently, we are limited to 1023 TLS entries. The entries
//! live in a page of memory that's unique per-process, and is
//! stored in the `$tp` register. If this register is 0, then
//! TLS has not been initialized and thread cleanup can be skipped.
//!
//! The index into this register is the `key`. This key is identical
//! between all threads, but indexes a different offset within this
//! pointer.
//!
//! # Dtor registration (stolen from Windows)
//!
//! Xous has no native support for running destructors so we manage our own
//! list of destructors to keep track of how to destroy keys. When a thread
//! or the process exits, `run_dtors` is called, which will iterate through
//! the list and run the destructors.
//!
//! Currently unregistration from this list is not supported. A destructor can be
//! registered but cannot be unregistered. There are various simplifying reasons
//! for doing this, the big ones being:
//!
//! 1. Currently we don't even support deallocating TLS keys, so normal operation
//! doesn't need to deallocate a destructor.
//! 2. There is no point in time where we know we can unregister a destructor
//! because it could always be getting run by some remote thread.
//!
//! Typically processes have a statically known set of TLS keys which is pretty
//! small, and we'd want to keep this memory alive for the whole process anyway
//! really.
//!
//! Perhaps one day we can fold the `Box` here into a static allocation,
//! expanding the `StaticKey` structure to contain not only a slot for the TLS
//! key but also a slot for the destructor queue on windows. An optimization for
//! another day!
// FIXME(joboet): implement support for native TLS instead.
use crate::mem::ManuallyDrop;
use crate::ptr;
use crate::sync::atomic::AtomicPtr;
@ -7,18 +45,7 @@
use crate::os::xous::ffi::{map_memory, unmap_memory, MemoryFlags};
/// Thread Local Storage
///
/// Currently, we are limited to 1023 TLS entries. The entries
/// live in a page of memory that's unique per-process, and is
/// stored in the `$tp` register. If this register is 0, then
/// TLS has not been initialized and thread cleanup can be skipped.
///
/// The index into this register is the `key`. This key is identical
/// between all threads, but indexes a different offset within this
/// pointer.
pub type Key = usize;
pub type Dtor = unsafe extern "C" fn(*mut u8);
const TLS_MEMORY_SIZE: usize = 4096;
@ -89,7 +116,7 @@ fn tls_table() -> &'static mut [*mut u8] {
}
#[inline]
pub unsafe fn create(dtor: Option<Dtor>) -> Key {
pub fn create(dtor: Option<Dtor>) -> Key {
// Allocate a new TLS key. These keys are shared among all threads.
#[allow(unused_unsafe)]
let key = unsafe { TLS_KEY_INDEX.fetch_add(1, Relaxed) };
@ -118,32 +145,6 @@ pub unsafe fn destroy(_key: Key) {
// lots of TLS variables, but in practice that's not an issue.
}
// -------------------------------------------------------------------------
// Dtor registration (stolen from Windows)
//
// Xous has no native support for running destructors so we manage our own
// list of destructors to keep track of how to destroy keys. We then install a
// callback later to get invoked whenever a thread exits, running all
// appropriate destructors.
//
// Currently unregistration from this list is not supported. A destructor can be
// registered but cannot be unregistered. There's various simplifying reasons
// for doing this, the big ones being:
//
// 1. Currently we don't even support deallocating TLS keys, so normal operation
// doesn't need to deallocate a destructor.
// 2. There is no point in time where we know we can unregister a destructor
// because it could always be getting run by some remote thread.
//
// Typically processes have a statically known set of TLS keys which is pretty
// small, and we'd want to keep this memory alive for the whole process anyway
// really.
//
// Perhaps one day we can fold the `Box` here into a static allocation,
// expanding the `StaticKey` structure to contain not only a slot for the TLS
// key but also a slot for the destructor queue on windows. An optimization for
// another day!
struct Node {
dtor: Dtor,
key: Key,

View file

@ -1,27 +1,135 @@
#![unstable(feature = "thread_local_internals", reason = "should not be necessary", issue = "none")]
#![cfg_attr(test, allow(unused))]
//! Implementation of the `thread_local` macro.
//!
//! There are three different thread-local implementations:
//! * Some targets lack threading support, and hence have only one thread, so
//! the TLS data is stored in a normal `static`.
//! * Some targets support TLS natively via the dynamic linker and C runtime.
//! * On some targets, the OS provides a library-based TLS implementation. The
//! TLS data is heap-allocated and referenced using a TLS key.
//!
//! Each implementation provides a macro which generates the `LocalKey` `const`
//! used to reference the TLS variable, along with the necessary helper structs
//! to track the initialization/destruction state of the variable.
//!
//! Additionally, this module contains abstractions for the OS interfaces used
//! for these implementations.
// There are three thread-local implementations: "static", "fast", "OS".
// The "OS" thread local key type is accessed via platform-specific API calls and is slow, while the
// "fast" key type is accessed via code generated via LLVM, where TLS keys are set up by the linker.
// "static" is for single-threaded platforms where a global static is sufficient.
#![cfg_attr(test, allow(unused))]
#![doc(hidden)]
#![forbid(unsafe_op_in_unsafe_fn)]
#![unstable(
feature = "thread_local_internals",
reason = "internal details of the thread_local macro",
issue = "none"
)]
cfg_if::cfg_if! {
if #[cfg(any(all(target_family = "wasm", not(target_feature = "atomics")), target_os = "uefi"))] {
#[doc(hidden)]
mod static_local;
#[doc(hidden)]
pub use static_local::{EagerStorage, LazyStorage, thread_local_inner};
if #[cfg(any(
all(target_family = "wasm", not(target_feature = "atomics")),
target_os = "uefi",
target_os = "zkvm",
))] {
mod statik;
pub use statik::{EagerStorage, LazyStorage, thread_local_inner};
} else if #[cfg(target_thread_local)] {
#[doc(hidden)]
mod fast_local;
#[doc(hidden)]
pub use fast_local::{EagerStorage, LazyStorage, thread_local_inner};
mod native;
pub use native::{EagerStorage, LazyStorage, thread_local_inner};
} else {
#[doc(hidden)]
mod os_local;
#[doc(hidden)]
pub use os_local::{Key, thread_local_inner};
mod os;
pub use os::{Key, thread_local_inner};
}
}
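
For the first branch, the single-threaded targets, no per-thread bookkeeping is needed at all. A minimal sketch of that strategy, using an illustrative `EagerStorage` that is not the exact std type:

```rust
use std::cell::UnsafeCell;

// Illustrative single-threaded storage; with only one thread there are no
// data races, so a plain static suffices. The `Sync` impl below is only
// sound under that single-thread assumption.
struct EagerStorage<T> {
    val: UnsafeCell<T>,
}

unsafe impl<T> Sync for EagerStorage<T> {}

impl<T> EagerStorage<T> {
    const fn new(val: T) -> Self {
        EagerStorage { val: UnsafeCell::new(val) }
    }

    fn get(&self) -> *mut T {
        self.val.get()
    }
}

static COUNTER: EagerStorage<u32> = EagerStorage::new(0);

fn main() {
    // SAFETY: this sketch assumes a single-threaded target, so no other
    // thread can access `COUNTER` concurrently.
    unsafe {
        *COUNTER.get() += 1;
        println!("counter = {}", *COUNTER.get());
    }
}
```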
/// This module maintains a list of TLS destructors for the current thread,
/// all of which will be run on thread exit.
pub(crate) mod destructors {
cfg_if::cfg_if! {
if #[cfg(all(
target_thread_local,
any(
target_os = "linux",
target_os = "android",
target_os = "fuchsia",
target_os = "redox",
target_os = "hurd",
target_os = "netbsd",
target_os = "dragonfly"
)
))] {
mod linux;
mod list;
pub(super) use linux::register;
pub(super) use list::run;
} else if #[cfg(all(
target_thread_local,
not(all(target_family = "wasm", not(target_feature = "atomics")))
))] {
mod list;
pub(super) use list::register;
pub(crate) use list::run;
}
}
}
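
Whichever backend ends up selected, what this module ultimately guarantees is observable through the public macro: a `thread_local!` value with a destructor has that destructor run when its thread exits. A small demonstration:

```rust
use std::thread;

struct Loud(&'static str);

impl Drop for Loud {
    fn drop(&mut self) {
        println!("dropping {}", self.0);
    }
}

thread_local! {
    static GREETING: Loud = Loud("thread-local value");
}

fn main() {
    thread::spawn(|| {
        // First use initializes the value and registers its destructor
        // for this thread.
        GREETING.with(|g| println!("using {}", g.0));
        // When the thread exits, the registered destructor list runs and
        // "dropping thread-local value" is printed.
    })
    .join()
    .unwrap();
}
```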
/// This module provides a way to schedule the execution of the destructor list
/// on systems without a per-variable destructor system.
mod guard {
cfg_if::cfg_if! {
if #[cfg(all(target_thread_local, target_vendor = "apple"))] {
mod apple;
pub(super) use apple::enable;
} else if #[cfg(target_os = "windows")] {
mod windows;
pub(super) use windows::enable;
} else if #[cfg(any(
all(target_family = "wasm", target_feature = "atomics"),
target_os = "hermit",
))] {
pub(super) fn enable() {}
} else if #[cfg(target_os = "solid_asp3")] {
mod solid;
pub(super) use solid::enable;
} else if #[cfg(all(target_thread_local, not(target_family = "wasm")))] {
mod key;
pub(super) use key::enable;
}
}
}
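
On targets whose only hook is a TLS-key destructor, the key-based guard amounts to reserving one key whose destructor drains the shared list. A Unix-only sketch of that idea (assuming Linux's `unsigned int` `pthread_key_t`; `run_the_list` and `enable` are illustrative names standing in for `destructors::run` and `guard::enable`):

```rust
use std::ffi::{c_int, c_uint, c_void};
use std::sync::atomic::{AtomicU32, Ordering::Relaxed};
use std::sync::Once;

// `pthread_key_t` is `unsigned int` on Linux; other platforms differ.
#[allow(non_camel_case_types)]
type pthread_key_t = c_uint;

extern "C" {
    fn pthread_key_create(
        key: *mut pthread_key_t,
        dtor: Option<unsafe extern "C" fn(*mut c_void)>,
    ) -> c_int;
    fn pthread_setspecific(key: pthread_key_t, value: *const c_void) -> c_int;
}

// Stand-in for `destructors::run()`.
unsafe extern "C" fn run_the_list(_: *mut c_void) {
    println!("thread exiting: running the TLS destructor list");
}

/// Ensure `run_the_list` is invoked when the current thread exits. The
/// key's value only needs to be non-null for the destructor to fire.
fn enable() {
    static INIT: Once = Once::new();
    static KEY: AtomicU32 = AtomicU32::new(0);
    INIT.call_once(|| unsafe {
        let mut key: pthread_key_t = 0;
        assert_eq!(pthread_key_create(&mut key, Some(run_the_list)), 0);
        KEY.store(key, Relaxed);
    });
    unsafe {
        pthread_setspecific(KEY.load(Relaxed), 1 as *const c_void);
    }
}

fn main() {
    std::thread::spawn(enable).join().unwrap();
}
```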
/// This module provides the `StaticKey` abstraction over OS TLS keys.
pub(crate) mod key {
cfg_if::cfg_if! {
if #[cfg(any(
all(not(target_vendor = "apple"), target_family = "unix"),
target_os = "teeos",
))] {
mod racy;
mod unix;
#[cfg(test)]
mod tests;
pub(super) use racy::StaticKey;
use unix::{Key, create, destroy, get, set};
} else if #[cfg(all(not(target_thread_local), target_os = "windows"))] {
#[cfg(test)]
mod tests;
mod windows;
pub(super) use windows::{StaticKey, run_dtors};
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
mod racy;
mod sgx;
#[cfg(test)]
mod tests;
pub(super) use racy::StaticKey;
use sgx::{Key, create, destroy, get, set};
} else if #[cfg(target_os = "xous")] {
mod racy;
#[cfg(test)]
mod tests;
mod xous;
pub(super) use racy::StaticKey;
pub(crate) use xous::destroy_tls;
use xous::{Key, create, destroy, get, set};
}
}
}
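
The `racy` wrapper used by several of these branches allocates the key lazily on first use and resolves creation races by destroying the loser's key. A rough sketch of that pattern, with stub `create`/`destroy` functions standing in for the platform calls and `0` reserved as the "not yet allocated" sentinel:

```rust
use std::sync::atomic::{AtomicUsize, Ordering::{AcqRel, Acquire}};

type Key = usize;

// Stubs standing in for the platform key API (e.g. `pthread_key_create`
// and `pthread_key_delete`); the returned key value is arbitrary.
fn create() -> Key { 7 }
fn destroy(_key: Key) {}

/// Lazily-allocated key; `0` means "not yet allocated", so the stored
/// value is always `key + 1`.
struct StaticKey {
    key: AtomicUsize,
}

impl StaticKey {
    const fn new() -> Self {
        StaticKey { key: AtomicUsize::new(0) }
    }

    fn key(&self) -> Key {
        match self.key.load(Acquire) {
            0 => self.lazy_init(),
            k => k - 1,
        }
    }

    fn lazy_init(&self) -> Key {
        let key = create();
        // If another thread won the race, free our key and use theirs.
        match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
            Ok(_) => key,
            Err(winner) => {
                destroy(key);
                winner - 1
            }
        }
    }
}

fn main() {
    static K: StaticKey = StaticKey::new();
    assert_eq!(K.key(), K.key());
    println!("key = {}", K.key());
}
```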

View file

@ -1,7 +1,7 @@
use crate::cell::{Cell, UnsafeCell};
use crate::ptr::{self, drop_in_place};
use crate::sys::thread_local::abort_on_dtor_unwind;
use crate::sys::thread_local_dtor::register_dtor;
use crate::sys::thread_local::destructors;
#[derive(Clone, Copy)]
enum State {
@ -45,7 +45,7 @@ unsafe fn initialize(&self) -> *const T {
// SAFETY:
// The caller guarantees that `self` will be valid until thread destruction.
unsafe {
register_dtor(ptr::from_ref(self).cast_mut().cast(), destroy::<T>);
destructors::register(ptr::from_ref(self).cast_mut().cast(), destroy::<T>);
}
self.state.set(State::Alive);
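
The `destroy::<T>` function registered above is a monomorphized, type-erasing shim. A small sketch of what such a shim does (the `Storage` type here is illustrative, not the exact std layout):

```rust
use std::ptr::drop_in_place;

// Illustrative stand-in for the eager TLS storage; only the field that
// holds the user's value matters for the destructor.
struct Storage<T> {
    value: T,
}

// The erased pointer handed to the destructor list points at the storage;
// the shim casts it back and drops the value in place.
unsafe extern "C" fn destroy<T>(ptr: *mut u8) {
    unsafe { drop_in_place(&mut (*ptr.cast::<Storage<T>>()).value) }
}

fn main() {
    struct Loud;
    impl Drop for Loud {
        fn drop(&mut self) {
            println!("value dropped through the registered destructor");
        }
    }

    // Simulate the thread-exit call for one registered entry. The heap
    // allocation is only for the sketch (and intentionally not freed);
    // in std the storage lives in actual thread-local memory.
    let storage = Box::into_raw(Box::new(Storage { value: Loud }));
    unsafe { destroy::<Loud>(storage.cast()) };
}
```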

View file

@ -2,7 +2,7 @@
use crate::hint::unreachable_unchecked;
use crate::ptr;
use crate::sys::thread_local::abort_on_dtor_unwind;
use crate::sys::thread_local_dtor::register_dtor;
use crate::sys::thread_local::destructors;
pub unsafe trait DestroyedState: Sized {
fn register_dtor<T>(s: &Storage<T, Self>);
@ -15,7 +15,7 @@ fn register_dtor<T>(_: &Storage<T, !>) {}
unsafe impl DestroyedState for () {
fn register_dtor<T>(s: &Storage<T, ()>) {
unsafe {
register_dtor(ptr::from_ref(s).cast_mut().cast(), destroy::<T>);
destructors::register(ptr::from_ref(s).cast_mut().cast(), destroy::<T>);
}
}
}
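
The trait in this hunk picks its behaviour through the storage's second type parameter: one marker registers a destructor, the other does nothing, so payloads that need no dropping never touch the destructor list. A rough stand-alone sketch of that trick, with `NeedsDrop`/`NoDrop` value markers standing in for the `()` and `!` used in the real code:

```rust
// Storage parameterized over a marker type `D` that decides, at compile
// time, whether initialization registers a destructor.
struct Storage<T, D> {
    value: T,
    _marker: D,
}

trait DestroyedState: Sized {
    fn register_dtor<T>(s: &Storage<T, Self>);
}

struct NeedsDrop;
struct NoDrop;

impl DestroyedState for NeedsDrop {
    fn register_dtor<T>(_s: &Storage<T, Self>) {
        // In std this is where `destructors::register` is called with a
        // type-erased pointer; here we only mark the call site.
        println!("registering a destructor for this variable");
    }
}

impl DestroyedState for NoDrop {
    // Payloads that never need dropping skip the destructor list.
    fn register_dtor<T>(_: &Storage<T, Self>) {}
}

fn initialize<T, D: DestroyedState>(s: &Storage<T, D>) {
    D::register_dtor(s);
}

fn main() {
    let a = Storage { value: String::from("needs drop"), _marker: NeedsDrop };
    let b = Storage { value: 42u32, _marker: NoDrop };
    initialize(&a); // prints the registration message
    initialize(&b); // registers nothing
    println!("{} / {}", a.value, b.value);
}
```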

View file

@ -29,8 +29,6 @@
//! eliminates the `Destroyed` state for these values, which can allow more niche
//! optimizations to occur for the `State` enum. For `Drop` types, `()` is used.
#![deny(unsafe_op_in_unsafe_fn)]
mod eager;
mod lazy;

View file

@ -2,7 +2,7 @@
use crate::cell::Cell;
use crate::marker::PhantomData;
use crate::ptr;
use crate::sys_common::thread_local_key::StaticKey as OsKey;
use crate::sys::thread_local::key::StaticKey as OsKey;
#[doc(hidden)]
#[allow_internal_unstable(thread_local_internals)]

View file

@ -25,18 +25,9 @@
pub mod io;
pub mod lazy_box;
pub mod process;
pub mod thread_local_dtor;
pub mod wstr;
pub mod wtf8;
cfg_if::cfg_if! {
if #[cfg(target_os = "windows")] {
pub use crate::sys::thread_local_key;
} else {
pub mod thread_local_key;
}
}
cfg_if::cfg_if! {
if #[cfg(any(
all(unix, not(target_os = "l4re")),

View file

@ -1,56 +0,0 @@
//! Thread-local destructor
//!
//! Besides thread-local "keys" (pointer-sized non-addressable thread-local store
//! with an associated destructor), many platforms also provide thread-local
//! destructors that are not associated with any particular data. These are
//! often more efficient.
//!
//! This module provides a fallback implementation for that interface, based
//! on the less efficient thread-local "keys". Each platform provides
//! a `thread_local_dtor` module which will either re-export the fallback,
//! or implement something more efficient.
#![unstable(feature = "thread_local_internals", issue = "none")]
#![allow(dead_code)]
use crate::cell::RefCell;
use crate::ptr;
use crate::sys_common::thread_local_key::StaticKey;
pub unsafe fn register_dtor_fallback(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
// The fallback implementation uses a vanilla OS-based TLS key to track
// the list of destructors that need to be run for this thread. The key
// then has its own destructor which runs all the other destructors.
//
// The destructor for DTORS is a little special in that it has a `while`
// loop to continuously drain the list of registered destructors. It
// *should* be the case that this loop always terminates because we
// provide the guarantee that a TLS key cannot be set after it is
// flagged for destruction.
static DTORS: StaticKey = StaticKey::new(Some(run_dtors));
// FIXME(joboet): integrate RefCell into pointer to avoid infinite recursion
// when the global allocator tries to register a destructor and just panic
// instead.
type List = RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>>;
if DTORS.get().is_null() {
let v: Box<List> = Box::new(RefCell::new(Vec::new()));
DTORS.set(Box::into_raw(v) as *mut u8);
}
let list = &*(DTORS.get() as *const List);
match list.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}
unsafe extern "C" fn run_dtors(mut ptr: *mut u8) {
while !ptr.is_null() {
let list = Box::from_raw(ptr as *mut List).into_inner();
for (ptr, dtor) in list.into_iter() {
dtor(ptr);
}
ptr = DTORS.get();
DTORS.set(ptr::null_mut());
}
}
}

View file

@ -1,17 +0,0 @@
use super::StaticKey;
use core::ptr;
#[test]
fn statik() {
static K1: StaticKey = StaticKey::new(None);
static K2: StaticKey = StaticKey::new(None);
unsafe {
assert!(K1.get().is_null());
assert!(K2.get().is_null());
K1.set(ptr::without_provenance_mut(1));
K2.set(ptr::without_provenance_mut(2));
assert_eq!(K1.get() as usize, 1);
assert_eq!(K2.get() as usize, 2);
}
}

View file

@ -1,9 +1,9 @@
//@ignore-target-windows: No pthreads on Windows
//! Test that pthread_key destructors are run in the right order.
//! Note that these are *not* used by actual `thread_local!` on Linux! Those use
//! `thread_local_dtor::register_dtor` from the stdlib instead. In Miri this hits the fallback path
//! in `register_dtor_fallback`, which uses a *single* pthread_key to manage a thread-local list of
//! dtors to call.
//! `destructors::register` from the stdlib instead. In Miri this ends up hitting
//! the fallback path in `guard::key::enable`, which uses a *single* pthread_key
//! to manage a thread-local list of dtors to call.
use std::mem;
use std::ptr;