std: separate TLS key creation from TLS access

Currently, `std` performs an atomic load to get the OS key on every access to `StaticKey` even when the key is already known. This PR thus replaces `StaticKey` with the platform-specific `get` and `set` function and a new `LazyKey` type that acts as a `LazyLock<Key>`, allowing the reuse of the retreived key for multiple accesses.
This commit is contained in:
joboet 2024-06-25 18:30:49 +02:00
parent 164e1297e1
commit e8516f8b52
No known key found for this signature in database
GPG key ID: 704E0149B0194B3C
8 changed files with 100 additions and 125 deletions

View file

@ -4,15 +4,15 @@
use crate::ptr;
use crate::sys::thread_local::destructors;
use crate::sys::thread_local::key::StaticKey;
use crate::sys::thread_local::key::{set, LazyKey};
pub fn enable() {
static DTORS: StaticKey = StaticKey::new(Some(run));
static DTORS: LazyKey = LazyKey::new(Some(run));
// Setting the key value to something other than NULL will result in the
// destructor being run at thread exit.
unsafe {
DTORS.set(ptr::without_provenance_mut(1));
set(DTORS.force(), ptr::without_provenance_mut(1));
}
unsafe extern "C" fn run(_: *mut u8) {

View file

@ -1,4 +1,4 @@
//! A `StaticKey` implementation using racy initialization.
//! A `LazyKey` implementation using racy initialization.
//!
//! Unfortunately, none of the platforms currently supported by `std` allows
//! creating TLS keys at compile-time. Thus we need a way to lazily create keys.
@ -10,34 +10,12 @@
/// A type for TLS keys that are statically allocated.
///
/// This type is entirely `unsafe` to use as it does not protect against
/// use-after-deallocation or use-during-deallocation.
///
/// The actual OS-TLS key is lazily allocated when this is used for the first
/// time. The key is also deallocated when the Rust runtime exits or `destroy`
/// is called, whichever comes first.
///
/// # Examples
///
/// ```ignore (cannot-doctest-private-modules)
/// use tls::os::{StaticKey, INIT};
///
/// // Use a regular global static to store the key.
/// static KEY: StaticKey = INIT;
///
/// // The state provided via `get` and `set` is thread-local.
/// unsafe {
/// assert!(KEY.get().is_null());
/// KEY.set(1 as *mut u8);
/// }
/// ```
pub struct StaticKey {
/// This is basically a `LazyLock<Key>`, but avoids blocking and circular
/// dependencies with the rest of `std`.
pub struct LazyKey {
/// Inner static TLS key (internals).
key: AtomicUsize,
/// Destructor for the TLS value.
///
/// See `Key::new` for information about when the destructor runs and how
/// it runs.
dtor: Option<unsafe extern "C" fn(*mut u8)>,
}
@ -51,32 +29,14 @@ pub struct StaticKey {
#[cfg(target_os = "nto")]
const KEY_SENTVAL: usize = libc::PTHREAD_KEYS_MAX + 1;
impl StaticKey {
impl LazyKey {
#[rustc_const_unstable(feature = "thread_local_internals", issue = "none")]
pub const fn new(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> StaticKey {
StaticKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor }
}
/// Gets the value associated with this TLS key
///
/// This will lazily allocate a TLS key from the OS if one has not already
/// been allocated.
#[inline]
pub unsafe fn get(&self) -> *mut u8 {
unsafe { super::get(self.key()) }
}
/// Sets this TLS key to a new value.
///
/// This will lazily allocate a TLS key from the OS if one has not already
/// been allocated.
#[inline]
pub unsafe fn set(&self, val: *mut u8) {
unsafe { super::set(self.key(), val) }
pub const fn new(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> LazyKey {
LazyKey { key: atomic::AtomicUsize::new(KEY_SENTVAL), dtor }
}
#[inline]
fn key(&self) -> super::Key {
pub fn force(&self) -> super::Key {
match self.key.load(Ordering::Acquire) {
KEY_SENTVAL => self.lazy_init() as super::Key,
n => n as super::Key,

View file

@ -1,18 +1,25 @@
use super::StaticKey;
use super::{get, set, LazyKey};
use crate::ptr;
#[test]
fn smoke() {
static K1: StaticKey = StaticKey::new(None);
static K2: StaticKey = StaticKey::new(None);
static K1: LazyKey = LazyKey::new(None);
static K2: LazyKey = LazyKey::new(None);
let k1 = K1.force();
let k2 = K2.force();
assert_ne!(k1, k2);
assert_eq!(K1.force(), k1);
assert_eq!(K2.force(), k2);
unsafe {
assert!(K1.get().is_null());
assert!(K2.get().is_null());
K1.set(ptr::without_provenance_mut(1));
K2.set(ptr::without_provenance_mut(2));
assert_eq!(K1.get() as usize, 1);
assert_eq!(K2.get() as usize, 2);
assert!(get(k1).is_null());
assert!(get(k2).is_null());
set(k1, ptr::without_provenance_mut(1));
set(k2, ptr::without_provenance_mut(2));
assert_eq!(get(k1) as usize, 1);
assert_eq!(get(k2) as usize, 2);
}
}
@ -26,25 +33,27 @@ fn destructors() {
drop(unsafe { Arc::from_raw(ptr as *const ()) });
}
static KEY: StaticKey = StaticKey::new(Some(destruct));
static KEY: LazyKey = LazyKey::new(Some(destruct));
let shared1 = Arc::new(());
let shared2 = Arc::clone(&shared1);
let key = KEY.force();
unsafe {
assert!(KEY.get().is_null());
KEY.set(Arc::into_raw(shared1) as *mut u8);
assert!(get(key).is_null());
set(key, Arc::into_raw(shared1) as *mut u8);
}
thread::spawn(move || unsafe {
assert!(KEY.get().is_null());
KEY.set(Arc::into_raw(shared2) as *mut u8);
let key = KEY.force();
assert!(get(key).is_null());
set(key, Arc::into_raw(shared2) as *mut u8);
})
.join()
.unwrap();
// Leak the Arc, let the TLS destructor clean it up.
let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(KEY.get() as *const ())) };
let shared1 = unsafe { ManuallyDrop::new(Arc::from_raw(get(key) as *const ())) };
assert_eq!(
Arc::strong_count(&shared1),
1,

View file

@ -16,6 +16,7 @@ pub unsafe fn set(key: Key, value: *mut u8) {
}
#[inline]
#[cfg(any(not(target_thread_local), test))]
pub unsafe fn get(key: Key) -> *mut u8 {
unsafe { libc::pthread_getspecific(key) as *mut u8 }
}

View file

@ -1,4 +1,4 @@
//! Implementation of `StaticKey` for Windows.
//! Implementation of `LazyKey` for Windows.
//!
//! Windows has no native support for running destructors so we manage our own
//! list of destructors to keep track of how to destroy keys. We then install a
@ -13,9 +13,9 @@
//! don't reach a fixed point after a short while then we just inevitably leak
//! something.
//!
//! The list is implemented as an atomic single-linked list of `StaticKey`s and
//! The list is implemented as an atomic single-linked list of `LazyKey`s and
//! does not support unregistration. Unfortunately, this means that we cannot
//! use racy initialization for creating the keys in `StaticKey`, as that could
//! use racy initialization for creating the keys in `LazyKey`, as that could
//! result in destructors being missed. Hence, we synchronize the creation of
//! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s
//! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself).
@ -33,26 +33,26 @@
use crate::sys::c;
use crate::sys::thread_local::guard;
type Key = c::DWORD;
pub type Key = c::DWORD;
type Dtor = unsafe extern "C" fn(*mut u8);
pub struct StaticKey {
pub struct LazyKey {
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
/// is not a valid key value, this allows us to use zero as sentinel value
/// without risking overflow.
key: AtomicU32,
dtor: Option<Dtor>,
next: AtomicPtr<StaticKey>,
next: AtomicPtr<LazyKey>,
/// Currently, destructors cannot be unregistered, so we cannot use racy
/// initialization for keys. Instead, we need synchronize initialization.
/// Use the Windows-provided `Once` since it does not require TLS.
once: UnsafeCell<c::INIT_ONCE>,
}
impl StaticKey {
impl LazyKey {
#[inline]
pub const fn new(dtor: Option<Dtor>) -> StaticKey {
StaticKey {
pub const fn new(dtor: Option<Dtor>) -> LazyKey {
LazyKey {
key: AtomicU32::new(0),
dtor,
next: AtomicPtr::new(ptr::null_mut()),
@ -61,18 +61,7 @@ pub const fn new(dtor: Option<Dtor>) -> StaticKey {
}
#[inline]
pub unsafe fn set(&'static self, val: *mut u8) {
let r = unsafe { c::TlsSetValue(self.key(), val.cast()) };
debug_assert_eq!(r, c::TRUE);
}
#[inline]
pub unsafe fn get(&'static self) -> *mut u8 {
unsafe { c::TlsGetValue(self.key()).cast() }
}
#[inline]
fn key(&'static self) -> Key {
pub fn force(&'static self) -> Key {
match self.key.load(Acquire) {
0 => unsafe { self.init() },
key => key - 1,
@ -141,17 +130,28 @@ unsafe fn init(&'static self) -> Key {
}
}
unsafe impl Send for StaticKey {}
unsafe impl Sync for StaticKey {}
unsafe impl Send for LazyKey {}
unsafe impl Sync for LazyKey {}
static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());
#[inline]
pub unsafe fn set(key: Key, val: *mut u8) {
let r = unsafe { c::TlsSetValue(key, val.cast()) };
debug_assert_eq!(r, c::TRUE);
}
#[inline]
pub unsafe fn get(key: Key) -> *mut u8 {
unsafe { c::TlsGetValue(key).cast() }
}
static DTORS: AtomicPtr<LazyKey> = AtomicPtr::new(ptr::null_mut());
/// Should only be called once per key, otherwise loops or breaks may occur in
/// the linked list.
unsafe fn register_dtor(key: &'static StaticKey) {
unsafe fn register_dtor(key: &'static LazyKey) {
guard::enable();
let this = <*const StaticKey>::cast_mut(key);
let this = <*const LazyKey>::cast_mut(key);
// Use acquire ordering to pass along the changes done by the previously
// registered keys when we store the new head with release ordering.
let mut head = DTORS.load(Acquire);
@ -176,9 +176,9 @@ pub unsafe fn run_dtors() {
let dtor = unsafe { (*cur).dtor.unwrap() };
cur = unsafe { (*cur).next.load(Relaxed) };
// In StaticKey::init, we register the dtor before setting `key`.
// In LazyKey::init, we register the dtor before setting `key`.
// So if one thread's `run_dtors` races with another thread executing `init` on the same
// `StaticKey`, we can encounter a key of 0 here. That means this key was never
// `LazyKey`, we can encounter a key of 0 here. That means this key was never
// initialized in this thread so we can safely skip it.
if pre_key == 0 {
continue;

View file

@ -30,7 +30,7 @@
//! really.
//!
//! Perhaps one day we can fold the `Box` here into a static allocation,
//! expanding the `StaticKey` structure to contain not only a slot for the TLS
//! expanding the `LazyKey` structure to contain not only a slot for the TLS
//! key but also a slot for the destructor queue on windows. An optimization for
//! another day!

View file

@ -36,7 +36,7 @@
pub use native::{EagerStorage, LazyStorage, thread_local_inner};
} else {
mod os;
pub use os::{Key, thread_local_inner};
pub use os::{Storage, thread_local_inner};
}
}
@ -126,28 +126,33 @@ pub(crate) mod key {
mod unix;
#[cfg(test)]
mod tests;
pub(super) use racy::StaticKey;
use unix::{Key, create, destroy, get, set};
pub(super) use racy::LazyKey;
pub(super) use unix::{Key, set};
#[cfg(any(not(target_thread_local), test))]
pub(super) use unix::get;
use unix::{create, destroy};
} else if #[cfg(all(not(target_thread_local), target_os = "windows"))] {
#[cfg(test)]
mod tests;
mod windows;
pub(super) use windows::{StaticKey, run_dtors};
pub(super) use windows::{Key, LazyKey, get, run_dtors, set};
} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
mod racy;
mod sgx;
#[cfg(test)]
mod tests;
pub(super) use racy::StaticKey;
use sgx::{Key, create, destroy, get, set};
pub(super) use racy::LazyKey;
pub(super) use sgx::{Key, get, set};
use sgx::{create, destroy};
} else if #[cfg(target_os = "xous")] {
mod racy;
#[cfg(test)]
mod tests;
mod xous;
pub(super) use racy::StaticKey;
pub(super) use racy::LazyKey;
pub(crate) use xous::destroy_tls;
use xous::{Key, create, destroy, get, set};
pub(super) use xous::{Key, get, set};
use xous::{create, destroy};
}
}
}

View file

@ -2,7 +2,7 @@
use crate::cell::Cell;
use crate::marker::PhantomData;
use crate::ptr;
use crate::sys::thread_local::key::StaticKey as OsKey;
use crate::sys::thread_local::key::{get, set, Key, LazyKey};
#[doc(hidden)]
#[allow_internal_unstable(thread_local_internals)]
@ -22,12 +22,12 @@ fn __init() -> $t { $init }
unsafe {
use $crate::thread::LocalKey;
use $crate::thread::local_impl::Key;
use $crate::thread::local_impl::Storage;
// Inlining does not work on windows-gnu due to linking errors around
// dllimports. See https://github.com/rust-lang/rust/issues/109797.
LocalKey::new(#[cfg_attr(windows, inline(never))] |init| {
static VAL: Key<$t> = Key::new();
static VAL: Storage<$t> = Storage::new();
VAL.get(init, __init)
})
}
@ -41,22 +41,22 @@ fn __init() -> $t { $init }
/// Use a regular global static to store this key; the state provided will then be
/// thread-local.
#[allow(missing_debug_implementations)]
pub struct Key<T> {
os: OsKey,
pub struct Storage<T> {
key: LazyKey,
marker: PhantomData<Cell<T>>,
}
unsafe impl<T> Sync for Key<T> {}
unsafe impl<T> Sync for Storage<T> {}
struct Value<T: 'static> {
value: T,
key: &'static Key<T>,
key: Key,
}
impl<T: 'static> Key<T> {
impl<T: 'static> Storage<T> {
#[rustc_const_unstable(feature = "thread_local_internals", issue = "none")]
pub const fn new() -> Key<T> {
Key { os: OsKey::new(Some(destroy_value::<T>)), marker: PhantomData }
pub const fn new() -> Storage<T> {
Storage { key: LazyKey::new(Some(destroy_value::<T>)), marker: PhantomData }
}
/// Get a pointer to the TLS value, potentially initializing it with the
@ -66,19 +66,19 @@ pub const fn new() -> Key<T> {
/// The resulting pointer may not be used after reentrant inialialization
/// or thread destruction has occurred.
pub fn get(&'static self, i: Option<&mut Option<T>>, f: impl FnOnce() -> T) -> *const T {
// SAFETY: (FIXME: get should actually be safe)
let ptr = unsafe { self.os.get() as *mut Value<T> };
let key = self.key.force();
let ptr = unsafe { get(key) as *mut Value<T> };
if ptr.addr() > 1 {
// SAFETY: the check ensured the pointer is safe (its destructor
// is not running) + it is coming from a trusted source (self).
unsafe { &(*ptr).value }
} else {
self.try_initialize(ptr, i, f)
unsafe { Self::try_initialize(key, ptr, i, f) }
}
}
fn try_initialize(
&'static self,
unsafe fn try_initialize(
key: Key,
ptr: *mut Value<T>,
i: Option<&mut Option<T>>,
f: impl FnOnce() -> T,
@ -88,13 +88,13 @@ fn try_initialize(
return ptr::null();
}
let value = i.and_then(Option::take).unwrap_or_else(f);
let ptr = Box::into_raw(Box::new(Value { value, key: self }));
// SAFETY: (FIXME: get should actually be safe)
let old = unsafe { self.os.get() as *mut Value<T> };
let value = Box::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key });
let ptr = Box::into_raw(value);
let old = unsafe { get(key) as *mut Value<T> };
// SAFETY: `ptr` is a correct pointer that can be destroyed by the key destructor.
unsafe {
self.os.set(ptr as *mut u8);
set(key, ptr as *mut u8);
}
if !old.is_null() {
// If the variable was recursively initialized, drop the old value.
@ -123,8 +123,8 @@ fn try_initialize(
abort_on_dtor_unwind(|| {
let ptr = unsafe { Box::from_raw(ptr as *mut Value<T>) };
let key = ptr.key;
unsafe { key.os.set(ptr::without_provenance_mut(1)) };
unsafe { set(key, ptr::without_provenance_mut(1)) };
drop(ptr);
unsafe { key.os.set(ptr::null_mut()) };
unsafe { set(key, ptr::null_mut()) };
});
}