LibC+LibELF: Support loading shared libraries compiled with dynamic TLS

This is a prerequisite for upstreaming our LLVM patches, as our current
hack forcing `-ftls-model=initial-exec` in the Clang driver is not
acceptable upstream.

Currently, our kernel-managed TLS implementation limits us to only
having a single block of storage for all thread-local variables that's
initialized at load time. This PR merely implements the dynamic TLS
interface (`__tls_get_addr` and TLSDESC) on top of our static TLS
infrastructure. The current model's limitations still stand:
- a single static TLS block is reserved at load time, so `dlopen()`-ing
  shared libraries that define thread-local variables might cause us to
  run out of space.
- the initial TLS image is not changeable post-load, so `dlopen()`-ing
  libraries with non-zero-initialized TLS variables is not supported.

The way we repurpose `ti_module` to mean "offset within static TLS
block" instead of "module index" is not ABI-compliant.
This commit is contained in:
Daniel Bertalan 2023-07-05 23:58:24 +02:00 committed by Jelle Raaijmakers
parent 192ee4594c
commit ad9e674fa0
6 changed files with 166 additions and 21 deletions

View file

@ -13,6 +13,8 @@ file(GLOB LIBC_SOURCES3 "../Libraries/LibC/arch/${ARCH_FOLDER}/*.S")
# Architecture-specific assembly sources that LibELF needs on every target.
list(APPEND ELF_SOURCES
    "../Libraries/LibELF/Arch/${ARCH_FOLDER}/entry.S"
    "../Libraries/LibELF/Arch/${ARCH_FOLDER}/plt_trampoline.S"
)

# Sources that only exist for a single architecture.
if ("${SERENITY_ARCH}" STREQUAL "x86_64")
    list(APPEND LIBC_SOURCES3 "../Libraries/LibC/arch/x86_64/memset.cpp")
elseif ("${SERENITY_ARCH}" STREQUAL "aarch64")
    # Runtime part of the AArch64 TLSDESC ABI (the TLS descriptor resolver).
    list(APPEND ELF_SOURCES "../Libraries/LibELF/Arch/aarch64/tls.S")
endif()
file(GLOB LIBSYSTEM_SOURCES "../Libraries/LibSystem/*.cpp")

View file

@ -67,6 +67,7 @@ set(LIBC_SOURCES
termios.cpp
time.cpp
times.cpp
tls.cpp
ulimit.cpp
unistd.cpp
utime.cpp
@ -99,7 +100,7 @@ file(GLOB ELF_SOURCES CONFIGURE_DEPENDS "../LibELF/*.cpp")
if ("${SERENITY_ARCH}" STREQUAL "aarch64")
set(ASM_SOURCES "arch/aarch64/setjmp.S")
set(ELF_SOURCES ${ELF_SOURCES} ../LibELF/Arch/aarch64/entry.S ../LibELF/Arch/aarch64/plt_trampoline.S)
set(ELF_SOURCES ${ELF_SOURCES} ../LibELF/Arch/aarch64/entry.S ../LibELF/Arch/aarch64/plt_trampoline.S ../LibELF/Arch/aarch64/tls.S)
set(CRTI_SOURCE "arch/aarch64/crti.S")
set(CRTN_SOURCE "arch/aarch64/crtn.S")
elseif ("${SERENITY_ARCH}" STREQUAL "x86_64")

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, the SerenityOS developers.
* Copyright (c) 2020-2023, the SerenityOS developers.
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -7,6 +7,7 @@
#pragma once
#include <sys/cdefs.h>
#include <sys/types.h>
__BEGIN_DECLS
@ -28,4 +29,11 @@ __attribute__((noreturn)) void __cxa_pure_virtual(void) __attribute__((weak));
__attribute__((noreturn)) void __stack_chk_fail(void);
__attribute__((noreturn)) void __stack_chk_fail_local(void);
// Argument of __tls_get_addr(): identifies one thread-local variable.
//
// NOTE: ti_module does not hold an ABI-style module index. The dynamic loader
// stores the thread-pointer offset of the defining module's TLS block in it,
// and __tls_get_addr() adds both fields to the thread pointer.
struct __tls_index {
    size_t ti_module; // Thread-pointer offset of the module's TLS block.
    size_t ti_offset; // Offset of the variable within that TLS block.
};

// Returns the calling thread's address of the thread-local variable described
// by the given index.
//
// The parameter is spelled with the `struct` keyword so that this declaration
// is valid when the header is included from C as well as from C++.
void* __tls_get_addr(struct __tls_index*);
__END_DECLS

View file

@ -0,0 +1,26 @@
/*
* Copyright (c) 2023, Daniel Bertalan <dani@danielbertalan.dev>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Types.h>
#include <sys/internals.h>
extern "C" {
// This function is called to compute the address of a thread-local variable
// which might not be stored in the static TLS block (local-dynamic and
// global-dynamic models). Compilers default to this when creating shared
// libraries, as they may be loaded after program startup by `dlopen()`.
//
// We currently only support a static TLS block, so we take a shortcut in the
// implementation of this interface: instead of storing the module ID in
// ti_module, we store the module's TLS block offset. This avoids the need to
// have a per-thread module ID -> TLS block address. This will have to be
// changed if we support dynamically allocated TLS blocks.
void* __tls_get_addr(__tls_index* index)
{
    // ti_module holds the TLS block's offset from the thread pointer (not a
    // module ID), so the variable's address is a plain sum of three terms.
    auto const thread_pointer = reinterpret_cast<FlatPtr>(__builtin_thread_pointer());
    auto const variable_address = thread_pointer + index->ti_module + index->ti_offset;
    return reinterpret_cast<void*>(variable_address);
}
}

View file

@ -0,0 +1,63 @@
/*
* Copyright (c) 2023, Daniel Bertalan <dani@danielbertalan.dev>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
// This file implements the runtime components of the AArch64 TLSDESC ABI,
// which is used when accessing thread-local variables which might not be
// stored in the static TLS block (global-dynamic and local-dynamic access
// models). Compilers default to this when creating shared libraries, as they
// may be loaded after program startup by `dlopen()`.
//
// Each referenced thread-local symbol is associated with a descriptor:
//
// struct TlsDescriptor {
// size_t (*resolver)(TlsDescriptor*);
// union {
// size_t tpoff; // for static TLS
// struct {
// size_t module_id;
// size_t module_offset;
// } *dynamic; // for dynamic TLS, not yet implemented
// };
// };
//
// The resolver takes a pointer to the descriptor as an argument and returns
// the symbol's offset to the thread pointer (tpidr_el0). The second field of
// the descriptor is an implementation-defined value which the resolver uses to
// identify the symbol.
//
// Thus, the address of a thread-local variable is retrieved as follows:
//
// &var = thread_pointer + descriptor.resolver(&descriptor);
//
// The two essential types of resolver functions are:
//
// - `__tlsdesc_static`: If the variable is located in the static TLS block,
// its thread pointer offset is a load-time constant, which can be stored in
// the descriptor. This function simply returns that.
//
// - `__tlsdesc_dynamic`: Looks up a variable by its module ID and module offset.
// This is used if the TLS block is allocated separately, so might have a
// different thread pointer offset for each thread. This works similarly to
// the traditional TLS ABI's __tls_get_addr function. Not yet implemented in
// SerenityOS.
//
// The TLSDESC format strives to make the code sequence for thread-local
// variable access as short as possible, hence the resolver functions follow a
// special calling convention: they must not clobber any registers. To ensure
// that even the usually volatile registers are saved off, we need to implement
// the resolvers in assembly.
// size_t __tlsdesc_static(TlsDescriptor* desc)
// {
// return desc->tpoff;
// }
// Align the function's first instruction to a 2^4 = 16-byte boundary.
.p2align 4
.globl __tlsdesc_static
// Hidden visibility: the symbol is not exported from the object it is linked into.
.hidden __tlsdesc_static
.type __tlsdesc_static,@function
__tlsdesc_static:
// On entry x0 points to the descriptor. Its second 8-byte field (offset 8)
// holds tpoff, which is returned in x0. No other register is written, as the
// TLSDESC resolver calling convention requires.
ldr x0, [x0, #8]
ret

View file

@ -2,7 +2,7 @@
* Copyright (c) 2019-2020, Andrew Kaster <akaster@serenityos.org>
* Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2022, Daniel Bertalan <dani@danielbertalan.dev>
* Copyright (c) 2022-2023, Daniel Bertalan <dani@danielbertalan.dev>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -35,6 +35,13 @@ static void* mmap_with_name(void* addr, size_t length, int prot, int flags, int
# define MAP_RANDOMIZED 0
#endif
#if ARCH(AARCH64)
// Enables the R_AARCH64_TLSDESC relocation handling further down in this file.
# define HAS_TLSDESC_SUPPORT
extern "C" {
// TLSDESC resolver for variables that live in the static TLS block. The
// relocation code stores this function's address into TLS descriptors.
// NOTE(review): the assembly source documents the resolver as
// `size_t __tlsdesc_static(TlsDescriptor*)`; this prototype appears to exist
// only so the address can be taken. Confirm before calling it from C++.
void* __tlsdesc_static(void*);
}
#endif
namespace ELF {
Result<NonnullRefPtr<DynamicLoader>, DlErrorMessage> DynamicLoader::try_create(int fd, DeprecatedString filepath)
@ -527,6 +534,23 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec
return VirtualAddress { reinterpret_cast<DynamicObject::IfuncResolver>(address.get())() };
};
// What a TLS relocation resolves to: the object that provides the TLS block
// and the symbol's value (0 when the relocation names no symbol).
struct ResolvedTLSSymbol {
    DynamicObject const& dynamic_object;
    FlatPtr value;
};

// Resolves the symbol referenced by a TLS relocation.
// Returns an empty Optional if the symbol lookup fails.
auto resolve_tls_symbol = [](DynamicObject::Relocation const& relocation) -> Optional<ResolvedTLSSymbol> {
    bool const names_a_symbol = relocation.symbol_index() != 0;
    if (names_a_symbol) {
        auto res = lookup_symbol(relocation.symbol());
        if (!res.has_value())
            return {};

        // TLS symbols are never ifuncs, and a successful lookup must name the defining object.
        VERIFY(relocation.symbol().type() != STT_GNU_IFUNC);
        VERIFY(res.value().dynamic_object != nullptr);

        auto const& found = res.value();
        return ResolvedTLSSymbol { *found.dynamic_object, found.value };
    }

    // Symbol index 0: the relocation refers to the relocated object itself, at value 0.
    return ResolvedTLSSymbol { relocation.dynamic_object(), 0 };
};
switch (relocation.type()) {
case R_X86_64_NONE:
@ -601,30 +625,51 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec
}
case R_AARCH64_TLS_TPREL:
case R_X86_64_TPOFF64: {
auto symbol = relocation.symbol();
FlatPtr symbol_value;
DynamicObject const* dynamic_object_of_symbol;
if (relocation.symbol_index() != 0) {
auto res = lookup_symbol(symbol);
if (!res.has_value())
break;
VERIFY(symbol.type() != STT_GNU_IFUNC);
symbol_value = res.value().value;
dynamic_object_of_symbol = res.value().dynamic_object;
} else {
symbol_value = 0;
dynamic_object_of_symbol = &relocation.dynamic_object();
}
VERIFY(dynamic_object_of_symbol);
size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
auto maybe_resolution = resolve_tls_symbol(relocation);
if (!maybe_resolution.has_value())
break;
auto [dynamic_object_of_symbol, symbol_value] = maybe_resolution.value();
*patch_ptr = addend + dynamic_object_of_symbol->tls_offset().value() + symbol_value;
size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
*patch_ptr = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value;
// At offset 0 there's the thread's ThreadSpecificData structure, we don't want to collide with it.
VERIFY(static_cast<ssize_t>(*patch_ptr) < 0);
break;
}
case R_X86_64_DTPMOD64: {
auto maybe_resolution = resolve_tls_symbol(relocation);
// Symbol lookup failed: skip the write below.
if (!maybe_resolution.has_value())
break;
// We repurpose the module index to store the TLS block's TP offset. This is fine
// because we currently only support a single static TLS block.
*patch_ptr = maybe_resolution->dynamic_object.tls_offset().value();
break;
}
case R_X86_64_DTPOFF64: {
auto maybe_resolution = resolve_tls_symbol(relocation);
// Symbol lookup failed: skip the write below.
if (!maybe_resolution.has_value())
break;
// The addend comes from the relocation entry when it carries one, otherwise
// from the current contents of the location being patched.
size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
// Store the resolved symbol value plus the addend. Unlike the TPOFF64 case,
// the TLS block's own offset is not added here; it is written by DTPMOD64.
*patch_ptr = addend + maybe_resolution->value;
break;
}
#ifdef HAS_TLSDESC_SUPPORT
case R_AARCH64_TLSDESC: {
auto maybe_resolution = resolve_tls_symbol(relocation);
// Symbol lookup failed: leave the descriptor unwritten.
if (!maybe_resolution.has_value())
break;
auto [dynamic_object_of_symbol, symbol_value] = maybe_resolution.value();
// The addend comes from the relocation entry when it carries one, otherwise
// from the current contents of the location being patched.
size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
// The descriptor is two consecutive words: the resolver's address, then the
// resolver's argument. Only the static resolver is used here, and its
// argument is the variable's offset from the thread pointer.
patch_ptr[0] = (FlatPtr)__tlsdesc_static;
patch_ptr[1] = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value;
break;
}
#endif
case R_AARCH64_IRELATIVE:
case R_X86_64_IRELATIVE: {
if (should_call_ifunc_resolver == ShouldCallIfuncResolver::No)