From 6c8ae362bb3b93a260d38fb9829183c025dc4948 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Sat, 25 Jun 2022 04:59:52 +0100 Subject: [PATCH] CI: Never unload llvmpipe DSO whilst testing This commit is truly horrible. We want to run ASan with leak checking enabled in CI so we can catch memory leaks before they're introduced. This works well with Pixman, and with NIR-only drivers like iris or Panfrost. But when we run under llvmpipe - which we do under CI - we start failing because: - Mesa pulls in llvmpipe via dlopen - llvmpipe pulls in LLVM itself via DT_NEEDED - initialising LLVM's global type/etc systems performs thread-local allocations - llvmpipe can't free those allocations since the application might also be using LLVM - Weston stops using GL and destroys all GL objects, leading to Mesa unloading llvmpipe like it should - with everything disappearing from the process's vmap, ASan can no longer keep track of still-reachable pointers - tests fail because LLVM is 'leaking' Usually, an alternative is to LD_PRELOAD a shim which overrides dlclose() to be a no-op. This is not usable here, because when $LD_PRELOAD is not empty and ASan is not first in it, ASan immediately errors out. Prepending ASan doesn't work, because we run our tests through Meson (which also invokes Ninja), leading to LSan exploding over CPython and Ninja, which is not what we're interested in. It would be possible to inject _both_ ASan and a dlclose-does-nothing shim DSO into the LD_PRELOAD environment for every test, but that seems even worse, especially as Meson strongly discourages globbing for random files in the root. So, here we are, doing what we can: finding where swrast_dri.so (aka llvmpipe) lives, stashing that in an environment variable, and deliberately leaking a dlopen handle which we never close to ensure that neither llvmpipe nor LLVM leave our process's address space before we exit. 
Signed-off-by: Daniel Stone --- .gitlab-ci/virtme-scripts/run-weston-tests.sh | 4 ++++ tests/meson.build | 1 + tests/weston-test-runner.c | 20 +++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/.gitlab-ci/virtme-scripts/run-weston-tests.sh b/.gitlab-ci/virtme-scripts/run-weston-tests.sh index 2b156333..c61bd615 100755 --- a/.gitlab-ci/virtme-scripts/run-weston-tests.sh +++ b/.gitlab-ci/virtme-scripts/run-weston-tests.sh @@ -28,6 +28,10 @@ export PATH=/usr/local/bin:$PATH export SEATD_LOGLEVEL=debug +# Terrible hack, per comment in weston-test-runner.c's main(): find Mesa's +# llvmpipe driver module location +export WESTON_CI_LEAK_DL_HANDLE=$(find /usr/local -name swrast_dri.so -print 2>/dev/null || true) + # run the tests and save the exit status # we give ourselves a very generous timeout multiplier due to ASan overhead echo 0x1f > /sys/module/drm/parameters/debug diff --git a/tests/meson.build b/tests/meson.build index 6a5dea41..06479710 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -14,6 +14,7 @@ lib_test_runner = static_library( dependencies: [ dep_libweston_private_h_deps, dep_wayland_client, + dep_libdl, ], include_directories: common_inc, install: false, diff --git a/tests/weston-test-runner.c b/tests/weston-test-runner.c index 0ab7bc1b..6bea6705 100644 --- a/tests/weston-test-runner.c +++ b/tests/weston-test-runner.c @@ -34,12 +34,18 @@ #include #include #include +#include <dlfcn.h> #include "test-config.h" #include "weston-test-runner.h" #include "weston-testsuite-data.h" #include "shared/string-helpers.h" +/* This is a glibc extension; if we can't use it then make it harmless. 
*/ +#ifndef RTLD_NODELETE +#define RTLD_NODELETE 0 +#endif + /** * \defgroup testharness Test harness * \defgroup testharness_private Test harness private @@ -627,9 +633,23 @@ main(int argc, char *argv[]) enum test_result_code ret; enum test_result_code result = RESULT_OK; const struct fixture_setup_array *fsa; + const char *leak_dl_handle; int fi; int fi_end; + /* This is horrific, but it gives us working leak checking. If we + * actually unload llvmpipe, then we also unload LLVM, and some global + * setup it's done - which llvmpipe can't tear down because the actual + * client might be using LLVM instead. + * + * Turns out if llvmpipe is always live, then the pointers are always + * reachable, so LeakSanitizer just tells us about our own code rather + * than LLVM's, so ... + */ + leak_dl_handle = getenv("WESTON_CI_LEAK_DL_HANDLE"); + if (leak_dl_handle) + (void) dlopen(leak_dl_handle, RTLD_LAZY | RTLD_GLOBAL | RTLD_NODELETE); + harness = weston_test_harness_create(argc, argv); fsa = fixture_setup_array_get_();