2021-03-05 09:36:04 +00:00
|
|
|
#!/usr/bin/env bash
|
2021-10-17 16:13:06 +00:00
|
|
|
# SPDX-License-Identifier: LGPL-2.1-or-later
|
2021-04-09 17:39:41 +00:00
|
|
|
set -eux
|
2018-12-13 16:22:01 +00:00
|
|
|
set -o pipefail
|
|
|
|
|
2023-10-19 10:28:37 +00:00
|
|
|
# shellcheck source=test/units/util.sh
|
|
|
|
. "$(dirname "$0")"/util.sh
|
|
|
|
|
2021-03-05 09:36:04 +00:00
|
|
|
systemd-analyze log-level debug
|
2018-12-13 16:22:01 +00:00
|
|
|
|
2023-01-20 23:00:38 +00:00
|
|
|
# Ensure that the init.scope.d drop-in is applied on boot
|
|
|
|
test "$(cat /sys/fs/cgroup/init.scope/memory.high)" != "max"
|
|
|
|
|
2021-03-05 09:36:04 +00:00
|
|
|
# Loose checks to ensure the environment has the necessary features for systemd-oomd
|
2021-04-07 22:09:55 +00:00
|
|
|
[[ -e /proc/pressure ]] || echo "no PSI" >>/skipped
|
2021-04-09 17:49:32 +00:00
|
|
|
# Ask stat(1) for the filesystem type mounted at /sys/fs/cgroup/ (-f: filesystem
# status, %T: type). The result is matched against two spellings below.
cgroup_type="$(stat -fc %T /sys/fs/cgroup/)"
|
2021-03-05 09:36:04 +00:00
|
|
|
# Accept either the name "cgroup2" or the hex value 0x63677270 (presumably the raw
# cgroup2 filesystem magic, printed by stat builds that have no name for it;
# confirm against linux/magic.h). Anything else marks the test as skipped.
if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]]; then
|
2021-04-07 22:09:55 +00:00
|
|
|
echo "no cgroup2" >>/skipped
|
2021-03-05 09:36:04 +00:00
|
|
|
fi
|
|
|
|
if [ ! -f /usr/lib/systemd/systemd-oomd ] && [ ! -f /lib/systemd/systemd-oomd ]; then
|
2021-04-07 22:09:55 +00:00
|
|
|
echo "no oomd" >>/skipped
|
2021-03-05 09:36:04 +00:00
|
|
|
fi
|
2021-04-09 17:49:32 +00:00
|
|
|
|
|
|
|
if [[ -e /skipped ]]; then
|
|
|
|
exit 0
|
|
|
|
fi
|
2018-12-13 16:22:01 +00:00
|
|
|
|
2023-06-16 23:01:24 +00:00
|
|
|
rm -rf /run/systemd/system/testsuite-55-testbloat.service.d
|
2018-12-13 16:22:01 +00:00
|
|
|
|
2023-06-23 22:19:31 +00:00
|
|
|
# Activate swap file if we are in a VM
|
|
|
|
if systemd-detect-virt --vm --quiet; then
|
|
|
|
mkswap /swapfile
|
|
|
|
swapon /swapfile
|
|
|
|
swapon --show
|
|
|
|
fi
|
|
|
|
|
2022-01-06 20:37:21 +00:00
|
|
|
# Configure oomd explicitly to avoid conflicts with distro dropins
|
2023-06-16 23:01:24 +00:00
|
|
|
mkdir -p /run/systemd/oomd.conf.d/
|
2023-06-16 23:06:38 +00:00
|
|
|
cat >/run/systemd/oomd.conf.d/99-oomd-test.conf <<EOF
|
|
|
|
[OOM]
|
|
|
|
DefaultMemoryPressureDurationSec=2s
|
|
|
|
EOF
|
|
|
|
|
2023-06-16 23:01:24 +00:00
|
|
|
mkdir -p /run/systemd/system/-.slice.d/
|
2023-06-16 23:06:38 +00:00
|
|
|
cat >/run/systemd/system/-.slice.d/99-oomd-test.conf <<EOF
|
|
|
|
[Slice]
|
|
|
|
ManagedOOMSwap=auto
|
|
|
|
EOF
|
|
|
|
|
2023-06-16 23:01:24 +00:00
|
|
|
mkdir -p /run/systemd/system/user@.service.d/
|
2023-06-16 23:06:38 +00:00
|
|
|
cat >/run/systemd/system/user@.service.d/99-oomd-test.conf <<EOF
|
|
|
|
[Service]
|
|
|
|
ManagedOOMMemoryPressure=auto
|
|
|
|
ManagedOOMMemoryPressureLimit=0%
|
|
|
|
EOF
|
2018-12-13 16:22:01 +00:00
|
|
|
|
2023-06-16 23:01:24 +00:00
|
|
|
mkdir -p /run/systemd/system/systemd-oomd.service.d/
|
2023-06-16 23:06:38 +00:00
|
|
|
cat >/run/systemd/system/systemd-oomd.service.d/debug.conf <<EOF
|
|
|
|
[Service]
|
|
|
|
Environment=SYSTEMD_LOG_LEVEL=debug
|
|
|
|
EOF
|
2021-07-02 17:04:31 +00:00
|
|
|
|
|
|
|
systemctl daemon-reload
|
|
|
|
|
2022-01-12 11:29:34 +00:00
|
|
|
# enable the service to ensure dbus-org.freedesktop.oom1.service exists
|
|
|
|
# and D-Bus activation works
|
|
|
|
systemctl enable systemd-oomd.service
|
|
|
|
|
2021-07-02 17:04:31 +00:00
|
|
|
# If oomd is already running for some reason, restart it to make sure the above settings are applied
|
|
|
|
if systemctl is-active systemd-oomd.service; then
|
|
|
|
systemctl restart systemd-oomd.service
|
|
|
|
fi
|
|
|
|
|
core: add systemd-executor binary
Currently we spawn services by forking a child process, doing a bunch
of work, and then exec'ing the service executable.
There are some advantages to this approach:
- quick: we immediately have access to the enormous amount of
state simply by virtue of sharing the memory with the parent
- easy to refactor and add features
- part of the same binary, will never be out of sync
There are however significant drawbacks:
- doing work after fork and before exec is against glibc's supported
case for several APIs we call
- copy-on-write trap: anytime any memory is touched in either parent
or child, a copy of that page will be triggered
- memory footprint of the child process will be memory footprint of
PID1, but using the cgroup memory limits of the unit
The last issue is especially problematic on resource constrained
systems where hard memory caps are enforced and swap is not allowed.
As soon as PID1 is under load, with no page out due to no swap, and a
service with a low MemoryMax= tries to start, hilarity ensues.
Add a new systemd-executor binary, that is able to receive all the
required state via memfd, deserialize it, prepare the appropriate
data structures and call exec_child.
Use posix_spawn which uses CLONE_VM + CLONE_VFORK, to ensure there is
no copy-on-write (same address space will be used, and parent process
will be frozen, until exec).
The sd-executor binary is pinned by FD on startup, so that we can
guarantee there will be no incompatibilities during upgrades.
2023-06-01 18:51:42 +00:00
|
|
|
# Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip under
|
|
|
|
# sanitizers as they balloon memory usage.
|
|
|
|
if ! [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
|
|
|
|
systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 /bin/true
|
|
|
|
fi
|
|
|
|
|
2021-03-05 09:36:04 +00:00
|
|
|
systemctl start testsuite-55-testchill.service
|
|
|
|
systemctl start testsuite-55-testbloat.service
|
2018-12-13 16:22:01 +00:00
|
|
|
|
2021-03-05 09:36:04 +00:00
|
|
|
# Verify systemd-oomd is monitoring the expected units
|
2021-11-24 09:02:22 +00:00
|
|
|
# Try to avoid racing the oomctl output check by checking in a loop with a timeout
|
|
|
|
oomctl_output=$(oomctl)
|
|
|
|
timeout="$(date -ud "1 minutes" +%s)"
|
|
|
|
while [[ $(date -u +%s) -le $timeout ]]; do
|
|
|
|
if grep "/testsuite-55-workload.slice" <<< "$oomctl_output"; then
|
|
|
|
break
|
|
|
|
fi
|
|
|
|
oomctl_output=$(oomctl)
|
|
|
|
sleep 1
|
|
|
|
done
|
|
|
|
|
|
|
|
grep "/testsuite-55-workload.slice" <<< "$oomctl_output"
|
|
|
|
# -F: match the percentage as a literal string; without it the '.' is a regex wildcard
grep -F "20.00%" <<< "$oomctl_output"
|
|
|
|
grep "Default Memory Pressure Duration: 2s" <<< "$oomctl_output"
|
2018-12-13 16:22:01 +00:00
|
|
|
|
2021-07-02 17:24:30 +00:00
|
|
|
systemctl status testsuite-55-testchill.service
|
|
|
|
|
2021-07-02 17:23:11 +00:00
|
|
|
# systemd-oomd watches for elevated pressure for 2 seconds before acting.
|
2021-03-05 09:36:04 +00:00
|
|
|
# It can take time to build up pressure so either wait 2 minutes or for the service to fail.
|
2021-04-09 17:49:32 +00:00
|
|
|
timeout="$(date -ud "2 minutes" +%s)"
|
2021-03-05 09:36:04 +00:00
|
|
|
while [[ $(date -u +%s) -le $timeout ]]; do
|
|
|
|
if ! systemctl status testsuite-55-testbloat.service; then
|
2018-12-13 16:22:01 +00:00
|
|
|
break
|
2021-03-05 09:36:04 +00:00
|
|
|
fi
|
2023-06-16 23:07:32 +00:00
|
|
|
oomctl
|
2021-07-02 17:23:11 +00:00
|
|
|
sleep 2
|
2018-12-13 16:22:01 +00:00
|
|
|
done
|
|
|
|
|
2021-03-05 09:36:04 +00:00
|
|
|
# testbloat should be killed and testchill should be fine
|
|
|
|
if systemctl status testsuite-55-testbloat.service; then exit 42; fi
|
|
|
|
if ! systemctl status testsuite-55-testchill.service; then exit 24; fi
|
|
|
|
|
2021-09-09 15:12:55 +00:00
|
|
|
# Make sure we also work correctly on user units.
|
|
|
|
|
2021-11-22 21:12:09 +00:00
|
|
|
systemctl start --machine "testuser@.host" --user testsuite-55-testchill.service
|
|
|
|
systemctl start --machine "testuser@.host" --user testsuite-55-testbloat.service
|
2021-09-09 15:12:55 +00:00
|
|
|
|
|
|
|
# Verify systemd-oomd is monitoring the expected units
|
2021-11-24 09:02:22 +00:00
|
|
|
# Try to avoid racing the oomctl output check by checking in a loop with a timeout
|
|
|
|
oomctl_output=$(oomctl)
|
|
|
|
timeout="$(date -ud "1 minutes" +%s)"
|
|
|
|
while [[ $(date -u +%s) -le $timeout ]]; do
|
|
|
|
if grep -E "/user.slice.*/testsuite-55-workload.slice" <<< "$oomctl_output"; then
|
|
|
|
break
|
|
|
|
fi
|
|
|
|
oomctl_output=$(oomctl)
|
|
|
|
sleep 1
|
|
|
|
done
|
|
|
|
|
|
|
|
grep -E "/user.slice.*/testsuite-55-workload.slice" <<< "$oomctl_output"
|
|
|
|
# -F: match the percentage as a literal string; without it the '.' is a regex wildcard
grep -F "20.00%" <<< "$oomctl_output"
|
|
|
|
grep "Default Memory Pressure Duration: 2s" <<< "$oomctl_output"
|
2021-09-09 15:12:55 +00:00
|
|
|
|
2021-11-22 21:12:09 +00:00
|
|
|
systemctl --machine "testuser@.host" --user status testsuite-55-testchill.service
|
2021-09-09 15:12:55 +00:00
|
|
|
|
|
|
|
# systemd-oomd watches for elevated pressure for 2 seconds before acting.
|
|
|
|
# It can take time to build up pressure so either wait 2 minutes or for the service to fail.
|
|
|
|
timeout="$(date -ud "2 minutes" +%s)"
|
|
|
|
while [[ $(date -u +%s) -le $timeout ]]; do
|
2021-11-22 21:12:09 +00:00
|
|
|
if ! systemctl --machine "testuser@.host" --user status testsuite-55-testbloat.service; then
|
2021-09-09 15:12:55 +00:00
|
|
|
break
|
|
|
|
fi
|
2023-06-16 23:07:32 +00:00
|
|
|
oomctl
|
2021-09-09 15:12:55 +00:00
|
|
|
sleep 2
|
|
|
|
done
|
|
|
|
|
|
|
|
# testbloat should be killed and testchill should be fine
|
2021-11-22 21:12:09 +00:00
|
|
|
if systemctl --machine "testuser@.host" --user status testsuite-55-testbloat.service; then exit 42; fi
|
|
|
|
if ! systemctl --machine "testuser@.host" --user status testsuite-55-testchill.service; then exit 24; fi
|
2021-09-09 15:12:55 +00:00
|
|
|
|
2021-03-05 09:36:04 +00:00
|
|
|
# only run this portion of the test if we can set xattrs
|
2023-10-19 10:28:37 +00:00
|
|
|
if cgroupfs_supports_user_xattrs; then
|
2021-03-05 09:36:04 +00:00
|
|
|
sleep 120 # wait for systemd-oomd's kill cooldown to pass and for the elevated memory pressure to come down
|
|
|
|
|
2023-06-16 23:01:24 +00:00
|
|
|
mkdir -p /run/systemd/system/testsuite-55-testbloat.service.d/
|
2023-06-16 23:06:38 +00:00
|
|
|
cat >/run/systemd/system/testsuite-55-testbloat.service.d/override.conf <<EOF
|
|
|
|
[Service]
|
|
|
|
ManagedOOMPreference=avoid
|
|
|
|
EOF
|
2021-03-05 09:36:04 +00:00
|
|
|
|
|
|
|
systemctl daemon-reload
|
|
|
|
systemctl start testsuite-55-testchill.service
|
|
|
|
systemctl start testsuite-55-testmunch.service
|
|
|
|
systemctl start testsuite-55-testbloat.service
|
|
|
|
|
2021-04-09 17:49:32 +00:00
|
|
|
timeout="$(date -ud "2 minutes" +%s)"
|
|
|
|
while [[ "$(date -u +%s)" -le "$timeout" ]]; do
|
2021-03-05 09:36:04 +00:00
|
|
|
if ! systemctl status testsuite-55-testmunch.service; then
|
|
|
|
break
|
|
|
|
fi
|
2023-06-16 23:07:32 +00:00
|
|
|
oomctl
|
2021-09-12 07:02:31 +00:00
|
|
|
sleep 2
|
2021-03-05 09:36:04 +00:00
|
|
|
done
|
|
|
|
|
|
|
|
# testmunch should be killed since testbloat had the avoid xattr on it
|
|
|
|
if ! systemctl status testsuite-55-testbloat.service; then exit 25; fi
|
|
|
|
if systemctl status testsuite-55-testmunch.service; then exit 43; fi
|
|
|
|
if ! systemctl status testsuite-55-testchill.service; then exit 24; fi
|
|
|
|
fi
|
|
|
|
|
|
|
|
systemd-analyze log-level info
|
|
|
|
|
2023-07-12 13:49:55 +00:00
|
|
|
touch /testok
|