systemd/test/units/testsuite-36.sh
Frantisek Sumsal f53d56f1a3 test: check the numa-test.service status directly
In a couple of recent CI runs I noticed TEST-36 failing due to a missed
service exit notification and a subsequent fail of the `grep` command:

```
[  257.112153] H systemd[1]: Started numa-test.service.
[  257.114343] H systemd[899]: numa-test.service: Failed to set NUMA memory policy: Invalid argument
[  257.118270] H systemd[899]: numa-test.service: Failed at step NUMA_POLICY spawning /bin/sleep: Invalid argument
[  257.126170] H systemd[1]: Bus private-bus-connection: changing state RUNNING → CLOSING
[  257.130290] H systemd[1]: numa-test.service: Failed to send unit change signal for numa-test.service: Connection reset by peer
[  257.131567] H systemd[1]: Received SIGCHLD from PID 899 ((sleep)).
[  257.132870] H systemd[1]: Got disconnect on private connection.
[  257.134299] H systemd[1]: systemd-journald.service: Got notification message from PID 498 (FDSTORE=1)
[  257.135611] H systemd[1]: systemd-journald.service: Added fd 38 (n/a) to fd store.
[  257.136999] H systemd[1]: systemd-journald.service: Received EPOLLHUP on stored fd 38 (stored), closing.
[  257.366996] H testsuite-36.sh[536]: + stopJournalctl
[  257.366996] H testsuite-36.sh[536]: + local unit=init.scope
[  257.366996] H testsuite-36.sh[536]: + echo 'Force journald to write all queued messages'
[  257.366996] H testsuite-36.sh[536]: Force journald to write all queued messages
[  257.366996] H testsuite-36.sh[536]: + journalctl --sync
[  257.488642] H systemd-journald[498]: Received client request to rotate journal.
[  257.520821] H testsuite-36.sh[536]: + journalctl -u init.scope --cursor-file=jounalCursorFile
[  257.981399] H testsuite-36.sh[536]: + pid1StopUnit numa-test.service
[  257.984533] H testsuite-36.sh[536]: + systemctl stop numa-test.service
[  258.173656] H systemd[1]: Bus private-bus-connection: changing state AUTHENTICATING → RUNNING
[  258.180710] H systemd[1]: numa-test.service: Trying to enqueue job numa-test.service/stop/replace
[  258.182424] H systemd[1]: Added job numa-test.service/stop to transaction.
[  258.185234] H systemd[1]: numa-test.service: Installed new job numa-test.service/stop as 738
[  258.187017] H systemd[1]: numa-test.service: Enqueued job numa-test.service/stop as 738
[  258.239930] H testsuite-36.sh[536]: + grep 'numa-test.service: Main process exited, code=exited, status=242/NUMA' journal.log
```

Let's mitigate this by checking the test service exit status directly
instead of relying on the notification.
2022-09-08 15:00:26 +00:00

354 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
set -eux
set -o pipefail
at_exit() {
# shellcheck disable=SC2181
if [[ $? -ne 0 ]]; then
# We're exiting with a non-zero EC, let's dump test artifacts
# for easier debugging
[[ -v straceLog && -f "$straceLog" ]] && cat "$straceLog"
[[ -v journalLog && -f "$journalLog" ]] && cat "$journalLog"
fi
}
trap at_exit EXIT
systemd-analyze log-level debug
systemd-analyze log-target journal
# Log files
straceLog='strace.log'
journalLog='journal.log'
# Systemd config files
testUnit='numa-test.service'
testUnitFile="/run/systemd/system/$testUnit"
testUnitNUMAConf="$testUnitFile.d/numa.conf"
# Sleep constants (we should probably figure out something better but nothing comes to mind)
sleepAfterStart=1
# Journal cursor for easier navigation
journalCursorFile="jounalCursorFile"
startStrace() {
coproc strace -qq -p 1 -o "$straceLog" -e set_mempolicy -s 1024 ${1:+"$1"}
# Wait for strace to properly "initialize", i.e. until PID 1 has the TracerPid
# field set to the current strace's PID
while ! awk -v spid="$COPROC_PID" '/^TracerPid:/ {exit !($2 == spid);}' /proc/1/status; do sleep 0.1; done
}
stopStrace() {
[[ -v COPROC_PID ]] || return
local PID=$COPROC_PID
kill -s TERM "$PID"
# Make sure the strace process is indeed dead
while kill -0 "$PID" 2>/dev/null; do sleep 0.1; done
}
startJournalctl() {
: >"$journalCursorFile"
# Save journal's cursor for later navigation
journalctl --no-pager --cursor-file="$journalCursorFile" -n0 -ocat
}
stopJournalctl() {
local unit="${1:-init.scope}"
# Using journalctl --sync should be better than using SIGRTMIN+1, as
# the --sync wait until the synchronization is complete
echo "Force journald to write all queued messages"
journalctl --sync
journalctl -u "$unit" --cursor-file="$journalCursorFile" >"$journalLog"
}
checkNUMA() {
# NUMA enabled system should have at least NUMA node0
test -e /sys/devices/system/node/node0
}
writePID1NUMAPolicy() {
cat >"$confDir/numa.conf" <<EOF
[Manager]
NUMAPolicy=${1:?}
NUMAMask=${2:-""}
EOF
}
writeTestUnit() {
mkdir -p "$testUnitFile.d/"
printf "[Service]\nExecStart=/bin/sleep 3600\n" >"$testUnitFile"
}
writeTestUnitNUMAPolicy() {
cat >"$testUnitNUMAConf" <<EOF
[Service]
NUMAPolicy=${1:?}
NUMAMask=${2:-""}
EOF
systemctl daemon-reload
}
pid1ReloadWithStrace() {
startStrace
systemctl daemon-reload
sleep $sleepAfterStart
stopStrace
}
pid1ReloadWithJournal() {
startJournalctl
systemctl daemon-reload
stopJournalctl
}
pid1StartUnitWithStrace() {
startStrace '-f'
systemctl start "${1:?}"
sleep $sleepAfterStart
stopStrace
}
pid1StartUnitWithJournal() {
startJournalctl
systemctl start "${1:?}"
sleep $sleepAfterStart
stopJournalctl
}
pid1StopUnit() {
systemctl stop "${1:?}"
}
systemctlCheckNUMAProperties() {
local UNIT_NAME="${1:?}"
local NUMA_POLICY="${2:?}"
local NUMA_MASK="${3:-""}"
local LOGFILE
LOGFILE="$(mktemp)"
systemctl show -p NUMAPolicy "$UNIT_NAME" >"$LOGFILE"
grep "NUMAPolicy=$NUMA_POLICY" "$LOGFILE"
: >"$LOGFILE"
if [ -n "$NUMA_MASK" ]; then
systemctl show -p NUMAMask "$UNIT_NAME" >"$LOGFILE"
grep "NUMAMask=$NUMA_MASK" "$LOGFILE"
fi
}
writeTestUnit
# Create systemd config drop-in directory
confDir="/run/systemd/system.conf.d/"
mkdir -p "$confDir"
if ! checkNUMA; then
echo >&2 "NUMA is not supported on this machine, switching to a simple sanity check"
echo "PID1 NUMAPolicy=default && NUMAMask=0 check without NUMA support"
writePID1NUMAPolicy "default" "0"
startJournalctl
systemctl daemon-reload
stopJournalctl
grep "NUMA support not available, ignoring" "$journalLog"
echo "systemd-run NUMAPolicy=default && NUMAMask=0 check without NUMA support"
runUnit='numa-systemd-run-test.service'
startJournalctl
systemd-run -p NUMAPolicy=default -p NUMAMask=0 --unit "$runUnit" sleep 1000
sleep $sleepAfterStart
pid1StopUnit "$runUnit"
stopJournalctl "$runUnit"
grep "NUMA support not available, ignoring" "$journalLog"
else
echo "PID1 NUMAPolicy support - Default policy w/o mask"
writePID1NUMAPolicy "default"
pid1ReloadWithStrace
# Kernel requires that nodemask argument is set to NULL when setting default policy
grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"
echo "PID1 NUMAPolicy support - Default policy w/ mask"
writePID1NUMAPolicy "default" "0"
pid1ReloadWithStrace
grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"
echo "PID1 NUMAPolicy support - Bind policy w/o mask"
writePID1NUMAPolicy "bind"
pid1ReloadWithJournal
grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"
echo "PID1 NUMAPolicy support - Bind policy w/ mask"
writePID1NUMAPolicy "bind" "0"
pid1ReloadWithStrace
grep -P "set_mempolicy\(MPOL_BIND, \[0x0*1\]" "$straceLog"
echo "PID1 NUMAPolicy support - Interleave policy w/o mask"
writePID1NUMAPolicy "interleave"
pid1ReloadWithJournal
grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog"
echo "PID1 NUMAPolicy support - Interleave policy w/ mask"
writePID1NUMAPolicy "interleave" "0"
pid1ReloadWithStrace
grep -P "set_mempolicy\(MPOL_INTERLEAVE, \[0x0*1\]" "$straceLog"
echo "PID1 NUMAPolicy support - Preferred policy w/o mask"
writePID1NUMAPolicy "preferred"
pid1ReloadWithJournal
# Preferred policy with empty node mask is actually allowed and should reset allocation policy to default
grep "Failed to set NUMA memory policy, ignoring: Invalid argument" "$journalLog" && { echo >&2 "unexpected pass"; exit 1; }
echo "PID1 NUMAPolicy support - Preferred policy w/ mask"
writePID1NUMAPolicy "preferred" "0"
pid1ReloadWithStrace
grep -P "set_mempolicy\(MPOL_PREFERRED, \[0x0*1\]" "$straceLog"
echo "PID1 NUMAPolicy support - Local policy w/o mask"
writePID1NUMAPolicy "local"
pid1ReloadWithStrace
# Kernel requires that nodemask argument is set to NULL when setting default policy
# The unpatched versions of strace don't recognize the MPOL_LOCAL constant and
# return a numerical constant instead (with a comment):
# set_mempolicy(0x4 /* MPOL_??? */, NULL, 0) = 0
# Let's cover this scenario as well
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"
echo "PID1 NUMAPolicy support - Local policy w/ mask"
writePID1NUMAPolicy "local" "0"
pid1ReloadWithStrace
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"
echo "Unit file NUMAPolicy support - Default policy w/o mask"
writeTestUnitNUMAPolicy "default"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "default"
pid1StopUnit "$testUnit"
grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"
echo "Unit file NUMAPolicy support - Default policy w/ mask"
writeTestUnitNUMAPolicy "default" "0"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "default" "0"
pid1StopUnit $testUnit
# Mask must be ignored
grep "set_mempolicy(MPOL_DEFAULT, NULL" "$straceLog"
echo "Unit file NUMAPolicy support - Bind policy w/o mask"
writeTestUnitNUMAPolicy "bind"
pid1StartUnitWithJournal "$testUnit"
pid1StopUnit "$testUnit"
[[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]]
echo "Unit file NUMAPolicy support - Bind policy w/ mask"
writeTestUnitNUMAPolicy "bind" "0"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "bind" "0"
pid1StopUnit "$testUnit"
grep -P "set_mempolicy\(MPOL_BIND, \[0x0*1\]" "$straceLog"
echo "Unit file NUMAPolicy support - Interleave policy w/o mask"
writeTestUnitNUMAPolicy "interleave"
pid1StartUnitWithStrace "$testUnit"
pid1StopUnit "$testUnit"
[[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]]
echo "Unit file NUMAPolicy support - Interleave policy w/ mask"
writeTestUnitNUMAPolicy "interleave" "0"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "interleave" "0"
pid1StopUnit "$testUnit"
grep -P "set_mempolicy\(MPOL_INTERLEAVE, \[0x0*1\]" "$straceLog"
echo "Unit file NUMAPolicy support - Preferred policy w/o mask"
writeTestUnitNUMAPolicy "preferred"
pid1StartUnitWithJournal "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "preferred"
pid1StopUnit "$testUnit"
[[ $(systemctl show "$testUnit" -P ExecMainStatus) == "242" ]] && { echo >&2 "unexpected pass"; exit 1; }
echo "Unit file NUMAPolicy support - Preferred policy w/ mask"
writeTestUnitNUMAPolicy "preferred" "0"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "preferred" "0"
pid1StopUnit "$testUnit"
grep -P "set_mempolicy\(MPOL_PREFERRED, \[0x0*1\]" "$straceLog"
echo "Unit file NUMAPolicy support - Local policy w/o mask"
writeTestUnitNUMAPolicy "local"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "local"
pid1StopUnit "$testUnit"
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"
echo "Unit file NUMAPolicy support - Local policy w/ mask"
writeTestUnitNUMAPolicy "local" "0"
pid1StartUnitWithStrace "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "local" "0"
pid1StopUnit "$testUnit"
# Mask must be ignored
grep -E "set_mempolicy\((MPOL_LOCAL|0x4 [^,]*), NULL" "$straceLog"
echo "Unit file CPUAffinity=NUMA support"
writeTestUnitNUMAPolicy "bind" "0"
echo "CPUAffinity=numa" >>"$testUnitNUMAConf"
systemctl daemon-reload
systemctl start "$testUnit"
systemctlCheckNUMAProperties "$testUnit" "bind" "0"
cpulist="$(cat /sys/devices/system/node/node0/cpulist)"
affinity_systemd="$(systemctl show --value -p CPUAffinity "$testUnit")"
[ "$cpulist" = "$affinity_systemd" ]
pid1StopUnit "$testUnit"
echo "systemd-run NUMAPolicy support"
runUnit='numa-systemd-run-test.service'
systemd-run -p NUMAPolicy=default --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "default"
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=default -p NUMAMask=0 --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "default" ""
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=bind -p NUMAMask=0 --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "bind" "0"
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=interleave -p NUMAMask=0 --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "interleave" "0"
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=preferred -p NUMAMask=0 --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "preferred" "0"
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=local --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "local"
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=local -p NUMAMask=0 --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "local" ""
pid1StopUnit "$runUnit"
systemd-run -p NUMAPolicy=local -p NUMAMask=0 -p CPUAffinity=numa --unit "$runUnit" sleep 1000
systemctlCheckNUMAProperties "$runUnit" "local" ""
systemctl cat "$runUnit" | grep -q 'CPUAffinity=numa'
pid1StopUnit "$runUnit"
fi
# Cleanup
rm -rf "$confDir"
systemctl daemon-reload
systemd-analyze log-level info
echo OK >/testok
exit 0