Merge branch 'selftests-net-page_poll-allocation-error-injection'

Jakub Kicinski says:

====================
selftests: net: page_pool allocation error injection

Add a test for exercising driver memory allocation failure paths.
Page pool is a bit tricky to inject errors into at the page allocator
level because of the bulk allocation and recycling, so add explicit error
injection support "in front" of the caches.

Add a test to exercise that using only the standard APIs.
This is the first useful test for the new tests with an endpoint.
There's no point testing netdevsim here, so this is also the first
HW-only test in Python.

I'm not super happy with the traffic generation using iperf3;
my initial approach was to use mausezahn, but it turned out to be
5x slower in terms of PPS. Hopefully this is good enough for now.

v1: https://lore.kernel.org/all/20240426232400.624864-1-kuba@kernel.org/
====================

Link: https://lore.kernel.org/r/20240429144426.743476-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-04-30 08:15:33 -07:00
commit b451767036
10 changed files with 214 additions and 7 deletions

View file

@ -5,6 +5,7 @@
* Copyright (C) 2016 Red Hat, Inc.
*/
#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@ -550,6 +551,7 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
/* Calculate distance between two u32 values, valid if distance is below 2^(31)
* https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution

View file

@ -119,7 +119,7 @@ TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
# Networking tests want the net/lib target, include it automatically
ifneq ($(filter net drivers/net,$(TARGETS)),)
ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),)
ifeq ($(filter net/lib,$(TARGETS)),)
INSTALL_DEP_TARGETS := net/lib
endif

View file

@ -9,6 +9,7 @@ TEST_PROGS = \
hw_stats_l3.sh \
hw_stats_l3_gre.sh \
loopback.sh \
pp_alloc_fail.py \
#
TEST_FILES := \
@ -16,6 +17,7 @@ TEST_FILES := \
#
TEST_INCLUDES := \
$(wildcard lib/py/*.py ../lib/py/*.py) \
../../../net/lib.sh \
../../../net/forwarding/lib.sh \
../../../net/forwarding/ipip_lib.sh \

View file

@ -0,0 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
import sys
from pathlib import Path
# Directory five levels above the one holding this file; it is appended to
# sys.path below so that the `net.lib.py` and `drivers.net.lib.py` packages
# can be imported.
KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
# Re-export everything from the shared net and drivers/net helper libraries.
from net.lib.py import *
from drivers.net.lib.py import *
except ModuleNotFoundError as e:
# NOTE(review): ksft_pr and ktap_result are not imported explicitly in the
# visible code - presumably they come from the star imports above. If the
# first import is the one that failed, these names would be unbound and this
# handler would raise NameError instead of reporting a skip - confirm.
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
ktap_result(True, comment="SKIP")
# Exit status 4 - presumably the kselftest "skip" code; verify.
sys.exit(4)

View file

@ -0,0 +1,129 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
import time
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import NetdevFamily, NlError
from lib.py import NetDrvEpEnv
from lib.py import cmd, tool, GenerateTraffic
def _write_fail_config(config):
for key, value in config.items():
with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
fp.write(str(value) + "\n")
def _enable_pp_allocation_fail():
    """Arm function error injection for page_pool_alloc_pages.

    Raises KsftSkipEx when the fail_function debugfs directory is absent.
    Registers page_pool_alloc_pages with the injector unless an entry for it
    already exists, then writes the injection knobs.
    """
    have_injection = os.path.exists("/sys/kernel/debug/fail_function")
    if not have_injection:
        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")

    already_registered = os.path.exists(
        "/sys/kernel/debug/fail_function/page_pool_alloc_pages")
    if not already_registered:
        with open("/sys/kernel/debug/fail_function/inject", "w") as inject:
            inject.write("page_pool_alloc_pages\n")

    # NOTE(review): knob meanings come from the kernel fault-injection
    # framework, not from this file - presumably "fail on an interval of 511
    # calls, 100% probability, no limit on the number of failures"; confirm.
    knobs = {
        "verbose": 0,
        "interval": 511,
        "probability": 100,
        "times": -1,
    }
    _write_fail_config(knobs)
def _disable_pp_allocation_fail():
    """Undo _enable_pp_allocation_fail(); a no-op without fail_function.

    When an entry for page_pool_alloc_pages exists, an empty line is written
    to the inject file (presumably clearing the injection list - confirm
    against the fault-injection docs). The probability and times knobs are
    then reset to 0.
    """
    if os.path.exists("/sys/kernel/debug/fail_function"):
        registered = os.path.exists(
            "/sys/kernel/debug/fail_function/page_pool_alloc_pages")
        if registered:
            with open("/sys/kernel/debug/fail_function/inject", "w") as inject:
                inject.write("\n")

        reset = {"probability": 0, "times": 0}
        _write_fail_config(reset)
def test_pp_alloc(cfg, netdevnl):
    """Check that Rx keeps working while page pool allocations are failing.

    Steps:
      1. Skip unless the driver reports 'rx-alloc-fail' through qstats.
      2. Start background traffic and verify it is flowing.
      3. Enable error injection and require 'rx-alloc-fail' to grow by at
         least 100 over three seconds (otherwise skip).
      4. Verify traffic still flows, try to resize the Rx ring with
         ``ethtool -G`` and verify traffic once more.

    Injection is always disabled, traffic stopped and the original ring size
    restored on the way out.

    Args:
        cfg: test environment; ``ifindex`` and ``ifname`` are read here and
            the object is handed to GenerateTraffic.
        netdevnl: netdev netlink family object used for ``qstats_get``.

    Raises:
        KsftSkipEx: the driver lacks the stat, or failures do not show up.
        KsftFailEx: fewer than 15000 packets were received in one second.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import errno

    def get_stats():
        return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]

    def check_traffic_flowing():
        stat1 = get_stats()
        time.sleep(1)
        stat2 = get_stats()
        if stat2['rx-packets'] - stat1['rx-packets'] < 15000:
            raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])

    try:
        stats = get_stats()
    except NlError as e:
        # Use the platform's EOPNOTSUPP rather than a literal 95: the numeric
        # value differs between architectures.
        if e.nl_msg.error != -errno.EOPNOTSUPP:
            raise
        # qstats not supported at all: treat as "no stats reported".
        stats = {}
    if 'rx-alloc-fail' not in stats:
        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")

    set_g = False
    traffic = None
    try:
        traffic = GenerateTraffic(cfg)

        check_traffic_flowing()

        _enable_pp_allocation_fail()

        s1 = get_stats()
        time.sleep(3)
        s2 = get_stats()

        alloc_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
        if alloc_fails < 1:
            raise KsftSkipEx("Allocation failures not increasing")
        if alloc_fails < 100:
            raise KsftSkipEx("Allocation increasing too slowly", alloc_fails,
                             "packets:", s2['rx-packets'] - s1['rx-packets'])

        # Basic failures are fine, try to wobble some settings to catch extra failures
        check_traffic_flowing()
        g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
        # Double the ring if the maximum allows it, otherwise halve it.
        if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
            new_g = g['rx'] * 2
        elif 'rx' in g:
            new_g = g['rx'] // 2
        else:
            new_g = None

        if new_g:
            set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
            if set_g:
                ksft_pr("ethtool -G change retval: success")
            else:
                ksft_pr("ethtool -G change retval: did not succeed", new_g)
        else:
            ksft_pr("ethtool -G change retval: did not try")

        time.sleep(0.1)
        check_traffic_flowing()
    finally:
        _disable_pp_allocation_fail()
        if traffic:
            traffic.stop()
        time.sleep(0.1)
        # set_g is only True after `g` was read, so `g` is bound here.
        if set_g:
            cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
def main() -> None:
    """Run test_pp_alloc inside a NetDrvEpEnv and report the result."""
    family = NetdevFamily()
    # nsim_test=False: the environment is asked not to run this on netdevsim.
    with NetDrvEpEnv(__file__, nsim_test=False) as env:
        cases = [test_pp_alloc]
        ksft_run(cases, args=(env, family, ))
    ksft_exit()
if __name__ == "__main__":
main()

View file

@ -15,4 +15,5 @@ except ModuleNotFoundError as e:
sys.exit(4)
from .env import *
from .load import *
from .remote import Remote

View file

@ -2,7 +2,7 @@
import os
from pathlib import Path
from lib.py import KsftSkipEx
from lib.py import KsftSkipEx, KsftXfailEx
from lib.py import cmd, ip
from lib.py import NetNS, NetdevSimDev
from .remote import Remote
@ -76,7 +76,7 @@ class NetDrvEpEnv:
nsim_v4_pfx = "192.0.2."
nsim_v6_pfx = "2001:db8::"
def __init__(self, src_path):
def __init__(self, src_path, nsim_test=None):
self.env = _load_env_file(src_path)
@ -88,7 +88,10 @@ class NetDrvEpEnv:
self._ns_peer = None
if "NETIF" in self.env:
if nsim_test is True:
raise KsftXfailEx("Test only works on netdevsim")
self._check_env()
self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
self.v4 = self.env.get("LOCAL_V4")
@ -98,6 +101,9 @@ class NetDrvEpEnv:
kind = self.env["REMOTE_TYPE"]
args = self.env["REMOTE_ARGS"]
else:
if nsim_test is False:
raise KsftXfailEx("Test does not work on netdevsim")
self.create_local()
self.dev = self._ns.nsims[0].dev

View file

@ -0,0 +1,41 @@
# SPDX-License-Identifier: GPL-2.0
import time
from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
class GenerateTraffic:
    """Background iperf3 traffic between the remote endpoint and this host.

    Constructing an instance starts an iperf3 server locally and an iperf3
    client on ``env.remote`` aimed at ``env.addr``, then waits until the Rx
    packet counter of ``env.ifname`` is growing. Call stop() to terminate
    both processes.
    """

    def __init__(self, env):
        env.require_cmd("iperf3", remote=True)

        self.env = env

        port = rand_port()
        self._iperf_server = cmd(f"iperf3 -s -p {port}", background=True)
        wait_port_listen(port)
        time.sleep(0.1)
        self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
                                 background=True, host=env.remote)

        # Wait for traffic to ramp up: up to 50 polls 0.1s apart, done as soon
        # as one interval shows more than 1000 new packets.
        previous = self._rx_packets(env)
        for _ in range(50):
            time.sleep(0.1)
            current = self._rx_packets(env)
            if current - previous > 1000:
                break
            previous = current
        else:
            self.stop(verbose=True)
            raise Exception("iperf3 traffic did not ramp up")

    @staticmethod
    def _rx_packets(env):
        """Return the current Rx packet counter of env.ifname."""
        link = ip("-s link show dev " + env.ifname, json=True)[0]
        return link["stats64"]["rx"]["packets"]

    def stop(self, verbose=None):
        """Terminate the client, then the server; dump their output if verbose."""
        for banner, attr in ((">> Client:", "_iperf_client"),
                             (">> Server:", "_iperf_server")):
            proc = getattr(self, attr)
            proc.process(terminate=True)
            if verbose:
                ksft_pr(banner)
                ksft_pr(proc.stdout)
                ksft_pr(proc.stderr)

View file

@ -11,6 +11,10 @@ KSFT_RESULT = None
KSFT_RESULT_ALL = True
class KsftFailEx(Exception):
    """Exception a test raises to signal that a check has failed."""
class KsftSkipEx(Exception):
    """Exception a test raises to signal that it should be skipped."""

View file

@ -56,10 +56,10 @@ class bkg(cmd):
return self.process(terminate=self.terminate)
def ip(args, json=None, ns=None, host=None):
cmd_str = "ip "
def tool(name, args, json=None, ns=None, host=None):
cmd_str = name + ' '
if json:
cmd_str += '-j '
cmd_str += '--json '
cmd_str += args
cmd_obj = cmd(cmd_str, ns=ns, host=host)
if json:
@ -67,11 +67,17 @@ def ip(args, json=None, ns=None, host=None):
return cmd_obj
def ip(args, json=None, ns=None, host=None):
    """Run the `ip` utility through tool() and return its result.

    When *ns* is given, the arguments are prefixed with ``-netns <ns>``; the
    namespace is not forwarded to tool() separately. *json* and *host* are
    passed through unchanged.
    """
    full_args = f'-netns {ns} ' + args if ns else args
    return tool('ip', full_args, json=json, host=host)
def rand_port():
"""
Get unprivileged port, for now just random, one day we may decide to check if used.
"""
return random.randint(1024, 65535)
return random.randint(10000, 65535)
def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):