From 0f30bf5886f839984df545df74e9658daf3533d2 Mon Sep 17 00:00:00 2001 From: Renjaya Raga Zenta Date: Tue, 13 Feb 2024 21:15:24 +0700 Subject: [PATCH] udevd: Add ReceivePacketSteeringCPUMask for systemd.link Takes a list of CPU indices or ranges separated by either whitespace or commas. Alternatively, takes the special value "all" which will include all available CPUs in the mask. CPU ranges are specified by the lower and upper CPU indices separated by a dash (e.g. "2-6"). This option may be specified more than once, in which case the specified CPU affinity masks are merged. If an empty string is assigned, the mask is reset and all assignments prior to this will have no effect. Defaults to unset and RPS CPU list is unchanged. To disable RPS when it was previously enabled, use the special value "disable". Currently, this sets the CPU mask on every `rx` queue of the matched device (if it has multiple queues). The `/sys/class/net/<dev>/queues/rx-<n>/rps_cpus` attribute only accepts a CPU bitmap mask in hexadecimal. Fix: #30323 --- man/systemd.link.xml | 15 ++++ src/shared/cpu-set-util.c | 77 +++++++++++++++++++ src/shared/cpu-set-util.h | 5 ++ src/test/test-cpu-set-util.c | 79 ++++++++++++++++++++ src/udev/net/link-config-gperf.gperf | 1 + src/udev/net/link-config.c | 107 +++++++++++++++++++++++++++ src/udev/net/link-config.h | 3 + 7 files changed, 287 insertions(+) diff --git a/man/systemd.link.xml b/man/systemd.link.xml index 3e98e4ddba0..7c0a84dff87 100644 --- a/man/systemd.link.xml +++ b/man/systemd.link.xml @@ -967,6 +967,21 @@ + + ReceivePacketSteeringCPUMask= + + Configures Receive Packet Steering (RPS) list of CPUs to which RPS may forward traffic. + Takes a list of CPU indices or ranges separated by either whitespace or commas. Alternatively, + takes the special value all which will include all available CPUs in the mask. + CPU ranges are specified by the lower and upper CPU indices separated by a dash (e.g. 2-6). 
+ This option may be specified more than once, in which case the specified CPU affinity masks are merged. + If an empty string is assigned, the mask is reset, all assignments prior to this will have no effect. + Defaults to unset and RPS CPU list is unchanged. To disable RPS when it was previously enabled, use the + special value disable. + + + + ReceiveVLANCTAGHardwareAcceleration= diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c index d096576cd6c..1112de1333d 100644 --- a/src/shared/cpu-set-util.c +++ b/src/shared/cpu-set-util.c @@ -11,6 +11,7 @@ #include "errno-util.h" #include "extract-word.h" #include "fd-util.h" +#include "hexdecoct.h" #include "log.h" #include "macro.h" #include "memory-util.h" @@ -82,6 +83,63 @@ char *cpu_set_to_range_string(const CPUSet *set) { return TAKE_PTR(str) ?: strdup(""); } +char* cpu_set_to_mask_string(const CPUSet *a) { + _cleanup_free_ char *str = NULL; + size_t len = 0; + bool found_nonzero = false; + + assert(a); + + /* Return CPU set in hexadecimal bitmap mask, e.g. + * CPU 0 -> "1" + * CPU 1 -> "2" + * CPU 0,1 -> "3" + * CPU 0-3 -> "f" + * CPU 0-7 -> "ff" + * CPU 4-7 -> "f0" + * CPU 7 -> "80" + * None -> "0" + * + * When there are more than 32 CPUs, separate every 32 CPUs by comma, e.g. + * CPU 0-47 -> "ffff,ffffffff" + * CPU 0-63 -> "ffffffff,ffffffff" + * CPU 0-71 -> "ff,ffffffff,ffffffff" */ + + for (ssize_t i = a->allocated * 8; i >= 0; i -= 4) { + uint8_t m = 0; + + for (size_t j = 0; j < 4; j++) + if (CPU_ISSET_S(i + j, a->allocated, a->set)) + m |= 1U << j; + + if (!found_nonzero) + found_nonzero = m > 0; + + if (!found_nonzero && m == 0) + /* Skip leading zeros */ + continue; + + if (!GREEDY_REALLOC(str, len + 3)) + return NULL; + + str[len++] = hexchar(m); + if (i >= 4 && i % 32 == 0) + /* Separate by comma for each 32 CPUs. 
*/ + str[len++] = ','; + str[len] = 0; + } + + return TAKE_PTR(str) ?: strdup("0"); +} + +CPUSet* cpu_set_free(CPUSet *c) { + if (!c) + return c; + + cpu_set_reset(c); + return mfree(c); +} + int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus) { size_t need; @@ -290,3 +348,22 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) { *set = TAKE_STRUCT(s); return 0; } + +int cpu_mask_add_all(CPUSet *mask) { + long m; + int r; + + assert(mask); + + m = sysconf(_SC_NPROCESSORS_ONLN); + if (m < 0) + return -errno; + + for (unsigned i = 0; i < (unsigned) m; i++) { + r = cpu_set_add(mask, i); + if (r < 0) + return r; + } + + return 0; +} diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h index 3c63a588261..618fe1b0a3b 100644 --- a/src/shared/cpu-set-util.h +++ b/src/shared/cpu-set-util.h @@ -19,11 +19,15 @@ static inline void cpu_set_reset(CPUSet *a) { *a = (CPUSet) {}; } +CPUSet* cpu_set_free(CPUSet *c); +DEFINE_TRIVIAL_CLEANUP_FUNC(CPUSet*, cpu_set_free); + int cpu_set_add_all(CPUSet *a, const CPUSet *b); int cpu_set_add(CPUSet *a, unsigned cpu); char* cpu_set_to_string(const CPUSet *a); char *cpu_set_to_range_string(const CPUSet *a); +char* cpu_set_to_mask_string(const CPUSet *a); int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus); int parse_cpu_set_full( @@ -50,3 +54,4 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated); int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set); int cpus_in_affinity_mask(void); +int cpu_mask_add_all(CPUSet *mask); diff --git a/src/test/test-cpu-set-util.c b/src/test/test-cpu-set-util.c index a0660f579ed..0c2304e2c09 100644 --- a/src/test/test-cpu-set-util.c +++ b/src/test/test-cpu-set-util.c @@ -25,6 +25,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "0")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "1")); + str = mfree(str); 
cpu_set_reset(&c); /* Simple range (from CPUAffinity example) */ @@ -43,6 +47,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "1-2 4")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "16")); + str = mfree(str); cpu_set_reset(&c); /* A more interesting range */ @@ -61,6 +69,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "0-3 8-11")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "f0f")); + str = mfree(str); cpu_set_reset(&c); /* Quoted strings */ @@ -76,6 +88,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "8-11")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "f00")); + str = mfree(str); cpu_set_reset(&c); /* Use commas as separators */ @@ -106,6 +122,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "0-7 63")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "80000000,000000ff")); + str = mfree(str); cpu_set_reset(&c); /* Ranges */ @@ -120,6 +140,28 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_string: %s", str); str = mfree(str); cpu_set_reset(&c); + assert_se(parse_cpu_set_full("36-39,44-47", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0); + assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8)); + assert_se(CPU_COUNT_S(c.allocated, c.set) == 8); + for (cpu = 36; cpu < 40; cpu++) + assert_se(CPU_ISSET_S(cpu, c.allocated, c.set)); + for (cpu = 44; cpu < 48; cpu++) + assert_se(CPU_ISSET_S(cpu, c.allocated, c.set)); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + 
assert_se(streq(str, "f0f0,00000000")); + str = mfree(str); + cpu_set_reset(&c); + assert_se(parse_cpu_set_full("64-71", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0); + assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8)); + assert_se(CPU_COUNT_S(c.allocated, c.set) == 8); + for (cpu = 64; cpu < 72; cpu++) + assert_se(CPU_ISSET_S(cpu, c.allocated, c.set)); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "ff,00000000,00000000")); + str = mfree(str); + cpu_set_reset(&c); /* Ranges with trailing comma, space */ assert_se(parse_cpu_set_full("0-3 8-11, ", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0); @@ -136,12 +178,20 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "0-3 8-11")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "f0f")); + str = mfree(str); cpu_set_reset(&c); /* Negative range (returns empty cpu_set) */ assert_se(parse_cpu_set_full("3-0", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0); assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8)); assert_se(CPU_COUNT_S(c.allocated, c.set) == 0); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "0")); + str = mfree(str); cpu_set_reset(&c); /* Overlapping ranges */ @@ -157,6 +207,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "0-11")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "fff")); + str = mfree(str); cpu_set_reset(&c); /* Mix ranges and individual CPUs */ @@ -174,6 +228,10 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "0 2 4-11")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", 
str); + assert_se(streq(str, "ff5")); + str = mfree(str); cpu_set_reset(&c); /* Garbage */ @@ -190,6 +248,10 @@ TEST(parse_cpu_set) { assert_se(parse_cpu_set_full("", &c, true, NULL, "fake", 1, "CPUAffinity") == 0); assert_se(!c.set); /* empty string returns NULL */ assert_se(c.allocated == 0); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + assert_se(streq(str, "0")); + str = mfree(str); /* Runaway quoted string */ assert_se(parse_cpu_set_full("0 1 2 3 \"4 5 6 7 ", &c, true, NULL, "fake", 1, "CPUAffinity") == -EINVAL); @@ -206,6 +268,23 @@ TEST(parse_cpu_set) { log_info("cpu_set_to_range_string: %s", str); assert_se(streq(str, "8000-8191")); str = mfree(str); + assert_se(str = cpu_set_to_mask_string(&c)); + log_info("cpu_set_to_mask_string: %s", str); + for (size_t i = 0; i < strlen(str); i++) { + if (i < 54) { + if (i >= 8 && (i + 1) % 9 == 0) + assert_se(str[i] == ','); + else + assert_se(str[i] == 'f'); + } + else { + if (i >= 8 && (i + 1) % 9 == 0) + assert_se(str[i] == ','); + else + assert_se(str[i] == '0'); + } + } + str = mfree(str); cpu_set_reset(&c); } diff --git a/src/udev/net/link-config-gperf.gperf b/src/udev/net/link-config-gperf.gperf index 42d7cc7ee21..b77759d0944 100644 --- a/src/udev/net/link-config-gperf.gperf +++ b/src/udev/net/link-config-gperf.gperf @@ -108,6 +108,7 @@ Link.RxMaxCoalescedHighFrames, config_parse_coalesce_u32, Link.TxCoalesceHighSec, config_parse_coalesce_sec, 0, offsetof(LinkConfig, coalesce.tx_coalesce_usecs_high) Link.TxMaxCoalescedHighFrames, config_parse_coalesce_u32, 0, offsetof(LinkConfig, coalesce.tx_max_coalesced_frames_high) Link.CoalescePacketRateSampleIntervalSec, config_parse_coalesce_sec, 0, offsetof(LinkConfig, coalesce.rate_sample_interval) +Link.ReceivePacketSteeringCPUMask, config_parse_rps_cpu_mask, 0, offsetof(LinkConfig, rps_cpu_mask) Link.MDI, config_parse_mdi, 0, offsetof(LinkConfig, mdi) Link.SR-IOVVirtualFunctions, config_parse_sr_iov_num_vfs, 0, 
offsetof(LinkConfig, sr_iov_num_vfs) SR-IOV.VirtualFunction, config_parse_sr_iov_uint32, 0, offsetof(LinkConfig, sr_iov_by_section) diff --git a/src/udev/net/link-config.c b/src/udev/net/link-config.c index a8b2cc23a2c..8eee527bbff 100644 --- a/src/udev/net/link-config.c +++ b/src/udev/net/link-config.c @@ -73,6 +73,7 @@ static LinkConfig* link_config_free(LinkConfig *config) { free(config->alias); free(config->wol_password_file); erase_and_free(config->wol_password); + cpu_set_free(config->rps_cpu_mask); ordered_hashmap_free_with_destructor(config->sr_iov_by_section, sr_iov_free); @@ -937,6 +938,49 @@ static int link_apply_sr_iov_config(Link *link, sd_netlink **rtnl) { return 0; } +static int link_apply_rps_cpu_mask(Link *link) { + _cleanup_free_ char *mask_str = NULL; + LinkConfig *config; + int r; + + assert(link); + config = ASSERT_PTR(link->config); + + /* Skip if the config is not specified. */ + if (!config->rps_cpu_mask) + return 0; + + mask_str = cpu_set_to_mask_string(config->rps_cpu_mask); + if (!mask_str) + return log_oom(); + + log_link_debug(link, "Applying RPS CPU mask: %s", mask_str); + + /* Currently, this will set CPU mask to all rx queue of matched device. 
*/ + FOREACH_DEVICE_SYSATTR(link->device, attr) { + const char *c; + + c = path_startswith(attr, "queues/"); + if (!c) + continue; + + c = startswith(c, "rx-"); + if (!c) + continue; + + c += strcspn(c, "/"); + + if (!path_equal(c, "/rps_cpus")) + continue; + + r = sd_device_set_sysattr_value(link->device, attr, mask_str); + if (r < 0) + log_link_warning_errno(link, r, "Failed to write %s sysfs attribute, ignoring: %m", attr); + } + + return 0; +} + static int link_apply_udev_properties(Link *link, bool test) { LinkConfig *config; sd_device *device; @@ -1024,6 +1068,10 @@ int link_apply_config(LinkConfigContext *ctx, sd_netlink **rtnl, Link *link, boo if (r < 0) return r; + r = link_apply_rps_cpu_mask(link); + if (r < 0) + return r; + return 0; } @@ -1314,6 +1362,65 @@ int config_parse_wol_password( return 0; } +int config_parse_rps_cpu_mask( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_(cpu_set_freep) CPUSet *allocated = NULL; + CPUSet *mask, **rps_cpu_mask = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + *rps_cpu_mask = cpu_set_free(*rps_cpu_mask); + return 0; + } + + if (*rps_cpu_mask) + mask = *rps_cpu_mask; + else { + allocated = new0(CPUSet, 1); + if (!allocated) + return log_oom(); + + mask = allocated; + } + + if (streq(rvalue, "disable")) + /* Do not return early: a freshly allocated mask must still be stored below, + * so that the empty set is applied as "0" and RPS actually gets disabled. */ + cpu_set_reset(mask); + + else if (streq(rvalue, "all")) { + r = cpu_mask_add_all(mask); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to create CPU affinity mask representing \"all\" cpus, ignoring: %m"); + return 0; + } + } else { + r = parse_cpu_set_extend(rvalue, mask, /* warn= */ true, unit, filename, line, lvalue); + if (r < 0) + return 0; + } + + if (allocated) + *rps_cpu_mask = TAKE_PTR(allocated); + + return 0; +} + static const char* 
const mac_address_policy_table[_MAC_ADDRESS_POLICY_MAX] = { [MAC_ADDRESS_POLICY_PERSISTENT] = "persistent", [MAC_ADDRESS_POLICY_RANDOM] = "random", diff --git a/src/udev/net/link-config.h b/src/udev/net/link-config.h index 98cadc212e1..f6abff89e8b 100644 --- a/src/udev/net/link-config.h +++ b/src/udev/net/link-config.h @@ -6,6 +6,7 @@ #include "condition.h" #include "conf-parser.h" +#include "cpu-set-util.h" #include "ethtool-util.h" #include "hashmap.h" #include "list.h" @@ -84,6 +85,7 @@ struct LinkConfig { int autoneg_flow_control; netdev_coalesce_param coalesce; uint8_t mdi; + CPUSet *rps_cpu_mask; uint32_t sr_iov_num_vfs; OrderedHashmap *sr_iov_by_section; @@ -121,3 +123,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_wol_password); CONFIG_PARSER_PROTOTYPE(config_parse_mac_address_policy); CONFIG_PARSER_PROTOTYPE(config_parse_name_policy); CONFIG_PARSER_PROTOTYPE(config_parse_alternative_names_policy); +CONFIG_PARSER_PROTOTYPE(config_parse_rps_cpu_mask);