add support for KSM

This adds support for KSM (kernel samepage merging). It adds a new
boolean parameter called MemoryKSM to enable the feature. The feature
can only be enabled with newer kernels.
This commit is contained in:
Stefan Roesch 2023-02-28 12:39:35 -08:00 committed by Lennart Poettering
parent 308b189511
commit 85614c6e2f
11 changed files with 65 additions and 0 deletions

View file

@ -3174,6 +3174,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
@ -3739,6 +3741,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<!--property ProtectHostname is not documented!-->
<!--property MemoryKSM is not documented!-->
<!--property NetworkNamespacePath is not documented!-->
<!--property IPCNamespacePath is not documented!-->
@ -4405,6 +4409,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>
<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@ -5184,6 +5190,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
@ -5761,6 +5769,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<!--property ProtectHostname is not documented!-->
<!--property MemoryKSM is not documented!-->
<!--property NetworkNamespacePath is not documented!-->
<!--property IPCNamespacePath is not documented!-->
@ -6407,6 +6417,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>
<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@ -7061,6 +7073,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
@ -7566,6 +7580,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<!--property ProtectHostname is not documented!-->
<!--property MemoryKSM is not documented!-->
<!--property NetworkNamespacePath is not documented!-->
<!--property IPCNamespacePath is not documented!-->
@ -8130,6 +8146,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>
<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@ -8911,6 +8929,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b ProtectHostname = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly b MemoryKSM = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s NetworkNamespacePath = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s IPCNamespacePath = '...';
@ -9402,6 +9422,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<!--property ProtectHostname is not documented!-->
<!--property MemoryKSM is not documented!-->
<!--property NetworkNamespacePath is not documented!-->
<!--property IPCNamespacePath is not documented!-->
@ -9952,6 +9974,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<variablelist class="dbus-property" generated="True" extra-ref="ProtectHostname"/>
<variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
<variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
<variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>

View file

@ -1773,6 +1773,22 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
<xi:include href="system-or-user-ns.xml" xpointer="singular"/></listitem>
</varlistentry>
<varlistentry>
<term><varname>MemoryKSM=</varname></term>
<listitem><para>Takes a boolean argument. If true, KSM (kernel samepage merging) is enabled for the
processes of the unit; if false, it is explicitly disabled for them. If the setting is not specified,
the KSM state is left unchanged. KSM is a memory-saving de-duplication feature: anonymous memory pages
with identical content can be replaced by a single write-protected page. This feature should only be
enabled for jobs that share the same security domain. For details, see
<ulink url="https://docs.kernel.org/admin-guide/mm/ksm.html">Kernel Samepage Merging</ulink> in the
kernel documentation.</para>
<para>Note that this functionality might not be available, for example if KSM is disabled in the
kernel, or if the kernel does not support controlling KSM at the process level through
<function>prctl()</function> (<constant>PR_SET_MEMORY_MERGE</constant>).</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>PrivateUsers=</varname></term>

View file

@ -202,6 +202,7 @@ CONFIG_X86_MSR=y
CONFIG_XFRM_USER=y
CONFIG_XFS_FS=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_KSM=y
# CONFIG_WIRELESS is not set
# CONFIG_WLAN is not set

View file

@ -20,3 +20,7 @@
#ifndef PR_MDWE_REFUSE_EXEC_GAIN
#define PR_MDWE_REFUSE_EXEC_GAIN 1
#endif
/* Fallback for build environments whose headers do not yet provide PR_SET_MEMORY_MERGE, the prctl()
 * option used to switch KSM (kernel samepage merging) on or off for the calling process. */
#ifndef PR_SET_MEMORY_MERGE
#define PR_SET_MEMORY_MERGE 67
#endif

View file

@ -1347,6 +1347,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RootImagePolicy", "s", property_get_image_policy, offsetof(ExecContext, root_image_policy), SD_BUS_VTABLE_PROPERTY_CONST),
@ -2024,6 +2025,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "ProtectHostname"))
return bus_set_transient_bool(u, name, &c->protect_hostname, message, flags, error);
if (streq(name, "MemoryKSM"))
return bus_set_transient_tristate(u, name, &c->memory_ksm, message, flags, error);
if (streq(name, "UtmpIdentifier"))
return bus_set_transient_string(u, name, &c->utmp_id, message, flags, error);

View file

@ -5193,6 +5193,16 @@ static int exec_child(
return r;
}
/* Apply MemoryKSM=. A negative value means the setting was not configured, in which case the KSM state
 * is left untouched; 0 or 1 explicitly turns merging off or on for this process. */
if (context->memory_ksm >= 0) {
        /* The kernel rejects PR_SET_MEMORY_MERGE with EINVAL unless arg3..arg5 are zero. prctl() is
         * variadic, so omitted arguments are indeterminate: always pass all of them explicitly. */
        if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm, 0L, 0L, 0L) < 0) {
                /* With the trailing arguments zeroed, EINVAL can only mean that the kernel does not
                 * know this option (too old, or built without CONFIG_KSM), so treat it like the other
                 * "not supported" errors and carry on. */
                if (errno == EINVAL || ERRNO_IS_NOT_SUPPORTED(errno))
                        log_unit_debug_errno(unit, errno, "KSM support not available, ignoring.");
                else {
                        *exit_status = EXIT_KSM;
                        return log_unit_error_errno(unit, errno, "Failed to set KSM: %m");
                }
        }
}
/* Drop groups as early as possible.
* This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
* For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
@ -5773,6 +5783,7 @@ void exec_context_init(ExecContext *c) {
c->tty_cols = UINT_MAX;
numa_policy_reset(&c->numa_policy);
c->private_mounts = -1;
c->memory_ksm = -1;
}
void exec_context_done(ExecContext *c) {

View file

@ -310,6 +310,7 @@ struct ExecContext {
ProcSubset proc_subset; /* subset= */
int private_mounts;
int memory_ksm;
bool private_tmp;
bool private_network;
bool private_devices;

View file

@ -176,6 +176,7 @@
{{type}}.SmackProcessLabel, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
{% endif %}
{{type}}.ProtectHostname, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_hostname)
{{type}}.MemoryKSM, config_parse_tristate, 0, offsetof({{type}}, exec_context.memory_ksm)
{%- endmacro -%}
{%- macro KILL_CONTEXT_CONFIG_ITEMS(type) -%}

View file

@ -991,6 +991,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"CPUSchedulingResetOnFork",
"LockPersonality",
"ProtectHostname",
"MemoryKSM",
"RestrictSUIDSGID"))
return bus_append_parse_boolean(m, field, eq);

View file

@ -72,6 +72,7 @@ const ExitStatusMapping exit_status_mappings[256] = {
[EXIT_NUMA_POLICY] = { "NUMA_POLICY", EXIT_STATUS_SYSTEMD },
[EXIT_CREDENTIALS] = { "CREDENTIALS", EXIT_STATUS_SYSTEMD },
[EXIT_BPF] = { "BPF", EXIT_STATUS_SYSTEMD },
[EXIT_KSM] = { "KSM", EXIT_STATUS_SYSTEMD },
[EXIT_EXCEPTION] = { "EXCEPTION", EXIT_STATUS_SYSTEMD },

View file

@ -72,6 +72,7 @@ enum {
EXIT_NUMA_POLICY,
EXIT_CREDENTIALS,
EXIT_BPF,
EXIT_KSM,
EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
};