core: firewall integration with ControlGroupNFTSet=

New directive `ControlGroupNFTSet=` provides a method for integrating services
into firewall rules with NFT sets.

Example:

```
table inet filter {
...
        set timesyncd {
                type cgroupsv2
        }

        chain ntp_output {
                socket cgroupv2 != @timesyncd counter drop
                accept
        }
...
}
```

/etc/systemd/system/systemd-timesyncd.service.d/override.conf
```
[Service]
ControlGroupNFTSet=inet:filter:timesyncd
```

```
$ sudo nft list set inet filter timesyncd
table inet filter {
        set timesyncd {
                type cgroupsv2
                elements = { "system.slice/systemd-timesyncd.service" }
        }
}
```
This commit is contained in:
Topi Miettinen 2022-05-22 14:21:02 +03:00 committed by Topi Miettinen
parent ab51fd9dbd
commit c0548df0a2
15 changed files with 304 additions and 0 deletions

View file

@ -2599,6 +2599,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(iss) ControlGroupNFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sb) EnvironmentFiles = [...];
@ -3170,6 +3172,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<!--property RestrictNetworkInterfaces is not documented!-->
<!--property ControlGroupNFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -3750,6 +3754,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
<variablelist class="dbus-property" generated="True" extra-ref="ControlGroupNFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -4487,6 +4493,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(iss) ControlGroupNFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sb) EnvironmentFiles = [...];
@ -5082,6 +5090,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<!--property RestrictNetworkInterfaces is not documented!-->
<!--property ControlGroupNFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -5656,6 +5666,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
<variablelist class="dbus-property" generated="True" extra-ref="ControlGroupNFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -6282,6 +6294,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(iss) ControlGroupNFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sb) EnvironmentFiles = [...];
@ -6805,6 +6819,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<!--property RestrictNetworkInterfaces is not documented!-->
<!--property ControlGroupNFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -7297,6 +7313,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
<variablelist class="dbus-property" generated="True" extra-ref="ControlGroupNFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -8050,6 +8068,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(iss) ControlGroupNFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sb) EnvironmentFiles = [...];
@ -8559,6 +8579,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<!--property RestrictNetworkInterfaces is not documented!-->
<!--property ControlGroupNFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -9037,6 +9059,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
<variablelist class="dbus-property" generated="True" extra-ref="ControlGroupNFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -9648,6 +9672,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
readonly a(iiqq) SocketBindDeny = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(iss) ControlGroupNFTSet = [...];
};
interface org.freedesktop.DBus.Peer { ... };
interface org.freedesktop.DBus.Introspectable { ... };
@ -9800,6 +9826,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
<!--property RestrictNetworkInterfaces is not documented!-->
<!--property ControlGroupNFTSet is not documented!-->
<!--Autogenerated cross-references for systemd.directives, do not edit-->
<variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
@ -9958,6 +9986,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
<variablelist class="dbus-property" generated="True" extra-ref="ControlGroupNFTSet"/>
<!--End of Autogenerated section-->
<refsect2>
@ -10138,6 +10168,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly (bas) RestrictNetworkInterfaces = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(iss) ControlGroupNFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KillMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly i KillSignal = ...;
@ -10307,6 +10339,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
<!--property RestrictNetworkInterfaces is not documented!-->
<!--property ControlGroupNFTSet is not documented!-->
<!--property KillMode is not documented!-->
<!--property KillSignal is not documented!-->
@ -10493,6 +10527,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNetworkInterfaces"/>
<variablelist class="dbus-property" generated="True" extra-ref="ControlGroupNFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>

View file

@ -1173,6 +1173,35 @@ DeviceAllow=/dev/loop-control
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>ControlGroupNFTSet=</varname><replaceable>family</replaceable>:<replaceable>table</replaceable>:<replaceable>set</replaceable></term>
<listitem>
<para>This setting provides a method for integrating dynamic cgroup IDs into firewall rules with
NFT sets. This option expects a whitespace separated list of NFT set definitions. Each definition
consists of a colon-separated tuple of NFT address family (one of <literal>arp</literal>,
<literal>bridge</literal>, <literal>inet</literal>, <literal>ip</literal>, <literal>ip6</literal>,
or <literal>netdev</literal>), table name and set name. The names of tables and sets must conform
to lexical restrictions of NFT table names. When a control group for a unit is realized, the cgroup
ID will be appended to the NFT sets and it will be be removed when the control group is
removed. Failures to manage the sets will be ignored.</para>
<para>Example:
<programlisting>[Unit]
ControlGroupNFTSet=inet:filter:my_service
</programlisting>
Corresponding NFT rules:
<programlisting>table inet filter {
set my_service {
type cgroupsv2
}
chain x {
socket cgroupv2 level 2 @my_service accept
drop
}
}</programlisting>
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>

View file

@ -19,6 +19,7 @@
#include "devnum-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "in-addr-prefix-util.h"
#include "inotify-util.h"
#include "io-util.h"
@ -279,6 +280,8 @@ void cgroup_context_done(CGroupContext *c) {
cpu_set_reset(&c->startup_cpuset_cpus);
cpu_set_reset(&c->cpuset_mems);
cpu_set_reset(&c->startup_cpuset_mems);
c->nft_set_context = nft_set_context_free_many(c->nft_set_context, &c->n_nft_set_contexts);
}
static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
@ -617,6 +620,11 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
SET_FOREACH(iface, c->restrict_network_interfaces)
fprintf(f, "%sRestrictNetworkInterfaces: %s\n", prefix, iface);
}
for (size_t i = 0; i < c->n_nft_set_contexts; i++)
fprintf(f, "%sControlGroupNFTSet: %s:%s:%s\n", prefix,
nfproto_to_string(c->nft_set_context[i].nfproto),
c->nft_set_context[i].table, c->nft_set_context[i].set);
}
void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f) {
@ -1226,6 +1234,46 @@ static void cgroup_apply_firewall(Unit *u) {
(void) bpf_firewall_install(u);
}
static void cgroup_apply_nft_set(Unit *u) {
int r;
CGroupContext *c;
assert(u);
assert_se(c = unit_get_cgroup_context(u));
for (size_t i = 0; i < c->n_nft_set_contexts; i++) {
NFTSetContext *s = &c->nft_set_context[i];
r = nft_set_element_add_uint64(s, u->cgroup_id);
if (r < 0)
log_warning_errno(r, "Adding NFT family %s table %s set %s cgroup %" PRIu64 " failed, ignoring: %m",
nfproto_to_string(s->nfproto),
s->table,
s->set,
u->cgroup_id);
}
}
static void cgroup_delete_nft_set(Unit *u) {
int r;
CGroupContext *c;
assert(u);
assert_se(c = unit_get_cgroup_context(u));
for (size_t i = 0; i < c->n_nft_set_contexts; i++) {
NFTSetContext *s = &c->nft_set_context[i];
r = nft_set_element_del_uint64(s, u->cgroup_id);
if (r < 0)
log_warning_errno(r, "Deleting NFT family %s table %s set %s cgroup %" PRIu64 " failed, ignoring: %m",
nfproto_to_string(s->nfproto),
s->table,
s->set,
u->cgroup_id);
}
}
static void cgroup_apply_socket_bind(Unit *u) {
assert(u);
@ -1658,6 +1706,8 @@ static void cgroup_context_apply(
if (apply_mask & CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES)
cgroup_apply_restrict_network_interfaces(u);
cgroup_apply_nft_set(u);
}
static bool unit_get_needs_bpf_firewall(Unit *u) {
@ -2807,6 +2857,8 @@ void unit_prune_cgroup(Unit *u) {
(void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
#endif
cgroup_delete_nft_set(u);
is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);

View file

@ -6,6 +6,7 @@
#include "bpf-lsm.h"
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "firewall-util.h"
#include "list.h"
#include "time-util.h"
@ -194,6 +195,9 @@ struct CGroupContext {
ManagedOOMMode moom_mem_pressure;
uint32_t moom_mem_pressure_limit; /* Normalized to 2^32-1 == 100% */
ManagedOOMPreference moom_preference;
NFTSetContext *nft_set_context;
size_t n_nft_set_contexts;
};
/* Used when querying IP accounting data */

View file

@ -15,6 +15,7 @@
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "in-addr-prefix-util.h"
#include "ip-protocol-list.h"
#include "limits-util.h"
@ -443,6 +444,36 @@ static int property_get_restrict_network_interfaces(
return sd_bus_message_close_container(reply);
}
static int property_get_cgroup_nft_set(
sd_bus *bus,
const char *path,
const char *interface,
const char *property,
sd_bus_message *reply,
void *userdata,
sd_bus_error *error) {
int r;
CGroupContext *c = userdata;
assert(bus);
assert(reply);
assert(c);
r = sd_bus_message_open_container(reply, 'a', "(iss)");
if (r < 0)
return r;
for (size_t i = 0; i < c->n_nft_set_contexts; i++) {
NFTSetContext *s = &c->nft_set_context[i];
r = sd_bus_message_append(reply, "(iss)", s->nfproto, s->table, s->set);
if (r < 0)
return r;
}
return sd_bus_message_close_container(reply);
}
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@ -500,6 +531,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("SocketBindAllow", "a(iiqq)", property_get_socket_bind, offsetof(CGroupContext, socket_bind_allow), 0),
SD_BUS_PROPERTY("SocketBindDeny", "a(iiqq)", property_get_socket_bind, offsetof(CGroupContext, socket_bind_deny), 0),
SD_BUS_PROPERTY("RestrictNetworkInterfaces", "(bas)", property_get_restrict_network_interfaces, 0, 0),
SD_BUS_PROPERTY("ControlGroupNFTSet", "a(iss)", property_get_cgroup_nft_set, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
@ -2062,5 +2094,58 @@ int bus_cgroup_set_property(
if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB))
return bus_cgroup_set_transient_property(u, c, name, message, flags, error);
if (streq(name, "ControlGroupNFTSet")) {
int nfproto;
const char *table, *set;
bool empty = true;
r = sd_bus_message_enter_container(message, 'a', "(iss)");
if (r < 0)
return r;
while ((r = sd_bus_message_read(message, "(iss)", &nfproto, &table, &set)) > 0) {
const char *nfproto_name;
nfproto_name = nfproto_to_string(nfproto);
if (!nfproto_name)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid protocol %d.", nfproto);
if (nft_identifier_bad(table))
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NFT table name %s.", table);
if (nft_identifier_bad(set))
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NFT set name %s.", set);
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
r = nft_set_context_add(&c->nft_set_context, &c->n_nft_set_contexts, nfproto, table, set);
if (r < 0)
return r;
unit_write_settingf(
u, flags|UNIT_ESCAPE_SPECIFIERS, name,
"%s=%s:%s:%s",
name,
nfproto_name,
table,
set);
}
empty = false;
}
if (r < 0)
return r;
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
if (empty) {
c->nft_set_context = nft_set_context_free_many(c->nft_set_context, &c->n_nft_set_contexts);
unit_write_settingf(u, flags, name, "%s=", name);
}
return 1;
}
return 0;
}

View file

@ -241,6 +241,7 @@
{{type}}.SocketBindAllow, config_parse_cgroup_socket_bind, 0, offsetof({{type}}, cgroup_context.socket_bind_allow)
{{type}}.SocketBindDeny, config_parse_cgroup_socket_bind, 0, offsetof({{type}}, cgroup_context.socket_bind_deny)
{{type}}.RestrictNetworkInterfaces, config_parse_restrict_network_interfaces, 0, offsetof({{type}}, cgroup_context)
{{type}}.ControlGroupNFTSet, config_parse_cgroup_nft_set, 0, offsetof({{type}}, cgroup_context)
{%- endmacro -%}
%{

View file

@ -35,8 +35,10 @@
#include "env-util.h"
#include "errno-list.h"
#include "escape.h"
#include "execute.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "fs-util.h"
#include "hexdecoct.h"
#include "io-util.h"
@ -6520,3 +6522,88 @@ int config_parse_tty_size(
return config_parse_unsigned(unit, filename, line, section, section_line, lvalue, ltype, rvalue, data, userdata);
}
static int config_parse_nft_set(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
NFTSetContext **c,
size_t *n,
Unit *u) {
_cleanup_free_ char *family_str = NULL, *table = NULL, *set = NULL, *table_resolved = NULL, *set_resolved = NULL;
int nfproto, r;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(u);
if (isempty(rvalue)) {
/* Empty assignment resets the list */
*c = nft_set_context_free_many(*c, n);
return 0;
}
for (const char *p = rvalue;;) {
r = extract_many_words(&p, ":", EXTRACT_CUNESCAPE, &family_str, &table, &set, NULL);
if (r == -ENOMEM)
return log_oom();
if (r == 0)
break;
if (r != 3) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse NFT set, ignoring: %s", p);
return 0;
}
nfproto = nfproto_from_string(family_str);
if (nfproto < 0) {
log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown NFT protocol family, ignoring: %s", family_str);
return 0;
}
r = unit_path_printf(u, table, &table_resolved);
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", table);
return 0;
}
if (nft_identifier_bad(table_resolved))
return log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid table name %s, ignoring", table);
r = unit_path_printf(u, set, &set_resolved);
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", set);
return 0;
}
if (nft_identifier_bad(set_resolved))
return log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid set name %s, ignoring", set);
r = nft_set_context_add(c, n, nfproto, table_resolved, set_resolved);
if (r < 0)
return log_oom();
}
return 0;
}
int config_parse_cgroup_nft_set(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
CGroupContext *c = data;
Unit *u = userdata;
return config_parse_nft_set(unit, filename, line, section, section_line, lvalue, ltype, rvalue, &c->nft_set_context, &c->n_nft_set_contexts, u);
}

View file

@ -150,6 +150,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_socket_bind);
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_network_interfaces);
CONFIG_PARSER_PROTOTYPE(config_parse_watchdog_sec);
CONFIG_PARSER_PROTOTYPE(config_parse_tty_size);
CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_nft_set);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);

View file

@ -891,6 +891,9 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
if (streq(field, "ControlGroupNFTSet"))
return bus_append_nft_set(m, field, eq);
return 0;
}

View file

@ -28,6 +28,7 @@ Capabilities=
CapabilityBoundingSet=
ConfigurationDirectory=
ConfigurationDirectoryMode=
ControlGroupNFTSet=
CoredumpFilter=
DefaultMemoryLow=
DefaultMemoryMin=

View file

@ -8,6 +8,7 @@ BlockIODeviceWeight=
BlockIOReadBandwidth=
BlockIOWeight=
BlockIOWriteBandwidth=
ControlGroupNFTSet=
CPUAccounting=
CPUQuota=
CPUQuotaPeriodSec=

View file

@ -72,6 +72,7 @@ ConditionSecurity=
ConditionUser=
ConditionVirtualization=
Conflicts=
ControlGroupNFTSet=
DefaultDependencies=
Description=
Documentation=

View file

@ -8,6 +8,7 @@ BlockIODeviceWeight=
BlockIOReadBandwidth=
BlockIOWeight=
BlockIOWriteBandwidth=
ControlGroupNFTSet=
CPUAccounting=
CPUQuota=
CPUQuotaPeriodSec=

View file

@ -33,6 +33,7 @@ Capabilities=
CapabilityBoundingSet=
ConfigurationDirectory=
ConfigurationDirectoryMode=
ControlGroupNFTSet=
CoredumpFilter=
DefaultMemoryLow=
DefaultMemoryMin=

View file

@ -28,6 +28,7 @@ Capabilities=
CapabilityBoundingSet=
ConfigurationDirectory=
ConfigurationDirectoryMode=
ControlGroupNFTSet=
CoredumpFilter=
DefaultMemoryLow=
DefaultMemoryMin=