core: firewall integration of cgroups with NFTSet=

New directive `NFTSet=` provides a method for integrating dynamic cgroup IDs
into firewall rules with NFT sets. The benefit of using this setting is to be
able to use control group as a selector in firewall rules easily and this in
turn allows more fine grained filtering. Also, NFT rules for cgroup matching
use numeric cgroup IDs, which change every time a service is restarted, making
them hard to use in systemd environment.

This option expects a whitespace separated list of NFT set definitions. Each
definition consists of a colon-separated tuple of source type (only "cgroup"),
NFT address family (one of "arp", "bridge", "inet", "ip", "ip6", or "netdev"),
table name and set name. The names of tables and sets must conform to lexical
restrictions of NFT table names. The type of the element used in the NFT filter
must be "cgroupsv2". When a control group for a unit is realized, the cgroup ID
will be appended to the NFT sets and it will be removed when the control
group is removed.  systemd only inserts elements to (or removes from) the sets,
so the related NFT rules, tables and sets must be prepared elsewhere in
advance.  Failures to manage the sets will be ignored.

If the firewall rules are reinstalled so that the contents of NFT sets are
destroyed, command systemctl daemon-reload can be used to refill the sets.

Example:

```
table inet filter {
...
        set timesyncd {
                type cgroupsv2
        }

        chain ntp_output {
                socket cgroupv2 != @timesyncd counter drop
                accept
        }
...
}
```

/etc/systemd/system/systemd-timesyncd.service.d/override.conf
```
[Service]
NFTSet=cgroup:inet:filter:timesyncd
```

```
$ sudo nft list set inet filter timesyncd
table inet filter {
        set timesyncd {
                type cgroupsv2
                elements = { "system.slice/systemd-timesyncd.service" }
        }
}
```
This commit is contained in:
Topi Miettinen 2023-09-02 21:55:36 +03:00 committed by Topi Miettinen
parent b28bd48238
commit dc7d69b3c1
17 changed files with 416 additions and 12 deletions

View file

@ -2920,6 +2920,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
readonly s MemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -3538,6 +3540,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<!--property MemoryPressureThresholdUSec is not documented!-->
<!--property NFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -4168,6 +4172,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
<variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -4957,6 +4963,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
readonly s MemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -5585,6 +5593,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<!--property MemoryPressureThresholdUSec is not documented!-->
<!--property NFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -6197,6 +6207,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -6860,6 +6872,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
readonly s MemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -7416,6 +7430,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<!--property MemoryPressureThresholdUSec is not documented!-->
<!--property NFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -7942,6 +7958,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
<variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -8728,6 +8746,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
readonly s MemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as Environment = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -9270,6 +9290,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<!--property MemoryPressureThresholdUSec is not documented!-->
<!--property NFTSet is not documented!-->
<!--property EnvironmentFiles is not documented!-->
<!--property PassEnvironment is not documented!-->
@ -9782,6 +9804,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
<variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
<variablelist class="dbus-property" generated="True" extra-ref="EnvironmentFiles"/>
@ -10427,6 +10451,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
readonly s MemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
};
interface org.freedesktop.DBus.Peer { ... };
interface org.freedesktop.DBus.Introspectable { ... };
@ -10597,6 +10623,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
<!--property MemoryPressureThresholdUSec is not documented!-->
<!--property NFTSet is not documented!-->
<!--Autogenerated cross-references for systemd.directives, do not edit-->
<variablelist class="dbus-interface" generated="True" extra-ref="org.freedesktop.systemd1.Unit"/>
@ -10775,6 +10803,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
<variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<!--End of Autogenerated section-->
<refsect2>
@ -10976,6 +11006,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
readonly s MemoryPressureWatch = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly t MemoryPressureThresholdUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(iiss) NFTSet = [...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s KillMode = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
@ -11166,6 +11198,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
<!--property MemoryPressureThresholdUSec is not documented!-->
<!--property NFTSet is not documented!-->
<!--property KillMode is not documented!-->
<!--property KillSignal is not documented!-->
@ -11374,6 +11408,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
<variablelist class="dbus-property" generated="True" extra-ref="MemoryPressureThresholdUSec"/>
<variablelist class="dbus-property" generated="True" extra-ref="NFTSet"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillMode"/>
<variablelist class="dbus-property" generated="True" extra-ref="KillSignal"/>

View file

@ -1500,6 +1500,73 @@ DeviceAllow=/dev/loop-control
<xi:include href="version-info.xml" xpointer="v254"/></listitem>
</varlistentry>
<varlistentry>
<term><varname>NFTSet=</varname><replaceable>source</replaceable>:<replaceable>family</replaceable>:<replaceable>table</replaceable>:<replaceable>set</replaceable></term>
<listitem>
<para>This setting provides a method for integrating dynamic cgroup IDs into firewall rules with
<ulink url="https://netfilter.org/projects/nftables/index.html">NFT</ulink> sets. The benefit of
using this setting is to be able to use the IDs as selectors in firewall rules easily and this in
turn allows more fine grained filtering. NFT rules for cgroup matching use numeric cgroup IDs,
which change every time a service is restarted, making them hard to use in systemd environment
otherwise.</para>
<para>This option expects a whitespace separated list of NFT set definitions. Each definition
consists of a colon-separated tuple of source type (only <literal>cgroup</literal>), NFT address
family (one of <literal>arp</literal>, <literal>bridge</literal>, <literal>inet</literal>,
<literal>ip</literal>, <literal>ip6</literal>, or <literal>netdev</literal>), table name and set
name. The names of tables and sets must conform to lexical restrictions of NFT table names. The
type of the element used in the NFT filter must match the type implied by the directive
(<literal>cgroup</literal>) as shown in the table below. When a control group is realized, the
corresponding ID will be appended to the NFT sets and it will be removed when the control group
is removed. <command>systemd</command> only inserts elements to (or removes from) the sets, so the
related NFT rules, tables and sets must be prepared elsewhere in advance. Failures to manage the
sets will be ignored.</para>
<table>
<title>Defined <varname>source type</varname> values</title>
<tgroup cols='3'>
<colspec colname='source type'/>
<colspec colname='description'/>
<colspec colname='NFT type name'/>
<thead>
<row>
<entry>Source type</entry>
<entry>Description</entry>
<entry>Corresponding NFT type name</entry>
</row>
</thead>
<tbody>
<row>
<entry><literal>cgroup</literal></entry>
<entry>control group ID</entry>
<entry><literal>cgroupsv2</literal></entry>
</row>
</tbody>
</tgroup>
</table>
<para>If the firewall rules are reinstalled so that the contents of NFT sets are destroyed, command
<command>systemctl daemon-reload</command> can be used to refill the sets.</para>
<para>Example:
<programlisting>[Service]
NFTSet=cgroup:inet:filter:my_service
</programlisting>
Corresponding NFT rules:
<programlisting>table inet filter {
set my_service {
type cgroupsv2
}
chain x {
socket cgroupv2 level 2 @my_service accept
drop
}
}</programlisting>
</para>
<xi:include href="version-info.xml" xpointer="v255"/></listitem>
</varlistentry>
</variablelist>
</refsect2>
</refsect1>

View file

@ -20,6 +20,7 @@
#include "devnum-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "in-addr-prefix-util.h"
#include "inotify-util.h"
#include "io-util.h"
@ -291,6 +292,8 @@ void cgroup_context_done(CGroupContext *c) {
cpu_set_reset(&c->startup_cpuset_mems);
c->delegate_subgroup = mfree(c->delegate_subgroup);
nft_set_context_clear(&c->nft_set_context);
}
static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
@ -664,6 +667,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
SET_FOREACH(iface, c->restrict_network_interfaces)
fprintf(f, "%sRestrictNetworkInterfaces: %s\n", prefix, iface);
}
FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets)
fprintf(f, "%sNFTSet: %s:%s:%s:%s\n", prefix, nft_set_source_to_string(nft_set->source),
nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set);
}
void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f) {
@ -1342,6 +1349,43 @@ static void cgroup_apply_firewall(Unit *u) {
(void) bpf_firewall_install(u);
}
/* Adds (add == true) or deletes (add == false) the unit's cgroup ID to/from every NFT set listed in the
 * unit's cgroup context. Nothing is done unless MANAGER_IS_SYSTEM() holds, cg_all_unified() is positive,
 * the unit has a cgroup context with at least one set, and the unit's cgroup ID is known (non-zero).
 * The manager-wide firewall context is allocated lazily on first use. Every failure is logged and
 * otherwise ignored; the function has no return value. */
void cgroup_modify_nft_set(Unit *u, bool add) {
        int r;
        CGroupContext *c;

        assert(u);

        if (!MANAGER_IS_SYSTEM(u->manager))
                return;

        if (cg_all_unified() <= 0)
                return;

        /* A unit without a cgroup context simply has nothing to register; do not abort the manager. */
        c = unit_get_cgroup_context(u);
        if (!c)
                return;

        if (u->cgroup_id == 0)
                return;

        /* No sets configured: avoid allocating the firewall context for nothing. */
        if (c->nft_set_context.n_sets == 0)
                return;

        if (!u->manager->fw_ctx) {
                r = fw_ctx_new_full(&u->manager->fw_ctx, /* init_tables= */ false);
                if (r < 0) {
                        log_warning_errno(r, "Failed to allocate firewall context, not updating NFT sets, ignoring: %m");
                        return;
                }

                assert(u->manager->fw_ctx);
        }

        FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets) {
                /* The set element is the 64-bit cgroup ID itself. */
                uint64_t element = u->cgroup_id;

                r = nft_set_element_modify_any(u->manager->fw_ctx, add, nft_set->nfproto, nft_set->table, nft_set->set, &element, sizeof(element));
                if (r < 0)
                        log_warning_errno(r, "Failed to %s NFT set: family %s, table %s, set %s, cgroup %" PRIu64 ", ignoring: %m",
                                          add? "add" : "delete", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
                else
                        log_debug("%s NFT set: family %s, table %s, set %s, cgroup %" PRIu64,
                                  add? "Added" : "Deleted", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
        }
}
static void cgroup_apply_socket_bind(Unit *u) {
assert(u);
@ -1781,6 +1825,8 @@ static void cgroup_context_apply(
if (apply_mask & CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES)
cgroup_apply_restrict_network_interfaces(u);
cgroup_modify_nft_set(u, /* add = */ true);
}
static bool unit_get_needs_bpf_firewall(Unit *u) {
@ -2950,6 +2996,8 @@ void unit_prune_cgroup(Unit *u) {
(void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
#endif
cgroup_modify_nft_set(u, /* add = */ false);
is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);

View file

@ -6,6 +6,7 @@
#include "bpf-lsm.h"
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "firewall-util.h"
#include "list.h"
#include "time-util.h"
@ -223,6 +224,8 @@ struct CGroupContext {
usec_t memory_pressure_threshold_usec;
/* NB: For now we don't make the period configurable, not the type, nor do we allow multiple
* triggers, nor triggers for non-memory pressure. We might add that later. */
NFTSetContext nft_set_context;
};
/* Used when querying IP accounting data */
@ -277,6 +280,8 @@ int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const
void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);
int cgroup_log_xattr_apply(Unit *u, const char *cgroup_path);
void cgroup_modify_nft_set(Unit *u, bool add);
CGroupMask unit_get_own_mask(Unit *u);
CGroupMask unit_get_delegate_mask(Unit *u);
CGroupMask unit_get_members_mask(Unit *u);

View file

@ -14,8 +14,10 @@
#include "dbus-cgroup.h"
#include "dbus-util.h"
#include "errno-util.h"
#include "escape.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "in-addr-prefix-util.h"
#include "ip-protocol-list.h"
#include "limits-util.h"
@ -423,6 +425,34 @@ static int property_get_restrict_network_interfaces(
return sd_bus_message_close_container(reply);
}
/* D-Bus property getter for "NFTSet": writes every entry of the cgroup context's NFT set list into the
 * reply as an array of (iiss) structs, i.e. (source, nfproto, table, set). Returns the result of closing
 * the array container, or the first negative value returned by an sd-bus call. */
static int property_get_cgroup_nft_set(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        CGroupContext *ctx = userdata;
        int ret;

        assert(bus);
        assert(reply);
        assert(ctx);

        ret = sd_bus_message_open_container(reply, 'a', "(iiss)");
        if (ret < 0)
                return ret;

        FOREACH_ARRAY(entry, ctx->nft_set_context.sets, ctx->nft_set_context.n_sets) {
                ret = sd_bus_message_append(
                                reply,
                                "(iiss)",
                                entry->source,
                                entry->nfproto,
                                entry->table,
                                entry->set);
                if (ret < 0)
                        return ret;
        }

        return sd_bus_message_close_container(reply);
}
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@ -490,6 +520,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("RestrictNetworkInterfaces", "(bas)", property_get_restrict_network_interfaces, 0, 0),
SD_BUS_PROPERTY("MemoryPressureWatch", "s", bus_property_get_cgroup_pressure_watch, offsetof(CGroupContext, memory_pressure_watch), 0),
SD_BUS_PROPERTY("MemoryPressureThresholdUSec", "t", bus_property_get_usec, offsetof(CGroupContext, memory_pressure_threshold_usec), 0),
SD_BUS_PROPERTY("NFTSet", "a(iiss)", property_get_cgroup_nft_set, 0, 0),
SD_BUS_VTABLE_END
};
@ -2192,6 +2223,75 @@ int bus_cgroup_set_property(
return 1;
}
/* Handle assignment of the "NFTSet" property: the message carries an array of (iiss) structs, i.e.
 * (source, nfproto, table name, set name). */
if (streq(name, "NFTSet")) {
int source, nfproto;
const char *table, *set;
/* Stays true only if the array contains no entries at all. */
bool empty = true;
r = sd_bus_message_enter_container(message, 'a', "(iiss)");
if (r < 0)
return r;
/* NOTE(review): each entry is added to the context as soon as it passes validation, so an invalid
 * entry later in the same array returns an error after earlier entries were already added. */
while ((r = sd_bus_message_read(message, "(iiss)", &source, &nfproto, &table, &set)) > 0) {
const char *source_name, *nfproto_name;
/* Only the cgroup source is accepted here. */
if (source != NFT_SET_SOURCE_CGROUP)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid source %d.", source);
source_name = nft_set_source_to_string(source);
assert(source_name);
/* A NULL name is treated as an invalid protocol number. */
nfproto_name = nfproto_to_string(nfproto);
if (!nfproto_name)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid protocol %d.", nfproto);
/* Invalid names are passed through cescape() before being put into the error message;
 * strna() presumably substitutes a placeholder if cescape() returned NULL — TODO confirm. */
if (!nft_identifier_valid(table)) {
_cleanup_free_ char *esc = NULL;
esc = cescape(table);
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NFT table name %s.", strna(esc));
}
if (!nft_identifier_valid(set)) {
_cleanup_free_ char *esc = NULL;
esc = cescape(set);
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid NFT set name %s.", strna(esc));
}
/* Unless UNIT_WRITE_FLAGS_NOOP() holds for the flags, append the entry to the context and write
 * the corresponding "NFTSet=source:family:table:set" setting line. A non-empty assignment appends
 * to the existing list; it does not replace it. */
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
r = nft_set_add(&c->nft_set_context, source, nfproto, table, set);
if (r < 0)
return r;
unit_write_settingf(
u, flags|UNIT_ESCAPE_SPECIFIERS, name,
"%s=%s:%s:%s:%s",
name,
source_name,
nfproto_name,
table,
set);
}
empty = false;
}
/* A negative value here is a read error from the loop condition above. */
if (r < 0)
return r;
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
/* An empty array resets the list and writes an empty "NFTSet=" assignment. */
if (empty && !UNIT_WRITE_FLAGS_NOOP(flags)) {
nft_set_context_clear(&c->nft_set_context);
unit_write_settingf(u, flags, name, "%s=", name);
}
return 1;
}
/* must be last */
if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB))
return bus_cgroup_set_transient_property(u, c, name, message, flags, error);

View file

@ -258,6 +258,7 @@
{{type}}.RestrictNetworkInterfaces, config_parse_restrict_network_interfaces, 0, offsetof({{type}}, cgroup_context)
{{type}}.MemoryPressureThresholdSec, config_parse_sec, 0, offsetof({{type}}, cgroup_context.memory_pressure_threshold_usec)
{{type}}.MemoryPressureWatch, config_parse_memory_pressure_watch, 0, offsetof({{type}}, cgroup_context.memory_pressure_watch)
{{type}}.NFTSet, config_parse_cgroup_nft_set, NFT_SET_PARSE_CGROUP, offsetof({{type}}, cgroup_context)
{%- endmacro -%}
%{

View file

@ -33,8 +33,10 @@
#include "errno-list.h"
#include "escape.h"
#include "exec-credential.h"
#include "execute.h"
#include "fd-util.h"
#include "fileio.h"
#include "firewall-util.h"
#include "fs-util.h"
#include "hexdecoct.h"
#include "io-util.h"
@ -6696,3 +6698,21 @@ int config_parse_open_file(
return 0;
}
/* Config parser for NFTSet= in unit files: data points to the unit's CGroupContext and userdata to the
 * Unit. All parsing is delegated to config_parse_nft_set(), which receives the context's embedded NFT
 * set list as its data pointer; its return value is passed through unchanged. */
int config_parse_cgroup_nft_set(
                const char *unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        CGroupContext *cgroup_ctx = ASSERT_PTR(data);
        Unit *owner = ASSERT_PTR(userdata);

        return config_parse_nft_set(
                        unit,
                        filename,
                        line,
                        section,
                        section_line,
                        lvalue,
                        ltype,
                        rvalue,
                        &cgroup_ctx->nft_set_context,
                        owner);
}

View file

@ -158,6 +158,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tty_size);
CONFIG_PARSER_PROTOTYPE(config_parse_log_filter_patterns);
CONFIG_PARSER_PROTOTYPE(config_parse_open_file);
CONFIG_PARSER_PROTOTYPE(config_parse_memory_pressure_watch);
CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_nft_set);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);

View file

@ -1679,6 +1679,8 @@ Manager* manager_free(Manager *m) {
free(m->watchdog_pretimeout_governor);
free(m->watchdog_pretimeout_governor_overridden);
m->fw_ctx = fw_ctx_free(m->fw_ctx);
#if BPF_FRAMEWORK
lsm_bpf_destroy(m->restrict_fs);
#endif

View file

@ -486,6 +486,9 @@ struct Manager {
RateLimit dump_ratelimit;
sd_event_source *memory_pressure_event_source;
/* For NFTSet= */
FirewallContext *fw_ctx;
};
static inline usec_t manager_default_timeout_abort_usec(Manager *m) {

View file

@ -1489,14 +1489,22 @@ static int socket_address_listen_do(
log_unit_error_errno(u, error, fmt, strna(_t)); \
})
static int fork_needed(const SocketAddress *address, const ExecContext *context) {
static int fork_needed(const SocketAddress *address, Socket *s) {
int r;
assert(address);
assert(context);
assert(s);
/* Check if we need to do the cgroup or netns stuff. If not we can do things much simpler. */
/* If there are any NFTSet= directives with cgroup source, we need the cgroup */
Unit *u = UNIT(s);
CGroupContext *c = unit_get_cgroup_context(u);
if (c)
FOREACH_ARRAY(nft_set, c->nft_set_context.sets, c->nft_set_context.n_sets)
if (nft_set->source == NFT_SET_SOURCE_CGROUP)
return true;
if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) {
r = bpf_firewall_supported();
if (r < 0)
@ -1505,7 +1513,7 @@ static int fork_needed(const SocketAddress *address, const ExecContext *context)
return true;
}
return exec_needs_network_namespace(context);
return exec_needs_network_namespace(&s->exec_context);
}
static int socket_address_listen_in_cgroup(
@ -1525,7 +1533,7 @@ static int socket_address_listen_in_cgroup(
* the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
* such. */
r = fork_needed(address, &s->exec_context);
r = fork_needed(address, s);
if (r < 0)
return r;
if (r == 0) {

View file

@ -3820,6 +3820,10 @@ int unit_coldplug(Unit *u) {
r = q;
}
CGroupContext *c = unit_get_cgroup_context(u);
if (c)
cgroup_modify_nft_set(u, /* add = */ true);
return r;
}

View file

@ -161,7 +161,7 @@ Address.DuplicateAddressDetection, config_parse_duplicate_address_dete
Address.Scope, config_parse_address_scope, 0, 0
Address.RouteMetric, config_parse_address_route_metric, 0, 0
Address.NetLabel, config_parse_address_netlabel, 0, 0
Address.NFTSet, config_parse_address_ip_nft_set, 0, 0
Address.NFTSet, config_parse_address_ip_nft_set, NFT_SET_PARSE_NETWORK, 0
IPv6AddressLabel.Prefix, config_parse_address_label_prefix, 0, 0
IPv6AddressLabel.Label, config_parse_address_label, 0, 0
Neighbor.Address, config_parse_neighbor_address, 0, 0
@ -258,7 +258,7 @@ DHCPv4.InitialAdvertisedReceiveWindow, config_parse_tcp_window,
DHCPv4.FallbackLeaseLifetimeSec, config_parse_dhcp_fallback_lease_lifetime, 0, 0
DHCPv4.Use6RD, config_parse_bool, 0, offsetof(Network, dhcp_use_6rd)
DHCPv4.NetLabel, config_parse_string, CONFIG_PARSE_STRING_SAFE, offsetof(Network, dhcp_netlabel)
DHCPv4.NFTSet, config_parse_nft_set, 0, offsetof(Network, dhcp_nft_set_context)
DHCPv4.NFTSet, config_parse_nft_set, NFT_SET_PARSE_NETWORK, offsetof(Network, dhcp_nft_set_context)
DHCPv6.UseAddress, config_parse_bool, 0, offsetof(Network, dhcp6_use_address)
DHCPv6.UseDelegatedPrefix, config_parse_bool, 0, offsetof(Network, dhcp6_use_pd_prefix)
DHCPv6.UseDNS, config_parse_dhcp_use_dns, AF_INET6, 0
@ -280,7 +280,7 @@ DHCPv6.DUIDRawData, config_parse_duid_rawdata,
DHCPv6.RapidCommit, config_parse_bool, 0, offsetof(Network, dhcp6_use_rapid_commit)
DHCPv6.NetLabel, config_parse_string, CONFIG_PARSE_STRING_SAFE, offsetof(Network, dhcp6_netlabel)
DHCPv6.SendRelease, config_parse_bool, 0, offsetof(Network, dhcp6_send_release)
DHCPv6.NFTSet, config_parse_nft_set, 0, offsetof(Network, dhcp6_nft_set_context)
DHCPv6.NFTSet, config_parse_nft_set, NFT_SET_PARSE_NETWORK, offsetof(Network, dhcp6_nft_set_context)
IPv6AcceptRA.UseGateway, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_gateway)
IPv6AcceptRA.UseRoutePrefix, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_route_prefix)
IPv6AcceptRA.UseAutonomousPrefix, config_parse_bool, 0, offsetof(Network, ipv6_accept_ra_use_autonomous_prefix)
@ -304,7 +304,7 @@ IPv6AcceptRA.RouteAllowList, config_parse_in_addr_prefixes,
IPv6AcceptRA.RouteDenyList, config_parse_in_addr_prefixes, AF_INET6, offsetof(Network, ndisc_deny_listed_route_prefix)
IPv6AcceptRA.Token, config_parse_address_generation_type, 0, offsetof(Network, ndisc_tokens)
IPv6AcceptRA.NetLabel, config_parse_string, CONFIG_PARSE_STRING_SAFE, offsetof(Network, ndisc_netlabel)
IPv6AcceptRA.NFTSet, config_parse_nft_set, 0, offsetof(Network, ndisc_nft_set_context)
IPv6AcceptRA.NFTSet, config_parse_nft_set, NFT_SET_PARSE_NETWORK, offsetof(Network, ndisc_nft_set_context)
DHCPServer.ServerAddress, config_parse_dhcp_server_address, 0, 0
DHCPServer.UplinkInterface, config_parse_uplink, 0, 0
DHCPServer.RelayTarget, config_parse_in_addr_non_null, AF_INET, offsetof(Network, dhcp_server_relay_target)
@ -372,7 +372,7 @@ DHCPPrefixDelegation.ManageTemporaryAddress, config_parse_bool,
DHCPPrefixDelegation.Token, config_parse_address_generation_type, 0, offsetof(Network, dhcp_pd_tokens)
DHCPPrefixDelegation.RouteMetric, config_parse_uint32, 0, offsetof(Network, dhcp_pd_route_metric)
DHCPPrefixDelegation.NetLabel, config_parse_string, CONFIG_PARSE_STRING_SAFE, offsetof(Network, dhcp_pd_netlabel)
DHCPPrefixDelegation.NFTSet, config_parse_nft_set, 0, offsetof(Network, dhcp_pd_nft_set_context)
DHCPPrefixDelegation.NFTSet, config_parse_nft_set, NFT_SET_PARSE_NETWORK, offsetof(Network, dhcp_pd_nft_set_context)
IPv6SendRA.RouterLifetimeSec, config_parse_router_lifetime, 0, offsetof(Network, router_lifetime_usec)
IPv6SendRA.RetransmitSec, config_parse_router_retransmit, 0, offsetof(Network, router_retransmit_usec)
IPv6SendRA.Managed, config_parse_bool, 0, offsetof(Network, router_managed)

View file

@ -17,6 +17,7 @@
#include "exec-util.h"
#include "exit-status.h"
#include "fileio.h"
#include "firewall-util.h"
#include "hexdecoct.h"
#include "hostname-util.h"
#include "in-addr-util.h"
@ -449,6 +450,91 @@ static int bus_append_ip_address_access(sd_bus_message *m, int family, const uni
return sd_bus_message_close_container(m);
}
/* Parses eq as a whitespace-separated list of "source:family:table:set" tuples and appends it to m as a
 * (sv) property struct whose variant is an a(iiss) array. An empty eq produces an empty array. Only the
 * cgroup source is accepted. Returns 1 on success, or a negative value after logging the failure. */
static int bus_append_nft_set(sd_bus_message *m, const char *field, const char *eq) {
        const char *cursor;
        int ret;

        assert(m);
        assert(field);
        assert(eq);

        if (isempty(eq)) {
                ret = sd_bus_message_append(m, "(sv)", field, "a(iiss)", 0);
                if (ret < 0)
                        return bus_log_create_error(ret);

                return 1;
        }

        /* Open struct -> variant -> array by hand, since the number of entries is not known up front. */
        ret = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
        if (ret < 0)
                return bus_log_create_error(ret);

        ret = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
        if (ret < 0)
                return bus_log_create_error(ret);

        ret = sd_bus_message_open_container(m, 'v', "a(iiss)");
        if (ret < 0)
                return bus_log_create_error(ret);

        ret = sd_bus_message_open_container(m, 'a', "(iiss)");
        if (ret < 0)
                return bus_log_create_error(ret);

        cursor = eq;
        for (;;) {
                _cleanup_free_ char *word = NULL, *source_name = NULL, *family_name = NULL, *table_name = NULL, *set_name = NULL;
                const char *rest = NULL;
                int source_id, family_id;

                /* Pull the next whitespace-separated tuple off the input. */
                ret = extract_first_word(&cursor, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
                if (ret == -ENOMEM)
                        return log_oom();
                if (ret < 0)
                        return log_error_errno(ret, "Failed to parse %s: %m", field);
                if (ret == 0)
                        break;
                if (isempty(word))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);

                /* Split the tuple at colons; exactly four fields with nothing left over are required. */
                rest = word;
                ret = extract_many_words(&rest, ":", EXTRACT_CUNESCAPE, &source_name, &family_name, &table_name, &set_name, NULL);
                if (ret == -ENOMEM)
                        return log_oom();
                if (ret != 4 || !isempty(rest))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);

                assert(source_name);
                assert(family_name);
                assert(table_name);
                assert(set_name);

                source_id = nft_set_source_from_string(source_name);
                if (source_id != NFT_SET_SOURCE_CGROUP)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);

                family_id = nfproto_from_string(family_name);
                if (family_id < 0 || !nft_identifier_valid(table_name) || !nft_identifier_valid(set_name))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s", field);

                ret = sd_bus_message_append(m, "(iiss)", source_id, family_id, table_name, set_name);
                if (ret < 0)
                        return bus_log_create_error(ret);
        }

        /* Close the array, the variant and the struct, innermost first. */
        for (unsigned depth = 0; depth < 3; depth++) {
                ret = sd_bus_message_close_container(m);
                if (ret < 0)
                        return bus_log_create_error(ret);
        }

        return 1;
}
static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) {
int r;
@ -914,6 +1000,9 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
if (streq(field, "MemoryPressureThresholdSec"))
return bus_append_parse_sec_rename(m, field, eq);
if (streq(field, "NFTSet"))
return bus_append_nft_set(m, field, eq);
return 0;
}

View file

@ -1202,6 +1202,7 @@ static const char *const nft_set_source_table[] = {
[NFT_SET_SOURCE_ADDRESS] = "address",
[NFT_SET_SOURCE_PREFIX] = "prefix",
[NFT_SET_SOURCE_IFINDEX] = "ifindex",
[NFT_SET_SOURCE_CGROUP] = "cgroup",
};
DEFINE_STRING_TABLE_LOOKUP(nft_set_source, int);
@ -1218,11 +1219,11 @@ void nft_set_context_clear(NFTSetContext *s) {
s->sets = mfree(s->sets);
}
static int nft_set_add(NFTSetContext *s, NFTSetSource source, int nfproto, const char *table, const char *set) {
int nft_set_add(NFTSetContext *s, NFTSetSource source, int nfproto, const char *table, const char *set) {
_cleanup_free_ char *table_dup = NULL, *set_dup = NULL;
assert(s);
assert(IN_SET(source, NFT_SET_SOURCE_ADDRESS, NFT_SET_SOURCE_PREFIX, NFT_SET_SOURCE_IFINDEX));
assert(IN_SET(source, NFT_SET_SOURCE_ADDRESS, NFT_SET_SOURCE_PREFIX, NFT_SET_SOURCE_IFINDEX, NFT_SET_SOURCE_CGROUP));
assert(nfproto_is_valid(nfproto));
assert(table);
assert(set);
@ -1285,6 +1286,7 @@ int config_parse_nft_set(
assert(lvalue);
assert(rvalue);
assert(nft_set_context);
assert(IN_SET(ltype, NFT_SET_PARSE_NETWORK, NFT_SET_PARSE_CGROUP));
if (isempty(rvalue)) {
nft_set_context_clear(nft_set_context);
@ -1328,7 +1330,9 @@ int config_parse_nft_set(
assert(set);
source = nft_set_source_from_string(source_str);
if (source < 0) {
if (source < 0 ||
(ltype == NFT_SET_PARSE_NETWORK && !IN_SET(source, NFT_SET_SOURCE_ADDRESS, NFT_SET_SOURCE_PREFIX, NFT_SET_SOURCE_IFINDEX)) ||
(ltype == NFT_SET_PARSE_CGROUP && source != NFT_SET_SOURCE_CGROUP)) {
_cleanup_free_ char *esc = NULL;
esc = cescape(source_str);

View file

@ -36,6 +36,7 @@ typedef enum NFTSetSource {
NFT_SET_SOURCE_ADDRESS,
NFT_SET_SOURCE_PREFIX,
NFT_SET_SOURCE_IFINDEX,
NFT_SET_SOURCE_CGROUP,
_NFT_SET_SOURCE_MAX,
_NFT_SET_SOURCE_INVALID = -EINVAL,
} NFTSetSource;
@ -89,4 +90,11 @@ int nft_set_element_modify_any(
const void *element,
size_t element_size);
int nft_set_add(NFTSetContext *s, NFTSetSource source, int nfproto, const char *table, const char *set);
/* Passed as ltype to config_parse_nft_set() to select which NFT set sources are accepted. */
typedef enum NFTSetParseFlags {
        NFT_SET_PARSE_NETWORK = 0, /* "address", "prefix" and "ifindex" sources */
        NFT_SET_PARSE_CGROUP  = 1, /* only the "cgroup" source */
} NFTSetParseFlags;
CONFIG_PARSER_PROTOTYPE(config_parse_nft_set);

View file

@ -43,6 +43,14 @@ int main(int argc, char **argv) {
r = safe_atou32(argv[6], &element);
assert_se(r == 0);
r = nft_set_element_modify_any(ctx, add, nfproto, table, set, &element, sizeof(element));
assert_se(r == 0);
} else if (streq(argv[5], "uint64")) {
uint64_t element;
r = safe_atou64(argv[6], &element);
assert_se(r == 0);
r = nft_set_element_modify_any(ctx, add, nfproto, table, set, &element, sizeof(element));
assert_se(r == 0);
} else if (streq(argv[5], "in_addr")) {