nspawn: introduce an option for specifying network namespace path

Add a new option `--network-namespace-path` to systemd-nspawn to allow
users to specify an arbitrary network namespace, e.g. `/run/netns/foo`.
Then systemd-nspawn will open the netns file, pass the fd to
outer_child, and enter the namespace represented by the fd before
running inner_child.

```
$ sudo ip netns add foo
$ mount | grep /run/netns/foo
nsfs on /run/netns/foo type nsfs (rw)
...
$ sudo systemd-nspawn -D /srv/fc27 --network-namespace-path=/run/netns/foo \
  /bin/readlink -f /proc/self/ns/net
/proc/1/ns/net:[4026532009]
```

Note that the option `--network-namespace-path=` cannot be combined with
other network-related options such as `--private-network`; nspawn rejects
such combinations so that the network settings cannot conflict.

Fixes https://github.com/systemd/systemd/issues/7361
This commit is contained in:
Dongsu Park 2017-11-24 18:22:17 +01:00 committed by Iago López Galeiras
parent cb9eeb062c
commit d7bea6b629
5 changed files with 141 additions and 52 deletions

View file

@ -522,6 +522,23 @@
<option>--drop-capability=</option>.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--network-namespace-path=</option></term>
<listitem><para>Takes the path to a file representing a kernel
network namespace that the container shall run in. The specified path
should refer to a (possibly bind-mounted) network namespace file, as
exposed by the kernel below <filename>/proc/$PID/ns/net</filename>.
This makes the container enter the given network namespace. A typical
use case is to specify a network namespace under
<filename>/run/netns</filename> created by <citerefentry
project='man-pages'><refentrytitle>ip-netns</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
for example <option>--network-namespace-path=/run/netns/foo</option>.
Note that this option cannot be used together with other
network-related options, such as <option>--private-network</option>
or <option>--network-interface=</option>.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--network-interface=</option></term>

View file

@ -1271,4 +1271,12 @@ struct fib_rule_uid_range {
# define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#endif
/* Fallback for build environments whose headers do not define NSFS_MAGIC.
 * This is the filesystem magic value compared against when checking whether
 * an fd lives on nsfs; the bytes 0x6e 0x73 0x66 0x73 spell "nsfs" in ASCII. */
#ifndef NSFS_MAGIC
#define NSFS_MAGIC 0x6e736673
#endif
/* Fallback for build environments whose headers do not define NS_GET_NSTYPE.
 * This is an argument-less ioctl request issued on a namespace fd; its result
 * is compared against CLONE_NEW* constants (e.g. CLONE_NEWNET) to determine
 * the namespace type. */
#ifndef NS_GET_NSTYPE
#define NS_GET_NSTYPE _IO(0xb7, 0x3)
#endif
#include "missing_syscall.h"

View file

@ -226,6 +226,18 @@ int fd_is_temporary_fs(int fd) {
return is_temporary_fs(&s);
}
/* Determines whether the file descriptor refers to a network namespace.
 *
 * Returns > 0 if fd is an nsfs file whose namespace type is CLONE_NEWNET,
 * 0 if the namespace is of a different type, and a negative errno-style
 * value if the NS_GET_NSTYPE ioctl fails. If the nsfs filesystem check
 * returns zero or a negative value, that value is returned unchanged. */
int fd_is_network_ns(int fd) {
        int on_nsfs, nstype;

        /* First make sure the fd lives on nsfs at all; pass through both
         * "no" (0) and failures (< 0) from the filesystem type check. */
        on_nsfs = fd_is_fs_type(fd, NSFS_MAGIC);
        if (on_nsfs < 0)
                return on_nsfs;
        if (on_nsfs == 0)
                return 0;

        /* Ask the kernel what kind of namespace this fd represents. */
        nstype = ioctl(fd, NS_GET_NSTYPE);
        if (nstype < 0)
                return -errno;

        return nstype == CLONE_NEWNET;
}
int path_is_temporary_fs(const char *path) {
_cleanup_close_ int fd = -1;

View file

@ -62,6 +62,7 @@ int path_is_fs_type(const char *path, statfs_f_type_t magic_value);
bool is_temporary_fs(const struct statfs *s) _pure_;
int fd_is_temporary_fs(int fd);
int fd_is_network_ns(int fd);
int path_is_temporary_fs(const char *path);
/* Because statfs.t_type can be int on some architectures, we have to cast

View file

@ -190,6 +190,7 @@ static bool arg_network_veth = false;
static char **arg_network_veth_extra = NULL;
static char *arg_network_bridge = NULL;
static char *arg_network_zone = NULL;
static char *arg_network_namespace_path = NULL;
static unsigned long arg_personality = PERSONALITY_INVALID;
static char *arg_image = NULL;
static VolatileMode arg_volatile_mode = VOLATILE_NO;
@ -260,6 +261,9 @@ static void help(void) {
" and attach it to an existing bridge on the host\n"
" --network-zone=NAME Similar, but attach the new interface to an\n"
" an automatically managed bridge interface\n"
" --network-namespace-path=PATH\n"
" Set network namespace to the one represented by\n"
" the specified kernel namespace file node\n"
" -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
" Expose a container IP port on the host\n"
" -Z --selinux-context=SECLABEL\n"
@ -434,6 +438,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_NETWORK_BRIDGE,
ARG_NETWORK_ZONE,
ARG_NETWORK_VETH_EXTRA,
ARG_NETWORK_NAMESPACE_PATH,
ARG_PERSONALITY,
ARG_VOLATILE,
ARG_TEMPLATE,
@ -450,55 +455,56 @@ static int parse_argv(int argc, char *argv[]) {
};
static const struct option options[] = {
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, ARG_VERSION },
{ "directory", required_argument, NULL, 'D' },
{ "template", required_argument, NULL, ARG_TEMPLATE },
{ "ephemeral", no_argument, NULL, 'x' },
{ "user", required_argument, NULL, 'u' },
{ "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
{ "as-pid2", no_argument, NULL, 'a' },
{ "boot", no_argument, NULL, 'b' },
{ "uuid", required_argument, NULL, ARG_UUID },
{ "read-only", no_argument, NULL, ARG_READ_ONLY },
{ "capability", required_argument, NULL, ARG_CAPABILITY },
{ "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
{ "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
{ "bind", required_argument, NULL, ARG_BIND },
{ "bind-ro", required_argument, NULL, ARG_BIND_RO },
{ "tmpfs", required_argument, NULL, ARG_TMPFS },
{ "overlay", required_argument, NULL, ARG_OVERLAY },
{ "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
{ "machine", required_argument, NULL, 'M' },
{ "slice", required_argument, NULL, 'S' },
{ "setenv", required_argument, NULL, 'E' },
{ "selinux-context", required_argument, NULL, 'Z' },
{ "selinux-apifs-context", required_argument, NULL, 'L' },
{ "quiet", no_argument, NULL, 'q' },
{ "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */
{ "register", required_argument, NULL, ARG_REGISTER },
{ "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
{ "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
{ "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
{ "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
{ "network-veth", no_argument, NULL, 'n' },
{ "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA },
{ "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
{ "network-zone", required_argument, NULL, ARG_NETWORK_ZONE },
{ "personality", required_argument, NULL, ARG_PERSONALITY },
{ "image", required_argument, NULL, 'i' },
{ "volatile", optional_argument, NULL, ARG_VOLATILE },
{ "port", required_argument, NULL, 'p' },
{ "property", required_argument, NULL, ARG_PROPERTY },
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
{ "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
{ "settings", required_argument, NULL, ARG_SETTINGS },
{ "chdir", required_argument, NULL, ARG_CHDIR },
{ "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
{ "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
{ "root-hash", required_argument, NULL, ARG_ROOT_HASH },
{ "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, ARG_VERSION },
{ "directory", required_argument, NULL, 'D' },
{ "template", required_argument, NULL, ARG_TEMPLATE },
{ "ephemeral", no_argument, NULL, 'x' },
{ "user", required_argument, NULL, 'u' },
{ "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
{ "as-pid2", no_argument, NULL, 'a' },
{ "boot", no_argument, NULL, 'b' },
{ "uuid", required_argument, NULL, ARG_UUID },
{ "read-only", no_argument, NULL, ARG_READ_ONLY },
{ "capability", required_argument, NULL, ARG_CAPABILITY },
{ "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
{ "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
{ "bind", required_argument, NULL, ARG_BIND },
{ "bind-ro", required_argument, NULL, ARG_BIND_RO },
{ "tmpfs", required_argument, NULL, ARG_TMPFS },
{ "overlay", required_argument, NULL, ARG_OVERLAY },
{ "overlay-ro", required_argument, NULL, ARG_OVERLAY_RO },
{ "machine", required_argument, NULL, 'M' },
{ "slice", required_argument, NULL, 'S' },
{ "setenv", required_argument, NULL, 'E' },
{ "selinux-context", required_argument, NULL, 'Z' },
{ "selinux-apifs-context", required_argument, NULL, 'L' },
{ "quiet", no_argument, NULL, 'q' },
{ "share-system", no_argument, NULL, ARG_SHARE_SYSTEM }, /* not documented */
{ "register", required_argument, NULL, ARG_REGISTER },
{ "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
{ "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
{ "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
{ "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
{ "network-veth", no_argument, NULL, 'n' },
{ "network-veth-extra", required_argument, NULL, ARG_NETWORK_VETH_EXTRA },
{ "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
{ "network-zone", required_argument, NULL, ARG_NETWORK_ZONE },
{ "network-namespace-path", required_argument, NULL, ARG_NETWORK_NAMESPACE_PATH },
{ "personality", required_argument, NULL, ARG_PERSONALITY },
{ "image", required_argument, NULL, 'i' },
{ "volatile", optional_argument, NULL, ARG_VOLATILE },
{ "port", required_argument, NULL, 'p' },
{ "property", required_argument, NULL, ARG_PROPERTY },
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
{ "private-users-chown", optional_argument, NULL, ARG_PRIVATE_USERS_CHOWN },
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
{ "settings", required_argument, NULL, ARG_SETTINGS },
{ "chdir", required_argument, NULL, ARG_CHDIR },
{ "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
{ "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
{ "root-hash", required_argument, NULL, ARG_ROOT_HASH },
{ "system-call-filter", required_argument, NULL, ARG_SYSTEM_CALL_FILTER },
{}
};
@ -644,6 +650,13 @@ static int parse_argv(int argc, char *argv[]) {
arg_settings_mask |= SETTING_NETWORK;
break;
case ARG_NETWORK_NAMESPACE_PATH:
r = parse_path_argument_and_warn(optarg, false, &arg_network_namespace_path);
if (r < 0)
return r;
break;
case 'b':
if (arg_start_mode == START_PID2) {
log_error("--boot and --as-pid2 may not be combined.");
@ -1103,6 +1116,17 @@ static int parse_argv(int argc, char *argv[]) {
assert_not_reached("Unhandled option");
}
/* If --network-namespace-path is given with any other network-related option,
* we need to error out, to avoid conflicts between different network options. */
if (arg_network_namespace_path &&
(arg_network_interfaces || arg_network_macvlan ||
arg_network_ipvlan || arg_network_veth_extra ||
arg_network_bridge || arg_network_zone ||
arg_network_veth || arg_private_network)) {
log_error("--network-namespace-path cannot be combined with other network options.");
return -EINVAL;
}
parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_IPC", CLONE_NEWIPC);
parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_PID", CLONE_NEWPID);
parse_share_ns_env("SYSTEMD_NSPAWN_SHARE_NS_UTS", CLONE_NEWUTS);
@ -2532,12 +2556,14 @@ static int outer_child(
int rtnl_socket,
int uid_shift_socket,
int unified_cgroup_hierarchy_socket,
FDSet *fds) {
FDSet *fds,
int netns_fd) {
pid_t pid;
ssize_t l;
int r;
_cleanup_close_ int fd = -1;
bool create_netns;
assert(barrier);
assert(directory);
@ -2788,9 +2814,11 @@ static int outer_child(
if (fd < 0)
return fd;
create_netns = !arg_network_namespace_path && arg_private_network;
pid = raw_clone(SIGCHLD|CLONE_NEWNS|
arg_clone_ns_flags |
(arg_private_network ? CLONE_NEWNET : 0) |
(create_netns ? CLONE_NEWNET : 0) |
(arg_userns_mode != USER_NAMESPACE_NO ? CLONE_NEWUSER : 0));
if (pid < 0)
return log_error_errno(errno, "Failed to fork inner child: %m");
@ -2804,6 +2832,12 @@ static int outer_child(
* requested, so that we all are owned by the user if
* user namespaces are turned on. */
if (arg_network_namespace_path) {
r = namespace_enter(-1, -1, netns_fd, -1, -1);
if (r < 0)
return r;
}
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
if (r < 0)
_exit(EXIT_FAILURE);
@ -2836,6 +2870,7 @@ static int outer_child(
notify_socket = safe_close(notify_socket);
kmsg_socket = safe_close(kmsg_socket);
rtnl_socket = safe_close(rtnl_socket);
netns_fd = safe_close(netns_fd);
return 0;
}
@ -3311,6 +3346,7 @@ static int run(int master,
int ifi = 0, r;
ssize_t l;
sigset_t mask_chld;
_cleanup_close_ int netns_fd = -1;
assert_se(sigemptyset(&mask_chld) == 0);
assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
@ -3365,6 +3401,20 @@ static int run(int master,
if (r < 0)
return log_error_errno(errno, "Failed to install SIGCHLD handler: %m");
if (arg_network_namespace_path) {
netns_fd = open(arg_network_namespace_path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (netns_fd < 0)
return log_error_errno(errno, "Cannot open file %s: %m", arg_network_namespace_path);
r = fd_is_network_ns(netns_fd);
if (r < 0 && r != -ENOTTY)
return log_error_errno(r, "Failed to check %s fs type: %m", arg_network_namespace_path);
if (r == 0) {
log_error("Path %s doesn't refer to a network namespace", arg_network_namespace_path);
return -EINVAL;
}
}
*pid = raw_clone(SIGCHLD|CLONE_NEWNS);
if (*pid < 0)
return log_error_errno(errno, "clone() failed%s: %m",
@ -3401,7 +3451,8 @@ static int run(int master,
rtnl_socket_pair[1],
uid_shift_socket_pair[1],
unified_cgroup_hierarchy_socket_pair[1],
fds);
fds,
netns_fd);
if (r < 0)
_exit(EXIT_FAILURE);