Merge pull request #12758 from fbuihuu/nspawn-console-tty

Create nspawn console tty in the child
This commit is contained in:
Lennart Poettering 2019-06-18 13:17:14 +02:00 committed by GitHub
commit 59da64738b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 166 additions and 154 deletions

3
TODO
View file

@ -209,9 +209,6 @@ Features:
/etc/resolv.conf. Should be smart and do something useful on read-only
images, for example fallback to read-only bind mounting the file instead.
* nspawn's console TTY should be allocated from within the container, not
mounted in from the outside
* show invocation ID in systemd-run output
* bypass SIGTERM state in unit files if KillSignal is SIGKILL

View file

@ -226,7 +226,6 @@ int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
}
int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
char fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
bool do_chown, do_chmod;
struct stat st;
@ -236,11 +235,9 @@ int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
* unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does
* on chown().
*
* This call is happy with O_PATH fds, since we always go via /proc/self/fd/ to change
* ownership/access mode. */
* This call is happy with O_PATH fds. */
xsprintf(fd_path, "/proc/self/fd/%i", fd);
if (stat(fd_path, &st) < 0)
if (fstat(fd, &st) < 0)
return -errno;
do_chown =
@ -262,16 +259,16 @@ int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */
if (((minimal ^ st.st_mode) & 07777) != 0)
if (chmod(fd_path, minimal & 07777) < 0)
if (fchmod_opath(fd, minimal & 07777) < 0)
return -errno;
}
if (do_chown)
if (chown(fd_path, uid, gid) < 0)
if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0)
return -errno;
if (do_chmod)
if (chmod(fd_path, mode & 07777) < 0)
if (fchmod_opath(fd, mode & 07777) < 0)
return -errno;
return do_chown || do_chmod;

View file

@ -62,21 +62,19 @@ int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *
}
if (pidns_fd)
*pidns_fd = pidnsfd;
*pidns_fd = TAKE_FD(pidnsfd);
if (mntns_fd)
*mntns_fd = mntnsfd;
*mntns_fd = TAKE_FD(mntnsfd);
if (netns_fd)
*netns_fd = netnsfd;
*netns_fd = TAKE_FD(netnsfd);
if (userns_fd)
*userns_fd = usernsfd;
*userns_fd = TAKE_FD(usernsfd);
if (root_fd)
*root_fd = rfd;
pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
*root_fd = TAKE_FD(rfd);
return 0;
}

View file

@ -1049,7 +1049,34 @@ int ptsname_malloc(int fd, char **ret) {
}
}
int ptsname_namespace(int pty, char **ret) {
/* Allocates a new pseudo-terminal master and unlocks it.
 *
 * flags:     open flags handed to posix_openpt(); O_NOCTTY and O_CLOEXEC are always OR-ed in.
 * ret_slave: optional. If non-NULL, receives the name of the slave device on success; it is left
 *            untouched on failure. The string comes from ptsname_malloc(), so presumably the
 *            caller has to free() it — confirm against ptsname_malloc().
 *
 * Returns the master file descriptor (>= 0) on success. On failure returns a negative value:
 * -errno from posix_openpt()/unlockpt(), the error returned by ptsname_malloc(), or -EINVAL if
 * the slave name does not start with "/dev/pts/". */
int openpt_allocate(int flags, char **ret_slave) {
/* NOTE(review): _cleanup_close_/_cleanup_free_ presumably release fd/p automatically on every
 * early return below — confirm against the macro definitions. */
_cleanup_close_ int fd = -1;
_cleanup_free_ char *p = NULL;
int r;
fd = posix_openpt(flags|O_NOCTTY|O_CLOEXEC);
if (fd < 0)
return -errno;
/* The slave name is only looked up (and validated) when the caller asked for it. */
if (ret_slave) {
r = ptsname_malloc(fd, &p);
if (r < 0)
return r;
/* Refuse slave names that are not located below /dev/pts/. */
if (!path_startswith(p, "/dev/pts/"))
return -EINVAL;
}
if (unlockpt(fd) < 0)
return -errno;
/* Output parameters are written only once nothing can fail anymore. TAKE_PTR()/TAKE_FD()
 * presumably hand ownership to the caller and disarm the cleanup handlers — confirm. */
if (ret_slave)
*ret_slave = TAKE_PTR(p);
return TAKE_FD(fd);
}
static int ptsname_namespace(int pty, char **ret) {
int no = -1, r;
/* Like ptsname(), but doesn't assume that the path is
@ -1068,8 +1095,8 @@ int ptsname_namespace(int pty, char **ret) {
return 0;
}
int openpt_in_namespace(pid_t pid, int flags) {
_cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave) {
_cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1, fd = -1;
_cleanup_close_pair_ int pair[2] = { -1, -1 };
pid_t child;
int r;
@ -1088,18 +1115,13 @@ int openpt_in_namespace(pid_t pid, int flags) {
if (r < 0)
return r;
if (r == 0) {
int master;
pair[0] = safe_close(pair[0]);
master = posix_openpt(flags|O_NOCTTY|O_CLOEXEC);
if (master < 0)
fd = openpt_allocate(flags, NULL);
if (fd < 0)
_exit(EXIT_FAILURE);
if (unlockpt(master) < 0)
_exit(EXIT_FAILURE);
if (send_one_fd(pair[1], master, 0) < 0)
if (send_one_fd(pair[1], fd, 0) < 0)
_exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
@ -1113,7 +1135,17 @@ int openpt_in_namespace(pid_t pid, int flags) {
if (r != EXIT_SUCCESS)
return -EIO;
return receive_one_fd(pair[0], 0);
fd = receive_one_fd(pair[0], 0);
if (fd < 0)
return fd;
if (ret_slave) {
r = ptsname_namespace(fd, ret_slave);
if (r < 0)
return r;
}
return TAKE_FD(fd);
}
int open_terminal_in_namespace(pid_t pid, const char *name, int mode) {

View file

@ -151,9 +151,9 @@ int getttyname_malloc(int fd, char **r);
int getttyname_harder(int fd, char **r);
int ptsname_malloc(int fd, char **ret);
int ptsname_namespace(int pty, char **ret);
int openpt_in_namespace(pid_t pid, int flags);
int openpt_allocate(int flags, char **ret_slave);
int openpt_allocate_in_namespace(pid_t pid, int flags, char **ret_slave);
int open_terminal_in_namespace(pid_t pid, const char *name, int mode);
int vt_default_utf8(void);

View file

@ -423,14 +423,10 @@ int bus_machine_method_open_pty(sd_bus_message *message, void *userdata, sd_bus_
if (r == 0)
return 1; /* Will call us back */
master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC);
master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC, &pty_name);
if (master < 0)
return master;
r = ptsname_namespace(master, &pty_name);
if (r < 0)
return r;
r = sd_bus_message_new_method_return(message, &reply);
if (r < 0)
return r;
@ -514,17 +510,12 @@ int bus_machine_method_open_login(sd_bus_message *message, void *userdata, sd_bu
if (r == 0)
return 1; /* Will call us back */
master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC);
master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC, &pty_name);
if (master < 0)
return master;
r = ptsname_namespace(master, &pty_name);
if (r < 0)
return r;
p = path_startswith(pty_name, "/dev/pts/");
if (!p)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "PTS name %s is invalid", pty_name);
assert(p);
r = container_bus_new(m, error, &allocated_bus);
if (r < 0)
@ -630,14 +621,10 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
if (r == 0)
return 1; /* Will call us back */
master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC);
master = machine_openpt(m, O_RDWR|O_NOCTTY|O_CLOEXEC, &pty_name);
if (master < 0)
return master;
r = ptsname_namespace(master, &pty_name);
if (r < 0)
return r;
p = path_startswith(pty_name, "/dev/pts/");
assert(p);

View file

@ -530,29 +530,20 @@ int machine_kill(Machine *m, KillWho who, int signo) {
return manager_kill_unit(m->manager, m->unit, signo, NULL);
}
int machine_openpt(Machine *m, int flags) {
int machine_openpt(Machine *m, int flags, char **ret_slave) {
assert(m);
switch (m->class) {
case MACHINE_HOST: {
int fd;
case MACHINE_HOST:
fd = posix_openpt(flags);
if (fd < 0)
return -errno;
if (unlockpt(fd) < 0)
return -errno;
return fd;
}
return openpt_allocate(flags, ret_slave);
case MACHINE_CONTAINER:
if (m->leader <= 0)
return -EINVAL;
return openpt_in_namespace(m->leader, flags);
return openpt_allocate_in_namespace(m->leader, flags, ret_slave);
default:
return -EOPNOTSUPP;

View file

@ -89,7 +89,7 @@ MachineState machine_state_from_string(const char *s) _pure_;
const char *kill_who_to_string(KillWho k) _const_;
KillWho kill_who_from_string(const char *s) _pure_;
int machine_openpt(Machine *m, int flags);
int machine_openpt(Machine *m, int flags, char **ret_slave);
int machine_open_terminal(Machine *m, const char *path, int mode);
int machine_get_uid_shift(Machine *m, uid_t *ret);

View file

@ -2037,32 +2037,41 @@ static int setup_pts(const char *dest) {
return 0;
}
static int setup_dev_console(const char *dest, const char *console) {
_cleanup_umask_ mode_t u;
const char *to;
static int setup_stdio_as_dev_console(void) {
int terminal;
int r;
assert(dest);
terminal = open_terminal("/dev/console", O_RDWR);
if (terminal < 0)
return log_error_errno(terminal, "Failed to open console: %m");
u = umask(0000);
if (!console)
return 0;
r = chmod_and_chown(console, 0600, arg_uid_shift, arg_uid_shift);
/* Make sure we can continue logging to the original stderr, even if
* stderr points elsewhere now */
r = log_dup_console();
if (r < 0)
return log_error_errno(r, "Failed to correct access mode for TTY: %m");
return log_error_errno(r, "Failed to duplicate stderr: %m");
/* We need to bind mount the right tty to /dev/console since
* ptys can only exist on pts file systems. To have something
* to bind mount things on we create a empty regular file. */
to = prefix_roota(dest, "/dev/console");
r = touch(to);
/* invalidates 'terminal' on success and failure */
r = rearrange_stdio(terminal, terminal, terminal);
if (r < 0)
return log_error_errno(r, "touch() for /dev/console failed: %m");
return log_error_errno(r, "Failed to move console to stdin/stdout/stderr: %m");
return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
return 0;
}
/* Makes /dev/console a symbolic link to the given console device.
 *
 * console: path of the device node the link should point to. It is passed to
 *          path_make_relative() together with "/dev", so presumably it is expected to be an
 *          absolute path such as a /dev/pts/N slave — confirm against the caller.
 *
 * Returns 0 on success. On failure the error is logged here and the value returned by
 * log_error_errno() is passed back to the caller. */
static int setup_dev_console(const char *console) {
_cleanup_free_ char *p = NULL;
int r;
/* Create /dev/console symlink */
/* The link target is stored relative to /dev rather than as the path passed in. */
r = path_make_relative("/dev", console, &p);
if (r < 0)
return log_error_errno(r, "Failed to create relative path: %m");
/* NOTE(review): symlink() fails with EEXIST if /dev/console already exists; presumably the
 * caller guarantees it does not — confirm. */
if (symlink(p, "/dev/console") < 0)
return log_error_errno(errno, "Failed to create /dev/console symlink: %m");
return 0;
}
static int setup_keyring(void) {
@ -2775,8 +2784,10 @@ static int inner_child(
bool secondary,
int kmsg_socket,
int rtnl_socket,
int master_pty_socket,
FDSet *fds) {
_cleanup_close_ int master = -1;
_cleanup_free_ char *home = NULL;
char as_uuid[37];
size_t n_env = 1;
@ -2908,6 +2919,28 @@ static int inner_child(
rtnl_socket = safe_close(rtnl_socket);
}
if (arg_console_mode != CONSOLE_PIPE) {
_cleanup_free_ char *console = NULL;
/* Allocate a pty and make it available as /dev/console. */
master = openpt_allocate(O_RDWR|O_NONBLOCK, &console);
if (master < 0)
return log_error_errno(master, "Failed to allocate a pty: %m");
r = setup_dev_console(console);
if (r < 0)
return log_error_errno(r, "Failed to setup /dev/console: %m");
r = send_one_fd(master_pty_socket, master, 0);
if (r < 0)
return log_error_errno(r, "Failed to send master fd: %m");
master_pty_socket = safe_close(master_pty_socket);
r = setup_stdio_as_dev_console();
if (r < 0)
return r;
}
r = patch_sysctl();
if (r < 0)
return r;
@ -3129,7 +3162,6 @@ static int setup_sd_notify_child(void) {
static int outer_child(
Barrier *barrier,
const char *directory,
const char *console,
DissectedImage *dissected_image,
bool secondary,
int pid_socket,
@ -3138,6 +3170,7 @@ static int outer_child(
int kmsg_socket,
int rtnl_socket,
int uid_shift_socket,
int master_pty_socket,
int unified_cgroup_hierarchy_socket,
FDSet *fds,
int netns_fd) {
@ -3157,6 +3190,7 @@ static int outer_child(
assert(pid_socket >= 0);
assert(uuid_socket >= 0);
assert(notify_socket >= 0);
assert(master_pty_socket >= 0);
assert(kmsg_socket >= 0);
log_debug("Outer child is initializing.");
@ -3164,25 +3198,6 @@ static int outer_child(
if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
if (arg_console_mode != CONSOLE_PIPE) {
int terminal;
assert(console);
terminal = open_terminal(console, O_RDWR);
if (terminal < 0)
return log_error_errno(terminal, "Failed to open console: %m");
/* Make sure we can continue logging to the original stderr, even if stderr points elsewhere now */
r = log_dup_console();
if (r < 0)
return log_error_errno(r, "Failed to duplicate stderr: %m");
r = rearrange_stdio(terminal, terminal, terminal); /* invalidates 'terminal' on success and failure */
if (r < 0)
return log_error_errno(r, "Failed to move console to stdin/stdout/stderr: %m");
}
r = reset_audit_loginuid();
if (r < 0)
return r;
@ -3337,10 +3352,6 @@ static int outer_child(
if (r < 0)
return r;
r = setup_dev_console(directory, console);
if (r < 0)
return r;
r = setup_keyring();
if (r < 0)
return r;
@ -3415,7 +3426,7 @@ static int outer_child(
return log_error_errno(r, "Failed to join network namespace: %m");
}
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, master_pty_socket, fds);
if (r < 0)
_exit(EXIT_FAILURE);
@ -3438,11 +3449,12 @@ static int outer_child(
l = send_one_fd(notify_socket, fd, 0);
if (l < 0)
return log_error_errno(errno, "Failed to send notify fd: %m");
return log_error_errno(l, "Failed to send notify fd: %m");
pid_socket = safe_close(pid_socket);
uuid_socket = safe_close(uuid_socket);
notify_socket = safe_close(notify_socket);
master_pty_socket = safe_close(master_pty_socket);
kmsg_socket = safe_close(kmsg_socket);
rtnl_socket = safe_close(rtnl_socket);
netns_fd = safe_close(netns_fd);
@ -4042,14 +4054,13 @@ static int load_oci_bundle(void) {
return merge_settings(settings, arg_oci_bundle);
}
static int run_container(int master,
const char* console,
static int run_container(
DissectedImage *dissected_image,
bool secondary,
FDSet *fds,
char veth_name[IFNAMSIZ], bool *veth_created,
union in_addr_union *exposed,
pid_t *pid, int *ret) {
int *master, pid_t *pid, int *ret) {
static const struct sigaction sa = {
.sa_handler = nop_signal_handler,
@ -4065,9 +4076,10 @@ static int run_container(int master,
uuid_socket_pair[2] = { -1, -1 },
notify_socket_pair[2] = { -1, -1 },
uid_shift_socket_pair[2] = { -1, -1 },
master_pty_socket_pair[2] = { -1, -1 },
unified_cgroup_hierarchy_socket_pair[2] = { -1, -1};
_cleanup_close_ int notify_socket= -1;
_cleanup_close_ int notify_socket = -1;
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
_cleanup_(sd_event_source_unrefp) sd_event_source *notify_event_source = NULL;
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
@ -4115,6 +4127,9 @@ static int run_container(int master,
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, notify_socket_pair) < 0)
return log_error_errno(errno, "Failed to create notify socket pair: %m");
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, master_pty_socket_pair) < 0)
return log_error_errno(errno, "Failed to create console socket pair: %m");
if (arg_userns_mode != USER_NAMESPACE_NO)
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0)
return log_error_errno(errno, "Failed to create uid shift socket pair: %m");
@ -4158,13 +4173,12 @@ static int run_container(int master,
/* The outer child only has a file system namespace. */
barrier_set_role(&barrier, BARRIER_CHILD);
master = safe_close(master);
kmsg_socket_pair[0] = safe_close(kmsg_socket_pair[0]);
rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
pid_socket_pair[0] = safe_close(pid_socket_pair[0]);
uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]);
notify_socket_pair[0] = safe_close(notify_socket_pair[0]);
master_pty_socket_pair[0] = safe_close(master_pty_socket_pair[0]);
uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]);
unified_cgroup_hierarchy_socket_pair[0] = safe_close(unified_cgroup_hierarchy_socket_pair[0]);
@ -4173,7 +4187,6 @@ static int run_container(int master,
r = outer_child(&barrier,
arg_directory,
console,
dissected_image,
secondary,
pid_socket_pair[1],
@ -4182,6 +4195,7 @@ static int run_container(int master,
kmsg_socket_pair[1],
rtnl_socket_pair[1],
uid_shift_socket_pair[1],
master_pty_socket_pair[1],
unified_cgroup_hierarchy_socket_pair[1],
fds,
netns_fd);
@ -4200,6 +4214,7 @@ static int run_container(int master,
pid_socket_pair[1] = safe_close(pid_socket_pair[1]);
uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
notify_socket_pair[1] = safe_close(notify_socket_pair[1]);
master_pty_socket_pair[1] = safe_close(master_pty_socket_pair[1]);
uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
unified_cgroup_hierarchy_socket_pair[1] = safe_close(unified_cgroup_hierarchy_socket_pair[1]);
@ -4474,17 +4489,40 @@ static int run_container(int master,
rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
if (IN_SET(arg_console_mode, CONSOLE_INTERACTIVE, CONSOLE_READ_ONLY)) {
assert(master >= 0);
if (arg_console_mode != CONSOLE_PIPE) {
_cleanup_close_ int fd = -1;
PTYForwardFlags flags = 0;
r = pty_forward_new(event, master,
PTY_FORWARD_IGNORE_VHANGUP | (arg_console_mode == CONSOLE_READ_ONLY ? PTY_FORWARD_READ_ONLY : 0),
&forward);
if (r < 0)
return log_error_errno(r, "Failed to create PTY forwarder: %m");
/* Retrieve the master pty allocated by inner child */
fd = receive_one_fd(master_pty_socket_pair[0], 0);
if (fd < 0)
return log_error_errno(fd, "Failed to receive master pty from the inner child: %m");
if (arg_console_width != (unsigned) -1 || arg_console_height != (unsigned) -1)
(void) pty_forward_set_width_height(forward, arg_console_width, arg_console_height);
switch (arg_console_mode) {
case CONSOLE_READ_ONLY:
flags |= PTY_FORWARD_READ_ONLY;
_fallthrough_;
case CONSOLE_INTERACTIVE:
flags |= PTY_FORWARD_IGNORE_VHANGUP;
r = pty_forward_new(event, fd, flags, &forward);
if (r < 0)
return log_error_errno(r, "Failed to create PTY forwarder: %m");
if (arg_console_width != (unsigned) -1 || arg_console_height != (unsigned) -1)
(void) pty_forward_set_width_height(forward,
arg_console_width,
arg_console_height);
break;
default:
assert(arg_console_mode == CONSOLE_PASSIVE);
}
*master = TAKE_FD(fd);
}
r = sd_event_loop(event);
@ -4614,7 +4652,6 @@ static int initialize_rlimits(void) {
}
static int run(int argc, char *argv[]) {
_cleanup_free_ char *console = NULL;
_cleanup_close_ int master = -1;
_cleanup_fdset_free_ FDSet *fds = NULL;
int r, n_fd_passed, ret = EXIT_SUCCESS;
@ -4929,31 +4966,6 @@ static int run(int argc, char *argv[]) {
if (arg_console_mode == CONSOLE_PIPE) /* if we pass STDERR on to the container, don't add our own logs into it too */
arg_quiet = true;
if (arg_console_mode != CONSOLE_PIPE) {
master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NONBLOCK);
if (master < 0) {
r = log_error_errno(errno, "Failed to acquire pseudo tty: %m");
goto finish;
}
r = ptsname_malloc(master, &console);
if (r < 0) {
r = log_error_errno(r, "Failed to determine tty name: %m");
goto finish;
}
if (arg_selinux_apifs_context) {
r = mac_selinux_apply(console, arg_selinux_apifs_context);
if (r < 0)
goto finish;
}
if (unlockpt(master) < 0) {
r = log_error_errno(errno, "Failed to unlock tty: %m");
goto finish;
}
}
if (!arg_quiet)
log_info("Spawning container %s on %s.\nPress ^] three times within 1s to kill container.",
arg_machine, arg_image ?: arg_directory);
@ -4966,13 +4978,11 @@ static int run(int argc, char *argv[]) {
}
for (;;) {
r = run_container(master,
console,
dissected_image,
r = run_container(dissected_image,
secondary,
fds,
veth_name, &veth_created,
&exposed,
&exposed, &master,
&pid, &ret);
if (r <= 0)
break;