pid1: when taking possession of passed fds check O_CLOEXEC state first

So here's the thing. One library we use (libselinux) is opening fds
behind our back when we initialize it and keeps it open. On the other
hand we want to automatically pick up all fds passed in to us, so that
we can distribute them to our services and close the rest. We pick them
up very early in our code, to ensure that we don't get confused by open
fds at that point. Except that libselinux insists on being initialized
even earlier. So suddenly we might take possession of libselinux' fds,
and then close them later when we decide no service wants them. Then
during shutdown we close down selinux and selinux closes its fds, but
since already closed long ago this ight close our fds instead. Hilarity
ensues.

I wish low-level software wouldn't do such things behind our back, but
well, let's make the best of it.

This changes the fd pick-up logic to only pick up fds that have
O_CLOEXEC unset. O_CLOEXEC must be unset for any fds passed in to us
over execve() after all. And for all our own fds we should set O_CLOEXEC
since we generally don't want to litter fd tables for execve(). Also,
libselinux thankfully appears to set O_CLOEXEC correctly on its fds,
hence the filter works.

Fixes: #27491
This commit is contained in:
Lennart Poettering 2023-05-23 17:24:46 +02:00
parent 13f37e6e97
commit a3dff21ae8
5 changed files with 32 additions and 8 deletions

View file

@ -2675,16 +2675,24 @@ static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
assert(ret_fds);
assert(ret_error_message);
r = fdset_new_fill(ret_fds);
/* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC
* off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies
* should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe
* mechanism to distinguish passed in fds from our own.
*
* Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our
* process behind our back. We should not take possession of that (and then accidentally close
* it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */
r = fdset_new_fill(/* filter_cloexec= */ 0, ret_fds);
if (r < 0) {
*ret_error_message = "Failed to allocate fd set";
return log_emergency_errno(r, "Failed to allocate fd set: %m");
}
fdset_cloexec(*ret_fds, true);
(void) fdset_cloexec(*ret_fds, true);
if (arg_serialization)
assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
/* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */
assert_se(!arg_serialization || !fdset_contains(*ret_fds, fileno(arg_serialization)));
return 0;
}

View file

@ -221,7 +221,7 @@ static int parse_argv(int argc, char *argv[]) {
if (!passed) {
/* Take possession of all passed fds */
r = fdset_new_fill(&passed);
r = fdset_new_fill(/* filter_cloexec= */ 0, &passed);
if (r < 0)
return log_error_errno(r, "Failed to take possession of passed file descriptors: %m");

View file

@ -139,7 +139,9 @@ int fdset_remove(FDSet *s, int fd) {
return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT;
}
int fdset_new_fill(FDSet **ret) {
int fdset_new_fill(
int filter_cloexec, /* if < 0 takes all fds, otherwise only those with O_CLOEXEC set (1) or unset (0) */
FDSet **ret) {
_cleanup_(fdset_shallow_freep) FDSet *s = NULL;
_cleanup_closedir_ DIR *d = NULL;
int r;
@ -172,6 +174,20 @@ int fdset_new_fill(FDSet **ret) {
if (fd == dirfd(d))
continue;
if (filter_cloexec >= 0) {
int fl;
/* If user asked for that filter by O_CLOEXEC. This is useful so that fds that have
* been passed in can be collected and fds which have been created locally can be
* ignored, under the assumption that only the latter have O_CLOEXEC set. */
fl = fcntl(fd, F_GETFD);
if (fl < 0)
return -errno;
if (FLAGS_SET(fl, FD_CLOEXEC) != !!filter_cloexec)
continue;
}
r = fdset_put(s, fd);
if (r < 0)
return r;

View file

@ -20,7 +20,7 @@ bool fdset_contains(FDSet *s, int fd);
int fdset_remove(FDSet *s, int fd);
int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
int fdset_new_fill(FDSet **ret);
int fdset_new_fill(int filter_cloexec, FDSet **ret);
int fdset_new_listen_fds(FDSet **ret, bool unset);
int fdset_cloexec(FDSet *fds, bool b);

View file

@ -17,7 +17,7 @@ TEST(fdset_new_fill) {
fd = mkostemp_safe(name);
assert_se(fd >= 0);
assert_se(fdset_new_fill(&fdset) >= 0);
assert_se(fdset_new_fill(/* filter_cloexec= */ -1, &fdset) >= 0);
assert_se(fdset_contains(fdset, fd));
}