mirror of
https://github.com/systemd/systemd
synced 2024-07-23 19:25:39 +00:00
pid1: when taking possession of passed fds check O_CLOEXEC state first
So here's the thing. One library we use (libselinux) is opening fds behind our back when we initialize it and keeps it open. On the other hand we want to automatically pick up all fds passed in to us, so that we can distribute them to our services and close the rest. We pick them up very early in our code, to ensure that we don't get confused by open fds at that point. Except that libselinux insists on being initialized even earlier. So suddenly we might take possession of libselinux' fds, and then close them later when we decide no service wants them. Then during shutdown we close down selinux and selinux closes its fds, but since already closed long ago this ight close our fds instead. Hilarity ensues. I wish low-level software wouldn't do such things behind our back, but well, let's make the best of it. This changes the fd pick-up logic to only pick up fds that have O_CLOEXEC unset. O_CLOEXEC must be unset for any fds passed in to us over execve() after all. And for all our own fds we should set O_CLOEXEC since we generally don't want to litter fd tables for execve(). Also, libselinux thankfully appears to set O_CLOEXEC correctly on its fds, hence the filter works. Fixes: #27491
This commit is contained in:
parent
13f37e6e97
commit
a3dff21ae8
|
@ -2675,16 +2675,24 @@ static int collect_fds(FDSet **ret_fds, const char **ret_error_message) {
|
|||
assert(ret_fds);
|
||||
assert(ret_error_message);
|
||||
|
||||
r = fdset_new_fill(ret_fds);
|
||||
/* Pick up all fds passed to us. We apply a filter here: we only take the fds that have O_CLOEXEC
|
||||
* off. All fds passed via execve() to us must have O_CLOEXEC off, and our own code and dependencies
|
||||
* should be clean enough to set O_CLOEXEC universally. Thus checking the bit should be a safe
|
||||
* mechanism to distinguish passed in fds from our own.
|
||||
*
|
||||
* Why bother? Some subsystems we initialize early, specifically selinux might keep fds open in our
|
||||
* process behind our back. We should not take possession of that (and then accidentally close
|
||||
* it). SELinux thankfully sets O_CLOEXEC on its fds, so this test should work. */
|
||||
r = fdset_new_fill(/* filter_cloexec= */ 0, ret_fds);
|
||||
if (r < 0) {
|
||||
*ret_error_message = "Failed to allocate fd set";
|
||||
return log_emergency_errno(r, "Failed to allocate fd set: %m");
|
||||
}
|
||||
|
||||
fdset_cloexec(*ret_fds, true);
|
||||
(void) fdset_cloexec(*ret_fds, true);
|
||||
|
||||
if (arg_serialization)
|
||||
assert_se(fdset_remove(*ret_fds, fileno(arg_serialization)) >= 0);
|
||||
/* The serialization fd should have O_CLOEXEC turned on already, let's verify that we didn't pick it up here */
|
||||
assert_se(!arg_serialization || !fdset_contains(*ret_fds, fileno(arg_serialization)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -221,7 +221,7 @@ static int parse_argv(int argc, char *argv[]) {
|
|||
|
||||
if (!passed) {
|
||||
/* Take possession of all passed fds */
|
||||
r = fdset_new_fill(&passed);
|
||||
r = fdset_new_fill(/* filter_cloexec= */ 0, &passed);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to take possession of passed file descriptors: %m");
|
||||
|
||||
|
|
|
@ -139,7 +139,9 @@ int fdset_remove(FDSet *s, int fd) {
|
|||
return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT;
|
||||
}
|
||||
|
||||
int fdset_new_fill(FDSet **ret) {
|
||||
int fdset_new_fill(
|
||||
int filter_cloexec, /* if < 0 takes all fds, otherwise only those with O_CLOEXEC set (1) or unset (0) */
|
||||
FDSet **ret) {
|
||||
_cleanup_(fdset_shallow_freep) FDSet *s = NULL;
|
||||
_cleanup_closedir_ DIR *d = NULL;
|
||||
int r;
|
||||
|
@ -172,6 +174,20 @@ int fdset_new_fill(FDSet **ret) {
|
|||
if (fd == dirfd(d))
|
||||
continue;
|
||||
|
||||
if (filter_cloexec >= 0) {
|
||||
int fl;
|
||||
|
||||
/* If user asked for that filter by O_CLOEXEC. This is useful so that fds that have
|
||||
* been passed in can be collected and fds which have been created locally can be
|
||||
* ignored, under the assumption that only the latter have O_CLOEXEC set. */
|
||||
fl = fcntl(fd, F_GETFD);
|
||||
if (fl < 0)
|
||||
return -errno;
|
||||
|
||||
if (FLAGS_SET(fl, FD_CLOEXEC) != !!filter_cloexec)
|
||||
continue;
|
||||
}
|
||||
|
||||
r = fdset_put(s, fd);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
|
|
@ -20,7 +20,7 @@ bool fdset_contains(FDSet *s, int fd);
|
|||
int fdset_remove(FDSet *s, int fd);
|
||||
|
||||
int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
|
||||
int fdset_new_fill(FDSet **ret);
|
||||
int fdset_new_fill(int filter_cloexec, FDSet **ret);
|
||||
int fdset_new_listen_fds(FDSet **ret, bool unset);
|
||||
|
||||
int fdset_cloexec(FDSet *fds, bool b);
|
||||
|
|
|
@ -17,7 +17,7 @@ TEST(fdset_new_fill) {
|
|||
|
||||
fd = mkostemp_safe(name);
|
||||
assert_se(fd >= 0);
|
||||
assert_se(fdset_new_fill(&fdset) >= 0);
|
||||
assert_se(fdset_new_fill(/* filter_cloexec= */ -1, &fdset) >= 0);
|
||||
assert_se(fdset_contains(fdset, fd));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue