varlink: implement file descriptor passing

Let's add infrastructure to implement fd passing in varlink, when used
over AF_UNIX.

This will optionally associate one or more fds with a message sent via
varlink and deliver it to the server.
This commit is contained in:
Lennart Poettering 2023-04-05 16:03:38 +02:00
parent 790446bd6c
commit d37cdac6ce
2 changed files with 438 additions and 22 deletions

View file

@ -81,6 +81,18 @@ typedef enum VarlinkState {
VARLINK_PENDING_METHOD, \
VARLINK_PENDING_METHOD_MORE)
typedef struct VarlinkJsonQueueItem VarlinkJsonQueueItem;
/* A queued message we shall write into the socket, along with the file descriptors to send at the same
* time. This queue item binds them together so that message/fd boundaries are maintained throughout the
* whole pipeline. */
struct VarlinkJsonQueueItem {
/* Linkage for the per-connection output queue */
LIST_FIELDS(VarlinkJsonQueueItem, queue);
/* The not-yet-serialized message; the item holds its own reference on it (taken when the item is
* created, dropped when the item is freed) */
JsonVariant *data;
/* Number of valid entries in fds[] */
size_t n_fds;
/* The fds to send along with the message, stored inline behind the struct (flexible array member).
* Any entries still counted by n_fds are closed when the item is freed. */
int fds[];
};
struct Varlink {
unsigned n_ref;
@ -125,6 +137,25 @@ struct Varlink {
size_t output_buffer_index;
size_t output_buffer_size;
int *input_fds; /* file descriptors associated with the data in input_buffer (for fd passing) */
size_t n_input_fds;
int *output_fds; /* file descriptors associated with the data in output_buffer (for fd passing) */
size_t n_output_fds;
/* Further messages to output not yet formatted into text, and thus not included in output_buffer
* yet. We keep them separate from output_buffer, to not violate fd message boundaries: we want that
* each message that is sent is associated with its fds, and that fds cannot be accidentally associated
* with preceding or following messages. */
LIST_HEAD(VarlinkJsonQueueItem, output_queue);
VarlinkJsonQueueItem *output_queue_tail;
/* The fds to associate with the next message that is about to be enqueued. The user first pushes the
* fds it intends to send via varlink_push_fd() into this queue, and then once the message data is
* submitted we'll combine the fds and the message data into one. */
int *pushed_fds;
size_t n_pushed_fds;
VarlinkReply reply_callback;
JsonVariant *current;
@ -137,6 +168,11 @@ struct Varlink {
bool prefer_read_write:1;
bool got_pollhup:1;
bool allow_fd_passing_input:1;
bool allow_fd_passing_output:1;
int af; /* address family if socket; AF_UNSPEC if not socket; negative if not known */
usec_t timestamp;
usec_t timeout;
@ -222,6 +258,8 @@ DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(varlink_state, VarlinkState);
/* Debug-level log helper for server objects: prefixes the formatted message with the server's
* description as returned by varlink_server_description(). */
#define varlink_server_log(s, fmt, ...) \
log_debug("%s: " fmt, varlink_server_description(s), ##__VA_ARGS__)
/* Forward declaration: varlink_write() calls this before it is defined further down in the file */
static int varlink_format_queue(Varlink *v);
/* Returns the description string set on the connection, or the literal "varlink" if the connection
 * is NULL or has no description set. Never returns NULL. */
static inline const char *varlink_description(Varlink *v) {
        if (v && v->description)
                return v->description;

        return "varlink";
}
@ -230,6 +268,36 @@ static inline const char *varlink_server_description(VarlinkServer *s) {
return (s ? s->description : NULL) ?: "varlink";
}
/* Destroys a queue item: drops the reference on the message, closes every fd still counted by
 * n_fds, and releases the item itself. Accepts NULL. Always returns NULL so that callers can
 * assign the result back to their pointer. */
static VarlinkJsonQueueItem *varlink_json_queue_item_free(VarlinkJsonQueueItem *q) {
        if (q) {
                json_variant_unref(q->data);
                close_many(q->fds, q->n_fds);
        }

        return mfree(q);
}
/* Allocates a queue item that bundles message 'm' with a copy of the 'n_fds' descriptor numbers in
 * 'fds'. A new reference on 'm' is taken. The fd values are copied, not duplicated: once the item
 * exists it will close them when freed, hence the caller must stop treating them as its own.
 * 'fds' may be NULL only if 'n_fds' is zero. Returns NULL if the allocation fails. */
static VarlinkJsonQueueItem *varlink_json_queue_item_new(JsonVariant *m, const int fds[], size_t n_fds) {
        VarlinkJsonQueueItem *item;
        size_t sz;

        assert(m);
        assert(n_fds == 0 || fds);

        /* Fixed header plus room for the trailing fd array */
        sz = offsetof(VarlinkJsonQueueItem, fds) + n_fds * sizeof(int);

        item = malloc(sz);
        if (!item)
                return NULL;

        *item = (VarlinkJsonQueueItem) {
                .data = json_variant_ref(m),
                .n_fds = n_fds,
        };

        memcpy_safe(item->fds, fds, sizeof(int) * n_fds);

        return item;
}
static void varlink_set_state(Varlink *v, VarlinkState state) {
assert(v);
assert(state >= 0 && state < _VARLINK_STATE_MAX);
@ -264,7 +332,9 @@ static int varlink_new(Varlink **ret) {
.ucred = UCRED_INVALID,
.timestamp = USEC_INFINITY,
.timeout = VARLINK_DEFAULT_TIMEOUT_USEC
.timeout = VARLINK_DEFAULT_TIMEOUT_USEC,
.af = -1,
};
*ret = v;
@ -288,6 +358,7 @@ int varlink_connect_address(Varlink **ret, const char *address) {
return log_debug_errno(errno, "Failed to create AF_UNIX socket: %m");
v->fd = fd_move_above_stdio(v->fd);
v->af = AF_UNIX;
r = sockaddr_un_set_path(&sockaddr.un, address);
if (r < 0) {
@ -338,6 +409,7 @@ int varlink_connect_fd(Varlink **ret, int fd) {
return log_debug_errno(r, "Failed to create varlink object: %m");
v->fd = fd;
v->af = -1,
varlink_set_state(v, VARLINK_IDLE_CLIENT);
/* Note that if this function is called we assume the passed socket (if it is one) is already
@ -365,6 +437,10 @@ static void varlink_clear_current(Varlink *v) {
/* Clears the currently processed incoming message */
v->current = json_variant_unref(v->current);
close_many(v->input_fds, v->n_input_fds);
v->input_fds = mfree(v->input_fds);
v->n_input_fds = 0;
}
static void varlink_clear(Varlink *v) {
@ -374,11 +450,29 @@ static void varlink_clear(Varlink *v) {
v->fd = safe_close(v->fd);
varlink_clear_current(v);
v->input_buffer = mfree(v->input_buffer);
v->output_buffer = mfree(v->output_buffer);
varlink_clear_current(v);
close_many(v->output_fds, v->n_output_fds);
v->output_fds = mfree(v->output_fds);
v->n_output_fds = 0;
close_many(v->pushed_fds, v->n_pushed_fds);
v->pushed_fds = mfree(v->pushed_fds);
v->n_pushed_fds = 0;
while (v->output_queue) {
VarlinkJsonQueueItem *q = v->output_queue;
LIST_REMOVE(queue, v->output_queue, q);
varlink_json_queue_item_free(q);
}
v->output_queue_tail = NULL;
v->event = sd_event_unref(v->event);
}
@ -446,6 +540,7 @@ disconnect:
static int varlink_write(Varlink *v) {
ssize_t n;
int r;
assert(v);
@ -454,25 +549,53 @@ static int varlink_write(Varlink *v) {
if (v->connecting) /* Writing while we are still wait for a non-blocking connect() to complete will
* result in ENOTCONN, hence exit early here */
return 0;
if (v->output_buffer_size == 0)
return 0;
if (v->write_disconnected)
return 0;
/* If needed let's convert some output queue json variants into text form */
r = varlink_format_queue(v);
if (r < 0)
return r;
if (v->output_buffer_size == 0)
return 0;
assert(v->fd >= 0);
/* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible
* with non-socket IO, hence fall back automatically.
*
* Use a local variable to help gcc figure out that we set 'n' in all cases. */
bool prefer_write = v->prefer_read_write;
if (!prefer_write) {
n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL);
if (n < 0 && errno == ENOTSOCK)
prefer_write = v->prefer_read_write = true;
if (v->n_output_fds > 0) { /* If we shall send fds along, we must use sendmsg() */
struct iovec iov = {
.iov_base = v->output_buffer + v->output_buffer_index,
.iov_len = v->output_buffer_size,
};
struct msghdr mh = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_controllen = CMSG_SPACE(sizeof(int) * v->n_output_fds),
};
mh.msg_control = alloca0(mh.msg_controllen);
struct cmsghdr *control = CMSG_FIRSTHDR(&mh);
control->cmsg_len = CMSG_LEN(sizeof(int) * v->n_output_fds);
control->cmsg_level = SOL_SOCKET;
control->cmsg_type = SCM_RIGHTS;
memcpy(CMSG_DATA(control), v->output_fds, sizeof(int) * v->n_output_fds);
n = sendmsg(v->fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
} else {
/* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible
* with non-socket IO, hence fall back automatically.
*
* Use a local variable to help gcc figure out that we set 'n' in all cases. */
bool prefer_write = v->prefer_read_write;
if (!prefer_write) {
n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL);
if (n < 0 && errno == ENOTSOCK)
prefer_write = v->prefer_read_write = true;
}
if (prefer_write)
n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size);
}
if (prefer_write)
n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size);
if (n < 0) {
if (errno == EAGAIN)
return 0;
@ -494,13 +617,22 @@ static int varlink_write(Varlink *v) {
else
v->output_buffer_index += n;
close_many(v->output_fds, v->n_output_fds);
v->n_output_fds = 0;
v->timestamp = now(CLOCK_MONOTONIC);
return 1;
}
/* Number of fds for which varlink_read() reserves space in its control message buffer */
#define VARLINK_FDS_MAX (16U*1024U)
static int varlink_read(Varlink *v) {
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * VARLINK_FDS_MAX)) control;
struct iovec iov;
struct msghdr mh;
size_t rs;
ssize_t n;
void *p;
assert(v);
@ -544,16 +676,31 @@ static int varlink_read(Varlink *v) {
}
}
p = v->input_buffer + v->input_buffer_index + v->input_buffer_size;
rs = MALLOC_SIZEOF_SAFE(v->input_buffer) - (v->input_buffer_index + v->input_buffer_size);
bool prefer_read = v->prefer_read_write;
if (!prefer_read) {
n = recv(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs, MSG_DONTWAIT);
if (n < 0 && errno == ENOTSOCK)
prefer_read = v->prefer_read_write = true;
if (v->allow_fd_passing_input) {
iov = (struct iovec) {
.iov_base = p,
.iov_len = rs,
};
mh = (struct msghdr) {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = &control,
.msg_controllen = sizeof(control),
};
n = recvmsg_safe(v->fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
} else {
bool prefer_read = v->prefer_read_write;
if (!prefer_read) {
n = recv(v->fd, p, rs, MSG_DONTWAIT);
if (n < 0 && errno == ENOTSOCK)
prefer_read = v->prefer_read_write = true;
}
if (prefer_read)
n = read(v->fd, p, rs);
}
if (prefer_read)
n = read(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs);
if (n < 0) {
if (errno == EAGAIN)
return 0;
@ -566,10 +713,44 @@ static int varlink_read(Varlink *v) {
return -errno;
}
if (n == 0) { /* EOF */
if (v->allow_fd_passing_input)
cmsg_close_all(&mh);
v->read_disconnected = true;
return 1;
}
if (v->allow_fd_passing_input) {
struct cmsghdr* cmsg;
cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, (socklen_t) -1);
if (cmsg) {
size_t add;
/* We only allow file descriptors to be passed along with the first byte of a
* message. If they are passed with any other byte this is a protocol violation. */
if (v->input_buffer_size != 0) {
cmsg_close_all(&mh);
return -EPROTO;
}
add = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
if (add > INT_MAX - v->n_input_fds) {
cmsg_close_all(&mh);
return -EBADF;
}
if (!GREEDY_REALLOC(v->input_fds, v->n_input_fds + add)) {
cmsg_close_all(&mh);
return -ENOMEM;
}
memcpy_safe(v->input_fds + v->n_input_fds, CMSG_TYPED_DATA(cmsg, int), add * sizeof(int));
v->n_input_fds += add;
}
}
v->input_buffer_size += n;
v->input_buffer_unscanned += n;
@ -1255,7 +1436,7 @@ Varlink* varlink_flush_close_unref(Varlink *v) {
return varlink_close_unref(v);
}
static int varlink_enqueue_json(Varlink *v, JsonVariant *m) {
static int varlink_format_json(Varlink *v, JsonVariant *m) {
_cleanup_free_ char *text = NULL;
int r;
@ -1305,6 +1486,70 @@ static int varlink_enqueue_json(Varlink *v, JsonVariant *m) {
return 0;
}
/* Submits message 'm' for sending, together with any fds previously pushed on the connection.
 * Returns 0 on success (or whatever varlink_format_json() returns on the direct path), and
 * -ENOMEM if the queue item cannot be allocated. */
static int varlink_enqueue_json(Varlink *v, JsonVariant *m) {
        VarlinkJsonQueueItem *item;

        assert(v);
        assert(m);

        /* If there are no file descriptors to be queued and no queue entries yet we can shortcut
         * things and append this entry directly to the output buffer */
        if (!v->output_queue && v->n_pushed_fds == 0)
                return varlink_format_json(v, m);

        /* Otherwise the message has to go through the queue, so that its fds stay attached to it */
        item = varlink_json_queue_item_new(m, v->pushed_fds, v->n_pushed_fds);
        if (!item)
                return -ENOMEM;

        /* The queue entry owns the fds from here on; the pushed list starts over empty */
        v->n_pushed_fds = 0;

        /* Append at the end of the queue and remember the new tail */
        LIST_INSERT_AFTER(queue, v->output_queue, v->output_queue_tail, item);
        v->output_queue_tail = item;

        return 0;
}
/* Moves entries from the head of the output queue into the output buffer, one at a time, as long as
 * doing so does not mix up which fds belong to which message. Returns 0 on success (including when
 * it stopped early because fds are still waiting to be written), negative errno on failure. */
static int varlink_format_queue(Varlink *v) {
        VarlinkJsonQueueItem *item;
        int r;

        assert(v);

        while ((item = v->output_queue)) {
                _cleanup_free_ int *fds_copy = NULL;

                /* There are still unwritten fds attached to the output buffer. Formatting another
                 * message now would blur the fd/message boundary, hence stop and try again later. */
                if (v->n_output_fds > 0)
                        return 0;

                /* Copy the fd numbers first, so that nothing can fail anymore once the message text
                 * has been appended to the output buffer */
                if (item->n_fds > 0) {
                        fds_copy = newdup(int, item->fds, item->n_fds);
                        if (!fds_copy)
                                return -ENOMEM;
                }

                r = varlink_format_json(v, item->data);
                if (r < 0)
                        return r;

                /* Hand the fds over from the queue entry to the connection's output side; zeroing
                 * the entry's counter keeps its destructor from closing them */
                free(v->output_fds);
                v->output_fds = TAKE_PTR(fds_copy);
                v->n_output_fds = item->n_fds;
                item->n_fds = 0;

                /* Unlink the entry, keeping the tail pointer in sync if the queue ran empty */
                LIST_REMOVE(queue, v->output_queue, item);
                if (!v->output_queue)
                        v->output_queue_tail = NULL;

                varlink_json_queue_item_free(item);
        }

        return 0;
}
int varlink_send(Varlink *v, const char *method, JsonVariant *parameters) {
_cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
int r;
@ -1483,6 +1728,8 @@ int varlink_call(
assert(v->n_pending == 0); /* n_pending can't be > 0 if we are in VARLINK_IDLE_CLIENT state */
/* If there was still a reply pinned from a previous call, now it's the time to get rid of it, so
* that we can assign a new reply shortly. */
varlink_clear_current(v);
r = varlink_sanitize_parameters(&parameters);
@ -1639,6 +1886,13 @@ int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters) {
VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE))
return varlink_log_errno(v, SYNTHETIC_ERRNO(EBUSY), "Connection busy.");
/* Reset the list of pushed file descriptors before sending an error reply. We do this here to
* simplify code that puts together a complex reply message with fds, and half-way something
* fails. In that case the pushed fds need to be flushed out again. Under the assumption that it
* never makes sense to send fds along with errors we simply flush them out here beforehand, so that
* the callers don't need to do this explicitly. */
varlink_reset_fds(v);
r = varlink_sanitize_parameters(&parameters);
if (r < 0)
return varlink_log_errno(v, r, "Failed to sanitize parameters: %m");
@ -2028,6 +2282,156 @@ sd_event *varlink_get_event(Varlink *v) {
return v->event;
}
/* Takes an fd to send along with the *next* varlink message sent via this varlink connection. This
 * takes ownership of the specified fd on success. Use varlink_dup_fd() to duplicate the fd first.
 *
 * Returns the index of the fd within the list of pushed fds (>= 0) on success, -EPERM if output fd
 * passing has not been enabled on the connection, -ENOMEM if the list cannot grow any further. */
int varlink_push_fd(Varlink *v, int fd) {
        size_t idx;

        assert_return(v, -EINVAL);
        assert_return(fd >= 0, -EBADF);

        if (!v->allow_fd_passing_output)
                return -EPERM;

        /* The index is handed back as an int, hence it must fit into one */
        idx = v->n_pushed_fds;
        if (idx >= INT_MAX)
                return -ENOMEM;

        if (!GREEDY_REALLOC(v->pushed_fds, idx + 1))
                return -ENOMEM;

        v->pushed_fds[idx] = fd;
        v->n_pushed_fds = idx + 1;

        return (int) idx;
}
/* Like varlink_push_fd() but pushes a duplicate of the specified fd instead of taking possession of
 * it; the caller's fd is left untouched. The duplicate is created close-on-exec and numbered 3 or
 * higher. Returns the index of the pushed fd on success, negative errno on failure. */
int varlink_dup_fd(Varlink *v, int fd) {
        _cleanup_close_ int copy = -1;
        int idx;

        assert_return(v, -EINVAL);
        assert_return(fd >= 0, -EBADF);

        copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
        if (copy < 0)
                return -errno;

        idx = varlink_push_fd(v, copy);
        if (idx >= 0)
                TAKE_FD(copy); /* now owned by the connection, disarm the cleanup handler */

        return idx;
}
/* Closes and forgets all fds that were pushed via varlink_push_fd()/varlink_dup_fd() but have not
 * been attached to a message yet. This may be used whenever the caller is in the process of putting
 * together a message with fds, and then eventually something fails and they need to roll back the
 * fds. Note that this is implicitly called whenever an error reply is sent.
 *
 * Returns 0 on success, -EINVAL if 'v' is NULL. */
int varlink_reset_fds(Varlink *v) {
        assert_return(v, -EINVAL);

        /* Operate on the *pushed* list only. The fds in output_fds already belong to a message that
         * has been formatted into the output buffer and must still be transmitted with it; closing
         * those here would break an unrelated, already submitted message, while leaving pushed_fds
         * untouched would attach the stale fds to whatever message is enqueued next. */
        close_many(v->pushed_fds, v->n_pushed_fds);
        v->n_pushed_fds = 0;

        return 0;
}
/* Returns one of the file descriptors that were received along with the current message. This does
 * not duplicate the fd nor invalidate it, it hence remains in our possession.
 *
 * Returns -EPERM if input fd passing has not been enabled on the connection, and -ENXIO if index
 * 'i' is beyond the number of fds received. */
int varlink_peek_fd(Varlink *v, size_t i) {
        assert_return(v, -EINVAL);

        if (!v->allow_fd_passing_input)
                return -EPERM;

        if (i < v->n_input_fds)
                return v->input_fds[i];

        return -ENXIO;
}
/* Similar to varlink_peek_fd() but the file descriptor's ownership is passed to the caller, and the
 * slot it occupied in our list is invalidated via TAKE_FD(). If called twice in a row for the same
 * index the second call will return -EBADF.
 *
 * Returns -EPERM if input fd passing has not been enabled on the connection, and -ENXIO if index
 * 'i' is beyond the number of fds received. */
int varlink_take_fd(Varlink *v, size_t i) {
        int fd;

        assert_return(v, -EINVAL);

        if (!v->allow_fd_passing_input)
                return -EPERM;

        if (i >= v->n_input_fds)
                return -ENXIO;

        fd = TAKE_FD(v->input_fds[i]);
        return fd;
}
/* Checks that the connection's fd refers to an AF_UNIX socket. The address family is determined on
 * first use and cached in v->af (a negative value there means "not known yet").
 *
 * Returns 0 if the fd is an AF_UNIX socket, -ENOTSOCK if it is not a socket at all, -ENOMEDIUM if
 * it is a socket of a different family, or another negative errno if the lookup itself fails. */
static int verify_unix_socket(Varlink *v) {
        assert(v);

        if (v->af < 0) {
                struct stat sb;
                int family;

                if (fstat(v->fd, &sb) < 0)
                        return -errno;

                if (!S_ISSOCK(sb.st_mode)) {
                        /* Not a socket: remember that, so that we don't have to check again */
                        v->af = AF_UNSPEC;
                        return -ENOTSOCK;
                }

                family = socket_get_family(v->fd);
                v->af = family;
                if (family < 0)
                        return family;
        }

        if (v->af != AF_UNIX)
                return -ENOMEDIUM;

        return 0;
}
/* Enables or disables reception of fds on this connection. Enabling is only permitted if the
 * connection's fd passes verify_unix_socket(), i.e. is an AF_UNIX socket; otherwise that function's
 * error is returned and the setting stays off.
 *
 * Returns 0 if the setting already had the requested value or was just turned on, 1 if it was just
 * turned off, negative errno on failure.
 * NOTE(review): the 0-on-enable vs. 1-on-disable asymmetry is kept exactly as found; confirm with
 * callers whether it is intentional. */
int varlink_set_allow_fd_passing_input(Varlink *v, bool b) {
        int r;

        assert_return(v, -EINVAL);

        if (b == v->allow_fd_passing_input)
                return 0;

        if (b) {
                r = verify_unix_socket(v);
                if (r < 0)
                        return r;

                v->allow_fd_passing_input = true;
                return 0;
        }

        v->allow_fd_passing_input = false;
        return 1;
}
/* Enables or disables sending of fds on this connection. Enabling is only permitted if the
 * connection's fd passes verify_unix_socket(), i.e. is an AF_UNIX socket; otherwise that function's
 * error is returned and the setting stays off.
 *
 * Returns 0 if the setting already had the requested value or was just turned on, 1 if it was just
 * turned off, negative errno on failure.
 * NOTE(review): the 0-on-enable vs. 1-on-disable asymmetry is kept exactly as found; confirm with
 * callers whether it is intentional. */
int varlink_set_allow_fd_passing_output(Varlink *v, bool b) {
        int r;

        assert_return(v, -EINVAL);

        if (b == v->allow_fd_passing_output)
                return 0;

        if (b) {
                r = verify_unix_socket(v);
                if (r < 0)
                        return r;

                v->allow_fd_passing_output = true;
                return 0;
        }

        v->allow_fd_passing_output = false;
        return 1;
}
int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags) {
VarlinkServer *s;

View file

@ -107,6 +107,18 @@ int varlink_error_errno(Varlink *v, int error);
int varlink_notify(Varlink *v, JsonVariant *parameters);
int varlink_notifyb(Varlink *v, ...);
/* Write outgoing fds into the socket (to be associated with the next enqueued message) */
/* varlink_push_fd() takes ownership of 'fd' on success and returns its index among the pushed fds;
* it fails with -EPERM unless output fd passing has been enabled on the connection. */
int varlink_push_fd(Varlink *v, int fd);
/* Same as varlink_push_fd(), but pushes a close-on-exec duplicate and leaves 'fd' with the caller */
int varlink_dup_fd(Varlink *v, int fd);
/* Meant for rolling back fds when putting together a message with fds fails half-way */
int varlink_reset_fds(Varlink *v);
/* Read incoming fds from the socket (associated with the currently handled message) */
/* Both calls fail with -EPERM unless input fd passing has been enabled, and with -ENXIO if index 'i'
* is out of range. varlink_peek_fd() leaves the fd owned by the connection. */
int varlink_peek_fd(Varlink *v, size_t i);
/* varlink_take_fd() transfers ownership of the fd to the caller and invalidates the connection's slot */
int varlink_take_fd(Varlink *v, size_t i);
/* Turn fd passing on or off per direction. Turning it on fails with -ENOTSOCK if the connection's fd
* is not a socket and with -ENOMEDIUM if it is a socket of a family other than AF_UNIX. */
int varlink_set_allow_fd_passing_input(Varlink *v, bool b);
int varlink_set_allow_fd_passing_output(Varlink *v, bool b);
/* Bind a disconnect, reply or timeout callback */
int varlink_bind_reply(Varlink *v, VarlinkReply reply);