diff --git a/catalog/systemd.catalog.in b/catalog/systemd.catalog.in index 975e77fcec..82d4820b80 100644 --- a/catalog/systemd.catalog.in +++ b/catalog/systemd.catalog.in @@ -549,3 +549,18 @@ Whenever the system transitions to a new runtime phase, the specified PCR is extended with a different string, to ensure that security policies for TPM-bound secrets and other resources are limited to specific phases of the runtime. + +-- f9b0be465ad540d0850ad32172d57c21 +Subject: Memory Trimmed +Defined-By: systemd +Support: %SUPPORT_URL% + +Memory of process @_PID@ (@_COMM@) has been trimmed. + +Either on user request or as result of a memory pressure event, memory of the +process has been trimmed, returning unneded allocation caches and other +resources back to the OS kernel, making them available for other components of +the OS. + +@TRIMMED_BYTES@ of memory were returned to the OS, which took @TRIMMED_USEC@ +micro-seconds (µs). diff --git a/src/basic/psi-util.h b/src/basic/psi-util.h index 415fbbdd47..558a130996 100644 --- a/src/basic/psi-util.h +++ b/src/basic/psi-util.h @@ -28,3 +28,8 @@ int read_resource_pressure(const char *path, PressureType type, ResourcePressure /* Was the kernel compiled with CONFIG_PSI=y? 1 if yes, 0 if not, negative on error. */ int is_pressure_supported(void); + +/* Default parameters for memory pressure watch logic in sd-event and PID 1 */ +#define MEMORY_PRESSURE_DEFAULT_TYPE "some" +#define MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC (100 * USEC_PER_MSEC) +#define MEMORY_PRESSURE_DEFAULT_WINDOW_USEC USEC_PER_SEC diff --git a/src/libsystemd/sd-event/event-source.h b/src/libsystemd/sd-event/event-source.h index 6092652d0f..f4e38d78d0 100644 --- a/src/libsystemd/sd-event/event-source.h +++ b/src/libsystemd/sd-event/event-source.h @@ -27,6 +27,7 @@ typedef enum EventSourceType { SOURCE_EXIT, SOURCE_WATCHDOG, SOURCE_INOTIFY, + SOURCE_MEMORY_PRESSURE, _SOURCE_EVENT_SOURCE_TYPE_MAX, _SOURCE_EVENT_SOURCE_TYPE_INVALID = -EINVAL, } EventSourceType; @@ -129,6 +130,17 @@ struct sd_event_source { struct inode_data *inode_data; LIST_FIELDS(sd_event_source, by_inode_data); } inotify; + struct { + int fd; + sd_event_handler_t callback; + void *write_buffer; + size_t write_buffer_size; + uint32_t events, revents; + LIST_FIELDS(sd_event_source, write_list); + bool registered:1; + bool locked:1; + bool in_write_list:1; + } memory_pressure; }; }; diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index 307fdde311..2f9b0ecda0 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -7,6 +7,7 @@ #include "sd-daemon.h" #include "sd-event.h" #include "sd-id128.h" +#include "sd-messages.h" #include "alloc-util.h" #include "env-util.h" @@ -15,15 +16,22 @@ #include "fs-util.h" #include "glyph-util.h" #include "hashmap.h" +#include "hexdecoct.h" #include "list.h" #include "logarithm.h" #include "macro.h" +#include "mallinfo-util.h" #include "memory-util.h" +#include "missing_magic.h" #include "missing_syscall.h" +#include "path-util.h" #include "prioq.h" #include "process-util.h" +#include "psi-util.h" #include "set.h" #include "signal-util.h" +#include "socket-util.h" +#include "stat-util.h" #include "string-table.h" #include "string-util.h" #include "strxcpyx.h" @@ -63,6 +71,7 @@ static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] [SOURCE_EXIT] = "exit", [SOURCE_WATCHDOG] = "watchdog", [SOURCE_INOTIFY] = "inotify", + [SOURCE_MEMORY_PRESSURE] = "memory-pressure", }; DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); @@ -85,7 +94,8 @@ DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int); SOURCE_TIME_BOOTTIME_ALARM, \ SOURCE_SIGNAL, \ SOURCE_DEFER, \ - SOURCE_INOTIFY) + SOURCE_INOTIFY, \ + SOURCE_MEMORY_PRESSURE) /* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put(). * Time sources and ratelimited sources can be passed, so effectively this is the same as the @@ -130,6 +140,9 @@ struct sd_event { /* A list of inotify objects that already have events buffered which aren't processed yet */ LIST_HEAD(struct inotify_data, buffered_inotify_data_list); + /* A list of memory pressure event sources that still need their subscription string written */ + LIST_HEAD(sd_event_source, memory_pressure_write_list); + pid_t original_pid; uint64_t iteration; @@ -524,6 +537,65 @@ static int source_child_pidfd_register(sd_event_source *s, int enabled) { return 0; } +static void source_memory_pressure_unregister(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (event_pid_changed(s->event)) + return; + + if (!s->memory_pressure.registered) + return; + + if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0) + log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m", + strna(s->description), event_source_type_to_string(s->type)); + + s->memory_pressure.registered = false; +} + +static int source_memory_pressure_register(sd_event_source *s, int enabled) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + assert(enabled != SD_EVENT_OFF); + + struct epoll_event ev = { + .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT : + (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)), + .data.ptr = s, + }; + + if (epoll_ctl(s->event->epoll_fd, + s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, + s->memory_pressure.fd, &ev) < 0) + return -errno; + + s->memory_pressure.registered = true; + return 0; +} + +static void source_memory_pressure_add_to_write_list(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (s->memory_pressure.in_write_list) + return; + + LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s); + s->memory_pressure.in_write_list = true; +} + +static void source_memory_pressure_remove_from_write_list(sd_event_source *s) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (!s->memory_pressure.in_write_list) + return; + + LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s); + s->memory_pressure.in_write_list = false; +} + static clockid_t event_source_type_to_clock(EventSourceType t) { switch (t) { @@ -947,6 +1019,11 @@ static void source_disconnect(sd_event_source *s) { break; } + case SOURCE_MEMORY_PRESSURE: + source_memory_pressure_remove_from_write_list(s); + source_memory_pressure_unregister(s); + break; + default: assert_not_reached(); } @@ -1017,6 +1094,11 @@ static sd_event_source* source_free(sd_event_source *s) { s->child.pidfd = safe_close(s->child.pidfd); } + if (s->type == SOURCE_MEMORY_PRESSURE) { + s->memory_pressure.fd = safe_close(s->memory_pressure.fd); + s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer); + } + if (s->destroy_callback) s->destroy_callback(s->userdata); @@ -1092,6 +1174,7 @@ static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType t [SOURCE_POST] = endoffsetof_field(sd_event_source, post), [SOURCE_EXIT] = endoffsetof_field(sd_event_source, exit), [SOURCE_INOTIFY] = endoffsetof_field(sd_event_source, inotify), + [SOURCE_MEMORY_PRESSURE] = endoffsetof_field(sd_event_source, memory_pressure), }; sd_event_source *s; @@ -1771,6 +1854,257 @@ _public_ int sd_event_add_exit( return 0; } +int sd_event_trim_memory(void) { + int r; + + /* A default implementation of a memory pressure callback. Simply releases our own allocation caches + * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a + * NULL callback parameter. */ + + log_debug("Memory pressure event, trimming malloc() memory."); + +#if HAVE_GENERIC_MALLINFO + generic_mallinfo before_mallinfo = generic_mallinfo_get(); +#endif + + usec_t before_timestamp = now(CLOCK_MONOTONIC); + hashmap_trim_pools(); + r = malloc_trim(0); + usec_t after_timestamp = now(CLOCK_MONOTONIC); + + if (r > 0) + log_debug("Successfully trimmed some memory."); + else + log_debug("Couldn't trim any memory."); + + usec_t period = after_timestamp - before_timestamp; + +#if HAVE_GENERIC_MALLINFO + generic_mallinfo after_mallinfo = generic_mallinfo_get(); + size_t l = LESS_BY((size_t) before_mallinfo.hblkhd, (size_t) after_mallinfo.hblkhd) + + LESS_BY((size_t) before_mallinfo.arena, (size_t) after_mallinfo.arena); + log_struct(LOG_DEBUG, + LOG_MESSAGE("Memory trimming took %s, returned %s to OS.", + FORMAT_TIMESPAN(period, 0), + FORMAT_BYTES(l)), + "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR, + "TRIMMED_BYTES=%zu", l, + "TRIMMED_USEC=" USEC_FMT, period); +#else + log_struct(LOG_DEBUG, + LOG_MESSAGE("Memory trimming took %s.", + FORMAT_TIMESPAN(period, 0)), + "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR, + "TRIMMED_USEC=" USEC_FMT, period); +#endif + + return 0; +} + +static int memory_pressure_callback(sd_event_source *s, void *userdata) { + assert(s); + + sd_event_trim_memory(); + return 0; +} + +_public_ int sd_event_add_memory_pressure( + sd_event *e, + sd_event_source **ret, + sd_event_handler_t callback, + void *userdata) { + + _cleanup_free_ char *w = NULL; + _cleanup_(source_freep) sd_event_source *s = NULL; + _cleanup_close_ int path_fd = -1, fd = -1; + _cleanup_free_ void *write_buffer = NULL; + const char *watch, *watch_fallback, *env; + size_t write_buffer_size = 0; + struct stat st; + uint32_t events; + bool locked; + int r; + + assert_return(e, -EINVAL); + assert_return(e = event_resolve(e), -ENOPKG); + assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); + assert_return(!event_pid_changed(e), -ECHILD); + + if (!callback) + callback = memory_pressure_callback; + + s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE); + if (!s) + return -ENOMEM; + + s->wakeup = WAKEUP_EVENT_SOURCE; + s->memory_pressure.callback = callback; + s->userdata = userdata; + s->enabled = SD_EVENT_ON; + s->memory_pressure.fd = -EBADF; + + env = secure_getenv("MEMORY_PRESSURE_WATCH"); + if (env) { + if (isempty(env) || path_equal(env, "/dev/null")) + return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH."); + + if (!path_is_absolute(env) || !path_is_normalized(env)) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), + "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env); + + watch = env; + + env = secure_getenv("MEMORY_PRESSURE_WRITE"); + if (env) { + r = unbase64mem(env, SIZE_MAX, &write_buffer, &write_buffer_size); + if (r < 0) + return r; + } + + locked = true; + } else { + + r = is_pressure_supported(); + if (r < 0) + return r; + if (r == 0) + return -EOPNOTSUPP; + + /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on + * the system wide pressure if for some reason we cannot (which could be: memory controller + * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll + * only use the system-wide logic. */ + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) + watch = "/proc/pressure/memory"; + else { + _cleanup_free_ char *cg = NULL; + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg); + if (r < 0) + return r; + + w = path_join("/sys/fs/cgroup", cg, "memory.pressure"); + if (!w) + return -ENOMEM; + + watch = w; + watch_fallback = "/proc/pressure/memory"; + } + + /* Android uses three levels in its userspace low memory killer logic: + * some 70000 1000000 + * some 100000 1000000 + * full 70000 1000000 + * + * GNOME's low memory monitor uses: + * some 70000 1000000 + * some 100000 1000000 + * full 100000 1000000 + * + * We'll default to the middle level that both agree on */ + if (asprintf((char**) &write_buffer, + "%s " USEC_FMT " " USEC_FMT, + MEMORY_PRESSURE_DEFAULT_TYPE, + MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC, + MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0) + return -ENOMEM; + + write_buffer_size = strlen(write_buffer) + 1; + locked = false; + } + + path_fd = open(watch, O_PATH|O_CLOEXEC); + if (path_fd < 0) { + if (errno != ENOENT) + return -errno; + + /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as + * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and + * the PSI service apparently is not supported) */ + if (!watch_fallback) + return locked ? -ENOENT : -EOPNOTSUPP; + + path_fd = open(watch_fallback, O_PATH|O_CLOEXEC); + if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */ + return -EOPNOTSUPP; + if (errno < 0) + return -errno; + } + + if (fstat(path_fd, &st) < 0) + return -errno; + + if (S_ISSOCK(st.st_mode)) { + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + r = connect_unix_path(fd, path_fd, NULL); + if (r < 0) + return r; + + events = EPOLLIN; + + } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) { + fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (fd < 0) + return fd; + + if (S_ISREG(st.st_mode)) { + struct statfs sfs; + + /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */ + + if (fstatfs(fd, &sfs) < 0) + return -errno; + + if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) && + !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC)) + return -ENOTTY; + + events = EPOLLPRI; + } else + /* For fifos and char devices just watch for EPOLLIN */ + events = EPOLLIN; + + } else if (S_ISDIR(st.st_mode)) + return -EISDIR; + else + return -EBADF; + + s->memory_pressure.fd = TAKE_FD(fd); + s->memory_pressure.write_buffer = TAKE_PTR(write_buffer); + s->memory_pressure.write_buffer_size = write_buffer_size; + s->memory_pressure.events = events; + s->memory_pressure.locked = locked; + + /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the + * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the + * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure + * event sources on which writes must be executed before the first event loop iteration is + * executed. (We could also write the data here, right away, but we want to give the caller the + * freedom to call sd_event_source_set_memory_pressure_type() and + * sd_event_source_set_memory_pressure_rate() before we write it. */ + + if (s->memory_pressure.write_buffer_size > 0) + source_memory_pressure_add_to_write_list(s); + else { + r = source_memory_pressure_register(s, s->enabled); + if (r < 0) + return r; + } + + if (ret) + *ret = s; + TAKE_PTR(s); + + return 0; +} + static void event_free_inotify_data(sd_event *e, struct inotify_data *d) { assert(e); @@ -2562,6 +2896,10 @@ static int event_source_offline( prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); break; + case SOURCE_MEMORY_PRESSURE: + source_memory_pressure_unregister(s); + break; + case SOURCE_TIME_REALTIME: case SOURCE_TIME_BOOTTIME: case SOURCE_TIME_MONOTONIC: @@ -2649,6 +2987,13 @@ static int event_source_online( s->event->n_online_child_sources++; break; + case SOURCE_MEMORY_PRESSURE: + r = source_memory_pressure_register(s, enabled); + if (r < 0) + return r; + + break; + case SOURCE_TIME_REALTIME: case SOURCE_TIME_BOOTTIME: case SOURCE_TIME_MONOTONIC: @@ -3630,6 +3975,106 @@ static int process_inotify(sd_event *e) { return done; } +static int process_memory_pressure(sd_event_source *s, uint32_t revents) { + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + if (s->pending) + s->memory_pressure.revents |= revents; + else + s->memory_pressure.revents = revents; + + return source_set_pending(s, true); +} + +static int source_memory_pressure_write(sd_event_source *s) { + ssize_t n; + int r; + + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + /* once we start writing, the buffer is locked, we allow no further changes. */ + s->memory_pressure.locked = true; + + if (s->memory_pressure.write_buffer_size > 0) { + n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size); + if (n < 0) { + if (!ERRNO_IS_TRANSIENT(errno)) + return -errno; + + n = 0; + } + } else + n = 0; + + assert(n >= 0); + + if ((size_t) n == s->memory_pressure.write_buffer_size) { + s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer); + + if (n > 0) { + s->memory_pressure.write_buffer_size = 0; + + /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */ + r = source_memory_pressure_register(s, s->enabled); + if (r < 0) + return r; + } + } else if (n > 0) { + _cleanup_free_ void *c = NULL; + + assert((size_t) n < s->memory_pressure.write_buffer_size); + + c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n); + if (!c) + return -ENOMEM; + + free_and_replace(s->memory_pressure.write_buffer, c); + s->memory_pressure.write_buffer_size -= n; + return 1; + } + + return 0; +} + +static int source_memory_pressure_initiate_dispatch(sd_event_source *s) { + int r; + + assert(s); + assert(s->type == SOURCE_MEMORY_PRESSURE); + + r = source_memory_pressure_write(s); + if (r < 0) + return r; + if (r > 0) + return 1; /* if we wrote something, then don't continue with dispatching user dispatch + * function. Instead, shortcut it so that we wait for next EPOLLOUT immediately. */ + + /* No pending incoming IO? Then let's not continue further */ + if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) { + + /* Treat IO errors on the notifier the same ways errors returned from a callback */ + if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0) + return -EIO; + + return 1; /* leave dispatch, we already processed everything */ + } + + if (s->memory_pressure.revents & EPOLLIN) { + uint8_t pipe_buf[PIPE_BUF]; + ssize_t n; + + /* If the fd is readable, then flush out anything that might be queued */ + + n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf)); + if (n < 0 && !ERRNO_IS_TRANSIENT(errno)) + return -errno; + } + + return 0; /* go on, dispatch to user callback */ +} + static int source_dispatch(sd_event_source *s) { EventSourceType saved_type; sd_event *saved_event; @@ -3678,6 +4123,16 @@ static int source_dispatch(sd_event_source *s) { } } + if (s->type == SOURCE_MEMORY_PRESSURE) { + r = source_memory_pressure_initiate_dispatch(s); + if (r == -EIO) /* handle EIO errors similar to callback errors */ + goto finish; + if (r < 0) + return r; + if (r > 0) /* already handled */ + return 1; + } + if (s->enabled == SD_EVENT_ONESHOT) { r = sd_event_source_set_enabled(s, SD_EVENT_OFF); if (r < 0) @@ -3764,6 +4219,10 @@ static int source_dispatch(sd_event_source *s) { break; } + case SOURCE_MEMORY_PRESSURE: + r = s->memory_pressure.callback(s, s->userdata); + break; + case SOURCE_WATCHDOG: case _SOURCE_EVENT_SOURCE_TYPE_MAX: case _SOURCE_EVENT_SOURCE_TYPE_INVALID: @@ -3772,6 +4231,7 @@ static int source_dispatch(sd_event_source *s) { s->dispatching = false; +finish: if (r < 0) { log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m", strna(s->description), @@ -3922,6 +4382,30 @@ static void event_close_inode_data_fds(sd_event *e) { } } +static int event_memory_pressure_write_list(sd_event *e) { + int r; + + assert(e); + + for (;;) { + sd_event_source *s; + + s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list); + if (!s) + break; + + assert(s->type == SOURCE_MEMORY_PRESSURE); + assert(s->memory_pressure.write_buffer_size > 0); + s->memory_pressure.in_write_list = false; + + r = source_memory_pressure_write(s); + if (r < 0) + return r; + } + + return 0; +} + _public_ int sd_event_prepare(sd_event *e) { int r; @@ -3950,6 +4434,10 @@ _public_ int sd_event_prepare(sd_event *e) { if (r < 0) return r; + r = event_memory_pressure_write_list(e); + if (r < 0) + return r; + r = event_arm_timer(e, &e->realtime); if (r < 0) return r; @@ -4115,6 +4603,10 @@ static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t r = process_pidfd(e, s, e->event_queue[i].events); break; + case SOURCE_MEMORY_PRESSURE: + r = process_memory_pressure(s, e->event_queue[i].events); + break; + default: assert_not_reached(); } @@ -4700,3 +5192,91 @@ _public_ int sd_event_set_signal_exit(sd_event *e, int b) { return change; } + +_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) { + _cleanup_free_ char *b = NULL; + _cleanup_free_ void *w = NULL; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM); + assert_return(ty, -EINVAL); + + if (!STR_IN_SET(ty, "some", "full")) + return -EINVAL; + + if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */ + return -EBUSY; + + char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size); + if (!space) + return -EINVAL; + + size_t l = (char*) space - (char*) s->memory_pressure.write_buffer; + b = memdup_suffix0(s->memory_pressure.write_buffer, l); + if (!b) + return -ENOMEM; + if (!STR_IN_SET(b, "some", "full")) + return -EINVAL; + + if (streq(b, ty)) + return 0; + + size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l); + w = new(char, nl); + if (!w) + return -ENOMEM; + + memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l)); + + free_and_replace(s->memory_pressure.write_buffer, w); + s->memory_pressure.write_buffer_size = nl; + s->memory_pressure.locked = false; + + return 1; +} + +_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) { + _cleanup_free_ char *b = NULL; + _cleanup_free_ void *w = NULL; + + assert_return(s, -EINVAL); + assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM); + + if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX) + return -ERANGE; + if (window_usec <= 0 || window_usec >= UINT64_MAX) + return -ERANGE; + if (threshold_usec > window_usec) + return -EINVAL; + + if (s->memory_pressure.locked) /* Refuse adjusting parameters, if caller told us how to watch for events */ + return -EBUSY; + + char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size); + if (!space) + return -EINVAL; + + size_t l = (char*) space - (char*) s->memory_pressure.write_buffer; + b = memdup_suffix0(s->memory_pressure.write_buffer, l); + if (!b) + return -ENOMEM; + if (!STR_IN_SET(b, "some", "full")) + return -EINVAL; + + if (asprintf((char**) &w, + "%s " USEC_FMT " " USEC_FMT "", + b, + threshold_usec, + window_usec) < 0) + return -EINVAL; + + l = strlen(w) + 1; + if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0) + return 0; + + free_and_replace(s->memory_pressure.write_buffer, w); + s->memory_pressure.write_buffer_size = l; + s->memory_pressure.locked = false; + + return 1; +} diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h index cae4c8672a..5ca8528895 100644 --- a/src/systemd/sd-event.h +++ b/src/systemd/sd-event.h @@ -99,6 +99,7 @@ int sd_event_add_inotify_fd(sd_event *e, sd_event_source **s, int fd, uint32_t m int sd_event_add_defer(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_post(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_add_exit(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); +int sd_event_add_memory_pressure(sd_event *e, sd_event_source **s, sd_event_handler_t callback, void *userdata); int sd_event_prepare(sd_event *e); int sd_event_wait(sd_event *e, uint64_t usec); @@ -160,6 +161,8 @@ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo int sd_event_source_send_child_signal(sd_event_source *s, int sig, const void *si, unsigned flags); #endif int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret); +int sd_event_source_set_memory_pressure_type(sd_event_source *e, const char *ty); +int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec); int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback); int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret); int sd_event_source_get_floating(sd_event_source *s); @@ -171,6 +174,8 @@ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval_use int sd_event_source_is_ratelimited(sd_event_source *s); int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback); +int sd_event_trim_memory(void); + /* Define helpers so that __attribute__((cleanup(sd_event_unrefp))) and similar may be used. */ _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event, sd_event_unref); _SD_DEFINE_POINTER_CLEANUP_FUNC(sd_event_source, sd_event_source_unref); diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index 00fdbad2c5..39f56240fe 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -195,6 +195,9 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_TPM_PCR_EXTEND SD_ID128_MAKE(3f,7d,5e,f3,e5,4f,43,02,b4,f0,b1,43,bb,27,0c,ab) #define SD_MESSAGE_TPM_PCR_EXTEND_STR SD_ID128_MAKE_STR(3f,7d,5e,f3,e5,4f,43,02,b4,f0,b1,43,bb,27,0c,ab) +#define SD_MESSAGE_MEMORY_TRIM SD_ID128_MAKE(f9,b0,be,46,5a,d5,40,d0,85,0a,d3,21,72,d5,7c,21) +#define SD_MESSAGE_MEMORY_TRIM_STR SD_ID128_MAKE_STR(f9,b0,be,46,5a,d5,40,d0,85,0a,d3,21,72,d5,7c,21) + _SD_END_DECLARATIONS; #endif