Merge pull request #25496 from DaanDeMeyer/repart-optimize

repart: Prefer using loop devices to populate filesystems when available
This commit is contained in:
Luca Boccassi 2022-11-28 15:51:32 +01:00 committed by GitHub
commit 94d2caedee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -3037,6 +3037,36 @@ static PartitionTarget *partition_target_free(PartitionTarget *t) {
DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free);
static int prepare_temporary_file(PartitionTarget *t, uint64_t size) {
_cleanup_(unlink_and_freep) char *temp = NULL;
_cleanup_close_ int fd = -1;
const char *vt;
int r;
assert(t);
r = var_tmp_dir(&vt);
if (r < 0)
return log_error_errno(r, "Could not determine temporary directory: %m");
temp = path_join(vt, "repart-XXXXXX");
if (!temp)
return log_oom();
fd = mkostemp_safe(temp);
if (fd < 0)
return log_error_errno(fd, "Failed to create temporary file: %m");
if (ftruncate(fd, size) < 0)
return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
FORMAT_BYTES(size));
t->fd = TAKE_FD(fd);
t->path = TAKE_PTR(temp);
return 0;
}
static int partition_target_prepare(
Context *context,
Partition *p,
@ -3045,9 +3075,8 @@ static int partition_target_prepare(
PartitionTarget **ret) {
_cleanup_(partition_target_freep) PartitionTarget *t = NULL;
struct stat st;
int whole_fd;
int r;
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
int whole_fd, r;
assert(context);
assert(p);
@ -3055,16 +3084,6 @@ static int partition_target_prepare(
assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
if (fstat(whole_fd, &st) < 0)
return -errno;
/* If we're operating on a block device, we definitely need privileges to access block devices so we
* can just use loop devices as our target. Otherwise, we're operating on a regular file, in that
* case, let's write to regular files and copy those into the final image so we can run without root
* privileges. On filesystems with reflinking support, we can take advantage of this and just reflink
* the result into the image.
*/
t = new(PartitionTarget, 1);
if (!t)
return log_oom();
@ -3073,47 +3092,38 @@ static int partition_target_prepare(
.whole_fd = -1,
};
if (S_ISBLK(st.st_mode) || (p->format && !mkfs_supports_root_option(p->format))) {
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
/* Loopback block devices are not only useful to turn regular files into block devices, but
* also to cut out sections of block devices into new block devices. */
r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d);
if (r < 0)
return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
t->loop = TAKE_PTR(d);
} else if (need_path) {
_cleanup_(unlink_and_freep) char *temp = NULL;
_cleanup_close_ int fd = -1;
const char *vt;
r = var_tmp_dir(&vt);
if (r < 0)
return log_error_errno(r, "Could not determine temporary directory: %m");
temp = path_join(vt, "repart-XXXXXX");
if (!temp)
return log_oom();
fd = mkostemp_safe(temp);
if (fd < 0)
return log_error_errno(fd, "Failed to create temporary file: %m");
if (ftruncate(fd, size) < 0)
return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
FORMAT_BYTES(size));
t->fd = TAKE_FD(fd);
t->path = TAKE_PTR(temp);
} else {
if (!need_path) {
if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1)
return log_error_errno(errno, "Failed to seek to partition offset: %m");
t->whole_fd = whole_fd;
*ret = TAKE_PTR(t);
return 0;
}
/* Loopback block devices are not only useful to turn regular files into block devices, but
* also to cut out sections of block devices into new block devices. */
r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d);
if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r))
return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
if (r >= 0) {
t->loop = TAKE_PTR(d);
*ret = TAKE_PTR(t);
return 0;
}
/* If we can't allocate a loop device, let's write to a regular file that we copy into the final
* image so we can run in containers and without needing root privileges. On filesystems with
* reflinking support, we can take advantage of this and just reflink the result into the image.
*/
log_debug_errno(r, "No access to loop devices, falling back to a regular file");
r = prepare_temporary_file(t, size);
if (r < 0)
return r;
*ret = TAKE_PTR(t);
return 0;
@ -3678,7 +3688,12 @@ static int context_copy_blocks(Context *context) {
return 0;
}
static int do_copy_files(Partition *p, const char *root, const Set *denylist) {
static int do_copy_files(
Partition *p,
const char *root,
uid_t override_uid,
gid_t override_gid,
const Set *denylist) {
int r;
@ -3721,17 +3736,21 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) {
if (pfd < 0)
return log_error_errno(pfd, "Failed to open parent directory of target: %m");
/* Make sure everything is owned by the user running repart so that
* make_filesystem() can map the user running repart to "root" in a user
* namespace to have the files owned by root in the final image. */
r = copy_tree_at(
sfd, ".",
pfd, fn,
getuid(), getgid(),
override_uid, override_gid,
COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS,
denylist);
} else
r = copy_tree_at(
sfd, ".",
tfd, ".",
getuid(), getgid(),
override_uid, override_gid,
COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS,
denylist);
if (r < 0)
@ -3769,6 +3788,9 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) {
if (r < 0)
return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_root), *target);
if (fchown(tfd, override_uid, override_gid) < 0)
return log_error_errno(r, "Failed to change ownership of %s", *target);
(void) copy_xattr(sfd, tfd, COPY_ALL_XATTRS);
(void) copy_access(sfd, tfd);
(void) copy_times(sfd, tfd, 0);
@ -3778,7 +3800,7 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) {
return 0;
}
static int do_make_directories(Partition *p, const char *root) {
static int do_make_directories(Partition *p, uid_t override_uid, gid_t override_gid, const char *root) {
int r;
assert(p);
@ -3786,7 +3808,7 @@ static int do_make_directories(Partition *p, const char *root) {
STRV_FOREACH(d, p->make_directories) {
r = mkdir_p_root(root, *d, getuid(), getgid(), 0755);
r = mkdir_p_root(root, *d, override_uid, override_gid, 0755);
if (r < 0)
return log_error_errno(r, "Failed to create directory '%s' in file system: %m", *d);
}
@ -3794,6 +3816,11 @@ static int do_make_directories(Partition *p, const char *root) {
return 0;
}
static bool partition_needs_populate(Partition *p) {
assert(p);
return !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories);
}
static int partition_populate_directory(Partition *p, const Set *denylist, char **ret) {
_cleanup_(rm_rf_physical_and_freep) char *root = NULL;
_cleanup_close_ int rfd = -1;
@ -3801,11 +3828,6 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char
assert(ret);
if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories)) {
*ret = NULL;
return 0;
}
rfd = mkdtemp_open("/var/tmp/repart-XXXXXX", 0, &root);
if (rfd < 0)
return log_error_errno(rfd, "Failed to create temporary directory: %m");
@ -3813,15 +3835,11 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char
if (fchmod(rfd, 0755) < 0)
return log_error_errno(errno, "Failed to change mode of temporary directory: %m");
/* Make sure everything is owned by the user running repart so that make_filesystem() can map the
* user running repart to "root" in a user namespace to have the files owned by root in the final
* image. */
r = do_copy_files(p, root, denylist);
r = do_copy_files(p, root, getuid(), getgid(), denylist);
if (r < 0)
return r;
r = do_make_directories(p, root);
r = do_make_directories(p, getuid(), getgid(), root);
if (r < 0)
return r;
@ -3830,27 +3848,11 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char
}
static int partition_populate_filesystem(Partition *p, const char *node, const Set *denylist) {
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
struct stat st;
int r;
assert(p);
assert(node);
if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories))
return 0;
if (stat(node, &st) < 0)
return log_error_errno(errno, "Failed to stat %s: %m", node);
if (!S_ISBLK(st.st_mode)) {
r = loop_device_make_by_path(node, O_RDWR, 0, LOCK_EX, &d);
if (r < 0)
return log_error_errno(r, "Failed to make loopback device of %s: %m", node);
node = d->node;
}
log_info("Populating %s filesystem with files.", p->format);
/* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to
@ -3873,10 +3875,10 @@ static int partition_populate_filesystem(Partition *p, const char *node, const S
if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0)
_exit(EXIT_FAILURE);
if (do_copy_files(p, fs, denylist) < 0)
if (do_copy_files(p, fs, 0, 0, denylist) < 0)
_exit(EXIT_FAILURE);
if (do_make_directories(p, fs) < 0)
if (do_make_directories(p, 0, 0, fs) < 0)
_exit(EXIT_FAILURE);
r = syncfs_path(AT_FDCWD, fs);
@ -3979,11 +3981,16 @@ static int context_mkfs(Context *context) {
log_info("Formatting future partition %" PRIu64 ".", p->partno);
/* We prefer (or are required in the case of read-only filesystems) to populate filesystems
* directly via the corresponding mkfs binary if it supports a --rootdir (or equivalent)
* option. To do that, we need to setup the final directory tree beforehand. */
/* If we're not writing to a loop device or if we're populating a read-only filesystem, we
* have to populate using the filesystem's mkfs's --root (or equivalent) option. To do that,
* we need to set up the final directory tree beforehand. */
if (partition_needs_populate(p) && (!t->loop || fstype_is_ro(p->format))) {
if (!mkfs_supports_root_option(p->format))
return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
"Loop device access is required to populate %s filesystems.",
p->format);
if (mkfs_supports_root_option(p->format)) {
r = partition_populate_directory(p, denylist, &root);
if (r < 0)
return r;
@ -3996,9 +4003,11 @@ static int context_mkfs(Context *context) {
log_info("Successfully formatted future partition %" PRIu64 ".", p->partno);
/* Now, we can populate all the other filesystems that we couldn't populate earlier. */
if (!mkfs_supports_root_option(p->format)) {
r = partition_populate_filesystem(p, partition_target_path(t), denylist);
/* If we're writing to a loop device, we can now mount the empty filesystem and populate it. */
if (partition_needs_populate(p) && !root) {
assert(t->loop);
r = partition_populate_filesystem(p, t->loop->node, denylist);
if (r < 0)
return r;
}
@ -5243,6 +5252,7 @@ static int context_minimize(Context *context) {
LIST_FOREACH(partitions, p, context->partitions) {
_cleanup_(rm_rf_physical_and_freep) char *root = NULL;
_cleanup_(unlink_and_freep) char *temp = NULL;
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
_cleanup_close_ int fd = -1;
sd_id128_t fs_uuid;
uint64_t fsz;
@ -5259,6 +5269,9 @@ static int context_minimize(Context *context) {
if (!p->minimize)
continue;
if (!partition_needs_populate(p))
continue;
assert(!p->copy_blocks_path);
r = tempfn_random_child(vt, "repart", &temp);
@ -5273,11 +5286,15 @@ static int context_minimize(Context *context) {
return log_error_errno(errno, "Failed to open temporary file %s: %m", temp);
/* This may seem huge but it will be created sparse so it doesn't take up any space
* on disk until written to. */
* on disk until written to. */
if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0)
return log_error_errno(errno, "Failed to truncate temporary file to %s: %m",
FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL));
r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &d);
if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r))
return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
/* We're going to populate this filesystem twice so use a random UUID the first time
* to avoid UUID conflicts. */
r = sd_id128_randomize(&fs_uuid);
@ -5285,13 +5302,18 @@ static int context_minimize(Context *context) {
return r;
}
if (mkfs_supports_root_option(p->format)) {
if (!d || fstype_is_ro(p->format)) {
if (!mkfs_supports_root_option(p->format))
return log_error_errno(SYNTHETIC_ERRNO(ENODEV),
"Loop device access is required to populate %s filesystems",
p->format);
r = partition_populate_directory(p, denylist, &root);
if (r < 0)
return r;
}
r = make_filesystem(temp, p->format, strempty(p->new_label), root, fs_uuid, arg_discard);
r = make_filesystem(d ? d->node : temp, p->format, strempty(p->new_label), root, fs_uuid, arg_discard);
if (r < 0)
return r;
@ -5302,8 +5324,10 @@ static int context_minimize(Context *context) {
continue;
}
if (!mkfs_supports_root_option(p->format)) {
r = partition_populate_filesystem(p, temp, denylist);
if (!root) {
assert(d);
r = partition_populate_filesystem(p, d->node, denylist);
if (r < 0)
return r;
}
@ -5328,6 +5352,8 @@ static int context_minimize(Context *context) {
if (minimal_size_by_fs_name(p->format) != UINT64_MAX)
fsz = MAX(minimal_size_by_fs_name(p->format), fsz);
d = loop_device_unref(d);
/* Erase the previous filesystem first. */
if (ftruncate(fd, 0))
return log_error_errno(errno, "Failed to erase temporary file: %m");
@ -5335,12 +5361,18 @@ static int context_minimize(Context *context) {
if (ftruncate(fd, fsz))
return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz));
r = make_filesystem(temp, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard);
r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &d);
if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r))
return log_error_errno(r, "Failed to make loopback device of %s: %m", temp);
r = make_filesystem(d ? d->node : temp, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard);
if (r < 0)
return r;
if (!mkfs_supports_root_option(p->format)) {
r = partition_populate_filesystem(p, temp, denylist);
if (!root) {
assert(d);
r = partition_populate_filesystem(p, d->node, denylist);
if (r < 0)
return r;
}