importd: create a loopback btrfs file system for /var/lib/machines, if necessary

When manipulating container and VM images we need efficient and atomic
directory snapshots and file copies, as well as disk quota. btrfs
provides this, legacy file systems do not. Hence, implicitly create a
loopback file system in /var/lib/machines.raw and mount it to
/var/lib/machines, if that directory is not on btrfs anyway.

This is done implicitly and transparently the first time the user
invokes "machinectl import-xyz".

This allows us to benefit from btrfs features for container
management without requiring the rest of the system to use btrfs.

The loopback is sized 500M initially. Patches to grow it dynamically are
to follow.
This commit is contained in:
Lennart Poettering 2015-02-24 17:23:53 +01:00
parent 15e80c7b75
commit 113b3fc1a8
3 changed files with 243 additions and 8 deletions

View file

@ -499,6 +499,7 @@ dist_systemunit_DATA = \
units/sys-kernel-debug.mount \
units/sys-fs-fuse-connections.mount \
units/tmp.mount \
units/var-lib-machines.mount \
units/printer.target \
units/sound.target \
units/bluetooth.target \
@ -6586,7 +6587,8 @@ SYSINIT_TARGET_WANTS += \
LOCAL_FS_TARGET_WANTS += \
systemd-remount-fs.service \
tmp.mount
tmp.mount \
var-lib-machines.mount
MULTI_USER_TARGET_WANTS += \
getty.target \

View file

@ -21,6 +21,8 @@
#include <sys/prctl.h>
#include <sys/vfs.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include "sd-bus.h"
#include "util.h"
@ -32,8 +34,13 @@
#include "mkdir.h"
#include "def.h"
#include "missing.h"
#include "btrfs-util.h"
#include "path-util.h"
#include "import-util.h"
#define VAR_LIB_MACHINES_SIZE_START (1024UL*1024UL*500UL)
#define VAR_LIB_MACHINES_FREE_MIN (1024UL*1024UL*750UL)
typedef struct Transfer Transfer;
typedef struct Manager Manager;
@ -643,7 +650,7 @@ static Transfer *manager_find(Manager *m, TransferType type, const char *dkr_ind
return NULL;
}
static int check_btrfs(sd_bus_error *error) {
static int check_btrfs(void) {
struct statfs sfs;
if (statfs("/var/lib/machines", &sfs) < 0) {
@ -654,10 +661,239 @@ static int check_btrfs(sd_bus_error *error) {
return -errno;
}
if (!F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
}
/* Opens /var/lib/machines.raw, creating it first if it does not exist yet.
 *
 * When the file is missing, a temporary file is created next to the final
 * location, sized to VAR_LIB_MACHINES_SIZE_START, formatted with mkfs.btrfs
 * and then renamed into place without replacing an existing file.
 *
 * Returns an open file descriptor for the image on success (the caller owns
 * it and has to close it), or a negative errno-style value on failure. On
 * most failure paths the reason is also stored in 'error'. */
static int setup_machine_raw(sd_bus_error *error) {
        _cleanup_free_ char *tmp = NULL;
        _cleanup_close_ int fd = -1;
        struct statvfs ss;
        pid_t pid = 0;
        siginfo_t si;
        int r;

        /* We want to be able to make use of btrfs-specific file
         * system features, in particular subvolumes, reflinks and
         * quota. Hence, if we detect that /var/lib/machines is
         * not located on btrfs, let's create a loopback file, place a
         * btrfs file system into it, and mount it to
         * /var/lib/machines. */

        fd = open("/var/lib/machines.raw", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
        if (fd >= 0) {
                /* The image exists already, hand the descriptor to the caller. */
                r = fd;
                fd = -1;
                return r;
        }

        if (errno != ENOENT)
                return sd_bus_error_set_errnof(error, errno, "Failed to open /var/lib/machines.raw: %m");

        r = tempfn_xxxxxx("/var/lib/machines.raw", &tmp);
        if (r < 0)
                return r;

        (void) mkdir_p_label("/var/lib", 0755);
        fd = open(tmp, O_RDWR|O_CREAT|O_EXCL|O_NOCTTY|O_CLOEXEC, 0600);
        if (fd < 0)
                return sd_bus_error_set_errnof(error, errno, "Failed to create /var/lib/machines.raw: %m");

        if (fstatvfs(fd, &ss) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to determine free space on /var/lib/machines.raw: %m");
                goto fail;
        }

        /* f_bavail is counted in units of f_frsize. Do the multiplication
         * in a wide type so that it cannot wrap around where unsigned long
         * is only 32 bits. */
        if ((unsigned long long) ss.f_frsize * (unsigned long long) ss.f_bavail < VAR_LIB_MACHINES_FREE_MIN) {
                r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Not enough free disk space to set up /var/lib/machines.");
                goto fail;
        }

        if (ftruncate(fd, VAR_LIB_MACHINES_SIZE_START) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to enlarge /var/lib/machines.raw: %m");
                goto fail;
        }

        pid = fork();
        if (pid < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to fork mkfs.btrfs: %m");
                goto fail;
        }

        if (pid == 0) {

                /* Child */

                reset_all_signal_handlers();
                reset_signal_mask();
                assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);

                fd = safe_close(fd);

                /* The first variadic argument is argv[0], the options
                 * only start after it. */
                execlp("mkfs.btrfs", "mkfs.btrfs", "-Lvar-lib-machines", tmp, (char*) NULL);

                /* Never return into the parent's code path from the
                 * child. Exit code 99 tells the parent that the
                 * binary is missing. */
                if (errno == ENOENT)
                        _exit(99);

                _exit(EXIT_FAILURE);
        }

        r = wait_for_terminate(pid, &si);
        if (r < 0) {
                r = sd_bus_error_set_errnof(error, r, "Failed to wait for mkfs.btrfs: %m");
                goto fail;
        }

        /* The child has been reaped, don't try to kill it in the fail path. */
        pid = 0;

        if (si.si_code != CLD_EXITED) {
                r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs died abnormally.");
                goto fail;
        }
        if (si.si_status == 99) {
                r = sd_bus_error_set_errnof(error, ENOENT, "Cannot set up /var/lib/machines, mkfs.btrfs is missing");
                goto fail;
        }
        if (si.si_status != 0) {
                r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "mkfs.btrfs failed with error code %i", si.si_status);
                goto fail;
        }

        /* RENAME_NOREPLACE: fail rather than overwrite an image that
         * showed up at the final location in the meantime. */
        if (renameat2(AT_FDCWD, tmp, AT_FDCWD, "/var/lib/machines.raw", RENAME_NOREPLACE) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to move /var/lib/machines.raw into place: %m");
                goto fail;
        }

        r = fd;
        fd = -1;

        return r;

fail:
        if (tmp)
                unlink_noerrno(tmp);

        if (pid > 1)
                kill_and_sigcont(pid, SIGKILL);

        return r;
}
/* Makes sure /var/lib/machines is backed by btrfs.
 *
 * If the directory is already on btrfs, this only tries to create it as a
 * subvolume (best effort) and returns. Otherwise, provided the directory is
 * neither a mount point nor populated, the loopback image returned by
 * setup_machine_raw() is attached to a free loop device and mounted onto
 * /var/lib/machines.
 *
 * Returns 0 on success, or a negative errno-style value on failure. On most
 * failure paths the reason is also stored in 'error'. */
static int setup_machine_directory(sd_bus_error *error) {
        struct loop_info64 info = {
                .lo_flags = LO_FLAGS_AUTOCLEAR,
        };
        _cleanup_close_ int fd = -1, control = -1, loop = -1;
        _cleanup_free_ char *loopdev = NULL;
        char tmpdir[] = "/tmp/import-mount.XXXXXX", *mntdir = NULL;
        bool tmpdir_made = false, mntdir_made = false, mntdir_mounted = false;
        int r, nr = -1;

        r = check_btrfs();
        if (r < 0)
                return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
        if (r > 0) {
                /* Already on btrfs, nothing to set up. The result of the
                 * subvolume creation is deliberately ignored. */
                (void) btrfs_subvol_make_label("/var/lib/machines");
                return 0;
        }

        /* Refuse if something else is mounted there already, or if the
         * directory contains entries. */
        if (path_is_mount_point("/var/lib/machines", true) > 0 ||
            dir_is_empty("/var/lib/machines") == 0)
                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "/var/lib/machines is not a btrfs file system. Operation is not supported on legacy file systems.");

        fd = setup_machine_raw(error);
        if (fd < 0)
                return fd;

        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (control < 0)
                return sd_bus_error_set_errnof(error, errno, "Failed to open /dev/loop-control: %m");

        nr = ioctl(control, LOOP_CTL_GET_FREE);
        if (nr < 0)
                return sd_bus_error_set_errnof(error, errno, "Failed to allocate loop device: %m");

        if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) {
                r = -ENOMEM;
                goto fail;
        }

        loop = open(loopdev, O_CLOEXEC|O_RDWR|O_NOCTTY|O_NONBLOCK);
        if (loop < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to open loopback device: %m");
                goto fail;
        }

        if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to bind loopback device: %m");
                goto fail;
        }

        if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to enable auto-clear for loopback device: %m");
                goto fail;
        }

        /* We need to make sure the new /var/lib/machines directory
         * has an access mode of 0700 at the time it is first made
         * available. mkfs will create it with 0755 however. Hence,
         * let's mount the directory into an inaccessible directory
         * below /tmp first, fix the access mode, and move it to the
         * public place then. */

        if (!mkdtemp(tmpdir)) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount parent directory: %m");
                goto fail;
        }

        tmpdir_made = true;

        mntdir = strjoina(tmpdir, "/mnt");
        if (mkdir(mntdir, 0700) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to create temporary mount directory: %m");
                goto fail;
        }

        mntdir_made = true;

        if (mount(loopdev, mntdir, "btrfs", 0, NULL) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to mount loopback device: %m");
                goto fail;
        }

        mntdir_mounted = true;

        /* This adjusts the access mode of the root directory of the new
         * file system, hence report it as such. */
        if (chmod(mntdir, 0700) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to fix access mode: %m");
                goto fail;
        }

        (void) mkdir_p_label("/var/lib/machines", 0700);

        if (mount(mntdir, "/var/lib/machines", NULL, MS_BIND, NULL) < 0) {
                r = sd_bus_error_set_errnof(error, errno, "Failed to mount directory into right place: %m");
                goto fail;
        }

        /* The bind mount is in place, drop the temporary mount and the
         * temporary directories again. */
        (void) umount2(mntdir, MNT_DETACH);
        (void) rmdir(mntdir);
        (void) rmdir(tmpdir);

        return 0;

fail:
        /* Undo whatever was set up so far, in reverse order. */
        if (mntdir_mounted)
                (void) umount2(mntdir, MNT_DETACH);

        if (mntdir_made)
                (void) rmdir(mntdir);
        if (tmpdir_made)
                (void) rmdir(tmpdir);

        if (loop >= 0) {
                (void) ioctl(loop, LOOP_CLR_FD);
                loop = safe_close(loop);
        }

        if (control >= 0 && nr >= 0)
                (void) ioctl(control, LOOP_CTL_REMOVE, nr);

        return r;
}
static int method_pull_tar_or_raw(sd_bus *bus, sd_bus_message *msg, void *userdata, sd_bus_error *error) {
@ -705,7 +941,7 @@ static int method_pull_tar_or_raw(sd_bus *bus, sd_bus_message *msg, void *userda
if (v < 0)
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown verification mode %s", verify);
r = check_btrfs(error);
r = setup_machine_directory(error);
if (r < 0)
return r;
@ -800,7 +1036,7 @@ static int method_pull_dkr(sd_bus *bus, sd_bus_message *msg, void *userdata, sd_
if (v != IMPORT_VERIFY_NO)
return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "DKR does not support verification.");
r = check_btrfs(error);
r = setup_machine_directory(error);
if (r < 0)
return r;

View file

@ -15,6 +15,3 @@ BusName=org.freedesktop.import1
CapabilityBoundingSet=CAP_CHOWN CAP_FOWNER CAP_FSETID CAP_MKNOD CAP_SETFCAP CAP_SYS_ADMIN CAP_SETPCAP
NoNewPrivileges=yes
WatchdogSec=1min
PrivateTmp=yes
ProtectSystem=full
ProtectHome=yes