Detect unhandled reboots and require user intervention

Podman needs to be able to detect when a system reboot occurs to
do certain types of cleanup operations (for example, resetting
container states, cleaning up IPAM allocations, etc.). Our current
method for this is a sentinel file on a tmpfs filesystem. The
problem is that the FHS has no directory that is guaranteed both
to be a tmpfs and to be accessible to rootless users. If the user
has a systemd user session, we can depend on /run/user/$UID, but
we can't reliably say that they do.

This code will detect the no-tmpfs-but-reboot-occurred case by
writing the current system boot ID to our tmpfs sentinel file
when it is created, and checking that file every time Podman
starts to make sure that the current boot ID matches the cached
one in the sentinel file. If they don't match, a reboot occurred
and the sentinel file was not on a tmpfs and thus survived. In
that case, throw an error telling the user to remove certain
directories (the ones that are supposed to be tmpfs), so we can
proceed as expected.

Signed-off-by: Matt Heon <mheon@redhat.com>
This commit is contained in:
Matt Heon 2024-04-05 09:01:22 -04:00
parent adbedb1464
commit 3560ccd5df
3 changed files with 31 additions and 0 deletions

View file

@ -624,6 +624,11 @@ func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
}
}
// Check current boot ID - will be written to the alive file.
if err := runtime.checkBootID(runtimeAliveFile); err != nil {
return err
}
runtime.startWorker()
return nil

View file

@ -4,3 +4,7 @@ package libpod
// checkCgroups2UnifiedMode is a no-op in this file: it accepts the runtime
// and returns without inspecting or modifying it.
// NOTE(review): presumably the stub used where the cgroups v2 check does not
// apply - confirm against this file's build constraints.
func checkCgroups2UnifiedMode(runtime *Runtime) {
}
// checkBootID is a no-op in this file: it ignores runtimeAliveFile and always
// returns nil, so no boot ID is read, compared, or written here.
// NOTE(review): presumably the stub used where no boot ID source is
// available - confirm against this file's build constraints.
func (r *Runtime) checkBootID(runtimeAliveFile string) error {
return nil
}

View file

@ -43,3 +43,25 @@ func checkCgroups2UnifiedMode(runtime *Runtime) {
}
}
}
// checkBootID verifies that the boot ID cached in runtimeAliveFile matches
// the boot ID reported by /proc/sys/kernel/random/boot_id.
//
// Behavior:
//   - If the system boot ID cannot be read, the check is skipped and nil is
//     returned.
//   - If runtimeAliveFile cannot be read, the read error is returned wrapped.
//   - If runtimeAliveFile is empty, the current boot ID is written into it.
//   - If runtimeAliveFile is non-empty and its contents differ from the
//     current boot ID, an error is returned telling the user to delete the
//     run root and tmp directories.
func (r *Runtime) checkBootID(runtimeAliveFile string) error {
	currentID, readErr := os.ReadFile("/proc/sys/kernel/random/boot_id")
	if readErr != nil {
		// No boot ID available from the system; nothing to compare against.
		return nil
	}

	cachedID, readErr := os.ReadFile(runtimeAliveFile)
	if readErr != nil {
		return fmt.Errorf("reading boot ID from runtime alive file: %w", readErr)
	}

	switch {
	case len(cachedID) == 0:
		// Nothing cached yet: record the current boot ID in the alive file.
		if writeErr := os.WriteFile(runtimeAliveFile, currentID, 0644); writeErr != nil {
			return fmt.Errorf("writing boot ID to runtime alive file: %w", writeErr)
		}
	case string(cachedID) != string(currentID):
		// The cached ID comes from a different boot, so the alive file
		// outlived a reboot.
		return fmt.Errorf("current system boot ID differs from cached boot ID; an unhandled reboot has occurred. Please delete directories %q and %q and re-run Podman", r.storageConfig.RunRoot, r.config.Engine.TmpDir)
	}

	return nil
}