mirror of
https://github.com/containers/podman
synced 2024-10-19 00:34:18 +00:00
container kill: handle stopped/exited container
The container lock is released before stopping/killing which implies
certain race conditions with, for instance, the cleanup process changing
the container state to stopped, exited or other states.
The (remaining) flakes seen in #16142 and #15367 strongly indicate a
race in between the stopping/killing a container and the cleanup
process. To fix the flake make sure to ignore invalid-state errors.
An alternative fix would be to change `KillContainer` to not return such
errors at all but commit c77691f06f
indicates an explicit desire to
have these errors being reported in the sig proxy.
[NO NEW TESTS NEEDED] as it's a race already covered by the system
tests.
Fixes: #16142
Fixes: #15367
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
parent
6f919af78b
commit
067442b570
|
@ -429,15 +429,20 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool)
|
|||
}
|
||||
|
||||
if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil {
|
||||
// If the PID is 0, then the container is already stopped.
|
||||
if ctr.state.PID == 0 {
|
||||
return nil
|
||||
// Ignore the error if KillContainer complains about it already
|
||||
// being stopped or exited. There's an inherent race with the
|
||||
// cleanup process (see #16142).
|
||||
if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) {
|
||||
// If the PID is 0, then the container is already stopped.
|
||||
if ctr.state.PID == 0 {
|
||||
return nil
|
||||
}
|
||||
// Again, check if the container is gone. If it is, exit cleanly.
|
||||
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
|
||||
}
|
||||
// Again, check if the container is gone. If it is, exit cleanly.
|
||||
if aliveErr := unix.Kill(ctr.state.PID, 0); errors.Is(aliveErr, unix.ESRCH) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err)
|
||||
}
|
||||
|
||||
// Give runtime a few seconds to make it happen
|
||||
|
|
Loading…
Reference in a new issue