mirror of
https://github.com/containers/podman
synced 2024-10-20 01:03:51 +00:00
Merge pull request #2697 from baude/healtcheckphase3
podman health check phase3
This commit is contained in:
commit
d0c6a35c05
|
@ -293,7 +293,7 @@ func getCreateFlags(c *cliconfig.PodmanCommand) {
|
|||
)
|
||||
createFlags.String(
|
||||
"healthcheck-interval", "30s",
|
||||
"set an interval for the healthchecks",
|
||||
"set an interval for the healthchecks (a value of disable results in no automatic timer setup)",
|
||||
)
|
||||
createFlags.Uint(
|
||||
"healthcheck-retries", 3,
|
||||
|
|
|
@ -494,6 +494,14 @@ func generateContainerFilterFuncs(filter, filterValue string, runtime *libpod.Ru
|
|||
}
|
||||
return false
|
||||
}, nil
|
||||
case "health":
|
||||
return func(c *libpod.Container) bool {
|
||||
hcStatus, err := c.HealthCheckStatus()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return hcStatus == filterValue
|
||||
}, nil
|
||||
}
|
||||
return nil, errors.Errorf("%s is an invalid filter", filter)
|
||||
}
|
||||
|
|
|
@ -868,21 +868,21 @@ func makeHealthCheckFromCli(c *cliconfig.PodmanCommand) (*manifest.Schema2Health
|
|||
hc := manifest.Schema2HealthConfig{
|
||||
Test: cmd,
|
||||
}
|
||||
|
||||
if inInterval == "disable" {
|
||||
inInterval = "0"
|
||||
}
|
||||
intervalDuration, err := time.ParseDuration(inInterval)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "invalid healthcheck-interval %s ", inInterval)
|
||||
}
|
||||
|
||||
if intervalDuration < time.Duration(time.Second*1) {
|
||||
return nil, errors.New("healthcheck-interval must be at least 1 second")
|
||||
}
|
||||
|
||||
hc.Interval = intervalDuration
|
||||
|
||||
if inRetries < 1 {
|
||||
return nil, errors.New("healthcheck-retries must be greater than 0.")
|
||||
}
|
||||
|
||||
hc.Retries = int(inRetries)
|
||||
timeoutDuration, err := time.ParseDuration(inTimeout)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "invalid healthcheck-timeout %s", inTimeout)
|
||||
|
|
|
@ -100,6 +100,7 @@ Valid filters are listed below:
|
|||
| before | [ID] or [Name] Containers created before this container |
|
||||
| since | [ID] or [Name] Containers created since this container |
|
||||
| volume | [VolumeName] or [MountpointDestination] Volume mounted in container |
|
||||
| health | [Status] healthy, unhealthy or starting |
|
||||
|
||||
**--help**, **-h**
|
||||
|
||||
|
|
|
@ -833,6 +833,12 @@ func (c *Container) init(ctx context.Context) error {
|
|||
if err := c.save(); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.config.HealthCheckConfig != nil {
|
||||
if err := c.createTimer(); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
defer c.newContainerEvent(events.Init)
|
||||
return c.completeNetworkSetup()
|
||||
}
|
||||
|
@ -956,6 +962,15 @@ func (c *Container) start() error {
|
|||
|
||||
c.state.State = ContainerStateRunning
|
||||
|
||||
if c.config.HealthCheckConfig != nil {
|
||||
if err := c.updateHealthStatus(HealthCheckStarting); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
if err := c.startTimer(); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
defer c.newContainerEvent(events.Start)
|
||||
|
||||
return c.save()
|
||||
|
@ -1123,6 +1138,13 @@ func (c *Container) cleanup(ctx context.Context) error {
|
|||
|
||||
logrus.Debugf("Cleaning up container %s", c.ID())
|
||||
|
||||
// Remove healthcheck unit/timer file if it exists
|
||||
if c.config.HealthCheckConfig != nil {
|
||||
if err := c.removeTimer(); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up network namespace, if present
|
||||
if err := c.cleanupNetwork(); err != nil {
|
||||
lastError = err
|
||||
|
|
|
@ -3,13 +3,16 @@ package libpod
|
|||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containers/libpod/pkg/inspect"
|
||||
"github.com/coreos/go-systemd/dbus"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
@ -47,6 +50,10 @@ const (
|
|||
HealthCheckHealthy string = "healthy"
|
||||
// HealthCheckUnhealthy describes an unhealthy container
|
||||
HealthCheckUnhealthy string = "unhealthy"
|
||||
// HealthCheckStarting describes the time between when the container starts
|
||||
// and the start-period (time allowed for the container to start and application
|
||||
// to be running) expires.
|
||||
HealthCheckStarting string = "starting"
|
||||
)
|
||||
|
||||
// hcWriteCloser allows us to use bufio as a WriteCloser
|
||||
|
@ -68,17 +75,18 @@ func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
|
|||
}
|
||||
hcStatus, err := checkHealthCheckCanBeRun(container)
|
||||
if err == nil {
|
||||
return container.RunHealthCheck()
|
||||
return container.runHealthCheck()
|
||||
}
|
||||
return hcStatus, err
|
||||
}
|
||||
|
||||
// RunHealthCheck runs the health check as defined by the container
|
||||
func (c *Container) RunHealthCheck() (HealthCheckStatus, error) {
|
||||
// runHealthCheck runs the health check as defined by the container
|
||||
func (c *Container) runHealthCheck() (HealthCheckStatus, error) {
|
||||
var (
|
||||
newCommand []string
|
||||
returnCode int
|
||||
capture bytes.Buffer
|
||||
newCommand []string
|
||||
returnCode int
|
||||
capture bytes.Buffer
|
||||
inStartPeriod bool
|
||||
)
|
||||
hcStatus, err := checkHealthCheckCanBeRun(c)
|
||||
if err != nil {
|
||||
|
@ -111,12 +119,28 @@ func (c *Container) RunHealthCheck() (HealthCheckStatus, error) {
|
|||
returnCode = 1
|
||||
}
|
||||
timeEnd := time.Now()
|
||||
if c.HealthCheckConfig().StartPeriod > 0 {
|
||||
// there is a start-period we need to honor; we add startPeriod to container start time
|
||||
startPeriodTime := c.state.StartedTime.Add(c.HealthCheckConfig().StartPeriod)
|
||||
if timeStart.Before(startPeriodTime) {
|
||||
// we are still in the start period, flip the inStartPeriod bool
|
||||
inStartPeriod = true
|
||||
logrus.Debugf("healthcheck for %s being run in start-period", c.ID())
|
||||
}
|
||||
}
|
||||
|
||||
eventLog := capture.String()
|
||||
if len(eventLog) > MaxHealthCheckLogLength {
|
||||
eventLog = eventLog[:MaxHealthCheckLogLength]
|
||||
}
|
||||
|
||||
if timeEnd.Sub(timeStart) > c.HealthCheckConfig().Timeout {
|
||||
returnCode = -1
|
||||
hcResult = HealthCheckFailure
|
||||
hcErr = errors.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
|
||||
}
|
||||
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
|
||||
if err := c.updateHealthCheckLog(hcl); err != nil {
|
||||
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
|
||||
return hcResult, errors.Wrapf(err, "unable to update health check log %s for %s", c.healthCheckLogPath(), c.ID())
|
||||
}
|
||||
return hcResult, hcErr
|
||||
|
@ -145,8 +169,23 @@ func newHealthCheckLog(start, end time.Time, exitCode int, log string) inspect.H
|
|||
}
|
||||
}
|
||||
|
||||
// updatedHealthCheckStatus updates the health status of the container
|
||||
// in the healthcheck log
|
||||
func (c *Container) updateHealthStatus(status string) error {
|
||||
healthCheck, err := c.GetHealthCheckLog()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
healthCheck.Status = status
|
||||
newResults, err := json.Marshal(healthCheck)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "unable to marshall healthchecks for writing status")
|
||||
}
|
||||
return ioutil.WriteFile(c.healthCheckLogPath(), newResults, 0700)
|
||||
}
|
||||
|
||||
// updateHealthCheckLog parses the health check results and writes the log
|
||||
func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog) error {
|
||||
func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog, inStartPeriod bool) error {
|
||||
healthCheck, err := c.GetHealthCheckLog()
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -159,11 +198,13 @@ func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog) error {
|
|||
if len(healthCheck.Status) < 1 {
|
||||
healthCheck.Status = HealthCheckHealthy
|
||||
}
|
||||
// increment failing streak
|
||||
healthCheck.FailingStreak = healthCheck.FailingStreak + 1
|
||||
// if failing streak > retries, then status to unhealthy
|
||||
if int(healthCheck.FailingStreak) > c.HealthCheckConfig().Retries {
|
||||
healthCheck.Status = HealthCheckUnhealthy
|
||||
if !inStartPeriod {
|
||||
// increment failing streak
|
||||
healthCheck.FailingStreak = healthCheck.FailingStreak + 1
|
||||
// if failing streak >= retries, then status to unhealthy
|
||||
if int(healthCheck.FailingStreak) >= c.HealthCheckConfig().Retries {
|
||||
healthCheck.Status = HealthCheckUnhealthy
|
||||
}
|
||||
}
|
||||
}
|
||||
healthCheck.Log = append(healthCheck.Log, hcl)
|
||||
|
@ -199,3 +240,81 @@ func (c *Container) GetHealthCheckLog() (inspect.HealthCheckResults, error) {
|
|||
}
|
||||
return healthCheck, nil
|
||||
}
|
||||
|
||||
// createTimer systemd timers for healthchecks of a container
|
||||
func (c *Container) createTimer() error {
|
||||
if c.disableHealthCheckSystemd() {
|
||||
return nil
|
||||
}
|
||||
podman, err := os.Executable()
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to get path for podman for a health check timer")
|
||||
}
|
||||
|
||||
var cmd = []string{"--unit", fmt.Sprintf("%s", c.ID()), fmt.Sprintf("--on-unit-inactive=%s", c.HealthCheckConfig().Interval.String()), "--timer-property=AccuracySec=1s", podman, "healthcheck", "run", c.ID()}
|
||||
|
||||
conn, err := dbus.NewSystemdConnection()
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "unable to get systemd connection to add healthchecks")
|
||||
}
|
||||
conn.Close()
|
||||
logrus.Debugf("creating systemd-transient files: %s %s", "systemd-run", cmd)
|
||||
systemdRun := exec.Command("systemd-run", cmd...)
|
||||
_, err = systemdRun.CombinedOutput()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// startTimer starts a systemd timer for the healthchecks
|
||||
func (c *Container) startTimer() error {
|
||||
if c.disableHealthCheckSystemd() {
|
||||
return nil
|
||||
}
|
||||
conn, err := dbus.NewSystemdConnection()
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "unable to get systemd connection to start healthchecks")
|
||||
}
|
||||
defer conn.Close()
|
||||
_, err = conn.StartUnit(fmt.Sprintf("%s.service", c.ID()), "fail", nil)
|
||||
return err
|
||||
}
|
||||
|
||||
// removeTimer removes the systemd timer and unit files
|
||||
// for the container
|
||||
func (c *Container) removeTimer() error {
|
||||
if c.disableHealthCheckSystemd() {
|
||||
return nil
|
||||
}
|
||||
conn, err := dbus.NewSystemdConnection()
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "unable to get systemd connection to remove healthchecks")
|
||||
}
|
||||
defer conn.Close()
|
||||
serviceFile := fmt.Sprintf("%s.timer", c.ID())
|
||||
_, err = conn.StopUnit(serviceFile, "fail", nil)
|
||||
return err
|
||||
}
|
||||
|
||||
// HealthCheckStatus returns the current state of a container with a healthcheck
|
||||
func (c *Container) HealthCheckStatus() (string, error) {
|
||||
if !c.HasHealthCheck() {
|
||||
return "", errors.Errorf("container %s has no defined healthcheck", c.ID())
|
||||
}
|
||||
results, err := c.GetHealthCheckLog()
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "unable to get healthcheck log for %s", c.ID())
|
||||
}
|
||||
return results.Status, nil
|
||||
}
|
||||
|
||||
func (c *Container) disableHealthCheckSystemd() bool {
|
||||
if os.Getenv("DISABLE_HC_SYSTEMD") == "true" {
|
||||
return true
|
||||
}
|
||||
if c.config.HealthCheckConfig.Interval == 0 {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -239,7 +239,7 @@ func PodmanTestCreateUtil(tempDir string, remote bool) *PodmanTestIntegration {
|
|||
ociRuntime = "/usr/bin/runc"
|
||||
}
|
||||
}
|
||||
|
||||
os.Setenv("DISABLE_HC_SYSTEMD", "true")
|
||||
CNIConfigDir := "/etc/cni/net.d"
|
||||
|
||||
p := &PodmanTestIntegration{
|
||||
|
@ -314,6 +314,14 @@ func (s *PodmanSessionIntegration) InspectImageJSON() []inspect.ImageData {
|
|||
return i
|
||||
}
|
||||
|
||||
// InspectContainer returns a container's inspect data in JSON format
|
||||
func (p *PodmanTestIntegration) InspectContainer(name string) []inspect.ContainerData {
|
||||
cmd := []string{"inspect", name}
|
||||
session := p.Podman(cmd)
|
||||
session.WaitWithDefaultTimeout()
|
||||
return session.InspectContainerToJSON()
|
||||
}
|
||||
|
||||
func processTestResult(f GinkgoTestDescription) {
|
||||
tr := testResult{length: f.Duration.Seconds(), name: f.TestText}
|
||||
testResults = append(testResults, tr)
|
||||
|
|
|
@ -83,4 +83,98 @@ var _ = Describe("Podman healthcheck run", func() {
|
|||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(125))
|
||||
})
|
||||
|
||||
It("podman healthcheck should be starting", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-dt", "--name", "hc", "--healthcheck-retries", "2", "--healthcheck-command", "\"CMD-SHELL ls /foo || exit 1\"", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
inspect := podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("starting"))
|
||||
})
|
||||
|
||||
It("podman healthcheck failed checks in start-period should not change status", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-dt", "--name", "hc", "--healthcheck-start-period", "2m", "--healthcheck-retries", "2", "--healthcheck-command", "\"CMD-SHELL ls /foo || exit 1\"", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
|
||||
hc := podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
hc = podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
hc = podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
inspect := podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("starting"))
|
||||
})
|
||||
|
||||
It("podman healthcheck failed checks must reach retries before unhealthy ", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-dt", "--name", "hc", "--healthcheck-retries", "2", "--healthcheck-command", "\"CMD-SHELL ls /foo || exit 1\"", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
|
||||
hc := podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
inspect := podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("starting"))
|
||||
|
||||
hc = podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
inspect = podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("unhealthy"))
|
||||
|
||||
})
|
||||
|
||||
It("podman healthcheck good check results in healthy even in start-period", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-dt", "--name", "hc", "--healthcheck-start-period", "2m", "--healthcheck-retries", "2", "--healthcheck-command", "\"CMD-SHELL\" \"ls\" \"||\" \"exit\" \"1\"", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
|
||||
hc := podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(0))
|
||||
|
||||
inspect := podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("healthy"))
|
||||
})
|
||||
|
||||
It("podman healthcheck single healthy result changes failed to healthy", func() {
|
||||
session := podmanTest.Podman([]string{"run", "-dt", "--name", "hc", "--healthcheck-retries", "2", "--healthcheck-command", "\"CMD-SHELL\" \"ls\" \"/foo\" \"||\" \"exit\" \"1\"", ALPINE, "top"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session.ExitCode()).To(Equal(0))
|
||||
|
||||
hc := podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
inspect := podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("starting"))
|
||||
|
||||
hc = podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(1))
|
||||
|
||||
inspect = podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("unhealthy"))
|
||||
|
||||
foo := podmanTest.Podman([]string{"exec", "hc", "touch", "/foo"})
|
||||
foo.WaitWithDefaultTimeout()
|
||||
Expect(foo.ExitCode()).To(BeZero())
|
||||
|
||||
hc = podmanTest.Podman([]string{"healthcheck", "run", "hc"})
|
||||
hc.WaitWithDefaultTimeout()
|
||||
Expect(hc.ExitCode()).To(Equal(0))
|
||||
|
||||
inspect = podmanTest.InspectContainer("hc")
|
||||
Expect(inspect[0].State.Healthcheck.Status).To(Equal("healthy"))
|
||||
})
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue