podman/libpod/healthcheck.go
Matthew Heon 1be345bd9d Begin to break up pkg/inspect
Let's put inspect structs where they're actually being used. We
originally made pkg/inspect to solve circular import issues.
There are no more circular import issues.

Image structs remain for now, I'm focusing on container inspect.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
2019-06-03 15:54:53 -04:00

283 lines
9.1 KiB
Go

package libpod
import (
"bufio"
"bytes"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// HealthCheckStatus is the outcome code reported for a health check attempt
// on a container.
type HealthCheckStatus int

// Outcome codes for a health check attempt, numbered sequentially from zero.
const (
	// HealthCheckSuccess means the health check ran and passed
	HealthCheckSuccess HealthCheckStatus = iota
	// HealthCheckFailure means the health check ran and failed
	HealthCheckFailure
	// HealthCheckContainerStopped means the health check cannot
	// be run because the container is stopped
	HealthCheckContainerStopped
	// HealthCheckContainerNotFound means the container could
	// not be found in local store
	HealthCheckContainerNotFound
	// HealthCheckNotDefined means the container has no health
	// check defined in it
	HealthCheckNotDefined
	// HealthCheckInternalError means something failed while obtaining or
	// running a given health check
	HealthCheckInternalError
	// HealthCheckDefined means the healthcheck was found on the container
	HealthCheckDefined
)

// Limits applied to the healthcheck history.
const (
	// MaxHealthCheckNumberLogs is the maximum number of attempts we keep
	// in the healthcheck history file
	MaxHealthCheckNumberLogs int = 5
	// MaxHealthCheckLogLength is the maximum length of the command output
	// kept for a single healthcheck attempt
	MaxHealthCheckLogLength = 500
)

// Health state strings stored in the healthcheck log.
const (
	// HealthCheckHealthy describes a healthy container
	HealthCheckHealthy string = "healthy"
	// HealthCheckUnhealthy describes an unhealthy container
	HealthCheckUnhealthy string = "unhealthy"
	// HealthCheckStarting describes the time between when the container starts
	// and the start-period (time allowed for the container to start and application
	// to be running) expires.
	HealthCheckStarting string = "starting"
)
// HealthCheckResults describes the results/logs from a healthcheck. It is the
// value that is marshalled to, and unmarshalled from, the container's
// healthcheck log file as JSON.
type HealthCheckResults struct {
// Status is the health state string recorded for the container, for
// example HealthCheckHealthy or HealthCheckUnhealthy
Status string `json:"Status"`
// FailingStreak is the number of consecutive failed healthchecks; it is
// reset to 0 whenever a healthcheck exits with code 0
FailingStreak int `json:"FailingStreak"`
// Log describes healthcheck attempts and results, oldest entry first
Log []HealthCheckLog `json:"Log"`
}
// HealthCheckLog describes the results of a single healthcheck
type HealthCheckLog struct {
// Start is the time the attempt began, formatted with time.RFC3339Nano
Start string `json:"Start"`
// End is the time the attempt finished, formatted with time.RFC3339Nano
End string `json:"End"`
// ExitCode is 0 when the healthcheck command succeeded, 1 when executing
// it returned an error, and -1 when it exceeded the configured timeout
ExitCode int `json:"ExitCode"`
// Output is the stdout/stderr from the healthcheck command, truncated to
// MaxHealthCheckLogLength
Output string `json:"Output"`
}
// hcWriteCloser allows us to use a bufio.Writer where a WriteCloser is
// required, such as the output and error streams of an attach session.
type hcWriteCloser struct {
	*bufio.Writer
}

// Close flushes any data still held in the bufio buffer to the underlying
// writer and returns the flush error, if any. There is no underlying resource
// to release, so calling Close more than once is harmless.
func (hcwc hcWriteCloser) Close() error {
	// Without the flush, output smaller than the bufio buffer would never
	// reach the underlying writer.
	return hcwc.Flush()
}
// HealthCheck looks up the container identified by name, confirms that a
// healthcheck can currently be run on it, and then runs that healthcheck.
//
// A failed lookup yields HealthCheckContainerNotFound. A failed precondition
// yields the status and error reported by checkHealthCheckCanBeRun. Otherwise
// the result of running the healthcheck is returned.
func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
	ctr, lookupErr := r.LookupContainer(name)
	if lookupErr != nil {
		return HealthCheckContainerNotFound, errors.Wrapf(lookupErr, "unable to lookup %s to perform a health check", name)
	}
	if status, checkErr := checkHealthCheckCanBeRun(ctr); checkErr != nil {
		return status, checkErr
	}
	return ctr.runHealthCheck()
}
// runHealthCheck runs the health check as defined by the container, appends
// the attempt to the container's healthcheck log and returns the outcome.
//
// If checkHealthCheckCanBeRun reports a problem, its status and error are
// returned and nothing is executed. Otherwise the status is
// HealthCheckSuccess when the command executed without error and within the
// configured timeout, and HealthCheckFailure in every other case. The
// returned error is the exec error, a timeout error, or an error describing
// why the healthcheck log could not be updated.
func (c *Container) runHealthCheck() (HealthCheckStatus, error) {
	var (
		newCommand    []string
		returnCode    int
		capture       bytes.Buffer
		inStartPeriod bool
	)
	hcStatus, err := checkHealthCheckCanBeRun(c)
	if err != nil {
		return hcStatus, err
	}
	hcCommand := c.HealthCheckConfig().Test
	if len(hcCommand) > 0 && hcCommand[0] == "CMD-SHELL" {
		// CMD-SHELL: hand the remaining elements to a shell as one command line
		newCommand = []string{"sh", "-c", strings.Join(hcCommand[1:], " ")}
	} else {
		newCommand = hcCommand
	}
	// stdout and stderr of the command are both collected into capture
	captureBuffer := bufio.NewWriter(&capture)
	hcw := hcWriteCloser{
		captureBuffer,
	}
	streams := new(AttachStreams)
	streams.OutputStream = hcw
	streams.ErrorStream = hcw
	streams.InputStream = os.Stdin
	streams.AttachOutput = true
	streams.AttachError = true
	streams.AttachInput = true

	logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID())
	timeStart := time.Now()
	hcResult := HealthCheckSuccess
	hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0)
	if hcErr != nil {
		hcResult = HealthCheckFailure
		returnCode = 1
	}
	timeEnd := time.Now()

	// bufio.Writer only forwards data to the underlying writer once its
	// buffer fills up or Flush is called; flush here so that short command
	// output actually lands in capture before it is read below.
	if flushErr := captureBuffer.Flush(); flushErr != nil {
		logrus.Errorf("unable to flush health check output for %s: %v", c.ID(), flushErr)
	}

	if c.HealthCheckConfig().StartPeriod > 0 {
		// there is a start-period we need to honor; we add startPeriod to container start time
		startPeriodTime := c.state.StartedTime.Add(c.HealthCheckConfig().StartPeriod)
		if timeStart.Before(startPeriodTime) {
			// we are still in the start period, flip the inStartPeriod bool
			inStartPeriod = true
			logrus.Debugf("healthcheck for %s being run in start-period", c.ID())
		}
	}

	// keep at most MaxHealthCheckLogLength of the captured output
	eventLog := capture.String()
	if len(eventLog) > MaxHealthCheckLogLength {
		eventLog = eventLog[:MaxHealthCheckLogLength]
	}

	// a command that ran longer than the timeout counts as failed, even if
	// it exited successfully
	if timeEnd.Sub(timeStart) > c.HealthCheckConfig().Timeout {
		returnCode = -1
		hcResult = HealthCheckFailure
		hcErr = errors.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
	}
	hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
	if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
		return hcResult, errors.Wrapf(err, "unable to update health check log %s for %s", c.healthCheckLogPath(), c.ID())
	}
	return hcResult, hcErr
}
// checkHealthCheckCanBeRun reports whether a healthcheck can be executed on
// the given container right now. It returns HealthCheckDefined and a nil
// error when the container is running and has a healthcheck; otherwise it
// returns a status naming the obstacle together with a non-nil error.
func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
	state, stateErr := c.State()
	switch {
	case stateErr != nil:
		return HealthCheckInternalError, stateErr
	case state != ContainerStateRunning:
		return HealthCheckContainerStopped, errors.Errorf("container %s is not running", c.ID())
	case !c.HasHealthCheck():
		return HealthCheckNotDefined, errors.Errorf("container %s has no defined healthcheck", c.ID())
	}
	return HealthCheckDefined, nil
}
// newHealthCheckLog builds the log entry for a single healthcheck attempt.
// The start and end times are stored as RFC3339Nano strings; the exit code
// and output are stored as given.
func newHealthCheckLog(start, end time.Time, exitCode int, log string) HealthCheckLog {
	const stampLayout = time.RFC3339Nano

	var entry HealthCheckLog
	entry.Start = start.Format(stampLayout)
	entry.End = end.Format(stampLayout)
	entry.ExitCode = exitCode
	entry.Output = log
	return entry
}
// updateHealthStatus overwrites the Status field stored in the container's
// healthcheck log with the given status and writes the log back to disk.
// All other recorded fields are left as they were read.
func (c *Container) updateHealthStatus(status string) error {
	results, readErr := c.GetHealthCheckLog()
	if readErr != nil {
		return readErr
	}
	results.Status = status

	encoded, marshalErr := json.Marshal(results)
	if marshalErr != nil {
		return errors.Wrapf(marshalErr, "unable to marshall healthchecks for writing status")
	}
	logPath := c.healthCheckLogPath()
	return ioutil.WriteFile(logPath, encoded, 0700)
}
// updateHealthCheckLog folds the result of one healthcheck attempt into the
// container's healthcheck log and writes the log back to disk.
//
// An attempt with exit code 0 marks the container healthy and resets the
// failing streak. Any other exit code counts toward the failing streak unless
// inStartPeriod is true; once the streak reaches the configured number of
// retries the container is marked unhealthy. The attempt is always appended
// to the history, which is trimmed to the newest MaxHealthCheckNumberLogs
// entries.
func (c *Container) updateHealthCheckLog(hcl HealthCheckLog, inStartPeriod bool) error {
	healthCheck, err := c.GetHealthCheckLog()
	if err != nil {
		return err
	}
	if hcl.ExitCode == 0 {
		// set status to healthy, reset failing state to 0
		healthCheck.Status = HealthCheckHealthy
		healthCheck.FailingStreak = 0
	} else {
		// no status recorded yet: start out as healthy until the failing
		// streak says otherwise
		if len(healthCheck.Status) < 1 {
			healthCheck.Status = HealthCheckHealthy
		}
		if !inStartPeriod {
			// increment failing streak
			healthCheck.FailingStreak++
			// if failing streak >= retries, then status to unhealthy
			if healthCheck.FailingStreak >= c.HealthCheckConfig().Retries {
				healthCheck.Status = HealthCheckUnhealthy
			}
		}
	}
	healthCheck.Log = append(healthCheck.Log, hcl)
	// Drop however many of the oldest entries exceed the cap, so a history
	// that is already over the limit shrinks back to it in one step.
	if excess := len(healthCheck.Log) - MaxHealthCheckNumberLogs; excess > 0 {
		healthCheck.Log = healthCheck.Log[excess:]
	}
	newResults, err := json.Marshal(healthCheck)
	if err != nil {
		return errors.Wrapf(err, "unable to marshall healthchecks for writing")
	}
	return ioutil.WriteFile(c.healthCheckLogPath(), newResults, 0700)
}
// healthCheckLogPath returns the location of the container's healthcheck
// log: a file named "healthcheck.log" in the directory that holds the
// container's log file.
func (c *Container) healthCheckLogPath() string {
	logDir := filepath.Dir(c.LogPath())
	return filepath.Join(logDir, "healthcheck.log")
}
// GetHealthCheckLog returns HealthCheck results by reading the container's
// health check log file. If the health check log file does not exist, then
// an empty healthcheck struct is returned with a nil error. Any other read
// failure, or a file whose content cannot be unmarshalled, is returned as an
// error.
func (c *Container) GetHealthCheckLog() (HealthCheckResults, error) {
	var healthCheck HealthCheckResults
	logPath := c.healthCheckLogPath()
	// Read directly and inspect the error instead of calling os.Stat first:
	// this avoids a window in which the file can disappear between the
	// existence check and the read.
	b, err := ioutil.ReadFile(logPath)
	if err != nil {
		if os.IsNotExist(err) {
			return healthCheck, nil
		}
		return healthCheck, errors.Wrapf(err, "failed to read health check log file %s", logPath)
	}
	if err := json.Unmarshal(b, &healthCheck); err != nil {
		return healthCheck, errors.Wrapf(err, "failed to unmarshal existing healthcheck results in %s", logPath)
	}
	return healthCheck, nil
}
// HealthCheckStatus returns the health state string recorded in the
// healthcheck log of the container. It returns an error if the container has
// no healthcheck defined or if the log cannot be read.
func (c *Container) HealthCheckStatus() (string, error) {
	if hasCheck := c.HasHealthCheck(); !hasCheck {
		return "", errors.Errorf("container %s has no defined healthcheck", c.ID())
	}
	hcLog, logErr := c.GetHealthCheckLog()
	if logErr == nil {
		return hcLog.Status, nil
	}
	return "", errors.Wrapf(logErr, "unable to get healthcheck log for %s", c.ID())
}
// disableHealthCheckSystemd reports true when the DISABLE_HC_SYSTEMD
// environment variable is set to "true" or when the container's healthcheck
// interval is zero, and false otherwise.
func (c *Container) disableHealthCheckSystemd() bool {
	disabledByEnv := os.Getenv("DISABLE_HC_SYSTEMD") == "true"
	return disabledByEnv || c.config.HealthCheckConfig.Interval == 0
}