podman/libpod/util.go
Paul Holzinger 0136a66a83
libpod: deduplicate ports in db
The OCICNI port format has one big problem: It does not support ranges.
So if a users forwards a range of 1k ports with podman run -p 1001-2000
we have to store each of the thousand ports individually as array element.
This bloats the db and makes the JSON encoding and decoding much slower.
In many places we already use a better port struct type which supports
ranges, e.g. `pkg/specgen` or the new network interface.

Because of this we have to do many runtime conversions between the two
port formats. If everything uses the new format we can skip the runtime
conversions.

This commit adds logic to replace all occurrences of the old format
with the new one. The database will automatically migrate the ports
to new format when the container config is read for the first time
after the update.

The `ParsePortMapping` function is `pkg/specgen/generate` has been
reworked to better work with the new format. The new logic is able
to deduplicate the given ports. This is necessary the ensure we
store them efficiently in the DB. The new code should also be more
performant than the old one.

To prove that the code is fast enough I added go benchmarks. Parsing
1 million ports took less than 0.5 seconds on my laptop.

Benchmark normalize PortMappings in specgen:
Please note that the 1 million ports are actually 20x 50k ranges
because we cannot have bigger ranges than 65535 ports.
```
$ go test -bench=. -benchmem  ./pkg/specgen/generate/
goos: linux
goarch: amd64
pkg: github.com/containers/podman/v3/pkg/specgen/generate
cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz
BenchmarkParsePortMappingNoPorts-12             480821532                2.230 ns/op           0 B/op          0 allocs/op
BenchmarkParsePortMapping1-12                      38972             30183 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMapping100-12                    18752             60688 ns/op          141088 B/op        315 allocs/op
BenchmarkParsePortMapping1k-12                      3104            331719 ns/op          223840 B/op       3018 allocs/op
BenchmarkParsePortMapping10k-12                      376           3122930 ns/op         1223650 B/op      30027 allocs/op
BenchmarkParsePortMapping1m-12                         3         390869926 ns/op        124593840 B/op   4000624 allocs/op
BenchmarkParsePortMappingReverse100-12             18940             63414 ns/op          141088 B/op        315 allocs/op
BenchmarkParsePortMappingReverse1k-12               3015            362500 ns/op          223841 B/op       3018 allocs/op
BenchmarkParsePortMappingReverse10k-12               343           3318135 ns/op         1223650 B/op      30027 allocs/op
BenchmarkParsePortMappingReverse1m-12                  3         403392469 ns/op        124593840 B/op   4000624 allocs/op
BenchmarkParsePortMappingRange1-12                 37635             28756 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange100-12               39604             28935 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange1k-12                38384             29921 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange10k-12               29479             40381 ns/op          131584 B/op          9 allocs/op
BenchmarkParsePortMappingRange1m-12                  927           1279369 ns/op          143022 B/op        164 allocs/op
PASS
ok      github.com/containers/podman/v3/pkg/specgen/generate    25.492s
```

Benchmark convert old port format to new one:
```
go test -bench=. -benchmem  ./libpod/
goos: linux
goarch: amd64
pkg: github.com/containers/podman/v3/libpod
cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz
Benchmark_ocicniPortsToNetTypesPortsNoPorts-12          663526126                1.663 ns/op           0 B/op          0 allocs/op
Benchmark_ocicniPortsToNetTypesPorts1-12                 7858082               141.9 ns/op            72 B/op          2 allocs/op
Benchmark_ocicniPortsToNetTypesPorts10-12                2065347               571.0 ns/op           536 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts100-12                138478              8641 ns/op            4216 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts1k-12                   9414            120964 ns/op           41080 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts10k-12                   781           1490526 ns/op          401528 B/op          4 allocs/op
Benchmark_ocicniPortsToNetTypesPorts1m-12                      4         250579010 ns/op        40001656 B/op          4 allocs/op
PASS
ok      github.com/containers/podman/v3/libpod  11.727s
```

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
2021-10-27 18:59:56 +02:00

350 lines
10 KiB
Go

package libpod
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v3/libpod/define"
"github.com/containers/podman/v3/libpod/network/types"
"github.com/containers/podman/v3/utils"
"github.com/fsnotify/fsnotify"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// Runtime API constants
const (
unknownPackage = "Unknown"
)
// FuncTimer helps measure the execution time of a function
// For debug purposes, do not leave in code
// used like defer FuncTimer("foo")
func FuncTimer(funcName string) {
elapsed := time.Since(time.Now())
fmt.Printf("%s executed in %d ms\n", funcName, elapsed)
}
// MountExists returns true if dest exists in the list of mounts
func MountExists(specMounts []spec.Mount, dest string) bool {
for _, m := range specMounts {
if m.Destination == dest {
return true
}
}
return false
}
// WaitForFile waits until a file has been created or the given timeout has occurred
func WaitForFile(path string, chWait chan error, timeout time.Duration) (bool, error) {
var inotifyEvents chan fsnotify.Event
watcher, err := fsnotify.NewWatcher()
if err == nil {
if err := watcher.Add(filepath.Dir(path)); err == nil {
inotifyEvents = watcher.Events
}
defer watcher.Close()
}
var timeoutChan <-chan time.Time
if timeout != 0 {
timeoutChan = time.After(timeout)
}
for {
select {
case e := <-chWait:
return true, e
case <-inotifyEvents:
_, err := os.Stat(path)
if err == nil {
return false, nil
}
if !os.IsNotExist(err) {
return false, err
}
case <-time.After(25 * time.Millisecond):
// Check periodically for the file existence. It is needed
// if the inotify watcher could not have been created. It is
// also useful when using inotify as if for any reasons we missed
// a notification, we won't hang the process.
_, err := os.Stat(path)
if err == nil {
return false, nil
}
if !os.IsNotExist(err) {
return false, err
}
case <-timeoutChan:
return false, errors.Wrapf(define.ErrInternal, "timed out waiting for file %s", path)
}
}
}
type byDestination []spec.Mount
func (m byDestination) Len() int {
return len(m)
}
func (m byDestination) Less(i, j int) bool {
return m.parts(i) < m.parts(j)
}
func (m byDestination) Swap(i, j int) {
m[i], m[j] = m[j], m[i]
}
func (m byDestination) parts(i int) int {
return strings.Count(filepath.Clean(m[i].Destination), string(os.PathSeparator))
}
func sortMounts(m []spec.Mount) []spec.Mount {
sort.Sort(byDestination(m))
return m
}
func validPodNSOption(p *Pod, ctrPod string) error {
if p == nil {
return errors.Wrapf(define.ErrInvalidArg, "pod passed in was nil. Container may not be associated with a pod")
}
if ctrPod == "" {
return errors.Wrapf(define.ErrInvalidArg, "container is not a member of any pod")
}
if ctrPod != p.ID() {
return errors.Wrapf(define.ErrInvalidArg, "pod passed in is not the pod the container is associated with")
}
return nil
}
// JSONDeepCopy performs a deep copy by performing a JSON encode/decode of the
// given structures. From and To should be identically typed structs.
func JSONDeepCopy(from, to interface{}) error {
tmp, err := json.Marshal(from)
if err != nil {
return err
}
return json.Unmarshal(tmp, to)
}
func queryPackageVersion(cmdArg ...string) string {
output := unknownPackage
if 1 < len(cmdArg) {
cmd := exec.Command(cmdArg[0], cmdArg[1:]...)
if outp, err := cmd.Output(); err == nil {
output = string(outp)
}
}
return strings.Trim(output, "\n")
}
func packageVersion(program string) string { // program is full path
packagers := [][]string{
{"/usr/bin/rpm", "-q", "-f"},
{"/usr/bin/dpkg", "-S"}, // Debian, Ubuntu
{"/usr/bin/pacman", "-Qo"}, // Arch
{"/usr/bin/qfile", "-qv"}, // Gentoo (quick)
{"/usr/bin/equery", "b"}, // Gentoo (slow)
}
for _, cmd := range packagers {
cmd = append(cmd, program)
if out := queryPackageVersion(cmd...); out != unknownPackage {
return out
}
}
return unknownPackage
}
func programVersion(mountProgram string) (string, error) {
output, err := utils.ExecCmd(mountProgram, "--version")
if err != nil {
return "", err
}
return strings.TrimSuffix(output, "\n"), nil
}
// DefaultSeccompPath returns the path to the default seccomp.json file
// if it exists, first it checks OverrideSeccomp and then default.
// If neither exist function returns ""
func DefaultSeccompPath() (string, error) {
def, err := config.Default()
if err != nil {
return "", err
}
if def.Containers.SeccompProfile != "" {
return def.Containers.SeccompProfile, nil
}
_, err = os.Stat(config.SeccompOverridePath)
if err == nil {
return config.SeccompOverridePath, nil
}
if !os.IsNotExist(err) {
return "", err
}
if _, err := os.Stat(config.SeccompDefaultPath); err != nil {
if !os.IsNotExist(err) {
return "", err
}
return "", nil
}
return config.SeccompDefaultPath, nil
}
// CheckDependencyContainer verifies the given container can be used as a
// dependency of another container.
// Both the dependency to check and the container that will be using the
// dependency must be passed in.
// It is assumed that ctr is locked, and depCtr is unlocked.
func checkDependencyContainer(depCtr, ctr *Container) error {
state, err := depCtr.State()
if err != nil {
return errors.Wrapf(err, "error accessing dependency container %s state", depCtr.ID())
}
if state == define.ContainerStateRemoving {
return errors.Wrapf(define.ErrCtrStateInvalid, "cannot use container %s as a dependency as it is being removed", depCtr.ID())
}
if depCtr.ID() == ctr.ID() {
return errors.Wrapf(define.ErrInvalidArg, "must specify another container")
}
if ctr.config.Pod != "" && depCtr.PodID() != ctr.config.Pod {
return errors.Wrapf(define.ErrInvalidArg, "container has joined pod %s and dependency container %s is not a member of the pod", ctr.config.Pod, depCtr.ID())
}
return nil
}
// hijackWriteError writes an error to a hijacked HTTP session.
func hijackWriteError(toWrite error, cid string, terminal bool, httpBuf *bufio.ReadWriter) {
if toWrite != nil {
errString := []byte(fmt.Sprintf("Error: %v\n", toWrite))
if !terminal {
// We need a header.
header := makeHTTPAttachHeader(2, uint32(len(errString)))
if _, err := httpBuf.Write(header); err != nil {
logrus.Errorf("Writing header for container %s attach connection error: %v", cid, err)
}
}
if _, err := httpBuf.Write(errString); err != nil {
logrus.Errorf("Writing error to container %s HTTP attach connection: %v", cid, err)
}
if err := httpBuf.Flush(); err != nil {
logrus.Errorf("Flushing HTTP buffer for container %s HTTP attach connection: %v", cid, err)
}
}
}
// hijackWriteErrorAndClose writes an error to a hijacked HTTP session and
// closes it. Intended to HTTPAttach function.
// If error is nil, it will not be written; we'll only close the connection.
func hijackWriteErrorAndClose(toWrite error, cid string, terminal bool, httpCon io.Closer, httpBuf *bufio.ReadWriter) {
hijackWriteError(toWrite, cid, terminal, httpBuf)
if err := httpCon.Close(); err != nil {
logrus.Errorf("Closing container %s HTTP attach connection: %v", cid, err)
}
}
// makeHTTPAttachHeader makes an 8-byte HTTP header for a buffer of the given
// length and stream. Accepts an integer indicating which stream we are sending
// to (STDIN = 0, STDOUT = 1, STDERR = 2).
func makeHTTPAttachHeader(stream byte, length uint32) []byte {
header := make([]byte, 8)
header[0] = stream
binary.BigEndian.PutUint32(header[4:], length)
return header
}
// writeHijackHeader writes a header appropriate for the type of HTTP Hijack
// that occurred in a hijacked HTTP connection used for attach.
func writeHijackHeader(r *http.Request, conn io.Writer) {
// AttachHeader is the literal header sent for upgraded/hijacked connections for
// attach, sourced from Docker at:
// https://raw.githubusercontent.com/moby/moby/b95fad8e51bd064be4f4e58a996924f343846c85/api/server/router/container/container_routes.go
// Using literally to ensure compatibility with existing clients.
c := r.Header.Get("Connection")
proto := r.Header.Get("Upgrade")
if len(proto) == 0 || !strings.EqualFold(c, "Upgrade") {
// OK - can't upgrade if not requested or protocol is not specified
fmt.Fprintf(conn,
"HTTP/1.1 200 OK\r\nContent-Type: application/vnd.docker.raw-stream\r\n\r\n")
} else {
// Upgraded
fmt.Fprintf(conn,
"HTTP/1.1 101 UPGRADED\r\nContent-Type: application/vnd.docker.raw-stream\r\nConnection: Upgrade\r\nUpgrade: %s\r\n\r\n",
proto)
}
}
// Convert OCICNI port bindings into Inspect-formatted port bindings.
func makeInspectPortBindings(bindings []types.PortMapping, expose map[uint16][]string) map[string][]define.InspectHostPort {
portBindings := make(map[string][]define.InspectHostPort)
for _, port := range bindings {
protocols := strings.Split(port.Protocol, ",")
for _, protocol := range protocols {
for i := uint16(0); i < port.Range; i++ {
key := fmt.Sprintf("%d/%s", port.ContainerPort+i, protocol)
hostPorts := portBindings[key]
hostPorts = append(hostPorts, define.InspectHostPort{
HostIP: port.HostIP,
HostPort: fmt.Sprintf("%d", port.HostPort+i),
})
portBindings[key] = hostPorts
}
}
}
// add exposed ports without host port information to match docker
for port, protocols := range expose {
for _, protocol := range protocols {
key := fmt.Sprintf("%d/%s", port, protocol)
if _, ok := portBindings[key]; !ok {
portBindings[key] = nil
}
}
}
return portBindings
}
// Write a given string to a new file at a given path.
// Will error if a file with the given name already exists.
// Will be chown'd to the UID/GID provided and have the provided SELinux label
// set.
func writeStringToPath(path, contents, mountLabel string, uid, gid int) error {
f, err := os.Create(path)
if err != nil {
return errors.Wrapf(err, "unable to create %s", path)
}
defer f.Close()
if err := f.Chown(uid, gid); err != nil {
return err
}
if _, err := f.WriteString(contents); err != nil {
return errors.Wrapf(err, "unable to write %s", path)
}
// Relabel runDirResolv for the container
if err := label.Relabel(path, mountLabel, false); err != nil {
return err
}
return nil
}