3b15a9
From 69daa67c436a8fdeb0149aa5cb0112f03fdb699f Mon Sep 17 00:00:00 2001
3b15a9
From: Matthew Heon <mheon@redhat.com>
3b15a9
Date: Mon, 25 Jan 2021 14:18:07 -0500
3b15a9
Subject: [PATCH] Correct handling of capabilities
3b15a9
3b15a9
Ensure that capabilities are properly handled for non-root users
3b15a9
in privileged containers. We do not want to give full caps, but
3b15a9
instead only CapInh and CapEff (others should be all-zeroes).
3b15a9
3b15a9
Fixing `podman run` is easy - the same code as the Podman 1.6 fix
3b15a9
works there. The `podman exec` command is far more challenging.
3b15a9
Exec received a complete rewrite to use Conmon at some point
3b15a9
before Podman 1.6, and gained many capabilities in the process.
3b15a9
One of those was the ability to actually tweak the capabilities
3b15a9
of the exec process - 1.0 did not have that. Since it was needed
3b15a9
to resolve this CVE, I was forced to backport a large bit of the
3b15a9
1.0 -> 1.6 exec changes (passing a Process block to the OCI
3b15a9
runtime, and using `prepareProcessExec()` to prepare said block).
3b15a9
I am honestly uncomfortable with the size and scope of this
3b15a9
change but I don't see another way around this.
3b15a9
3b15a9
Fixes CVE-2021-20188
3b15a9
3b15a9
Signed-off-by: Matthew Heon <mheon@redhat.com>
3b15a9
---
3b15a9
 libpod/container_api.go |  24 +------
3b15a9
 libpod/oci.go           | 148 ++++++++++++++++++++++++++++++++--------
3b15a9
 pkg/spec/spec.go        |   8 +++
3b15a9
 3 files changed, 132 insertions(+), 48 deletions(-)
3b15a9
3b15a9
diff -up libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/container_api.go.orig libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/container_api.go
3b15a9
--- libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/container_api.go.orig	2019-02-11 16:26:46.000000000 +0100
3b15a9
+++ libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/container_api.go	2021-02-12 10:38:48.767172399 +0100
3b15a9
@@ -2,7 +2,6 @@ package libpod
3b15a9
 
3b15a9
 import (
3b15a9
 	"context"
3b15a9
-	"fmt"
3b15a9
 	"io/ioutil"
3b15a9
 	"os"
3b15a9
 	"strconv"
3b15a9
@@ -11,9 +10,7 @@ import (
3b15a9
 
3b15a9
 	"github.com/containers/libpod/libpod/driver"
3b15a9
 	"github.com/containers/libpod/pkg/inspect"
3b15a9
-	"github.com/containers/libpod/pkg/lookup"
3b15a9
 	"github.com/containers/storage/pkg/stringid"
3b15a9
-	"github.com/docker/docker/daemon/caps"
3b15a9
 	"github.com/pkg/errors"
3b15a9
 	"github.com/sirupsen/logrus"
3b15a9
 	"k8s.io/apimachinery/pkg/util/wait"
3b15a9
@@ -263,8 +260,6 @@ func (c *Container) Kill(signal uint) er
3b15a9
 // TODO allow specifying streams to attach to
3b15a9
 // TODO investigate allowing exec without attaching
3b15a9
 func (c *Container) Exec(tty, privileged bool, env, cmd []string, user, workDir string) error {
3b15a9
-	var capList []string
3b15a9
-
3b15a9
 	locked := false
3b15a9
 	if !c.batched {
3b15a9
 		locked = true
3b15a9
@@ -287,22 +282,8 @@ func (c *Container) Exec(tty, privileged
3b15a9
 	if conState != ContainerStateRunning {
3b15a9
 		return errors.Errorf("cannot exec into container that is not running")
3b15a9
 	}
3b15a9
-	if privileged || c.config.Privileged {
3b15a9
-		capList = caps.GetAllCapabilities()
3b15a9
-	}
3b15a9
 
3b15a9
-	// If user was set, look it up in the container to get a UID to use on
3b15a9
-	// the host
3b15a9
-	hostUser := ""
3b15a9
-	if user != "" {
3b15a9
-		execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, nil)
3b15a9
-		if err != nil {
3b15a9
-			return err
3b15a9
-		}
3b15a9
-
3b15a9
-		// runc expects user formatted as uid:gid
3b15a9
-		hostUser = fmt.Sprintf("%d:%d", execUser.Uid, execUser.Gid)
3b15a9
-	}
3b15a9
+	isPrivileged := privileged || c.config.Privileged
3b15a9
 
3b15a9
 	// Generate exec session ID
3b15a9
 	// Ensure we don't conflict with an existing session ID
3b15a9
@@ -324,10 +305,11 @@ func (c *Container) Exec(tty, privileged
3b15a9
 
3b15a9
 	logrus.Debugf("Creating new exec session in container %s with session id %s", c.ID(), sessionID)
3b15a9
 
3b15a9
-	execCmd, err := c.runtime.ociRuntime.execContainer(c, cmd, capList, env, tty, workDir, hostUser, sessionID)
3b15a9
+	execCmd, processFile, err := c.runtime.ociRuntime.execContainer(c, cmd, env, tty, workDir, user, sessionID, isPrivileged)
3b15a9
 	if err != nil {
3b15a9
 		return errors.Wrapf(err, "error exec %s", c.ID())
3b15a9
 	}
3b15a9
+	defer os.Remove(processFile)
3b15a9
 	chWait := make(chan error)
3b15a9
 	go func() {
3b15a9
 		chWait <- execCmd.Wait()
3b15a9
diff -up libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/oci.go.orig libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/oci.go
3b15a9
--- libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/oci.go.orig	2019-02-11 16:26:46.000000000 +0100
3b15a9
+++ libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/libpod/oci.go	2021-02-12 10:38:48.768172416 +0100
3b15a9
@@ -15,10 +15,12 @@ import (
3b15a9
 	"syscall"
3b15a9
 	"time"
3b15a9
 
3b15a9
+	"github.com/containers/libpod/pkg/lookup"
3b15a9
 	"github.com/containers/libpod/pkg/rootless"
3b15a9
 	"github.com/containers/libpod/pkg/util"
3b15a9
 	"github.com/coreos/go-systemd/activation"
3b15a9
 	"github.com/cri-o/ocicni/pkg/ocicni"
3b15a9
+	"github.com/docker/docker/daemon/caps"
3b15a9
 	spec "github.com/opencontainers/runtime-spec/specs-go"
3b15a9
 	"github.com/opencontainers/selinux/go-selinux"
3b15a9
 	"github.com/opencontainers/selinux/go-selinux/label"
3b15a9
@@ -735,18 +737,23 @@ func (r *OCIRuntime) unpauseContainer(ct
3b15a9
 // TODO: Add --detach support
3b15a9
 // TODO: Convert to use conmon
3b15a9
 // TODO: add --pid-file and use that to generate exec session tracking
3b15a9
-func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty bool, cwd, user, sessionID string) (*exec.Cmd, error) {
3b15a9
+func (r *OCIRuntime) execContainer(c *Container, cmd, env []string, tty bool, cwd, user, sessionID string, privileged bool) (*exec.Cmd, string, error) {
3b15a9
 	if len(cmd) == 0 {
3b15a9
-		return nil, errors.Wrapf(ErrInvalidArg, "must provide a command to execute")
3b15a9
+		return nil, "", errors.Wrapf(ErrInvalidArg, "must provide a command to execute")
3b15a9
 	}
3b15a9
 
3b15a9
 	if sessionID == "" {
3b15a9
-		return nil, errors.Wrapf(ErrEmptyID, "must provide a session ID for exec")
3b15a9
+		return nil, "", errors.Wrapf(ErrEmptyID, "must provide a session ID for exec")
3b15a9
 	}
3b15a9
 
3b15a9
 	runtimeDir, err := util.GetRootlessRuntimeDir()
3b15a9
 	if err != nil {
3b15a9
-		return nil, err
3b15a9
+		return nil, "", err
3b15a9
+	}
3b15a9
+
3b15a9
+	processFile, err := prepareProcessExec(c, cmd, env, tty, cwd, user, sessionID, privileged)
3b15a9
+	if err != nil {
3b15a9
+		return nil, "", err
3b15a9
 	}
3b15a9
 
3b15a9
 	args := []string{}
3b15a9
@@ -756,34 +763,14 @@ func (r *OCIRuntime) execContainer(c *Co
3b15a9
 
3b15a9
 	args = append(args, "exec")
3b15a9
 
3b15a9
-	if cwd != "" {
3b15a9
-		args = append(args, "--cwd", cwd)
3b15a9
-	}
3b15a9
+	args = append(args, "--process", processFile)
3b15a9
 
3b15a9
 	args = append(args, "--pid-file", c.execPidPath(sessionID))
3b15a9
 
3b15a9
-	if tty {
3b15a9
-		args = append(args, "--tty")
3b15a9
-	} else {
3b15a9
-		args = append(args, "--tty=false")
3b15a9
-	}
3b15a9
-
3b15a9
-	if user != "" {
3b15a9
-		args = append(args, "--user", user)
3b15a9
-	}
3b15a9
-
3b15a9
 	if c.config.Spec.Process.NoNewPrivileges {
3b15a9
 		args = append(args, "--no-new-privs")
3b15a9
 	}
3b15a9
 
3b15a9
-	for _, cap := range capAdd {
3b15a9
-		args = append(args, "--cap", cap)
3b15a9
-	}
3b15a9
-
3b15a9
-	for _, envVar := range env {
3b15a9
-		args = append(args, "--env", envVar)
3b15a9
-	}
3b15a9
-
3b15a9
 	// Append container ID and command
3b15a9
 	args = append(args, c.ID())
3b15a9
 	args = append(args, cmd...)
3b15a9
@@ -797,10 +784,10 @@ func (r *OCIRuntime) execContainer(c *Co
3b15a9
 	execCmd.Env = append(execCmd.Env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir))
3b15a9
 
3b15a9
 	if err := execCmd.Start(); err != nil {
3b15a9
-		return nil, errors.Wrapf(err, "cannot start container %s", c.ID())
3b15a9
+		return nil, "", errors.Wrapf(err, "cannot start container %s", c.ID())
3b15a9
 	}
3b15a9
 
3b15a9
-	return execCmd, nil
3b15a9
+	return execCmd, processFile, nil
3b15a9
 }
3b15a9
 
3b15a9
 // execStopContainer stops all active exec sessions in a container
3b15a9
@@ -892,3 +879,110 @@ func (r *OCIRuntime) checkpointContainer
3b15a9
 	args = append(args, ctr.ID())
3b15a9
 	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
3b15a9
 }
3b15a9
+
3b15a9
+// prepareProcessExec returns the path of the process.json used in runc exec -p.
3b15a9
+// Returns path to the created exec process file. This will need to be removed
3b15a9
+// by the caller when they're done, best effort.
3b15a9
+func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, sessionID string, privileged bool) (string, error) {
3b15a9
+	filename := filepath.Join(c.bundlePath(), fmt.Sprintf("exec-process-%s", sessionID))
3b15a9
+	f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0600)
3b15a9
+	if err != nil {
3b15a9
+		return "", err
3b15a9
+	}
3b15a9
+	defer f.Close()
3b15a9
+
3b15a9
+	pspec := c.config.Spec.Process
3b15a9
+	pspec.SelinuxLabel = c.config.ProcessLabel
3b15a9
+	pspec.Args = cmd
3b15a9
+	// We need to default this to false else it will inherit terminal as true
3b15a9
+	// from the container.
3b15a9
+	pspec.Terminal = false
3b15a9
+	if tty {
3b15a9
+		pspec.Terminal = true
3b15a9
+	}
3b15a9
+	if len(env) > 0 {
3b15a9
+		pspec.Env = append(pspec.Env, env...)
3b15a9
+	}
3b15a9
+
3b15a9
+	if cwd != "" {
3b15a9
+		pspec.Cwd = cwd
3b15a9
+
3b15a9
+	}
3b15a9
+
3b15a9
+	var addGroups []string
3b15a9
+	var sgids []uint32
3b15a9
+
3b15a9
+	// if the user is empty, we should inherit the user that the container is currently running with
3b15a9
+	if user == "" {
3b15a9
+		user = c.config.User
3b15a9
+		addGroups = c.config.Groups
3b15a9
+	}
3b15a9
+
3b15a9
+	execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, user, nil)
3b15a9
+	if err != nil {
3b15a9
+		return "", err
3b15a9
+	}
3b15a9
+
3b15a9
+	if len(addGroups) > 0 {
3b15a9
+		sgids, err = lookup.GetContainerGroups(addGroups, c.state.Mountpoint, nil)
3b15a9
+		if err != nil {
3b15a9
+			return "", errors.Wrapf(err, "error looking up supplemental groups for container %s exec session %s", c.ID(), sessionID)
3b15a9
+		}
3b15a9
+	}
3b15a9
+
3b15a9
+	// If user was set, look it up in the container to get a UID to use on
3b15a9
+	// the host
3b15a9
+	if user != "" || len(sgids) > 0 {
3b15a9
+		if user != "" {
3b15a9
+			for _, sgid := range execUser.Sgids {
3b15a9
+				sgids = append(sgids, uint32(sgid))
3b15a9
+			}
3b15a9
+		}
3b15a9
+		processUser := spec.User{
3b15a9
+			UID:            uint32(execUser.Uid),
3b15a9
+			GID:            uint32(execUser.Gid),
3b15a9
+			AdditionalGids: sgids,
3b15a9
+		}
3b15a9
+
3b15a9
+		pspec.User = processUser
3b15a9
+	}
3b15a9
+
3b15a9
+	allCaps := caps.GetAllCapabilities()
3b15a9
+	pspec.Capabilities.Effective = []string{}
3b15a9
+	if privileged {
3b15a9
+		pspec.Capabilities.Bounding = allCaps
3b15a9
+	} else {
3b15a9
+		pspec.Capabilities.Bounding = []string{}
3b15a9
+	}
3b15a9
+	pspec.Capabilities.Inheritable = pspec.Capabilities.Bounding
3b15a9
+	if execUser.Uid == 0 {
3b15a9
+		pspec.Capabilities.Effective = pspec.Capabilities.Bounding
3b15a9
+		pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
3b15a9
+		pspec.Capabilities.Ambient = pspec.Capabilities.Bounding
3b15a9
+	} else {
3b15a9
+		pspec.Capabilities.Permitted = pspec.Capabilities.Effective
3b15a9
+		pspec.Capabilities.Ambient = pspec.Capabilities.Effective
3b15a9
+	}
3b15a9
+
3b15a9
+	hasHomeSet := false
3b15a9
+	for _, s := range pspec.Env {
3b15a9
+		if strings.HasPrefix(s, "HOME=") {
3b15a9
+			hasHomeSet = true
3b15a9
+			break
3b15a9
+		}
3b15a9
+	}
3b15a9
+	if !hasHomeSet {
3b15a9
+		pspec.Env = append(pspec.Env, fmt.Sprintf("HOME=%s", execUser.Home))
3b15a9
+	}
3b15a9
+
3b15a9
+	processJSON, err := json.Marshal(pspec)
3b15a9
+	if err != nil {
3b15a9
+		return "", err
3b15a9
+	}
3b15a9
+
3b15a9
+	if err := ioutil.WriteFile(filename, processJSON, 0644); err != nil {
3b15a9
+		return "", err
3b15a9
+	}
3b15a9
+
3b15a9
+	return filename, nil
3b15a9
+}
3b15a9
diff -up libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/pkg/spec/spec.go.orig libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/pkg/spec/spec.go
3b15a9
--- libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/pkg/spec/spec.go.orig	2019-02-11 16:26:46.000000000 +0100
3b15a9
+++ libpod-921f98f8795eb9fcb19ce581020cfdeff6dee09f/pkg/spec/spec.go	2021-02-12 10:38:48.768172416 +0100
3b15a9
@@ -325,6 +325,14 @@ func CreateConfigToOCISpec(config *Creat
3b15a9
 		}
3b15a9
 	} else {
3b15a9
 		g.SetupPrivileged(true)
3b15a9
+		if config.User != "" {
3b15a9
+			user := strings.SplitN(config.User, ":", 2)[0]
3b15a9
+			if user != "root" && user != "0" {
3b15a9
+				g.Spec().Process.Capabilities.Effective = []string{}
3b15a9
+				g.Spec().Process.Capabilities.Permitted = []string{}
3b15a9
+				g.Spec().Process.Capabilities.Ambient = []string{}
3b15a9
+			}
3b15a9
+		}
3b15a9
 	}
3b15a9
 
3b15a9
 	// HANDLE SECCOMP