Blob Blame History Raw
From 764a4df7a35bacc693076afbe3679d0bfce503f5 Mon Sep 17 00:00:00 2001
From: Matthew Heon <mheon@redhat.com>
Date: Thu, 21 Jan 2021 16:22:32 -0500
Subject: [PATCH] Restrict caps of non-root processes in priv containers

Non-root processes in privileged containers do not get all caps,
and instead only have Bounding and Inherited set to all; the
other caps are left empty. This behavior matches Docker and newer
Podman releases.

Addresses CVE-2021-20188

Signed-off-by: Matthew Heon <mheon@redhat.com>
---
 libpod/container_api.go    | 11 ++++-------
 libpod/oci.go              |  4 ++--
 libpod/oci_conmon_linux.go | 31 ++++++++++++++++++++++++-------
 pkg/spec/spec.go           |  8 ++++++++
 4 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/libpod/container_api.go b/libpod/container_api.go
index 6d19bd85e..0c57d5678 100644
--- a/libpod/container_api.go
+++ b/libpod/container_api.go
@@ -10,7 +10,6 @@ import (
 	"github.com/containers/libpod/libpod/define"
 	"github.com/containers/libpod/libpod/events"
 	"github.com/containers/storage/pkg/stringid"
-	"github.com/docker/docker/oci/caps"
 	"github.com/opentracing/opentracing-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
@@ -220,7 +219,6 @@ func (c *Container) Kill(signal uint) error {
 // Otherwise, the exit code will be the exit code of the executed call inside of the container.
 // TODO investigate allowing exec without attaching
 func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []string, user, workDir string, streams *AttachStreams, preserveFDs uint, resize chan remotecommand.TerminalSize, detachKeys string) (int, error) {
-	var capList []string
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
@@ -234,10 +232,6 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri
 		return define.ExecErrorCodeCannotInvoke, errors.Wrapf(define.ErrCtrStateInvalid, "cannot exec into container that is not running")
 	}
 
-	if privileged || c.config.Privileged {
-		capList = caps.GetAllCapabilities()
-	}
-
 	// Generate exec session ID
 	// Ensure we don't conflict with an existing session ID
 	sessionID := stringid.GenerateNonCryptoID()
@@ -270,7 +264,6 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri
 
 	opts := new(ExecOptions)
 	opts.Cmd = cmd
-	opts.CapAdd = capList
 	opts.Env = env
 	opts.Terminal = tty
 	opts.Cwd = workDir
@@ -280,6 +273,10 @@ func (c *Container) Exec(tty, privileged bool, env map[string]string, cmd []stri
 	opts.Resize = resize
 	opts.DetachKeys = detachKeys
 
+	if privileged || c.config.Privileged {
+		opts.Privileged = true
+	}
+
 	pid, attachChan, err := c.ociRuntime.ExecContainer(c, sessionID, opts)
 	if err != nil {
 		ec := define.ExecErrorCodeGeneric
diff --git a/libpod/oci.go b/libpod/oci.go
index 05a2f37db..559c1f59e 100644
--- a/libpod/oci.go
+++ b/libpod/oci.go
@@ -105,8 +105,6 @@ type OCIRuntime interface {
 type ExecOptions struct {
 	// Cmd is the command to execute.
 	Cmd []string
-	// CapAdd is a set of capabilities to add to the executed command.
-	CapAdd []string
 	// Env is a set of environment variables to add to the container.
 	Env map[string]string
 	// Terminal is whether to create a new TTY for the exec session.
@@ -129,4 +127,6 @@ type ExecOptions struct {
 	// DetachKeys is a set of keys that, when pressed in sequence, will
 	// detach from the container.
 	DetachKeys string
+	// Privileged is whether the exec session is privileged.
+	Privileged bool
 }
diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go
index 0b8decd31..d5973a1a6 100644
--- a/libpod/oci_conmon_linux.go
+++ b/libpod/oci_conmon_linux.go
@@ -26,6 +26,7 @@ import (
 	"github.com/containers/libpod/utils"
 	pmount "github.com/containers/storage/pkg/mount"
 	"github.com/coreos/go-systemd/activation"
+	"github.com/docker/docker/oci/caps"
 	spec "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/opencontainers/selinux/go-selinux"
 	"github.com/opencontainers/selinux/go-selinux/label"
@@ -523,7 +524,7 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options
 		finalEnv = append(finalEnv, fmt.Sprintf("%s=%s", k, v))
 	}
 
-	processFile, err := prepareProcessExec(c, options.Cmd, finalEnv, options.Terminal, options.Cwd, options.User, sessionID)
+	processFile, err := prepareProcessExec(c, options.Cmd, finalEnv, options.Terminal, options.Cwd, options.User, sessionID, options.Privileged)
 	if err != nil {
 		return -1, nil, err
 	}
@@ -538,10 +539,6 @@ func (r *ConmonOCIRuntime) ExecContainer(c *Container, sessionID string, options
 		args = append(args, formatRuntimeOpts("--preserve-fds", fmt.Sprintf("%d", options.PreserveFDs))...)
 	}
 
-	for _, capability := range options.CapAdd {
-		args = append(args, formatRuntimeOpts("--cap", capability)...)
-	}
-
 	if options.Terminal {
 		args = append(args, "-t")
 	}
@@ -1041,12 +1038,15 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
 
 // prepareProcessExec returns the path of the process.json used in runc exec -p
 // caller is responsible to close the returned *os.File if needed.
-func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, sessionID string) (*os.File, error) {
+func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, sessionID string, privileged bool) (*os.File, error) {
 	f, err := ioutil.TempFile(c.execBundlePath(sessionID), "exec-process-")
 	if err != nil {
 		return nil, err
 	}
-	pspec := c.config.Spec.Process
+	pspec := new(spec.Process)
+	if err := JSONDeepCopy(c.config.Spec.Process, pspec); err != nil {
+		return nil, err
+	}
 	pspec.SelinuxLabel = c.config.ProcessLabel
 	pspec.Args = cmd
 	// We need to default this to false else it will inherit terminal as true
@@ -1103,6 +1103,23 @@ func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, se
 		pspec.User = processUser
 	}
 
+	allCaps := caps.GetAllCapabilities()
+	pspec.Capabilities.Effective = []string{}
+	if privileged {
+		pspec.Capabilities.Bounding = allCaps
+	} else {
+		pspec.Capabilities.Bounding = []string{}
+	}
+	pspec.Capabilities.Inheritable = pspec.Capabilities.Bounding
+	if execUser.Uid == 0 {
+		pspec.Capabilities.Effective = pspec.Capabilities.Bounding
+		pspec.Capabilities.Permitted = pspec.Capabilities.Bounding
+		pspec.Capabilities.Ambient = pspec.Capabilities.Bounding
+	} else {
+		pspec.Capabilities.Permitted = pspec.Capabilities.Effective
+		pspec.Capabilities.Ambient = pspec.Capabilities.Effective
+	}
+
 	hasHomeSet := false
 	for _, s := range pspec.Env {
 		if strings.HasPrefix(s, "HOME=") {
diff --git a/pkg/spec/spec.go b/pkg/spec/spec.go
index 86d701f7e..b313b6695 100644
--- a/pkg/spec/spec.go
+++ b/pkg/spec/spec.go
@@ -374,6 +374,14 @@ func (config *CreateConfig) createConfigToOCISpec(runtime *libpod.Runtime, userM
 		}
 	} else {
 		g.SetupPrivileged(true)
+		if config.User != "" {
+			user := strings.SplitN(config.User, ":", 2)[0]
+			if user != "root" && user != "0" {
+				g.Spec().Process.Capabilities.Effective = []string{}
+				g.Spec().Process.Capabilities.Permitted = []string{}
+				g.Spec().Process.Capabilities.Ambient = []string{}
+			}
+		}
 	}
 
 	// HANDLE SECCOMP
-- 
2.29.2