Blob Blame Raw
From d463f6485b809b5ea738f84e05ff5b456058a184 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Fri, 27 Sep 2019 12:01:07 +1000
Subject: [PATCH] *: verify that operations on /proc/... are on procfs

This is an additional mitigation for CVE-2019-16884. The primary problem
is that Docker can be coerced into bind-mounting a file system on top of
/proc (resulting in label-related writes to /proc no longer happening).

While we are working on mitigations against permitting the mounts, this
helps avoid our code from being tricked into writing to non-procfs
files. This is not a perfect solution (after all, there might be a
bind-mount of a different procfs file over the target) but in order to
exploit that you would need to be able to tweak a config.json pretty
specifically (which thankfully Docker doesn't allow).

Specifically this stops AppArmor from not labeling a process silently
due to /proc/self/attr/... being incorrectly set, and stops any
accidental fd leaks because /proc/self/fd/... is not real.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 libcontainer/apparmor/apparmor.go | 10 +++++--
 libcontainer/utils/utils_unix.go  | 44 ++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/libcontainer/apparmor/apparmor.go b/libcontainer/apparmor/apparmor.go
index 7fff0627f..debfc1e48 100644
--- a/libcontainer/apparmor/apparmor.go
+++ b/libcontainer/apparmor/apparmor.go
@@ -6,6 +6,8 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
+
+	"github.com/opencontainers/runc/libcontainer/utils"
 )
 
 // IsEnabled returns true if apparmor is enabled for the host.
@@ -19,7 +21,7 @@ func IsEnabled() bool {
 	return false
 }
 
-func setprocattr(attr, value string) error {
+func setProcAttr(attr, value string) error {
 	// Under AppArmor you can only change your own attr, so use /proc/self/
 	// instead of /proc/<tid>/ like libapparmor does
 	path := fmt.Sprintf("/proc/self/attr/%s", attr)
@@ -30,6 +32,10 @@ func setprocattr(attr, value string) error {
 	}
 	defer f.Close()
 
+	if err := utils.EnsureProcHandle(f); err != nil {
+		return err
+	}
+
 	_, err = fmt.Fprintf(f, "%s", value)
 	return err
 }
@@ -37,7 +43,7 @@ func setprocattr(attr, value string) error {
 // changeOnExec reimplements aa_change_onexec from libapparmor in Go
 func changeOnExec(name string) error {
 	value := "exec " + name
-	if err := setprocattr("exec", value); err != nil {
+	if err := setProcAttr("exec", value); err != nil {
 		return fmt.Errorf("apparmor failed to apply profile: %s", err)
 	}
 	return nil
diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go
index c96088988..1576f2d4a 100644
--- a/libcontainer/utils/utils_unix.go
+++ b/libcontainer/utils/utils_unix.go
@@ -3,33 +3,57 @@
 package utils
 
 import (
-	"io/ioutil"
+	"fmt"
 	"os"
 	"strconv"
 
 	"golang.org/x/sys/unix"
 )
 
+// EnsureProcHandle returns whether or not the given file handle is on procfs.
+func EnsureProcHandle(fh *os.File) error {
+	var buf unix.Statfs_t
+	if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
+		return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err)
+	}
+	if buf.Type != unix.PROC_SUPER_MAGIC {
+		return fmt.Errorf("%s is not on procfs", fh.Name())
+	}
+	return nil
+}
+
+// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
+// the process (except for those below the given fd value).
 func CloseExecFrom(minFd int) error {
-	fdList, err := ioutil.ReadDir("/proc/self/fd")
+	fdDir, err := os.Open("/proc/self/fd")
+	if err != nil {
+		return err
+	}
+	defer fdDir.Close()
+
+	if err := EnsureProcHandle(fdDir); err != nil {
+		return err
+	}
+
+	fdList, err := fdDir.Readdirnames(-1)
 	if err != nil {
 		return err
 	}
-	for _, fi := range fdList {
-		fd, err := strconv.Atoi(fi.Name())
+	for _, fdStr := range fdList {
+		fd, err := strconv.Atoi(fdStr)
+		// Ignore non-numeric file names.
 		if err != nil {
-			// ignore non-numeric file names
 			continue
 		}
-
+		// Ignore descriptors lower than our specified minimum.
 		if fd < minFd {
-			// ignore descriptors lower than our specified minimum
 			continue
 		}
-
-		// intentionally ignore errors from unix.CloseOnExec
+		// Intentionally ignore errors from unix.CloseOnExec -- the cases where
+		// this might fail are basically file descriptors that have already
+		// been closed (including and especially the one that was created when
+		// ioutil.ReadDir did the "opendir" syscall).
 		unix.CloseOnExec(fd)
-		// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
 	}
 	return nil
 }