|
|
69af29 |
diff -urN runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/container_linux.go runc-my/libcontainer/container_linux.go
|
|
|
69af29 |
--- runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/container_linux.go 2020-01-22 08:19:15.000000000 -0800
|
|
|
69af29 |
+++ runc-my/libcontainer/container_linux.go 2024-01-23 15:03:45.697128791 -0800
|
|
|
69af29 |
@@ -341,6 +341,17 @@
|
|
|
69af29 |
return newSystemErrorWithCause(err, "creating new parent process")
|
|
|
69af29 |
}
|
|
|
69af29 |
parent.forwardChildLogs()
|
|
|
69af29 |
+
|
|
|
69af29 |
+ // Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
|
|
|
69af29 |
+ // to make sure we don't leak any files into "runc init". Any files to be
|
|
|
69af29 |
+ // passed to "runc init" through ExtraFiles will get dup2'd by the Go
|
|
|
69af29 |
+ // runtime and thus their O_CLOEXEC flag will be cleared. This is some
|
|
|
69af29 |
+ // additional protection against attacks like CVE-2024-21626, by making
|
|
|
69af29 |
+ // sure we never leak files to "runc init" we didn't intend to.
|
|
|
69af29 |
+ if err := utils.CloseExecFrom(3); err != nil {
|
|
|
69af29 |
+ return newSystemErrorWithCause(err, "unable to mark non-stdio fds as cloexec")
|
|
|
69af29 |
+ }
|
|
|
69af29 |
+
|
|
|
69af29 |
if err := parent.start(); err != nil {
|
|
|
69af29 |
// terminate the process to ensure that it properly is reaped.
|
|
|
69af29 |
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
|
|
69af29 |
diff -urN runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/init_linux.go runc-my/libcontainer/init_linux.go
|
|
|
69af29 |
--- runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/init_linux.go 2020-01-22 08:19:15.000000000 -0800
|
|
|
69af29 |
+++ runc-my/libcontainer/init_linux.go 2024-01-23 15:25:54.398330999 -0800
|
|
|
69af29 |
@@ -9,6 +9,7 @@
|
|
|
69af29 |
"io/ioutil"
|
|
|
69af29 |
"net"
|
|
|
69af29 |
"os"
|
|
|
69af29 |
+ "path/filepath"
|
|
|
69af29 |
"strings"
|
|
|
69af29 |
"syscall" // only for Errno
|
|
|
69af29 |
"unsafe"
|
|
|
69af29 |
@@ -116,6 +117,32 @@
|
|
|
69af29 |
return nil
|
|
|
69af29 |
}
|
|
|
69af29 |
|
|
|
69af29 |
+// verifyCwd ensures that the current directory is actually inside the mount
|
|
|
69af29 |
+// namespace root of the current process.
|
|
|
69af29 |
+func verifyCwd() error {
|
|
|
69af29 |
+ // getcwd(2) on Linux detects if cwd is outside of the rootfs of the
|
|
|
69af29 |
+ // current mount namespace root, and in that case prefixes "(unreachable)"
|
|
|
69af29 |
+ // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect
|
|
|
69af29 |
+ // when this happens and return ENOENT rather than returning a non-absolute
|
|
|
69af29 |
+ // path. In both cases we can therefore easily detect if we have an invalid
|
|
|
69af29 |
+ // cwd by checking the return value of getcwd(3). See getcwd(3) for more
|
|
|
69af29 |
+ // details, and CVE-2024-21626 for the security issue that motivated this
|
|
|
69af29 |
+ // check.
|
|
|
69af29 |
+ //
|
|
|
69af29 |
+ // We have to use unix.Getwd() here because os.Getwd() has a workaround for
|
|
|
69af29 |
+ // $PWD which involves doing stat(.), which can fail if the current
|
|
|
69af29 |
+ // directory is inaccessible to the container process.
|
|
|
69af29 |
+ if wd, err := unix.Getwd(); err == unix.ENOENT {
|
|
|
69af29 |
+ return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
|
|
|
69af29 |
+ } else if err != nil {
|
|
|
69af29 |
+ return fmt.Errorf("failed to verify if current working directory is safe: %s", err)
|
|
|
69af29 |
+ } else if !filepath.IsAbs(wd) {
|
|
|
69af29 |
+ // We shouldn't ever hit this, but check just in case.
|
|
|
69af29 |
+ return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd)
|
|
|
69af29 |
+ }
|
|
|
69af29 |
+ return nil
|
|
|
69af29 |
+}
|
|
|
69af29 |
+
|
|
|
69af29 |
// finalizeNamespace drops the caps, sets the correct user
|
|
|
69af29 |
// and working dir, and closes any leaked file descriptors
|
|
|
69af29 |
// before executing the command inside the namespace
|
|
|
69af29 |
@@ -154,6 +181,10 @@
|
|
|
69af29 |
if err := setupUser(config); err != nil {
|
|
|
69af29 |
return errors.Wrap(err, "setup user")
|
|
|
69af29 |
}
|
|
|
69af29 |
+ // Make sure our final working directory is inside the container.
|
|
|
69af29 |
+ if err := verifyCwd(); err != nil {
|
|
|
69af29 |
+ return err
|
|
|
69af29 |
+ }
|
|
|
69af29 |
if err := system.ClearKeepCaps(); err != nil {
|
|
|
69af29 |
return errors.Wrap(err, "clear keep caps")
|
|
|
69af29 |
}
|
|
|
69af29 |
diff -urN runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/setns_init_linux.go runc-my/libcontainer/setns_init_linux.go
|
|
|
69af29 |
--- runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/setns_init_linux.go 2020-01-22 08:19:15.000000000 -0800
|
|
|
69af29 |
+++ runc-my/libcontainer/setns_init_linux.go 2024-01-23 15:26:54.230209188 -0800
|
|
|
69af29 |
@@ -5,12 +5,15 @@
|
|
|
69af29 |
import (
|
|
|
69af29 |
"fmt"
|
|
|
69af29 |
"os"
|
|
|
69af29 |
+ "os/exec"
|
|
|
69af29 |
"runtime"
|
|
|
69af29 |
+ "syscall" //only for Exec
|
|
|
69af29 |
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/apparmor"
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/keys"
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/system"
|
|
|
69af29 |
+ "github.com/opencontainers/runc/libcontainer/utils"
|
|
|
69af29 |
"github.com/opencontainers/selinux/go-selinux/label"
|
|
|
69af29 |
"github.com/pkg/errors"
|
|
|
69af29 |
|
|
|
69af29 |
@@ -80,6 +83,12 @@
|
|
|
69af29 |
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
|
|
69af29 |
return err
|
|
|
69af29 |
}
|
|
|
69af29 |
+ // Check for the arg before waiting to make sure it exists and it is
|
|
|
69af29 |
+ // returned as a create time error.
|
|
|
69af29 |
+ name, err := exec.LookPath(l.config.Args[0])
|
|
|
69af29 |
+ if err != nil {
|
|
|
69af29 |
+ return err
|
|
|
69af29 |
+ }
|
|
|
69af29 |
// Set seccomp as close to execve as possible, so as few syscalls take
|
|
|
69af29 |
// place afterward (reducing the amount of syscalls that users need to
|
|
|
69af29 |
// enable in their seccomp profiles).
|
|
|
69af29 |
@@ -88,5 +97,21 @@
|
|
|
69af29 |
return newSystemErrorWithCause(err, "init seccomp")
|
|
|
69af29 |
}
|
|
|
69af29 |
}
|
|
|
69af29 |
- return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
|
|
69af29 |
+ // Close all file descriptors we are not passing to the container. This is
|
|
|
69af29 |
+ // necessary because the execve target could use internal runc fds as the
|
|
|
69af29 |
+ // execve path, potentially giving access to binary files from the host
|
|
|
69af29 |
+ // (which can then be opened by container processes, leading to container
|
|
|
69af29 |
+ // escapes). Note that because this operation will close any open file
|
|
|
69af29 |
+ // descriptors that are referenced by (*os.File) handles from underneath
|
|
|
69af29 |
+ // the Go runtime, we must not do any file operations after this point
|
|
|
69af29 |
+ // (otherwise the (*os.File) finaliser could close the wrong file). See
|
|
|
69af29 |
+ // CVE-2024-21626 for more information as to why this protection is
|
|
|
69af29 |
+ // necessary.
|
|
|
69af29 |
+ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
|
|
|
69af29 |
+ return err
|
|
|
69af29 |
+ }
|
|
|
69af29 |
+ if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
|
|
69af29 |
+ return newSystemErrorWithCause(err, "exec user process")
|
|
|
69af29 |
+ }
|
|
|
69af29 |
+ return nil
|
|
|
69af29 |
}
|
|
|
69af29 |
diff -urN runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/standard_init_linux.go runc-my/libcontainer/standard_init_linux.go
|
|
|
69af29 |
--- runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/standard_init_linux.go 2020-01-22 08:19:15.000000000 -0800
|
|
|
69af29 |
+++ runc-my/libcontainer/standard_init_linux.go 2024-01-23 15:27:25.396145744 -0800
|
|
|
69af29 |
@@ -14,6 +14,7 @@
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/keys"
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/seccomp"
|
|
|
69af29 |
"github.com/opencontainers/runc/libcontainer/system"
|
|
|
69af29 |
+ "github.com/opencontainers/runc/libcontainer/utils"
|
|
|
69af29 |
"github.com/opencontainers/selinux/go-selinux/label"
|
|
|
69af29 |
"github.com/pkg/errors"
|
|
|
69af29 |
|
|
|
69af29 |
@@ -207,6 +208,19 @@
|
|
|
69af29 |
return newSystemErrorWithCause(err, "init seccomp")
|
|
|
69af29 |
}
|
|
|
69af29 |
}
|
|
|
69af29 |
+ // Close all file descriptors we are not passing to the container. This is
|
|
|
69af29 |
+ // necessary because the execve target could use internal runc fds as the
|
|
|
69af29 |
+ // execve path, potentially giving access to binary files from the host
|
|
|
69af29 |
+ // (which can then be opened by container processes, leading to container
|
|
|
69af29 |
+ // escapes). Note that because this operation will close any open file
|
|
|
69af29 |
+ // descriptors that are referenced by (*os.File) handles from underneath
|
|
|
69af29 |
+ // the Go runtime, we must not do any file operations after this point
|
|
|
69af29 |
+ // (otherwise the (*os.File) finaliser could close the wrong file). See
|
|
|
69af29 |
+ // CVE-2024-21626 for more information as to why this protection is
|
|
|
69af29 |
+ // necessary.
|
|
|
69af29 |
+ if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
|
|
|
69af29 |
+ return err
|
|
|
69af29 |
+ }
|
|
|
69af29 |
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
|
|
69af29 |
return newSystemErrorWithCause(err, "exec user process")
|
|
|
69af29 |
}
|
|
|
69af29 |
diff -urN runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/utils/utils_unix.go runc-my/libcontainer/utils/utils_unix.go
|
|
|
69af29 |
--- runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/libcontainer/utils/utils_unix.go 2020-01-22 08:19:15.000000000 -0800
|
|
|
69af29 |
+++ runc-my/libcontainer/utils/utils_unix.go 2024-01-23 15:27:53.435088668 -0800
|
|
|
69af29 |
@@ -6,6 +6,7 @@
|
|
|
69af29 |
"fmt"
|
|
|
69af29 |
"os"
|
|
|
69af29 |
"strconv"
|
|
|
69af29 |
+ _ "unsafe" // for go:linkname
|
|
|
69af29 |
|
|
|
69af29 |
"golang.org/x/sys/unix"
|
|
|
69af29 |
)
|
|
|
69af29 |
@@ -22,9 +23,11 @@
|
|
|
69af29 |
return nil
|
|
|
69af29 |
}
|
|
|
69af29 |
|
|
|
69af29 |
-// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
|
|
|
69af29 |
-// the process (except for those below the given fd value).
|
|
|
69af29 |
-func CloseExecFrom(minFd int) error {
|
|
|
69af29 |
+type fdFunc func(fd int)
|
|
|
69af29 |
+
|
|
|
69af29 |
+// fdRangeFrom calls the passed fdFunc for each file descriptor >= minFd that is open in
|
|
|
69af29 |
+// the current process.
|
|
|
69af29 |
+func fdRangeFrom(minFd int, fn fdFunc) error {
|
|
|
69af29 |
fdDir, err := os.Open("/proc/self/fd")
|
|
|
69af29 |
if err != nil {
|
|
|
69af29 |
return err
|
|
|
69af29 |
@@ -49,15 +52,60 @@
|
|
|
69af29 |
if fd < minFd {
|
|
|
69af29 |
continue
|
|
|
69af29 |
}
|
|
|
69af29 |
- // Intentionally ignore errors from unix.CloseOnExec -- the cases where
|
|
|
69af29 |
- // this might fail are basically file descriptors that have already
|
|
|
69af29 |
- // been closed (including and especially the one that was created when
|
|
|
69af29 |
- // ioutil.ReadDir did the "opendir" syscall).
|
|
|
69af29 |
- unix.CloseOnExec(fd)
|
|
|
69af29 |
+ // Ignore the file descriptor we used for readdir, as it will be closed
|
|
|
69af29 |
+ // when we return.
|
|
|
69af29 |
+ if uintptr(fd) == fdDir.Fd() {
|
|
|
69af29 |
+ continue
|
|
|
69af29 |
+ }
|
|
|
69af29 |
+ // Run the closure.
|
|
|
69af29 |
+ fn(fd)
|
|
|
69af29 |
}
|
|
|
69af29 |
return nil
|
|
|
69af29 |
}
|
|
|
69af29 |
|
|
|
69af29 |
+// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater than or
|
|
|
69af29 |
+// equal to minFd in the current process.
|
|
|
69af29 |
+func CloseExecFrom(minFd int) error {
|
|
|
69af29 |
+ return fdRangeFrom(minFd, unix.CloseOnExec)
|
|
|
69af29 |
+}
|
|
|
69af29 |
+
|
|
|
69af29 |
+//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
|
|
|
69af29 |
+
|
|
|
69af29 |
+// In order to make sure we do not close the internal epoll descriptors the Go
|
|
|
69af29 |
+// runtime uses, we need to ensure that we skip descriptors that match
|
|
|
69af29 |
+// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
|
|
|
69af29 |
+// unfortunately there's no other way to be sure we're only keeping the file
|
|
|
69af29 |
+// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
|
|
|
69af29 |
+func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
|
|
|
69af29 |
+
|
|
|
69af29 |
+// UnsafeCloseFrom closes all file descriptors greater than or equal to minFd in the
|
|
|
69af29 |
+// current process, except for those critical to Go's runtime (such as the
|
|
|
69af29 |
+// netpoll management descriptors).
|
|
|
69af29 |
+//
|
|
|
69af29 |
+// NOTE: This function is incredibly dangerous to use in most Go code, as
|
|
|
69af29 |
+// closing file descriptors from underneath *os.File handles can lead to very
|
|
|
69af29 |
+// bad behaviour (the closed file descriptor can be re-used and then any
|
|
|
69af29 |
+// *os.File operations would apply to the wrong file). This function is only
|
|
|
69af29 |
+// intended to be called from the last stage of runc init.
|
|
|
69af29 |
+func UnsafeCloseFrom(minFd int) error {
|
|
|
69af29 |
+ // We must not close some file descriptors.
|
|
|
69af29 |
+ return fdRangeFrom(minFd, func(fd int) {
|
|
|
69af29 |
+ if runtime_IsPollDescriptor(uintptr(fd)) {
|
|
|
69af29 |
+ // These are the Go runtime's internal netpoll file descriptors.
|
|
|
69af29 |
+ // These file descriptors are operated on deep in the Go scheduler,
|
|
|
69af29 |
+ // and closing those files from underneath Go can result in panics.
|
|
|
69af29 |
+ // There is no issue with keeping them because they are not
|
|
|
69af29 |
+ // executable and are not useful to an attacker anyway. Also we
|
|
|
69af29 |
+ // don't have any choice.
|
|
|
69af29 |
+ return
|
|
|
69af29 |
+ }
|
|
|
69af29 |
+ // There's nothing we can do about errors from close(2), and the
|
|
|
69af29 |
+ // only likely error to be seen is EBADF which indicates the fd was
|
|
|
69af29 |
+ // already closed (in which case, we got what we wanted).
|
|
|
69af29 |
+ _ = unix.Close(fd)
|
|
|
69af29 |
+ })
|
|
|
69af29 |
+}
|
|
|
69af29 |
+
|
|
|
69af29 |
// NewSockPair returns a new unix socket pair
|
|
|
69af29 |
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
|
|
69af29 |
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
|
|
69af29 |
diff -urN runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/update.go runc-my/update.go
|
|
|
69af29 |
--- runc-dc9208a3303feef5b3839f4323d9beb36df0a9dd/update.go 2020-01-22 08:19:15.000000000 -0800
|
|
|
69af29 |
+++ runc-my/update.go 2024-01-23 14:41:07.528142160 -0800
|
|
|
69af29 |
@@ -173,6 +173,7 @@
|
|
|
69af29 |
return err
|
|
|
69af29 |
}
|
|
|
69af29 |
}
|
|
|
69af29 |
+ defer f.Close()
|
|
|
69af29 |
err = json.NewDecoder(f).Decode(&r)
|
|
|
69af29 |
if err != nil {
|
|
|
69af29 |
return err
|