diff --git a/SOURCES/0001-rootfs-add-mount-destination-validation.patch b/SOURCES/0001-rootfs-add-mount-destination-validation.patch new file mode 100644 index 0000000..c7734a7 --- /dev/null +++ b/SOURCES/0001-rootfs-add-mount-destination-validation.patch @@ -0,0 +1,540 @@ +From 2dd156b190c02476191fc2522f9b0e0a1a098608 Mon Sep 17 00:00:00 2001 +From: Kir Kolyshkin +Date: Mon, 17 May 2021 16:11:35 -0700 +Subject: [PATCH] rootfs: add mount destination validation + +This is a manual backport of fix for CVE-2021-30465 to runc-1.0.0-rc10 +(aka -rc90), upstream commit 84c14b43fa703db7 by Aleksa Sarai. + +Original description follows. + +---- + +Because the target of a mount is inside a container (which may be a +volume that is shared with another container), there exists a race +condition where the target of the mount may change to a path containing +a symlink after we have sanitised the path -- resulting in us +inadvertently mounting the path outside of the container. + +This is not immediately useful because we are in a mount namespace with +MS_SLAVE mount propagation applied to "/", so we cannot mount on top of +host paths in the host namespace. However, if any subsequent mountpoints +in the configuration use a subdirectory of that host path as a source, +those subsequent mounts will use an attacker-controlled source path +(resolved within the host rootfs) -- allowing the bind-mounting of "/" +into the container. + +While arguably configuration issues like this are not entirely within +runc's threat model, within the context of Kubernetes (and possibly +other container managers that provide semi-arbitrary container creation +privileges to untrusted users) this is a legitimate issue. Since we +cannot block mounting from the host into the container, we need to block +the first stage of this attack (mounting onto a path outside the +container). + +The long-term plan to solve this would be to migrate to libpathrs, but +as a stop-gap we implement libpathrs-like path verification through +readlink(/proc/self/fd/$n) and then do mount operations through the +procfd once it's been verified to be inside the container. The target +could move after we've checked it, but if it is inside the container +then we can assume that it is safe for the same reason that libpathrs +operations would be safe. + +A slight wrinkle is the "copyup" functionality we provide for tmpfs, +which is the only case where we want to do a mount on the host +filesystem. To facilitate this, I split out the copy-up functionality +entirely so that the logic isn't interspersed with the regular tmpfs +logic. In addition, all dependencies on m.Destination being overwritten +have been removed since that pattern was just begging to be a source of +more mount-target bugs (we do still have to modify m.Destination for +tmpfs-copyup but we only do it temporarily). + +Fixes: CVE-2021-30465 +Reported-by: Etienne Champetier +Co-authored-by: Noah Meyerhans +Reviewed-by: Samuel Karp +Reviewed-by: Akihiro Suda +Signed-off-by: Aleksa Sarai + +Signed-off-by: Kir Kolyshkin +--- + libcontainer/rootfs_linux.go | 225 ++++++++++++++++--------------- + libcontainer/utils/utils.go | 54 ++++++++ + libcontainer/utils/utils_test.go | 35 +++++ + 3 files changed, 204 insertions(+), 110 deletions(-) + +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index 106c4c2b..fe9afe48 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -19,8 +19,9 @@ import ( + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/mount" + "github.com/opencontainers/runc/libcontainer/system" +- libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" ++ "github.com/opencontainers/runc/libcontainer/utils" + "github.com/opencontainers/selinux/go-selinux/label" ++ "github.com/sirupsen/logrus" + + "golang.org/x/sys/unix" + ) +@@ -30,7 +31,7 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV + // needsSetupDev returns true if /dev needs to be set up. + func needsSetupDev(config *configs.Config) bool { + for _, m := range config.Mounts { +- if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" { ++ if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" { + return false + } + } +@@ -131,7 +132,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { + func finalizeRootfs(config *configs.Config) (err error) { + // remount dev as ro if specified + for _, m := range config.Mounts { +- if libcontainerUtils.CleanPath(m.Destination) == "/dev" { ++ if utils.CleanPath(m.Destination) == "/dev" { + if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY { + if err := remountReadonly(m); err != nil { + return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination) +@@ -200,8 +201,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error { + if err := checkProcMount(rootfs, dest, m.Source); err != nil { + return err + } +- // update the mount with the correct dest after symlinks are resolved. +- m.Destination = dest + if err := createIfNotExists(dest, stat.IsDir()); err != nil { + return err + } +@@ -238,18 +237,21 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b + if err := os.MkdirAll(subsystemPath, 0755); err != nil { + return err + } +- flags := defaultMountFlags +- if m.Flags&unix.MS_RDONLY != 0 { +- flags = flags | unix.MS_RDONLY +- } +- cgroupmount := &configs.Mount{ +- Source: "cgroup", +- Device: "cgroup", +- Destination: subsystemPath, +- Flags: flags, +- Data: filepath.Base(subsystemPath), +- } +- if err := mountNewCgroup(cgroupmount); err != nil { ++ if err := utils.WithProcfd(rootfs, b.Destination, func(procfd string) error { ++ flags := defaultMountFlags ++ if m.Flags&unix.MS_RDONLY != 0 { ++ flags = flags | unix.MS_RDONLY ++ } ++ var ( ++ source = "cgroup" ++ data = filepath.Base(subsystemPath) ++ ) ++ if data == "systemd" { ++ data = cgroups.CgroupNamePrefix + data ++ source = "systemd" ++ } ++ return unix.Mount(source, procfd, "cgroup", uintptr(flags), data) ++ }); err != nil { + return err + } + } else { +@@ -279,22 +281,67 @@ func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b + if err := os.MkdirAll(cgroupPath, 0755); err != nil { + return err + } +- if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil { +- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) +- if err == unix.EPERM || err == unix.EBUSY { +- return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "") ++ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { ++ if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil { ++ // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) ++ if err == unix.EPERM || err == unix.EBUSY { ++ return unix.Mount("/sys/fs/cgroup", procfd, "", uintptr(m.Flags)|unix.MS_BIND, "") ++ } ++ return err + } ++ return nil ++ }) ++} ++ ++func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { ++ // Set up a scratch dir for the tmpfs on the host. ++ tmpdir, err := prepareTmp("/tmp") ++ if err != nil { ++ return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir") ++ } ++ defer cleanupTmp(tmpdir) ++ tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir") ++ if err != nil { ++ return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir") ++ } ++ defer os.RemoveAll(tmpDir) ++ ++ // Configure the *host* tmpdir as if it's the container mount. We change ++ // m.Destination since we are going to mount *on the host*. ++ oldDest := m.Destination ++ m.Destination = tmpDir ++ err = mountPropagate(m, "/", mountLabel) ++ m.Destination = oldDest ++ if err != nil { + return err + } +- return nil ++ defer func() { ++ if Err != nil { ++ if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil { ++ logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err) ++ } ++ } ++ }() ++ ++ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) { ++ // Copy the container data to the host tmpdir. We append "/" to force ++ // CopyDirectory to resolve the symlink rather than trying to copy the ++ // symlink itself. ++ if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil { ++ return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %v", m.Destination, procfd, tmpDir, err) ++ } ++ // Now move the mount into the container. ++ if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil { ++ return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %v", tmpDir, procfd, m.Destination, err) ++ } ++ return nil ++ }) + } + + func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error { +- var ( +- dest = m.Destination +- ) +- if !strings.HasPrefix(dest, rootfs) { +- dest = filepath.Join(rootfs, dest) ++ dest, err := securejoin.SecureJoin(rootfs, m.Destination) ++ if err != nil { ++ return err + } + + switch m.Device { +@@ -329,46 +376,21 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b + } + return nil + case "tmpfs": +- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP +- tmpDir := "" + stat, err := os.Stat(dest) + if err != nil { + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } + } +- if copyUp { +- tmpdir, err := prepareTmp("/tmp") +- if err != nil { +- return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir") +- } +- defer cleanupTmp(tmpdir) +- tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir") +- if err != nil { +- return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir") +- } +- defer os.RemoveAll(tmpDir) +- m.Destination = tmpDir ++ ++ if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP { ++ err = doTmpfsCopyUp(m, rootfs, mountLabel) ++ } else { ++ err = mountPropagate(m, rootfs, mountLabel) + } +- if err := mountPropagate(m, rootfs, mountLabel); err != nil { ++ if err != nil { + return err + } +- if copyUp { +- if err := fileutils.CopyDirectory(dest, tmpDir); err != nil { +- errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err) +- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { +- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) +- } +- return errMsg +- } +- if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil { +- errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err) +- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { +- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) +- } +- return errMsg +- } +- } + if stat != nil { + if err = os.Chmod(dest, stat.Mode()); err != nil { + return err +@@ -424,19 +446,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b + } + } + default: +- // ensure that the destination of the mount is resolved of symlinks at mount time because +- // any previous mounts can invalidate the next mount's destination. +- // this can happen when a user specifies mounts within other mounts to cause breakouts or other +- // evil stuff to try to escape the container's rootfs. +- var err error +- if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { +- return err +- } + if err := checkProcMount(rootfs, dest, m.Source); err != nil { + return err + } +- // update the mount with the correct dest after symlinks are resolved. +- m.Destination = dest + if err := os.MkdirAll(dest, 0755); err != nil { + return err + } +@@ -611,7 +623,7 @@ func createDevices(config *configs.Config) error { + return nil + } + +-func bindMountDeviceNode(dest string, node *configs.Device) error { ++func bindMountDeviceNode(rootfs, dest string, node *configs.Device) error { + f, err := os.Create(dest) + if err != nil && !os.IsExist(err) { + return err +@@ -619,24 +631,29 @@ func bindMountDeviceNode(dest string, node *configs.Device) error { + if f != nil { + f.Close() + } +- return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "") ++ return utils.WithProcfd(rootfs, dest, func(procfd string) error { ++ return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "") ++ }) + } + + // Creates the device node in the rootfs of the container. + func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { +- dest := filepath.Join(rootfs, node.Path) ++ dest, err := securejoin.SecureJoin(rootfs, node.Path) ++ if err != nil { ++ return err ++ } + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return err + } + + if bind { +- return bindMountDeviceNode(dest, node) ++ return bindMountDeviceNode(rootfs, dest, node) + } + if err := mknodDevice(dest, node); err != nil { + if os.IsExist(err) { + return nil + } else if os.IsPermission(err) { +- return bindMountDeviceNode(dest, node) ++ return bindMountDeviceNode(rootfs, dest, node) + } + return err + } +@@ -955,55 +972,43 @@ func writeSystemProperty(key, value string) error { + } + + func remount(m *configs.Mount, rootfs string) error { +- var ( +- dest = m.Destination +- ) +- if !strings.HasPrefix(dest, rootfs) { +- dest = filepath.Join(rootfs, dest) +- } +- return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "") ++ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { ++ return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "") ++ }) + } + + // Do the mount operation followed by additional mounts required to take care +-// of propagation flags. ++// of propagation flags. This will always be scoped inside the container rootfs. + func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { + var ( +- dest = m.Destination + data = label.FormatMountLabel(m.Data, mountLabel) + flags = m.Flags + ) +- if libcontainerUtils.CleanPath(dest) == "/dev" { ++ if utils.CleanPath(m.Destination) == "/dev" { + flags &= ^unix.MS_RDONLY + } + +- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP +- if !(copyUp || strings.HasPrefix(dest, rootfs)) { +- dest = filepath.Join(rootfs, dest) +- } +- +- if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil { +- return err +- } +- +- for _, pflag := range m.PropagationFlags { +- if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil { +- return err ++ // Because the destination is inside a container path which might be ++ // mutating underneath us, we verify that we are actually going to mount ++ // inside the container with WithProcfd() -- mounting through a procfd ++ // mounts on the target. ++ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { ++ return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data) ++ }); err != nil { ++ return fmt.Errorf("mount through procfd: %v", err) ++ } ++ // We have to apply mount propagation flags in a separate WithProcfd() call ++ // because the previous call invalidates the passed procfd -- the mount ++ // target needs to be re-opened. ++ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { ++ for _, pflag := range m.PropagationFlags { ++ if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil { ++ return err ++ } + } +- } +- return nil +-} +- +-func mountNewCgroup(m *configs.Mount) error { +- var ( +- data = m.Data +- source = m.Source +- ) +- if data == "systemd" { +- data = cgroups.CgroupNamePrefix + data +- source = "systemd" +- } +- if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil { +- return err ++ return nil ++ }); err != nil { ++ return fmt.Errorf("change mount propagation through procfd: %v", err) + } + return nil + } +diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go +index 40ccfaa1..c1418ef9 100644 +--- a/libcontainer/utils/utils.go ++++ b/libcontainer/utils/utils.go +@@ -2,12 +2,15 @@ package utils + + import ( + "encoding/json" ++ "fmt" + "io" + "os" + "path/filepath" ++ "strconv" + "strings" + "unsafe" + ++ securejoin "github.com/cyphar/filepath-securejoin" + "golang.org/x/sys/unix" + ) + +@@ -73,6 +76,57 @@ func CleanPath(path string) string { + return filepath.Clean(path) + } + ++// stripRoot returns the passed path, stripping the root path if it was ++// (lexicially) inside it. Note that both passed paths will always be treated ++// as absolute, and the returned path will also always be absolute. In ++// addition, the paths are cleaned before stripping the root. ++func stripRoot(root, path string) string { ++ // Make the paths clean and absolute. ++ root, path = CleanPath("/"+root), CleanPath("/"+path) ++ switch { ++ case path == root: ++ path = "/" ++ case root == "/": ++ // do nothing ++ case strings.HasPrefix(path, root+"/"): ++ path = strings.TrimPrefix(path, root+"/") ++ } ++ return CleanPath("/" + path) ++} ++ ++// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) ++// corresponding to the unsafePath resolved within the root. Before passing the ++// fd, this path is verified to have been inside the root -- so operating on it ++// through the passed fdpath should be safe. Do not access this path through ++// the original path strings, and do not attempt to use the pathname outside of ++// the passed closure (the file handle will be freed once the closure returns). ++func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { ++ // Remove the root then forcefully resolve inside the root. ++ unsafePath = stripRoot(root, unsafePath) ++ path, err := securejoin.SecureJoin(root, unsafePath) ++ if err != nil { ++ return fmt.Errorf("resolving path inside rootfs failed: %v", err) ++ } ++ ++ // Open the target path. ++ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) ++ if err != nil { ++ return fmt.Errorf("open o_path procfd: %v", err) ++ } ++ defer fh.Close() ++ ++ // Double-check the path is the one we expected. ++ procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd())) ++ if realpath, err := os.Readlink(procfd); err != nil { ++ return fmt.Errorf("procfd verification failed: %v", err) ++ } else if realpath != path { ++ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) ++ } ++ ++ // Run the closure. ++ return fn(procfd) ++} ++ + // SearchLabels searches a list of key-value pairs for the provided key and + // returns the corresponding value. The pairs must be separated with '='. + func SearchLabels(labels []string, query string) string { +diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go +index 395eedcf..5b80cac6 100644 +--- a/libcontainer/utils/utils_test.go ++++ b/libcontainer/utils/utils_test.go +@@ -140,3 +140,38 @@ func TestCleanPath(t *testing.T) { + t.Errorf("expected to receive '/foo' and received %s", path) + } + } ++ ++func TestStripRoot(t *testing.T) { ++ for _, test := range []struct { ++ root, path, out string ++ }{ ++ // Works with multiple components. ++ {"/a/b", "/a/b/c", "/c"}, ++ {"/hello/world", "/hello/world/the/quick-brown/fox", "/the/quick-brown/fox"}, ++ // '/' must be a no-op. ++ {"/", "/a/b/c", "/a/b/c"}, ++ // Must be the correct order. ++ {"/a/b", "/a/c/b", "/a/c/b"}, ++ // Must be at start. ++ {"/abc/def", "/foo/abc/def/bar", "/foo/abc/def/bar"}, ++ // Must be a lexical parent. ++ {"/foo/bar", "/foo/barSAMECOMPONENT", "/foo/barSAMECOMPONENT"}, ++ // Must only strip the root once. ++ {"/foo/bar", "/foo/bar/foo/bar/baz", "/foo/bar/baz"}, ++ // Deal with .. in a fairly sane way. ++ {"/foo/bar", "/foo/bar/../baz", "/foo/baz"}, ++ {"/foo/bar", "../../../../../../foo/bar/baz", "/baz"}, ++ {"/foo/bar", "/../../../../../../foo/bar/baz", "/baz"}, ++ {"/foo/bar/../baz", "/foo/baz/bar", "/bar"}, ++ {"/foo/bar/../baz", "/foo/baz/../bar/../baz/./foo", "/foo"}, ++ // All paths are made absolute before stripping. ++ {"foo/bar", "/foo/bar/baz/bee", "/baz/bee"}, ++ {"/foo/bar", "foo/bar/baz/beef", "/baz/beef"}, ++ {"foo/bar", "foo/bar/baz/beets", "/baz/beets"}, ++ } { ++ got := stripRoot(test.root, test.path) ++ if got != test.out { ++ t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out) ++ } ++ } ++} +-- +2.31.1 + diff --git a/SPECS/runc.spec b/SPECS/runc.spec index f2dcddd..514e077 100644 --- a/SPECS/runc.spec +++ b/SPECS/runc.spec @@ -26,13 +26,14 @@ go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUIL Name: %{repo} Version: 1.0.0 -Release: 64.rc10%{?dist} +Release: 65.rc10%{?dist} Summary: CLI for running Open Containers ExcludeArch: %{ix86} License: ASL 2.0 URL: %{git0} Source0: %{git0}/archive/%{commit0}/%{name}-%{shortcommit0}.tar.gz Patch0: 1807.patch +Patch1: 0001-rootfs-add-mount-destination-validation.patch BuildRequires: golang >= 1.12.12-4 BuildRequires: git BuildRequires: go-md2man @@ -89,6 +90,10 @@ install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash %{_datadir}/bash-completion/completions/%{name} %changelog +* Wed May 19 2021 Jindrich Novy - 1.0.0-65.rc10 +- fix CVE-2021-30465 +- Resolves: #1955650 + * Thu Feb 13 2020 Jindrich Novy - 1.0.0-64.rc10 - address CVE-2019-19921 by updating to rc10 - Resolves: #1801888