From 3bb17ac8c1549cd2b82912be7b55bb63aa629e31 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 01 2019 15:16:32 +0000 Subject: import runc-1.0.0-55.rc5.dev.git2abd837.module+el8.0.0+2956+30df4692 --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..abc4b4f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/runc-2abd837.tar.gz diff --git a/.runc.metadata b/.runc.metadata new file mode 100644 index 0000000..93b3f44 --- /dev/null +++ b/.runc.metadata @@ -0,0 +1 @@ +cf7119a838db2963e7af6ecdba90a2cc95ec0d56 SOURCES/runc-2abd837.tar.gz diff --git a/SOURCES/0001-Revert-Apply-cgroups-earlier.patch b/SOURCES/0001-Revert-Apply-cgroups-earlier.patch new file mode 100644 index 0000000..4ad310a --- /dev/null +++ b/SOURCES/0001-Revert-Apply-cgroups-earlier.patch @@ -0,0 +1,62 @@ +From dfb3496c174377b860b62872ce6af951364cc3ac Mon Sep 17 00:00:00 2001 +From: Lokesh Mandvekar +Date: Tue, 12 Dec 2017 13:22:42 +0530 +Subject: [PATCH] Revert "Apply cgroups earlier" + +This reverts commit 7062c7556b71188abc18d7516441ff4b03fbc1fc. +--- + libcontainer/process_linux.go | 31 ++++++++++++++----------------- + 1 file changed, 14 insertions(+), 17 deletions(-) + +diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go +index 149b1126..b8a395af 100644 +--- a/libcontainer/process_linux.go ++++ b/libcontainer/process_linux.go +@@ -272,6 +272,20 @@ func (p *initProcess) start() error { + p.process.ops = nil + return newSystemErrorWithCause(err, "starting init process command") + } ++ if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { ++ return newSystemErrorWithCause(err, "copying bootstrap data to pipe") ++ } ++ if err := p.execSetns(); err != nil { ++ return newSystemErrorWithCause(err, "running exec setns process for init") ++ } ++ // Save the standard descriptor names before the container process ++ // can potentially move them (e.g., via dup2()). 
If we don't do this now, ++ // we won't know at checkpoint time which file descriptor to look up. ++ fds, err := getPipeFds(p.pid()) ++ if err != nil { ++ return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) ++ } ++ p.setExternalDescriptors(fds) + // Do this before syncing with child so that no children can escape the + // cgroup. We don't need to worry about not doing this and not being root + // because we'd be using the rootless cgroup manager in that case. +@@ -292,23 +306,6 @@ func (p *initProcess) start() error { + } + } + }() +- +- if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { +- return newSystemErrorWithCause(err, "copying bootstrap data to pipe") +- } +- +- if err := p.execSetns(); err != nil { +- return newSystemErrorWithCause(err, "running exec setns process for init") +- } +- +- // Save the standard descriptor names before the container process +- // can potentially move them (e.g., via dup2()). If we don't do this now, +- // we won't know at checkpoint time which file descriptor to look up. 
+- fds, err := getPipeFds(p.pid()) +- if err != nil { +- return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) +- } +- p.setExternalDescriptors(fds) + if err := p.createNetworkInterfaces(); err != nil { + return newSystemErrorWithCause(err, "creating network interfaces") + } +-- +2.14.3 + diff --git a/SOURCES/0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch b/SOURCES/0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch new file mode 100644 index 0000000..7975703 --- /dev/null +++ b/SOURCES/0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch @@ -0,0 +1,290 @@ +From bf6405284aa3870a39b402309003633a1c230ed9 Mon Sep 17 00:00:00 2001 +From: Aleksa Sarai +Date: Wed, 9 Jan 2019 13:40:01 +1100 +Subject: [PATCH 1/1] nsenter: clone /proc/self/exe to avoid exposing host + binary to container + +There are quite a few circumstances where /proc/self/exe pointing to a +pretty important container binary is a _bad_ thing, so to avoid this we +have to make a copy (preferably doing self-clean-up and not being +writeable). + +As a hotfix we require memfd_create(2), but we can always extend this to +use a scratch MNT_DETACH overlayfs or tmpfs. The main downside to this +approach is no page-cache sharing for the runc binary (which overlayfs +would give us) but this is far less complicated. + +This is only done during nsenter so that it happens transparently to the +Go code, and any libcontainer users benefit from it. This also makes +ExtraFiles and --preserve-fds handling trivial (because we don't need to +worry about it). 
+ +Fixes: CVE-2019-5736 +Co-developed-by: Christian Brauner +Signed-off-by: Aleksa Sarai +Signed-off-by: Mrunal Patel +--- + libcontainer/nsenter/cloned_binary.c | 221 +++++++++++++++++++++++++++ + libcontainer/nsenter/nsexec.c | 11 ++ + 2 files changed, 232 insertions(+) + create mode 100644 libcontainer/nsenter/cloned_binary.c + +diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c +new file mode 100644 +index 00000000..d9f6093a +--- /dev/null ++++ b/libcontainer/nsenter/cloned_binary.c +@@ -0,0 +1,221 @@ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++/* Use our own wrapper for memfd_create. */ ++#if !defined(SYS_memfd_create) && defined(__NR_memfd_create) ++# define SYS_memfd_create __NR_memfd_create ++#endif ++#ifndef SYS_memfd_create ++# error "memfd_create(2) syscall not supported by this glibc version" ++#endif ++int memfd_create(const char *name, unsigned int flags) ++{ ++ return syscall(SYS_memfd_create, name, flags); ++} ++ ++/* This comes directly from . 
*/ ++#ifndef F_LINUX_SPECIFIC_BASE ++# define F_LINUX_SPECIFIC_BASE 1024 ++#endif ++#ifndef F_ADD_SEALS ++# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) ++# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) ++#endif ++#ifndef F_SEAL_SEAL ++# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ ++# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ ++# define F_SEAL_GROW 0x0004 /* prevent file from growing */ ++# define F_SEAL_WRITE 0x0008 /* prevent writes */ ++#endif ++ ++ ++#define OUR_MEMFD_COMMENT "runc_cloned:/proc/self/exe" ++#define OUR_MEMFD_SEALS \ ++ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) ++ ++static void *must_realloc(void *ptr, size_t size) ++{ ++ void *old = ptr; ++ do { ++ ptr = realloc(old, size); ++ } while(!ptr); ++ return ptr; ++} ++ ++/* ++ * Verify whether we are currently in a self-cloned program (namely, is ++ * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather ++ * for shmem files), and we want to be sure it's actually sealed. ++ */ ++static int is_self_cloned(void) ++{ ++ int fd, seals; ++ ++ fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC); ++ if (fd < 0) ++ return -ENOTRECOVERABLE; ++ ++ seals = fcntl(fd, F_GET_SEALS); ++ close(fd); ++ return seals == OUR_MEMFD_SEALS; ++} ++ ++/* ++ * Basic wrapper around mmap(2) that gives you the file length so you can ++ * safely treat it as an ordinary buffer. Only gives you read access. 
++ */ ++static char *read_file(char *path, size_t *length) ++{ ++ int fd; ++ char buf[4096], *copy = NULL; ++ ++ if (!length) ++ return NULL; ++ ++ fd = open(path, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ return NULL; ++ ++ *length = 0; ++ for (;;) { ++ int n; ++ ++ n = read(fd, buf, sizeof(buf)); ++ if (n < 0) ++ goto error; ++ if (!n) ++ break; ++ ++ copy = must_realloc(copy, (*length + n) * sizeof(*copy)); ++ memcpy(copy + *length, buf, n); ++ *length += n; ++ } ++ close(fd); ++ return copy; ++ ++error: ++ close(fd); ++ free(copy); ++ return NULL; ++} ++ ++/* ++ * A poor-man's version of "xargs -0". Basically parses a given block of ++ * NUL-delimited data, within the given length and adds a pointer to each entry ++ * to the array of pointers. ++ */ ++static int parse_xargs(char *data, int data_length, char ***output) ++{ ++ int num = 0; ++ char *cur = data; ++ ++ if (!data || *output != NULL) ++ return -1; ++ ++ while (cur < data + data_length) { ++ num++; ++ *output = must_realloc(*output, (num + 1) * sizeof(**output)); ++ (*output)[num - 1] = cur; ++ cur += strlen(cur) + 1; ++ } ++ (*output)[num] = NULL; ++ return num; ++} ++ ++/* ++ * "Parse" out argv and envp from /proc/self/cmdline and /proc/self/environ. ++ * This is necessary because we are running in a context where we don't have a ++ * main() that we can just get the arguments from. ++ */ ++static int fetchve(char ***argv, char ***envp) ++{ ++ char *cmdline = NULL, *environ = NULL; ++ size_t cmdline_size, environ_size; ++ ++ cmdline = read_file("/proc/self/cmdline", &cmdline_size); ++ if (!cmdline) ++ goto error; ++ environ = read_file("/proc/self/environ", &environ_size); ++ if (!environ) ++ goto error; ++ ++ if (parse_xargs(cmdline, cmdline_size, argv) <= 0) ++ goto error; ++ if (parse_xargs(environ, environ_size, envp) <= 0) ++ goto error; ++ ++ return 0; ++ ++error: ++ free(environ); ++ free(cmdline); ++ return -EINVAL; ++} ++ ++#define SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. 
*/ ++static int clone_binary(void) ++{ ++ int binfd, memfd, err; ++ ssize_t sent = 0; ++ ++ memfd = memfd_create(OUR_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING); ++ if (memfd < 0) ++ return -ENOTRECOVERABLE; ++ ++ binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); ++ if (binfd < 0) ++ goto error; ++ ++ sent = sendfile(memfd, binfd, NULL, SENDFILE_MAX); ++ close(binfd); ++ if (sent < 0) ++ goto error; ++ ++ err = fcntl(memfd, F_ADD_SEALS, OUR_MEMFD_SEALS); ++ if (err < 0) ++ goto error; ++ ++ return memfd; ++ ++error: ++ close(memfd); ++ return -EIO; ++} ++ ++int ensure_cloned_binary(void) ++{ ++ int execfd; ++ char **argv = NULL, **envp = NULL; ++ ++ /* Check that we're not self-cloned, and if we are then bail. */ ++ int cloned = is_self_cloned(); ++ if (cloned > 0 || cloned == -ENOTRECOVERABLE) ++ return cloned; ++ ++ if (fetchve(&argv, &envp) < 0) ++ return -EINVAL; ++ ++ execfd = clone_binary(); ++ if (execfd < 0) ++ return -EIO; ++ ++ fexecve(execfd, argv, envp); ++ return -ENOEXEC; ++} +diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c +index cb224314..784fd9b0 100644 +--- a/libcontainer/nsenter/nsexec.c ++++ b/libcontainer/nsenter/nsexec.c +@@ -528,6 +528,9 @@ void join_namespaces(char *nslist) + free(namespaces); + } + ++/* Defined in cloned_binary.c. */ ++int ensure_cloned_binary(void); ++ + void nsexec(void) + { + int pipenum; +@@ -543,6 +546,14 @@ void nsexec(void) + if (pipenum == -1) + return; + ++ /* ++ * We need to re-exec if we are not in a cloned binary. This is necessary ++ * to ensure that containers won't be able to access the host binary ++ * through /proc/self/exe. See CVE-2019-5736. ++ */ ++ if (ensure_cloned_binary() < 0) ++ bail("could not ensure we are a cloned binary"); ++ + /* Parse all of the netlink configuration. 
*/ + nl_parse(pipenum, &config); + +-- +2.20.1 + diff --git a/SOURCES/1807.patch b/SOURCES/1807.patch new file mode 100644 index 0000000..4f46e89 --- /dev/null +++ b/SOURCES/1807.patch @@ -0,0 +1,200 @@ +From ecf53c23545092019602578583031c28fde4d2a1 Mon Sep 17 00:00:00 2001 +From: Giuseppe Scrivano +Date: Fri, 25 May 2018 18:04:06 +0200 +Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create + +if NOTIFY_SOCKET is used, do not block the main runc process waiting +for events on the notify socket. Change the logic to create a new +process that monitors exclusively the notify socket until an event is +received. + +Signed-off-by: Giuseppe Scrivano +--- + init.go | 12 +++++++ + notify_socket.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++--------- + signals.go | 5 +-- + 3 files changed, 99 insertions(+), 19 deletions(-) + +diff --git a/init.go b/init.go +index c8f453192..6a3d9e91c 100644 +--- a/init.go ++++ b/init.go +@@ -20,6 +20,18 @@ var initCommand = cli.Command{ + Name: "init", + Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`, + Action: func(context *cli.Context) error { ++ // If NOTIFY_SOCKET is used create a new process that stays around ++ // so to not block "runc start". 
It will automatically exit when the ++ // container notifies that it is ready, or when the container is deleted ++ if os.Getenv("_NOTIFY_SOCKET_FD") != "" { ++ fd := os.Getenv("_NOTIFY_SOCKET_FD") ++ pid := os.Getenv("_NOTIFY_SOCKET_PID") ++ hostNotifySocket := os.Getenv("_NOTIFY_SOCKET_HOST") ++ notifySocketPath := os.Getenv("_NOTIFY_SOCKET_PATH") ++ notifySocketInit(fd, pid, hostNotifySocket, notifySocketPath) ++ os.Exit(0) ++ } ++ + factory, _ := libcontainer.New("") + if err := factory.StartInitialization(); err != nil { + // as the error is sent back to the parent there is no need to log +diff --git a/notify_socket.go b/notify_socket.go +index cd6c0a989..e04e9d660 100644 +--- a/notify_socket.go ++++ b/notify_socket.go +@@ -6,10 +6,13 @@ import ( + "bytes" + "fmt" + "net" ++ "os" ++ "os/exec" + "path/filepath" ++ "strconv" ++ "time" + + "github.com/opencontainers/runtime-spec/specs-go" +- + "github.com/sirupsen/logrus" + "github.com/urfave/cli" + ) +@@ -64,24 +67,94 @@ func (s *notifySocket) setupSocket() error { + return nil + } + ++func (notifySocket *notifySocket) notifyNewPid(pid int) { ++ notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"} ++ client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr) ++ if err != nil { ++ return ++ } ++ newPid := fmt.Sprintf("MAINPID=%d\n", pid) ++ client.Write([]byte(newPid)) ++} ++ + // pid1 must be set only with -d, as it is used to set the new process as the main process + // for the service in systemd + func (notifySocket *notifySocket) run(pid1 int) { +- buf := make([]byte, 512) +- notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"} +- client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr) ++ file, err := notifySocket.socket.File() + if err != nil { + logrus.Error(err) + return + } +- for { +- r, err := notifySocket.socket.Read(buf) +- if err != nil { +- break ++ defer file.Close() ++ defer notifySocket.socket.Close() ++ ++ cmd := 
exec.Command("/proc/self/exe", "init") ++ cmd.ExtraFiles = []*os.File{file} ++ cmd.Env = append(cmd.Env, "_NOTIFY_SOCKET_FD=3", ++ fmt.Sprintf("_NOTIFY_SOCKET_PID=%d", pid1), ++ fmt.Sprintf("_NOTIFY_SOCKET_HOST=%s", notifySocket.host), ++ fmt.Sprintf("_NOTIFY_SOCKET_PATH=%s", notifySocket.socketPath)) ++ ++ if err := cmd.Start(); err != nil { ++ logrus.Fatal(err) ++ } ++ notifySocket.notifyNewPid(cmd.Process.Pid) ++ cmd.Process.Release() ++} ++ ++func notifySocketInit(envFd string, envPid string, notifySocketHost string, notifySocketPath string) { ++ intFd, err := strconv.Atoi(envFd) ++ if err != nil { ++ return ++ } ++ pid1, err := strconv.Atoi(envPid) ++ if err != nil { ++ return ++ } ++ ++ file := os.NewFile(uintptr(intFd), "unixgram") ++ defer file.Close() ++ ++ fileChan := make(chan []byte) ++ exitChan := make(chan bool) ++ ++ go func() { ++ for { ++ buf := make([]byte, 512) ++ r, err := file.Read(buf) ++ if err != nil { ++ return ++ } ++ fileChan <- buf[0:r] + } +- var out bytes.Buffer +- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) { +- if bytes.HasPrefix(line, []byte("READY=")) { ++ }() ++ go func() { ++ for { ++ if _, err := os.Stat(notifySocketPath); os.IsNotExist(err) { ++ exitChan <- true ++ return ++ } ++ time.Sleep(time.Second) ++ } ++ }() ++ ++ notifySocketHostAddr := net.UnixAddr{Name: notifySocketHost, Net: "unixgram"} ++ client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr) ++ if err != nil { ++ return ++ } ++ ++ for { ++ select { ++ case <-exitChan: ++ return ++ case b := <-fileChan: ++ for _, line := range bytes.Split(b, []byte{'\n'}) { ++ if !bytes.HasPrefix(line, []byte("READY=")) { ++ continue ++ } ++ ++ var out bytes.Buffer + _, err = out.Write(line) + if err != nil { + return +@@ -98,10 +171,8 @@ func (notifySocket *notifySocket) run(pid1 int) { + } + + // now we can inform systemd to use pid1 as the pid to monitor +- if pid1 > 0 { +- newPid := fmt.Sprintf("MAINPID=%d\n", pid1) +- client.Write([]byte(newPid)) +- } ++ 
newPid := fmt.Sprintf("MAINPID=%d\n", pid1) ++ client.Write([]byte(newPid)) + return + } + } +diff --git a/signals.go b/signals.go +index 1811de837..d0988cb39 100644 +--- a/signals.go ++++ b/signals.go +@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach + h.notifySocket.run(pid1) + return 0, nil + } else { +- go h.notifySocket.run(0) ++ h.notifySocket.run(os.Getpid()) + } + } + +@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach + // status because we must ensure that any of the go specific process + // fun such as flushing pipes are complete before we return. + process.Wait() +- if h.notifySocket != nil { +- h.notifySocket.Close() +- } + return e.status, nil + } + } diff --git a/SOURCES/99-containers.conf b/SOURCES/99-containers.conf new file mode 100644 index 0000000..7e2d537 --- /dev/null +++ b/SOURCES/99-containers.conf @@ -0,0 +1 @@ +fs.may_detach_mounts=1 diff --git a/SOURCES/change-default-root.patch b/SOURCES/change-default-root.patch new file mode 100644 index 0000000..de94424 --- /dev/null +++ b/SOURCES/change-default-root.patch @@ -0,0 +1,61 @@ +diff --git a/list.go b/list.go +index 0313d8c..328798b 100644 +--- a/list.go ++++ b/list.go +@@ -50,7 +50,7 @@ var listCommand = cli.Command{ + ArgsUsage: ` + + Where the given root is specified via the global option "--root" +-(default: "/run/runc"). ++(default: "/run/runc-ctrs"). 
+ + EXAMPLE 1: + To list containers created via the default "--root": +diff --git a/main.go b/main.go +index 278399a..0f49fce 100644 +--- a/main.go ++++ b/main.go +@@ -62,7 +62,7 @@ func main() { + v = append(v, fmt.Sprintf("spec: %s", specs.Version)) + app.Version = strings.Join(v, "\n") + +- root := "/run/runc" ++ root := "/run/runc-ctrs" + rootless, err := isRootless(nil) + if err != nil { + fatal(err) +@@ -70,7 +70,7 @@ func main() { + if rootless { + runtimeDir := os.Getenv("XDG_RUNTIME_DIR") + if runtimeDir != "" { +- root = runtimeDir + "/runc" ++ root = runtimeDir + "/runc-ctrs" + // According to the XDG specification, we need to set anything in + // XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get + // auto-pruned. +diff --git a/man/runc-list.8.md b/man/runc-list.8.md +index f737424..107220e 100644 +--- a/man/runc-list.8.md ++++ b/man/runc-list.8.md +@@ -6,7 +6,7 @@ + + # EXAMPLE + Where the given root is specified via the global option "--root" +-(default: "/run/runc"). ++(default: "/run/runc-ctrs"). + + To list containers created via the default "--root": + # runc list +diff --git a/man/runc.8.md b/man/runc.8.md +index 6d0ddff..337bc73 100644 +--- a/man/runc.8.md ++++ b/man/runc.8.md +@@ -51,7 +51,7 @@ value for "bundle" is the current directory. 
+ --debug enable debug output for logging + --log value set the log file path where internal debug information is written (default: "/dev/null") + --log-format value set the format used by logs ('text' (default), or 'json') (default: "text") +- --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers) ++ --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc-ctrs" or $XDG_RUNTIME_DIR/runc-ctrs for rootless containers) + --criu value path to the criu binary used for checkpoint and restore (default: "criu") + --systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234" + --rootless value enable rootless mode ('true', 'false', or 'auto') (default: "auto") diff --git a/SOURCES/pivot-root.patch b/SOURCES/pivot-root.patch new file mode 100644 index 0000000..16679df --- /dev/null +++ b/SOURCES/pivot-root.patch @@ -0,0 +1,72 @@ +From 28a697cce3e4f905dca700eda81d681a30eef9cd Mon Sep 17 00:00:00 2001 +From: Giuseppe Scrivano +Date: Fri, 11 Jan 2019 21:53:45 +0100 +Subject: [PATCH] rootfs: umount all procfs and sysfs with --no-pivot + +When creating a new user namespace, the kernel doesn't allow to mount +a new procfs or sysfs file system if there is not already one instance +fully visible in the current mount namespace. + +When using --no-pivot we were effectively inhibiting this protection +from the kernel, as /proc and /sys from the host are still present in +the container mount namespace. + +A container without full access to /proc could then create a new user +namespace, and from there able to mount a fully visible /proc, bypassing +the limitations in the container. 
+ +A simple reproducer for this issue is: + +unshare -mrfp sh -c "mount -t proc none /proc && echo c > /proc/sysrq-trigger" + +Signed-off-by: Giuseppe Scrivano +--- + libcontainer/rootfs_linux.go | 35 +++++++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index e7c2f8ada..6bd6da74a 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -748,6 +748,41 @@ func pivotRoot(rootfs string) error { + } + + func msMoveRoot(rootfs string) error { ++ mountinfos, err := mount.GetMounts() ++ if err != nil { ++ return err ++ } ++ ++ absRootfs, err := filepath.Abs(rootfs) ++ if err != nil { ++ return err ++ } ++ ++ for _, info := range mountinfos { ++ p, err := filepath.Abs(info.Mountpoint) ++ if err != nil { ++ return err ++ } ++ // Umount every sysfs and proc file systems, except those under the container rootfs ++ if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) { ++ continue ++ } ++ // Be sure umount events are not propagated to the host. ++ if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { ++ return err ++ } ++ if err := unix.Unmount(p, unix.MNT_DETACH); err != nil { ++ if err != unix.EINVAL && err != unix.EPERM { ++ return err ++ } else { ++ // If we do not have privileges for umounting (e.g. rootless), then ++ // cover the path. ++ if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil { ++ return err ++ } ++ } ++ } ++ } + if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil { + return err + } diff --git a/SPECS/runc.spec b/SPECS/runc.spec new file mode 100644 index 0000000..7e1f5b4 --- /dev/null +++ b/SPECS/runc.spec @@ -0,0 +1,263 @@ +%global with_debug 1 +%global with_bundled 1 +%global with_check 0 + +%if 0%{?with_debug} +%global _find_debuginfo_dwz_opts %{nil} +%global _dwz_low_mem_die_limit 0 +%else +%global debug_package %{nil} +%endif + +%if 0%{?rhel} > 7 && !
0%{?fedora} +%define gobuild(o:) \ +go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**}; +%endif # distro + +%global provider github +%global provider_tld com +%global project opencontainers +%global repo runc +# https://github.com/opencontainers/runc +%global provider_prefix %{provider}.%{provider_tld}/%{project}/%{repo} +%global import_path %{provider_prefix} +%global git0 https://github.com/opencontainers/runc +%global commit0 2abd837c8c25b0102ac4ce14f17bc0bc7ddffba7 +%global shortcommit0 %(c=%{commit0}; echo ${c:0:7}) + +Name: %{repo} +Version: 1.0.0 +Release: 55.rc5.dev.git%{shortcommit0}%{?dist} +Summary: CLI for running Open Containers +ExcludeArch: %{ix86} +License: ASL 2.0 +URL: https://%{provider_prefix} +Source0: %{git0}/archive/%{commit0}/%{repo}-%{shortcommit0}.tar.gz +Source1: 99-containers.conf +Patch0: change-default-root.patch +Patch1: 0001-Revert-Apply-cgroups-earlier.patch +Patch2: 1807.patch +Patch3: 0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch +Patch4: pivot-root.patch +Requires: criu +Requires(pre): container-selinux >= 2:2.2-2 + +# If go_compiler is not set to 1, there is no virtual provide. Use golang instead. +BuildRequires: %{?go_compiler:compiler(go-compiler)}%{!?go_compiler:golang} >= 1.6.2 +BuildRequires: git +BuildRequires: go-md2man +BuildRequires: libseccomp-devel + +%description +The runc command can be used to start containers which are packaged +in accordance with the Open Container Initiative's specifications, +and to manage containers running under runc.
+ +%prep +%autosetup -Sgit -n %{repo}-%{commit0} +sed -i '/\#\!\/bin\/bash/d' contrib/completions/bash/%{name} + +%build +mkdir -p GOPATH +pushd GOPATH + mkdir -p src/%{provider}.%{provider_tld}/%{project} + ln -s $(dirs +1 -l) src/%{import_path} +popd + +pushd GOPATH/src/%{import_path} +export GOPATH=%{gopath}:$(pwd)/GOPATH +export BUILDTAGS="selinux seccomp" +%gobuild -o %{name} %{import_path} + +pushd man +./md2man-all.sh +popd + +%install +install -d -p %{buildroot}%{_bindir} +install -p -m 755 %{name} %{buildroot}%{_bindir} + +# install man pages +install -d -p %{buildroot}%{_mandir}/man8 +install -p -m 644 man/man8/* %{buildroot}%{_mandir}/man8 +# install bash completion +install -d -p %{buildroot}%{_datadir}/bash-completion/completions +install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash-completion/completions + +%check + +#define license tag if not already defined +%{!?_licensedir:%global license %doc} + +%files +%license LICENSE +%doc MAINTAINERS_GUIDE.md PRINCIPLES.md README.md CONTRIBUTING.md +%{_bindir}/%{name} +%{_mandir}/man8/%{name}* +%{_datadir}/bash-completion/completions/%{name} + +%changelog +* Tue Feb 12 2019 Lokesh Mandvekar - 1.0.0-55.rc5.dev.git2abd837 +- Resolves: #1665770 - rootfs: umount all procfs and sysfs with --no-pivot +- Resolves: CVE-2019-5736 + +* Tue Dec 18 2018 Frantisek Kluknavsky - 1.0.0-54.rc5.dev.git2abd837 +- re-enable debuginfo + +* Mon Dec 17 2018 Frantisek Kluknavsky - 1.0.0-53.rc5.dev.git2abd837 +- go toolset not in scl anymore + +* Wed Sep 26 2018 Frantisek Kluknavsky - 1.0.0-52.rc5.dev.git2abd837 +- rebase + +* Fri Aug 31 2018 Dan Walsh - 2:1.0.0-51.dev.gitfdd8055 +- Fix handling of tmpcopyup + +* Fri Aug 24 2018 Lokesh Mandvekar - 2:1.0.0-49.rc5.dev.gitb4e2ecb +- %%gobuild uses no_openssl +- remove unused devel and unit-test subpackages + +* Tue Aug 07 2018 Lokesh Mandvekar - 2:1.0.0-48.rc5.dev.gitad0f525 +- build with %%gobuild +- exlude i686 temporarily because of go-toolset issues + +* 
Mon Jul 30 2018 Florian Weimer - 1.0.0-47.dev.gitb4e2ecb +- Rebuild with fixed binutils + +* Fri Jul 27 2018 Dan Walsh - 2:1.0.0-46.dev.gitb4e2ecb +- Add patch https://github.com/opencontainers/runc/pull/1807 to allow +- runc and podman to work with sd_notify + +* Wed Jul 18 2018 Dan Walsh - 2:1.0.0-40.rc5.dev.gitad0f525 +- Remove sysclt handling, not needed in RHEL8 +- Make sure package built with seccomp flags +- Remove rectty +- Add completions + +* Fri Jun 15 2018 Dan Walsh - 2:1.0.0-36.rc5.dev.gitad0f525 +- Better handling of user namespace + +* Tue May 1 2018 Dan Walsh - 2:1.0.0-31.rc5.git0cbfd83 +- Fix issues between SELinux and UserNamespace + +* Tue Apr 17 2018 Frantisek Kluknavsky - 1.0.0-27.rc5.dev.git4bb1fe4 +- rebuilt, placed missing changelog entry back + +* Tue Feb 27 2018 Dan Walsh - 2:1.0.0-26.rc5.git4bb1fe4 +- release v1.0.0~rc5 + +* Wed Jan 24 2018 Dan Walsh - 1.0.0-26.rc4.git9f9c962 +- Bump to the latest from upstream + +* Mon Dec 18 2017 Lokesh Mandvekar - 1.0.0-25.rc4.gite6516b3 +- built commit e6516b3 + +* Fri Dec 15 2017 Frantisek Kluknavsky - 1.0.0-24.rc4.dev.gitc6e4a1e.1 +- rebase to c6e4a1ebeb1a72b529c6f1b6ee2b1ae5b868b14f +- https://github.com/opencontainers/runc/pull/1651 + +* Tue Dec 12 2017 Lokesh Mandvekar - 1.0.0-23.rc4.git1d3ab6d +- Resolves: #1524654 + +* Sun Dec 10 2017 Dan Walsh - 1.0.0-22.rc4.git1d3ab6d +- Many Stability fixes +- Many fixes for rootless containers +- Many fixes for static builds + +* Thu Nov 09 2017 Lokesh Mandvekar - 1.0.0-21.rc4.dev.gitaea4f21 +- enable debuginfo and include -buildmode=pie for go build + +* Tue Nov 07 2017 Lokesh Mandvekar - 1.0.0-20.rc4.dev.gitaea4f21 +- use Makefile + +* Tue Nov 07 2017 Lokesh Mandvekar - 1.0.0-19.rc4.dev.gitaea4f21 +- disable debuginfo temporarily + +* Fri Nov 03 2017 Lokesh Mandvekar - 1.0.0-18.rc4.dev.gitaea4f21 +- enable debuginfo + +* Wed Oct 25 2017 Dan Walsh - 1.0.0-17.rc4.gitaea4f21 +- Add container-selinux prerequires to make sure runc is labeled correctly + +* Thu 
Oct 19 2017 Lokesh Mandvekar - 1.0.0-16.rc4.dev.gitaea4f21 +- correct the release tag "rc4dev" -> "rc4.dev" cause I'm OCD + +* Mon Oct 16 2017 Dan Walsh - 1.0.0-15.rc4dev.gitaea4f21 +- Use the same checkout as Fedora for lates CRI-O + +* Fri Sep 22 2017 Frantisek Kluknavsky - 1.0.0-14.rc4dev.git84a082b +- rebase to 84a082bfef6f932de921437815355186db37aeb1 + +* Tue Jun 13 2017 Lokesh Mandvekar - 1.0.0-13.rc3.gitd40db12 +- Resolves: #1479489 +- built commit d40db12 + +* Tue Jun 13 2017 Lokesh Mandvekar - 1.0.0-12.1.gitf8ce01d +- disable s390x temporarily because of indefinite wait times on brew + +* Tue Jun 13 2017 Lokesh Mandvekar - 1.0.0-11.1.gitf8ce01d +- correct previous bogus date :\ + +* Mon Jun 12 2017 Lokesh Mandvekar - 1.0.0-10.1.gitf8ce01d +- Resolves: #1441737 - run sysctl_apply for sysctl knob + +* Tue May 09 2017 Lokesh Mandvekar - 1.0.0-9.1.gitf8ce01d +- Resolves: #1447078 - change default root path +- add commit e800860 from runc @projectatomic/change-root-path + +* Fri May 05 2017 Lokesh Mandvekar - 1.0.0-8.1.gitf8ce01d +- Resolves: #1441737 - enable kernel sysctl knob /proc/sys/fs/may_detach_mounts + +* Thu Apr 13 2017 Lokesh Mandvekar - 1.0.0-7.1.gitf8ce01d +- Resolves: #1429675 +- built @opencontainers/master commit f8ce01d + +* Thu Mar 16 2017 Lokesh Mandvekar - 1.0.0-4.1.gitee992e5 +- built @projectatomic/master commit ee992e5 + +* Fri Feb 24 2017 Lokesh Mandvekar - 1.0.0-3.rc2 +- Resolves: #1426674 +- built projectatomic/runc_rhel_7 commit 5d93f81 + +* Mon Feb 06 2017 Lokesh Mandvekar - 1.0.0-2.rc2 +- Resolves: #1419702 - rebase to latest upstream master +- built commit b263a43 + +* Wed Jan 11 2017 Lokesh Mandvekar - 1.0.0-1.rc2 +- Resolves: #1412239 - *CVE-2016-9962* - set init processes as non-dumpable, +runc patch from Michael Crosby + +* Wed Sep 07 2016 Lokesh Mandvekar - 0.1.1-6 +- Resolves: #1373980 - rebuild for 7.3.0 + +* Sat Jun 25 2016 Lokesh Mandvekar - 0.1.1-5 +- build with golang >= 1.6.2 + +* Tue May 31 2016 Lokesh Mandvekar - 
0.1.1-4 +- release tags were inconsistent in the previous build + +* Tue May 31 2016 Lokesh Mandvekar - 0.1.1-1 +- Resolves: #1341267 - rebase runc to v0.1.1 + +* Tue May 03 2016 Lokesh Mandvekar - 0.1.0-3 +- add selinux build tag +- add BR: libseccomp-devel + +* Tue May 03 2016 Lokesh Mandvekar - 0.1.0-2 +- Resolves: #1328970 - add seccomp buildtag + +* Tue Apr 19 2016 Lokesh Mandvekar - 0.1.0-1 +- Resolves: rhbz#1328616 - rebase to v0.1.0 + +* Tue Mar 08 2016 Lokesh Mandvekar - 0.0.8-1.git4155b68 +- Resolves: rhbz#1277245 - bump to 0.0.8 +- Resolves: rhbz#1302363 - criu is a runtime dep +- Resolves: rhbz#1302348 - libseccomp-golang is bundled in Godeps +- manpages included + +* Wed Nov 25 2015 jchaloup - 1:0.0.5-0.1.git97bc9a7 +- Update to 0.0.5, introduce Epoch for Fedora due to 0.2 version instead of 0.0.2 + +* Fri Aug 21 2015 Jan Chaloupka - 0.2-0.2.git90e6d37 +- First package for Fedora + resolves: #1255179