From 62b0c31d4b940ff93a23ac6fdb3a6ef345910abf Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 14 Jun 2022 17:19:10 -0700 Subject: [PATCH] libct: fix mounting via wrong proc fd Due to a bug in commit 9c444070ec7, when the user and mount namespaces are used, and the bind mount is followed by the cgroup mount in the spec, the cgroup is mounted using the bind mount's mount fd. This can be reproduced with podman 4.1 (when configured to use runc): $ podman run --uidmap 0:100:10000 quay.io/libpod/testimage:20210610 mount Error: /home/kir/git/runc/runc: runc create failed: unable to start container process: error during container init: error mounting "cgroup" to rootfs at "/sys/fs/cgroup": mount /proc/self/fd/11:/sys/fs/cgroup/systemd (via /proc/self/fd/12), flags: 0x20502f: operation not permitted: OCI permission denied or manually with the spec mounts containing something like this: { "destination": "/etc/resolv.conf", "type": "bind", "source": "/userdata/resolv.conf", "options": [ "bind" ] }, { "destination": "/sys/fs/cgroup", "type": "cgroup", "source": "cgroup", "options": [ "rprivate", "nosuid", "noexec", "nodev", "relatime", "ro" ] } The issue was not found earlier since it requires using userns, and even then mount fd is ignored by mountToRootfs, except for bind mounts, and all the bind mounts have mountfd set, except for the case of cgroup v1's /sys/fs/cgroup which is internally transformed into a bunch of bind mounts. This is a minimal fix for the issue, suitable for backporting. Fixes: 9c444070ec7 ("Open bind mount sources from the host userns") Signed-off-by: Kir Kolyshkin (cherry picked from commit b3aa20af7fb67ee1f2b381f3c82329e73c7d3a0c) Signed-off-by: Kir Kolyshkin --- libcontainer/rootfs_linux.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 3cfd2bf1e4..ec7638e4d5 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -80,6 +80,8 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds []int) (err // Therefore, we can access mountFds[i] without any concerns. if mountFds != nil && mountFds[i] != -1 { mountConfig.fd = &mountFds[i] + } else { + mountConfig.fd = nil } if err := mountToRootfs(m, mountConfig); err != nil {