Blob Blame Raw
From a2050ea471b0eb3c7240282219773c0f1d7ec554 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Wed, 7 Apr 2021 16:45:39 -0700
Subject: [PATCH 1/2] runc run: fix start for rootless + host pidns

Currently, runc fails like this when used from rootless podman
with host PID namespace:

> $ podman --runtime=runc run --pid=host --rm -it busybox sh
> WARN[0000] additional gid=10 is not present in the user namespace, skip setting it
> Error: container_linux.go:380: starting container process caused:
> process_linux.go:545: container init caused: readonly path /proc/asound:
> operation not permitted: OCI permission denied

(Here /proc/asound is the first path from OCI spec's readonlyPaths).

The code uses MS_BIND|MS_REMOUNT flags that have a special meaning in
the kernel ("keep the flags like nodev, nosuid, noexec as is").
For some reason, this "special meaning" trick is not working for the
above use case (rootless podman + no PID namespace), and I don't know
how to reproduce this without podman.

Instead of relying on the kernel feature, let's just get the current
mount flags using fstatfs(2) and add those that needs to be preserved.

While at it, wrap errors from unix.Mount into os.PathError to make
errors a bit less cryptic.

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
 libcontainer/rootfs_linux.go | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
index ed38e77219..3d67287926 100644
--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
@@ -931,9 +931,20 @@ func readonlyPath(path string) error {
 		if os.IsNotExist(err) {
 			return nil
 		}
-		return err
+		return &os.PathError{Op: "bind-mount", Path: path, Err: err}
+	}
+
+	var s unix.Statfs_t
+	if err := unix.Statfs(path, &s); err != nil {
+		return &os.PathError{Op: "statfs", Path: path, Err: err}
 	}
-	return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
+	flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
+
+	if err := unix.Mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
+		return &os.PathError{Op: "bind-mount-ro", Path: path, Err: err}
+	}
+
+	return nil
 }
 
 // remountReadonly will remount an existing mount point and ensure that it is read-only.

From 31dd1e499b2f590cd3bcf59153491967ea2a8e1f Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Wed, 14 Apr 2021 10:58:42 -0700
Subject: [PATCH 2/2] tests/int: add rootless + host pidns test case

For the fix, see previous commit. Without the fix, this test case fails:

> container_linux.go:380: starting container process caused:
> process_linux.go:545: container init caused: readonly path /proc/bus:
> operation not permitted

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
---
 tests/integration/start_hello.bats | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/integration/start_hello.bats b/tests/integration/start_hello.bats
index 5c2a66fdb1..858847032c 100644
--- a/tests/integration/start_hello.bats
+++ b/tests/integration/start_hello.bats
@@ -59,3 +59,20 @@ function teardown() {
 
 	[[ "$(cat pid.txt)" =~ [0-9]+ ]]
 }
+
+# https://github.com/opencontainers/runc/pull/2897
+@test "runc run [rootless with host pidns]" {
+	requires rootless_no_features
+
+	# Remove pid namespace, and replace /proc mount
+	# with a bind mount from the host.
+	update_config '	  .linux.namespaces -= [{"type": "pid"}]
+			| .mounts |= map((select(.type == "proc")
+				| .type = "none"
+				| .source = "/proc"
+				| .options = ["rbind", "nosuid", "nodev", "noexec"]
+			  ) // .)'
+
+	runc run test_hello
+	[ "$status" -eq 0 ]
+}