16d03e
From a2050ea471b0eb3c7240282219773c0f1d7ec554 Mon Sep 17 00:00:00 2001
16d03e
From: Kir Kolyshkin <kolyshkin@gmail.com>
16d03e
Date: Wed, 7 Apr 2021 16:45:39 -0700
16d03e
Subject: [PATCH 1/2] runc run: fix start for rootless + host pidns
16d03e
16d03e
Currently, runc fails like this when used from rootless podman
16d03e
with host PID namespace:
16d03e
16d03e
> $ podman --runtime=runc run --pid=host --rm -it busybox sh
16d03e
> WARN[0000] additional gid=10 is not present in the user namespace, skip setting it
16d03e
> Error: container_linux.go:380: starting container process caused:
16d03e
> process_linux.go:545: container init caused: readonly path /proc/asound:
16d03e
> operation not permitted: OCI permission denied
16d03e
16d03e
(Here /proc/asound is the first path from OCI spec's readonlyPaths).
16d03e
16d03e
The code uses MS_BIND|MS_REMOUNT flags that have a special meaning in
16d03e
the kernel ("keep the flags like nodev, nosuid, noexec as is").
16d03e
For some reason, this "special meaning" trick is not working for the
16d03e
above use case (rootless podman + no PID namespace), and I don't know
16d03e
how to reproduce this without podman.
16d03e
16d03e
Instead of relying on the kernel feature, let's just get the current
16d03e
mount flags using statfs(2) and add those that need to be preserved.
16d03e
16d03e
While at it, wrap errors from unix.Mount into os.PathError to make
16d03e
errors a bit less cryptic.
16d03e
16d03e
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
16d03e
---
16d03e
 libcontainer/rootfs_linux.go | 15 +++++++++++++--
16d03e
 1 file changed, 13 insertions(+), 2 deletions(-)
16d03e
16d03e
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
16d03e
index ed38e77219..3d67287926 100644
16d03e
--- a/libcontainer/rootfs_linux.go
16d03e
+++ b/libcontainer/rootfs_linux.go
16d03e
@@ -931,9 +931,20 @@ func readonlyPath(path string) error {
16d03e
 		if os.IsNotExist(err) {
16d03e
 			return nil
16d03e
 		}
16d03e
-		return err
16d03e
+		return &os.PathError{Op: "bind-mount", Path: path, Err: err}
16d03e
+	}
16d03e
+
16d03e
+	var s unix.Statfs_t
16d03e
+	if err := unix.Statfs(path, &s); err != nil {
16d03e
+		return &os.PathError{Op: "statfs", Path: path, Err: err}
16d03e
 	}
16d03e
-	return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
16d03e
+	flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
16d03e
+
16d03e
+	if err := unix.Mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil {
16d03e
+		return &os.PathError{Op: "bind-mount-ro", Path: path, Err: err}
16d03e
+	}
16d03e
+
16d03e
+	return nil
16d03e
 }
16d03e
 
16d03e
 // remountReadonly will remount an existing mount point and ensure that it is read-only.
16d03e
16d03e
From 31dd1e499b2f590cd3bcf59153491967ea2a8e1f Mon Sep 17 00:00:00 2001
16d03e
From: Kir Kolyshkin <kolyshkin@gmail.com>
16d03e
Date: Wed, 14 Apr 2021 10:58:42 -0700
16d03e
Subject: [PATCH 2/2] tests/int: add rootless + host pidns test case
16d03e
16d03e
For the fix, see the previous commit. Without the fix, this test case fails:
16d03e
16d03e
> container_linux.go:380: starting container process caused:
16d03e
> process_linux.go:545: container init caused: readonly path /proc/bus:
16d03e
> operation not permitted
16d03e
16d03e
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
16d03e
---
16d03e
 tests/integration/start_hello.bats | 17 +++++++++++++++++
16d03e
 1 file changed, 17 insertions(+)
16d03e
16d03e
diff --git a/tests/integration/start_hello.bats b/tests/integration/start_hello.bats
16d03e
index 5c2a66fdb1..858847032c 100644
16d03e
--- a/tests/integration/start_hello.bats
16d03e
+++ b/tests/integration/start_hello.bats
16d03e
@@ -59,3 +59,20 @@ function teardown() {
16d03e
 
16d03e
 	[[ "$(cat pid.txt)" =~ [0-9]+ ]]
16d03e
 }
16d03e
+
16d03e
+# https://github.com/opencontainers/runc/pull/2897
16d03e
+@test "runc run [rootless with host pidns]" {
16d03e
+	requires rootless_no_features
16d03e
+
16d03e
+	# Remove pid namespace, and replace /proc mount
16d03e
+	# with a bind mount from the host.
16d03e
+	update_config '	  .linux.namespaces -= [{"type": "pid"}]
16d03e
+			| .mounts |= map((select(.type == "proc")
16d03e
+				| .type = "none"
16d03e
+				| .source = "/proc"
16d03e
+				| .options = ["rbind", "nosuid", "nodev", "noexec"]
16d03e
+			  ) // .)'
16d03e
+
16d03e
+	runc run test_hello
16d03e
+	[ "$status" -eq 0 ]
16d03e
+}