1d4c55
commit 2fe64148a81f0d78050c302f34a6853d21f7cae4
1d4c55
Author: DJ Delorie <dj@redhat.com>
1d4c55
Date:   Mon Mar 28 23:53:33 2022 -0400
1d4c55
1d4c55
    Allow for unprivileged nested containers
1d4c55
    
1d4c55
    If the build itself is run in a container, we may not be able to
1d4c55
    fully set up a nested container for test-container testing.
1d4c55
    Notably is the mounting of /proc, since it's critical that it
1d4c55
    be mounted from within the same PID namespace as its users, and
1d4c55
    thus cannot be bind mounted from outside the container like other
1d4c55
    mounts.
1d4c55
    
1d4c55
    This patch defaults to using the parent's PID namespace instead of
1d4c55
    creating a new one, as this is more likely to be allowed.
1d4c55
    
1d4c55
    If the test needs an isolated PID namespace, it should add the "pidns"
1d4c55
    command to its init script.
1d4c55
    
1d4c55
    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
1d4c55
1d4c55
Conflicts:
1d4c55
	nss/tst-reload2.c
1d4c55
          (not in RHEL-8)
1d4c55
	support/Makefile
1d4c55
          (RHEL-8 missing some routines in libsupport-routines)
1d4c55
1d4c55
diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c
1d4c55
index f381cb0fa7e6b93d..45ac033a0f897088 100644
1d4c55
--- a/elf/tst-pldd.c
1d4c55
+++ b/elf/tst-pldd.c
1d4c55
@@ -85,6 +85,8 @@ in_str_list (const char *libname, const char *const strlist[])
1d4c55
 static int
1d4c55
 do_test (void)
1d4c55
 {
1d4c55
+  support_need_proc ("needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child");
1d4c55
+
1d4c55
   /* Check if our subprocess can be debugged with ptrace.  */
1d4c55
   {
1d4c55
     int ptrace_scope = support_ptrace_scope ();
1d4c55
diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c
1d4c55
index 273b6073abe9cb60..f1c0b39f3a27724c 100644
1d4c55
--- a/nptl/tst-pthread-getattr.c
1d4c55
+++ b/nptl/tst-pthread-getattr.c
1d4c55
@@ -28,6 +28,8 @@
1d4c55
 #include <unistd.h>
1d4c55
 #include <inttypes.h>
1d4c55
 
1d4c55
+#include <support/support.h>
1d4c55
+
1d4c55
 /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes
1d4c55
    returned as unlimited when it is not, which may cause this test to fail.
1d4c55
    There is also the other case where RLIMIT_STACK is intentionally set as
1d4c55
@@ -152,6 +154,8 @@ check_stack_top (void)
1d4c55
 static int
1d4c55
 do_test (void)
1d4c55
 {
1d4c55
+  support_need_proc ("Reads /proc/self/maps to get stack size.");
1d4c55
+
1d4c55
   pagesize = sysconf (_SC_PAGESIZE);
1d4c55
   return check_stack_top ();
1d4c55
 }
1d4c55
diff --git a/support/Makefile b/support/Makefile
1d4c55
index 636d69c4f8e7e139..e184fccbe7d2310c 100644
1d4c55
--- a/support/Makefile
1d4c55
+++ b/support/Makefile
1d4c55
@@ -59,6 +59,7 @@ libsupport-routines = \
1d4c55
   support_format_hostent \
1d4c55
   support_format_netent \
1d4c55
   support_isolate_in_subprocess \
1d4c55
+  support_need_proc \
1d4c55
   support_process_state \
1d4c55
   support_ptrace \
1d4c55
   support_openpty \
1d4c55
diff --git a/support/support.h b/support/support.h
1d4c55
index 96833bd4e992e6d3..1466eb29f840fa59 100644
1d4c55
--- a/support/support.h
1d4c55
+++ b/support/support.h
1d4c55
@@ -81,6 +81,11 @@ char *support_quote_string (const char *);
1d4c55
    regular file open for writing, and initially empty.  */
1d4c55
 int support_descriptor_supports_holes (int fd);
1d4c55
 
1d4c55
+/* Indicates that a test requires a working /proc filesystem.  This
1d4c55
+   call will exit with UNSUPPORTED if /proc is not available, printing
1d4c55
+   WHY_MSG as part of the diagnostic.  */
1d4c55
+void support_need_proc (const char *why_msg);
1d4c55
+
1d4c55
 /* Error-checking wrapper functions which terminate the process on
1d4c55
    error.  */
1d4c55
 
1d4c55
diff --git a/support/support_need_proc.c b/support/support_need_proc.c
1d4c55
new file mode 100644
1d4c55
index 0000000000000000..9b4eab7539b2d6c3
1d4c55
--- /dev/null
1d4c55
+++ b/support/support_need_proc.c
1d4c55
@@ -0,0 +1,35 @@
1d4c55
+/* Indicate that a test requires a working /proc.
1d4c55
+   Copyright (C) 2022 Free Software Foundation, Inc.
1d4c55
+   This file is part of the GNU C Library.
1d4c55
+
1d4c55
+   The GNU C Library is free software; you can redistribute it and/or
1d4c55
+   modify it under the terms of the GNU Lesser General Public
1d4c55
+   License as published by the Free Software Foundation; either
1d4c55
+   version 2.1 of the License, or (at your option) any later version.
1d4c55
+
1d4c55
+   The GNU C Library is distributed in the hope that it will be useful,
1d4c55
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
1d4c55
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1d4c55
+   Lesser General Public License for more details.
1d4c55
+
1d4c55
+   You should have received a copy of the GNU Lesser General Public
1d4c55
+   License along with the GNU C Library; if not, see
1d4c55
+   <https://www.gnu.org/licenses/>.  */
1d4c55
+
1d4c55
+#include <unistd.h>
1d4c55
+#include <support/check.h>
1d4c55
+#include <support/support.h>
1d4c55
+
1d4c55
+/* We test for /proc/self/maps since that's one of the files that one
1d4c55
+   of our tests actually uses, but the general idea is if Linux's
1d4c55
+   /proc/ (procfs) filesystem is mounted.  If not, the process exits
1d4c55
+   with an UNSUPPORTED result code.  */
1d4c55
+
1d4c55
+void
1d4c55
+support_need_proc (const char *why_msg)
1d4c55
+{
1d4c55
+#ifdef __linux__
1d4c55
+  if (access ("/proc/self/maps", R_OK))
1d4c55
+    FAIL_UNSUPPORTED ("/proc is not available, %s", why_msg);
1d4c55
+#endif
1d4c55
+}
1d4c55
diff --git a/support/test-container.c b/support/test-container.c
1d4c55
index 9975c8cb7bc9a955..2bce4db841ff7668 100644
1d4c55
--- a/support/test-container.c
1d4c55
+++ b/support/test-container.c
1d4c55
@@ -95,6 +95,7 @@ int verbose = 0;
1d4c55
    * mytest.root/mytest.script has a list of "commands" to run:
1d4c55
        syntax:
1d4c55
          # comment
1d4c55
+	 pidns <comment>
1d4c55
          su
1d4c55
          mv FILE FILE
1d4c55
 	 cp FILE FILE
1d4c55
@@ -120,6 +121,8 @@ int verbose = 0;
1d4c55
 
1d4c55
        details:
1d4c55
          - '#': A comment.
1d4c55
+	 - 'pidns': Require a separate PID namespace, prints comment if it can't
1d4c55
+	    (default is a shared pid namespace)
1d4c55
          - 'su': Enables running test as root in the container.
1d4c55
          - 'mv': A minimal move files command.
1d4c55
          - 'cp': A minimal copy files command.
1d4c55
@@ -143,7 +146,7 @@ int verbose = 0;
1d4c55
    * Simple, easy to review code (i.e. prefer simple naive code over
1d4c55
      complex efficient code)
1d4c55
 
1d4c55
-   * The current implementation ist parallel-make-safe, but only in
1d4c55
+   * The current implementation is parallel-make-safe, but only in
1d4c55
      that it uses a lock to prevent parallel access to the testroot.  */
1d4c55
 
1d4c55
 
1d4c55
@@ -222,11 +225,37 @@ concat (const char *str, ...)
1d4c55
   return bufs[n];
1d4c55
 }
1d4c55
 
1d4c55
+/* Like the above, but put spaces between words.  Caller frees.  */
1d4c55
+static char *
1d4c55
+concat_words (char **words, int num_words)
1d4c55
+{
1d4c55
+  int len = 0;
1d4c55
+  int i;
1d4c55
+  char *rv, *p;
1d4c55
+
1d4c55
+  for (i = 0; i < num_words; i ++)
1d4c55
+    {
1d4c55
+      len += strlen (words[i]);
1d4c55
+      len ++;
1d4c55
+    }
1d4c55
+
1d4c55
+  p = rv = (char *) xmalloc (len);
1d4c55
+
1d4c55
+  for (i = 0; i < num_words; i ++)
1d4c55
+    {
1d4c55
+      if (i > 0)
1d4c55
+	p = stpcpy (p, " ");
1d4c55
+      p = stpcpy (p, words[i]);
1d4c55
+    }
1d4c55
+
1d4c55
+  return rv;
1d4c55
+}
1d4c55
+
1d4c55
 /* Try to mount SRC onto DEST.  */
1d4c55
 static void
1d4c55
 trymount (const char *src, const char *dest)
1d4c55
 {
1d4c55
-  if (mount (src, dest, "", MS_BIND, NULL) < 0)
1d4c55
+  if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0)
1d4c55
     FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest);
1d4c55
 }
1d4c55
 
1d4c55
@@ -709,6 +738,9 @@ main (int argc, char **argv)
1d4c55
   gid_t original_gid;
1d4c55
   /* If set, the test runs as root instead of the user running the testsuite.  */
1d4c55
   int be_su = 0;
1d4c55
+  int require_pidns = 0;
1d4c55
+  const char *pidns_comment = NULL;
1d4c55
+  int do_proc_mounts = 0;
1d4c55
   int UMAP;
1d4c55
   int GMAP;
1d4c55
   /* Used for "%lld %lld 1" so need not be large.  */
1d4c55
@@ -991,6 +1023,12 @@ main (int argc, char **argv)
1d4c55
 	      {
1d4c55
 		be_su = 1;
1d4c55
 	      }
1d4c55
+	    else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0)
1d4c55
+	      {
1d4c55
+		require_pidns = 1;
1d4c55
+		if (nt > 1)
1d4c55
+		  pidns_comment = concat_words (the_words + 1, nt - 1);
1d4c55
+	      }
1d4c55
 	    else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0)
1d4c55
 	      {
1d4c55
 		long int m;
1d4c55
@@ -1048,7 +1086,8 @@ main (int argc, char **argv)
1d4c55
 
1d4c55
 #ifdef CLONE_NEWNS
1d4c55
   /* The unshare here gives us our own spaces and capabilities.  */
1d4c55
-  if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0)
1d4c55
+  if (unshare (CLONE_NEWUSER | CLONE_NEWNS
1d4c55
+	       | (require_pidns ? CLONE_NEWPID : 0)) < 0)
1d4c55
     {
1d4c55
       /* Older kernels may not support all the options, or security
1d4c55
 	 policy may block this call.  */
1d4c55
@@ -1059,6 +1098,11 @@ main (int argc, char **argv)
1d4c55
 	    check_for_unshare_hints ();
1d4c55
 	  FAIL_UNSUPPORTED ("unable to unshare user/fs: %s", strerror (saved_errno));
1d4c55
 	}
1d4c55
+      /* We're about to exit anyway, it's "safe" to call unshare again
1d4c55
+	 just to see if the CLONE_NEWPID caused the error.  */
1d4c55
+      else if (require_pidns && unshare (CLONE_NEWUSER | CLONE_NEWNS) >= 0)
1d4c55
+	FAIL_EXIT1 ("unable to unshare pid ns: %s : %s", strerror (errno),
1d4c55
+		    pidns_comment ? pidns_comment : "required by test");
1d4c55
       else
1d4c55
 	FAIL_EXIT1 ("unable to unshare user/fs: %s", strerror (errno));
1d4c55
     }
1d4c55
@@ -1074,6 +1118,15 @@ main (int argc, char **argv)
1d4c55
   trymount (support_srcdir_root, new_srcdir_path);
1d4c55
   trymount (support_objdir_root, new_objdir_path);
1d4c55
 
1d4c55
+  /* It may not be possible to mount /proc directly.  */
1d4c55
+  if (! require_pidns)
1d4c55
+  {
1d4c55
+    char *new_proc = concat (new_root_path, "/proc", NULL);
1d4c55
+    xmkdirp (new_proc, 0755);
1d4c55
+    trymount ("/proc", new_proc);
1d4c55
+    do_proc_mounts = 1;
1d4c55
+  }
1d4c55
+
1d4c55
   xmkdirp (concat (new_root_path, "/dev", NULL), 0755);
1d4c55
   devmount (new_root_path, "null");
1d4c55
   devmount (new_root_path, "zero");
1d4c55
@@ -1136,42 +1189,60 @@ main (int argc, char **argv)
1d4c55
 
1d4c55
   maybe_xmkdir ("/tmp", 0755);
1d4c55
 
1d4c55
-  /* Now that we're pid 1 (effectively "root") we can mount /proc  */
1d4c55
-  maybe_xmkdir ("/proc", 0777);
1d4c55
-  if (mount ("proc", "/proc", "proc", 0, NULL) < 0)
1d4c55
-    FAIL_EXIT1 ("Unable to mount /proc: ");
1d4c55
-
1d4c55
-  /* We map our original UID to the same UID in the container so we
1d4c55
-     can own our own files normally.  */
1d4c55
-  UMAP = open ("/proc/self/uid_map", O_WRONLY);
1d4c55
-  if (UMAP < 0)
1d4c55
-    FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
1d4c55
-
1d4c55
-  sprintf (tmp, "%lld %lld 1\n",
1d4c55
-	   (long long) (be_su ? 0 : original_uid), (long long) original_uid);
1d4c55
-  write (UMAP, tmp, strlen (tmp));
1d4c55
-  xclose (UMAP);
1d4c55
-
1d4c55
-  /* We must disable setgroups () before we can map our groups, else we
1d4c55
-     get EPERM.  */
1d4c55
-  GMAP = open ("/proc/self/setgroups", O_WRONLY);
1d4c55
-  if (GMAP >= 0)
1d4c55
+  if (require_pidns)
1d4c55
     {
1d4c55
-      /* We support kernels old enough to not have this.  */
1d4c55
-      write (GMAP, "deny\n", 5);
1d4c55
-      xclose (GMAP);
1d4c55
+      /* Now that we're pid 1 (effectively "root") we can mount /proc  */
1d4c55
+      maybe_xmkdir ("/proc", 0777);
1d4c55
+      if (mount ("proc", "/proc", "proc", 0, NULL) != 0)
1d4c55
+	{
1d4c55
+	  /* This happens if we're trying to create a nested container,
1d4c55
+	     like if the build is running under podman, and we lack
1d4c55
+	     privileges.
1d4c55
+
1d4c55
+	     Ideally we would WARN here, but that would just add noise to
1d4c55
+	     *every* test-container test, and the ones that care should
1d4c55
+	     have their own relevant diagnostics.
1d4c55
+
1d4c55
+	     FAIL_EXIT1 ("Unable to mount /proc: ");  */
1d4c55
+	}
1d4c55
+      else
1d4c55
+	do_proc_mounts = 1;
1d4c55
     }
1d4c55
 
1d4c55
-  /* We map our original GID to the same GID in the container so we
1d4c55
-     can own our own files normally.  */
1d4c55
-  GMAP = open ("/proc/self/gid_map", O_WRONLY);
1d4c55
-  if (GMAP < 0)
1d4c55
-    FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
1d4c55
+  if (do_proc_mounts)
1d4c55
+    {
1d4c55
+      /* We map our original UID to the same UID in the container so we
1d4c55
+	 can own our own files normally.  */
1d4c55
+      UMAP = open ("/proc/self/uid_map", O_WRONLY);
1d4c55
+      if (UMAP < 0)
1d4c55
+	FAIL_EXIT1 ("can't write to /proc/self/uid_map\n");
1d4c55
+
1d4c55
+      sprintf (tmp, "%lld %lld 1\n",
1d4c55
+	       (long long) (be_su ? 0 : original_uid), (long long) original_uid);
1d4c55
+      write (UMAP, tmp, strlen (tmp));
1d4c55
+      xclose (UMAP);
1d4c55
+
1d4c55
+      /* We must disable setgroups () before we can map our groups, else we
1d4c55
+	 get EPERM.  */
1d4c55
+      GMAP = open ("/proc/self/setgroups", O_WRONLY);
1d4c55
+      if (GMAP >= 0)
1d4c55
+	{
1d4c55
+	  /* We support kernels old enough to not have this.  */
1d4c55
+	  write (GMAP, "deny\n", 5);
1d4c55
+	  xclose (GMAP);
1d4c55
+	}
1d4c55
 
1d4c55
-  sprintf (tmp, "%lld %lld 1\n",
1d4c55
-	   (long long) (be_su ? 0 : original_gid), (long long) original_gid);
1d4c55
-  write (GMAP, tmp, strlen (tmp));
1d4c55
-  xclose (GMAP);
1d4c55
+      /* We map our original GID to the same GID in the container so we
1d4c55
+	 can own our own files normally.  */
1d4c55
+      GMAP = open ("/proc/self/gid_map", O_WRONLY);
1d4c55
+      if (GMAP < 0)
1d4c55
+	FAIL_EXIT1 ("can't write to /proc/self/gid_map\n");
1d4c55
+
1d4c55
+      sprintf (tmp, "%lld %lld 1\n",
1d4c55
+	       (long long) (be_su ? 0 : original_gid), (long long) original_gid);
1d4c55
+      write (GMAP, tmp, strlen (tmp));
1d4c55
+      xclose (GMAP);
1d4c55
+    }
1d4c55
 
1d4c55
   if (change_cwd)
1d4c55
     {