ac3a84
From 6ab61ac93e534aec1ea4d16e77c1c355c8286e64 Mon Sep 17 00:00:00 2001
ac3a84
From: Daan De Meyer <daan.j.demeyer@gmail.com>
ac3a84
Date: Thu, 27 Oct 2022 13:14:12 +0200
ac3a84
Subject: [PATCH] namespace: Add hidepid/subset support check
ac3a84
ac3a84
Using fsopen()/fsconfig(), we can check if hidepid/subset are supported to
ac3a84
avoid the noisy logs from the kernel if they aren't supported. This works
ac3a84
on centos/redhat 8 as well since they've backported fsopen()/fsconfig().
ac3a84
ac3a84
(cherry picked from commit 1c265fcd5963603d338233840129ecad8d9c1420)
ac3a84
ac3a84
Related #2138081
ac3a84
---
ac3a84
 meson.build                 |  2 ++
ac3a84
 src/basic/missing_syscall.h | 40 +++++++++++++++++++++++++++++++
ac3a84
 src/core/namespace.c        | 47 ++++++++++++++++++++++++++++++++-----
ac3a84
 3 files changed, 83 insertions(+), 6 deletions(-)
ac3a84
ac3a84
diff --git a/meson.build b/meson.build
ac3a84
index 76ad51d3fb..7750534466 100644
ac3a84
--- a/meson.build
ac3a84
+++ b/meson.build
ac3a84
@@ -606,6 +606,8 @@ foreach ident : [
ac3a84
         ['mount_setattr',     '''#include <sys/mount.h>'''],
ac3a84
         ['move_mount',        '''#include <sys/mount.h>'''],
ac3a84
         ['open_tree',         '''#include <sys/mount.h>'''],
ac3a84
+        ['fsopen',            '''#include <sys/mount.h>'''],
ac3a84
+        ['fsconfig',          '''#include <sys/mount.h>'''],
ac3a84
         ['getdents64',        '''#include <dirent.h>'''],
ac3a84
 ]
ac3a84
 
ac3a84
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
ac3a84
index 793d111c55..d54e59fdf9 100644
ac3a84
--- a/src/basic/missing_syscall.h
ac3a84
+++ b/src/basic/missing_syscall.h
ac3a84
@@ -593,6 +593,46 @@ static inline int missing_move_mount(
ac3a84
 
ac3a84
 /* ======================================================================= */
ac3a84
 
ac3a84
+#if !HAVE_FSOPEN
ac3a84
+
ac3a84
+#ifndef FSOPEN_CLOEXEC
ac3a84
+#define FSOPEN_CLOEXEC 0x00000001
ac3a84
+#endif
ac3a84
+
ac3a84
+static inline int missing_fsopen(const char *fsname, unsigned flags) {
ac3a84
+#  if defined __NR_fsopen && __NR_fsopen >= 0
ac3a84
+        return syscall(__NR_fsopen, fsname, flags);
ac3a84
+#  else
ac3a84
+        errno = ENOSYS;
ac3a84
+        return -1;
ac3a84
+#  endif
ac3a84
+}
ac3a84
+
ac3a84
+#  define fsopen missing_fsopen
ac3a84
+#endif
ac3a84
+
ac3a84
+/* ======================================================================= */
ac3a84
+
ac3a84
+#if !HAVE_FSCONFIG
ac3a84
+
ac3a84
+#ifndef FSCONFIG_SET_STRING
ac3a84
+#define FSCONFIG_SET_STRING 1 /* Set parameter, supplying a string value */
ac3a84
+#endif
ac3a84
+
ac3a84
+static inline int missing_fsconfig(int fd, unsigned cmd, const char *key, const void *value, int aux) {
ac3a84
+#  if defined __NR_fsconfig && __NR_fsconfig >= 0
ac3a84
+        return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
ac3a84
+#  else
ac3a84
+        errno = ENOSYS;
ac3a84
+        return -1;
ac3a84
+#  endif
ac3a84
+}
ac3a84
+
ac3a84
+#  define fsconfig missing_fsconfig
ac3a84
+#endif
ac3a84
+
ac3a84
+/* ======================================================================= */
ac3a84
+
ac3a84
 #if !HAVE_GETDENTS64
ac3a84
 
ac3a84
 static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) {
ac3a84
diff --git a/src/core/namespace.c b/src/core/namespace.c
ac3a84
index c3cced7410..852be3bdde 100644
ac3a84
--- a/src/core/namespace.c
ac3a84
+++ b/src/core/namespace.c
ac3a84
@@ -26,6 +26,7 @@
ac3a84
 #include "list.h"
ac3a84
 #include "loop-util.h"
ac3a84
 #include "loopback-setup.h"
ac3a84
+#include "missing_syscall.h"
ac3a84
 #include "mkdir-label.h"
ac3a84
 #include "mount-util.h"
ac3a84
 #include "mountpoint-util.h"
ac3a84
@@ -1073,6 +1074,27 @@ static int mount_sysfs(const MountEntry *m) {
ac3a84
         return 1;
ac3a84
 }
ac3a84
 
ac3a84
+static bool mount_option_supported(const char *fstype, const char *key, const char *value) {
ac3a84
+        _cleanup_close_ int fd = -1;
ac3a84
+        int r;
ac3a84
+
ac3a84
+        /* This function assumes support by default. Only if the fsconfig() call fails with -EINVAL/-EOPNOTSUPP
ac3a84
+         * will it report that the option/value is not supported. */
ac3a84
+
ac3a84
+        fd = fsopen(fstype, FSOPEN_CLOEXEC);
ac3a84
+        if (fd < 0) {
ac3a84
+                if (errno != ENOSYS)
ac3a84
+                        log_debug_errno(errno, "Failed to open superblock context for '%s': %m", fstype);
ac3a84
+                return true; /* If fsopen() fails for whatever reason, assume the value is supported. */
ac3a84
+        }
ac3a84
+
ac3a84
+        r = fsconfig(fd, FSCONFIG_SET_STRING, key, value, 0);
ac3a84
+        if (r < 0 && !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS))
ac3a84
+                log_debug_errno(errno, "Failed to set '%s=%s' on '%s' superblock context: %m", key, value, fstype);
ac3a84
+
ac3a84
+        return r >= 0 || !IN_SET(errno, EINVAL, EOPNOTSUPP);
ac3a84
+}
ac3a84
+
ac3a84
 static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
ac3a84
         _cleanup_free_ char *opts = NULL;
ac3a84
         const char *entry_path;
ac3a84
@@ -1090,12 +1112,25 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
ac3a84
                  * per-instance, we'll exclusively use the textual value for hidepid=, since support was
ac3a84
                  * added in the same commit: if it's supported it is thus also per-instance. */
ac3a84
 
ac3a84
-                opts = strjoin("hidepid=",
ac3a84
-                               ns_info->protect_proc == PROTECT_PROC_DEFAULT ? "off" :
ac3a84
-                               protect_proc_to_string(ns_info->protect_proc),
ac3a84
-                               ns_info->proc_subset == PROC_SUBSET_PID ? ",subset=pid" : "");
ac3a84
-                if (!opts)
ac3a84
-                        return -ENOMEM;
ac3a84
+                const char *hpv = ns_info->protect_proc == PROTECT_PROC_DEFAULT ?
ac3a84
+                                "off" :
ac3a84
+                                protect_proc_to_string(ns_info->protect_proc);
ac3a84
+
ac3a84
+                /* hidepid= support was added in 5.8, so we can use fsconfig()/fsopen() (which were added in
ac3a84
+                 * 5.2) to check if hidepid= is supported. This avoids a noisy dmesg log by the kernel when
ac3a84
+                 * trying to use hidepid= on systems where it isn't supported. The same applies for subset=.
ac3a84
+                 * fsopen()/fsconfig() were also backported on some distros, which allows us to detect
ac3a84
+                 * hidepid=/subset= support in even more scenarios. */
ac3a84
+
ac3a84
+                if (mount_option_supported("proc", "hidepid", hpv)) {
ac3a84
+                        opts = strjoin("hidepid=", hpv);
ac3a84
+                        if (!opts)
ac3a84
+                                return -ENOMEM;
ac3a84
+                }
ac3a84
+
ac3a84
+                if (ns_info->proc_subset == PROC_SUBSET_PID && mount_option_supported("proc", "subset", "pid"))
ac3a84
+                        if (!strextend_with_separator(&opts, ",", "subset=pid"))
ac3a84
+                                return -ENOMEM;
ac3a84
         }
ac3a84
 
ac3a84
         entry_path = mount_entry_path(m);