d759b5
From de87d273266404f3d8fc079efda3d325f6031cc5 Mon Sep 17 00:00:00 2001
d759b5
Message-Id: <de87d273266404f3d8fc079efda3d325f6031cc5@dist-git>
e00e43
From: Michal Privoznik <mprivozn@redhat.com>
e00e43
Date: Wed, 9 Oct 2019 14:27:44 +0200
e00e43
Subject: [PATCH] virCommand: use procfs to learn opened FDs
e00e43
MIME-Version: 1.0
e00e43
Content-Type: text/plain; charset=UTF-8
e00e43
Content-Transfer-Encoding: 8bit
e00e43
e00e43
When spawning a child process, between fork() and exec() we close
e00e43
all file descriptors and keep only those the caller wants us to
e00e43
pass onto the child. The problem is how we do that. Currently, we
e00e43
get the limit of opened files and then iterate through each one
e00e43
of them and either close() it or make it survive exec(). This
e00e43
approach is suboptimal (although, not that much in default
e00e43
configurations where the limit is pretty low - 1024). We have
e00e43
/proc where we can learn what FDs we hold open and thus we can
e00e43
selectively close only those.
e00e43
e00e43
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
e00e43
Reviewed-by: Ján Tomko <jtomko@redhat.com>
e00e43
(cherry picked from commit 432faf259b696043ee5d7e8f657d855419a9a3fa)
e00e43
e00e43
https://bugzilla.redhat.com/show_bug.cgi?id=1759904
e00e43
e00e43
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
e00e43
Message-Id: <b9a09b65205ad8d2b001a55c329ec36c0e4f3183.1570623892.git.mprivozn@redhat.com>
e00e43
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
e00e43
---
e00e43
 src/util/vircommand.c | 86 +++++++++++++++++++++++++++++++++++++++----
e00e43
 1 file changed, 78 insertions(+), 8 deletions(-)
e00e43
e00e43
diff --git a/src/util/vircommand.c b/src/util/vircommand.c
e00e43
index 02dbe0fc76..2d0c987fe2 100644
e00e43
--- a/src/util/vircommand.c
e00e43
+++ b/src/util/vircommand.c
e00e43
@@ -492,27 +492,97 @@ virExecCommon(virCommandPtr cmd, gid_t *groups, int ngroups)
e00e43
     return ret;
e00e43
 }
e00e43
 
e00e43
+# ifdef __linux__
e00e43
+/* On Linux, we can utilize procfs and read the table of opened
e00e43
+ * FDs and selectively close only those FDs we don't want to pass
e00e43
+ * onto child process (well, the one we will exec soon since this
e00e43
+ * is called from the child). */
e00e43
+static int
e00e43
+virCommandMassCloseGetFDsLinux(virCommandPtr cmd ATTRIBUTE_UNUSED,
e00e43
+                               virBitmapPtr fds)
e00e43
+{
e00e43
+    DIR *dp = NULL;
e00e43
+    struct dirent *entry;
e00e43
+    const char *dirName = "/proc/self/fd";
e00e43
+    int rc;
e00e43
+    int ret = -1;
e00e43
+
e00e43
+    if (virDirOpen(&dp, dirName) < 0)
e00e43
+        return -1;
e00e43
+
e00e43
+    while ((rc = virDirRead(dp, &entry, dirName)) > 0) {
e00e43
+        int fd;
e00e43
+
e00e43
+        if (virStrToLong_i(entry->d_name, NULL, 10, &fd) < 0) {
e00e43
+            virReportError(VIR_ERR_INTERNAL_ERROR,
e00e43
+                           _("unable to parse FD: %s"),
e00e43
+                           entry->d_name);
e00e43
+            goto cleanup;
e00e43
+        }
e00e43
+
e00e43
+        if (virBitmapSetBit(fds, fd) < 0) {
e00e43
+            virReportError(VIR_ERR_INTERNAL_ERROR,
e00e43
+                           _("unable to set FD as open: %d"),
e00e43
+                           fd);
e00e43
+            goto cleanup;
e00e43
+        }
e00e43
+    }
e00e43
+
e00e43
+    if (rc < 0)
e00e43
+        goto cleanup;
e00e43
+
e00e43
+    ret = 0;
e00e43
+ cleanup:
e00e43
+    VIR_DIR_CLOSE(dp);
e00e43
+    return ret;
e00e43
+}
e00e43
+
e00e43
+# else /* !__linux__ */
e00e43
+
e00e43
+static int
e00e43
+virCommandMassCloseGetFDsGeneric(virCommandPtr cmd ATTRIBUTE_UNUSED,
e00e43
+                                 virBitmapPtr fds)
e00e43
+{
e00e43
+    virBitmapSetAll(fds);
e00e43
+    return 0;
e00e43
+}
e00e43
+# endif /* !__linux__ */
e00e43
+
e00e43
 static int
e00e43
 virCommandMassClose(virCommandPtr cmd,
e00e43
                     int childin,
e00e43
                     int childout,
e00e43
                     int childerr)
e00e43
 {
e00e43
+    VIR_AUTOPTR(virBitmap) fds = NULL;
e00e43
     int openmax = sysconf(_SC_OPEN_MAX);
e00e43
-    int fd;
e00e43
-    int tmpfd;
e00e43
+    int fd = -1;
e00e43
 
e00e43
-    if (openmax < 0) {
e00e43
-        virReportSystemError(errno,  "%s",
e00e43
-                             _("sysconf(_SC_OPEN_MAX) failed"));
e00e43
+    /* In general, it is not safe to call malloc() between fork() and exec()
e00e43
+     * because the child might have forked at the worst possible time, i.e.
e00e43
+     * when another thread was in malloc() and thus held its lock. That is to
e00e43
+     * say, POSIX does not mandate malloc() to be async-safe. Fortunately,
e00e43
+     * glibc developers are aware of this and made malloc() async-safe.
e00e43
+     * Therefore we can safely allocate memory here (and transitively call
e00e43
+     * opendir/readdir) without a deadlock. */
e00e43
+
e00e43
+    if (!(fds = virBitmapNew(openmax)))
e00e43
         return -1;
e00e43
-    }
e00e43
 
e00e43
-    for (fd = 3; fd < openmax; fd++) {
e00e43
+# ifdef __linux__
e00e43
+    if (virCommandMassCloseGetFDsLinux(cmd, fds) < 0)
e00e43
+        return -1;
e00e43
+# else
e00e43
+    if (virCommandMassCloseGetFDsGeneric(cmd, fds) < 0)
e00e43
+        return -1;
e00e43
+# endif
e00e43
+
e00e43
+    fd = virBitmapNextSetBit(fds, 2);
e00e43
+    for (; fd >= 0; fd = virBitmapNextSetBit(fds, fd)) {
e00e43
         if (fd == childin || fd == childout || fd == childerr)
e00e43
             continue;
e00e43
         if (!virCommandFDIsSet(cmd, fd)) {
e00e43
-            tmpfd = fd;
e00e43
+            int tmpfd = fd;
e00e43
             VIR_MASS_CLOSE(tmpfd);
e00e43
         } else if (virSetInherit(fd, true) < 0) {
e00e43
             virReportSystemError(errno, _("failed to preserve fd %d"), fd);
e00e43
-- 
e00e43
2.23.0
e00e43