c480ed
From 53d2fa1d52ddc8d1d219a6882f84241a996ea752 Mon Sep 17 00:00:00 2001
c480ed
Message-Id: <53d2fa1d52ddc8d1d219a6882f84241a996ea752@dist-git>
c480ed
From: Michal Privoznik <mprivozn@redhat.com>
c480ed
Date: Tue, 30 Jul 2019 15:30:54 +0200
c480ed
Subject: [PATCH] virCommand: use procfs to learn opened FDs
c480ed
MIME-Version: 1.0
c480ed
Content-Type: text/plain; charset=UTF-8
c480ed
Content-Transfer-Encoding: 8bit
c480ed
c480ed
When spawning a child process, between fork() and exec() we close
c480ed
all file descriptors and keep only those the caller wants us to
c480ed
pass onto the child. The problem is how we do that. Currently, we
c480ed
get the limit of opened files and then iterate through each one
c480ed
of them and either close() it or make it survive exec(). This
c480ed
approach is suboptimal (although, not that much in default
c480ed
configurations where the limit is pretty low - 1024). We have
c480ed
/proc where we can learn what FDs we hold open and thus we can
c480ed
selectively close only those.
c480ed
c480ed
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
c480ed
Reviewed-by: Ján Tomko <jtomko@redhat.com>
c480ed
(cherry picked from commit 432faf259b696043ee5d7e8f657d855419a9a3fa)
c480ed
c480ed
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1721434
c480ed
c480ed
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
c480ed
Message-Id: <a9bee8e6412dc010caccef2bded857bb2a1c5eb0.1564493409.git.mprivozn@redhat.com>
c480ed
Reviewed-by: Ján Tomko <jtomko@redhat.com>
c480ed
---
c480ed
 src/util/vircommand.c | 86 +++++++++++++++++++++++++++++++++++++++----
c480ed
 1 file changed, 78 insertions(+), 8 deletions(-)
c480ed
c480ed
diff --git a/src/util/vircommand.c b/src/util/vircommand.c
c480ed
index 8ae9a952a3..2353a4a554 100644
c480ed
--- a/src/util/vircommand.c
c480ed
+++ b/src/util/vircommand.c
c480ed
@@ -491,27 +491,97 @@ virExecCommon(virCommandPtr cmd, gid_t *groups, int ngroups)
c480ed
     return ret;
c480ed
 }
c480ed
 
c480ed
+# ifdef __linux__
c480ed
+/* On Linux, we can utilize procfs and read the table of opened
c480ed
+ * FDs and selectively close only those FDs we don't want to pass
c480ed
+ * onto child process (well, the one we will exec soon since this
c480ed
+ * is called from the child). */
c480ed
+static int
c480ed
+virCommandMassCloseGetFDsLinux(virCommandPtr cmd ATTRIBUTE_UNUSED,
c480ed
+                               virBitmapPtr fds)
c480ed
+{
c480ed
+    DIR *dp = NULL;
c480ed
+    struct dirent *entry;
c480ed
+    const char *dirName = "/proc/self/fd";
c480ed
+    int rc;
c480ed
+    int ret = -1;
c480ed
+
c480ed
+    if (virDirOpen(&dp, dirName) < 0)
c480ed
+        return -1;
c480ed
+
c480ed
+    while ((rc = virDirRead(dp, &entry, dirName)) > 0) {
c480ed
+        int fd;
c480ed
+
c480ed
+        if (virStrToLong_i(entry->d_name, NULL, 10, &fd) < 0) {
c480ed
+            virReportError(VIR_ERR_INTERNAL_ERROR,
c480ed
+                           _("unable to parse FD: %s"),
c480ed
+                           entry->d_name);
c480ed
+            goto cleanup;
c480ed
+        }
c480ed
+
c480ed
+        if (virBitmapSetBit(fds, fd) < 0) {
c480ed
+            virReportError(VIR_ERR_INTERNAL_ERROR,
c480ed
+                           _("unable to set FD as open: %d"),
c480ed
+                           fd);
c480ed
+            goto cleanup;
c480ed
+        }
c480ed
+    }
c480ed
+
c480ed
+    if (rc < 0)
c480ed
+        goto cleanup;
c480ed
+
c480ed
+    ret = 0;
c480ed
+ cleanup:
c480ed
+    VIR_DIR_CLOSE(dp);
c480ed
+    return ret;
c480ed
+}
c480ed
+
c480ed
+# else /* !__linux__ */
c480ed
+
c480ed
+static int
c480ed
+virCommandMassCloseGetFDsGeneric(virCommandPtr cmd ATTRIBUTE_UNUSED,
c480ed
+                                 virBitmapPtr fds)
c480ed
+{
c480ed
+    virBitmapSetAll(fds);
c480ed
+    return 0;
c480ed
+}
c480ed
+# endif /* !__linux__ */
c480ed
+
c480ed
 static int
c480ed
 virCommandMassClose(virCommandPtr cmd,
c480ed
                     int childin,
c480ed
                     int childout,
c480ed
                     int childerr)
c480ed
 {
c480ed
+    VIR_AUTOPTR(virBitmap) fds = NULL;
c480ed
     int openmax = sysconf(_SC_OPEN_MAX);
c480ed
-    int fd;
c480ed
-    int tmpfd;
c480ed
+    int fd = -1;
c480ed
 
c480ed
-    if (openmax < 0) {
c480ed
-        virReportSystemError(errno,  "%s",
c480ed
-                             _("sysconf(_SC_OPEN_MAX) failed"));
c480ed
+    /* In general, it is not safe to call malloc() between fork() and exec()
c480ed
+     * because the child might have forked at the worst possible time, i.e.
c480ed
+     * when another thread was in malloc() and thus held its lock. That is to
c480ed
+     * say, POSIX does not mandate malloc() to be async-safe. Fortunately,
c480ed
+     * glibc developers are aware of this and made malloc() async-safe.
c480ed
+     * Therefore we can safely allocate memory here (and transitively call
c480ed
+     * opendir/readdir) without a deadlock. */
c480ed
+
c480ed
+    if (!(fds = virBitmapNew(openmax)))
c480ed
         return -1;
c480ed
-    }
c480ed
 
c480ed
-    for (fd = 3; fd < openmax; fd++) {
c480ed
+# ifdef __linux__
c480ed
+    if (virCommandMassCloseGetFDsLinux(cmd, fds) < 0)
c480ed
+        return -1;
c480ed
+# else
c480ed
+    if (virCommandMassCloseGetFDsGeneric(cmd, fds) < 0)
c480ed
+        return -1;
c480ed
+# endif
c480ed
+
c480ed
+    fd = virBitmapNextSetBit(fds, 2);
c480ed
+    for (; fd >= 0; fd = virBitmapNextSetBit(fds, fd)) {
c480ed
         if (fd == childin || fd == childout || fd == childerr)
c480ed
             continue;
c480ed
         if (!virCommandFDIsSet(cmd, fd)) {
c480ed
-            tmpfd = fd;
c480ed
+            int tmpfd = fd;
c480ed
             VIR_MASS_CLOSE(tmpfd);
c480ed
         } else if (virSetInherit(fd, true) < 0) {
c480ed
             virReportSystemError(errno, _("failed to preserve fd %d"), fd);
c480ed
-- 
c480ed
2.22.0
c480ed