Blob Blame History Raw
From 9bbb628620d4e586941344e1bdbbc166a885c0a9 Mon Sep 17 00:00:00 2001
From: Rob Crittenden <rcritten@redhat.com>
Date: Thu, 5 Sep 2019 12:45:52 -0400
Subject: [PATCH] Optimize closing open file descriptors

When forking, the code would close all unused file descriptors up
to maximum number of files. In the default case this is 1024. In
the container case this is 1048576. Huge delays in startup were
seen due to this.

Even in a default 1024 ulimit case this drastically reduces the
number of file descriptors to mark FD_CLOEXEC but in the container
default case this saves another order of magnitude of work.

This patch takes inspiration from systemd[1] and walks /proc/self/fd
if it is available to determine the list of open descriptors. It
falls back to the "close all fds we don't care about up to limit"
method.

https://bugzilla.redhat.com/show_bug.cgi?id=1656519

[1] https://github.com/systemd/systemd/blob/5238e9575906297608ff802a27e2ff9effa3b338/src/basic/fd-util.c#L217
---
 src/subproc.c | 71 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 9 deletions(-)

diff --git a/src/subproc.c b/src/subproc.c
index e49e3762..8df836ae 100644
--- a/src/subproc.c
+++ b/src/subproc.c
@@ -19,6 +19,7 @@
 
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <paths.h>
@@ -436,6 +437,25 @@ cm_subproc_parse_args(void *parent, const char *cmdline, const char **error)
 	return argv;
 }
 
+/* Based heavily on systemd version */
+static
+int safe_atoi(const char *s, int *ret_i) {
+	char *x = NULL;
+	long l;
+
+	errno = 0;
+	l = strtol(s, &x, 0);
+	if (errno > 0)
+		return -1;
+	if (!x || x == s || *x != 0)
+		return -1;
+	if ((long) (int) l != l)
+		return -1;
+
+	*ret_i = (int) l;
+	return 0;
+}
+
 /* Redirect stdio to /dev/null, and mark everything else as close-on-exec,
  * except for perhaps one to three of them that are passed in by number. */
 void
@@ -443,6 +463,9 @@ cm_subproc_mark_most_cloexec(int fd, int fd2, int fd3)
 {
 	int i;
 	long l;
+	DIR *dir = NULL;
+	struct dirent *de;
+
 	if ((fd != STDIN_FILENO) &&
 	    (fd2 != STDIN_FILENO) &&
 	    (fd3 != STDIN_FILENO)) {
@@ -482,17 +505,47 @@ cm_subproc_mark_most_cloexec(int fd, int fd2, int fd3)
 			close(STDERR_FILENO);
 		}
 	}
-	for (i = getdtablesize() - 1; i >= 3; i--) {
-		if ((i == fd) ||
-		    (i == fd2) ||
-		    (i == fd3)) {
-			continue;
+	dir = opendir("/proc/self/fd");
+	if (!dir) {
+		/* /proc isn't available, fall back to old way */
+		for (i = getdtablesize() - 1; i >= 3; i--) {
+			if ((i == fd) ||
+			    (i == fd2) ||
+			    (i == fd3)) {
+				continue;
+			}
+			l = fcntl(i, F_GETFD);
+			if (l != -1) {
+				if (fcntl(i, F_SETFD, l | FD_CLOEXEC) != 0) {
+					cm_log(0, "Potentially leaking FD %d.\n", i);
+				}
+			}
 		}
-		l = fcntl(i, F_GETFD);
-		if (l != -1) {
-			if (fcntl(i, F_SETFD, l | FD_CLOEXEC) != 0) {
-				cm_log(0, "Potentially leaking FD %d.\n", i);
+	} else {
+		while ((de = readdir(dir)) != NULL) {
+			int i = -1;
+
+			if (safe_atoi(de->d_name, &i) < 0) {
+				continue;
+			}
+
+			if ((i == fd) ||
+			    (i == fd2) ||
+			    (i == fd3)) {
+				continue;
+			}
+
+			if (i == dirfd(dir)) {
+				continue;
+			}
+
+			l = fcntl(i, F_GETFD);
+			if (l != -1) {
+				if (fcntl(i, F_SETFD, l | FD_CLOEXEC) != 0) {
+					cm_log(0, "Potentially leaking FD %d.\n", i);
+				}
 			}
 		}
+		closedir(dir);
 	}
 }
-- 
2.21.0