Blame SOURCES/0030-BZ-1700451-test-socket-connection-in-non-blocking-mo.patch

a3944b
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
a3944b
From: Martin Wilck <mwilck@suse.com>
a3944b
Date: Wed, 24 Apr 2019 11:07:59 +0200
a3944b
Subject: [PATCH] BZ 1700451: test socket connection in non-blocking mode
a3944b
a3944b
Since commit d7188fcd "multipathd: start daemon after udev trigger",
a3944b
multipathd startup is delayed during boot until after "udev settle"
a3944b
terminates. But "multipath -u" is run by udev workers for storage devices,
a3944b
and attempts to connect to the multipathd socket. This causes a start job
a3944b
for multipathd to be scheduled by systemd, but that job won't be started
a3944b
until "udev settle" finishes. This is not a problem on systems with 129 or
a3944b
less storage units, because the connect() call of "multipath -u" will
a3944b
succeed anyway. But on larger systems, the listen backlog of the systemd
a3944b
socket can be exceeded, which causes connect() calls for the socket to
a3944b
block until multipathd starts up and begins calling accept(). This creates
a3944b
a deadlock situation, because "multipath -u" (called by udev workers)
a3944b
blocks, and thus "udev settle" doesn't finish, delaying multipathd
a3944b
startup. This situation then persists until either the workers or "udev
a3944b
settle" time out. In the former case, path devices might be misclassified
a3944b
as non-multipath devices by "multipath -u".
a3944b
a3944b
Fix this by using a non-blocking socket fd for connect() and interpret the
a3944b
errno appropriately.
a3944b
a3944b
This patch reverts most of the changes from commit 8cdf6661 "multipath:
a3944b
check on multipathd without starting it". Instead, "multipath -u" does
a3944b
access the socket and start multipath again (which is what we want IMO),
a3944b
but it is now able to detect and handle the "full backlog" situation.
a3944b
a3944b
Signed-off-by: Martin Wilck <mwilck@suse.com>
a3944b
a3944b
V2:
a3944b
a3944b
Use same error reporting convention in __mpath_connect() as in
a3944b
mpath_connect() (Hannes Reinecke). We can't easily change the latter,
a3944b
because it's part of the "public" libmpathcmd API.
a3944b
a3944b
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
a3944b
---
a3944b
 libmpathcmd/mpath_cmd.c | 24 +++++++++++++-
a3944b
 libmpathcmd/mpath_cmd.h | 15 +++++++++
a3944b
 multipath/main.c        | 70 +++++++++++++----------------------------
a3944b
 3 files changed, 60 insertions(+), 49 deletions(-)
a3944b
a3944b
diff --git a/libmpathcmd/mpath_cmd.c b/libmpathcmd/mpath_cmd.c
a1c519
index df4ca54..b681311 100644
a3944b
--- a/libmpathcmd/mpath_cmd.c
a3944b
+++ b/libmpathcmd/mpath_cmd.c
a3944b
@@ -26,6 +26,7 @@
a3944b
 #include <poll.h>
a3944b
 #include <string.h>
a3944b
 #include <errno.h>
a3944b
+#include <fcntl.h>
a3944b
 
a3944b
 #include "mpath_cmd.h"
a3944b
 
a3944b
@@ -93,10 +94,11 @@ static size_t write_all(int fd, const void *buf, size_t len)
a3944b
 /*
a3944b
  * connect to a unix domain socket
a3944b
  */
a3944b
-int mpath_connect(void)
a3944b
+int __mpath_connect(int nonblocking)
a3944b
 {
a3944b
 	int fd, len;
a3944b
 	struct sockaddr_un addr;
a3944b
+	int flags = 0;
a3944b
 
a3944b
 	memset(&addr, 0, sizeof(addr));
a3944b
 	addr.sun_family = AF_LOCAL;
a3944b
@@ -108,14 +110,34 @@ int mpath_connect(void)
a3944b
 	if (fd == -1)
a3944b
 		return -1;
a3944b
 
a3944b
+	if (nonblocking) {
a3944b
+		flags = fcntl(fd, F_GETFL, 0);
a3944b
+		if (flags != -1)
a3944b
+			(void)fcntl(fd, F_SETFL, flags|O_NONBLOCK);
a3944b
+	}
a3944b
+
a3944b
 	if (connect(fd, (struct sockaddr *)&addr, len) == -1) {
a3944b
+		int err = errno;
a3944b
+
a3944b
 		close(fd);
a3944b
+		errno = err;
a3944b
 		return -1;
a3944b
 	}
a3944b
 
a3944b
+	if (nonblocking && flags != -1)
a3944b
+		(void)fcntl(fd, F_SETFL, flags);
a3944b
+
a3944b
 	return fd;
a3944b
 }
a3944b
 
a3944b
+/*
a3944b
+ * connect to a unix domain socket
a3944b
+ */
a3944b
+int mpath_connect(void)
a3944b
+{
a3944b
+	return __mpath_connect(0);
a3944b
+}
a3944b
+
a3944b
 int mpath_disconnect(int fd)
a3944b
 {
a3944b
 	return close(fd);
a3944b
diff --git a/libmpathcmd/mpath_cmd.h b/libmpathcmd/mpath_cmd.h
a1c519
index 15aeb06..ccfd35f 100644
a3944b
--- a/libmpathcmd/mpath_cmd.h
a3944b
+++ b/libmpathcmd/mpath_cmd.h
a1c519
@@ -34,6 +34,21 @@ extern "C" {
a3944b
 #define DEFAULT_REPLY_TIMEOUT	4000
a3944b
 
a3944b
 
a3944b
+/*
a3944b
+ * DESCRIPTION:
a3944b
+ *	Same as mpath_connect() (see below) except for the "nonblocking"
a3944b
+ *	parameter.
a3944b
+ *	If "nonblocking" is set, connects in non-blocking mode. This is
a3944b
+ *	useful to avoid blocking if the listening socket's backlog is
a3944b
+ *	exceeded. In this case, errno will be set to EAGAIN.
a3944b
+ *	In case of success, the returned file descriptor is in in blocking
a3944b
+ *	mode, even if "nonblocking" was true.
a3944b
+ *
a3944b
+ * RETURNS:
a3944b
+ *	A file descriptor on success. -1 on failure (with errno set).
a3944b
+ */
a3944b
+int __mpath_connect(int nonblocking);
a3944b
+
a3944b
 /*
a3944b
  * DESCRIPTION:
a3944b
  *	Connect to the running multipathd daemon. On systems with the
a3944b
diff --git a/multipath/main.c b/multipath/main.c
a1c519
index 632ce4d..3fb6699 100644
a3944b
--- a/multipath/main.c
a3944b
+++ b/multipath/main.c
a1c519
@@ -852,55 +852,29 @@ out:
a3944b
 	return r;
a3944b
 }
a3944b
 
a3944b
-int is_multipathd_running(void)
a3944b
+static int test_multipathd_socket(void)
a3944b
 {
a3944b
-	FILE *f = NULL;
a3944b
-	char buf[16];
a3944b
-	char path[PATH_MAX];
a3944b
-	int pid;
a3944b
-	char *end;
a3944b
+	int fd;
a3944b
+	/*
a3944b
+	 * "multipath -u" may be run before the daemon is started. In this
a3944b
+	 * case, systemd might own the socket but might delay multipathd
a3944b
+	 * startup until some other unit (udev settle!)  has finished
a3944b
+	 * starting. With many LUNs, the listen backlog may be exceeded, which
a3944b
+	 * would cause connect() to block. This causes udev workers calling
a3944b
+	 * "multipath -u" to hang, and thus creates a deadlock, until "udev
a3944b
+	 * settle" times out.  To avoid this, call connect() in non-blocking
a3944b
+	 * mode here, and take EAGAIN as indication for a filled-up systemd
a3944b
+	 * backlog.
a3944b
+	 */
a3944b
 
a3944b
-	f = fopen(DEFAULT_PIDFILE, "r");
a3944b
-	if (!f) {
a3944b
-		if (errno != ENOENT)
a3944b
-			condlog(4, "can't open " DEFAULT_PIDFILE ": %s",
a3944b
-				strerror(errno));
a3944b
-		return 0;
a3944b
-	}
a3944b
-	if (!fgets(buf, sizeof(buf), f)) {
a3944b
-		if (ferror(f))
a3944b
-			condlog(4, "read of " DEFAULT_PIDFILE " failed: %s",
a3944b
-				strerror(errno));
a3944b
-		fclose(f);
a3944b
-		return 0;
a3944b
-	}
a3944b
-	fclose(f);
a3944b
-	errno = 0;
a3944b
-	strchop(buf);
a3944b
-	pid = strtol(buf, &end, 10);
a3944b
-	if (errno != 0 || pid <= 0 || *end != '\0') {
a3944b
-		condlog(4, "invalid contents in " DEFAULT_PIDFILE ": '%s'",
a3944b
-			buf);
a3944b
-		return 0;
a3944b
-	}
a3944b
-	snprintf(path, sizeof(path), "/proc/%d/comm", pid);
a3944b
-	f = fopen(path, "r");
a3944b
-	if (!f) {
a3944b
-		if (errno != ENOENT)
a3944b
-			condlog(4, "can't open %s: %s", path, strerror(errno));
a3944b
-		return 0;
a3944b
-	}
a3944b
-	if (!fgets(buf, sizeof(buf), f)) {
a3944b
-		if (ferror(f))
a3944b
-			condlog(4, "read of %s failed: %s", path,
a3944b
-				strerror(errno));
a3944b
-		fclose(f);
a3944b
-		return 0;
a3944b
-	}
a3944b
-	fclose(f);
a3944b
-	strchop(buf);
a3944b
-	if (strcmp(buf, "multipathd") != 0)
a3944b
-		return 0;
a3944b
+	fd = __mpath_connect(1);
a3944b
+	if (fd == -1) {
a3944b
+		if (errno == EAGAIN)
a3944b
+			condlog(3, "daemon backlog exceeded");
a3944b
+		else
a3944b
+			return 0;
a3944b
+	} else
a3944b
+		close(fd);
a3944b
 	return 1;
a3944b
 }
a3944b
 
a1c519
@@ -1086,7 +1060,7 @@ main (int argc, char *argv[])
a3944b
 	}
a3944b
 	if (cmd == CMD_VALID_PATH &&
a3944b
 	    dev_type == DEV_UEVENT) {
a3944b
-		if (!is_multipathd_running()) {
a3944b
+		if (!test_multipathd_socket()) {
a3944b
 			condlog(3, "%s: daemon is not running", dev);
a3944b
 			if (!systemd_service_enabled(dev)) {
a1c519
 				r = print_cmd_valid(RTVL_NO, NULL, conf);
a3944b
-- 
a3944b
2.17.2
a3944b