From a3944b1cc4902eb27ec04159bd46061c9df62981 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jul 29 2019 08:16:10 +0000 Subject: import device-mapper-multipath-0.7.8-7.el8_0.2 --- diff --git a/SOURCES/0024-BZ-1723746-check-on-multipathd-without-starting-it.patch b/SOURCES/0024-BZ-1723746-check-on-multipathd-without-starting-it.patch new file mode 100644 index 0000000..47300fb --- /dev/null +++ b/SOURCES/0024-BZ-1723746-check-on-multipathd-without-starting-it.patch @@ -0,0 +1,118 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Thu, 18 Apr 2019 12:49:46 -0500 +Subject: [PATCH] BZ 1700451: check on multipathd without starting it + +When "multipath -u" is run, it checks if multipathd is running. +Currently it does this by trying to connect to the multipathd socket. +This can cause problems during boot. The multipathd.socket systemd unit +file will cause "multipath -u" to wait until multipathd has been started +before continuing. If there is a lot of activity on the system, +multipathd may not start up immediately, causing block device +initialization to be delayed, potentially until after systemd times +out waiting for the device. To avoid this, multipath now checks if +multipathd is running by reading /run/multipathd.pid and checking the +/proc/PID/comm to verify that multipathd is really running with this +pid. This avoids forcing "multipath -u" to wait on multipathd starting +up. + +As an alternative to this patch, multipath could simply switch the order +of the calls to systemd_service_enabled() and mpath_connect(). This would +make multipath only try to connect with multipathd if it wasn't enabled in +systemd, so that it wouldn't autostart. + +Another alternative is to do away with multipathd.socket. Since multipathd +needs to always be running in order to get uevents, there isn't much value +in having it autoactivate when it gets an interactive command. 
+ +Signed-off-by: Benjamin Marzinski +--- + multipath/main.c | 60 +++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 54 insertions(+), 6 deletions(-) + +diff --git a/multipath/main.c b/multipath/main.c +index 0545226..c2aa20d 100644 +--- a/multipath/main.c ++++ b/multipath/main.c +@@ -850,6 +850,58 @@ out: + return r; + } + ++int is_multipathd_running(void) ++{ ++ FILE *f = NULL; ++ char buf[16]; ++ char path[PATH_MAX]; ++ int pid; ++ char *end; ++ ++ f = fopen(DEFAULT_PIDFILE, "r"); ++ if (!f) { ++ if (errno != ENOENT) ++ condlog(4, "can't open " DEFAULT_PIDFILE ": %s", ++ strerror(errno)); ++ return 0; ++ } ++ if (!fgets(buf, sizeof(buf), f)) { ++ if (ferror(f)) ++ condlog(4, "read of " DEFAULT_PIDFILE " failed: %s", ++ strerror(errno)); ++ fclose(f); ++ return 0; ++ } ++ fclose(f); ++ errno = 0; ++ strchop(buf); ++ pid = strtol(buf, &end, 10); ++ if (errno != 0 || pid <= 0 || *end != '\0') { ++ condlog(4, "invalid contents in " DEFAULT_PIDFILE ": '%s'", ++ buf); ++ return 0; ++ } ++ snprintf(path, sizeof(path), "/proc/%d/comm", pid); ++ f = fopen(path, "r"); ++ if (!f) { ++ if (errno != ENOENT) ++ condlog(4, "can't open %s: %s", path, strerror(errno)); ++ return 0; ++ } ++ if (!fgets(buf, sizeof(buf), f)) { ++ if (ferror(f)) ++ condlog(4, "read of %s failed: %s", path, ++ strerror(errno)); ++ fclose(f); ++ return 0; ++ } ++ fclose(f); ++ strchop(buf); ++ if (strcmp(buf, "multipathd") != 0) ++ return 0; ++ return 1; ++} ++ + int + main (int argc, char *argv[]) + { +@@ -1031,17 +1083,13 @@ main (int argc, char *argv[]) + } + if (cmd == CMD_VALID_PATH && + dev_type == DEV_UEVENT) { +- int fd; +- +- fd = mpath_connect(); +- if (fd == -1) { ++ if (!is_multipathd_running()) { + condlog(3, "%s: daemon is not running", dev); + if (!systemd_service_enabled(dev)) { + r = print_cmd_valid(1, NULL, conf); + goto out; + } +- } else +- mpath_disconnect(fd); ++ } + } + + if (cmd == CMD_REMOVE_WWID && !dev) { +-- +2.17.2 + diff --git 
a/SOURCES/0025-BZ-1723746-test-socket-connection-in-non-blocking-mo.patch b/SOURCES/0025-BZ-1723746-test-socket-connection-in-non-blocking-mo.patch new file mode 100644 index 0000000..b1dd2ab --- /dev/null +++ b/SOURCES/0025-BZ-1723746-test-socket-connection-in-non-blocking-mo.patch @@ -0,0 +1,223 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Martin Wilck +Date: Wed, 24 Apr 2019 11:07:59 +0200 +Subject: [PATCH] BZ 1700451: test socket connection in non-blocking mode + +Since commit d7188fcd "multipathd: start daemon after udev trigger", +multipathd startup is delayed during boot until after "udev settle" +terminates. But "multipath -u" is run by udev workers for storage devices, +and attempts to connect to the multipathd socket. This causes a start job +for multipathd to be scheduled by systemd, but that job won't be started +until "udev settle" finishes. This is not a problem on systems with 129 or +fewer storage units, because the connect() call of "multipath -u" will +succeed anyway. But on larger systems, the listen backlog of the systemd +socket can be exceeded, which causes connect() calls for the socket to +block until multipathd starts up and begins calling accept(). This creates +a deadlock situation, because "multipath -u" (called by udev workers) +blocks, and thus "udev settle" doesn't finish, delaying multipathd +startup. This situation then persists until either the workers or "udev +settle" time out. In the former case, path devices might be misclassified +as non-multipath devices by "multipath -u". + +Fix this by using a non-blocking socket fd for connect() and interpreting +errno appropriately. + +This patch reverts most of the changes from commit 8cdf6661 "multipath: +check on multipathd without starting it". Instead, "multipath -u" does +access the socket and start multipathd again (which is what we want IMO), +but it is now able to detect and handle the "full backlog" situation. 
+ +Signed-off-by: Martin Wilck + +V2: + +Use same error reporting convention in __mpath_connect() as in +mpath_connect() (Hannes Reinecke). We can't easily change the latter, +because it's part of the "public" libmpathcmd API. + +Signed-off-by: Benjamin Marzinski +--- + libmpathcmd/mpath_cmd.c | 24 +++++++++++++- + libmpathcmd/mpath_cmd.h | 15 +++++++++ + multipath/main.c | 70 +++++++++++++---------------------------- + 3 files changed, 60 insertions(+), 49 deletions(-) + +diff --git a/libmpathcmd/mpath_cmd.c b/libmpathcmd/mpath_cmd.c +index 61e6a98..28b2b45 100644 +--- a/libmpathcmd/mpath_cmd.c ++++ b/libmpathcmd/mpath_cmd.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #include "mpath_cmd.h" + +@@ -93,10 +94,11 @@ static size_t write_all(int fd, const void *buf, size_t len) + /* + * connect to a unix domain socket + */ +-int mpath_connect(void) ++int __mpath_connect(int nonblocking) + { + int fd, len; + struct sockaddr_un addr; ++ int flags = 0; + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_LOCAL; +@@ -108,14 +110,34 @@ int mpath_connect(void) + if (fd == -1) + return -1; + ++ if (nonblocking) { ++ flags = fcntl(fd, F_GETFL, 0); ++ if (flags != -1) ++ (void)fcntl(fd, F_SETFL, flags|O_NONBLOCK); ++ } ++ + if (connect(fd, (struct sockaddr *)&addr, len) == -1) { ++ int err = errno; ++ + close(fd); ++ errno = err; + return -1; + } + ++ if (nonblocking && flags != -1) ++ (void)fcntl(fd, F_SETFL, flags); ++ + return fd; + } + ++/* ++ * connect to a unix domain socket ++ */ ++int mpath_connect(void) ++{ ++ return __mpath_connect(0); ++} ++ + int mpath_disconnect(int fd) + { + return close(fd); +diff --git a/libmpathcmd/mpath_cmd.h b/libmpathcmd/mpath_cmd.h +index df9d938..aa2850b 100644 +--- a/libmpathcmd/mpath_cmd.h ++++ b/libmpathcmd/mpath_cmd.h +@@ -28,6 +28,21 @@ extern "C" { + #define DEFAULT_REPLY_TIMEOUT 4000 + + ++/* ++ * DESCRIPTION: ++ * Same as mpath_connect() (see below) except for the "nonblocking" ++ * parameter. 
++ * If "nonblocking" is set, connects in non-blocking mode. This is ++ * useful to avoid blocking if the listening socket's backlog is ++ * exceeded. In this case, errno will be set to EAGAIN. ++ * In case of success, the returned file descriptor is in blocking ++ * mode, even if "nonblocking" was true. ++ * ++ * RETURNS: ++ * A file descriptor on success. -1 on failure (with errno set). ++ */ ++int __mpath_connect(int nonblocking); ++ + /* + * DESCRIPTION: + * Connect to the running multipathd daemon. On systems with the +diff --git a/multipath/main.c b/multipath/main.c +index c2aa20d..6f2e64b 100644 +--- a/multipath/main.c ++++ b/multipath/main.c +@@ -850,55 +850,29 @@ out: + return r; + } + +-int is_multipathd_running(void) ++static int test_multipathd_socket(void) + { +- FILE *f = NULL; +- char buf[16]; +- char path[PATH_MAX]; +- int pid; +- char *end; ++ int fd; ++ /* ++ * "multipath -u" may be run before the daemon is started. In this ++ * case, systemd might own the socket but might delay multipathd ++ * startup until some other unit (udev settle!) has finished ++ * starting. With many LUNs, the listen backlog may be exceeded, which ++ * would cause connect() to block. This causes udev workers calling ++ * "multipath -u" to hang, and thus creates a deadlock, until "udev ++ * settle" times out. To avoid this, call connect() in non-blocking ++ * mode here, and take EAGAIN as indication for a filled-up systemd ++ * backlog. 
++ */ + +- f = fopen(DEFAULT_PIDFILE, "r"); +- if (!f) { +- if (errno != ENOENT) +- condlog(4, "can't open " DEFAULT_PIDFILE ": %s", +- strerror(errno)); +- return 0; +- } +- if (!fgets(buf, sizeof(buf), f)) { +- if (ferror(f)) +- condlog(4, "read of " DEFAULT_PIDFILE " failed: %s", +- strerror(errno)); +- fclose(f); +- return 0; +- } +- fclose(f); +- errno = 0; +- strchop(buf); +- pid = strtol(buf, &end, 10); +- if (errno != 0 || pid <= 0 || *end != '\0') { +- condlog(4, "invalid contents in " DEFAULT_PIDFILE ": '%s'", +- buf); +- return 0; +- } +- snprintf(path, sizeof(path), "/proc/%d/comm", pid); +- f = fopen(path, "r"); +- if (!f) { +- if (errno != ENOENT) +- condlog(4, "can't open %s: %s", path, strerror(errno)); +- return 0; +- } +- if (!fgets(buf, sizeof(buf), f)) { +- if (ferror(f)) +- condlog(4, "read of %s failed: %s", path, +- strerror(errno)); +- fclose(f); +- return 0; +- } +- fclose(f); +- strchop(buf); +- if (strcmp(buf, "multipathd") != 0) +- return 0; ++ fd = __mpath_connect(1); ++ if (fd == -1) { ++ if (errno == EAGAIN) ++ condlog(3, "daemon backlog exceeded"); ++ else ++ return 0; ++ } else ++ close(fd); + return 1; + } + +@@ -1083,7 +1057,7 @@ main (int argc, char *argv[]) + } + if (cmd == CMD_VALID_PATH && + dev_type == DEV_UEVENT) { +- if (!is_multipathd_running()) { ++ if (!test_multipathd_socket()) { + condlog(3, "%s: daemon is not running", dev); + if (!systemd_service_enabled(dev)) { + r = print_cmd_valid(1, NULL, conf); +-- +2.17.2 + diff --git a/SPECS/device-mapper-multipath.spec b/SPECS/device-mapper-multipath.spec index 2934145..d047062 100644 --- a/SPECS/device-mapper-multipath.spec +++ b/SPECS/device-mapper-multipath.spec @@ -1,7 +1,7 @@ Summary: Tools to manage multipath devices using device-mapper Name: device-mapper-multipath Version: 0.7.8 -Release: 7%{?dist} +Release: 7%{?dist}.2 License: GPLv2 Group: System Environment/Base URL: http://christophe.varoqui.free.fr/ @@ -34,6 +34,8 @@ Patch0020: 
0020-BZ-1668693-disable-user_friendly_names-for-NetApp.patch Patch0021: 0021-BZ-1673167-Fix-miscounting-active-paths.patch Patch0022: 0022-BZ-1673167-ignore-failed-wwid-recheck.patch Patch0023: 0023-BZ-1673167-fixup-wwid-recheck.patch +Patch0024: 0024-BZ-1723746-check-on-multipathd-without-starting-it.patch +Patch0025: 0025-BZ-1723746-test-socket-connection-in-non-blocking-mo.patch # runtime Requires: %{name}-libs = %{version}-%{release} @@ -135,6 +137,9 @@ device-mapper-multipath's libdmmp C API library %patch0021 -p1 %patch0022 -p1 %patch0023 -p1 +%patch0024 -p1 +%patch0025 -p1 +cp %{SOURCE1} . cp %{SOURCE1} . %build @@ -258,6 +263,16 @@ fi %{_pkgconfdir}/libdmmp.pc %changelog +* Mon Jul 1 2019 Benjamin Marzinski 0.7.8-7.2 +- Fix spec file +- Related: bz #1723746 + +* Tue Jun 25 2019 Benjamin Marzinski 0.7.8-7.1 +- Add 0024-BZ-1723746-check-on-multipathd-without-starting-it.patch +- Add 0025-BZ-1723746-test-socket-connection-in-non-blocking-mo.patch +- Update CI testing +- Resolves: bz #1723746 + * Mon Feb 25 2019 Benjamin Marzinski 0.7.8-7 - Add 0023-BZ-1673167-fixup-wwid-recheck.patch - Resolves: bz #1673167