|
|
5d5466 |
From d2e11da4b7fd0453e942f43e4196dc63b3dbd708 Mon Sep 17 00:00:00 2001
|
|
|
5d5466 |
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
|
|
5d5466 |
Date: Fri, 22 Feb 2019 13:30:27 +0100
|
|
|
5d5466 |
Subject: [RHEL7.7 PATCH 19/24] mdmon: wait for previous mdmon to exit during
|
|
|
5d5466 |
takeover
|
|
|
5d5466 |
|
|
|
5d5466 |
Since the patch c76242c5("mdmon: get safe mode delay file descriptor
|
|
|
5d5466 |
early"), safe_mode_delay is set properly by initrd mdmon. But in some
|
|
|
5d5466 |
cases with filesystem traffic since the very start of the system, it
|
|
|
5d5466 |
might take a while to transition to a clean state. Due to the fact that the new
|
|
|
5d5466 |
mdmon does not wait for the old one to exit - it might happen that the
|
|
|
5d5466 |
new one switches safe_mode_delay back to seconds before the old one exits.
|
|
|
5d5466 |
As a result, two mdmons are running concurrently on the same array.
|
|
|
5d5466 |
|
|
|
5d5466 |
Wait for the old mdmon to exit by pinging it with SIGUSR1 signal, just
|
|
|
5d5466 |
in case it is sleeping.
|
|
|
5d5466 |
|
|
|
5d5466 |
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
|
|
5d5466 |
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
|
|
5d5466 |
---
|
|
|
5d5466 |
mdmon.c | 14 +++++++++++---
|
|
|
5d5466 |
1 file changed, 11 insertions(+), 3 deletions(-)
|
|
|
5d5466 |
|
|
|
5d5466 |
diff --git a/mdmon.c b/mdmon.c
|
|
|
5d5466 |
index 0955fcc..ff985d2 100644
|
|
|
5d5466 |
--- a/mdmon.c
|
|
|
5d5466 |
+++ b/mdmon.c
|
|
|
5d5466 |
@@ -171,6 +171,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
|
|
5d5466 |
int fd;
|
|
|
5d5466 |
int n;
|
|
|
5d5466 |
long fl;
|
|
|
5d5466 |
+ int rv;
|
|
|
5d5466 |
|
|
|
5d5466 |
/* first rule of survival... don't off yourself */
|
|
|
5d5466 |
if (pid == getpid())
|
|
|
5d5466 |
@@ -201,9 +202,16 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
|
|
5d5466 |
fl &= ~O_NONBLOCK;
|
|
|
5d5466 |
fcntl(sock, F_SETFL, fl);
|
|
|
5d5466 |
n = read(sock, buf, 100);
|
|
|
5d5466 |
- /* Ignore result, it is just the wait that
|
|
|
5d5466 |
- * matters
|
|
|
5d5466 |
- */
|
|
|
5d5466 |
+
|
|
|
5d5466 |
+ /* If there is I/O going on it might take some time to get to
|
|
|
5d5466 |
+ * clean state. Wait for monitor to exit fully to avoid races.
|
|
|
5d5466 |
+ * Ping it with SIGUSR1 in case that it is sleeping */
|
|
|
5d5466 |
+ for (n = 0; n < 25; n++) {
|
|
|
5d5466 |
+ rv = kill(pid, SIGUSR1);
|
|
|
5d5466 |
+ if (rv < 0)
|
|
|
5d5466 |
+ break;
|
|
|
5d5466 |
+ usleep(200000);
|
|
|
5d5466 |
+ }
|
|
|
5d5466 |
}
|
|
|
5d5466 |
|
|
|
5d5466 |
void remove_pidfile(char *devname)
|
|
|
5d5466 |
--
|
|
|
5d5466 |
2.7.5
|
|
|
5d5466 |
|