|
|
c025cf |
From d2e11da4b7fd0453e942f43e4196dc63b3dbd708 Mon Sep 17 00:00:00 2001
|
|
|
c025cf |
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
|
|
c025cf |
Date: Fri, 22 Feb 2019 13:30:27 +0100
|
|
|
c025cf |
Subject: [RHEL7.7 PATCH 19/21] mdmon: wait for previous mdmon to exit during
|
|
|
c025cf |
takeover
|
|
|
c025cf |
|
|
|
c025cf |
Since the patch c76242c5("mdmon: get safe mode delay file descriptor
|
|
|
c025cf |
early"), safe_mode_delay is set properly by initrd mdmon. But in some
|
|
|
c025cf |
cases with filesystem traffic since the very start of the system, it
|
|
|
c025cf |
might take a while to transition to a clean state. Because the new
|
|
|
c025cf |
mdmon does not wait for the old one to exit - it might happen that the
|
|
|
c025cf |
new one switches safe_mode_delay back to seconds before the old one exits.
|
|
|
c025cf |
As a result, two mdmons are running concurrently on the same array.
|
|
|
c025cf |
|
|
|
c025cf |
Wait for the old mdmon to exit by pinging it with SIGUSR1 signal, just
|
|
|
c025cf |
in case it is sleeping.
|
|
|
c025cf |
|
|
|
c025cf |
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
|
|
c025cf |
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
|
|
c025cf |
---
|
|
|
c025cf |
mdmon.c | 14 +++++++++++---
|
|
|
c025cf |
1 file changed, 11 insertions(+), 3 deletions(-)
|
|
|
c025cf |
|
|
|
c025cf |
diff --git a/mdmon.c b/mdmon.c
|
|
|
c025cf |
index 0955fcc..ff985d2 100644
|
|
|
c025cf |
--- a/mdmon.c
|
|
|
c025cf |
+++ b/mdmon.c
|
|
|
c025cf |
@@ -171,6 +171,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
|
|
c025cf |
int fd;
|
|
|
c025cf |
int n;
|
|
|
c025cf |
long fl;
|
|
|
c025cf |
+ int rv;
|
|
|
c025cf |
|
|
|
c025cf |
/* first rule of survival... don't off yourself */
|
|
|
c025cf |
if (pid == getpid())
|
|
|
c025cf |
@@ -201,9 +202,16 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
|
|
c025cf |
fl &= ~O_NONBLOCK;
|
|
|
c025cf |
fcntl(sock, F_SETFL, fl);
|
|
|
c025cf |
n = read(sock, buf, 100);
|
|
|
c025cf |
- /* Ignore result, it is just the wait that
|
|
|
c025cf |
- * matters
|
|
|
c025cf |
- */
|
|
|
c025cf |
+
|
|
|
c025cf |
+ /* If there is I/O going on it might take some time to get to
|
|
|
c025cf |
+ * clean state. Wait for monitor to exit fully to avoid races.
|
|
|
c025cf |
+ * Ping it with SIGUSR1 in case that it is sleeping */
|
|
|
c025cf |
+ for (n = 0; n < 25; n++) {
|
|
|
c025cf |
+ rv = kill(pid, SIGUSR1);
|
|
|
c025cf |
+ if (rv < 0)
|
|
|
c025cf |
+ break;
|
|
|
c025cf |
+ usleep(200000);
|
|
|
c025cf |
+ }
|
|
|
c025cf |
}
|
|
|
c025cf |
|
|
|
c025cf |
void remove_pidfile(char *devname)
|
|
|
c025cf |
--
|
|
|
c025cf |
2.7.5
|
|
|
c025cf |
|