|
|
60de42 |
From 142169da59518ee17bbf885dfbf42525fd2c1d0b Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
|
|
|
60de42 |
Date: Tue, 31 Jan 2017 20:06:21 +0100
|
|
|
60de42 |
Subject: [PATCH 1/2] Low: libservices (sync): ensure no zombie is left behind
|
|
|
60de42 |
|
|
|
60de42 |
It could happen because this parent that is to wait for its
|
|
|
60de42 |
not-well-behaved child is knowlingly not blocking any signal (beside
|
|
|
60de42 |
SIGCHLD explicitly in case of using signalfd facility) and delivery of
|
|
|
60de42 |
such signal can interrupt waitpid, at least on paper, so protect
|
|
|
60de42 |
against this accordingly.
|
|
|
60de42 |
|
|
|
60de42 |
Speaking of SIGCHLD in plain self-pipe (not signalfd one) context, while
|
|
|
60de42 |
there's no clash with other synchronous actions, it may be the case with
|
|
|
60de42 |
asynchronous ones (or for that matter, arbitrary other fork-related
|
|
|
60de42 |
activities in the main program a library can have no idea about), and
|
|
|
60de42 |
this is exactly what could interrupt waitpid for real.
|
|
|
60de42 |
---
|
|
|
60de42 |
lib/services/services_linux.c | 4 ++--
|
|
|
60de42 |
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
|
|
|
60de42 |
index cd7fd3f..ffb74ef 100644
|
|
|
60de42 |
--- a/lib/services/services_linux.c
|
|
|
60de42 |
+++ b/lib/services/services_linux.c
|
|
|
60de42 |
@@ -517,7 +517,7 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
|
|
|
60de42 |
if (1) {
|
|
|
60de42 |
/* Clear out the sigchld pipe. */
|
|
|
60de42 |
char ch;
|
|
|
60de42 |
- while (read(sfd, &ch, 1) == 1);
|
|
|
60de42 |
+ while (read(sfd, &ch, 1) == 1) /*omit*/;
|
|
|
60de42 |
#endif
|
|
|
60de42 |
wait_rc = waitpid(op->pid, &status, WNOHANG);
|
|
|
60de42 |
|
|
|
60de42 |
@@ -567,7 +567,7 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
|
|
|
60de42 |
*
|
|
|
60de42 |
* This makes it safe to skip WNOHANG here
|
|
|
60de42 |
*/
|
|
|
60de42 |
- waitpid(op->pid, &status, 0);
|
|
|
60de42 |
+ while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
|
|
|
60de42 |
|
|
|
60de42 |
} else if (WIFEXITED(status)) {
|
|
|
60de42 |
op->status = PCMK_LRM_OP_DONE;
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|
|
|
60de42 |
|
|
|
60de42 |
From 9acc32260562df262a768bd2259372d16d90283b Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
|
|
|
60de42 |
Date: Tue, 31 Jan 2017 20:49:04 +0100
|
|
|
60de42 |
Subject: [PATCH 2/2] Low: libservices(sync): partially prevent killing foreign
|
|
|
60de42 |
process
|
|
|
60de42 |
|
|
|
60de42 |
Do not attempt sending SIGKILL to process for which previous
|
|
|
60de42 |
waitpid(pid, &st, WNOHANG) indicated ECHILD issue. This might
|
|
|
60de42 |
be a result of clashing with another thread or an orthogonal
|
|
|
60de42 |
signal handler getting to reap the child first.
|
|
|
60de42 |
|
|
|
60de42 |
For good measure, repeat non-hanging waitpid test right before
|
|
|
60de42 |
killing -- it won't prevent illustrated race, but will limit it
|
|
|
60de42 |
a bit more. That's unlikely, sad path, hence without regularly
|
|
|
60de42 |
imposed performance penalty. And when we can pay more attention
|
|
|
60de42 |
to prevent killing innocent processes (the code in question is
|
|
|
60de42 |
commonly run as root so nothing will prevent that), we should.
|
|
|
60de42 |
---
|
|
|
60de42 |
lib/services/services_linux.c | 36 +++++++++++++++++++++---------------
|
|
|
60de42 |
1 file changed, 21 insertions(+), 15 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
|
|
|
60de42 |
index ffb74ef..16f25f3 100644
|
|
|
60de42 |
--- a/lib/services/services_linux.c
|
|
|
60de42 |
+++ b/lib/services/services_linux.c
|
|
|
60de42 |
@@ -521,11 +521,19 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
|
|
|
60de42 |
#endif
|
|
|
60de42 |
wait_rc = waitpid(op->pid, &status, WNOHANG);
|
|
|
60de42 |
|
|
|
60de42 |
- if (wait_rc < 0){
|
|
|
60de42 |
- crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid);
|
|
|
60de42 |
-
|
|
|
60de42 |
- } else if (wait_rc > 0) {
|
|
|
60de42 |
+ if (wait_rc > 0) {
|
|
|
60de42 |
break;
|
|
|
60de42 |
+
|
|
|
60de42 |
+ } else if (wait_rc < 0){
|
|
|
60de42 |
+ if (errno == ECHILD) {
|
|
|
60de42 |
+ /* Here, don't dare to kill and bail out... */
|
|
|
60de42 |
+ break;
|
|
|
60de42 |
+
|
|
|
60de42 |
+ } else {
|
|
|
60de42 |
+ /* ...otherwise pretend process still runs. */
|
|
|
60de42 |
+ wait_rc = 0;
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+ crm_perror(LOG_ERR, "waitpid() for %d failed", op->pid);
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
@@ -547,9 +555,8 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
|
|
|
60de42 |
|
|
|
60de42 |
crm_trace("Child done: %d", op->pid);
|
|
|
60de42 |
if (wait_rc <= 0) {
|
|
|
60de42 |
- int killrc = kill(op->pid, SIGKILL);
|
|
|
60de42 |
-
|
|
|
60de42 |
op->rc = PCMK_OCF_UNKNOWN_ERROR;
|
|
|
60de42 |
+
|
|
|
60de42 |
if (op->timeout > 0 && timeout <= 0) {
|
|
|
60de42 |
op->status = PCMK_LRM_OP_TIMEOUT;
|
|
|
60de42 |
crm_warn("%s:%d - timed out after %dms", op->id, op->pid, op->timeout);
|
|
|
60de42 |
@@ -558,16 +565,15 @@ action_synced_wait(svc_action_t * op, sigset_t *mask)
|
|
|
60de42 |
op->status = PCMK_LRM_OP_ERROR;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
- if (killrc && errno != ESRCH) {
|
|
|
60de42 |
- crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
|
|
|
60de42 |
+ /* If only child hasn't been successfully waited for, yet.
|
|
|
60de42 |
+ This is to limit killing wrong target a bit more. */
|
|
|
60de42 |
+ if (wait_rc == 0 && waitpid(op->pid, &status, WNOHANG) == 0) {
|
|
|
60de42 |
+ if (kill(op->pid, SIGKILL)) {
|
|
|
60de42 |
+ crm_err("kill(%d, KILL) failed: %d", op->pid, errno);
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+ /* Safe to skip WNOHANG here as we sent non-ignorable signal. */
|
|
|
60de42 |
+ while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
|
|
|
60de42 |
}
|
|
|
60de42 |
- /*
|
|
|
60de42 |
- * From sigprocmask(2):
|
|
|
60de42 |
- * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored.
|
|
|
60de42 |
- *
|
|
|
60de42 |
- * This makes it safe to skip WNOHANG here
|
|
|
60de42 |
- */
|
|
|
60de42 |
- while (waitpid(op->pid, &status, 0) == (pid_t) -1 && errno == EINTR) /*omit*/;
|
|
|
60de42 |
|
|
|
60de42 |
} else if (WIFEXITED(status)) {
|
|
|
60de42 |
op->status = PCMK_LRM_OP_DONE;
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|