Blob Blame History Raw
From f657f0399f5fa6ba45dbc6bc46be6d869f907409 Mon Sep 17 00:00:00 2001
From: Alexey Tikhonov <atikhono@redhat.com>
Date: Wed, 6 May 2020 21:38:12 +0200
Subject: [PATCH 25/25] WATCHDOG: log process termination to the journal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds an explicit system journal message when a process is
terminated by its internal watchdog.

Resolves: https://github.com/SSSD/sssd/issues/5146

Reviewed-by: Pawel Polawski <ppolawsk@redhat.com>
(cherry picked from commit 65369f293b06ce0fe5622502bb32596bb50c523a)

Reviewed-by: Pavel Březina <pbrezina@redhat.com>
---
 src/monitor/monitor.c    | 29 +++++++++++++++++++++++------
 src/util/util.h          |  2 ++
 src/util/util_watchdog.c |  2 +-
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/monitor/monitor.c b/src/monitor/monitor.c
index ed40a920c..12250a15e 100644
--- a/src/monitor/monitor.c
+++ b/src/monitor/monitor.c
@@ -476,17 +476,34 @@ static int add_svc_conn_spy(struct mt_svc *svc)
 
 static void svc_child_info(struct mt_svc *svc, int wait_status)
 {
+    int exit_code = 0;
+    int pid = svc->pid;
+    const char *name = (svc->name ? svc->name : "");
+    const char *identity = (svc->identity ? svc->identity : "");
+
     if (WIFEXITED(wait_status)) {
-        DEBUG(SSSDBG_OP_FAILURE,
-              "Child [%d] exited with code [%d]\n",
-               svc->pid, WEXITSTATUS(wait_status));
+        exit_code = WEXITSTATUS(wait_status);
+        if (exit_code == SSS_WATCHDOG_EXIT_CODE) {
+            DEBUG(SSSDBG_CRIT_FAILURE,
+                  "Child [%d] ('%s':'%s') was terminated by own WATCHDOG\n",
+                  pid, name, identity);
+            sss_log(SSS_LOG_CRIT,
+                    "Child [%d] ('%s':'%s') was terminated by own WATCHDOG. "
+                    "Consult corresponding logs to figure out the reason.",
+                    pid, name, identity);
+        } else {
+            DEBUG(SSSDBG_OP_FAILURE,
+                  "Child [%d] ('%s':'%s') exited with code [%d]\n",
+                   pid, name, identity, exit_code);
+        }
     } else if (WIFSIGNALED(wait_status)) {
         DEBUG(SSSDBG_OP_FAILURE,
-              "Child [%d] terminated with signal [%d]\n",
-               svc->pid, WTERMSIG(wait_status));
+              "Child [%d] ('%s':'%s') terminated with signal [%d]\n",
+               pid, name, identity, WTERMSIG(wait_status));
     } else {
         DEBUG(SSSDBG_FATAL_FAILURE,
-              "Child [%d] did not exit cleanly\n", svc->pid);
+              "Child [%d] ('%s':'%s') did not exit cleanly\n",
+              pid, name, identity);
         /* Forcibly kill this child, just in case */
         kill(svc->pid, SIGKILL);
 
diff --git a/src/util/util.h b/src/util/util.h
index 8a754dbfd..8dc887cab 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -104,6 +104,8 @@ extern int dbus_activated;
 #define FLAGS_GEN_CONF 0x0008
 #define FLAGS_NO_WATCHDOG 0x0010
 
+#define SSS_WATCHDOG_EXIT_CODE 70 /* to match EX_SOFTWARE in sysexits.h */
+
 #define PIPE_INIT { -1, -1 }
 
 #define PIPE_FD_CLOSE(fd) do {      \
diff --git a/src/util/util_watchdog.c b/src/util/util_watchdog.c
index 0a4d83505..69160fbdf 100644
--- a/src/util/util_watchdog.c
+++ b/src/util/util_watchdog.c
@@ -75,7 +75,7 @@ static void watchdog_handler(int sig)
         if (getpid() == getpgrp()) {
             kill(-getpgrp(), SIGTERM);
         }
-        _exit(1);
+        _exit(SSS_WATCHDOG_EXIT_CODE);
     }
 }
 
-- 
2.21.1