Blame SOURCES/bz1115024-pcmk-handle_systemd_reporting_done_before_a_resource_is_actually_stopped.patch

60d645
commit 0ea59c13caf51db459bfc6448ce8b7661778405d
60d645
Author: Andrew Beekhof <andrew@beekhof.net>
60d645
Date:   Fri Jun 27 13:26:57 2014 +1000
60d645
60d645
    Fix: lrmd: Handle systemd reporting 'done' before a resource is actually stopped
60d645
    
60d645
    (cherry picked from commit 3bd6c30adbb46891ee962cd2c1f2e191da88b808)
60d645
60d645
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
60d645
index a28ae14..f9d6d29 100644
60d645
--- a/lib/services/systemd.c
60d645
+++ b/lib/services/systemd.c
60d645
@@ -422,6 +422,8 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous)
60d645
 
60d645
         if (g_strcmp0(state, "active") == 0) {
60d645
             op->rc = PCMK_OCF_OK;
60d645
+        } else if (g_strcmp0(state, "activating") == 0) {
60d645
+            op->rc = PCMK_OCF_PENDING;
60d645
         } else {
60d645
             op->rc = PCMK_OCF_NOT_RUNNING;
60d645
         }
60d645
diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c
60d645
index 517e98f..5443fa4 100644
60d645
--- a/lrmd/lrmd.c
60d645
+++ b/lrmd/lrmd.c
60d645
@@ -58,6 +58,7 @@ typedef struct lrmd_cmd_s {
60d645
     char *origin;
60d645
     char *rsc_id;
60d645
     char *action;
60d645
+    char *real_action;
60d645
     char *output;
60d645
     char *userdata_str;
60d645
 
60d645
@@ -359,7 +360,11 @@ send_cmd_complete_notify(lrmd_cmd_t * cmd)
60d645
 
60d645
     crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
60d645
     crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
60d645
-    crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
60d645
+    if(cmd->real_action) {
60d645
+        crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
60d645
+    } else {
60d645
+        crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
60d645
+    }
60d645
     crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
60d645
     crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
60d645
 
60d645
@@ -584,6 +589,8 @@ action_complete(svc_action_t * action)
60d645
     lrmd_rsc_t *rsc;
60d645
     lrmd_cmd_t *cmd = action->cb_data;
60d645
 
60d645
+    bool goagain = false;
60d645
+
60d645
     if (!cmd) {
60d645
         crm_err("LRMD action (%s) completed does not match any known operations.", action->id);
60d645
         return;
60d645
@@ -604,6 +611,30 @@ action_complete(svc_action_t * action)
60d645
     } else if (action->stdout_data) {
60d645
         cmd->output = strdup(action->stdout_data);
60d645
     }
60d645
+
60d645
+    if (rsc && safe_str_eq(rsc->class, "systemd")) {
60d645
+        if(safe_str_eq(cmd->action, "start")) {
60d645
+            /* systemd I curse thee!
60d645
+             *
60d645
+             * systemd returns from start actions after the start _begins_
60d645
+             * not after it completes.
60d645
+             *
60d645
+             * So we have to jump through a few hoops so that we don't
60d645
+             * report 'complete' to the rest of pacemaker until, you know,
60d645
+             * it's actually done.
60d645
+             */
60d645
+            goagain = true;
60d645
+            cmd->real_action = cmd->action;
60d645
+            cmd->action = strdup("monitor");
60d645
+
60d645
+        } else if(cmd->real_action) {
60d645
+            /* Ok, so this is the follow up monitor action to check if start actually completed */
60d645
+            if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) {
60d645
+                goagain = true;
60d645
+            }
60d645
+        }
60d645
+    }
60d645
+
60d645
 #if SUPPORT_NAGIOS
60d645
     if (rsc && safe_str_eq(rsc->class, "nagios")) {
60d645
         if (safe_str_eq(cmd->action, "monitor") &&
60d645
@@ -612,41 +643,46 @@ action_complete(svc_action_t * action)
60d645
             cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
60d645
 
60d645
         } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) {
60d645
-            int time_sum = 0;
60d645
-            int timeout_left = 0;
60d645
-            int delay = cmd->timeout_orig / 10;
60d645
+            goagain = true;
60d645
+        }
60d645
+    }
60d645
+#endif
60d645
+
60d645
+    if(goagain) {
60d645
+        int time_sum = 0;
60d645
+        int timeout_left = 0;
60d645
+        int delay = cmd->timeout_orig / 10;
60d645
 
60d645
 #  ifdef HAVE_SYS_TIMEB_H
60d645
-            struct timeb now = { 0, };
60d645
+        struct timeb now = { 0, };
60d645
 
60d645
-            ftime(&now);
60d645
-            time_sum = time_diff_ms(&now, &cmd->t_first_run);
60d645
-            timeout_left = cmd->timeout_orig - time_sum;
60d645
-            if (delay < timeout_left) {
60d645
-                cmd->start_delay = delay;
60d645
-                cmd->timeout = timeout_left;
60d645
+        ftime(&now);
60d645
+        time_sum = time_diff_ms(&now, &cmd->t_first_run);
60d645
+        timeout_left = cmd->timeout_orig - time_sum;
60d645
+        if (delay < timeout_left) {
60d645
+            cmd->start_delay = delay;
60d645
+            cmd->timeout = timeout_left;
60d645
 
60d645
+            if(cmd->exec_rc != PCMK_OCF_OK) {
60d645
                 crm_notice
60d645
                     ("%s %s failed (rc=%d): re-scheduling (time_sum=%dms, start_delay=%dms, timeout=%dms)",
60d645
                      cmd->rsc_id, cmd->action, cmd->exec_rc, time_sum, cmd->start_delay,
60d645
                      cmd->timeout);
60d645
+            }
60d645
 
60d645
-                cmd->lrmd_op_status = 0;
60d645
-                cmd->last_pid = 0;
60d645
-                memset(&cmd->t_run, 0, sizeof(cmd->t_run));
60d645
-                memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
60d645
-                free(cmd->output);
60d645
-                cmd->output = NULL;
60d645
+            cmd->lrmd_op_status = 0;
60d645
+            cmd->last_pid = 0;
60d645
+            memset(&cmd->t_run, 0, sizeof(cmd->t_run));
60d645
+            memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
60d645
+            free(cmd->output);
60d645
+            cmd->output = NULL;
60d645
 
60d645
-                rsc->active = NULL;
60d645
-                schedule_lrmd_cmd(rsc, cmd);
60d645
-                return;
60d645
-            }
60d645
-#  endif
60d645
+            rsc->active = NULL;
60d645
+            schedule_lrmd_cmd(rsc, cmd);
60d645
+            return;
60d645
         }
60d645
+#  endif
60d645
     }
60d645
-#endif
60d645
-
60d645
     cmd_finalize(cmd, rsc);
60d645
 }
60d645