diff --git a/SOURCES/bz1115024-pcmk-handle_systemd_reporting_done_before_a_resource_is_actually_stopped.patch b/SOURCES/bz1115024-pcmk-handle_systemd_reporting_done_before_a_resource_is_actually_stopped.patch new file mode 100644 index 0000000..cfeaf2b --- /dev/null +++ b/SOURCES/bz1115024-pcmk-handle_systemd_reporting_done_before_a_resource_is_actually_stopped.patch @@ -0,0 +1,156 @@ +commit 0ea59c13caf51db459bfc6448ce8b7661778405d +Author: Andrew Beekhof +Date: Fri Jun 27 13:26:57 2014 +1000 + + Fix: lrmd: Handle systemd reporting 'done' before a resource is actually stopped + + (cherry picked from commit 3bd6c30adbb46891ee962cd2c1f2e191da88b808) + +diff --git a/lib/services/systemd.c b/lib/services/systemd.c +index a28ae14..f9d6d29 100644 +--- a/lib/services/systemd.c ++++ b/lib/services/systemd.c +@@ -422,6 +422,8 @@ systemd_unit_exec(svc_action_t * op, gboolean synchronous) + + if (g_strcmp0(state, "active") == 0) { + op->rc = PCMK_OCF_OK; ++ } else if (g_strcmp0(state, "activating") == 0) { ++ op->rc = PCMK_OCF_PENDING; + } else { + op->rc = PCMK_OCF_NOT_RUNNING; + } +diff --git a/lrmd/lrmd.c b/lrmd/lrmd.c +index 517e98f..5443fa4 100644 +--- a/lrmd/lrmd.c ++++ b/lrmd/lrmd.c +@@ -58,6 +58,7 @@ typedef struct lrmd_cmd_s { + char *origin; + char *rsc_id; + char *action; ++ char *real_action; + char *output; + char *userdata_str; + +@@ -359,7 +360,11 @@ send_cmd_complete_notify(lrmd_cmd_t * cmd) + + crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); + crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); +- crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); ++ if(cmd->real_action) { ++ crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); ++ } else { ++ crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); ++ } + crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); + crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); + +@@ -584,6 +589,8 @@ action_complete(svc_action_t * action) + lrmd_rsc_t *rsc; + lrmd_cmd_t *cmd = action->cb_data; + ++ bool goagain = false; ++ + if (!cmd) { + crm_err("LRMD action (%s) completed does not match any known operations.", action->id); + return; +@@ -604,6 +611,30 @@ action_complete(svc_action_t * action) + } else if (action->stdout_data) { + cmd->output = strdup(action->stdout_data); + } ++ ++ if (rsc && safe_str_eq(rsc->class, "systemd")) { ++ if(safe_str_eq(cmd->action, "start")) { ++ /* systemd I curse thee! ++ * ++ * systemd returns from start actions after the start _begins_ ++ * not after it completes. ++ * ++ * So we have to jump through a few hoops so that we don't ++ * report 'complete' to the rest of pacemaker until, you know, ++ * its actually done. ++ */ ++ goagain = true; ++ cmd->real_action = cmd->action; ++ cmd->action = strdup("monitor"); ++ ++ } else if(cmd->real_action) { ++ /* Ok, so this is the follow up monitor action to check if start actually completed */ ++ if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) { ++ goagain = true; ++ } ++ } ++ } ++ + #if SUPPORT_NAGIOS + if (rsc && safe_str_eq(rsc->class, "nagios")) { + if (safe_str_eq(cmd->action, "monitor") && +@@ -612,41 +643,46 @@ action_complete(svc_action_t * action) + cmd->exec_rc = PCMK_OCF_NOT_RUNNING; + + } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { +- int time_sum = 0; +- int timeout_left = 0; +- int delay = cmd->timeout_orig / 10; ++ goagain = true; ++ } ++ } ++#endif ++ ++ if(goagain) { ++ int time_sum = 0; ++ int timeout_left = 0; ++ int delay = cmd->timeout_orig / 10; + + # ifdef HAVE_SYS_TIMEB_H +- struct timeb now = { 0, }; ++ struct timeb now = { 0, }; + +- ftime(&now); +- time_sum = time_diff_ms(&now, &cmd->t_first_run); +- timeout_left = cmd->timeout_orig - time_sum; +- if (delay < timeout_left) { +- cmd->start_delay = delay; +- cmd->timeout = timeout_left; ++ ftime(&now); ++ time_sum = time_diff_ms(&now, &cmd->t_first_run); ++ timeout_left = cmd->timeout_orig - time_sum; ++ if (delay < timeout_left) { ++ cmd->start_delay = delay; ++ cmd->timeout = timeout_left; + ++ if(cmd->exec_rc != PCMK_OCF_OK) { + crm_notice + ("%s %s failed (rc=%d): re-scheduling (time_sum=%dms, start_delay=%dms, timeout=%dms)", + cmd->rsc_id, cmd->action, cmd->exec_rc, time_sum, cmd->start_delay, + cmd->timeout); ++ } + +- cmd->lrmd_op_status = 0; +- cmd->last_pid = 0; +- memset(&cmd->t_run, 0, sizeof(cmd->t_run)); +- memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); +- free(cmd->output); +- cmd->output = NULL; ++ cmd->lrmd_op_status = 0; ++ cmd->last_pid = 0; ++ memset(&cmd->t_run, 0, sizeof(cmd->t_run)); ++ memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); ++ free(cmd->output); ++ cmd->output = NULL; + +- rsc->active = NULL; +- schedule_lrmd_cmd(rsc, cmd); +- return; +- } +-# endif ++ rsc->active = NULL; ++ schedule_lrmd_cmd(rsc, cmd); ++ return; + } ++# endif + } +-#endif +- + cmd_finalize(cmd, rsc); + } + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index b3df2d7..a1d8b4e 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -2,7 +2,7 @@ %global uname hacluster %global pcmk_docdir %{_docdir}/%{name} -%global specversion 31 +%global specversion 32 %global upstream_prefix pacemaker %global upstream_version Pacemaker-1.1.10 @@ -181,6 +181,7 @@ Patch118: bz1078078-pcmk-crm_report_suppress_logging_errors_after_the_tar Patch119: bz1078078-pcmk-fence_using_all_required_devices.patch Patch120: bz1078078-pcmk-execute_all_required_fencing_devices_regardless_of_what_topology_level_they_are_at.patch Patch121: bz1078078-pcmk-default_to_off_when_agent_does_not_advertise_reboot_in_metadata.patch +Patch122: bz1115024-pcmk-handle_systemd_reporting_done_before_a_resource_is_actually_stopped.patch BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) AutoReqProv: on @@ -649,6 +650,11 @@ exit 0 %doc AUTHORS %changelog +* Fri Jul 04 2014 Andrew Beekhof - 1.1.10-32 + +- Fix: lrmd: Handle systemd reporting 'done' before a resource is actually stopped + Resolves: rhbz#1115024 + * Thu Apr 17 2014 David Vossel - 1.1.10-31 - fencing: Fence using all required devices