From 19d273ae5831f40e1816b138a56260ddd3448a4e Mon Sep 17 00:00:00 2001
From: Andrew Beekhof <andrew@beekhof.net>
Date: Fri, 12 Aug 2016 10:03:37 +1000
Subject: [PATCH] Fix: crmd: Resend the shutdown request if the DC forgets
As seen in:
https://bugzilla.redhat.com/show_bug.cgi?id=1310486
Scenario needs very poor timing and some bad luck:
1. Start a node and wait for it to become the DC
2. Start a second node
3. Tell the second node to stop while it is in the process of
negotiating with the DC.
Specifically, just after do_cl_join_finalize_respond() is called on
the second node.
4. Cross your fingers that somehow the shutdown=0 update makes it to
attrd _after_ the DC sets shutdown=${large int}
Under these conditions, the request to shut down will be lost and the DC
will feel free to start services on the second node.
---
crmd/lrm.c | 36 ++++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/crmd/lrm.c b/crmd/lrm.c
index c987e49..3e32f33 100644
--- a/crmd/lrm.c
+++ b/crmd/lrm.c
@@ -2025,6 +2025,7 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat
fsa_data_t *msg_data = NULL;
const char *transition = NULL;
gboolean stop_recurring = FALSE;
+ bool send_nack = FALSE;
CRM_CHECK(rsc != NULL, return);
CRM_CHECK(operation != NULL, return);
@@ -2075,18 +2076,29 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat
/* now do the op */
crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
- if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) {
- if (safe_str_neq(operation, "fail")
- && safe_str_neq(operation, CRMD_ACTION_STOP)) {
- crm_info("Discarding attempt to perform action %s on %s in state %s",
- operation, rsc->id, fsa_state2string(fsa_state));
- op->rc = CRM_DIRECT_NACK_RC;
- op->op_status = PCMK_LRM_OP_ERROR;
- send_direct_ack(NULL, NULL, rsc, op, rsc->id);
- lrmd_free_event(op);
- free(op_id);
- return;
- }
+ if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
+ register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
+ send_nack = TRUE;
+
+ } else if (fsa_state != S_NOT_DC
+ && fsa_state != S_POLICY_ENGINE /* Recalculating */
+ && fsa_state != S_TRANSITION_ENGINE
+ && safe_str_neq(operation, "fail")
+ && safe_str_neq(operation, CRMD_ACTION_STOP)) {
+ send_nack = TRUE;
+ }
+
+ if(send_nack) {
+ crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
+ operation, rsc->id, fsa_state2string(fsa_state),
+ is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");
+
+ op->rc = CRM_DIRECT_NACK_RC;
+ op->op_status = PCMK_LRM_OP_ERROR;
+ send_direct_ack(NULL, NULL, rsc, op, rsc->id);
+ lrmd_free_event(op);
+ free(op_id);
+ return;
}
op_id = generate_op_key(rsc->id, op->op_type, op->interval);
--
1.8.3.1