From 19d273ae5831f40e1816b138a56260ddd3448a4e Mon Sep 17 00:00:00 2001 From: Andrew Beekhof Date: Fri, 12 Aug 2016 10:03:37 +1000 Subject: [PATCH] Fix: crmd: Resend the shutdown request if the DC forgets As seen in: https://bugzilla.redhat.com/show_bug.cgi?id=1310486 Scenario needs very poor timing and some bad luck: 1. Start a node and wait for it to become the DC 2. Start a second node 3. Tell the second node to stop while it is in the process of negotiating with the DC. Specifically just after do_cl_join_finalize_respond() is called on the second node. 4. Cross your fingers that somehow the shutdown=0 update makes it to attrd _after_ the DC sets shutdown=${large int} Under these conditions, the request to shut down will be lost and the DC will feel free to start services on the second node. --- crmd/lrm.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/crmd/lrm.c b/crmd/lrm.c index c987e49..3e32f33 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -2025,6 +2025,7 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; + bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); @@ -2075,18 +2076,29 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); - if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { - if (safe_str_neq(operation, "fail") - && safe_str_neq(operation, CRMD_ACTION_STOP)) { - crm_info("Discarding attempt to perform action %s on %s in state %s", - operation, rsc->id, fsa_state2string(fsa_state)); - op->rc = CRM_DIRECT_NACK_RC; - op->op_status = PCMK_LRM_OP_ERROR; - send_direct_ack(NULL, NULL, rsc, op, rsc->id); - lrmd_free_event(op); - free(op_id); - return; - } + if
(is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { + register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); + send_nack = TRUE; + + } else if (fsa_state != S_NOT_DC + && fsa_state != S_POLICY_ENGINE /* Recalculating */ + && fsa_state != S_TRANSITION_ENGINE + && safe_str_neq(operation, "fail") + && safe_str_neq(operation, CRMD_ACTION_STOP)) { + send_nack = TRUE; + } + + if(send_nack) { + crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", + operation, rsc->id, fsa_state2string(fsa_state), + is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); + + op->rc = CRM_DIRECT_NACK_RC; + op->op_status = PCMK_LRM_OP_ERROR; + send_direct_ack(NULL, NULL, rsc, op, rsc->id); + lrmd_free_event(op); + free(op_id); + return; } op_id = generate_op_key(rsc->id, op->op_type, op->interval); -- 1.8.3.1