From ba17007f04d2fdbd2147c14c7eedb0de137ff448 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 13 Dec 2019 11:38:49 -0600 Subject: [PATCH 05/10] Low: controller: don't clear shutdown locks when node rejoins Add new controld_delete_node_state() values for clearing resource history while preserving shutdown locks. This is accomplished by deleting all unlocked lrm_resource entries and all lrm_rsc_op entries, instead of the entire lrm subsection. --- crmd/cib.c | 22 +++++++++++++++++++++- crmd/crmd_utils.h | 2 ++ crmd/join_dc.c | 7 +++++-- crmd/remote_lrmd_ra.c | 18 +++++++++++------- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/crmd/cib.c b/crmd/cib.c index e8c6376..a9e4ed3 100644 --- a/crmd/cib.c +++ b/crmd/cib.c @@ -247,12 +247,21 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, // Node's lrm section (name 1x) #define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM +// Node's lrm_rsc_op entries and lrm_resource entries without lock (name 2x) +#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ + "|" XPATH_NODE_STATE \ + "//" XML_LRM_TAG_RESOURCE \ + "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ")]" + // Node's transient_attributes section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS // Everything under node_state (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" +// Unlocked history + transient attributes (name 3x) +#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS + /*! * \internal * \brief Delete subsection of a node's CIB node_state @@ -274,6 +283,11 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); desc = crm_strdup_printf("resource history for node %s", uname); break; + case controld_section_lrm_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname); + desc = crm_strdup_printf("resource history (other than shutdown " + "locks) for node %s", uname); + break; case controld_section_attrs: xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); desc = crm_strdup_printf("transient attributes for node %s", uname); @@ -282,6 +296,12 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); desc = crm_strdup_printf("all state for node %s", uname); break; + case controld_section_all_unlocked: + xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, + uname, uname, uname); + desc = crm_strdup_printf("all state (other than shutdown locks) " + "for node %s", uname); + break; } if (fsa_cib_conn == NULL) { @@ -290,7 +310,7 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, } else { int call_id; - options |= cib_quorum_override|cib_xpath; + options |= cib_quorum_override|cib_xpath|cib_multiple; call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, options); crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", desc, call_id, xpath); diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h index 9ecce88..77dcfc2 100644 --- a/crmd/crmd_utils.h +++ b/crmd/crmd_utils.h @@ -120,8 +120,10 @@ bool controld_action_is_recordable(const char *action); // Subsections of node_state enum controld_section_e { controld_section_lrm, + controld_section_lrm_unlocked, controld_section_attrs, controld_section_all, + controld_section_all_unlocked }; void controld_delete_node_state(const char *uname, diff --git a/crmd/join_dc.c b/crmd/join_dc.c index 8284695..1553078 100644 --- a/crmd/join_dc.c +++ b/crmd/join_dc.c @@ -534,6 +534,7 @@ do_dc_join_ack(long long action, int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); + enum controld_section_e section = controld_section_lrm; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); @@ -583,8 +584,10 @@ do_dc_join_ack(long long action, /* Update CIB with node's current LRM state. A new transition will be * triggered later, when the CIB notifies us of the change. */ - controld_delete_node_state(join_from, controld_section_lrm, - cib_scope_local); + if (controld_shutdown_lock_enabled) { + section = controld_section_lrm_unlocked; + } + controld_delete_node_state(join_from, section, cib_scope_local); if (safe_str_eq(join_from, fsa_our_uname)) { xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c index c4f58d6..3870431 100644 --- a/crmd/remote_lrmd_ra.c +++ b/crmd/remote_lrmd_ra.c @@ -1,5 +1,5 @@ -/* - * Copyright 2013-2019 the Pacemaker project contributors +/* + * Copyright 2013-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -191,17 +191,21 @@ remote_node_up(const char *node_name) int call_opt, call_id = 0; xmlNode *update, *state; crm_node_t *node; + enum controld_section_e section = controld_section_all; CRM_CHECK(node_name != NULL, return); crm_info("Announcing pacemaker_remote node %s", node_name); - /* Clear node's entire state (resource history and transient attributes). - * The transient attributes should and normally will be cleared when the - * node leaves, but since remote node state has a number of corner cases, - * clear them here as well, to be sure. + /* Clear node's entire state (resource history and transient attributes) + * other than shutdown locks. The transient attributes should and normally + * will be cleared when the node leaves, but since remote node state has a + * number of corner cases, clear them here as well, to be sure. */ call_opt = crmd_cib_smart_opt(); - controld_delete_node_state(node_name, controld_section_all, call_opt); + if (controld_shutdown_lock_enabled) { + section = controld_section_all_unlocked; + } + controld_delete_node_state(node_name, section, call_opt); /* Clear node's probed attribute */ update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); -- 1.8.3.1