From 223ab7251adcb8c6f6b96def138be58b1478c42b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 22 Nov 2019 17:03:20 -0600 Subject: [PATCH 09/18] Low: controller: mark shutdown-locked resources in resource history When a graph action indicates that the resource should be shutdown-locked to its node, record the shutdown lock time in active_op_t so that it is still available when the result comes back. When the result does come back, add "shutdown-lock" to its lrm_resource entry in the CIB status section -- as the timestamp if it's a successful stop or a probe finding the resource inactive, or as 0 to clear the lock for any other operation. --- daemons/controld/controld_control.c | 9 ++++- daemons/controld/controld_execd.c | 44 +++++++++++++++++++-- daemons/controld/controld_lrm.h | 1 + daemons/controld/controld_te_callbacks.c | 65 ++++++++++++++++++++++---------- daemons/controld/controld_utils.h | 1 + 5 files changed, 95 insertions(+), 25 deletions(-) diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 6c7f97c..c918a1e 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2019 the Pacemaker project contributors + * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -35,6 +35,7 @@ gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; +bool controld_shutdown_lock_enabled = false; /* A_HA_CONNECT */ void @@ -587,7 +588,10 @@ static pe_cluster_option crmd_opts[] = { { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, "How many times stonith can fail before it will no longer be attempted on a target" }, + + // Already documented in libpe_status (other values must be kept identical) { "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, + { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", &check_boolean, NULL, NULL }, }; /* *INDENT-ON* */ @@ -698,6 +702,9 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void value = crmd_pref(config_hash, "join-finalization-timeout"); finalization_timer->period_ms = crm_parse_interval_spec(value); + value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK); + controld_shutdown_lock_enabled = crm_is_true(value); + free(fsa_cluster_name); fsa_cluster_name = NULL; diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 17cc8d6..c0436a2 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -44,7 +44,8 @@ static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); -static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); +static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, time_t lock_time); static void lrm_connection_destroy(void) @@ -2171,7 +2172,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", op->rsc_id, 
op->op_type, op->interval_ms, node_name); - do_update_resource(node_name, rsc, op); + do_update_resource(node_name, rsc, op, 0); } static void @@ -2313,6 +2314,10 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = op->user_data? strdup(op->user_data) : NULL; + if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + &(pending->lock_time)) != pcmk_ok) { + pending->lock_time = 0; + } g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) @@ -2356,8 +2361,28 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use } } +/* Only successful stops, and probes that found the resource inactive, get locks + * recorded in the history. This ensures the resource stays locked to the node + * until it is active there again after the node comes back up. + */ +static bool +should_preserve_lock(lrmd_event_data_t *op) +{ + if (!controld_shutdown_lock_enabled) { + return false; + } + if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { + return true; + } + if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { + return true; + } + return false; +} + static int -do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) +do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, + lrmd_event_data_t *op, time_t lock_time) { /* @@ -2412,6 +2437,16 @@ do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); + if (lock_time != 0) { + /* Actions on a locked resource should either preserve the lock by + * recording it with the action result, or clear it. 
+ */ + if (!should_preserve_lock(op)) { + lock_time = 0; + } + crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + (long long) lock_time); + } if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); @@ -2616,7 +2651,8 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can - update_id = do_update_resource(node_name, rsc, op); + update_id = do_update_resource(node_name, rsc, op, + pending? pending->lock_time : 0); need_direct_ack = FALSE; } else if (op->rsc_deleted) { diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h index 7acac2a..da0582c 100644 --- a/daemons/controld/controld_lrm.h +++ b/daemons/controld/controld_lrm.h @@ -46,6 +46,7 @@ typedef struct active_op_s { int call_id; uint32_t flags; // bitmask of active_op_e time_t start_time; + time_t lock_time; char *rsc_id; char *op_type; char *op_key; diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index 25f0ab2..8506f26 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2019 the Pacemaker project contributors + * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -28,6 +28,17 @@ crm_trigger_t *transition_trigger = NULL; /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" +// An explicit shutdown-lock of 0 means the lock has been cleared +static bool +shutdown_lock_cleared(xmlNode *lrm_resource) +{ + time_t shutdown_lock = 0; + + return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, + &shutdown_lock) == pcmk_ok) + && (shutdown_lock == 0); +} + static void te_update_diff_v1(const char *event, xmlNode *diff) { @@ -106,33 +117,42 @@ te_update_diff_v1(const char *event, xmlNode *diff) } freeXpathObject(xpathObj); + // Check for lrm_resource entries + xpathObj = xpath_search(diff, + "//" F_CIB_UPDATE_RESULT + "//" XML_TAG_DIFF_ADDED + "//" XML_LRM_TAG_RESOURCE); + max = numXpathResults(xpathObj); + /* - * Updates by, or in response to, TE actions will never contain updates - * for more than one resource at a time, so such updates indicate an - * LRM refresh. - * - * In that case, start a new transition rather than check each result - * individually, which can result in _huge_ speedups in large clusters. + * Updates by, or in response to, graph actions will never affect more than + * one resource at a time, so such updates indicate an LRM refresh. In that + * case, start a new transition rather than check each result individually, + * which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. 
*/ - if (transition_graph->pending == 0) { - xpathObj = xpath_search(diff, - "//" F_CIB_UPDATE_RESULT - "//" XML_TAG_DIFF_ADDED - "//" XML_LRM_TAG_RESOURCE); - max = numXpathResults(xpathObj); - if (max > 1) { - crm_debug("Ignoring resource operation updates due to history refresh of %d resources", - max); - crm_log_xml_trace(diff, "lrm-refresh"); - abort_transition(INFINITY, tg_restart, "History refresh", NULL); - goto bail; + if ((transition_graph->pending == 0) && (max > 1)) { + crm_debug("Ignoring resource operation updates due to history refresh of %d resources", + max); + crm_log_xml_trace(diff, "lrm-refresh"); + abort_transition(INFINITY, tg_restart, "History refresh", NULL); + goto bail; + } + + if (max == 1) { + xmlNode *lrm_resource = getXpathResult(xpathObj, 0); + + if (shutdown_lock_cleared(lrm_resource)) { + // @TODO would be more efficient to abort once after transition done + abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", + lrm_resource); + // Still process results, so we stop timers and update failcounts } - freeXpathObject(xpathObj); } + freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = @@ -205,6 +225,11 @@ process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) rsc_op = __xml_next(rsc_op)) { process_graph_event(rsc_op, node); } + if (shutdown_lock_cleared(lrm_resource)) { + // @TODO would be more efficient to abort once after transition done + abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", + lrm_resource); + } } static void diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h index ca8cddb..8e31007 100644 --- a/daemons/controld/controld_utils.h +++ b/daemons/controld/controld_utils.h @@ -41,6 +41,7 @@ fsa_cib_anon_update(const char *section, xmlNode *data) { } extern gboolean fsa_has_quorum; +extern bool controld_shutdown_lock_enabled; extern int last_peer_update; extern int last_resource_update; -- 1.8.3.1