|
|
af918f |
From 11e8a3b9c8e35301b197724658ec2d243aec3336 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 22 Nov 2019 16:39:54 -0600
|
|
|
af918f |
Subject: [PATCH 01/11] Refactor: controller: rename struct recurring_op_s to
|
|
|
af918f |
active_op_t
|
|
|
af918f |
|
|
|
af918f |
... because it holds both recurring and pending non-recurring actions,
|
|
|
af918f |
and the name was confusing
|
|
|
af918f |
---
|
|
|
af918f |
crmd/crmd_lrm.h | 8 ++++----
|
|
|
af918f |
crmd/lrm.c | 18 +++++++++---------
|
|
|
af918f |
crmd/lrm_state.c | 4 ++--
|
|
|
af918f |
3 files changed, 15 insertions(+), 15 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
|
|
|
af918f |
index 0870817..7c6a3c4 100644
|
|
|
af918f |
--- a/crmd/crmd_lrm.h
|
|
|
af918f |
+++ b/crmd/crmd_lrm.h
|
|
|
af918f |
@@ -42,8 +42,8 @@ typedef struct resource_history_s {
|
|
|
af918f |
|
|
|
af918f |
void history_free(gpointer data);
|
|
|
af918f |
|
|
|
af918f |
-/* TODO - Replace this with lrmd_event_data_t */
|
|
|
af918f |
-struct recurring_op_s {
|
|
|
af918f |
+// In-flight action (recurring or pending)
|
|
|
af918f |
+typedef struct active_op_s {
|
|
|
af918f |
int call_id;
|
|
|
af918f |
int interval;
|
|
|
af918f |
gboolean remove;
|
|
|
af918f |
@@ -54,7 +54,7 @@ struct recurring_op_s {
|
|
|
af918f |
char *op_key;
|
|
|
af918f |
char *user_data;
|
|
|
af918f |
GHashTable *params;
|
|
|
af918f |
-};
|
|
|
af918f |
+} active_op_t;
|
|
|
af918f |
|
|
|
af918f |
typedef struct lrm_state_s {
|
|
|
af918f |
const char *node_name;
|
|
|
af918f |
@@ -171,4 +171,4 @@ void remote_ra_process_maintenance_nodes(xmlNode *xml);
|
|
|
af918f |
gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state);
|
|
|
af918f |
|
|
|
af918f |
void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
|
|
|
af918f |
- struct recurring_op_s *pending, xmlNode *action_xml);
|
|
|
af918f |
+ active_op_t *pending, xmlNode *action_xml);
|
|
|
af918f |
diff --git a/crmd/lrm.c b/crmd/lrm.c
|
|
|
af918f |
index 437840f..e459465 100644
|
|
|
af918f |
--- a/crmd/lrm.c
|
|
|
af918f |
+++ b/crmd/lrm.c
|
|
|
af918f |
@@ -400,7 +400,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
|
|
|
af918f |
GHashTableIter gIter;
|
|
|
af918f |
const char *key = NULL;
|
|
|
af918f |
rsc_history_t *entry = NULL;
|
|
|
af918f |
- struct recurring_op_s *pending = NULL;
|
|
|
af918f |
+ active_op_t *pending = NULL;
|
|
|
af918f |
|
|
|
af918f |
crm_debug("Checking for active resources before exit");
|
|
|
af918f |
|
|
|
af918f |
@@ -917,7 +917,7 @@ static gboolean
|
|
|
af918f |
lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
{
|
|
|
af918f |
const char *rsc = user_data;
|
|
|
af918f |
- struct recurring_op_s *pending = value;
|
|
|
af918f |
+ active_op_t *pending = value;
|
|
|
af918f |
|
|
|
af918f |
if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
|
|
|
af918f |
crm_info("Removing op %s:%d for deleted resource %s",
|
|
|
af918f |
@@ -1148,7 +1148,7 @@ cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op,
|
|
|
af918f |
{
|
|
|
af918f |
int rc = pcmk_ok;
|
|
|
af918f |
char *local_key = NULL;
|
|
|
af918f |
- struct recurring_op_s *pending = NULL;
|
|
|
af918f |
+ active_op_t *pending = NULL;
|
|
|
af918f |
|
|
|
af918f |
CRM_CHECK(op != 0, return FALSE);
|
|
|
af918f |
CRM_CHECK(rsc_id != NULL, return FALSE);
|
|
|
af918f |
@@ -1213,7 +1213,7 @@ cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
{
|
|
|
af918f |
gboolean remove = FALSE;
|
|
|
af918f |
struct cancel_data *data = user_data;
|
|
|
af918f |
- struct recurring_op_s *op = (struct recurring_op_s *)value;
|
|
|
af918f |
+ active_op_t *op = value;
|
|
|
af918f |
|
|
|
af918f |
if (crm_str_eq(op->op_key, data->key, TRUE)) {
|
|
|
af918f |
data->done = TRUE;
|
|
|
af918f |
@@ -2104,7 +2104,7 @@ stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
{
|
|
|
af918f |
gboolean remove = FALSE;
|
|
|
af918f |
struct stop_recurring_action_s *event = user_data;
|
|
|
af918f |
- struct recurring_op_s *op = (struct recurring_op_s *)value;
|
|
|
af918f |
+ active_op_t *op = value;
|
|
|
af918f |
|
|
|
af918f |
if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
|
|
|
af918f |
crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
|
|
|
af918f |
@@ -2119,7 +2119,7 @@ stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
{
|
|
|
af918f |
gboolean remove = FALSE;
|
|
|
af918f |
lrm_state_t *lrm_state = user_data;
|
|
|
af918f |
- struct recurring_op_s *op = (struct recurring_op_s *)value;
|
|
|
af918f |
+ active_op_t *op = value;
|
|
|
af918f |
|
|
|
af918f |
if (op->interval != 0) {
|
|
|
af918f |
crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
|
|
|
af918f |
@@ -2294,9 +2294,9 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat
|
|
|
af918f |
* for them to complete during shutdown
|
|
|
af918f |
*/
|
|
|
af918f |
char *call_id_s = make_stop_id(rsc->id, call_id);
|
|
|
af918f |
- struct recurring_op_s *pending = NULL;
|
|
|
af918f |
+ active_op_t *pending = NULL;
|
|
|
af918f |
|
|
|
af918f |
- pending = calloc(1, sizeof(struct recurring_op_s));
|
|
|
af918f |
+ pending = calloc(1, sizeof(active_op_t));
|
|
|
af918f |
crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
|
|
|
af918f |
|
|
|
af918f |
pending->call_id = call_id;
|
|
|
af918f |
@@ -2517,7 +2517,7 @@ did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
|
|
|
af918f |
|
|
|
af918f |
void
|
|
|
af918f |
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
|
|
|
af918f |
- struct recurring_op_s *pending, xmlNode *action_xml)
|
|
|
af918f |
+ active_op_t *pending, xmlNode *action_xml)
|
|
|
af918f |
{
|
|
|
af918f |
char *op_id = NULL;
|
|
|
af918f |
char *op_key = NULL;
|
|
|
af918f |
diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c
|
|
|
af918f |
index 8d07ef5..0f39c3d 100644
|
|
|
af918f |
--- a/crmd/lrm_state.c
|
|
|
af918f |
+++ b/crmd/lrm_state.c
|
|
|
af918f |
@@ -58,7 +58,7 @@ free_deletion_op(gpointer value)
|
|
|
af918f |
static void
|
|
|
af918f |
free_recurring_op(gpointer value)
|
|
|
af918f |
{
|
|
|
af918f |
- struct recurring_op_s *op = (struct recurring_op_s *)value;
|
|
|
af918f |
+ active_op_t *op = value;
|
|
|
af918f |
|
|
|
af918f |
free(op->user_data);
|
|
|
af918f |
free(op->rsc_id);
|
|
|
af918f |
@@ -75,7 +75,7 @@ fail_pending_op(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
{
|
|
|
af918f |
lrmd_event_data_t event = { 0, };
|
|
|
af918f |
lrm_state_t *lrm_state = user_data;
|
|
|
af918f |
- struct recurring_op_s *op = (struct recurring_op_s *)value;
|
|
|
af918f |
+ active_op_t *op = value;
|
|
|
af918f |
|
|
|
af918f |
crm_trace("Pre-emptively failing %s_%s_%d on %s (call=%s, %s)",
|
|
|
af918f |
op->rsc_id, op->op_type, op->interval,
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 9795f5401957563de2307f94c393dc83ce41d3d1 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 22 Nov 2019 16:45:31 -0600
|
|
|
af918f |
Subject: [PATCH 02/11] Refactor: controller: convert active_op_t booleans to
|
|
|
af918f |
bitmask
|
|
|
af918f |
|
|
|
af918f |
---
|
|
|
af918f |
crmd/crmd_lrm.h | 8 ++++++--
|
|
|
af918f |
crmd/lrm.c | 11 +++++------
|
|
|
af918f |
2 files changed, 11 insertions(+), 8 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h
|
|
|
af918f |
index 7c6a3c4..eb27d84 100644
|
|
|
af918f |
--- a/crmd/crmd_lrm.h
|
|
|
af918f |
+++ b/crmd/crmd_lrm.h
|
|
|
af918f |
@@ -42,12 +42,16 @@ typedef struct resource_history_s {
|
|
|
af918f |
|
|
|
af918f |
void history_free(gpointer data);
|
|
|
af918f |
|
|
|
af918f |
+enum active_op_e {
|
|
|
af918f |
+ active_op_remove = (1 << 0),
|
|
|
af918f |
+ active_op_cancelled = (1 << 1),
|
|
|
af918f |
+};
|
|
|
af918f |
+
|
|
|
af918f |
// In-flight action (recurring or pending)
|
|
|
af918f |
typedef struct active_op_s {
|
|
|
af918f |
int call_id;
|
|
|
af918f |
int interval;
|
|
|
af918f |
- gboolean remove;
|
|
|
af918f |
- gboolean cancelled;
|
|
|
af918f |
+ uint32_t flags; // bitmask of active_op_e
|
|
|
af918f |
unsigned int start_time;
|
|
|
af918f |
char *rsc_id;
|
|
|
af918f |
char *op_type;
|
|
|
af918f |
diff --git a/crmd/lrm.c b/crmd/lrm.c
|
|
|
af918f |
index e459465..2ae0d85 100644
|
|
|
af918f |
--- a/crmd/lrm.c
|
|
|
af918f |
+++ b/crmd/lrm.c
|
|
|
af918f |
@@ -1159,18 +1159,17 @@ cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op,
|
|
|
af918f |
pending = g_hash_table_lookup(lrm_state->pending_ops, key);
|
|
|
af918f |
|
|
|
af918f |
if (pending) {
|
|
|
af918f |
- if (remove && pending->remove == FALSE) {
|
|
|
af918f |
- pending->remove = TRUE;
|
|
|
af918f |
+ if (remove && is_not_set(pending->flags, active_op_remove)) {
|
|
|
af918f |
+ set_bit(pending->flags, active_op_remove);
|
|
|
af918f |
crm_debug("Scheduling %s for removal", key);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- if (pending->cancelled) {
|
|
|
af918f |
+ if (is_set(pending->flags, active_op_cancelled)) {
|
|
|
af918f |
crm_debug("Operation %s already cancelled", key);
|
|
|
af918f |
free(local_key);
|
|
|
af918f |
return FALSE;
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
- pending->cancelled = TRUE;
|
|
|
af918f |
+ set_bit(pending->flags, active_op_cancelled);
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
crm_info("No pending op found for %s", key);
|
|
|
af918f |
@@ -2636,7 +2635,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
|
|
|
af918f |
crm_err("Recurring operation %s was cancelled without transition information",
|
|
|
af918f |
op_key);
|
|
|
af918f |
|
|
|
af918f |
- } else if (pending->remove) {
|
|
|
af918f |
+ } else if (is_set(pending->flags, active_op_remove)) {
|
|
|
af918f |
/* This recurring operation was cancelled (by us) and pending, and we
|
|
|
af918f |
* have been waiting for it to finish.
|
|
|
af918f |
*/
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 620ff6cb923e4fe6c3d1a9e3345a26ff5180d56d Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 22 Nov 2019 16:58:25 -0600
|
|
|
af918f |
Subject: [PATCH 03/11] Refactor: controller: remove unused argument
|
|
|
af918f |
|
|
|
af918f |
---
|
|
|
af918f |
crmd/lrm.c | 10 +++++-----
|
|
|
af918f |
1 file changed, 5 insertions(+), 5 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/lrm.c b/crmd/lrm.c
|
|
|
af918f |
index 2ae0d85..80fcd69 100644
|
|
|
af918f |
--- a/crmd/lrm.c
|
|
|
af918f |
+++ b/crmd/lrm.c
|
|
|
af918f |
@@ -44,8 +44,8 @@ static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int ca
|
|
|
af918f |
|
|
|
af918f |
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
|
|
|
af918f |
const char *rsc_id, const char *operation);
|
|
|
af918f |
-static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
|
|
|
af918f |
- xmlNode * msg, xmlNode * request);
|
|
|
af918f |
+static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
|
af918f |
+ const char *operation, xmlNode *msg);
|
|
|
af918f |
|
|
|
af918f |
void send_direct_ack(const char *to_host, const char *to_sys,
|
|
|
af918f |
lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
|
|
|
af918f |
@@ -1851,7 +1851,7 @@ do_lrm_invoke(long long action,
|
|
|
af918f |
crm_rsc_delete, user_name);
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
|
|
|
af918f |
+ do_lrm_rsc_op(lrm_state, rsc, operation, input->xml);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
lrmd_free_rsc_info(rsc);
|
|
|
af918f |
@@ -2167,8 +2167,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
static void
|
|
|
af918f |
-do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
|
|
|
af918f |
- xmlNode * request)
|
|
|
af918f |
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
|
af918f |
+ const char *operation, xmlNode *msg)
|
|
|
af918f |
{
|
|
|
af918f |
int call_id = 0;
|
|
|
af918f |
char *op_id = NULL;
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 2716f2f4c334b927f9e253979dce27b56bbff46a Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 6 Dec 2019 12:15:05 -0600
|
|
|
af918f |
Subject: [PATCH 04/11] Refactor: scheduler: combine two "if" statements
|
|
|
af918f |
|
|
|
af918f |
... for readability, and ease of adding another block later
|
|
|
af918f |
---
|
|
|
af918f |
pengine/graph.c | 120 ++++++++++++++++++++++++++++----------------------------
|
|
|
af918f |
1 file changed, 60 insertions(+), 60 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/pengine/graph.c b/pengine/graph.c
|
|
|
af918f |
index 9edd1a1..cba30d0 100644
|
|
|
af918f |
--- a/pengine/graph.c
|
|
|
af918f |
+++ b/pengine/graph.c
|
|
|
af918f |
@@ -1095,71 +1095,71 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
|
|
|
af918f |
return action_xml;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- /* List affected resource */
|
|
|
af918f |
- if (action->rsc) {
|
|
|
af918f |
- if (is_set(action->flags, pe_action_pseudo) == FALSE) {
|
|
|
af918f |
- int lpc = 0;
|
|
|
af918f |
-
|
|
|
af918f |
- xmlNode *rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml));
|
|
|
af918f |
-
|
|
|
af918f |
- const char *attr_list[] = {
|
|
|
af918f |
- XML_AGENT_ATTR_CLASS,
|
|
|
af918f |
- XML_AGENT_ATTR_PROVIDER,
|
|
|
af918f |
- XML_ATTR_TYPE
|
|
|
af918f |
- };
|
|
|
af918f |
-
|
|
|
af918f |
- if (is_set(action->rsc->flags, pe_rsc_orphan) && action->rsc->clone_name) {
|
|
|
af918f |
- /* Do not use the 'instance free' name here as that
|
|
|
af918f |
- * might interfere with the instance we plan to keep.
|
|
|
af918f |
- * Ie. if there are more than two named /anonymous/
|
|
|
af918f |
- * instances on a given node, we need to make sure the
|
|
|
af918f |
- * command goes to the right one.
|
|
|
af918f |
- *
|
|
|
af918f |
- * Keep this block, even when everyone is using
|
|
|
af918f |
- * 'instance free' anonymous clone names - it means
|
|
|
af918f |
- * we'll do the right thing if anyone toggles the
|
|
|
af918f |
- * unique flag to 'off'
|
|
|
af918f |
- */
|
|
|
af918f |
- crm_debug("Using orphan clone name %s instead of %s", action->rsc->id,
|
|
|
af918f |
- action->rsc->clone_name);
|
|
|
af918f |
- crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name);
|
|
|
af918f |
- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
|
|
|
af918f |
+ if (action->rsc && is_not_set(action->flags, pe_action_pseudo)) {
|
|
|
af918f |
+ int lpc = 0;
|
|
|
af918f |
+ xmlNode *rsc_xml = NULL;
|
|
|
af918f |
+ const char *attr_list[] = {
|
|
|
af918f |
+ XML_AGENT_ATTR_CLASS,
|
|
|
af918f |
+ XML_AGENT_ATTR_PROVIDER,
|
|
|
af918f |
+ XML_ATTR_TYPE
|
|
|
af918f |
+ };
|
|
|
af918f |
+
|
|
|
af918f |
+ // List affected resource
|
|
|
af918f |
+
|
|
|
af918f |
+ rsc_xml = create_xml_node(action_xml,
|
|
|
af918f |
+ crm_element_name(action->rsc->xml));
|
|
|
af918f |
+ if (is_set(action->rsc->flags, pe_rsc_orphan)
|
|
|
af918f |
+ && action->rsc->clone_name) {
|
|
|
af918f |
+ /* Do not use the 'instance free' name here as that
|
|
|
af918f |
+ * might interfere with the instance we plan to keep.
|
|
|
af918f |
+ * Ie. if there are more than two named /anonymous/
|
|
|
af918f |
+ * instances on a given node, we need to make sure the
|
|
|
af918f |
+ * command goes to the right one.
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * Keep this block, even when everyone is using
|
|
|
af918f |
+ * 'instance free' anonymous clone names - it means
|
|
|
af918f |
+ * we'll do the right thing if anyone toggles the
|
|
|
af918f |
+ * unique flag to 'off'
|
|
|
af918f |
+ */
|
|
|
af918f |
+ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id,
|
|
|
af918f |
+ action->rsc->clone_name);
|
|
|
af918f |
+ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name);
|
|
|
af918f |
+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
|
|
|
af918f |
|
|
|
af918f |
- } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) {
|
|
|
af918f |
- const char *xml_id = ID(action->rsc->xml);
|
|
|
af918f |
-
|
|
|
af918f |
- crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id,
|
|
|
af918f |
- action->rsc->clone_name);
|
|
|
af918f |
-
|
|
|
af918f |
- /* ID is what we'd like client to use
|
|
|
af918f |
- * ID_LONG is what they might know it as instead
|
|
|
af918f |
- *
|
|
|
af918f |
- * ID_LONG is only strictly needed /here/ during the
|
|
|
af918f |
- * transition period until all nodes in the cluster
|
|
|
af918f |
- * are running the new software /and/ have rebooted
|
|
|
af918f |
- * once (meaning that they've only ever spoken to a DC
|
|
|
af918f |
- * supporting this feature).
|
|
|
af918f |
- *
|
|
|
af918f |
- * If anyone toggles the unique flag to 'on', the
|
|
|
af918f |
- * 'instance free' name will correspond to an orphan
|
|
|
af918f |
- * and fall into the clause above instead
|
|
|
af918f |
- */
|
|
|
af918f |
- crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id);
|
|
|
af918f |
- if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) {
|
|
|
af918f |
- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name);
|
|
|
af918f |
- } else {
|
|
|
af918f |
- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
|
|
|
af918f |
- }
|
|
|
af918f |
+ } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) {
|
|
|
af918f |
+ const char *xml_id = ID(action->rsc->xml);
|
|
|
af918f |
+
|
|
|
af918f |
+ crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id,
|
|
|
af918f |
+ action->rsc->clone_name);
|
|
|
af918f |
|
|
|
af918f |
+ /* ID is what we'd like client to use
|
|
|
af918f |
+ * ID_LONG is what they might know it as instead
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * ID_LONG is only strictly needed /here/ during the
|
|
|
af918f |
+ * transition period until all nodes in the cluster
|
|
|
af918f |
+ * are running the new software /and/ have rebooted
|
|
|
af918f |
+ * once (meaning that they've only ever spoken to a DC
|
|
|
af918f |
+ * supporting this feature).
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * If anyone toggles the unique flag to 'on', the
|
|
|
af918f |
+ * 'instance free' name will correspond to an orphan
|
|
|
af918f |
+ * and fall into the clause above instead
|
|
|
af918f |
+ */
|
|
|
af918f |
+ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id);
|
|
|
af918f |
+ if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) {
|
|
|
af918f |
+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name);
|
|
|
af918f |
} else {
|
|
|
af918f |
- CRM_ASSERT(action->rsc->clone_name == NULL);
|
|
|
af918f |
- crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id);
|
|
|
af918f |
+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- for (lpc = 0; lpc < DIMOF(attr_list); lpc++) {
|
|
|
af918f |
- crm_xml_add(rsc_xml, attr_list[lpc],
|
|
|
af918f |
- g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
|
|
|
af918f |
- }
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ CRM_ASSERT(action->rsc->clone_name == NULL);
|
|
|
af918f |
+ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id);
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
+ for (lpc = 0; lpc < DIMOF(attr_list); lpc++) {
|
|
|
af918f |
+ crm_xml_add(rsc_xml, attr_list[lpc],
|
|
|
af918f |
+ g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From dc73907eeae769d70b04e4a8feae208c12018d83 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Thu, 19 Dec 2019 17:18:41 -0600
|
|
|
af918f |
Subject: [PATCH 05/11] Log: scheduler: drop redundant trace messages
|
|
|
af918f |
|
|
|
af918f |
We logged "applying placement constraints" three times.
|
|
|
af918f |
---
|
|
|
af918f |
pengine/allocate.c | 17 ++++-------------
|
|
|
af918f |
1 file changed, 4 insertions(+), 13 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/pengine/allocate.c b/pengine/allocate.c
|
|
|
af918f |
index b819af3..30d29e1 100644
|
|
|
af918f |
--- a/pengine/allocate.c
|
|
|
af918f |
+++ b/pengine/allocate.c
|
|
|
af918f |
@@ -652,21 +652,15 @@ check_actions(pe_working_set_t * data_set)
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
-static gboolean
|
|
|
af918f |
+static void
|
|
|
af918f |
apply_placement_constraints(pe_working_set_t * data_set)
|
|
|
af918f |
{
|
|
|
af918f |
- GListPtr gIter = NULL;
|
|
|
af918f |
-
|
|
|
af918f |
- crm_trace("Applying constraints...");
|
|
|
af918f |
-
|
|
|
af918f |
- for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) {
|
|
|
af918f |
+ for (GList *gIter = data_set->placement_constraints;
|
|
|
af918f |
+ gIter != NULL; gIter = gIter->next) {
|
|
|
af918f |
pe__location_t *cons = gIter->data;
|
|
|
af918f |
|
|
|
af918f |
cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons);
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
- return TRUE;
|
|
|
af918f |
-
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
static gboolean
|
|
|
af918f |
@@ -1026,10 +1020,7 @@ stage2(pe_working_set_t * data_set)
|
|
|
af918f |
{
|
|
|
af918f |
GListPtr gIter = NULL;
|
|
|
af918f |
|
|
|
af918f |
- crm_trace("Applying placement constraints");
|
|
|
af918f |
-
|
|
|
af918f |
- gIter = data_set->nodes;
|
|
|
af918f |
- for (; gIter != NULL; gIter = gIter->next) {
|
|
|
af918f |
+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
|
|
|
af918f |
node_t *node = (node_t *) gIter->data;
|
|
|
af918f |
|
|
|
af918f |
if (node == NULL) {
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From ae404a4e04b50e6177942256c5b8ff57a36af309 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Sat, 7 Dec 2019 12:13:11 -0600
|
|
|
af918f |
Subject: [PATCH 06/11] Refactor: libcrmcommon: convenience functions for list
|
|
|
af918f |
length comparisons
|
|
|
af918f |
|
|
|
af918f |
... for efficiency and readability
|
|
|
af918f |
---
|
|
|
af918f |
include/crm/common/internal.h | 14 ++++++++++++++
|
|
|
af918f |
1 file changed, 14 insertions(+)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
|
|
|
af918f |
index 0d225f5..0f7158c 100644
|
|
|
af918f |
--- a/include/crm/common/internal.h
|
|
|
af918f |
+++ b/include/crm/common/internal.h
|
|
|
af918f |
@@ -84,6 +84,20 @@ crm_getpid_s()
|
|
|
af918f |
return crm_strdup_printf("%lu", (unsigned long) getpid());
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
+// More efficient than g_list_length(list) == 1
|
|
|
af918f |
+static inline bool
|
|
|
af918f |
+pcmk__list_of_1(GList *list)
|
|
|
af918f |
+{
|
|
|
af918f |
+ return list && (list->next == NULL);
|
|
|
af918f |
+}
|
|
|
af918f |
+
|
|
|
af918f |
+// More efficient than g_list_length(list) > 1
|
|
|
af918f |
+static inline bool
|
|
|
af918f |
+pcmk__list_of_multiple(GList *list)
|
|
|
af918f |
+{
|
|
|
af918f |
+ return list && (list->next != NULL);
|
|
|
af918f |
+}
|
|
|
af918f |
+
|
|
|
af918f |
/* convenience functions for failure-related node attributes */
|
|
|
af918f |
|
|
|
af918f |
#define CRM_FAIL_COUNT_PREFIX "fail-count"
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 78a1d57ae4028e3068b13d3526840f24c7798140 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Mon, 16 Dec 2019 14:13:30 -0600
|
|
|
af918f |
Subject: [PATCH 07/11] Refactor: libcrmcommon: add convenience macros for
|
|
|
af918f |
plurals
|
|
|
af918f |
|
|
|
af918f |
I've avoided making s_if_plural() an official API due to its hackiness, but
|
|
|
af918f |
it really is the best solution for now. Promote it to pcmk__plural_s(), along
|
|
|
af918f |
with a companion macro pcmk__plural_alt() for more complicated plurals.
|
|
|
af918f |
---
|
|
|
af918f |
include/crm/common/internal.h | 23 +++++++++++++++++++++++
|
|
|
af918f |
1 file changed, 23 insertions(+)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
|
|
|
af918f |
index 0f7158c..5bbf01b 100644
|
|
|
af918f |
--- a/include/crm/common/internal.h
|
|
|
af918f |
+++ b/include/crm/common/internal.h
|
|
|
af918f |
@@ -72,6 +72,29 @@ bool crm_compress_string(const char *data, int length, int max, char **result,
|
|
|
af918f |
unsigned int *result_len);
|
|
|
af918f |
gint crm_alpha_sort(gconstpointer a, gconstpointer b);
|
|
|
af918f |
|
|
|
af918f |
+/* Correctly displaying singular or plural is complicated; consider "1 node has"
|
|
|
af918f |
+ * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g.
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * if (a == 1) {
|
|
|
af918f |
+ * crm_info("singular message");
|
|
|
af918f |
+ * } else {
|
|
|
af918f |
+ * crm_info("plural message");
|
|
|
af918f |
+ * }
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * though even that's not sufficient for all languages besides English (if we
|
|
|
af918f |
+ * ever desire to do translations of output and log messages). But the following
|
|
|
af918f |
+ * convenience macros are "good enough" and more concise for many cases.
|
|
|
af918f |
+ */
|
|
|
af918f |
+
|
|
|
af918f |
+/* Example:
|
|
|
af918f |
+ * crm_info("Found %d %s", nentries,
|
|
|
af918f |
+ * pcmk__plural_alt(nentries, "entry", "entries"));
|
|
|
af918f |
+ */
|
|
|
af918f |
+#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2))
|
|
|
af918f |
+
|
|
|
af918f |
+// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes));
|
|
|
af918f |
+#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s")
|
|
|
af918f |
+
|
|
|
af918f |
static inline int
|
|
|
af918f |
crm_strlen_zero(const char *s)
|
|
|
af918f |
{
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From e33b5e297c24182c15ace7142efef1add7664643 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Thu, 12 Dec 2019 20:50:50 -0600
|
|
|
af918f |
Subject: [PATCH 08/11] Log: controller: improve join messages
|
|
|
af918f |
|
|
|
af918f |
---
|
|
|
af918f |
crmd/fsa.c | 79 +++++++-----
|
|
|
af918f |
crmd/join_dc.c | 375 +++++++++++++++++++++++++++++++++------------------------
|
|
|
af918f |
2 files changed, 263 insertions(+), 191 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/fsa.c b/crmd/fsa.c
|
|
|
af918f |
index 9b1189a..7f6a3ac 100644
|
|
|
af918f |
--- a/crmd/fsa.c
|
|
|
af918f |
+++ b/crmd/fsa.c
|
|
|
af918f |
@@ -469,12 +469,53 @@ log_fsa_input(fsa_data_t * stored_msg)
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
+static void
|
|
|
af918f |
+check_join_counts(fsa_data_t *msg_data)
|
|
|
af918f |
+{
|
|
|
af918f |
+ int count;
|
|
|
af918f |
+ guint npeers;
|
|
|
af918f |
+
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_finalized);
|
|
|
af918f |
+ if (count > 0) {
|
|
|
af918f |
+ crm_err("%d cluster node%s failed to confirm join",
|
|
|
af918f |
+ count, pcmk__plural_s(count));
|
|
|
af918f |
+ crmd_join_phase_log(LOG_NOTICE);
|
|
|
af918f |
+ return;
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
+ npeers = crm_active_peers();
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_confirmed);
|
|
|
af918f |
+ if (count == npeers) {
|
|
|
af918f |
+ if (npeers == 1) {
|
|
|
af918f |
+ crm_debug("Sole active cluster node is fully joined");
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ crm_debug("All %d active cluster nodes are fully joined", count);
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
+ } else if (count > npeers) {
|
|
|
af918f |
+ crm_err("New election needed because more nodes confirmed join "
|
|
|
af918f |
+ "than are in membership (%d > %u)", count, npeers);
|
|
|
af918f |
+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
|
|
|
af918f |
+
|
|
|
af918f |
+ } else if (saved_ccm_membership_id != crm_peer_seq) {
|
|
|
af918f |
+ crm_info("New join needed because membership changed (%llu -> %llu)",
|
|
|
af918f |
+ saved_ccm_membership_id, crm_peer_seq);
|
|
|
af918f |
+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
|
|
|
af918f |
+
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ crm_warn("Only %d of %u active cluster nodes fully joined "
|
|
|
af918f |
+ "(%d did not respond to offer)",
|
|
|
af918f |
+ count, npeers, crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
+ }
|
|
|
af918f |
+}
|
|
|
af918f |
+
|
|
|
af918f |
long long
|
|
|
af918f |
do_state_transition(long long actions,
|
|
|
af918f |
enum crmd_fsa_state cur_state,
|
|
|
af918f |
enum crmd_fsa_state next_state, fsa_data_t * msg_data)
|
|
|
af918f |
{
|
|
|
af918f |
int level = LOG_INFO;
|
|
|
af918f |
+ int count = 0;
|
|
|
af918f |
long long tmp = actions;
|
|
|
af918f |
gboolean clear_recovery_bit = TRUE;
|
|
|
af918f |
|
|
|
af918f |
@@ -572,13 +613,14 @@ do_state_transition(long long actions,
|
|
|
af918f |
crm_warn("Progressed to state %s after %s",
|
|
|
af918f |
fsa_state2string(next_state), fsa_cause2string(cause));
|
|
|
af918f |
}
|
|
|
af918f |
- if (crmd_join_phase_count(crm_join_welcomed) > 0) {
|
|
|
af918f |
- crm_warn("%u cluster nodes failed to respond"
|
|
|
af918f |
- " to the join offer.", crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_welcomed);
|
|
|
af918f |
+ if (count > 0) {
|
|
|
af918f |
+ crm_warn("%d cluster node%s failed to respond to join offer",
|
|
|
af918f |
+ count, pcmk__plural_s(count));
|
|
|
af918f |
crmd_join_phase_log(LOG_NOTICE);
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_debug("All %d cluster nodes responded to the join offer.",
|
|
|
af918f |
+ crm_debug("All cluster nodes (%d) responded to join offer",
|
|
|
af918f |
crmd_join_phase_count(crm_join_integrated));
|
|
|
af918f |
}
|
|
|
af918f |
break;
|
|
|
af918f |
@@ -590,34 +632,7 @@ do_state_transition(long long actions,
|
|
|
af918f |
crm_info("Progressed to state %s after %s",
|
|
|
af918f |
fsa_state2string(next_state), fsa_cause2string(cause));
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
- if (crmd_join_phase_count(crm_join_finalized) > 0) {
|
|
|
af918f |
- crm_err("%u cluster nodes failed to confirm their join.",
|
|
|
af918f |
- crmd_join_phase_count(crm_join_finalized));
|
|
|
af918f |
- crmd_join_phase_log(LOG_NOTICE);
|
|
|
af918f |
-
|
|
|
af918f |
- } else if (crmd_join_phase_count(crm_join_confirmed)
|
|
|
af918f |
- == crm_active_peers()) {
|
|
|
af918f |
- crm_debug("All %u cluster nodes are"
|
|
|
af918f |
- " eligible to run resources.", crm_active_peers());
|
|
|
af918f |
-
|
|
|
af918f |
- } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) {
|
|
|
af918f |
- crm_err("We have more confirmed nodes than our membership does: %d vs. %d",
|
|
|
af918f |
- crmd_join_phase_count(crm_join_confirmed), crm_active_peers());
|
|
|
af918f |
- register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
|
|
|
af918f |
-
|
|
|
af918f |
- } else if (saved_ccm_membership_id != crm_peer_seq) {
|
|
|
af918f |
- crm_info("Membership changed: %llu -> %llu - join restart",
|
|
|
af918f |
- saved_ccm_membership_id, crm_peer_seq);
|
|
|
af918f |
- register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
|
|
|
af918f |
-
|
|
|
af918f |
- } else {
|
|
|
af918f |
- crm_warn("Only %u of %u cluster "
|
|
|
af918f |
- "nodes are eligible to run resources - continue %d",
|
|
|
af918f |
- crmd_join_phase_count(crm_join_confirmed),
|
|
|
af918f |
- crm_active_peers(), crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
- }
|
|
|
af918f |
-/* initialize_join(FALSE); */
|
|
|
af918f |
+ check_join_counts(msg_data);
|
|
|
af918f |
break;
|
|
|
af918f |
|
|
|
af918f |
case S_STOPPING:
|
|
|
af918f |
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
|
|
|
af918f |
index 857e760..cdb3f77 100644
|
|
|
af918f |
--- a/crmd/join_dc.c
|
|
|
af918f |
+++ b/crmd/join_dc.c
|
|
|
af918f |
@@ -36,7 +36,11 @@ void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
|
|
|
af918f |
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
|
|
|
af918f |
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
|
|
|
af918f |
|
|
|
af918f |
+/* Numeric counter used to identify join rounds (an unsigned int would be
|
|
|
af918f |
+ * appropriate, except we get and set it in XML as int)
|
|
|
af918f |
+ */
|
|
|
af918f |
static int current_join_id = 0;
|
|
|
af918f |
+
|
|
|
af918f |
unsigned long long saved_ccm_membership_id = 0;
|
|
|
af918f |
|
|
|
af918f |
void
|
|
|
af918f |
@@ -44,12 +48,7 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase
|
|
|
af918f |
{
|
|
|
af918f |
enum crm_join_phase last = 0;
|
|
|
af918f |
|
|
|
af918f |
- if(node == NULL) {
|
|
|
af918f |
- crm_err("Could not update join because node not specified"
|
|
|
af918f |
- CRM_XS " join-%u source=%s phase=%s",
|
|
|
af918f |
- current_join_id, source, crm_join_phase_str(phase));
|
|
|
af918f |
- return;
|
|
|
af918f |
- }
|
|
|
af918f |
+ CRM_CHECK(node != NULL, return);
|
|
|
af918f |
|
|
|
af918f |
/* Remote nodes do not participate in joins */
|
|
|
af918f |
if (is_set(node->flags, crm_remote_node)) {
|
|
|
af918f |
@@ -59,21 +58,23 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase
|
|
|
af918f |
last = node->join;
|
|
|
af918f |
|
|
|
af918f |
if(phase == last) {
|
|
|
af918f |
- crm_trace("%s: Node %s[%u] - join-%u phase still %s",
|
|
|
af918f |
- source, node->uname, node->id, current_join_id,
|
|
|
af918f |
- crm_join_phase_str(last));
|
|
|
af918f |
+ crm_trace("Node %s join-%d phase is still %s "
|
|
|
af918f |
+ CRM_XS " nodeid=%u source=%s",
|
|
|
af918f |
+ node->uname, current_join_id, crm_join_phase_str(last),
|
|
|
af918f |
+ node->id, source);
|
|
|
af918f |
|
|
|
af918f |
} else if ((phase <= crm_join_none) || (phase == (last + 1))) {
|
|
|
af918f |
node->join = phase;
|
|
|
af918f |
- crm_info("%s: Node %s[%u] - join-%u phase %s -> %s",
|
|
|
af918f |
- source, node->uname, node->id, current_join_id,
|
|
|
af918f |
- crm_join_phase_str(last), crm_join_phase_str(phase));
|
|
|
af918f |
+ crm_trace("Node %s join-%d phase is now %s (was %s) "
|
|
|
af918f |
+ CRM_XS " nodeid=%u source=%s",
|
|
|
af918f |
+ node->uname, current_join_id, crm_join_phase_str(phase),
|
|
|
af918f |
+ crm_join_phase_str(last), node->id, source);
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_err("Could not update join for node %s because phase transition invalid "
|
|
|
af918f |
- CRM_XS " join-%u source=%s node_id=%u last=%s new=%s",
|
|
|
af918f |
- node->uname, current_join_id, source, node->id,
|
|
|
af918f |
- crm_join_phase_str(last), crm_join_phase_str(phase));
|
|
|
af918f |
+ crm_warn("Rejecting join-%d phase update for node %s because "
|
|
|
af918f |
+ "can't go from %s to %s " CRM_XS " nodeid=%u source=%s",
|
|
|
af918f |
+ current_join_id, node->uname, crm_join_phase_str(last),
|
|
|
af918f |
+ crm_join_phase_str(phase), node->id, source);
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
@@ -83,9 +84,7 @@ initialize_join(gboolean before)
|
|
|
af918f |
GHashTableIter iter;
|
|
|
af918f |
crm_node_t *peer = NULL;
|
|
|
af918f |
|
|
|
af918f |
- /* clear out/reset a bunch of stuff */
|
|
|
af918f |
- crm_debug("join-%d: Initializing join data (flag=%s)",
|
|
|
af918f |
- current_join_id, before ? "true" : "false");
|
|
|
af918f |
+ crm_debug("Starting new join round join-%d", current_join_id);
|
|
|
af918f |
|
|
|
af918f |
g_hash_table_iter_init(&iter, crm_peer_cache);
|
|
|
af918f |
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
|
|
|
af918f |
@@ -138,7 +137,9 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
|
|
|
af918f |
CRM_ASSERT(member != NULL);
|
|
|
af918f |
if (crm_is_peer_active(member) == FALSE) {
|
|
|
af918f |
- crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state);
|
|
|
af918f |
+ crm_info("Not making join-%d offer to inactive node %s",
|
|
|
af918f |
+ current_join_id,
|
|
|
af918f |
+ (member->uname? member->uname : "with unknown name"));
|
|
|
af918f |
if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) {
|
|
|
af918f |
/* You would think this unsafe, but in fact this plus an
|
|
|
af918f |
* active resource is what causes it to be fenced.
|
|
|
af918f |
@@ -155,17 +156,21 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
if (member->uname == NULL) {
|
|
|
af918f |
- crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid);
|
|
|
af918f |
+ crm_info("Not making join-%d offer to node uuid %s with unknown name",
|
|
|
af918f |
+ current_join_id, member->uuid);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
if (saved_ccm_membership_id != crm_peer_seq) {
|
|
|
af918f |
saved_ccm_membership_id = crm_peer_seq;
|
|
|
af918f |
- crm_info("Making join offers based on membership %llu", crm_peer_seq);
|
|
|
af918f |
+ crm_info("Making join-%d offers based on membership event %llu",
|
|
|
af918f |
+ current_join_id, crm_peer_seq);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
if(user_data && member->join > crm_join_none) {
|
|
|
af918f |
- crm_info("Skipping %s: already known %d", member->uname, member->join);
|
|
|
af918f |
+ crm_info("Not making join-%d offer to already known node %s (%s)",
|
|
|
af918f |
+ current_join_id, member->uname,
|
|
|
af918f |
+ crm_join_phase_str(member->join));
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
@@ -173,14 +178,11 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
|
|
|
af918f |
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname);
|
|
|
af918f |
|
|
|
af918f |
- /* send the welcome */
|
|
|
af918f |
- crm_info("join-%d: Sending offer to %s", current_join_id, member->uname);
|
|
|
af918f |
-
|
|
|
af918f |
+ crm_info("Sending join-%d offer to %s", current_join_id, member->uname);
|
|
|
af918f |
send_cluster_message(member, crm_msg_crmd, offer, TRUE);
|
|
|
af918f |
free_xml(offer);
|
|
|
af918f |
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed);
|
|
|
af918f |
- /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
/* A_DC_JOIN_OFFER_ALL */
|
|
|
af918f |
@@ -190,6 +192,8 @@ do_dc_join_offer_all(long long action,
|
|
|
af918f |
enum crmd_fsa_state cur_state,
|
|
|
af918f |
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
|
|
|
af918f |
{
|
|
|
af918f |
+ int count;
|
|
|
af918f |
+
|
|
|
af918f |
/* reset everyone's status back to down or in_ccm in the CIB
|
|
|
af918f |
*
|
|
|
af918f |
* any nodes that are active in the CIB but not in the CCM list
|
|
|
af918f |
@@ -205,9 +209,11 @@ do_dc_join_offer_all(long long action,
|
|
|
af918f |
}
|
|
|
af918f |
g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL);
|
|
|
af918f |
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_welcomed);
|
|
|
af918f |
+ crm_info("Waiting on join-%d requests from %d outstanding node%s",
|
|
|
af918f |
+ current_join_id, count, pcmk__plural_s(count));
|
|
|
af918f |
+
|
|
|
af918f |
/* don't waste time by invoking the PE yet; */
|
|
|
af918f |
- crm_info("join-%d: Waiting on %d outstanding join acks",
|
|
|
af918f |
- current_join_id, crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
/* A_DC_JOIN_OFFER_ONE */
|
|
|
af918f |
@@ -219,50 +225,40 @@ do_dc_join_offer_one(long long action,
|
|
|
af918f |
{
|
|
|
af918f |
crm_node_t *member;
|
|
|
af918f |
ha_msg_input_t *welcome = NULL;
|
|
|
af918f |
-
|
|
|
af918f |
- const char *op = NULL;
|
|
|
af918f |
+ int count;
|
|
|
af918f |
const char *join_to = NULL;
|
|
|
af918f |
|
|
|
af918f |
- if (msg_data->data) {
|
|
|
af918f |
- welcome = fsa_typed_data(fsa_dt_ha_msg);
|
|
|
af918f |
-
|
|
|
af918f |
- } else {
|
|
|
af918f |
- crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes");
|
|
|
af918f |
+ if (msg_data->data == NULL) {
|
|
|
af918f |
+ crm_info("Making join-%d offers to any unconfirmed nodes "
|
|
|
af918f |
+ "because an unknown node joined", current_join_id);
|
|
|
af918f |
g_hash_table_foreach(crm_peer_cache, join_make_offer, &member);
|
|
|
af918f |
check_join_state(cur_state, __FUNCTION__);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
+ welcome = fsa_typed_data(fsa_dt_ha_msg);
|
|
|
af918f |
if (welcome == NULL) {
|
|
|
af918f |
- crm_err("Attempt to send welcome message without a message to reply to!");
|
|
|
af918f |
+ // fsa_typed_data() already logged an error
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM);
|
|
|
af918f |
if (join_to == NULL) {
|
|
|
af918f |
- crm_err("Attempt to send welcome message without a host to reply to!");
|
|
|
af918f |
+ crm_err("Can't make join-%d offer to unknown node", current_join_id);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
member = crm_get_peer(0, join_to);
|
|
|
af918f |
- op = crm_element_value(welcome->msg, F_CRM_TASK);
|
|
|
af918f |
- if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) {
|
|
|
af918f |
- /* note: it _is_ possible that a node will have been
|
|
|
af918f |
- * sick or starting up when the original offer was made.
|
|
|
af918f |
- * however, it will either re-announce itself in due course
|
|
|
af918f |
- * _or_ we can re-store the original offer on the client.
|
|
|
af918f |
- */
|
|
|
af918f |
- crm_trace("(Re-)offering membership to %s...", join_to);
|
|
|
af918f |
- }
|
|
|
af918f |
|
|
|
af918f |
- crm_info("join-%d: Processing %s request from %s in state %s",
|
|
|
af918f |
- current_join_id, op, join_to, fsa_state2string(cur_state));
|
|
|
af918f |
+ /* It is possible that a node will have been sick or starting up when the
|
|
|
af918f |
+ * original offer was made. However, it will either re-announce itself in
|
|
|
af918f |
+ * due course, or we can re-store the original offer on the client.
|
|
|
af918f |
+ */
|
|
|
af918f |
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, member, crm_join_none);
|
|
|
af918f |
join_make_offer(NULL, member, NULL);
|
|
|
af918f |
|
|
|
af918f |
- /* always offer to the DC (ourselves)
|
|
|
af918f |
- * this ensures the correct value for max_generation_from
|
|
|
af918f |
+ /* If the offer isn't to the local node, make an offer to the local node as
|
|
|
af918f |
+ * well, to ensure the correct value for max_generation_from.
|
|
|
af918f |
*/
|
|
|
af918f |
if (strcmp(join_to, fsa_our_uname) != 0) {
|
|
|
af918f |
member = crm_get_peer(0, fsa_our_uname);
|
|
|
af918f |
@@ -274,9 +270,11 @@ do_dc_join_offer_one(long long action,
|
|
|
af918f |
*/
|
|
|
af918f |
abort_transition(INFINITY, tg_restart, "Node join", NULL);
|
|
|
af918f |
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_welcomed);
|
|
|
af918f |
+ crm_info("Waiting on join-%d requests from %d outstanding node%s",
|
|
|
af918f |
+ current_join_id, count, pcmk__plural_s(count));
|
|
|
af918f |
+
|
|
|
af918f |
/* don't waste time by invoking the PE yet; */
|
|
|
af918f |
- crm_debug("Waiting on %d outstanding join acks for join-%d",
|
|
|
af918f |
- crmd_join_phase_count(crm_join_welcomed), current_join_id);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
static int
|
|
|
af918f |
@@ -309,20 +307,29 @@ do_dc_join_filter_offer(long long action,
|
|
|
af918f |
|
|
|
af918f |
int cmp = 0;
|
|
|
af918f |
int join_id = -1;
|
|
|
af918f |
+ int count = 0;
|
|
|
af918f |
gboolean ack_nack_bool = TRUE;
|
|
|
af918f |
- const char *ack_nack = CRMD_JOINSTATE_MEMBER;
|
|
|
af918f |
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
|
|
|
af918f |
|
|
|
af918f |
const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
|
|
|
af918f |
const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE);
|
|
|
af918f |
+ crm_node_t *join_node = NULL;
|
|
|
af918f |
|
|
|
af918f |
- crm_node_t *join_node = crm_get_peer(0, join_from);
|
|
|
af918f |
-
|
|
|
af918f |
- crm_debug("Processing req from %s", join_from);
|
|
|
af918f |
+ if (join_from == NULL) {
|
|
|
af918f |
+ crm_err("Ignoring invalid join request without node name");
|
|
|
af918f |
+ return;
|
|
|
af918f |
+ }
|
|
|
af918f |
+ join_node = crm_get_peer(0, join_from);
|
|
|
af918f |
|
|
|
af918f |
- generation = join_ack->xml;
|
|
|
af918f |
crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
|
|
|
af918f |
+ if (join_id != current_join_id) {
|
|
|
af918f |
+ crm_debug("Ignoring join-%d request from %s because we are on join-%d",
|
|
|
af918f |
+ join_id, join_from, current_join_id);
|
|
|
af918f |
+ check_join_state(cur_state, __FUNCTION__);
|
|
|
af918f |
+ return;
|
|
|
af918f |
+ }
|
|
|
af918f |
|
|
|
af918f |
+ generation = join_ack->xml;
|
|
|
af918f |
if (max_generation_xml != NULL && generation != NULL) {
|
|
|
af918f |
int lpc = 0;
|
|
|
af918f |
|
|
|
af918f |
@@ -337,61 +344,63 @@ do_dc_join_filter_offer(long long action,
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- if (join_id != current_join_id) {
|
|
|
af918f |
- crm_debug("Invalid response from %s: join-%d vs. join-%d",
|
|
|
af918f |
- join_from, join_id, current_join_id);
|
|
|
af918f |
- check_join_state(cur_state, __FUNCTION__);
|
|
|
af918f |
- return;
|
|
|
af918f |
+ if (ref == NULL) {
|
|
|
af918f |
+ ref = "none"; // for logging only
|
|
|
af918f |
+ }
|
|
|
af918f |
|
|
|
af918f |
- } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) {
|
|
|
af918f |
- crm_err("Node %s is not a member", join_from);
|
|
|
af918f |
+ if (crm_is_peer_active(join_node) == FALSE) {
|
|
|
af918f |
+ crm_err("Rejecting join-%d request from inactive node %s "
|
|
|
af918f |
+ CRM_XS " ref=%s", join_id, join_from, ref);
|
|
|
af918f |
ack_nack_bool = FALSE;
|
|
|
af918f |
|
|
|
af918f |
} else if (generation == NULL) {
|
|
|
af918f |
- crm_err("Generation was NULL");
|
|
|
af918f |
+ crm_err("Rejecting invalid join-%d request from node %s "
|
|
|
af918f |
+ "missing CIB generation " CRM_XS " ref=%s",
|
|
|
af918f |
+ join_id, join_from, ref);
|
|
|
af918f |
ack_nack_bool = FALSE;
|
|
|
af918f |
|
|
|
af918f |
} else if (max_generation_xml == NULL) {
|
|
|
af918f |
+ crm_debug("Accepting join-%d request from %s "
|
|
|
af918f |
+ "(with first CIB generation) " CRM_XS " ref=%s",
|
|
|
af918f |
+ join_id, join_from, ref);
|
|
|
af918f |
max_generation_xml = copy_xml(generation);
|
|
|
af918f |
max_generation_from = strdup(join_from);
|
|
|
af918f |
|
|
|
af918f |
} else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) {
|
|
|
af918f |
- crm_debug("%s has a better generation number than"
|
|
|
af918f |
- " the current max %s", join_from, max_generation_from);
|
|
|
af918f |
- if (max_generation_xml) {
|
|
|
af918f |
- crm_log_xml_debug(max_generation_xml, "Max generation");
|
|
|
af918f |
- }
|
|
|
af918f |
- crm_log_xml_debug(generation, "Their generation");
|
|
|
af918f |
+ crm_debug("Accepting join-%d request from %s (with better "
|
|
|
af918f |
+ "CIB generation than current best from %s) " CRM_XS " ref=%s",
|
|
|
af918f |
+ join_id, join_from, max_generation_from, ref);
|
|
|
af918f |
+ crm_log_xml_debug(max_generation_xml, "Old max generation");
|
|
|
af918f |
+ crm_log_xml_debug(generation, "New max generation");
|
|
|
af918f |
|
|
|
af918f |
free(max_generation_from);
|
|
|
af918f |
free_xml(max_generation_xml);
|
|
|
af918f |
|
|
|
af918f |
max_generation_from = strdup(join_from);
|
|
|
af918f |
max_generation_xml = copy_xml(join_ack->xml);
|
|
|
af918f |
+
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s",
|
|
|
af918f |
+ join_id, join_from, ref);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
if (ack_nack_bool == FALSE) {
|
|
|
af918f |
- /* NACK this client */
|
|
|
af918f |
- ack_nack = CRMD_JOINSTATE_NACK;
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack);
|
|
|
af918f |
- crm_err("Rejecting cluster join request from %s " CRM_XS
|
|
|
af918f |
- " NACK join-%d ref=%s", join_from, join_id, ref);
|
|
|
af918f |
-
|
|
|
af918f |
+ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK);
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref);
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated);
|
|
|
af918f |
+ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- crm_update_peer_expected(__FUNCTION__, join_node, ack_nack);
|
|
|
af918f |
-
|
|
|
af918f |
- crm_debug("%u nodes have been integrated into join-%d",
|
|
|
af918f |
- crmd_join_phase_count(crm_join_integrated), join_id);
|
|
|
af918f |
-
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_integrated);
|
|
|
af918f |
+ crm_debug("%d node%s currently integrated in join-%d",
|
|
|
af918f |
+ count, pcmk__plural_s(count), join_id);
|
|
|
af918f |
|
|
|
af918f |
if (check_join_state(cur_state, __FUNCTION__) == FALSE) {
|
|
|
af918f |
/* don't waste time by invoking the PE yet; */
|
|
|
af918f |
- crm_debug("join-%d: Still waiting on %d outstanding offers",
|
|
|
af918f |
- join_id, crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
+ count = crmd_join_phase_count(crm_join_welcomed);
|
|
|
af918f |
+ crm_debug("Waiting on join-%d requests from %d outstanding node%s",
|
|
|
af918f |
+ join_id, count, pcmk__plural_s(count));
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
@@ -404,21 +413,24 @@ do_dc_join_finalize(long long action,
|
|
|
af918f |
{
|
|
|
af918f |
char *sync_from = NULL;
|
|
|
af918f |
int rc = pcmk_ok;
|
|
|
af918f |
+ int count_welcomed = crmd_join_phase_count(crm_join_welcomed);
|
|
|
af918f |
+ int count_integrated = crmd_join_phase_count(crm_join_integrated);
|
|
|
af918f |
|
|
|
af918f |
/* This we can do straight away and avoid clients timing us out
|
|
|
af918f |
* while we compute the latest CIB
|
|
|
af918f |
*/
|
|
|
af918f |
- crm_debug("Finalizing join-%d for %d clients",
|
|
|
af918f |
- current_join_id, crmd_join_phase_count(crm_join_integrated));
|
|
|
af918f |
-
|
|
|
af918f |
- crmd_join_phase_log(LOG_INFO);
|
|
|
af918f |
- if (crmd_join_phase_count(crm_join_welcomed) != 0) {
|
|
|
af918f |
- crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
+ if (count_welcomed != 0) {
|
|
|
af918f |
+ crm_debug("Waiting on join-%d requests from %d outstanding node%s "
|
|
|
af918f |
+ "before finalizing join", current_join_id, count_welcomed,
|
|
|
af918f |
+ pcmk__plural_s(count_welcomed));
|
|
|
af918f |
+ crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
/* crmd_fsa_stall(FALSE); Needed? */
|
|
|
af918f |
return;
|
|
|
af918f |
|
|
|
af918f |
- } else if (crmd_join_phase_count(crm_join_integrated) == 0) {
|
|
|
af918f |
- /* Nothing to do */
|
|
|
af918f |
+ } else if (count_integrated == 0) {
|
|
|
af918f |
+ crm_debug("Finalization not needed for join-%d at the current time",
|
|
|
af918f |
+ current_join_id);
|
|
|
af918f |
+ crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
check_join_state(fsa_state, __FUNCTION__);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
@@ -429,8 +441,9 @@ do_dc_join_finalize(long long action,
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
if (is_set(fsa_input_register, R_IN_TRANSITION)) {
|
|
|
af918f |
- crm_warn("Delaying response to cluster join offer while transition in progress "
|
|
|
af918f |
- CRM_XS " join-%d", current_join_id);
|
|
|
af918f |
+ crm_warn("Delaying join-%d finalization while transition in progress",
|
|
|
af918f |
+ current_join_id);
|
|
|
af918f |
+ crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
crmd_fsa_stall(FALSE);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
@@ -439,18 +452,20 @@ do_dc_join_finalize(long long action,
|
|
|
af918f |
/* ask for the agreed best CIB */
|
|
|
af918f |
sync_from = strdup(max_generation_from);
|
|
|
af918f |
set_bit(fsa_input_register, R_CIB_ASKED);
|
|
|
af918f |
- crm_notice("Syncing the Cluster Information Base from %s to rest of cluster "
|
|
|
af918f |
- CRM_XS " join-%d", sync_from, current_join_id);
|
|
|
af918f |
- crm_log_xml_notice(max_generation_xml, "Requested version");
|
|
|
af918f |
+ crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
|
|
|
af918f |
+ current_join_id, count_integrated,
|
|
|
af918f |
+ pcmk__plural_s(count_integrated), sync_from);
|
|
|
af918f |
+ crm_log_xml_notice(max_generation_xml, "Requested CIB version");
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
/* Send _our_ CIB out to everyone */
|
|
|
af918f |
sync_from = strdup(fsa_our_uname);
|
|
|
af918f |
- crm_info("join-%d: Syncing our CIB to the rest of the cluster",
|
|
|
af918f |
- current_join_id);
|
|
|
af918f |
- crm_log_xml_debug(max_generation_xml, "Requested version");
|
|
|
af918f |
+ crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
|
|
|
af918f |
+ current_join_id, count_integrated,
|
|
|
af918f |
+ pcmk__plural_s(count_integrated));
|
|
|
af918f |
+ crm_log_xml_debug(max_generation_xml, "Requested CIB version");
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
+ crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
|
|
|
af918f |
rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override);
|
|
|
af918f |
fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback);
|
|
|
af918f |
@@ -462,26 +477,33 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi
|
|
|
af918f |
CRM_LOG_ASSERT(-EPERM != rc);
|
|
|
af918f |
clear_bit(fsa_input_register, R_CIB_ASKED);
|
|
|
af918f |
if (rc != pcmk_ok) {
|
|
|
af918f |
- do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR),
|
|
|
af918f |
- "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc));
|
|
|
af918f |
+ do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
|
|
|
af918f |
+ "Could not sync CIB from %s in join-%d: %s",
|
|
|
af918f |
+ (char *) user_data, current_join_id, pcmk_strerror(rc));
|
|
|
af918f |
|
|
|
af918f |
/* restart the whole join process */
|
|
|
af918f |
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__);
|
|
|
af918f |
|
|
|
af918f |
- } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) {
|
|
|
af918f |
+ } else if (!AM_I_DC) {
|
|
|
af918f |
+ crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
|
|
|
af918f |
+
|
|
|
af918f |
+ } else if (fsa_state != S_FINALIZE_JOIN) {
|
|
|
af918f |
+ crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)",
|
|
|
af918f |
+ current_join_id, fsa_state2string(fsa_state));
|
|
|
af918f |
+
|
|
|
af918f |
+ } else {
|
|
|
af918f |
set_bit(fsa_input_register, R_HAVE_CIB);
|
|
|
af918f |
clear_bit(fsa_input_register, R_CIB_ASKED);
|
|
|
af918f |
|
|
|
af918f |
/* make sure dc_uuid is re-set to us */
|
|
|
af918f |
if (check_join_state(fsa_state, __FUNCTION__) == FALSE) {
|
|
|
af918f |
- crm_debug("Notifying %d clients of join-%d results",
|
|
|
af918f |
- crmd_join_phase_count(crm_join_integrated), current_join_id);
|
|
|
af918f |
+ int count_integrated = crmd_join_phase_count(crm_join_integrated);
|
|
|
af918f |
+
|
|
|
af918f |
+ crm_debug("Notifying %d node%s of join-%d results",
|
|
|
af918f |
+ count_integrated, pcmk__plural_s(count_integrated),
|
|
|
af918f |
+ current_join_id);
|
|
|
af918f |
g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL);
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
- } else {
|
|
|
af918f |
- crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s",
|
|
|
af918f |
- AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state));
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
@@ -491,11 +513,14 @@ join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * outp
|
|
|
af918f |
fsa_data_t *msg_data = NULL;
|
|
|
af918f |
|
|
|
af918f |
if (rc == pcmk_ok) {
|
|
|
af918f |
- crm_debug("Join update %d complete", call_id);
|
|
|
af918f |
+ crm_debug("join-%d node history update (via CIB call %d) complete",
|
|
|
af918f |
+ current_join_id, call_id);
|
|
|
af918f |
check_join_state(fsa_state, __FUNCTION__);
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_err("Join update %d failed", call_id);
|
|
|
af918f |
+ crm_err("join-%d node history update (via CIB call %d) failed: %s "
|
|
|
af918f |
+ "(next transition may determine resource status incorrectly)",
|
|
|
af918f |
+ current_join_id, call_id, pcmk_strerror(rc));
|
|
|
af918f |
crm_log_xml_debug(msg, "failed");
|
|
|
af918f |
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
|
|
|
af918f |
}
|
|
|
af918f |
@@ -514,60 +539,75 @@ do_dc_join_ack(long long action,
|
|
|
af918f |
|
|
|
af918f |
const char *op = crm_element_value(join_ack->msg, F_CRM_TASK);
|
|
|
af918f |
const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM);
|
|
|
af918f |
- crm_node_t *peer = crm_get_peer(0, join_from);
|
|
|
af918f |
+ crm_node_t *peer = NULL;
|
|
|
af918f |
|
|
|
af918f |
- if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) {
|
|
|
af918f |
- crm_debug("Ignoring op=%s message from %s", op, join_from);
|
|
|
af918f |
+ // Sanity checks
|
|
|
af918f |
+ if (join_from == NULL) {
|
|
|
af918f |
+ crm_warn("Ignoring message received without node identification");
|
|
|
af918f |
+ return;
|
|
|
af918f |
+ }
|
|
|
af918f |
+ if (op == NULL) {
|
|
|
af918f |
+ crm_warn("Ignoring message received from %s without task", join_from);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- crm_trace("Processing ack from %s", join_from);
|
|
|
af918f |
- crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id);
|
|
|
af918f |
+ if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
|
|
|
af918f |
+ crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
|
|
|
af918f |
+ op, join_from, CRM_OP_JOIN_CONFIRM);
|
|
|
af918f |
+ return;
|
|
|
af918f |
+ }
|
|
|
af918f |
|
|
|
af918f |
+ if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) {
|
|
|
af918f |
+ crm_warn("Ignoring join confirmation from %s without valid join ID",
|
|
|
af918f |
+ join_from);
|
|
|
af918f |
+ return;
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
+ peer = crm_get_peer(0, join_from);
|
|
|
af918f |
if (peer->join != crm_join_finalized) {
|
|
|
af918f |
- crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)",
|
|
|
af918f |
- join_id, join_from, peer->join);
|
|
|
af918f |
+ crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
|
|
|
af918f |
+ "(currently %s not %s)",
|
|
|
af918f |
+ join_id, join_from, crm_join_phase_str(peer->join),
|
|
|
af918f |
+ crm_join_phase_str(crm_join_finalized));
|
|
|
af918f |
return;
|
|
|
af918f |
+ }
|
|
|
af918f |
|
|
|
af918f |
- } else if (join_id != current_join_id) {
|
|
|
af918f |
- crm_err("Invalid response from %s: join-%d vs. join-%d",
|
|
|
af918f |
- join_from, join_id, current_join_id);
|
|
|
af918f |
+ if (join_id != current_join_id) {
|
|
|
af918f |
+ crm_err("Rejecting join-%d confirmation from %s "
|
|
|
af918f |
+ "because currently on join-%d",
|
|
|
af918f |
+ join_id, join_from, current_join_id);
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, peer, crm_join_nack);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed);
|
|
|
af918f |
|
|
|
af918f |
- crm_info("join-%d: Updating node state to %s for %s",
|
|
|
af918f |
- join_id, CRMD_JOINSTATE_MEMBER, join_from);
|
|
|
af918f |
-
|
|
|
af918f |
- /* update CIB with the current LRM status from the node
|
|
|
af918f |
- * We don't need to notify the TE of these updates, a transition will
|
|
|
af918f |
- * be started in due time
|
|
|
af918f |
+ /* Update CIB with node's current LRM state. A new transition will be
|
|
|
af918f |
+ * triggered later, when the CIB notifies us of the change.
|
|
|
af918f |
*/
|
|
|
af918f |
erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
|
|
|
af918f |
-
|
|
|
af918f |
if (safe_str_eq(join_from, fsa_our_uname)) {
|
|
|
af918f |
xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
|
|
|
af918f |
|
|
|
af918f |
if (now_dc_lrmd_state != NULL) {
|
|
|
af918f |
- crm_debug("LRM state is updated from do_lrm_query.(%s)", join_from);
|
|
|
af918f |
fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
|
|
|
af918f |
cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
|
|
|
af918f |
free_xml(now_dc_lrmd_state);
|
|
|
af918f |
+ crm_debug("Updating local node history for join-%d "
|
|
|
af918f |
+ "from query result (via CIB call %d)", join_id, call_id);
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_warn("Could not get our LRM state. LRM state is updated from join_ack->xml.(%s)", join_from);
|
|
|
af918f |
fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
|
|
|
af918f |
cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
|
|
|
af918f |
+ crm_warn("Updating local node history from join-%d confirmation "
|
|
|
af918f |
+ "because query failed (via CIB call %d)", join_id, call_id);
|
|
|
af918f |
}
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_debug("LRM state is updated from join_ack->xml.(%s)", join_from);
|
|
|
af918f |
fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml,
|
|
|
af918f |
cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL);
|
|
|
af918f |
+ crm_debug("Updating node history for %s from join-%d confirmation "
|
|
|
af918f |
+ "(via CIB call %d)", join_from, join_id, call_id);
|
|
|
af918f |
}
|
|
|
af918f |
-
|
|
|
af918f |
fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback);
|
|
|
af918f |
- crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
void
|
|
|
af918f |
@@ -579,17 +619,16 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
const char *join_to = join_node->uname;
|
|
|
af918f |
|
|
|
af918f |
if(join_node->join != crm_join_integrated) {
|
|
|
af918f |
- crm_trace("Skipping %s in state %d", join_to, join_node->join);
|
|
|
af918f |
+ crm_trace("Not updating non-integrated node %s (%s) for join-%d",
|
|
|
af918f |
+ join_to, crm_join_phase_str(join_node->join),
|
|
|
af918f |
+ current_join_id);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- /* make sure a node entry exists for the new node */
|
|
|
af918f |
- crm_trace("Creating node entry for %s", join_to);
|
|
|
af918f |
-
|
|
|
af918f |
+ crm_trace("Updating node state for %s", join_to);
|
|
|
af918f |
tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE);
|
|
|
af918f |
set_uuid(tmp1, XML_ATTR_UUID, join_node);
|
|
|
af918f |
crm_xml_add(tmp1, XML_ATTR_UNAME, join_to);
|
|
|
af918f |
-
|
|
|
af918f |
fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1);
|
|
|
af918f |
free_xml(tmp1);
|
|
|
af918f |
|
|
|
af918f |
@@ -608,11 +647,10 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data)
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- /* send the ack/nack to the node */
|
|
|
af918f |
- acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
|
|
|
af918f |
-
|
|
|
af918f |
- crm_debug("join-%d: ACK'ing join request from %s",
|
|
|
af918f |
+ // Acknowledge node's join request
|
|
|
af918f |
+ crm_debug("Acknowledging join-%d request from %s",
|
|
|
af918f |
current_join_id, join_to);
|
|
|
af918f |
+ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
|
|
|
af918f |
crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE);
|
|
|
af918f |
crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized);
|
|
|
af918f |
crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER);
|
|
|
af918f |
@@ -629,11 +667,11 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source)
|
|
|
af918f |
{
|
|
|
af918f |
static unsigned long long highest_seq = 0;
|
|
|
af918f |
|
|
|
af918f |
- crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state));
|
|
|
af918f |
-
|
|
|
af918f |
if (saved_ccm_membership_id != crm_peer_seq) {
|
|
|
af918f |
- crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)",
|
|
|
af918f |
- source, saved_ccm_membership_id, crm_peer_seq, highest_seq);
|
|
|
af918f |
+ crm_debug("join-%d: Membership changed from %llu to %llu "
|
|
|
af918f |
+ CRM_XS " highest=%llu state=%s for=%s",
|
|
|
af918f |
+ current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq,
|
|
|
af918f |
+ fsa_state2string(cur_state), source);
|
|
|
af918f |
if(highest_seq < crm_peer_seq) {
|
|
|
af918f |
/* Don't spam the FSA with duplicates */
|
|
|
af918f |
highest_seq = crm_peer_seq;
|
|
|
af918f |
@@ -642,34 +680,53 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source)
|
|
|
af918f |
|
|
|
af918f |
} else if (cur_state == S_INTEGRATION) {
|
|
|
af918f |
if (crmd_join_phase_count(crm_join_welcomed) == 0) {
|
|
|
af918f |
- crm_debug("join-%d: Integration of %d peers complete: %s",
|
|
|
af918f |
- current_join_id, crmd_join_phase_count(crm_join_integrated), source);
|
|
|
af918f |
+ int count = crmd_join_phase_count(crm_join_integrated);
|
|
|
af918f |
+
|
|
|
af918f |
+ crm_debug("join-%d: Integration of %d peer%s complete "
|
|
|
af918f |
+ CRM_XS " state=%s for=%s",
|
|
|
af918f |
+ current_join_id, count, pcmk__plural_s(count),
|
|
|
af918f |
+ fsa_state2string(cur_state), source);
|
|
|
af918f |
register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
|
|
|
af918f |
return TRUE;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
} else if (cur_state == S_FINALIZE_JOIN) {
|
|
|
af918f |
if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) {
|
|
|
af918f |
- crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id);
|
|
|
af918f |
+ crm_debug("join-%d: Delaying finalization until we have CIB "
|
|
|
af918f |
+ CRM_XS " state=%s for=%s",
|
|
|
af918f |
+ current_join_id, fsa_state2string(cur_state), source);
|
|
|
af918f |
return TRUE;
|
|
|
af918f |
|
|
|
af918f |
} else if (crmd_join_phase_count(crm_join_welcomed) != 0) {
|
|
|
af918f |
- crm_debug("join-%d: Still waiting on %d welcomed nodes",
|
|
|
af918f |
- current_join_id, crmd_join_phase_count(crm_join_welcomed));
|
|
|
af918f |
+ int count = crmd_join_phase_count(crm_join_welcomed);
|
|
|
af918f |
+
|
|
|
af918f |
+ crm_debug("join-%d: Still waiting on %d welcomed node%s "
|
|
|
af918f |
+ CRM_XS " state=%s for=%s",
|
|
|
af918f |
+ current_join_id, count, pcmk__plural_s(count),
|
|
|
af918f |
+ fsa_state2string(cur_state), source);
|
|
|
af918f |
crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
|
|
|
af918f |
} else if (crmd_join_phase_count(crm_join_integrated) != 0) {
|
|
|
af918f |
- crm_debug("join-%d: Still waiting on %d integrated nodes",
|
|
|
af918f |
- current_join_id, crmd_join_phase_count(crm_join_integrated));
|
|
|
af918f |
+ int count = crmd_join_phase_count(crm_join_integrated);
|
|
|
af918f |
+
|
|
|
af918f |
+ crm_debug("join-%d: Still waiting on %d integrated node%s "
|
|
|
af918f |
+ CRM_XS " state=%s for=%s",
|
|
|
af918f |
+ current_join_id, count, pcmk__plural_s(count),
|
|
|
af918f |
+ fsa_state2string(cur_state), source);
|
|
|
af918f |
crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
|
|
|
af918f |
} else if (crmd_join_phase_count(crm_join_finalized) != 0) {
|
|
|
af918f |
- crm_debug("join-%d: Still waiting on %d finalized nodes",
|
|
|
af918f |
- current_join_id, crmd_join_phase_count(crm_join_finalized));
|
|
|
af918f |
+ int count = crmd_join_phase_count(crm_join_finalized);
|
|
|
af918f |
+
|
|
|
af918f |
+ crm_debug("join-%d: Still waiting on %d finalized node%s "
|
|
|
af918f |
+ CRM_XS " state=%s for=%s",
|
|
|
af918f |
+ current_join_id, count, pcmk__plural_s(count),
|
|
|
af918f |
+ fsa_state2string(cur_state), source);
|
|
|
af918f |
crmd_join_phase_log(LOG_DEBUG);
|
|
|
af918f |
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_debug("join-%d complete: %s", current_join_id, source);
|
|
|
af918f |
+ crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s",
|
|
|
af918f |
+ current_join_id, fsa_state2string(cur_state), source);
|
|
|
af918f |
register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
|
|
|
af918f |
return TRUE;
|
|
|
af918f |
}
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 770cdaa5c477bfc75a6a38afc1d34f0d4f521ee1 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 13 Dec 2019 10:39:34 -0600
|
|
|
af918f |
Subject: [PATCH 09/11] Log: controller: improve CIB status deletion messages
|
|
|
af918f |
|
|
|
af918f |
---
|
|
|
af918f |
crmd/utils.c | 25 +++++++++++++++++--------
|
|
|
af918f |
1 file changed, 17 insertions(+), 8 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/utils.c b/crmd/utils.c
|
|
|
af918f |
index 761f5a7..47aa6f0 100644
|
|
|
af918f |
--- a/crmd/utils.c
|
|
|
af918f |
+++ b/crmd/utils.c
|
|
|
af918f |
@@ -983,14 +983,18 @@ update_dc(xmlNode * msg)
|
|
|
af918f |
return TRUE;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
-#define STATUS_PATH_MAX 512
|
|
|
af918f |
static void
|
|
|
af918f |
erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
|
|
|
af918f |
{
|
|
|
af918f |
char *xpath = user_data;
|
|
|
af918f |
|
|
|
af918f |
- do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
|
|
|
af918f |
- "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc);
|
|
|
af918f |
+ if (rc == 0) {
|
|
|
af918f |
+ crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
|
|
|
af918f |
+ xpath, call_id);
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
|
|
|
af918f |
+ CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
|
|
|
af918f |
+ }
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
|
|
|
af918f |
@@ -998,14 +1002,19 @@ erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
|
|
|
af918f |
void
|
|
|
af918f |
erase_status_tag(const char *uname, const char *tag, int options)
|
|
|
af918f |
{
|
|
|
af918f |
- if (fsa_cib_conn && uname) {
|
|
|
af918f |
+ CRM_CHECK(uname != NULL, return);
|
|
|
af918f |
+
|
|
|
af918f |
+ if (fsa_cib_conn == NULL) {
|
|
|
af918f |
+ crm_warn("Unable to delete CIB '%s' section for node %s: "
|
|
|
af918f |
+ "no CIB connection", tag, uname);
|
|
|
af918f |
+ } else {
|
|
|
af918f |
int call_id;
|
|
|
af918f |
char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
|
|
|
af918f |
|
|
|
af918f |
- crm_info("Deleting %s status entries for %s " CRM_XS " xpath=%s",
|
|
|
af918f |
- tag, uname, xpath);
|
|
|
af918f |
- call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL,
|
|
|
af918f |
- cib_quorum_override | cib_xpath | options);
|
|
|
af918f |
+ options |= cib_quorum_override|cib_xpath;
|
|
|
af918f |
+ call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, options);
|
|
|
af918f |
+ crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
|
|
|
af918f |
+ CRM_XS " xpath=%s", tag, uname, call_id, xpath);
|
|
|
af918f |
fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
|
|
|
af918f |
// CIB library handles freeing xpath
|
|
|
af918f |
}
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 19d74b26b9b30157499c2fc4bd2da55c408fc638 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 13 Dec 2019 10:36:56 -0600
|
|
|
af918f |
Subject: [PATCH 10/11] Refactor: controller: move erase_status_tag() to
|
|
|
af918f |
controld_based.c
|
|
|
af918f |
|
|
|
af918f |
---
|
|
|
af918f |
crmd/cib.c | 38 ++++++++++++++++++++++++++++++++++++++
|
|
|
af918f |
crmd/utils.c | 37 -------------------------------------
|
|
|
af918f |
2 files changed, 38 insertions(+), 37 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/cib.c b/crmd/cib.c
|
|
|
af918f |
index a8a097a..ff489b4 100644
|
|
|
af918f |
--- a/crmd/cib.c
|
|
|
af918f |
+++ b/crmd/cib.c
|
|
|
af918f |
@@ -224,3 +224,41 @@ controld_action_is_recordable(const char *action)
|
|
|
af918f |
}
|
|
|
af918f |
return TRUE;
|
|
|
af918f |
}
|
|
|
af918f |
+
|
|
|
af918f |
+static void
|
|
|
af918f |
+erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
|
af918f |
+ void *user_data)
|
|
|
af918f |
+{
|
|
|
af918f |
+ char *xpath = user_data;
|
|
|
af918f |
+
|
|
|
af918f |
+ if (rc == 0) {
|
|
|
af918f |
+ crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
|
|
|
af918f |
+ xpath, call_id);
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
|
|
|
af918f |
+ CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
|
|
|
af918f |
+ }
|
|
|
af918f |
+}
|
|
|
af918f |
+
|
|
|
af918f |
+#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
|
|
|
af918f |
+
|
|
|
af918f |
+void
|
|
|
af918f |
+erase_status_tag(const char *uname, const char *tag, int options)
|
|
|
af918f |
+{
|
|
|
af918f |
+ CRM_CHECK(uname != NULL, return);
|
|
|
af918f |
+
|
|
|
af918f |
+ if (fsa_cib_conn == NULL) {
|
|
|
af918f |
+ crm_warn("Unable to delete CIB '%s' section for node %s: "
|
|
|
af918f |
+ "no CIB connection", tag, uname);
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ int call_id;
|
|
|
af918f |
+ char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
|
|
|
af918f |
+
|
|
|
af918f |
+ options |= cib_quorum_override|cib_xpath;
|
|
|
af918f |
+ call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, options);
|
|
|
af918f |
+ crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
|
|
|
af918f |
+ CRM_XS " xpath=%s", tag, uname, call_id, xpath);
|
|
|
af918f |
+ fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
|
|
|
af918f |
+ // CIB library handles freeing xpath
|
|
|
af918f |
+ }
|
|
|
af918f |
+}
|
|
|
af918f |
diff --git a/crmd/utils.c b/crmd/utils.c
|
|
|
af918f |
index 47aa6f0..b2a0b7d 100644
|
|
|
af918f |
--- a/crmd/utils.c
|
|
|
af918f |
+++ b/crmd/utils.c
|
|
|
af918f |
@@ -983,43 +983,6 @@ update_dc(xmlNode * msg)
|
|
|
af918f |
return TRUE;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
-static void
|
|
|
af918f |
-erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
|
|
|
af918f |
-{
|
|
|
af918f |
- char *xpath = user_data;
|
|
|
af918f |
-
|
|
|
af918f |
- if (rc == 0) {
|
|
|
af918f |
- crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
|
|
|
af918f |
- xpath, call_id);
|
|
|
af918f |
- } else {
|
|
|
af918f |
- crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
|
|
|
af918f |
- CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
|
|
|
af918f |
- }
|
|
|
af918f |
-}
|
|
|
af918f |
-
|
|
|
af918f |
-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
|
|
|
af918f |
-
|
|
|
af918f |
-void
|
|
|
af918f |
-erase_status_tag(const char *uname, const char *tag, int options)
|
|
|
af918f |
-{
|
|
|
af918f |
- CRM_CHECK(uname != NULL, return);
|
|
|
af918f |
-
|
|
|
af918f |
- if (fsa_cib_conn == NULL) {
|
|
|
af918f |
- crm_warn("Unable to delete CIB '%s' section for node %s: "
|
|
|
af918f |
- "no CIB connection", tag, uname);
|
|
|
af918f |
- } else {
|
|
|
af918f |
- int call_id;
|
|
|
af918f |
- char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
|
|
|
af918f |
-
|
|
|
af918f |
- options |= cib_quorum_override|cib_xpath;
|
|
|
af918f |
- call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, options);
|
|
|
af918f |
- crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
|
|
|
af918f |
- CRM_XS " xpath=%s", tag, uname, call_id, xpath);
|
|
|
af918f |
- fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
|
|
|
af918f |
- // CIB library handles freeing xpath
|
|
|
af918f |
- }
|
|
|
af918f |
-}
|
|
|
af918f |
-
|
|
|
af918f |
void crmd_peer_down(crm_node_t *peer, bool full)
|
|
|
af918f |
{
|
|
|
af918f |
if(full && peer->state == NULL) {
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|
|
|
af918f |
|
|
|
af918f |
From 8ba2bfa5aca514dcd2ad6c8a4f88ffedd028d206 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 13 Dec 2019 11:16:25 -0600
|
|
|
af918f |
Subject: [PATCH 11/11] Refactor: controller: improve efficiency when deleting
|
|
|
af918f |
node state
|
|
|
af918f |
|
|
|
af918f |
Rename erase_status_tag() to controld_delete_node_state() to follow current
|
|
|
af918f |
naming practice.
|
|
|
af918f |
|
|
|
af918f |
Instead of passing it a node_state subsection name, pass a new enum value
|
|
|
af918f |
indicating what to erase (resource history, transient node attributes, or
|
|
|
af918f |
both). This allows us to improve the log messages further, as well as improving
|
|
|
af918f |
efficiency when both need to be cleared.
|
|
|
af918f |
---
|
|
|
af918f |
crmd/callbacks.c | 15 +++++------
|
|
|
af918f |
crmd/cib.c | 69 +++++++++++++++++++++++++++++++++++++++------------
|
|
|
af918f |
crmd/crmd_utils.h | 11 +++++++-
|
|
|
af918f |
crmd/join_client.c | 3 ++-
|
|
|
af918f |
crmd/join_dc.c | 3 ++-
|
|
|
af918f |
crmd/lrm.c | 3 ++-
|
|
|
af918f |
crmd/remote_lrmd_ra.c | 24 +++++++++---------
|
|
|
af918f |
crmd/te_actions.c | 5 ++--
|
|
|
af918f |
8 files changed, 91 insertions(+), 42 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/callbacks.c b/crmd/callbacks.c
|
|
|
af918f |
index 7560470..419b154 100644
|
|
|
af918f |
--- a/crmd/callbacks.c
|
|
|
af918f |
+++ b/crmd/callbacks.c
|
|
|
af918f |
@@ -202,17 +202,18 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
|
|
|
af918f |
* transient attributes intact until it rejoins.
|
|
|
af918f |
*/
|
|
|
af918f |
if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
|
|
|
af918f |
- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
|
|
|
af918f |
+ controld_delete_node_state(node->uname,
|
|
|
af918f |
+ controld_section_attrs,
|
|
|
af918f |
+ cib_scope_local);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
} else if(AM_I_DC) {
|
|
|
af918f |
- if (appeared == FALSE) {
|
|
|
af918f |
- crm_info("Peer %s left us", node->uname);
|
|
|
af918f |
- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
|
|
|
af918f |
- } else {
|
|
|
af918f |
- crm_info("New peer %s we want to sync fence history with",
|
|
|
af918f |
- node->uname);
|
|
|
af918f |
+ if (appeared) {
|
|
|
af918f |
te_trigger_stonith_history_sync(FALSE);
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ controld_delete_node_state(node->uname,
|
|
|
af918f |
+ controld_section_attrs,
|
|
|
af918f |
+ cib_scope_local);
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
break;
|
|
|
af918f |
diff --git a/crmd/cib.c b/crmd/cib.c
|
|
|
af918f |
index ff489b4..c602130 100644
|
|
|
af918f |
--- a/crmd/cib.c
|
|
|
af918f |
+++ b/crmd/cib.c
|
|
|
af918f |
@@ -226,39 +226,76 @@ controld_action_is_recordable(const char *action)
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
static void
|
|
|
af918f |
-erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
|
af918f |
- void *user_data)
|
|
|
af918f |
+cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
|
af918f |
+ void *user_data)
|
|
|
af918f |
{
|
|
|
af918f |
- char *xpath = user_data;
|
|
|
af918f |
+ char *desc = user_data;
|
|
|
af918f |
|
|
|
af918f |
if (rc == 0) {
|
|
|
af918f |
- crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded",
|
|
|
af918f |
- xpath, call_id);
|
|
|
af918f |
+ crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id);
|
|
|
af918f |
} else {
|
|
|
af918f |
- crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s "
|
|
|
af918f |
- CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc);
|
|
|
af918f |
+ crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d",
|
|
|
af918f |
+ desc, call_id, pcmk_strerror(rc), rc);
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
-#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s"
|
|
|
af918f |
+// Searches for various portions of node_state to delete
|
|
|
af918f |
|
|
|
af918f |
+// Match a particular node's node_state (takes node name 1x)
|
|
|
af918f |
+#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"
|
|
|
af918f |
+
|
|
|
af918f |
+// Node's lrm section (name 1x)
|
|
|
af918f |
+#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM
|
|
|
af918f |
+
|
|
|
af918f |
+// Node's transient_attributes section (name 1x)
|
|
|
af918f |
+#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS
|
|
|
af918f |
+
|
|
|
af918f |
+// Everything under node_state (name 1x)
|
|
|
af918f |
+#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
|
|
|
af918f |
+
|
|
|
af918f |
+/*!
|
|
|
af918f |
+ * \internal
|
|
|
af918f |
+ * \brief Delete subsection of a node's CIB node_state
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * \param[in] uname Desired node
|
|
|
af918f |
+ * \param[in] section Subsection of node_state to delete
|
|
|
af918f |
+ * \param[in] options CIB call options to use
|
|
|
af918f |
+ */
|
|
|
af918f |
void
|
|
|
af918f |
-erase_status_tag(const char *uname, const char *tag, int options)
|
|
|
af918f |
+controld_delete_node_state(const char *uname, enum controld_section_e section,
|
|
|
af918f |
+ int options)
|
|
|
af918f |
{
|
|
|
af918f |
+ char *xpath = NULL;
|
|
|
af918f |
+ char *desc = NULL;
|
|
|
af918f |
+
|
|
|
af918f |
CRM_CHECK(uname != NULL, return);
|
|
|
af918f |
+ switch (section) {
|
|
|
af918f |
+ case controld_section_lrm:
|
|
|
af918f |
+ xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
|
|
|
af918f |
+ desc = crm_strdup_printf("resource history for node %s", uname);
|
|
|
af918f |
+ break;
|
|
|
af918f |
+ case controld_section_attrs:
|
|
|
af918f |
+ xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
|
|
|
af918f |
+ desc = crm_strdup_printf("transient attributes for node %s", uname);
|
|
|
af918f |
+ break;
|
|
|
af918f |
+ case controld_section_all:
|
|
|
af918f |
+ xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
|
|
|
af918f |
+ desc = crm_strdup_printf("all state for node %s", uname);
|
|
|
af918f |
+ break;
|
|
|
af918f |
+ }
|
|
|
af918f |
|
|
|
af918f |
if (fsa_cib_conn == NULL) {
|
|
|
af918f |
- crm_warn("Unable to delete CIB '%s' section for node %s: "
|
|
|
af918f |
- "no CIB connection", tag, uname);
|
|
|
af918f |
+ crm_warn("Unable to delete %s: no CIB connection", desc);
|
|
|
af918f |
+ free(desc);
|
|
|
af918f |
} else {
|
|
|
af918f |
int call_id;
|
|
|
af918f |
- char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag);
|
|
|
af918f |
|
|
|
af918f |
options |= cib_quorum_override|cib_xpath;
|
|
|
af918f |
call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, xpath, NULL, options);
|
|
|
af918f |
- crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) "
|
|
|
af918f |
- CRM_XS " xpath=%s", tag, uname, call_id, xpath);
|
|
|
af918f |
- fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback);
|
|
|
af918f |
- // CIB library handles freeing xpath
|
|
|
af918f |
+ crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s",
|
|
|
af918f |
+ desc, call_id, xpath);
|
|
|
af918f |
+ fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback);
|
|
|
af918f |
+ // CIB library handles freeing desc
|
|
|
af918f |
}
|
|
|
af918f |
+ free(xpath);
|
|
|
af918f |
}
|
|
|
af918f |
diff --git a/crmd/crmd_utils.h b/crmd/crmd_utils.h
|
|
|
af918f |
index 955d859..9afa2ca 100644
|
|
|
af918f |
--- a/crmd/crmd_utils.h
|
|
|
af918f |
+++ b/crmd/crmd_utils.h
|
|
|
af918f |
@@ -96,7 +96,6 @@ xmlNode *create_node_state_update(crm_node_t *node, int flags,
|
|
|
af918f |
xmlNode *parent, const char *source);
|
|
|
af918f |
void populate_cib_nodes(enum node_update_flags flags, const char *source);
|
|
|
af918f |
void crm_update_quorum(gboolean quorum, gboolean force_update);
|
|
|
af918f |
-void erase_status_tag(const char *uname, const char *tag, int options);
|
|
|
af918f |
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
|
|
|
af918f |
void update_attrd_remote_node_removed(const char *host, const char *user_name);
|
|
|
af918f |
void update_attrd_clear_failures(const char *host, const char *rsc,
|
|
|
af918f |
@@ -115,6 +114,16 @@ void crmd_peer_down(crm_node_t *peer, bool full);
|
|
|
af918f |
unsigned int cib_op_timeout(void);
|
|
|
af918f |
bool controld_action_is_recordable(const char *action);
|
|
|
af918f |
|
|
|
af918f |
+// Subsections of node_state
|
|
|
af918f |
+enum controld_section_e {
|
|
|
af918f |
+ controld_section_lrm,
|
|
|
af918f |
+ controld_section_attrs,
|
|
|
af918f |
+ controld_section_all,
|
|
|
af918f |
+};
|
|
|
af918f |
+
|
|
|
af918f |
+void controld_delete_node_state(const char *uname,
|
|
|
af918f |
+ enum controld_section_e section, int options);
|
|
|
af918f |
+
|
|
|
af918f |
const char *get_node_id(xmlNode *lrm_rsc_op);
|
|
|
af918f |
|
|
|
af918f |
/* Convenience macro for registering a CIB callback
|
|
|
af918f |
diff --git a/crmd/join_client.c b/crmd/join_client.c
|
|
|
af918f |
index 2142d21..9f572ad 100644
|
|
|
af918f |
--- a/crmd/join_client.c
|
|
|
af918f |
+++ b/crmd/join_client.c
|
|
|
af918f |
@@ -298,7 +298,8 @@ do_cl_join_finalize_respond(long long action,
|
|
|
af918f |
* present for legacy attrd, but given legacy attrd's imminent
|
|
|
af918f |
* demise, this is preferable to making intrusive changes to it.
|
|
|
af918f |
*/
|
|
|
af918f |
- erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
|
|
|
af918f |
+ controld_delete_node_state(fsa_our_uname, controld_section_attrs,
|
|
|
af918f |
+ cib_scope_local);
|
|
|
af918f |
update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
|
|
|
af918f |
update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);
|
|
|
af918f |
#endif
|
|
|
af918f |
diff --git a/crmd/join_dc.c b/crmd/join_dc.c
|
|
|
af918f |
index cdb3f77..6705022 100644
|
|
|
af918f |
--- a/crmd/join_dc.c
|
|
|
af918f |
+++ b/crmd/join_dc.c
|
|
|
af918f |
@@ -585,7 +585,8 @@ do_dc_join_ack(long long action,
|
|
|
af918f |
/* Update CIB with node's current LRM state. A new transition will be
|
|
|
af918f |
* triggered later, when the CIB notifies us of the change.
|
|
|
af918f |
*/
|
|
|
af918f |
- erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local);
|
|
|
af918f |
+ controld_delete_node_state(join_from, controld_section_lrm,
|
|
|
af918f |
+ cib_scope_local);
|
|
|
af918f |
if (safe_str_eq(join_from, fsa_our_uname)) {
|
|
|
af918f |
xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname);
|
|
|
af918f |
|
|
|
af918f |
diff --git a/crmd/lrm.c b/crmd/lrm.c
|
|
|
af918f |
index 80fcd69..2c9e475 100644
|
|
|
af918f |
--- a/crmd/lrm.c
|
|
|
af918f |
+++ b/crmd/lrm.c
|
|
|
af918f |
@@ -1421,7 +1421,8 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
/* Now delete the copy in the CIB */
|
|
|
af918f |
- erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
|
|
|
af918f |
+ controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
|
|
|
af918f |
+ cib_scope_local);
|
|
|
af918f |
|
|
|
af918f |
/* And finally, _delete_ the value in attrd
|
|
|
af918f |
* Setting it to FALSE results in the PE sending us back here again
|
|
|
af918f |
diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c
|
|
|
af918f |
index 1214814..c4f58d6 100644
|
|
|
af918f |
--- a/crmd/remote_lrmd_ra.c
|
|
|
af918f |
+++ b/crmd/remote_lrmd_ra.c
|
|
|
af918f |
@@ -195,13 +195,13 @@ remote_node_up(const char *node_name)
|
|
|
af918f |
CRM_CHECK(node_name != NULL, return);
|
|
|
af918f |
crm_info("Announcing pacemaker_remote node %s", node_name);
|
|
|
af918f |
|
|
|
af918f |
- /* Clear node's operation history. The node's transient attributes should
|
|
|
af918f |
- * and normally will be cleared when the node leaves, but since remote node
|
|
|
af918f |
- * state has a number of corner cases, clear them here as well, to be sure.
|
|
|
af918f |
+ /* Clear node's entire state (resource history and transient attributes).
|
|
|
af918f |
+ * The transient attributes should and normally will be cleared when the
|
|
|
af918f |
+ * node leaves, but since remote node state has a number of corner cases,
|
|
|
af918f |
+ * clear them here as well, to be sure.
|
|
|
af918f |
*/
|
|
|
af918f |
call_opt = crmd_cib_smart_opt();
|
|
|
af918f |
- erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
|
|
|
af918f |
- erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
|
|
|
af918f |
+ controld_delete_node_state(node_name, controld_section_all, call_opt);
|
|
|
af918f |
|
|
|
af918f |
/* Clear node's probed attribute */
|
|
|
af918f |
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
|
|
|
af918f |
@@ -266,15 +266,15 @@ remote_node_down(const char *node_name, const enum down_opts opts)
|
|
|
af918f |
/* Purge node from attrd's memory */
|
|
|
af918f |
update_attrd_remote_node_removed(node_name, NULL);
|
|
|
af918f |
|
|
|
af918f |
- /* Purge node's transient attributes */
|
|
|
af918f |
- erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt);
|
|
|
af918f |
-
|
|
|
af918f |
- /* Normally, the LRM operation history should be kept until the node comes
|
|
|
af918f |
- * back up. However, after a successful fence, we want to clear it, so we
|
|
|
af918f |
- * don't think resources are still running on the node.
|
|
|
af918f |
+ /* Normally, only node attributes should be erased, and the resource history
|
|
|
af918f |
+ * should be kept until the node comes back up. However, after a successful
|
|
|
af918f |
+ * fence, we want to clear the history as well, so we don't think resources
|
|
|
af918f |
+ * are still running on the node.
|
|
|
af918f |
*/
|
|
|
af918f |
if (opts == DOWN_ERASE_LRM) {
|
|
|
af918f |
- erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt);
|
|
|
af918f |
+ controld_delete_node_state(node_name, controld_section_all, call_opt);
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ controld_delete_node_state(node_name, controld_section_attrs, call_opt);
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
/* Ensure node is in the remote peer cache with lost state */
|
|
|
af918f |
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
|
|
|
af918f |
index 14097ab..19bb199 100644
|
|
|
af918f |
--- a/crmd/te_actions.c
|
|
|
af918f |
+++ b/crmd/te_actions.c
|
|
|
af918f |
@@ -150,9 +150,8 @@ send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
|
|
|
af918f |
/* Make sure it sticks */
|
|
|
af918f |
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
|
|
|
af918f |
|
|
|
af918f |
- erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
|
|
|
af918f |
- erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
|
|
|
af918f |
-
|
|
|
af918f |
+ controld_delete_node_state(peer->uname, controld_section_all,
|
|
|
af918f |
+ cib_scope_local);
|
|
|
af918f |
free_xml(node_state);
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|