diff --git a/.gitignore b/.gitignore index 302b56c..6e6662f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ SOURCES/nagios-agents-metadata-105ab8a.tar.gz -SOURCES/pacemaker-4b1f869.tar.gz +SOURCES/pacemaker-2deceaa.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index 1c52241..6a9af04 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,2 @@ ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz -dfd19e7ec7aa96520f4948fc37d48ea69835bbdb SOURCES/pacemaker-4b1f869.tar.gz +78c94fdcf59cfb064d4433e1b8f71fd856eeec5f SOURCES/pacemaker-2deceaa.tar.gz diff --git a/SOURCES/001-rules.patch b/SOURCES/001-rules.patch new file mode 100644 index 0000000..0133975 --- /dev/null +++ b/SOURCES/001-rules.patch @@ -0,0 +1,4947 @@ +From 2f10dde2f2a0ac7a3d74cb2f398be1deaba75615 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 6 Apr 2020 11:22:50 -0400 +Subject: [PATCH 01/17] Feature: scheduler: Add new expression_type values. + +--- + include/crm/pengine/rules.h | 4 +++- + lib/pengine/rules.c | 6 ++++++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/include/crm/pengine/rules.h b/include/crm/pengine/rules.h +index ebd3148..37f092b 100644 +--- a/include/crm/pengine/rules.h ++++ b/include/crm/pengine/rules.h +@@ -28,7 +28,9 @@ enum expression_type { + loc_expr, + role_expr, + time_expr, +- version_expr ++ version_expr, ++ rsc_expr, ++ op_expr + }; + + typedef struct pe_re_match_data { +diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index fa9a222..130bada 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -189,6 +189,12 @@ find_expression_type(xmlNode * expr) + if (safe_str_eq(tag, "date_expression")) { + return time_expr; + ++ } else if (safe_str_eq(tag, "rsc_expression")) { ++ return rsc_expr; ++ ++ } else if (safe_str_eq(tag, "op_expression")) { ++ return op_expr; ++ + } else if (safe_str_eq(tag, XML_TAG_RULE)) { + return nested_rule; + +-- +1.8.3.1 + + +From 
bc7491e5226af2a2e7f1a9b2d61892d3af0767fe Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 3 Apr 2020 15:03:23 -0400 +Subject: [PATCH 02/17] Refactor: scheduler: Add new pe__eval_*_expr functions. + +These new functions all take the same input arguments - an xmlNodePtr +and a pe_rule_eval_data_t. This latter type holds all the parameters +that could possibly be useful for evaluating some rule. Most functions +will only need a few items out of this structure. + +Then, implement pe_test_*_expression in terms of these new functions. +--- + include/crm/pengine/common.h | 37 ++- + include/crm/pengine/rules.h | 13 - + include/crm/pengine/rules_internal.h | 5 + + lib/pengine/rules.c | 592 +++++++++++++++++++---------------- + 4 files changed, 363 insertions(+), 284 deletions(-) + +diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h +index 48c2b66..3a770b7 100644 +--- a/include/crm/pengine/common.h ++++ b/include/crm/pengine/common.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2019 the Pacemaker project contributors ++ * Copyright 2004-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -15,6 +15,9 @@ extern "C" { + #endif + + # include ++# include ++ ++# include + + extern gboolean was_processing_error; + extern gboolean was_processing_warning; +@@ -131,6 +134,38 @@ recovery2text(enum rsc_recovery_type type) + return "Unknown"; + } + ++typedef struct pe_re_match_data { ++ char *string; ++ int nregs; ++ regmatch_t *pmatch; ++} pe_re_match_data_t; ++ ++typedef struct pe_match_data { ++ pe_re_match_data_t *re; ++ GHashTable *params; ++ GHashTable *meta; ++} pe_match_data_t; ++ ++typedef struct pe_rsc_eval_data { ++ const char *standard; ++ const char *provider; ++ const char *agent; ++} pe_rsc_eval_data_t; ++ ++typedef struct pe_op_eval_data { ++ const char *op_name; ++ guint interval; ++} pe_op_eval_data_t; ++ ++typedef struct pe_rule_eval_data { ++ GHashTable *node_hash; ++ enum rsc_role_e role; ++ crm_time_t *now; ++ pe_match_data_t *match_data; ++ pe_rsc_eval_data_t *rsc_data; ++ pe_op_eval_data_t *op_data; ++} pe_rule_eval_data_t; ++ + #ifdef __cplusplus + } + #endif +diff --git a/include/crm/pengine/rules.h b/include/crm/pengine/rules.h +index 37f092b..d7bdbf9 100644 +--- a/include/crm/pengine/rules.h ++++ b/include/crm/pengine/rules.h +@@ -15,7 +15,6 @@ extern "C" { + #endif + + # include +-# include + + # include + # include +@@ -33,18 +32,6 @@ enum expression_type { + op_expr + }; + +-typedef struct pe_re_match_data { +- char *string; +- int nregs; +- regmatch_t *pmatch; +-} pe_re_match_data_t; +- +-typedef struct pe_match_data { +- pe_re_match_data_t *re; +- GHashTable *params; +- GHashTable *meta; +-} pe_match_data_t; +- + enum expression_type find_expression_type(xmlNode * expr); + + gboolean pe_evaluate_rules(xmlNode *ruleset, GHashTable *node_hash, +diff --git a/include/crm/pengine/rules_internal.h b/include/crm/pengine/rules_internal.h +index fd65c1e..8a22108 100644 +--- a/include/crm/pengine/rules_internal.h ++++ b/include/crm/pengine/rules_internal.h +@@ -21,6 +21,11 @@ void pe_free_alert_list(GListPtr alert_list); + + 
crm_time_t *pe_parse_xml_duration(crm_time_t * start, xmlNode * duration_spec); + ++gboolean pe__eval_attr_expr(xmlNode *expr, pe_rule_eval_data_t *rule_data); ++int pe__eval_date_expr(xmlNode *expr, pe_rule_eval_data_t *rule_data, ++ crm_time_t *next_change); ++gboolean pe__eval_role_expr(xmlNode *expr, pe_rule_eval_data_t *rule_data); ++ + int pe_eval_date_expression(xmlNode *time_expr, + crm_time_t *now, + crm_time_t *next_change); +diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index 130bada..3f316c2 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -219,201 +219,34 @@ find_expression_type(xmlNode * expr) + } + + gboolean +-pe_test_role_expression(xmlNode * expr, enum rsc_role_e role, crm_time_t * now) ++pe_test_role_expression(xmlNode *expr, enum rsc_role_e role, crm_time_t *now) + { +- gboolean accept = FALSE; +- const char *op = NULL; +- const char *value = NULL; +- +- if (role == RSC_ROLE_UNKNOWN) { +- return accept; +- } +- +- value = crm_element_value(expr, XML_EXPR_ATTR_VALUE); +- op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION); +- +- if (safe_str_eq(op, "defined")) { +- if (role > RSC_ROLE_STARTED) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "not_defined")) { +- if (role < RSC_ROLE_SLAVE && role > RSC_ROLE_UNKNOWN) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "eq")) { +- if (text2role(value) == role) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "ne")) { +- // Test "ne" only with promotable clone roles +- if (role < RSC_ROLE_SLAVE && role > RSC_ROLE_UNKNOWN) { +- accept = FALSE; +- +- } else if (text2role(value) != role) { +- accept = TRUE; +- } +- } +- return accept; ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = role, ++ .now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ return pe__eval_role_expr(expr, &rule_data); + } + + gboolean + pe_test_attr_expression(xmlNode *expr, GHashTable *hash, crm_time_t *now, + pe_match_data_t 
*match_data) + { +- gboolean accept = FALSE; +- gboolean attr_allocated = FALSE; +- int cmp = 0; +- const char *h_val = NULL; +- GHashTable *table = NULL; +- +- const char *op = NULL; +- const char *type = NULL; +- const char *attr = NULL; +- const char *value = NULL; +- const char *value_source = NULL; +- +- attr = crm_element_value(expr, XML_EXPR_ATTR_ATTRIBUTE); +- op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION); +- value = crm_element_value(expr, XML_EXPR_ATTR_VALUE); +- type = crm_element_value(expr, XML_EXPR_ATTR_TYPE); +- value_source = crm_element_value(expr, XML_EXPR_ATTR_VALUE_SOURCE); +- +- if (attr == NULL || op == NULL) { +- pe_err("Invalid attribute or operation in expression" +- " (\'%s\' \'%s\' \'%s\')", crm_str(attr), crm_str(op), crm_str(value)); +- return FALSE; +- } +- +- if (match_data) { +- if (match_data->re) { +- char *resolved_attr = pe_expand_re_matches(attr, match_data->re); +- +- if (resolved_attr) { +- attr = (const char *) resolved_attr; +- attr_allocated = TRUE; +- } +- } +- +- if (safe_str_eq(value_source, "param")) { +- table = match_data->params; +- } else if (safe_str_eq(value_source, "meta")) { +- table = match_data->meta; +- } +- } +- +- if (table) { +- const char *param_name = value; +- const char *param_value = NULL; +- +- if (param_name && param_name[0]) { +- if ((param_value = (const char *)g_hash_table_lookup(table, param_name))) { +- value = param_value; +- } +- } +- } +- +- if (hash != NULL) { +- h_val = (const char *)g_hash_table_lookup(hash, attr); +- } +- +- if (attr_allocated) { +- free((char *)attr); +- attr = NULL; +- } +- +- if (value != NULL && h_val != NULL) { +- if (type == NULL) { +- if (safe_str_eq(op, "lt") +- || safe_str_eq(op, "lte") +- || safe_str_eq(op, "gt") +- || safe_str_eq(op, "gte")) { +- type = "number"; +- +- } else { +- type = "string"; +- } +- crm_trace("Defaulting to %s based comparison for '%s' op", type, op); +- } +- +- if (safe_str_eq(type, "string")) { +- cmp = strcasecmp(h_val, 
value); +- +- } else if (safe_str_eq(type, "number")) { +- int h_val_f = crm_parse_int(h_val, NULL); +- int value_f = crm_parse_int(value, NULL); +- +- if (h_val_f < value_f) { +- cmp = -1; +- } else if (h_val_f > value_f) { +- cmp = 1; +- } else { +- cmp = 0; +- } +- +- } else if (safe_str_eq(type, "version")) { +- cmp = compare_version(h_val, value); +- +- } +- +- } else if (value == NULL && h_val == NULL) { +- cmp = 0; +- } else if (value == NULL) { +- cmp = 1; +- } else { +- cmp = -1; +- } +- +- if (safe_str_eq(op, "defined")) { +- if (h_val != NULL) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "not_defined")) { +- if (h_val == NULL) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "eq")) { +- if ((h_val == value) || cmp == 0) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "ne")) { +- if ((h_val == NULL && value != NULL) +- || (h_val != NULL && value == NULL) +- || cmp != 0) { +- accept = TRUE; +- } +- +- } else if (value == NULL || h_val == NULL) { +- // The comparison is meaningless from this point on +- accept = FALSE; +- +- } else if (safe_str_eq(op, "lt")) { +- if (cmp < 0) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "lte")) { +- if (cmp <= 0) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "gt")) { +- if (cmp > 0) { +- accept = TRUE; +- } +- +- } else if (safe_str_eq(op, "gte")) { +- if (cmp >= 0) { +- accept = TRUE; +- } +- } +- +- return accept; ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = hash, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = now, ++ .match_data = match_data, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ return pe__eval_attr_expr(expr, &rule_data); + } + + /* As per the nethack rules: +@@ -587,10 +420,18 @@ pe_parse_xml_duration(crm_time_t * start, xmlNode * duration_spec) + * \return TRUE if date expression is in effect at given time, FALSE otherwise + */ + gboolean +-pe_test_date_expression(xmlNode *time_expr, crm_time_t *now, +- crm_time_t *next_change) 
++pe_test_date_expression(xmlNode *expr, crm_time_t *now, crm_time_t *next_change) + { +- switch (pe_eval_date_expression(time_expr, now, next_change)) { ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ switch (pe__eval_date_expr(expr, &rule_data, next_change)) { + case pcmk_rc_within_range: + case pcmk_rc_ok: + return TRUE; +@@ -623,86 +464,18 @@ crm_time_set_if_earlier(crm_time_t *next_change, crm_time_t *t) + * \return Standard Pacemaker return code + */ + int +-pe_eval_date_expression(xmlNode *time_expr, crm_time_t *now, +- crm_time_t *next_change) ++pe_eval_date_expression(xmlNode *expr, crm_time_t *now, crm_time_t *next_change) + { +- crm_time_t *start = NULL; +- crm_time_t *end = NULL; +- const char *value = NULL; +- const char *op = crm_element_value(time_expr, "operation"); +- +- xmlNode *duration_spec = NULL; +- xmlNode *date_spec = NULL; +- +- // "undetermined" will also be returned for parsing errors +- int rc = pcmk_rc_undetermined; +- +- crm_trace("Testing expression: %s", ID(time_expr)); +- +- duration_spec = first_named_child(time_expr, "duration"); +- date_spec = first_named_child(time_expr, "date_spec"); +- +- value = crm_element_value(time_expr, "start"); +- if (value != NULL) { +- start = crm_time_new(value); +- } +- value = crm_element_value(time_expr, "end"); +- if (value != NULL) { +- end = crm_time_new(value); +- } +- +- if (start != NULL && end == NULL && duration_spec != NULL) { +- end = pe_parse_xml_duration(start, duration_spec); +- } +- +- if ((op == NULL) || safe_str_eq(op, "in_range")) { +- if ((start == NULL) && (end == NULL)) { +- // in_range requires at least one of start or end +- } else if ((start != NULL) && (crm_time_compare(now, start) < 0)) { +- rc = pcmk_rc_before_range; +- crm_time_set_if_earlier(next_change, start); +- } else if ((end != NULL) && (crm_time_compare(now, end) > 0)) { +- rc = 
pcmk_rc_after_range; +- } else { +- rc = pcmk_rc_within_range; +- if (end && next_change) { +- // Evaluation doesn't change until second after end +- crm_time_add_seconds(end, 1); +- crm_time_set_if_earlier(next_change, end); +- } +- } +- +- } else if (safe_str_eq(op, "date_spec")) { +- rc = pe_cron_range_satisfied(now, date_spec); +- // @TODO set next_change appropriately +- +- } else if (safe_str_eq(op, "gt")) { +- if (start == NULL) { +- // gt requires start +- } else if (crm_time_compare(now, start) > 0) { +- rc = pcmk_rc_within_range; +- } else { +- rc = pcmk_rc_before_range; +- +- // Evaluation doesn't change until second after start +- crm_time_add_seconds(start, 1); +- crm_time_set_if_earlier(next_change, start); +- } +- +- } else if (safe_str_eq(op, "lt")) { +- if (end == NULL) { +- // lt requires end +- } else if (crm_time_compare(now, end) < 0) { +- rc = pcmk_rc_within_range; +- crm_time_set_if_earlier(next_change, end); +- } else { +- rc = pcmk_rc_after_range; +- } +- } +- +- crm_time_free(start); +- crm_time_free(end); +- return rc; ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ return pe__eval_date_expr(expr, &rule_data, next_change); + } + + // Information about a block of nvpair elements +@@ -1111,6 +884,285 @@ pe_unpack_versioned_parameters(xmlNode *versioned_params, const char *ra_version + } + #endif + ++gboolean ++pe__eval_attr_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) ++{ ++ gboolean accept = FALSE; ++ gboolean attr_allocated = FALSE; ++ int cmp = 0; ++ const char *h_val = NULL; ++ GHashTable *table = NULL; ++ ++ const char *op = NULL; ++ const char *type = NULL; ++ const char *attr = NULL; ++ const char *value = NULL; ++ const char *value_source = NULL; ++ ++ attr = crm_element_value(expr, XML_EXPR_ATTR_ATTRIBUTE); ++ op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION); ++ value = 
crm_element_value(expr, XML_EXPR_ATTR_VALUE); ++ type = crm_element_value(expr, XML_EXPR_ATTR_TYPE); ++ value_source = crm_element_value(expr, XML_EXPR_ATTR_VALUE_SOURCE); ++ ++ if (attr == NULL || op == NULL) { ++ pe_err("Invalid attribute or operation in expression" ++ " (\'%s\' \'%s\' \'%s\')", crm_str(attr), crm_str(op), crm_str(value)); ++ return FALSE; ++ } ++ ++ if (rule_data->match_data) { ++ if (rule_data->match_data->re) { ++ char *resolved_attr = pe_expand_re_matches(attr, rule_data->match_data->re); ++ ++ if (resolved_attr) { ++ attr = (const char *) resolved_attr; ++ attr_allocated = TRUE; ++ } ++ } ++ ++ if (safe_str_eq(value_source, "param")) { ++ table = rule_data->match_data->params; ++ } else if (safe_str_eq(value_source, "meta")) { ++ table = rule_data->match_data->meta; ++ } ++ } ++ ++ if (table) { ++ const char *param_name = value; ++ const char *param_value = NULL; ++ ++ if (param_name && param_name[0]) { ++ if ((param_value = (const char *)g_hash_table_lookup(table, param_name))) { ++ value = param_value; ++ } ++ } ++ } ++ ++ if (rule_data->node_hash != NULL) { ++ h_val = (const char *)g_hash_table_lookup(rule_data->node_hash, attr); ++ } ++ ++ if (attr_allocated) { ++ free((char *)attr); ++ attr = NULL; ++ } ++ ++ if (value != NULL && h_val != NULL) { ++ if (type == NULL) { ++ if (safe_str_eq(op, "lt") ++ || safe_str_eq(op, "lte") ++ || safe_str_eq(op, "gt") ++ || safe_str_eq(op, "gte")) { ++ type = "number"; ++ ++ } else { ++ type = "string"; ++ } ++ crm_trace("Defaulting to %s based comparison for '%s' op", type, op); ++ } ++ ++ if (safe_str_eq(type, "string")) { ++ cmp = strcasecmp(h_val, value); ++ ++ } else if (safe_str_eq(type, "number")) { ++ int h_val_f = crm_parse_int(h_val, NULL); ++ int value_f = crm_parse_int(value, NULL); ++ ++ if (h_val_f < value_f) { ++ cmp = -1; ++ } else if (h_val_f > value_f) { ++ cmp = 1; ++ } else { ++ cmp = 0; ++ } ++ ++ } else if (safe_str_eq(type, "version")) { ++ cmp = compare_version(h_val, value); 
++ ++ } ++ ++ } else if (value == NULL && h_val == NULL) { ++ cmp = 0; ++ } else if (value == NULL) { ++ cmp = 1; ++ } else { ++ cmp = -1; ++ } ++ ++ if (safe_str_eq(op, "defined")) { ++ if (h_val != NULL) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "not_defined")) { ++ if (h_val == NULL) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "eq")) { ++ if ((h_val == value) || cmp == 0) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "ne")) { ++ if ((h_val == NULL && value != NULL) ++ || (h_val != NULL && value == NULL) ++ || cmp != 0) { ++ accept = TRUE; ++ } ++ ++ } else if (value == NULL || h_val == NULL) { ++ // The comparison is meaningless from this point on ++ accept = FALSE; ++ ++ } else if (safe_str_eq(op, "lt")) { ++ if (cmp < 0) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "lte")) { ++ if (cmp <= 0) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "gt")) { ++ if (cmp > 0) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "gte")) { ++ if (cmp >= 0) { ++ accept = TRUE; ++ } ++ } ++ ++ return accept; ++} ++ ++int ++pe__eval_date_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data, crm_time_t *next_change) ++{ ++ crm_time_t *start = NULL; ++ crm_time_t *end = NULL; ++ const char *value = NULL; ++ const char *op = crm_element_value(expr, "operation"); ++ ++ xmlNode *duration_spec = NULL; ++ xmlNode *date_spec = NULL; ++ ++ // "undetermined" will also be returned for parsing errors ++ int rc = pcmk_rc_undetermined; ++ ++ crm_trace("Testing expression: %s", ID(expr)); ++ ++ duration_spec = first_named_child(expr, "duration"); ++ date_spec = first_named_child(expr, "date_spec"); ++ ++ value = crm_element_value(expr, "start"); ++ if (value != NULL) { ++ start = crm_time_new(value); ++ } ++ value = crm_element_value(expr, "end"); ++ if (value != NULL) { ++ end = crm_time_new(value); ++ } ++ ++ if (start != NULL && end == NULL && duration_spec != NULL) { ++ end = pe_parse_xml_duration(start, duration_spec); 
++ } ++ ++ if ((op == NULL) || safe_str_eq(op, "in_range")) { ++ if ((start == NULL) && (end == NULL)) { ++ // in_range requires at least one of start or end ++ } else if ((start != NULL) && (crm_time_compare(rule_data->now, start) < 0)) { ++ rc = pcmk_rc_before_range; ++ crm_time_set_if_earlier(next_change, start); ++ } else if ((end != NULL) && (crm_time_compare(rule_data->now, end) > 0)) { ++ rc = pcmk_rc_after_range; ++ } else { ++ rc = pcmk_rc_within_range; ++ if (end && next_change) { ++ // Evaluation doesn't change until second after end ++ crm_time_add_seconds(end, 1); ++ crm_time_set_if_earlier(next_change, end); ++ } ++ } ++ ++ } else if (safe_str_eq(op, "date_spec")) { ++ rc = pe_cron_range_satisfied(rule_data->now, date_spec); ++ // @TODO set next_change appropriately ++ ++ } else if (safe_str_eq(op, "gt")) { ++ if (start == NULL) { ++ // gt requires start ++ } else if (crm_time_compare(rule_data->now, start) > 0) { ++ rc = pcmk_rc_within_range; ++ } else { ++ rc = pcmk_rc_before_range; ++ ++ // Evaluation doesn't change until second after start ++ crm_time_add_seconds(start, 1); ++ crm_time_set_if_earlier(next_change, start); ++ } ++ ++ } else if (safe_str_eq(op, "lt")) { ++ if (end == NULL) { ++ // lt requires end ++ } else if (crm_time_compare(rule_data->now, end) < 0) { ++ rc = pcmk_rc_within_range; ++ crm_time_set_if_earlier(next_change, end); ++ } else { ++ rc = pcmk_rc_after_range; ++ } ++ } ++ ++ crm_time_free(start); ++ crm_time_free(end); ++ return rc; ++} ++ ++gboolean ++pe__eval_role_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) ++{ ++ gboolean accept = FALSE; ++ const char *op = NULL; ++ const char *value = NULL; ++ ++ if (rule_data->role == RSC_ROLE_UNKNOWN) { ++ return accept; ++ } ++ ++ value = crm_element_value(expr, XML_EXPR_ATTR_VALUE); ++ op = crm_element_value(expr, XML_EXPR_ATTR_OPERATION); ++ ++ if (safe_str_eq(op, "defined")) { ++ if (rule_data->role > RSC_ROLE_STARTED) { ++ accept = TRUE; ++ } ++ ++ } else if 
(safe_str_eq(op, "not_defined")) { ++ if (rule_data->role < RSC_ROLE_SLAVE && rule_data->role > RSC_ROLE_UNKNOWN) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "eq")) { ++ if (text2role(value) == rule_data->role) { ++ accept = TRUE; ++ } ++ ++ } else if (safe_str_eq(op, "ne")) { ++ // Test "ne" only with promotable clone roles ++ if (rule_data->role < RSC_ROLE_SLAVE && rule_data->role > RSC_ROLE_UNKNOWN) { ++ accept = FALSE; ++ ++ } else if (text2role(value) != rule_data->role) { ++ accept = TRUE; ++ } ++ } ++ return accept; ++} ++ + // Deprecated functions kept only for backward API compatibility + gboolean test_ruleset(xmlNode *ruleset, GHashTable *node_hash, crm_time_t *now); + gboolean test_rule(xmlNode *rule, GHashTable *node_hash, enum rsc_role_e role, +-- +1.8.3.1 + + +From 56a1337a54f3ba8a175ff3252658e1e43f7c670b Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 28 Apr 2020 14:34:40 -0400 +Subject: [PATCH 03/17] Feature: scheduler: Add new rule tests for op_defaults + and rsc_defaults. + +These are like all the other rule evaluating functions, but they do not +have any wrappers for the older style API. 
+--- + include/crm/pengine/rules_internal.h | 2 ++ + lib/pengine/rules.c | 68 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 70 insertions(+) + +diff --git a/include/crm/pengine/rules_internal.h b/include/crm/pengine/rules_internal.h +index 8a22108..f60263a 100644 +--- a/include/crm/pengine/rules_internal.h ++++ b/include/crm/pengine/rules_internal.h +@@ -24,7 +24,9 @@ crm_time_t *pe_parse_xml_duration(crm_time_t * start, xmlNode * duration_spec); + gboolean pe__eval_attr_expr(xmlNode *expr, pe_rule_eval_data_t *rule_data); + int pe__eval_date_expr(xmlNode *expr, pe_rule_eval_data_t *rule_data, + crm_time_t *next_change); ++gboolean pe__eval_op_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data); + gboolean pe__eval_role_expr(xmlNode *expr, pe_rule_eval_data_t *rule_data); ++gboolean pe__eval_rsc_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data); + + int pe_eval_date_expression(xmlNode *time_expr, + crm_time_t *now, +diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index 3f316c2..a5af57a 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -1123,6 +1123,38 @@ pe__eval_date_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data, crm_time_t * + } + + gboolean ++pe__eval_op_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) { ++ const char *name = crm_element_value(expr, XML_NVPAIR_ATTR_NAME); ++ const char *interval_s = crm_element_value(expr, XML_LRM_ATTR_INTERVAL); ++ guint interval; ++ ++ crm_trace("Testing op_defaults expression: %s", ID(expr)); ++ ++ if (rule_data->op_data == NULL) { ++ crm_trace("No operations data provided"); ++ return FALSE; ++ } ++ ++ interval = crm_parse_interval_spec(interval_s); ++ if (interval == 0 && errno != 0) { ++ crm_trace("Could not parse interval: %s", interval_s); ++ return FALSE; ++ } ++ ++ if (interval_s != NULL && interval != rule_data->op_data->interval) { ++ crm_trace("Interval doesn't match: %d != %d", interval, rule_data->op_data->interval); ++ return FALSE; ++ } ++ ++ if 
(!crm_str_eq(name, rule_data->op_data->op_name, TRUE)) { ++ crm_trace("Name doesn't match: %s != %s", name, rule_data->op_data->op_name); ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ ++gboolean + pe__eval_role_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) + { + gboolean accept = FALSE; +@@ -1163,6 +1195,42 @@ pe__eval_role_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) + return accept; + } + ++gboolean ++pe__eval_rsc_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) ++{ ++ const char *class = crm_element_value(expr, XML_AGENT_ATTR_CLASS); ++ const char *provider = crm_element_value(expr, XML_AGENT_ATTR_PROVIDER); ++ const char *type = crm_element_value(expr, XML_EXPR_ATTR_TYPE); ++ ++ crm_trace("Testing rsc_defaults expression: %s", ID(expr)); ++ ++ if (rule_data->rsc_data == NULL) { ++ crm_trace("No resource data provided"); ++ return FALSE; ++ } ++ ++ if (class != NULL && ++ !crm_str_eq(class, rule_data->rsc_data->standard, TRUE)) { ++ crm_trace("Class doesn't match: %s != %s", class, rule_data->rsc_data->standard); ++ return FALSE; ++ } ++ ++ if ((provider == NULL && rule_data->rsc_data->provider != NULL) || ++ (provider != NULL && rule_data->rsc_data->provider == NULL) || ++ !crm_str_eq(provider, rule_data->rsc_data->provider, TRUE)) { ++ crm_trace("Provider doesn't match: %s != %s", provider, rule_data->rsc_data->provider); ++ return FALSE; ++ } ++ ++ if (type != NULL && ++ !crm_str_eq(type, rule_data->rsc_data->agent, TRUE)) { ++ crm_trace("Agent doesn't match: %s != %s", type, rule_data->rsc_data->agent); ++ return FALSE; ++ } ++ ++ return TRUE; ++} ++ + // Deprecated functions kept only for backward API compatibility + gboolean test_ruleset(xmlNode *ruleset, GHashTable *node_hash, crm_time_t *now); + gboolean test_rule(xmlNode *rule, GHashTable *node_hash, enum rsc_role_e role, +-- +1.8.3.1 + + +From 5a4da3f77feee0d3bac50e9adc4eb4b35724dfb2 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 28 Apr 2020 14:41:08 -0400 +Subject: 
[PATCH 04/17] Refactor: scheduler: Reimplement core rule eval + functions. + +The core functions of pe_evaluate_rules, pe_test_rule, and +pe_test_expression have been turned into new, similarly named functions +that take a pe_rule_eval_data_t as an argument. The old ones still +exist as wrappers around the new ones. +--- + include/crm/pengine/rules.h | 7 ++ + lib/pengine/rules.c | 259 ++++++++++++++++++++++++++------------------ + 2 files changed, 162 insertions(+), 104 deletions(-) + +diff --git a/include/crm/pengine/rules.h b/include/crm/pengine/rules.h +index d7bdbf9..a74c629 100644 +--- a/include/crm/pengine/rules.h ++++ b/include/crm/pengine/rules.h +@@ -61,6 +61,13 @@ GHashTable *pe_unpack_versioned_parameters(xmlNode *versioned_params, const char + + char *pe_expand_re_matches(const char *string, pe_re_match_data_t * match_data); + ++gboolean pe_eval_rules(xmlNode *ruleset, pe_rule_eval_data_t *rule_data, ++ crm_time_t *next_change); ++gboolean pe_eval_expr(xmlNode *rule, pe_rule_eval_data_t *rule_data, ++ crm_time_t *next_change); ++gboolean pe_eval_subexpr(xmlNode *expr, pe_rule_eval_data_t *rule_data, ++ crm_time_t *next_change); ++ + #ifndef PCMK__NO_COMPAT + /* Everything here is deprecated and kept only for public API backward + * compatibility. It will be moved to compatibility.h when 2.1.0 is released. 
+diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index a5af57a..a6353ef 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -38,25 +38,16 @@ gboolean + pe_evaluate_rules(xmlNode *ruleset, GHashTable *node_hash, crm_time_t *now, + crm_time_t *next_change) + { +- // If there are no rules, pass by default +- gboolean ruleset_default = TRUE; +- +- for (xmlNode *rule = first_named_child(ruleset, XML_TAG_RULE); +- rule != NULL; rule = crm_next_same_xml(rule)) { ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = node_hash, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; + +- ruleset_default = FALSE; +- if (pe_test_rule(rule, node_hash, RSC_ROLE_UNKNOWN, now, next_change, +- NULL)) { +- /* Only the deprecated "lifetime" element of location constraints +- * may contain more than one rule at the top level -- the schema +- * limits a block of nvpairs to a single top-level rule. So, this +- * effectively means that a lifetime is active if any rule it +- * contains is active. 
+- */ +- return TRUE; +- } +- } +- return ruleset_default; ++ return pe_eval_rules(ruleset, &rule_data, next_change); + } + + gboolean +@@ -64,44 +55,16 @@ pe_test_rule(xmlNode *rule, GHashTable *node_hash, enum rsc_role_e role, + crm_time_t *now, crm_time_t *next_change, + pe_match_data_t *match_data) + { +- xmlNode *expr = NULL; +- gboolean test = TRUE; +- gboolean empty = TRUE; +- gboolean passed = TRUE; +- gboolean do_and = TRUE; +- const char *value = NULL; +- +- rule = expand_idref(rule, NULL); +- value = crm_element_value(rule, XML_RULE_ATTR_BOOLEAN_OP); +- if (safe_str_eq(value, "or")) { +- do_and = FALSE; +- passed = FALSE; +- } +- +- crm_trace("Testing rule %s", ID(rule)); +- for (expr = __xml_first_child_element(rule); expr != NULL; +- expr = __xml_next_element(expr)) { +- +- test = pe_test_expression(expr, node_hash, role, now, next_change, +- match_data); +- empty = FALSE; +- +- if (test && do_and == FALSE) { +- crm_trace("Expression %s/%s passed", ID(rule), ID(expr)); +- return TRUE; +- +- } else if (test == FALSE && do_and) { +- crm_trace("Expression %s/%s failed", ID(rule), ID(expr)); +- return FALSE; +- } +- } +- +- if (empty) { +- crm_err("Invalid Rule %s: rules must contain at least one expression", ID(rule)); +- } ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = node_hash, ++ .role = role, ++ .now = now, ++ .match_data = match_data, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; + +- crm_trace("Rule %s %s", ID(rule), passed ? "passed" : "failed"); +- return passed; ++ return pe_eval_expr(rule, &rule_data, next_change); + } + + /*! 
+@@ -125,56 +88,16 @@ pe_test_expression(xmlNode *expr, GHashTable *node_hash, enum rsc_role_e role, + crm_time_t *now, crm_time_t *next_change, + pe_match_data_t *match_data) + { +- gboolean accept = FALSE; +- const char *uname = NULL; +- +- switch (find_expression_type(expr)) { +- case nested_rule: +- accept = pe_test_rule(expr, node_hash, role, now, next_change, +- match_data); +- break; +- case attr_expr: +- case loc_expr: +- /* these expressions can never succeed if there is +- * no node to compare with +- */ +- if (node_hash != NULL) { +- accept = pe_test_attr_expression(expr, node_hash, now, match_data); +- } +- break; +- +- case time_expr: +- accept = pe_test_date_expression(expr, now, next_change); +- break; +- +- case role_expr: +- accept = pe_test_role_expression(expr, role, now); +- break; +- +-#if ENABLE_VERSIONED_ATTRS +- case version_expr: +- if (node_hash && g_hash_table_lookup_extended(node_hash, +- CRM_ATTR_RA_VERSION, +- NULL, NULL)) { +- accept = pe_test_attr_expression(expr, node_hash, now, NULL); +- } else { +- // we are going to test it when we have ra-version +- accept = TRUE; +- } +- break; +-#endif +- +- default: +- CRM_CHECK(FALSE /* bad type */ , return FALSE); +- accept = FALSE; +- } +- if (node_hash) { +- uname = g_hash_table_lookup(node_hash, CRM_ATTR_UNAME); +- } ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = node_hash, ++ .role = role, ++ .now = now, ++ .match_data = match_data, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; + +- crm_trace("Expression %s %s on %s", +- ID(expr), accept ? "passed" : "failed", uname ? 
uname : "all nodes"); +- return accept; ++ return pe_eval_subexpr(expr, &rule_data, next_change); + } + + enum expression_type +@@ -885,6 +808,134 @@ pe_unpack_versioned_parameters(xmlNode *versioned_params, const char *ra_version + #endif + + gboolean ++pe_eval_rules(xmlNode *ruleset, pe_rule_eval_data_t *rule_data, crm_time_t *next_change) ++{ ++ // If there are no rules, pass by default ++ gboolean ruleset_default = TRUE; ++ ++ for (xmlNode *rule = first_named_child(ruleset, XML_TAG_RULE); ++ rule != NULL; rule = crm_next_same_xml(rule)) { ++ ++ ruleset_default = FALSE; ++ if (pe_eval_expr(rule, rule_data, next_change)) { ++ /* Only the deprecated "lifetime" element of location constraints ++ * may contain more than one rule at the top level -- the schema ++ * limits a block of nvpairs to a single top-level rule. So, this ++ * effectively means that a lifetime is active if any rule it ++ * contains is active. ++ */ ++ return TRUE; ++ } ++ } ++ ++ return ruleset_default; ++} ++ ++gboolean ++pe_eval_expr(xmlNode *rule, pe_rule_eval_data_t *rule_data, crm_time_t *next_change) ++{ ++ xmlNode *expr = NULL; ++ gboolean test = TRUE; ++ gboolean empty = TRUE; ++ gboolean passed = TRUE; ++ gboolean do_and = TRUE; ++ const char *value = NULL; ++ ++ rule = expand_idref(rule, NULL); ++ value = crm_element_value(rule, XML_RULE_ATTR_BOOLEAN_OP); ++ if (safe_str_eq(value, "or")) { ++ do_and = FALSE; ++ passed = FALSE; ++ } ++ ++ crm_trace("Testing rule %s", ID(rule)); ++ for (expr = __xml_first_child_element(rule); expr != NULL; ++ expr = __xml_next_element(expr)) { ++ ++ test = pe_eval_subexpr(expr, rule_data, next_change); ++ empty = FALSE; ++ ++ if (test && do_and == FALSE) { ++ crm_trace("Expression %s/%s passed", ID(rule), ID(expr)); ++ return TRUE; ++ ++ } else if (test == FALSE && do_and) { ++ crm_trace("Expression %s/%s failed", ID(rule), ID(expr)); ++ return FALSE; ++ } ++ } ++ ++ if (empty) { ++ crm_err("Invalid Rule %s: rules must contain at least one expression", 
ID(rule)); ++ } ++ ++ crm_trace("Rule %s %s", ID(rule), passed ? "passed" : "failed"); ++ return passed; ++} ++ ++gboolean ++pe_eval_subexpr(xmlNode *expr, pe_rule_eval_data_t *rule_data, crm_time_t *next_change) ++{ ++ gboolean accept = FALSE; ++ const char *uname = NULL; ++ ++ switch (find_expression_type(expr)) { ++ case nested_rule: ++ accept = pe_eval_expr(expr, rule_data, next_change); ++ break; ++ case attr_expr: ++ case loc_expr: ++ /* these expressions can never succeed if there is ++ * no node to compare with ++ */ ++ if (rule_data->node_hash != NULL) { ++ accept = pe__eval_attr_expr(expr, rule_data); ++ } ++ break; ++ ++ case time_expr: ++ accept = pe_test_date_expression(expr, rule_data->now, next_change); ++ break; ++ ++ case role_expr: ++ accept = pe__eval_role_expr(expr, rule_data); ++ break; ++ ++ case rsc_expr: ++ accept = pe__eval_rsc_expr(expr, rule_data); ++ break; ++ ++ case op_expr: ++ accept = pe__eval_op_expr(expr, rule_data); ++ break; ++ ++#if ENABLE_VERSIONED_ATTRS ++ case version_expr: ++ if (rule_data->node_hash && ++ g_hash_table_lookup_extended(rule_data->node_hash, ++ CRM_ATTR_RA_VERSION, NULL, NULL)) { ++ accept = pe__eval_attr_expr(expr, rule_data); ++ } else { ++ // we are going to test it when we have ra-version ++ accept = TRUE; ++ } ++ break; ++#endif ++ ++ default: ++ CRM_CHECK(FALSE /* bad type */ , return FALSE); ++ accept = FALSE; ++ } ++ if (rule_data->node_hash) { ++ uname = g_hash_table_lookup(rule_data->node_hash, CRM_ATTR_UNAME); ++ } ++ ++ crm_trace("Expression %s %s on %s", ++ ID(expr), accept ? "passed" : "failed", uname ? uname : "all nodes"); ++ return accept; ++} ++ ++gboolean + pe__eval_attr_expr(xmlNodePtr expr, pe_rule_eval_data_t *rule_data) + { + gboolean accept = FALSE; +-- +1.8.3.1 + + +From ea6318252164578fd27dcef657e80f5225337a4b Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 7 Apr 2020 15:57:06 -0400 +Subject: [PATCH 05/17] Refactor: scheduler: Add rule_data to unpack_data_s. 
+ +This is just to get rid of a couple extra arguments to some internal +functions and make them look like the external functions. +--- + lib/pengine/rules.c | 65 ++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 44 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index a6353ef..2709d68 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -555,10 +555,9 @@ add_versioned_attributes(xmlNode * attr_set, xmlNode * versioned_attrs) + + typedef struct unpack_data_s { + gboolean overwrite; +- GHashTable *node_hash; + void *hash; +- crm_time_t *now; + crm_time_t *next_change; ++ pe_rule_eval_data_t *rule_data; + xmlNode *top; + } unpack_data_t; + +@@ -568,14 +567,14 @@ unpack_attr_set(gpointer data, gpointer user_data) + sorted_set_t *pair = data; + unpack_data_t *unpack_data = user_data; + +- if (!pe_evaluate_rules(pair->attr_set, unpack_data->node_hash, +- unpack_data->now, unpack_data->next_change)) { ++ if (!pe_eval_rules(pair->attr_set, unpack_data->rule_data, ++ unpack_data->next_change)) { + return; + } + + #if ENABLE_VERSIONED_ATTRS +- if (get_versioned_rule(pair->attr_set) && !(unpack_data->node_hash && +- g_hash_table_lookup_extended(unpack_data->node_hash, ++ if (get_versioned_rule(pair->attr_set) && !(unpack_data->rule_data->node_hash && ++ g_hash_table_lookup_extended(unpack_data->rule_data->node_hash, + CRM_ATTR_RA_VERSION, NULL, NULL))) { + // we haven't actually tested versioned expressions yet + return; +@@ -593,8 +592,8 @@ unpack_versioned_attr_set(gpointer data, gpointer user_data) + sorted_set_t *pair = data; + unpack_data_t *unpack_data = user_data; + +- if (pe_evaluate_rules(pair->attr_set, unpack_data->node_hash, +- unpack_data->now, unpack_data->next_change)) { ++ if (pe_eval_rules(pair->attr_set, unpack_data->rule_data, ++ unpack_data->next_change)) { + add_versioned_attributes(pair->attr_set, unpack_data->hash); + } + } +@@ -658,19 +657,17 @@ make_pairs(xmlNode *top, 
xmlNode *xml_obj, const char *set_name, + * \param[in] top XML document root (used to expand id-ref's) + * \param[in] xml_obj XML element containing blocks of nvpair elements + * \param[in] set_name If not NULL, only use blocks of this element type +- * \param[in] node_hash Node attributes to use when evaluating rules + * \param[out] hash Where to store extracted name/value pairs + * \param[in] always_first If not NULL, process block with this ID first + * \param[in] overwrite Whether to replace existing values with same name +- * \param[in] now Time to use when evaluating rules ++ * \param[in] rule_data Matching parameters to use when unpacking + * \param[out] next_change If not NULL, set to when rule evaluation will change + * \param[in] unpack_func Function to call to unpack each block + */ + static void + unpack_nvpair_blocks(xmlNode *top, xmlNode *xml_obj, const char *set_name, +- GHashTable *node_hash, void *hash, +- const char *always_first, gboolean overwrite, +- crm_time_t *now, crm_time_t *next_change, ++ void *hash, const char *always_first, gboolean overwrite, ++ pe_rule_eval_data_t *rule_data, crm_time_t *next_change, + GFunc unpack_func) + { + GList *pairs = make_pairs(top, xml_obj, set_name, always_first); +@@ -678,11 +675,10 @@ unpack_nvpair_blocks(xmlNode *top, xmlNode *xml_obj, const char *set_name, + if (pairs) { + unpack_data_t data = { + .hash = hash, +- .node_hash = node_hash, +- .now = now, + .overwrite = overwrite, + .next_change = next_change, + .top = top, ++ .rule_data = rule_data + }; + + g_list_foreach(pairs, unpack_func, &data); +@@ -709,8 +705,17 @@ pe_unpack_nvpairs(xmlNode *top, xmlNode *xml_obj, const char *set_name, + const char *always_first, gboolean overwrite, + crm_time_t *now, crm_time_t *next_change) + { +- unpack_nvpair_blocks(top, xml_obj, set_name, node_hash, hash, always_first, +- overwrite, now, next_change, unpack_attr_set); ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = node_hash, ++ .role = RSC_ROLE_UNKNOWN, ++ 
.now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ unpack_nvpair_blocks(top, xml_obj, set_name, hash, always_first, ++ overwrite, &rule_data, next_change, unpack_attr_set); + } + + #if ENABLE_VERSIONED_ATTRS +@@ -720,8 +725,17 @@ pe_unpack_versioned_attributes(xmlNode *top, xmlNode *xml_obj, + xmlNode *hash, crm_time_t *now, + crm_time_t *next_change) + { +- unpack_nvpair_blocks(top, xml_obj, set_name, node_hash, hash, NULL, FALSE, +- now, next_change, unpack_versioned_attr_set); ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = node_hash, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ unpack_nvpair_blocks(top, xml_obj, set_name, hash, NULL, FALSE, ++ &rule_data, next_change, unpack_versioned_attr_set); + } + #endif + +@@ -1366,6 +1380,15 @@ unpack_instance_attributes(xmlNode *top, xmlNode *xml_obj, const char *set_name, + const char *always_first, gboolean overwrite, + crm_time_t *now) + { +- unpack_nvpair_blocks(top, xml_obj, set_name, node_hash, hash, always_first, +- overwrite, now, NULL, unpack_attr_set); ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = node_hash, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ ++ unpack_nvpair_blocks(top, xml_obj, set_name, hash, always_first, ++ overwrite, &rule_data, NULL, unpack_attr_set); + } +-- +1.8.3.1 + + +From 54646db6f5e4f1bb141b35798bcad5c3cc025afe Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 8 Apr 2020 10:41:41 -0400 +Subject: [PATCH 06/17] Refactor: scheduler: Change args to + pe__unpack_dataset_nvpairs. + +It should now take a pe_rule_eval_data_t instead of various separate +arguments. This will allow passing further data that needs to be tested +against in the future (such as rsc_defaults and op_defaults). 
It's also +convenient to make versions of pe_unpack_nvpairs and +pe_unpack_versioned_attributes that take the same arguments. + +Then, adapt callers of pe__unpack_dataset_nvpairs to pass the new +argument. +--- + include/crm/pengine/internal.h | 2 +- + include/crm/pengine/rules.h | 9 +++++++ + lib/pengine/complex.c | 41 ++++++++++++++++++++++------- + lib/pengine/rules.c | 23 ++++++++++++++-- + lib/pengine/unpack.c | 33 ++++++++++++++++++++--- + lib/pengine/utils.c | 60 +++++++++++++++++++++++++++++++----------- + 6 files changed, 137 insertions(+), 31 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 189ba7b..3e59502 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -460,7 +460,7 @@ void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set); + void pe__register_messages(pcmk__output_t *out); + + void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, +- GHashTable *node_hash, GHashTable *hash, ++ pe_rule_eval_data_t *rule_data, GHashTable *hash, + const char *always_first, gboolean overwrite, + pe_working_set_t *data_set); + +diff --git a/include/crm/pengine/rules.h b/include/crm/pengine/rules.h +index a74c629..cbae8ed 100644 +--- a/include/crm/pengine/rules.h ++++ b/include/crm/pengine/rules.h +@@ -46,12 +46,21 @@ gboolean pe_test_expression(xmlNode *expr, GHashTable *node_hash, + crm_time_t *next_change, + pe_match_data_t *match_data); + ++void pe_eval_nvpairs(xmlNode *top, xmlNode *xml_obj, const char *set_name, ++ pe_rule_eval_data_t *rule_data, GHashTable *hash, ++ const char *always_first, gboolean overwrite, ++ crm_time_t *next_change); ++ + void pe_unpack_nvpairs(xmlNode *top, xmlNode *xml_obj, const char *set_name, + GHashTable *node_hash, GHashTable *hash, + const char *always_first, gboolean overwrite, + crm_time_t *now, crm_time_t *next_change); + + #if ENABLE_VERSIONED_ATTRS ++void pe_eval_versioned_attributes(xmlNode *top, xmlNode 
*xml_obj, ++ const char *set_name, pe_rule_eval_data_t *rule_data, ++ xmlNode *hash, crm_time_t *next_change); ++ + void pe_unpack_versioned_attributes(xmlNode *top, xmlNode *xml_obj, + const char *set_name, GHashTable *node_hash, + xmlNode *hash, crm_time_t *now, +diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c +index 16f3a71..d91c95e 100644 +--- a/lib/pengine/complex.c ++++ b/lib/pengine/complex.c +@@ -95,10 +95,17 @@ void + get_meta_attributes(GHashTable * meta_hash, pe_resource_t * rsc, + pe_node_t * node, pe_working_set_t * data_set) + { +- GHashTable *node_hash = NULL; ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; + + if (node) { +- node_hash = node->details->attrs; ++ rule_data.node_hash = node->details->attrs; + } + + if (rsc->xml) { +@@ -112,7 +119,7 @@ get_meta_attributes(GHashTable * meta_hash, pe_resource_t * rsc, + } + } + +- pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_META_SETS, node_hash, ++ pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_META_SETS, &rule_data, + meta_hash, NULL, FALSE, data_set); + + /* set anything else based on the parent */ +@@ -122,20 +129,27 @@ get_meta_attributes(GHashTable * meta_hash, pe_resource_t * rsc, + + /* and finally check the defaults */ + pe__unpack_dataset_nvpairs(data_set->rsc_defaults, XML_TAG_META_SETS, +- node_hash, meta_hash, NULL, FALSE, data_set); ++ &rule_data, meta_hash, NULL, FALSE, data_set); + } + + void + get_rsc_attributes(GHashTable * meta_hash, pe_resource_t * rsc, + pe_node_t * node, pe_working_set_t * data_set) + { +- GHashTable *node_hash = NULL; ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; + + if (node) { +- node_hash = node->details->attrs; ++ rule_data.node_hash = node->details->attrs; + } + +- 
pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_ATTR_SETS, node_hash, ++ pe__unpack_dataset_nvpairs(rsc->xml, XML_TAG_ATTR_SETS, &rule_data, + meta_hash, NULL, FALSE, data_set); + + /* set anything else based on the parent */ +@@ -145,7 +159,7 @@ get_rsc_attributes(GHashTable * meta_hash, pe_resource_t * rsc, + } else { + /* and finally check the defaults */ + pe__unpack_dataset_nvpairs(data_set->rsc_defaults, XML_TAG_ATTR_SETS, +- node_hash, meta_hash, NULL, FALSE, data_set); ++ &rule_data, meta_hash, NULL, FALSE, data_set); + } + } + +@@ -376,6 +390,15 @@ common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc, + bool remote_node = FALSE; + bool has_versioned_params = FALSE; + ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ + crm_log_xml_trace(xml_obj, "Processing resource input..."); + + if (id == NULL) { +@@ -706,7 +729,7 @@ common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc, + + (*rsc)->utilization = crm_str_table_new(); + +- pe__unpack_dataset_nvpairs((*rsc)->xml, XML_TAG_UTILIZATION, NULL, ++ pe__unpack_dataset_nvpairs((*rsc)->xml, XML_TAG_UTILIZATION, &rule_data, + (*rsc)->utilization, NULL, FALSE, data_set); + + /* data_set->resources = g_list_append(data_set->resources, (*rsc)); */ +diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index 2709d68..7575011 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -686,6 +686,16 @@ unpack_nvpair_blocks(xmlNode *top, xmlNode *xml_obj, const char *set_name, + } + } + ++void ++pe_eval_nvpairs(xmlNode *top, xmlNode *xml_obj, const char *set_name, ++ pe_rule_eval_data_t *rule_data, GHashTable *hash, ++ const char *always_first, gboolean overwrite, ++ crm_time_t *next_change) ++{ ++ unpack_nvpair_blocks(top, xml_obj, set_name, hash, always_first, ++ overwrite, rule_data, next_change, unpack_attr_set); ++} ++ + /*! 
+ * \brief Extract nvpair blocks contained by an XML element into a hash table + * +@@ -714,12 +724,21 @@ pe_unpack_nvpairs(xmlNode *top, xmlNode *xml_obj, const char *set_name, + .op_data = NULL + }; + +- unpack_nvpair_blocks(top, xml_obj, set_name, hash, always_first, +- overwrite, &rule_data, next_change, unpack_attr_set); ++ pe_eval_nvpairs(top, xml_obj, set_name, &rule_data, hash, ++ always_first, overwrite, next_change); + } + + #if ENABLE_VERSIONED_ATTRS + void ++pe_eval_versioned_attributes(xmlNode *top, xmlNode *xml_obj, const char *set_name, ++ pe_rule_eval_data_t *rule_data, xmlNode *hash, ++ crm_time_t *next_change) ++{ ++ unpack_nvpair_blocks(top, xml_obj, set_name, hash, NULL, FALSE, rule_data, ++ next_change, unpack_versioned_attr_set); ++} ++ ++void + pe_unpack_versioned_attributes(xmlNode *top, xmlNode *xml_obj, + const char *set_name, GHashTable *node_hash, + xmlNode *hash, crm_time_t *now, +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 532a3e6..8784857 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -188,9 +188,18 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) + const char *value = NULL; + GHashTable *config_hash = crm_str_table_new(); + ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ + data_set->config_hash = config_hash; + +- pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, NULL, config_hash, ++ pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, + CIB_OPTIONS_FIRST, FALSE, data_set); + + verify_pe_options(data_set->config_hash); +@@ -515,6 +524,15 @@ unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) + const char *type = NULL; + const char *score = NULL; + ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = 
NULL, ++ .op_data = NULL ++ }; ++ + for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL; + xml_obj = __xml_next_element(xml_obj)) { + +@@ -547,7 +565,7 @@ unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) + handle_startup_fencing(data_set, new_node); + + add_node_attrs(xml_obj, new_node, FALSE, data_set); +- pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, NULL, ++ pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data, + new_node->details->utilization, NULL, + FALSE, data_set); + +@@ -3698,6 +3716,15 @@ add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite, + { + const char *cluster_name = NULL; + ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ + g_hash_table_insert(node->details->attrs, + strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); + +@@ -3719,7 +3746,7 @@ add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite, + strdup(cluster_name)); + } + +- pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, NULL, ++ pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data, + node->details->attrs, NULL, overwrite, data_set); + + if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index c9b45e0..d01936d 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -597,10 +597,19 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, + + if (is_set(action->flags, pe_action_have_node_attrs) == FALSE + && action->node != NULL && action->op_entry != NULL) { ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = action->node->details->attrs, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ + pe_set_action_bit(action, pe_action_have_node_attrs); + pe__unpack_dataset_nvpairs(action->op_entry, 
XML_TAG_ATTR_SETS, +- action->node->details->attrs, +- action->extra, NULL, FALSE, data_set); ++ &rule_data, action->extra, NULL, ++ FALSE, data_set); + } + + if (is_set(action->flags, pe_action_pseudo)) { +@@ -873,6 +882,15 @@ pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set + const char *timeout = NULL; + int timeout_ms = 0; + ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ + for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP); + child != NULL; child = crm_next_same_xml(child)) { + if (safe_str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME))) { +@@ -884,7 +902,7 @@ pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set + if (timeout == NULL && data_set->op_defaults) { + GHashTable *action_meta = crm_str_table_new(); + pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, +- NULL, action_meta, NULL, FALSE, data_set); ++ &rule_data, action_meta, NULL, FALSE, data_set); + timeout = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT); + } + +@@ -964,10 +982,19 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai + pe_rsc_action_details_t *rsc_details = NULL; + #endif + ++ pe_rule_eval_data_t rule_data = { ++ .node_hash = NULL, ++ .role = RSC_ROLE_UNKNOWN, ++ .now = data_set->now, ++ .match_data = NULL, ++ .rsc_data = NULL, ++ .op_data = NULL ++ }; ++ + CRM_CHECK(action && action->rsc, return); + + // Cluster-wide +- pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, NULL, ++ pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, + action->meta, NULL, FALSE, data_set); + + // Probe timeouts default differently, so handle timeout default later +@@ -981,19 +1008,20 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai + xmlAttrPtr xIter = NULL; 
+ + // take precedence over defaults +- pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, NULL, ++ pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data, + action->meta, NULL, TRUE, data_set); + + #if ENABLE_VERSIONED_ATTRS + rsc_details = pe_rsc_action_details(action); +- pe_unpack_versioned_attributes(data_set->input, xml_obj, +- XML_TAG_ATTR_SETS, NULL, +- rsc_details->versioned_parameters, +- data_set->now, NULL); +- pe_unpack_versioned_attributes(data_set->input, xml_obj, +- XML_TAG_META_SETS, NULL, +- rsc_details->versioned_meta, +- data_set->now, NULL); ++ ++ pe_eval_versioned_attributes(data_set->input, xml_obj, ++ XML_TAG_ATTR_SETS, &rule_data, ++ rsc_details->versioned_parameters, ++ NULL); ++ pe_eval_versioned_attributes(data_set->input, xml_obj, ++ XML_TAG_META_SETS, &rule_data, ++ rsc_details->versioned_meta, ++ NULL); + #endif + + /* Anything set as an XML property has highest precedence. +@@ -2693,14 +2721,14 @@ pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set) + */ + void + pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, +- GHashTable *node_hash, GHashTable *hash, ++ pe_rule_eval_data_t *rule_data, GHashTable *hash, + const char *always_first, gboolean overwrite, + pe_working_set_t *data_set) + { + crm_time_t *next_change = crm_time_new_undefined(); + +- pe_unpack_nvpairs(data_set->input, xml_obj, set_name, node_hash, hash, +- always_first, overwrite, data_set->now, next_change); ++ pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash, ++ always_first, overwrite, next_change); + if (crm_time_is_defined(next_change)) { + time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); + +-- +1.8.3.1 + + +From ad06f60bae1fcb5d204fa18a0b21ade78aaee5f4 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 8 Apr 2020 13:43:26 -0400 +Subject: [PATCH 07/17] Refactor: scheduler: unpack_operation should be static. 
+ +--- + lib/pengine/utils.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index d01936d..c345875 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -23,8 +23,8 @@ + extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); + void print_str_str(gpointer key, gpointer value, gpointer user_data); + gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); +-void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, +- pe_working_set_t * data_set); ++static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, ++ pe_working_set_t * data_set); + static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, + gboolean include_disabled); + +@@ -968,7 +968,7 @@ unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, + * \param[in] container Resource that contains affected resource, if any + * \param[in] data_set Cluster state + */ +-void ++static void + unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, + pe_working_set_t * data_set) + { +-- +1.8.3.1 + + +From 7e57d955c9209af62dffc0639c50d51121028c26 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 8 Apr 2020 14:58:35 -0400 +Subject: [PATCH 08/17] Refactor: scheduler: Pass interval to unpack_operation. 
+ +--- + lib/pengine/utils.c | 36 ++++++++++++++---------------------- + 1 file changed, 14 insertions(+), 22 deletions(-) + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index c345875..1e3b0bd 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -24,7 +24,7 @@ extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); + void print_str_str(gpointer key, gpointer value, gpointer user_data); + gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); + static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, +- pe_working_set_t * data_set); ++ pe_working_set_t * data_set, guint interval_ms); + static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, + gboolean include_disabled); + +@@ -568,9 +568,13 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, + } + + if (rsc != NULL) { ++ guint interval_ms = 0; ++ + action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); ++ parse_op_key(key, NULL, NULL, &interval_ms); + +- unpack_operation(action, action->op_entry, rsc->container, data_set); ++ unpack_operation(action, action->op_entry, rsc->container, data_set, ++ interval_ms); + + if (save_action) { + rsc->actions = g_list_prepend(rsc->actions, action); +@@ -963,20 +967,20 @@ unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, + * and start delay values as integer milliseconds), requirements, and + * failure policy. 
+ * +- * \param[in,out] action Action to unpack into +- * \param[in] xml_obj Operation XML (or NULL if all defaults) +- * \param[in] container Resource that contains affected resource, if any +- * \param[in] data_set Cluster state ++ * \param[in,out] action Action to unpack into ++ * \param[in] xml_obj Operation XML (or NULL if all defaults) ++ * \param[in] container Resource that contains affected resource, if any ++ * \param[in] data_set Cluster state ++ * \param[in] interval_ms How frequently to perform the operation + */ + static void + unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, +- pe_working_set_t * data_set) ++ pe_working_set_t * data_set, guint interval_ms) + { +- guint interval_ms = 0; + int timeout = 0; + char *value_ms = NULL; + const char *value = NULL; +- const char *field = NULL; ++ const char *field = XML_LRM_ATTR_INTERVAL; + char *default_timeout = NULL; + #if ENABLE_VERSIONED_ATTRS + pe_rsc_action_details_t *rsc_details = NULL; +@@ -1038,23 +1042,11 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai + g_hash_table_remove(action->meta, "id"); + + // Normalize interval to milliseconds +- field = XML_LRM_ATTR_INTERVAL; +- value = g_hash_table_lookup(action->meta, field); +- if (value != NULL) { +- interval_ms = crm_parse_interval_spec(value); +- +- } else if ((xml_obj == NULL) && !strcmp(action->task, RSC_STATUS)) { +- /* An orphaned recurring monitor will not have any XML. However, we +- * want the interval to be set, so the action can be properly detected +- * as a recurring monitor. Parse it from the key in this case. 
+- */ +- parse_op_key(action->uuid, NULL, NULL, &interval_ms); +- } + if (interval_ms > 0) { + value_ms = crm_strdup_printf("%u", interval_ms); + g_hash_table_replace(action->meta, strdup(field), value_ms); + +- } else if (value) { ++ } else if (g_hash_table_lookup(action->meta, field) != NULL) { + g_hash_table_remove(action->meta, field); + } + +-- +1.8.3.1 + + +From e4c411d9674e222647dd3ed31714c369f54ccad1 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 9 Apr 2020 16:15:17 -0400 +Subject: [PATCH 09/17] Feature: scheduler: Pass rsc_defaults and op_defaults + data. + +See: rhbz#1628701. +--- + lib/pengine/complex.c | 8 +++++++- + lib/pengine/utils.c | 15 +++++++++++++-- + 2 files changed, 20 insertions(+), 3 deletions(-) + +diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c +index d91c95e..1f06348 100644 +--- a/lib/pengine/complex.c ++++ b/lib/pengine/complex.c +@@ -95,12 +95,18 @@ void + get_meta_attributes(GHashTable * meta_hash, pe_resource_t * rsc, + pe_node_t * node, pe_working_set_t * data_set) + { ++ pe_rsc_eval_data_t rsc_rule_data = { ++ .standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS), ++ .provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER), ++ .agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE) ++ }; ++ + pe_rule_eval_data_t rule_data = { + .node_hash = NULL, + .role = RSC_ROLE_UNKNOWN, + .now = data_set->now, + .match_data = NULL, +- .rsc_data = NULL, ++ .rsc_data = &rsc_rule_data, + .op_data = NULL + }; + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 1e3b0bd..d5309ed 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -986,13 +986,24 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai + pe_rsc_action_details_t *rsc_details = NULL; + #endif + ++ pe_rsc_eval_data_t rsc_rule_data = { ++ .standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS), ++ .provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER), ++ .agent = 
crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE) ++ }; ++ ++ pe_op_eval_data_t op_rule_data = { ++ .op_name = action->task, ++ .interval = interval_ms ++ }; ++ + pe_rule_eval_data_t rule_data = { + .node_hash = NULL, + .role = RSC_ROLE_UNKNOWN, + .now = data_set->now, + .match_data = NULL, +- .rsc_data = NULL, +- .op_data = NULL ++ .rsc_data = &rsc_rule_data, ++ .op_data = &op_rule_data + }; + + CRM_CHECK(action && action->rsc, return); +-- +1.8.3.1 + + +From 57eedcad739071530f01e1fd691734f7681a08a1 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Apr 2020 12:30:51 -0400 +Subject: [PATCH 10/17] Feature: xml: Add rsc_expression and op_expression to + the XML schema. + +--- + cts/cli/regression.upgrade.exp | 7 +- + cts/cli/regression.validity.exp | 22 ++- + xml/constraints-next.rng | 4 +- + xml/nodes-3.4.rng | 44 +++++ + xml/nvset-3.4.rng | 63 ++++++ + xml/options-3.4.rng | 111 +++++++++++ + xml/resources-3.4.rng | 425 ++++++++++++++++++++++++++++++++++++++++ + xml/rule-3.4.rng | 165 ++++++++++++++++ + 8 files changed, 833 insertions(+), 8 deletions(-) + create mode 100644 xml/nodes-3.4.rng + create mode 100644 xml/nvset-3.4.rng + create mode 100644 xml/options-3.4.rng + create mode 100644 xml/resources-3.4.rng + create mode 100644 xml/rule-3.4.rng + +diff --git a/cts/cli/regression.upgrade.exp b/cts/cli/regression.upgrade.exp +index 28ca057..50b22df 100644 +--- a/cts/cli/regression.upgrade.exp ++++ b/cts/cli/regression.upgrade.exp +@@ -79,8 +79,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.2 + update_validation debug: pacemaker-3.2-style configuration is also valid for pacemaker-3.3 + update_validation debug: Testing 'pacemaker-3.3' validation (17 of X) + update_validation debug: Configuration valid for schema: pacemaker-3.3 +-update_validation trace: Stopping at pacemaker-3.3 +-update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.3 ++update_validation debug: pacemaker-3.3-style 
configuration is also valid for pacemaker-3.4 ++update_validation debug: Testing 'pacemaker-3.4' validation (18 of X) ++update_validation debug: Configuration valid for schema: pacemaker-3.4 ++update_validation trace: Stopping at pacemaker-3.4 ++update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.4 + =#=#=#= Current cib after: Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping) =#=#=#= + + +diff --git a/cts/cli/regression.validity.exp b/cts/cli/regression.validity.exp +index 46e54b5..4407074 100644 +--- a/cts/cli/regression.validity.exp ++++ b/cts/cli/regression.validity.exp +@@ -105,7 +105,11 @@ update_validation debug: Testing 'pacemaker-3.3' validation (17 of X) + element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order + element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + update_validation trace: pacemaker-3.3 validation failed +-Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.3 ++update_validation debug: Testing 'pacemaker-3.4' validation (18 of X) ++element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order ++element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order ++update_validation trace: pacemaker-3.4 validation failed ++Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.4 + =#=#=#= End test: Run crm_simulate with invalid CIB (enum violation) - Invalid configuration (78) =#=#=#= + * Passed: crm_simulate - Run crm_simulate with invalid CIB (enum violation) + =#=#=#= Begin test: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= +@@ -198,7 +202,10 @@ update_validation trace: pacemaker-3.2 validation 
failed + update_validation debug: Testing 'pacemaker-3.3' validation (17 of X) + element cib: Relax-NG validity error : Invalid attribute validate-with for element cib + update_validation trace: pacemaker-3.3 validation failed +-Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.3 ++update_validation debug: Testing 'pacemaker-3.4' validation (18 of X) ++element cib: Relax-NG validity error : Invalid attribute validate-with for element cib ++update_validation trace: pacemaker-3.4 validation failed ++Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.4 + =#=#=#= End test: Run crm_simulate with invalid CIB (unrecognized validate-with) - Invalid configuration (78) =#=#=#= + * Passed: crm_simulate - Run crm_simulate with invalid CIB (unrecognized validate-with) + =#=#=#= Begin test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= +@@ -286,8 +293,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.2 + update_validation debug: pacemaker-3.2-style configuration is also valid for pacemaker-3.3 + update_validation debug: Testing 'pacemaker-3.3' validation (17 of X) + update_validation debug: Configuration valid for schema: pacemaker-3.3 +-update_validation trace: Stopping at pacemaker-3.3 +-update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.3 ++update_validation debug: pacemaker-3.3-style configuration is also valid for pacemaker-3.4 ++update_validation debug: Testing 'pacemaker-3.4' validation (18 of X) ++update_validation debug: Configuration valid for schema: pacemaker-3.4 ++update_validation trace: Stopping at pacemaker-3.4 ++update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.4 + unpack_resources error: Resource 
start-up disabled since no STONITH resources have been defined + unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option + unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity +@@ -393,6 +403,8 @@ element rsc_order: Relax-NG validity error : Invalid attribute first-action for + element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order + element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order ++element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order ++element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + =#=#=#= Current cib after: Make resulting CIB invalid, and without validate-with attribute =#=#=#= + + +@@ -450,6 +462,8 @@ validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attrib + validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order + validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order ++validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order ++validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + unpack_resources error: Resource start-up disabled since no STONITH resources have been defined + unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option + unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity +diff --git a/xml/constraints-next.rng 
b/xml/constraints-next.rng +index 7e0d98e..1fa3e75 100644 +--- a/xml/constraints-next.rng ++++ b/xml/constraints-next.rng +@@ -43,7 +43,7 @@ + + + +- ++ + + + +@@ -255,7 +255,7 @@ + + + +- ++ + + + +diff --git a/xml/nodes-3.4.rng b/xml/nodes-3.4.rng +new file mode 100644 +index 0000000..0132c72 +--- /dev/null ++++ b/xml/nodes-3.4.rng +@@ -0,0 +1,44 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ member ++ ping ++ remote ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xml/nvset-3.4.rng b/xml/nvset-3.4.rng +new file mode 100644 +index 0000000..91a7d23 +--- /dev/null ++++ b/xml/nvset-3.4.rng +@@ -0,0 +1,63 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xml/options-3.4.rng b/xml/options-3.4.rng +new file mode 100644 +index 0000000..22330d8 +--- /dev/null ++++ b/xml/options-3.4.rng +@@ -0,0 +1,111 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ cluster-infrastructure ++ ++ ++ ++ ++ ++ heartbeat ++ openais ++ classic openais ++ classic openais (with plugin) ++ cman ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ cluster-infrastructure ++ cluster_recheck_interval ++ dc_deadtime ++ default-action-timeout ++ default_action_timeout ++ default-migration-threshold ++ default_migration_threshold ++ default-resource-failure-stickiness ++ default_resource_failure_stickiness ++ default-resource-stickiness ++ default_resource_stickiness ++ election_timeout ++ expected-quorum-votes ++ is-managed-default ++ is_managed_default ++ no_quorum_policy ++ notification-agent ++ notification-recipient ++ remove_after_stop ++ shutdown_escalation ++ startup_fencing ++ stonith_action ++ stonith_enabled ++ stop_orphan_actions ++ stop_orphan_resources ++ symmetric_cluster ++ transition_idle_timeout ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
+diff --git a/xml/resources-3.4.rng b/xml/resources-3.4.rng +new file mode 100644 +index 0000000..fbb4b65 +--- /dev/null ++++ b/xml/resources-3.4.rng +@@ -0,0 +1,425 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ isolation ++ isolation-host ++ isolation-instance ++ isolation-wrapper ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ([0-9\-]+) ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ requires ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Stopped ++ Started ++ Slave ++ Master ++ ++ ++ ++ ++ ++ ++ ignore ++ block ++ stop ++ restart ++ standby ++ fence ++ restart-container ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ocf ++ ++ ++ ++ ++ lsb ++ heartbeat ++ stonith ++ upstart ++ service ++ systemd ++ nagios ++ ++ ++ ++ ++ +diff --git a/xml/rule-3.4.rng b/xml/rule-3.4.rng +new file mode 100644 +index 0000000..5d1daf0 +--- /dev/null ++++ b/xml/rule-3.4.rng +@@ -0,0 +1,165 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ or ++ and ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ lt ++ gt ++ lte ++ gte ++ eq ++ ne ++ defined ++ not_defined ++ ++ ++ ++ ++ ++ ++ ++ 
++ string ++ number ++ version ++ ++ ++ ++ ++ ++ ++ literal ++ param ++ meta ++ ++ ++ ++ ++ ++ ++ ++ ++ in_range ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ gt ++ ++ ++ ++ lt ++ ++ ++ ++ ++ ++ date_spec ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From b0e2345d92fb7cf42c133b24457eeb07126db8a0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 27 Apr 2020 16:24:22 -0400 +Subject: [PATCH 11/17] Fix: scheduler: Change trace output in populate_hash. + +Only show the "Setting attribute:" text when it comes time to actually +set the attribute. Also show the value being set. This makes it +clearer that an attribute is actually being set, not just that the +function is processing something. +--- + lib/pengine/rules.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/lib/pengine/rules.c b/lib/pengine/rules.c +index 7575011..b0fca55 100644 +--- a/lib/pengine/rules.c ++++ b/lib/pengine/rules.c +@@ -463,7 +463,6 @@ populate_hash(xmlNode * nvpair_list, GHashTable * hash, gboolean overwrite, xmlN + name = crm_element_value(ref_nvpair, XML_NVPAIR_ATTR_NAME); + } + +- crm_trace("Setting attribute: %s", name); + value = crm_element_value(an_attr, XML_NVPAIR_ATTR_VALUE); + if (value == NULL) { + value = crm_element_value(ref_nvpair, XML_NVPAIR_ATTR_VALUE); +@@ -471,7 +470,6 @@ populate_hash(xmlNode * nvpair_list, GHashTable * hash, gboolean overwrite, xmlN + + if (name == NULL || value == NULL) { + continue; +- + } + + old_value = g_hash_table_lookup(hash, name); +@@ -484,6 +482,7 @@ populate_hash(xmlNode * nvpair_list, GHashTable * hash, gboolean overwrite, xmlN + continue; + + } else if (old_value == NULL) { ++ crm_trace("Setting attribute: %s = %s", name, value); + g_hash_table_insert(hash, strdup(name), strdup(value)); + + } else if (overwrite) { +-- +1.8.3.1 + + +From 
d35854384b231c79b8aba1ce4c5caf5dd51ec982 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 1 May 2020 15:45:31 -0400 +Subject: [PATCH 12/17] Test: scheduler: Add a regression test for op_defaults. + +--- + cts/cts-scheduler.in | 3 + + cts/scheduler/op-defaults.dot | 33 ++++++ + cts/scheduler/op-defaults.exp | 211 ++++++++++++++++++++++++++++++++++++++ + cts/scheduler/op-defaults.scores | 11 ++ + cts/scheduler/op-defaults.summary | 46 +++++++++ + cts/scheduler/op-defaults.xml | 87 ++++++++++++++++ + 6 files changed, 391 insertions(+) + create mode 100644 cts/scheduler/op-defaults.dot + create mode 100644 cts/scheduler/op-defaults.exp + create mode 100644 cts/scheduler/op-defaults.scores + create mode 100644 cts/scheduler/op-defaults.summary + create mode 100644 cts/scheduler/op-defaults.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 5d72205..b83f812 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -962,6 +962,9 @@ TESTS = [ + [ "shutdown-lock", "Ensure shutdown lock works properly" ], + [ "shutdown-lock-expiration", "Ensure shutdown lock expiration works properly" ], + ], ++ [ ++ [ "op-defaults", "Test op_defaults conditional expressions " ], ++ ], + + # @TODO: If pacemaker implements versioned attributes, uncomment these tests + #[ +diff --git a/cts/scheduler/op-defaults.dot b/cts/scheduler/op-defaults.dot +new file mode 100644 +index 0000000..5536c15 +--- /dev/null ++++ b/cts/scheduler/op-defaults.dot +@@ -0,0 +1,33 @@ ++ digraph "g" { ++"dummy-rsc_monitor_0 cluster01" -> "dummy-rsc_start_0 cluster02" [ style = bold] ++"dummy-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_0 cluster02" -> "dummy-rsc_start_0 cluster02" [ style = bold] ++"dummy-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_60000 cluster02" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_start_0 cluster02" -> "dummy-rsc_monitor_60000 cluster02" [ style 
= bold] ++"dummy-rsc_start_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster01" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster02" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc2_monitor_0 cluster01" -> "ip-rsc2_start_0 cluster01" [ style = bold] ++"ip-rsc2_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc2_monitor_0 cluster02" -> "ip-rsc2_start_0 cluster01" [ style = bold] ++"ip-rsc2_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ip-rsc2_monitor_10000 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc2_start_0 cluster01" -> "ip-rsc2_monitor_10000 cluster01" [ style = bold] ++"ip-rsc2_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_0 cluster01" -> "ip-rsc_start_0 cluster02" [ style = bold] ++"ip-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_0 cluster02" -> "ip-rsc_start_0 cluster02" [ style = bold] ++"ip-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_20000 cluster02" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_start_0 cluster02" -> "ip-rsc_monitor_20000 cluster02" [ style = bold] ++"ip-rsc_start_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster01" -> "ping-rsc-ping_start_0 cluster01" [ style = bold] ++"ping-rsc-ping_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster02" -> "ping-rsc-ping_start_0 cluster01" [ style = bold] ++"ping-rsc-ping_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_start_0 cluster01" [ style=bold color="green" 
fontcolor="black"] ++} +diff --git a/cts/scheduler/op-defaults.exp b/cts/scheduler/op-defaults.exp +new file mode 100644 +index 0000000..b81eacb +--- /dev/null ++++ b/cts/scheduler/op-defaults.exp +@@ -0,0 +1,211 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/op-defaults.scores b/cts/scheduler/op-defaults.scores +new file mode 100644 +index 0000000..1c622f0 +--- /dev/null ++++ b/cts/scheduler/op-defaults.scores +@@ -0,0 +1,11 @@ ++Allocation scores: ++pcmk__native_allocate: dummy-rsc allocation score on cluster01: 0 ++pcmk__native_allocate: dummy-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: fencing allocation score on cluster01: 0 ++pcmk__native_allocate: fencing allocation score on cluster02: 0 ++pcmk__native_allocate: ip-rsc allocation score on cluster01: 0 ++pcmk__native_allocate: ip-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: ip-rsc2 allocation score on cluster01: 0 ++pcmk__native_allocate: ip-rsc2 allocation score on cluster02: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster01: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster02: 0 +diff --git a/cts/scheduler/op-defaults.summary b/cts/scheduler/op-defaults.summary +new file mode 100644 +index 0000000..b580939 +--- /dev/null ++++ b/cts/scheduler/op-defaults.summary +@@ -0,0 +1,46 @@ ++ ++Current cluster status: ++Online: [ cluster01 
cluster02 ] ++ ++ fencing (stonith:fence_xvm): Stopped ++ ip-rsc (ocf::heartbeat:IPaddr2): Stopped ++ ip-rsc2 (ocf::heartbeat:IPaddr2): Stopped ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped ++ ping-rsc-ping (ocf::pacemaker:ping): Stopped ++ ++Transition Summary: ++ * Start fencing ( cluster01 ) ++ * Start ip-rsc ( cluster02 ) ++ * Start ip-rsc2 ( cluster01 ) ++ * Start dummy-rsc ( cluster02 ) ++ * Start ping-rsc-ping ( cluster01 ) ++ ++Executing cluster transition: ++ * Resource action: fencing monitor on cluster02 ++ * Resource action: fencing monitor on cluster01 ++ * Resource action: ip-rsc monitor on cluster02 ++ * Resource action: ip-rsc monitor on cluster01 ++ * Resource action: ip-rsc2 monitor on cluster02 ++ * Resource action: ip-rsc2 monitor on cluster01 ++ * Resource action: dummy-rsc monitor on cluster02 ++ * Resource action: dummy-rsc monitor on cluster01 ++ * Resource action: ping-rsc-ping monitor on cluster02 ++ * Resource action: ping-rsc-ping monitor on cluster01 ++ * Resource action: fencing start on cluster01 ++ * Resource action: ip-rsc start on cluster02 ++ * Resource action: ip-rsc2 start on cluster01 ++ * Resource action: dummy-rsc start on cluster02 ++ * Resource action: ping-rsc-ping start on cluster01 ++ * Resource action: ip-rsc monitor=20000 on cluster02 ++ * Resource action: ip-rsc2 monitor=10000 on cluster01 ++ * Resource action: dummy-rsc monitor=60000 on cluster02 ++ ++Revised cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Started cluster01 ++ ip-rsc (ocf::heartbeat:IPaddr2): Started cluster02 ++ ip-rsc2 (ocf::heartbeat:IPaddr2): Started cluster01 ++ dummy-rsc (ocf::pacemaker:Dummy): Started cluster02 ++ ping-rsc-ping (ocf::pacemaker:ping): Started cluster01 ++ +diff --git a/cts/scheduler/op-defaults.xml b/cts/scheduler/op-defaults.xml +new file mode 100644 +index 0000000..ae3b248 +--- /dev/null ++++ b/cts/scheduler/op-defaults.xml +@@ -0,0 +1,87 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From 67067927bc1b8e000c06d2b5a4ae6b9223ca13c7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 13 May 2020 10:40:34 -0400 +Subject: [PATCH 13/17] Test: scheduler: Add a regression test for + rsc_defaults. + +--- + cts/cts-scheduler.in | 3 +- + cts/scheduler/rsc-defaults.dot | 18 ++++++ + cts/scheduler/rsc-defaults.exp | 124 +++++++++++++++++++++++++++++++++++++ + cts/scheduler/rsc-defaults.scores | 11 ++++ + cts/scheduler/rsc-defaults.summary | 38 ++++++++++++ + cts/scheduler/rsc-defaults.xml | 78 +++++++++++++++++++++++ + 6 files changed, 271 insertions(+), 1 deletion(-) + create mode 100644 cts/scheduler/rsc-defaults.dot + create mode 100644 cts/scheduler/rsc-defaults.exp + create mode 100644 cts/scheduler/rsc-defaults.scores + create mode 100644 cts/scheduler/rsc-defaults.summary + create mode 100644 cts/scheduler/rsc-defaults.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index b83f812..9022ce9 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -963,7 +963,8 @@ TESTS = [ + [ "shutdown-lock-expiration", "Ensure shutdown lock expiration works properly" ], + ], + [ +- [ "op-defaults", "Test op_defaults conditional expressions " ], ++ [ "op-defaults", "Test op_defaults conditional expressions" ], ++ [ "rsc-defaults", "Test rsc_defaults conditional expressions" ], + ], + + # @TODO: If pacemaker implements versioned attributes, uncomment these tests +diff --git a/cts/scheduler/rsc-defaults.dot b/cts/scheduler/rsc-defaults.dot +new file mode 100644 +index 0000000..d776614 +--- /dev/null ++++ b/cts/scheduler/rsc-defaults.dot +@@ -0,0 +1,18 @@ ++ digraph "g" { ++"dummy-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_0 cluster02" [ style=bold color="green" 
fontcolor="black"] ++"fencing_monitor_0 cluster01" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster02" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc2_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc2_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster01" -> "ping-rsc-ping_start_0 cluster02" [ style = bold] ++"ping-rsc-ping_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster02" -> "ping-rsc-ping_start_0 cluster02" [ style = bold] ++"ping-rsc-ping_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_start_0 cluster02" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/rsc-defaults.exp b/cts/scheduler/rsc-defaults.exp +new file mode 100644 +index 0000000..4aec360 +--- /dev/null ++++ b/cts/scheduler/rsc-defaults.exp +@@ -0,0 +1,124 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/rsc-defaults.scores b/cts/scheduler/rsc-defaults.scores +new file mode 100644 +index 0000000..e7f1bab +--- /dev/null ++++ b/cts/scheduler/rsc-defaults.scores +@@ -0,0 +1,11 @@ ++Allocation scores: ++pcmk__native_allocate: dummy-rsc allocation score 
on cluster01: 0 ++pcmk__native_allocate: dummy-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: fencing allocation score on cluster01: 0 ++pcmk__native_allocate: fencing allocation score on cluster02: 0 ++pcmk__native_allocate: ip-rsc allocation score on cluster01: -INFINITY ++pcmk__native_allocate: ip-rsc allocation score on cluster02: -INFINITY ++pcmk__native_allocate: ip-rsc2 allocation score on cluster01: -INFINITY ++pcmk__native_allocate: ip-rsc2 allocation score on cluster02: -INFINITY ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster01: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster02: 0 +diff --git a/cts/scheduler/rsc-defaults.summary b/cts/scheduler/rsc-defaults.summary +new file mode 100644 +index 0000000..0066f2e +--- /dev/null ++++ b/cts/scheduler/rsc-defaults.summary +@@ -0,0 +1,38 @@ ++2 of 5 resource instances DISABLED and 0 BLOCKED from further action due to failure ++ ++Current cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Stopped ++ ip-rsc (ocf::heartbeat:IPaddr2): Stopped (disabled) ++ ip-rsc2 (ocf::heartbeat:IPaddr2): Stopped (disabled) ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped (unmanaged) ++ ping-rsc-ping (ocf::pacemaker:ping): Stopped ++ ++Transition Summary: ++ * Start fencing ( cluster01 ) ++ * Start ping-rsc-ping ( cluster02 ) ++ ++Executing cluster transition: ++ * Resource action: fencing monitor on cluster02 ++ * Resource action: fencing monitor on cluster01 ++ * Resource action: ip-rsc monitor on cluster02 ++ * Resource action: ip-rsc monitor on cluster01 ++ * Resource action: ip-rsc2 monitor on cluster02 ++ * Resource action: ip-rsc2 monitor on cluster01 ++ * Resource action: dummy-rsc monitor on cluster02 ++ * Resource action: dummy-rsc monitor on cluster01 ++ * Resource action: ping-rsc-ping monitor on cluster02 ++ * Resource action: ping-rsc-ping monitor on cluster01 ++ * Resource action: fencing start on cluster01 ++ * Resource action: 
ping-rsc-ping start on cluster02 ++ ++Revised cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Started cluster01 ++ ip-rsc (ocf::heartbeat:IPaddr2): Stopped (disabled) ++ ip-rsc2 (ocf::heartbeat:IPaddr2): Stopped (disabled) ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped (unmanaged) ++ ping-rsc-ping (ocf::pacemaker:ping): Started cluster02 ++ +diff --git a/cts/scheduler/rsc-defaults.xml b/cts/scheduler/rsc-defaults.xml +new file mode 100644 +index 0000000..38cae8b +--- /dev/null ++++ b/cts/scheduler/rsc-defaults.xml +@@ -0,0 +1,78 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From bcfe068ccb3f3cb6cc3509257fbc4a59bc2b1a41 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 13 May 2020 12:47:35 -0400 +Subject: [PATCH 14/17] Test: scheduler: Add a regression test for op_defaults + with an AND expr. 
+ +--- + cts/cts-scheduler.in | 1 + + cts/scheduler/op-defaults-2.dot | 33 ++++++ + cts/scheduler/op-defaults-2.exp | 211 ++++++++++++++++++++++++++++++++++++ + cts/scheduler/op-defaults-2.scores | 11 ++ + cts/scheduler/op-defaults-2.summary | 46 ++++++++ + cts/scheduler/op-defaults-2.xml | 73 +++++++++++++ + 6 files changed, 375 insertions(+) + create mode 100644 cts/scheduler/op-defaults-2.dot + create mode 100644 cts/scheduler/op-defaults-2.exp + create mode 100644 cts/scheduler/op-defaults-2.scores + create mode 100644 cts/scheduler/op-defaults-2.summary + create mode 100644 cts/scheduler/op-defaults-2.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 9022ce9..669b344 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -964,6 +964,7 @@ TESTS = [ + ], + [ + [ "op-defaults", "Test op_defaults conditional expressions" ], ++ [ "op-defaults-2", "Test op_defaults AND'ed conditional expressions" ], + [ "rsc-defaults", "Test rsc_defaults conditional expressions" ], + ], + +diff --git a/cts/scheduler/op-defaults-2.dot b/cts/scheduler/op-defaults-2.dot +new file mode 100644 +index 0000000..5c67bd8 +--- /dev/null ++++ b/cts/scheduler/op-defaults-2.dot +@@ -0,0 +1,33 @@ ++ digraph "g" { ++"dummy-rsc_monitor_0 cluster01" -> "dummy-rsc_start_0 cluster02" [ style = bold] ++"dummy-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_0 cluster02" -> "dummy-rsc_start_0 cluster02" [ style = bold] ++"dummy-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_10000 cluster02" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_start_0 cluster02" -> "dummy-rsc_monitor_10000 cluster02" [ style = bold] ++"dummy-rsc_start_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster01" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster02" 
-> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_0 cluster01" -> "ip-rsc_start_0 cluster02" [ style = bold] ++"ip-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_0 cluster02" -> "ip-rsc_start_0 cluster02" [ style = bold] ++"ip-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_monitor_20000 cluster02" [ style=bold color="green" fontcolor="black"] ++"ip-rsc_start_0 cluster02" -> "ip-rsc_monitor_20000 cluster02" [ style = bold] ++"ip-rsc_start_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster01" -> "ping-rsc-ping_start_0 cluster01" [ style = bold] ++"ping-rsc-ping_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster02" -> "ping-rsc-ping_start_0 cluster01" [ style = bold] ++"ping-rsc-ping_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"rsc-passes_monitor_0 cluster01" -> "rsc-passes_start_0 cluster01" [ style = bold] ++"rsc-passes_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"rsc-passes_monitor_0 cluster02" -> "rsc-passes_start_0 cluster01" [ style = bold] ++"rsc-passes_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"rsc-passes_monitor_10000 cluster01" [ style=bold color="green" fontcolor="black"] ++"rsc-passes_start_0 cluster01" -> "rsc-passes_monitor_10000 cluster01" [ style = bold] ++"rsc-passes_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/op-defaults-2.exp b/cts/scheduler/op-defaults-2.exp +new file mode 100644 +index 0000000..4324fde +--- /dev/null ++++ b/cts/scheduler/op-defaults-2.exp +@@ -0,0 +1,211 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/op-defaults-2.scores b/cts/scheduler/op-defaults-2.scores +new file mode 100644 +index 0000000..180c8b4 +--- /dev/null ++++ b/cts/scheduler/op-defaults-2.scores +@@ -0,0 +1,11 @@ ++Allocation scores: ++pcmk__native_allocate: dummy-rsc allocation score on cluster01: 0 ++pcmk__native_allocate: dummy-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: fencing allocation score on cluster01: 0 ++pcmk__native_allocate: fencing allocation score on cluster02: 0 ++pcmk__native_allocate: ip-rsc allocation score on cluster01: 0 ++pcmk__native_allocate: ip-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster01: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster02: 0 ++pcmk__native_allocate: rsc-passes allocation score on cluster01: 0 ++pcmk__native_allocate: rsc-passes allocation score on cluster02: 0 +diff --git a/cts/scheduler/op-defaults-2.summary b/cts/scheduler/op-defaults-2.summary +new file mode 100644 +index 0000000..16a68be +--- /dev/null ++++ b/cts/scheduler/op-defaults-2.summary +@@ -0,0 +1,46 @@ ++ ++Current cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Stopped ++ ip-rsc (ocf::heartbeat:IPaddr2): Stopped ++ rsc-passes (ocf::heartbeat:IPaddr2): Stopped ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped ++ ping-rsc-ping 
(ocf::pacemaker:ping): Stopped ++ ++Transition Summary: ++ * Start fencing ( cluster01 ) ++ * Start ip-rsc ( cluster02 ) ++ * Start rsc-passes ( cluster01 ) ++ * Start dummy-rsc ( cluster02 ) ++ * Start ping-rsc-ping ( cluster01 ) ++ ++Executing cluster transition: ++ * Resource action: fencing monitor on cluster02 ++ * Resource action: fencing monitor on cluster01 ++ * Resource action: ip-rsc monitor on cluster02 ++ * Resource action: ip-rsc monitor on cluster01 ++ * Resource action: rsc-passes monitor on cluster02 ++ * Resource action: rsc-passes monitor on cluster01 ++ * Resource action: dummy-rsc monitor on cluster02 ++ * Resource action: dummy-rsc monitor on cluster01 ++ * Resource action: ping-rsc-ping monitor on cluster02 ++ * Resource action: ping-rsc-ping monitor on cluster01 ++ * Resource action: fencing start on cluster01 ++ * Resource action: ip-rsc start on cluster02 ++ * Resource action: rsc-passes start on cluster01 ++ * Resource action: dummy-rsc start on cluster02 ++ * Resource action: ping-rsc-ping start on cluster01 ++ * Resource action: ip-rsc monitor=20000 on cluster02 ++ * Resource action: rsc-passes monitor=10000 on cluster01 ++ * Resource action: dummy-rsc monitor=10000 on cluster02 ++ ++Revised cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Started cluster01 ++ ip-rsc (ocf::heartbeat:IPaddr2): Started cluster02 ++ rsc-passes (ocf::heartbeat:IPaddr2): Started cluster01 ++ dummy-rsc (ocf::pacemaker:Dummy): Started cluster02 ++ ping-rsc-ping (ocf::pacemaker:ping): Started cluster01 ++ +diff --git a/cts/scheduler/op-defaults-2.xml b/cts/scheduler/op-defaults-2.xml +new file mode 100644 +index 0000000..9f3c288 +--- /dev/null ++++ b/cts/scheduler/op-defaults-2.xml +@@ -0,0 +1,73 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 
+ + +From 017b783c2037d641c40a39dd7ec3a9eba0aaa6df Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 13 May 2020 15:18:28 -0400 +Subject: [PATCH 15/17] Doc: Pacemaker Explained: Add documentation for + rsc_expr and op_expr. + +--- + doc/Pacemaker_Explained/en-US/Ch-Rules.txt | 174 +++++++++++++++++++++++++++++ + 1 file changed, 174 insertions(+) + +diff --git a/doc/Pacemaker_Explained/en-US/Ch-Rules.txt b/doc/Pacemaker_Explained/en-US/Ch-Rules.txt +index 9d617f6..5df5f82 100644 +--- a/doc/Pacemaker_Explained/en-US/Ch-Rules.txt ++++ b/doc/Pacemaker_Explained/en-US/Ch-Rules.txt +@@ -522,6 +522,124 @@ You may wish to write +end="2005-03-31T23:59:59"+ to avoid confusion. + ------- + ===== + ++== Resource Expressions == ++ ++An +rsc_expression+ is a rule condition based on a resource agent's properties. ++This rule is only valid within an +rsc_defaults+ or +op_defaults+ context. None ++of the matching attributes of +class+, +provider+, and +type+ are required. If ++one is omitted, all values of that attribute will match. For instance, omitting +++type+ means every type will match. ++ ++.Attributes of an rsc_expression Element ++[width="95%",cols="2m,<5",options="header",align="center"] ++|========================================================= ++ ++|Field ++|Description ++ ++|id ++|A unique name for the expression (required) ++ indexterm:[XML attribute,id attribute,rsc_expression element] ++ indexterm:[XML element,rsc_expression element,id attribute] ++ ++|class ++|The standard name to be matched against resource agents ++ indexterm:[XML attribute,class attribute,rsc_expression element] ++ indexterm:[XML element,rsc_expression element,class attribute] ++ ++|provider ++|If given, the vendor to be matched against resource agents. This ++ only makes sense for agents using the OCF spec. 
++ indexterm:[XML attribute,provider attribute,rsc_expression element] ++ indexterm:[XML element,rsc_expression element,provider attribute] ++ ++|type ++|The name of the resource agent to be matched ++ indexterm:[XML attribute,type attribute,rsc_expression element] ++ indexterm:[XML element,rsc_expression element,type attribute] ++ ++|========================================================= ++ ++=== Example Resource-Based Expressions === ++ ++A small sample of how resource-based expressions can be used: ++ ++.True for all ocf:heartbeat:IPaddr2 resources ++==== ++[source,XML] ++---- ++ ++ ++ ++---- ++==== ++ ++.Provider doesn't apply to non-OCF resources ++==== ++[source,XML] ++---- ++ ++ ++ ++---- ++==== ++ ++== Operation Expressions == ++ ++An +op_expression+ is a rule condition based on an action of some resource ++agent. This rule is only valid within an +op_defaults+ context. ++ ++.Attributes of an op_expression Element ++[width="95%",cols="2m,<5",options="header",align="center"] ++|========================================================= ++ ++|Field ++|Description ++ ++|id ++|A unique name for the expression (required) ++ indexterm:[XML attribute,id attribute,op_expression element] ++ indexterm:[XML element,op_expression element,id attribute] ++ ++|name ++|The action name to match against. This can be any action supported by ++ the resource agent; common values include +monitor+, +start+, and +stop+ ++ (required). ++ indexterm:[XML attribute,name attribute,op_expression element] ++ indexterm:[XML element,op_expression element,name attribute] ++ ++|interval ++|The interval of the action to match against. If not given, only ++ the name attribute will be used to match. 
++ indexterm:[XML attribute,interval attribute,op_expression element] ++ indexterm:[XML element,op_expression element,interval attribute] ++ ++|========================================================= ++ ++=== Example Operation-Based Expressions === ++ ++A small sample of how operation-based expressions can be used: ++ ++.True for all monitor actions ++==== ++[source,XML] ++---- ++ ++ ++ ++---- ++==== ++ ++.True for all monitor actions with a 10 second interval ++==== ++[source,XML] ++---- ++ ++ ++ ++---- ++==== ++ + == Using Rules to Determine Resource Location == + indexterm:[Rule,Determine Resource Location] + indexterm:[Resource,Location,Determine by Rules] +@@ -710,6 +828,62 @@ Rules may be used similarly in +instance_attributes+ or +utilization+ blocks. + Any single block may directly contain only a single rule, but that rule may + itself contain any number of rules. + +++rsc_expression+ and +op_expression+ blocks may additionally be used to set defaults ++on either a single resource or across an entire class of resources with a single ++rule. +rsc_expression+ may be used to select resource agents within both +rsc_defaults+ ++and +op_defaults+, while +op_expression+ may only be used within +op_defaults+. If ++multiple rules succeed for a given resource agent, the last one specified will be ++the one that takes effect. As with any other rule, boolean operations may be used ++to make more complicated expressions. 
++ ++.Set all IPaddr2 resources to stopped ++===== ++[source,XML] ++------- ++ ++ ++ ++ ++ ++ ++ ++ ++------- ++===== ++ ++.Set all monitor action timeouts to 7 seconds ++===== ++[source,XML] ++------- ++ ++ ++ ++ ++ ++ ++ ++ ++------- ++===== ++ ++.Set the monitor action timeout on all IPaddr2 resources with a given monitor interval to 8 seconds ++===== ++[source,XML] ++------- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++------- ++===== ++ + === Using Rules to Control Cluster Options === + indexterm:[Rule,Controlling Cluster Options] + indexterm:[Cluster,Setting Options with Rules] +-- +1.8.3.1 + + +From b8dd16c5e454445f73416ae8b74649545ee1b472 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 13 May 2020 16:26:21 -0400 +Subject: [PATCH 16/17] Test: scheduler: Add a test for multiple rules applying + to the same resource. + +--- + cts/cts-scheduler.in | 1 + + cts/scheduler/op-defaults-3.dot | 14 +++++++ + cts/scheduler/op-defaults-3.exp | 83 +++++++++++++++++++++++++++++++++++++ + cts/scheduler/op-defaults-3.scores | 5 +++ + cts/scheduler/op-defaults-3.summary | 26 ++++++++++++ + cts/scheduler/op-defaults-3.xml | 54 ++++++++++++++++++++++++ + 6 files changed, 183 insertions(+) + create mode 100644 cts/scheduler/op-defaults-3.dot + create mode 100644 cts/scheduler/op-defaults-3.exp + create mode 100644 cts/scheduler/op-defaults-3.scores + create mode 100644 cts/scheduler/op-defaults-3.summary + create mode 100644 cts/scheduler/op-defaults-3.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 669b344..2c2d14f 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -965,6 +965,7 @@ TESTS = [ + [ + [ "op-defaults", "Test op_defaults conditional expressions" ], + [ "op-defaults-2", "Test op_defaults AND'ed conditional expressions" ], ++ [ "op-defaults-3", "Test op_defaults precedence" ], + [ "rsc-defaults", "Test rsc_defaults conditional expressions" ], + ], + +diff --git a/cts/scheduler/op-defaults-3.dot b/cts/scheduler/op-defaults-3.dot +new 
file mode 100644 +index 0000000..382f630 +--- /dev/null ++++ b/cts/scheduler/op-defaults-3.dot +@@ -0,0 +1,14 @@ ++ digraph "g" { ++"dummy-rsc_monitor_0 cluster01" -> "dummy-rsc_start_0 cluster02" [ style = bold] ++"dummy-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_0 cluster02" -> "dummy-rsc_start_0 cluster02" [ style = bold] ++"dummy-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_10000 cluster02" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_start_0 cluster02" -> "dummy-rsc_monitor_10000 cluster02" [ style = bold] ++"dummy-rsc_start_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster01" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster02" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/op-defaults-3.exp b/cts/scheduler/op-defaults-3.exp +new file mode 100644 +index 0000000..6d567dc +--- /dev/null ++++ b/cts/scheduler/op-defaults-3.exp +@@ -0,0 +1,83 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/op-defaults-3.scores b/cts/scheduler/op-defaults-3.scores +new file mode 100644 +index 0000000..0a5190a +--- /dev/null ++++ b/cts/scheduler/op-defaults-3.scores +@@ -0,0 +1,5 @@ ++Allocation scores: ++pcmk__native_allocate: dummy-rsc allocation score on cluster01: 0 ++pcmk__native_allocate: dummy-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: fencing allocation score on cluster01: 0 ++pcmk__native_allocate: 
fencing allocation score on cluster02: 0 +diff --git a/cts/scheduler/op-defaults-3.summary b/cts/scheduler/op-defaults-3.summary +new file mode 100644 +index 0000000..a83eb15 +--- /dev/null ++++ b/cts/scheduler/op-defaults-3.summary +@@ -0,0 +1,26 @@ ++ ++Current cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Stopped ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped ++ ++Transition Summary: ++ * Start fencing ( cluster01 ) ++ * Start dummy-rsc ( cluster02 ) ++ ++Executing cluster transition: ++ * Resource action: fencing monitor on cluster02 ++ * Resource action: fencing monitor on cluster01 ++ * Resource action: dummy-rsc monitor on cluster02 ++ * Resource action: dummy-rsc monitor on cluster01 ++ * Resource action: fencing start on cluster01 ++ * Resource action: dummy-rsc start on cluster02 ++ * Resource action: dummy-rsc monitor=10000 on cluster02 ++ ++Revised cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Started cluster01 ++ dummy-rsc (ocf::pacemaker:Dummy): Started cluster02 ++ +diff --git a/cts/scheduler/op-defaults-3.xml b/cts/scheduler/op-defaults-3.xml +new file mode 100644 +index 0000000..4a8912e +--- /dev/null ++++ b/cts/scheduler/op-defaults-3.xml +@@ -0,0 +1,54 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From b9ccde16609e7d005ac0578a603da97a1808704a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 15 May 2020 13:48:47 -0400 +Subject: [PATCH 17/17] Test: scheduler: Add a test for rsc_defaults not + specifying type. 
+ +--- + cts/cts-scheduler.in | 1 + + cts/scheduler/rsc-defaults-2.dot | 11 ++++++ + cts/scheduler/rsc-defaults-2.exp | 72 ++++++++++++++++++++++++++++++++++++ + cts/scheduler/rsc-defaults-2.scores | 7 ++++ + cts/scheduler/rsc-defaults-2.summary | 27 ++++++++++++++ + cts/scheduler/rsc-defaults-2.xml | 52 ++++++++++++++++++++++++++ + 6 files changed, 170 insertions(+) + create mode 100644 cts/scheduler/rsc-defaults-2.dot + create mode 100644 cts/scheduler/rsc-defaults-2.exp + create mode 100644 cts/scheduler/rsc-defaults-2.scores + create mode 100644 cts/scheduler/rsc-defaults-2.summary + create mode 100644 cts/scheduler/rsc-defaults-2.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 2c2d14f..346ada2 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -967,6 +967,7 @@ TESTS = [ + [ "op-defaults-2", "Test op_defaults AND'ed conditional expressions" ], + [ "op-defaults-3", "Test op_defaults precedence" ], + [ "rsc-defaults", "Test rsc_defaults conditional expressions" ], ++ [ "rsc-defaults-2", "Test rsc_defaults conditional expressions without type" ], + ], + + # @TODO: If pacemaker implements versioned attributes, uncomment these tests +diff --git a/cts/scheduler/rsc-defaults-2.dot b/cts/scheduler/rsc-defaults-2.dot +new file mode 100644 +index 0000000..b43c5e6 +--- /dev/null ++++ b/cts/scheduler/rsc-defaults-2.dot +@@ -0,0 +1,11 @@ ++ digraph "g" { ++"dummy-rsc_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"dummy-rsc_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster01" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"fencing_monitor_0 cluster02" -> "fencing_start_0 cluster01" [ style = bold] ++"fencing_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++"fencing_start_0 cluster01" [ style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster01" [ 
style=bold color="green" fontcolor="black"] ++"ping-rsc-ping_monitor_0 cluster02" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/rsc-defaults-2.exp b/cts/scheduler/rsc-defaults-2.exp +new file mode 100644 +index 0000000..e9e1b5f +--- /dev/null ++++ b/cts/scheduler/rsc-defaults-2.exp +@@ -0,0 +1,72 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/rsc-defaults-2.scores b/cts/scheduler/rsc-defaults-2.scores +new file mode 100644 +index 0000000..4b70f54 +--- /dev/null ++++ b/cts/scheduler/rsc-defaults-2.scores +@@ -0,0 +1,7 @@ ++Allocation scores: ++pcmk__native_allocate: dummy-rsc allocation score on cluster01: 0 ++pcmk__native_allocate: dummy-rsc allocation score on cluster02: 0 ++pcmk__native_allocate: fencing allocation score on cluster01: 0 ++pcmk__native_allocate: fencing allocation score on cluster02: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster01: 0 ++pcmk__native_allocate: ping-rsc-ping allocation score on cluster02: 0 +diff --git a/cts/scheduler/rsc-defaults-2.summary b/cts/scheduler/rsc-defaults-2.summary +new file mode 100644 +index 0000000..46a2a2d +--- /dev/null ++++ b/cts/scheduler/rsc-defaults-2.summary +@@ -0,0 +1,27 @@ ++ ++Current cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Stopped ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped (unmanaged) ++ ping-rsc-ping (ocf::pacemaker:ping): Stopped (unmanaged) ++ ++Transition Summary: ++ * Start fencing ( cluster01 ) ++ ++Executing cluster transition: ++ * Resource action: fencing monitor on cluster02 ++ * Resource action: fencing monitor on cluster01 ++ * Resource action: dummy-rsc monitor on cluster02 ++ * Resource action: dummy-rsc monitor on cluster01 ++ * Resource action: ping-rsc-ping monitor on cluster02 ++ * Resource 
action: ping-rsc-ping monitor on cluster01 ++ * Resource action: fencing start on cluster01 ++ ++Revised cluster status: ++Online: [ cluster01 cluster02 ] ++ ++ fencing (stonith:fence_xvm): Started cluster01 ++ dummy-rsc (ocf::pacemaker:Dummy): Stopped (unmanaged) ++ ping-rsc-ping (ocf::pacemaker:ping): Stopped (unmanaged) ++ +diff --git a/cts/scheduler/rsc-defaults-2.xml b/cts/scheduler/rsc-defaults-2.xml +new file mode 100644 +index 0000000..a160fae +--- /dev/null ++++ b/cts/scheduler/rsc-defaults-2.xml +@@ -0,0 +1,52 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + diff --git a/SOURCES/001-status-deletion.patch b/SOURCES/001-status-deletion.patch deleted file mode 100644 index ca35c21..0000000 --- a/SOURCES/001-status-deletion.patch +++ /dev/null @@ -1,420 +0,0 @@ -From 6c529bb624ad548f66ce6ef1fa80b77c688918f4 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 22 Nov 2019 16:39:54 -0600 -Subject: [PATCH 1/4] Refactor: controller: rename struct recurring_op_s to - active_op_t - -... 
because it holds both recurring and pending non-recurring actions, -and the name was confusing ---- - daemons/controld/controld_execd.c | 18 +++++++++--------- - daemons/controld/controld_execd_state.c | 4 ++-- - daemons/controld/controld_lrm.h | 8 ++++---- - 3 files changed, 15 insertions(+), 15 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 9e8dd36..48f35dd 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -403,7 +403,7 @@ lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - GHashTableIter gIter; - const char *key = NULL; - rsc_history_t *entry = NULL; -- struct recurring_op_s *pending = NULL; -+ active_op_t *pending = NULL; - - crm_debug("Checking for active resources before exit"); - -@@ -909,7 +909,7 @@ static gboolean - lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) - { - const char *rsc = user_data; -- struct recurring_op_s *pending = value; -+ active_op_t *pending = value; - - if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { - crm_info("Removing op %s:%d for deleted resource %s", -@@ -1137,7 +1137,7 @@ cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, - { - int rc = pcmk_ok; - char *local_key = NULL; -- struct recurring_op_s *pending = NULL; -+ active_op_t *pending = NULL; - - CRM_CHECK(op != 0, return FALSE); - CRM_CHECK(rsc_id != NULL, return FALSE); -@@ -1203,7 +1203,7 @@ cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) - { - gboolean remove = FALSE; - struct cancel_data *data = user_data; -- struct recurring_op_s *op = (struct recurring_op_s *)value; -+ active_op_t *op = value; - - if (crm_str_eq(op->op_key, data->key, TRUE)) { - data->done = TRUE; -@@ -2107,7 +2107,7 @@ stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) - { - gboolean remove = FALSE; - struct stop_recurring_action_s *event = user_data; -- struct 
recurring_op_s *op = (struct recurring_op_s *)value; -+ active_op_t *op = value; - - if ((op->interval_ms != 0) - && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { -@@ -2124,7 +2124,7 @@ stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) - { - gboolean remove = FALSE; - lrm_state_t *lrm_state = user_data; -- struct recurring_op_s *op = (struct recurring_op_s *)value; -+ active_op_t *op = value; - - if (op->interval_ms != 0) { - crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, -@@ -2297,9 +2297,9 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat - * for them to complete during shutdown - */ - char *call_id_s = make_stop_id(rsc->id, call_id); -- struct recurring_op_s *pending = NULL; -+ active_op_t *pending = NULL; - -- pending = calloc(1, sizeof(struct recurring_op_s)); -+ pending = calloc(1, sizeof(active_op_t)); - crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); - - pending->call_id = call_id; -@@ -2517,7 +2517,7 @@ did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, - - void - process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, -- struct recurring_op_s *pending, xmlNode *action_xml) -+ active_op_t *pending, xmlNode *action_xml) - { - char *op_id = NULL; - char *op_key = NULL; -diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c -index 0e21d18..473da97 100644 ---- a/daemons/controld/controld_execd_state.c -+++ b/daemons/controld/controld_execd_state.c -@@ -44,7 +44,7 @@ free_deletion_op(gpointer value) - static void - free_recurring_op(gpointer value) - { -- struct recurring_op_s *op = (struct recurring_op_s *)value; -+ active_op_t *op = value; - - free(op->user_data); - free(op->rsc_id); -@@ -61,7 +61,7 @@ fail_pending_op(gpointer key, gpointer value, gpointer user_data) - { - lrmd_event_data_t event = { 0, }; - lrm_state_t *lrm_state = user_data; -- struct recurring_op_s *op = (struct 
recurring_op_s *)value; -+ active_op_t *op = value; - - crm_trace("Pre-emptively failing " CRM_OP_FMT " on %s (call=%s, %s)", - op->rsc_id, op->op_type, op->interval_ms, -diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h -index 598682b..27df5d7 100644 ---- a/daemons/controld/controld_lrm.h -+++ b/daemons/controld/controld_lrm.h -@@ -33,8 +33,8 @@ typedef struct resource_history_s { - - void history_free(gpointer data); - --/* TODO - Replace this with lrmd_event_data_t */ --struct recurring_op_s { -+// In-flight action (recurring or pending) -+typedef struct active_op_s { - guint interval_ms; - int call_id; - gboolean remove; -@@ -45,7 +45,7 @@ struct recurring_op_s { - char *op_key; - char *user_data; - GHashTable *params; --}; -+} active_op_t; - - typedef struct lrm_state_s { - const char *node_name; -@@ -164,4 +164,4 @@ void remote_ra_process_maintenance_nodes(xmlNode *xml); - gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); - - void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, -- struct recurring_op_s *pending, xmlNode *action_xml); -+ active_op_t *pending, xmlNode *action_xml); --- -1.8.3.1 - - -From 93a59f1df8fe11d365032d75f10cb4189ad2f1f8 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 22 Nov 2019 16:45:31 -0600 -Subject: [PATCH 2/4] Refactor: controller: convert active_op_t booleans to - bitmask - ---- - daemons/controld/controld_execd.c | 11 +++++------ - daemons/controld/controld_lrm.h | 8 ++++++-- - 2 files changed, 11 insertions(+), 8 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 48f35dd..2c9d9c0 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1148,18 +1148,17 @@ cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, - pending = g_hash_table_lookup(lrm_state->pending_ops, key); - - if (pending) { -- if (remove && pending->remove == FALSE) { -- 
pending->remove = TRUE; -+ if (remove && is_not_set(pending->flags, active_op_remove)) { -+ set_bit(pending->flags, active_op_remove); - crm_debug("Scheduling %s for removal", key); - } - -- if (pending->cancelled) { -+ if (is_set(pending->flags, active_op_cancelled)) { - crm_debug("Operation %s already cancelled", key); - free(local_key); - return FALSE; - } -- -- pending->cancelled = TRUE; -+ set_bit(pending->flags, active_op_cancelled); - - } else { - crm_info("No pending op found for %s", key); -@@ -2652,7 +2651,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - crm_err("Recurring operation %s was cancelled without transition information", - op_key); - -- } else if (pending->remove) { -+ } else if (is_set(pending->flags, active_op_remove)) { - /* This recurring operation was cancelled (by us) and pending, and we - * have been waiting for it to finish. - */ -diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h -index 27df5d7..3ab7048 100644 ---- a/daemons/controld/controld_lrm.h -+++ b/daemons/controld/controld_lrm.h -@@ -33,12 +33,16 @@ typedef struct resource_history_s { - - void history_free(gpointer data); - -+enum active_op_e { -+ active_op_remove = (1 << 0), -+ active_op_cancelled = (1 << 1), -+}; -+ - // In-flight action (recurring or pending) - typedef struct active_op_s { - guint interval_ms; - int call_id; -- gboolean remove; -- gboolean cancelled; -+ uint32_t flags; // bitmask of active_op_e - time_t start_time; - char *rsc_id; - char *op_type; --- -1.8.3.1 - - -From 4d087d021d325e26b41a9b36b5b190dc7b25334c Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 22 Nov 2019 16:58:25 -0600 -Subject: [PATCH 3/4] Refactor: controller: remove unused argument - ---- - daemons/controld/controld_execd.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 2c9d9c0..46c1958 100644 ---- 
a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -43,8 +43,8 @@ static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int ca - - static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, - const char *rsc_id, const char *operation); --static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, -- xmlNode * msg, xmlNode * request); -+static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, -+ const char *operation, xmlNode *msg); - - void send_direct_ack(const char *to_host, const char *to_sys, - lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); -@@ -1858,7 +1858,7 @@ do_lrm_invoke(long long action, - crm_rsc_delete, user_name); - - } else { -- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); -+ do_lrm_rsc_op(lrm_state, rsc, operation, input->xml); - } - - lrmd_free_rsc_info(rsc); -@@ -2170,8 +2170,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t - } - - static void --do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, -- xmlNode * request) -+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, -+ const char *operation, xmlNode *msg) - { - int call_id = 0; - char *op_id = NULL; --- -1.8.3.1 - - -From 356b417274918b7da6cdd9c72c036c923160b318 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 6 Dec 2019 12:15:05 -0600 -Subject: [PATCH 4/4] Refactor: scheduler: combine two "if" statements - -... 
for readability, and ease of adding another block later ---- - lib/pacemaker/pcmk_sched_graph.c | 120 +++++++++++++++++++-------------------- - 1 file changed, 60 insertions(+), 60 deletions(-) - -diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c -index e5a8a01..a6967fe 100644 ---- a/lib/pacemaker/pcmk_sched_graph.c -+++ b/lib/pacemaker/pcmk_sched_graph.c -@@ -1088,71 +1088,71 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - return action_xml; - } - -- /* List affected resource */ -- if (action->rsc) { -- if (is_set(action->flags, pe_action_pseudo) == FALSE) { -- int lpc = 0; -- -- xmlNode *rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml)); -- -- const char *attr_list[] = { -- XML_AGENT_ATTR_CLASS, -- XML_AGENT_ATTR_PROVIDER, -- XML_ATTR_TYPE -- }; -- -- if (is_set(action->rsc->flags, pe_rsc_orphan) && action->rsc->clone_name) { -- /* Do not use the 'instance free' name here as that -- * might interfere with the instance we plan to keep. -- * Ie. if there are more than two named /anonymous/ -- * instances on a given node, we need to make sure the -- * command goes to the right one. 
-- * -- * Keep this block, even when everyone is using -- * 'instance free' anonymous clone names - it means -- * we'll do the right thing if anyone toggles the -- * unique flag to 'off' -- */ -- crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, -- action->rsc->clone_name); -- crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); -- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); -+ if (action->rsc && is_not_set(action->flags, pe_action_pseudo)) { -+ int lpc = 0; -+ xmlNode *rsc_xml = NULL; -+ const char *attr_list[] = { -+ XML_AGENT_ATTR_CLASS, -+ XML_AGENT_ATTR_PROVIDER, -+ XML_ATTR_TYPE -+ }; -+ -+ // List affected resource -+ -+ rsc_xml = create_xml_node(action_xml, -+ crm_element_name(action->rsc->xml)); -+ if (is_set(action->rsc->flags, pe_rsc_orphan) -+ && action->rsc->clone_name) { -+ /* Do not use the 'instance free' name here as that -+ * might interfere with the instance we plan to keep. -+ * Ie. if there are more than two named /anonymous/ -+ * instances on a given node, we need to make sure the -+ * command goes to the right one. -+ * -+ * Keep this block, even when everyone is using -+ * 'instance free' anonymous clone names - it means -+ * we'll do the right thing if anyone toggles the -+ * unique flag to 'off' -+ */ -+ crm_debug("Using orphan clone name %s instead of %s", action->rsc->id, -+ action->rsc->clone_name); -+ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name); -+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); - -- } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) { -- const char *xml_id = ID(action->rsc->xml); -- -- crm_debug("Using anonymous clone name %s for %s (aka. 
%s)", xml_id, action->rsc->id, -- action->rsc->clone_name); -- -- /* ID is what we'd like client to use -- * ID_LONG is what they might know it as instead -- * -- * ID_LONG is only strictly needed /here/ during the -- * transition period until all nodes in the cluster -- * are running the new software /and/ have rebooted -- * once (meaning that they've only ever spoken to a DC -- * supporting this feature). -- * -- * If anyone toggles the unique flag to 'on', the -- * 'instance free' name will correspond to an orphan -- * and fall into the clause above instead -- */ -- crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); -- if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) { -- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); -- } else { -- crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); -- } -+ } else if (is_not_set(action->rsc->flags, pe_rsc_unique)) { -+ const char *xml_id = ID(action->rsc->xml); -+ -+ crm_debug("Using anonymous clone name %s for %s (aka. %s)", xml_id, action->rsc->id, -+ action->rsc->clone_name); - -+ /* ID is what we'd like client to use -+ * ID_LONG is what they might know it as instead -+ * -+ * ID_LONG is only strictly needed /here/ during the -+ * transition period until all nodes in the cluster -+ * are running the new software /and/ have rebooted -+ * once (meaning that they've only ever spoken to a DC -+ * supporting this feature). 
-+ * -+ * If anyone toggles the unique flag to 'on', the -+ * 'instance free' name will correspond to an orphan -+ * and fall into the clause above instead -+ */ -+ crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id); -+ if (action->rsc->clone_name && safe_str_neq(xml_id, action->rsc->clone_name)) { -+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name); - } else { -- CRM_ASSERT(action->rsc->clone_name == NULL); -- crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); -+ crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id); - } - -- for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { -- crm_xml_add(rsc_xml, attr_list[lpc], -- g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); -- } -+ } else { -+ CRM_ASSERT(action->rsc->clone_name == NULL); -+ crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id); -+ } -+ -+ for (lpc = 0; lpc < DIMOF(attr_list); lpc++) { -+ crm_xml_add(rsc_xml, attr_list[lpc], -+ g_hash_table_lookup(action->rsc->meta, attr_list[lpc])); - } - } - --- -1.8.3.1 - diff --git a/SOURCES/002-demote.patch b/SOURCES/002-demote.patch new file mode 100644 index 0000000..5da2515 --- /dev/null +++ b/SOURCES/002-demote.patch @@ -0,0 +1,8664 @@ +From f1f71b3f3c342987db0058e7db0030417f3f83fa Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 28 May 2020 08:22:00 -0500 +Subject: [PATCH 01/20] Refactor: scheduler: functionize comparing on-fail + values + +The action_fail_response enum values used for the "on-fail" operation +meta-attribute were initially intended to be in order of severity. +However as new values were added, they were added to the end (out of severity +order) to preserve API backward compatibility. + +This resulted in a convoluted comparison of values that will only get worse as +more values are added. + +This commit adds a comparison function to isolate that complexity. 
+--- + include/crm/pengine/common.h | 32 ++++++++++++------ + lib/pengine/unpack.c | 80 +++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 97 insertions(+), 15 deletions(-) + +diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h +index 3a770b7..2737b2e 100644 +--- a/include/crm/pengine/common.h ++++ b/include/crm/pengine/common.h +@@ -22,18 +22,29 @@ extern "C" { + extern gboolean was_processing_error; + extern gboolean was_processing_warning; + +-/* order is significant here +- * items listed in order of accending severeness +- * more severe actions take precedent over lower ones ++/* The order is (partially) significant here; the values from action_fail_ignore ++ * through action_fail_fence are in order of increasing severity. ++ * ++ * @COMPAT The values should be ordered and numbered per the "TODO" comments ++ * below, so all values are in order of severity and there is room for ++ * future additions, but that would break API compatibility. ++ * @TODO For now, we just use a function to compare the values specially, but ++ * at the next compatibility break, we should arrange things properly. + */ + enum action_fail_response { +- action_fail_ignore, +- action_fail_recover, +- action_fail_migrate, /* recover by moving it somewhere else */ +- action_fail_block, +- action_fail_stop, +- action_fail_standby, +- action_fail_fence, ++ action_fail_ignore, // @TODO = 10 ++ // @TODO action_fail_demote = 20, ++ action_fail_recover, // @TODO = 30 ++ // @TODO action_fail_reset_remote = 40, ++ // @TODO action_fail_restart_container = 50, ++ action_fail_migrate, // @TODO = 60 ++ action_fail_block, // @TODO = 70 ++ action_fail_stop, // @TODO = 80 ++ action_fail_standby, // @TODO = 90 ++ action_fail_fence, // @TODO = 100 ++ ++ // @COMPAT Values below here are out of order for API compatibility ++ + action_fail_restart_container, + + /* This is reserved for internal use for remote node connection resources. 
+@@ -44,6 +55,7 @@ enum action_fail_response { + */ + action_fail_reset_remote, + ++ action_fail_demote, + }; + + /* the "done" action must be the "pre" action +1 */ +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 3c6606b..f688881 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2770,6 +2770,78 @@ last_change_str(xmlNode *xml_op) + return ((when_s && *when_s)? when_s : "unknown time"); + } + ++/*! ++ * \internal ++ * \brief Compare two on-fail values ++ * ++ * \param[in] first One on-fail value to compare ++ * \param[in] second The other on-fail value to compare ++ * ++ * \return A negative number if second is more severe than first, zero if they ++ * are equal, or a positive number if first is more severe than second. ++ * \note This is only needed until the action_fail_response values can be ++ * renumbered at the next API compatibility break. ++ */ ++static int ++cmp_on_fail(enum action_fail_response first, enum action_fail_response second) ++{ ++ switch (first) { ++ case action_fail_reset_remote: ++ switch (second) { ++ case action_fail_ignore: ++ case action_fail_recover: ++ return 1; ++ case action_fail_reset_remote: ++ return 0; ++ default: ++ return -1; ++ } ++ break; ++ ++ case action_fail_restart_container: ++ switch (second) { ++ case action_fail_ignore: ++ case action_fail_recover: ++ case action_fail_reset_remote: ++ return 1; ++ case action_fail_restart_container: ++ return 0; ++ default: ++ return -1; ++ } ++ break; ++ ++ default: ++ break; ++ } ++ switch (second) { ++ case action_fail_reset_remote: ++ switch (first) { ++ case action_fail_ignore: ++ case action_fail_recover: ++ return -1; ++ default: ++ return 1; ++ } ++ break; ++ ++ case action_fail_restart_container: ++ switch (first) { ++ case action_fail_ignore: ++ case action_fail_recover: ++ case action_fail_reset_remote: ++ return -1; ++ default: ++ return 1; ++ } ++ break; ++ ++ default: ++ break; ++ } ++ return first - second; ++} ++ + static void + 
unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, + enum action_fail_response * on_fail, pe_working_set_t * data_set) +@@ -2829,10 +2901,7 @@ unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * x + } + + action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); +- if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || +- (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || +- (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || +- (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { ++ if (cmp_on_fail(*on_fail, action->on_fail) < 0) { + pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), + fail2text(action->on_fail), action->uuid, key); + *on_fail = action->on_fail; +@@ -3675,7 +3744,8 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + record_failed_op(xml_op, node, rsc, data_set); + +- if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { ++ if ((failure_strategy == action_fail_restart_container) ++ && cmp_on_fail(*on_fail, action_fail_recover) <= 0) { + *on_fail = failure_strategy; + } + +-- +1.8.3.1 + + +From ef246ff05d7459f9672b10ac1873e3191a3b46e9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 28 May 2020 08:27:47 -0500 +Subject: [PATCH 02/20] Fix: scheduler: disallow on-fail=stop for stop + operations + +because it would loop infinitely as long as the stop continued to fail +--- + lib/pengine/utils.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 20a8db5..3fb7e62 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -716,16 +716,25 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, + return action; + } + ++static bool 
++valid_stop_on_fail(const char *value) ++{ ++ return safe_str_neq(value, "standby") ++ && safe_str_neq(value, "stop"); ++} ++ + static const char * + unpack_operation_on_fail(pe_action_t * action) + { + + const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); + +- if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { ++ if (safe_str_eq(action->task, CRMD_ACTION_STOP) ++ && !valid_stop_on_fail(value)) { ++ + pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop " +- "action to default value because 'standby' is not " +- "allowed for stop", action->rsc->id); ++ "action to default value because '%s' is not " ++ "allowed for stop", action->rsc->id, value); + return NULL; + } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { + /* demote on_fail defaults to master monitor value if present */ +-- +1.8.3.1 + + +From 8dceba792ffe65cd77c3aae430067638dbba63f9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 28 May 2020 08:50:33 -0500 +Subject: [PATCH 03/20] Refactor: scheduler: use more appropriate types in a + couple places + +--- + lib/pengine/unpack.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index f688881..6a350e5 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2244,7 +2244,7 @@ unpack_lrm_rsc_state(pe_node_t * node, xmlNode * rsc_entry, pe_working_set_t * d + xmlNode *rsc_op = NULL; + xmlNode *last_failure = NULL; + +- enum action_fail_response on_fail = FALSE; ++ enum action_fail_response on_fail = action_fail_ignore; + enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; + + crm_trace("[%s] Processing %s on %s", +@@ -2287,7 +2287,6 @@ unpack_lrm_rsc_state(pe_node_t * node, xmlNode * rsc_entry, pe_working_set_t * d + + /* process operations */ + saved_role = rsc->role; +- on_fail = action_fail_ignore; + rsc->role = RSC_ROLE_UNKNOWN; + sorted_op_list = g_list_sort(op_list, sort_op_by_callid); 
+ +@@ -3376,7 +3375,7 @@ int pe__target_rc_from_xml(xmlNode *xml_op) + static enum action_fail_response + get_action_on_fail(pe_resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) + { +- int result = action_fail_recover; ++ enum action_fail_response result = action_fail_recover; + pe_action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); + + result = action->on_fail; +-- +1.8.3.1 + + +From a4d6a20a990d1461184f888e21aa61cddff8996d Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 2 Jun 2020 12:05:57 -0500 +Subject: [PATCH 04/20] Low: libpacemaker: don't force stop when skipping + reload of failed resource + +Normal failure recovery will apply, which will stop if needed. + +(The stop was forced as of 2558d76f.) +--- + lib/pacemaker/pcmk_sched_native.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c +index bd8a0b5..ff2fb92 100644 +--- a/lib/pacemaker/pcmk_sched_native.c ++++ b/lib/pacemaker/pcmk_sched_native.c +@@ -3362,9 +3362,19 @@ ReloadRsc(pe_resource_t * rsc, pe_node_t *node, pe_working_set_t * data_set) + pe_rsc_trace(rsc, "%s: unmanaged", rsc->id); + return; + +- } else if (is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending)) { +- pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags); +- stop_action(rsc, node, FALSE); /* Force a full restart, overkill? */ ++ } else if (is_set(rsc->flags, pe_rsc_failed)) { ++ /* We don't need to specify any particular actions here, normal failure ++ * recovery will apply. ++ */ ++ pe_rsc_trace(rsc, "%s: preventing reload because failed", rsc->id); ++ return; ++ ++ } else if (is_set(rsc->flags, pe_rsc_start_pending)) { ++ /* If a resource's configuration changed while a start was pending, ++ * force a full restart. 
++ */ ++ pe_rsc_trace(rsc, "%s: preventing reload because start pending", rsc->id); ++ stop_action(rsc, node, FALSE); + return; + + } else if (node == NULL) { +-- +1.8.3.1 + + +From f2d244bc4306297d5960c0ba54e0a85a68e864ee Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 2 Jun 2020 12:16:33 -0500 +Subject: [PATCH 05/20] Test: scheduler: test forcing a restart instead of + reload when start is pending + +--- + cts/cts-scheduler.in | 1 + + cts/scheduler/params-3.dot | 28 ++++++ + cts/scheduler/params-3.exp | 208 +++++++++++++++++++++++++++++++++++++++++ + cts/scheduler/params-3.scores | 21 +++++ + cts/scheduler/params-3.summary | 45 +++++++++ + cts/scheduler/params-3.xml | 154 ++++++++++++++++++++++++++++++ + 6 files changed, 457 insertions(+) + create mode 100644 cts/scheduler/params-3.dot + create mode 100644 cts/scheduler/params-3.exp + create mode 100644 cts/scheduler/params-3.scores + create mode 100644 cts/scheduler/params-3.summary + create mode 100644 cts/scheduler/params-3.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 346ada2..ae8247e 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -84,6 +84,7 @@ TESTS = [ + [ "params-0", "Params: No change" ], + [ "params-1", "Params: Changed" ], + [ "params-2", "Params: Resource definition" ], ++ [ "params-3", "Params: Restart instead of reload if start pending" ], + [ "params-4", "Params: Reload" ], + [ "params-5", "Params: Restart based on probe digest" ], + [ "novell-251689", "Resource definition change + target_role=stopped" ], +diff --git a/cts/scheduler/params-3.dot b/cts/scheduler/params-3.dot +new file mode 100644 +index 0000000..d681ee5 +--- /dev/null ++++ b/cts/scheduler/params-3.dot +@@ -0,0 +1,28 @@ ++ digraph "g" { ++"Cancel rsc_c001n02_monitor_5000 c001n02" [ style=bold color="green" fontcolor="black"] ++"DcIPaddr_monitor_0 c001n01" -> "DcIPaddr_start_0 c001n02" [ style = bold] ++"DcIPaddr_monitor_0 c001n01" [ style=bold color="green" fontcolor="black"] 
++"DcIPaddr_monitor_0 c001n03" -> "DcIPaddr_start_0 c001n02" [ style = bold] ++"DcIPaddr_monitor_0 c001n03" [ style=bold color="green" fontcolor="black"] ++"DcIPaddr_monitor_0 c001n08" -> "DcIPaddr_start_0 c001n02" [ style = bold] ++"DcIPaddr_monitor_0 c001n08" [ style=bold color="green" fontcolor="black"] ++"DcIPaddr_monitor_5000 c001n02" [ style=bold color="green" fontcolor="black"] ++"DcIPaddr_start_0 c001n02" -> "DcIPaddr_monitor_5000 c001n02" [ style = bold] ++"DcIPaddr_start_0 c001n02" [ style=bold color="green" fontcolor="black"] ++"DcIPaddr_stop_0 c001n02" -> "DcIPaddr_start_0 c001n02" [ style = bold] ++"DcIPaddr_stop_0 c001n02" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n01_monitor_0 c001n02" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n01_monitor_0 c001n03" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n01_monitor_0 c001n08" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n02_monitor_0 c001n01" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n02_monitor_0 c001n03" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n02_monitor_0 c001n08" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n02_monitor_6000 c001n02" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n03_monitor_0 c001n01" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n03_monitor_0 c001n02" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n03_monitor_0 c001n08" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n08_monitor_0 c001n01" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n08_monitor_0 c001n02" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n08_monitor_0 c001n03" [ style=bold color="green" fontcolor="black"] ++"rsc_c001n08_monitor_5000 c001n08" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/params-3.exp b/cts/scheduler/params-3.exp +new file mode 100644 +index 0000000..5cccdec +--- /dev/null ++++ b/cts/scheduler/params-3.exp +@@ -0,0 
+1,208 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/params-3.scores b/cts/scheduler/params-3.scores +new file mode 100644 +index 0000000..00417ea +--- /dev/null ++++ b/cts/scheduler/params-3.scores +@@ -0,0 +1,21 @@ ++Allocation scores: ++pcmk__native_allocate: DcIPaddr allocation score on c001n01: -INFINITY ++pcmk__native_allocate: DcIPaddr allocation score on c001n02: 0 ++pcmk__native_allocate: DcIPaddr allocation score on c001n03: -INFINITY ++pcmk__native_allocate: DcIPaddr allocation score on c001n08: -INFINITY ++pcmk__native_allocate: rsc_c001n01 allocation score on c001n01: 100 ++pcmk__native_allocate: rsc_c001n01 allocation score on c001n02: 0 ++pcmk__native_allocate: rsc_c001n01 allocation score on c001n03: 0 ++pcmk__native_allocate: rsc_c001n01 allocation score on c001n08: 0 ++pcmk__native_allocate: rsc_c001n02 allocation score on c001n01: 0 ++pcmk__native_allocate: rsc_c001n02 allocation score on c001n02: 100 ++pcmk__native_allocate: rsc_c001n02 allocation score on c001n03: 0 ++pcmk__native_allocate: rsc_c001n02 allocation score on c001n08: 0 ++pcmk__native_allocate: rsc_c001n03 allocation score on c001n01: 0 ++pcmk__native_allocate: rsc_c001n03 allocation score on c001n02: 0 ++pcmk__native_allocate: rsc_c001n03 allocation score on c001n03: 100 ++pcmk__native_allocate: rsc_c001n03 allocation score on c001n08: 0 ++pcmk__native_allocate: rsc_c001n08 
allocation score on c001n01: 0 ++pcmk__native_allocate: rsc_c001n08 allocation score on c001n02: 0 ++pcmk__native_allocate: rsc_c001n08 allocation score on c001n03: 0 ++pcmk__native_allocate: rsc_c001n08 allocation score on c001n08: 100 +diff --git a/cts/scheduler/params-3.summary b/cts/scheduler/params-3.summary +new file mode 100644 +index 0000000..257f8ba +--- /dev/null ++++ b/cts/scheduler/params-3.summary +@@ -0,0 +1,45 @@ ++ ++Current cluster status: ++Online: [ c001n01 c001n02 c001n03 c001n08 ] ++ ++ DcIPaddr (ocf::heartbeat:IPaddr): Starting c001n02 ++ rsc_c001n08 (ocf::heartbeat:IPaddr): Started c001n08 ++ rsc_c001n02 (ocf::heartbeat:IPaddr): Started c001n02 ++ rsc_c001n03 (ocf::heartbeat:IPaddr): Started c001n03 ++ rsc_c001n01 (ocf::heartbeat:IPaddr): Started c001n01 ++ ++Transition Summary: ++ * Restart DcIPaddr ( c001n02 ) ++ ++Executing cluster transition: ++ * Resource action: DcIPaddr monitor on c001n08 ++ * Resource action: DcIPaddr monitor on c001n03 ++ * Resource action: DcIPaddr monitor on c001n01 ++ * Resource action: DcIPaddr stop on c001n02 ++ * Resource action: rsc_c001n08 monitor on c001n03 ++ * Resource action: rsc_c001n08 monitor on c001n02 ++ * Resource action: rsc_c001n08 monitor on c001n01 ++ * Resource action: rsc_c001n08 monitor=5000 on c001n08 ++ * Resource action: rsc_c001n02 monitor=6000 on c001n02 ++ * Resource action: rsc_c001n02 monitor on c001n08 ++ * Resource action: rsc_c001n02 monitor on c001n03 ++ * Resource action: rsc_c001n02 monitor on c001n01 ++ * Resource action: rsc_c001n02 cancel=5000 on c001n02 ++ * Resource action: rsc_c001n03 monitor on c001n08 ++ * Resource action: rsc_c001n03 monitor on c001n02 ++ * Resource action: rsc_c001n03 monitor on c001n01 ++ * Resource action: rsc_c001n01 monitor on c001n08 ++ * Resource action: rsc_c001n01 monitor on c001n03 ++ * Resource action: rsc_c001n01 monitor on c001n02 ++ * Resource action: DcIPaddr start on c001n02 ++ * Resource action: DcIPaddr monitor=5000 on c001n02 ++ 
++Revised cluster status: ++Online: [ c001n01 c001n02 c001n03 c001n08 ] ++ ++ DcIPaddr (ocf::heartbeat:IPaddr): Started c001n02 ++ rsc_c001n08 (ocf::heartbeat:IPaddr): Started c001n08 ++ rsc_c001n02 (ocf::heartbeat:IPaddr): Started c001n02 ++ rsc_c001n03 (ocf::heartbeat:IPaddr): Started c001n03 ++ rsc_c001n01 (ocf::heartbeat:IPaddr): Started c001n01 ++ +diff --git a/cts/scheduler/params-3.xml b/cts/scheduler/params-3.xml +new file mode 100644 +index 0000000..ee6e157 +--- /dev/null ++++ b/cts/scheduler/params-3.xml +@@ -0,0 +1,154 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From ff6aebecf8b40b882bddbd0d78e3f8702f97147e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 13 Apr 2020 12:22:35 -0500 +Subject: [PATCH 06/20] Doc: libpacemaker: improve comments when logging + actions + +... 
with slight refactoring for consistency +--- + lib/pacemaker/pcmk_sched_native.c | 41 ++++++++++++++++++++++----------------- + 1 file changed, 23 insertions(+), 18 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c +index ff2fb92..f14e690 100644 +--- a/lib/pacemaker/pcmk_sched_native.c ++++ b/lib/pacemaker/pcmk_sched_native.c +@@ -2348,8 +2348,6 @@ native_expand(pe_resource_t * rsc, pe_working_set_t * data_set) + } \ + } while(0) + +-static int rsc_width = 5; +-static int detail_width = 5; + static void + LogAction(const char *change, pe_resource_t *rsc, pe_node_t *origin, pe_node_t *destination, pe_action_t *action, pe_action_t *source, gboolean terminal) + { +@@ -2360,6 +2358,9 @@ LogAction(const char *change, pe_resource_t *rsc, pe_node_t *origin, pe_node_t * + bool same_role = FALSE; + bool need_role = FALSE; + ++ static int rsc_width = 5; ++ static int detail_width = 5; ++ + CRM_ASSERT(action); + CRM_ASSERT(destination != NULL || origin != NULL); + +@@ -2384,36 +2385,40 @@ LogAction(const char *change, pe_resource_t *rsc, pe_node_t *origin, pe_node_t * + same_role = TRUE; + } + +- if(need_role && origin == NULL) { +- /* Promoting from Stopped */ ++ if (need_role && (origin == NULL)) { ++ /* Starting and promoting a promotable clone instance */ + details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), destination->details->uname); + +- } else if(need_role && destination == NULL) { +- /* Demoting a Master or Stopping a Slave */ ++ } else if (origin == NULL) { ++ /* Starting a resource */ ++ details = crm_strdup_printf("%s", destination->details->uname); ++ ++ } else if (need_role && (destination == NULL)) { ++ /* Stopping a promotable clone instance */ + details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); + +- } else if(origin == NULL || destination == NULL) { +- /* Starting or stopping a resource */ +- details = crm_strdup_printf("%s", 
origin?origin->details->uname:destination->details->uname); ++ } else if (destination == NULL) { ++ /* Stopping a resource */ ++ details = crm_strdup_printf("%s", origin->details->uname); + +- } else if(need_role && same_role && same_host) { +- /* Recovering or restarting a promotable clone resource */ ++ } else if (need_role && same_role && same_host) { ++ /* Recovering, restarting or re-promoting a promotable clone instance */ + details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); + +- } else if(same_role && same_host) { ++ } else if (same_role && same_host) { + /* Recovering or Restarting a normal resource */ + details = crm_strdup_printf("%s", origin->details->uname); + +- } else if(same_role && need_role) { +- /* Moving a promotable clone resource */ ++ } else if (need_role && same_role) { ++ /* Moving a promotable clone instance */ + details = crm_strdup_printf("%s -> %s %s", origin->details->uname, destination->details->uname, role2text(rsc->role)); + +- } else if(same_role) { ++ } else if (same_role) { + /* Moving a normal resource */ + details = crm_strdup_printf("%s -> %s", origin->details->uname, destination->details->uname); + +- } else if(same_host) { +- /* Promoting or demoting a promotable clone resource */ ++ } else if (same_host) { ++ /* Promoting or demoting a promotable clone instance */ + details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), origin->details->uname); + + } else { +@@ -2560,7 +2565,7 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) + pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), + next->details->uname); + +- } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) { ++ } else if (is_not_set(start->flags, pe_action_runnable)) { + LogAction("Stop", rsc, current, NULL, stop, + (stop && stop->reason)? 
stop : start, terminal); + STOP_SANITY_ASSERT(__LINE__); +-- +1.8.3.1 + + +From 98c3b649fa065b7e7a59029cc2f887bc462d170a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 13 Apr 2020 12:23:22 -0500 +Subject: [PATCH 07/20] Log: libpacemaker: check for re-promotes specifically + +If a promotable clone instance is being demoted and promoted on its current +node, without also stopping and starting, it previously would be logged as +"Leave" indicating unchanged, because the current and next role are the same. + +Now, check for this situation specifically, and log it as "Re-promote". + +Currently, the scheduler is not capable of generating this situation, but +upcoming changes will. +--- + lib/pacemaker/pcmk_sched_native.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c +index f14e690..89952bf 100644 +--- a/lib/pacemaker/pcmk_sched_native.c ++++ b/lib/pacemaker/pcmk_sched_native.c +@@ -2561,9 +2561,17 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) + } else if (is_set(rsc->flags, pe_rsc_reload)) { + LogAction("Reload", rsc, current, next, start, NULL, terminal); + ++ + } else if (start == NULL || is_set(start->flags, pe_action_optional)) { +- pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), +- next->details->uname); ++ if ((demote != NULL) && (promote != NULL) ++ && is_not_set(demote->flags, pe_action_optional) ++ && is_not_set(promote->flags, pe_action_optional)) { ++ LogAction("Re-promote", rsc, current, next, promote, demote, ++ terminal); ++ } else { ++ pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, ++ role2text(rsc->role), next->details->uname); ++ } + + } else if (is_not_set(start->flags, pe_action_runnable)) { + LogAction("Stop", rsc, current, NULL, stop, +-- +1.8.3.1 + + +From fd55a6660574c0bca517fd519377340712fb443a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 13 Apr 2020 12:51:03 -0500 
+Subject: [PATCH 08/20] Doc: libpacemaker: improve comments for resource state + and action matrices + +Also, make them static, for linker efficiency. +--- + lib/pacemaker/pcmk_sched_native.c | 39 ++++++++++++++++++++++++--------------- + 1 file changed, 24 insertions(+), 15 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c +index 89952bf..b9bca80 100644 +--- a/lib/pacemaker/pcmk_sched_native.c ++++ b/lib/pacemaker/pcmk_sched_native.c +@@ -41,27 +41,36 @@ gboolean PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, + gboolean RoleError(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); + gboolean NullOp(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set); + +-/* *INDENT-OFF* */ +-enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { +-/* Current State */ +-/* Next State: Unknown Stopped Started Slave Master */ ++/* This array says what the *next* role should be when transitioning from one ++ * role to another. For example going from Stopped to Master, the next role is ++ * RSC_ROLE_SLAVE, because the resource must be started before being promoted. ++ * The current state then becomes Started, which is fed into this array again, ++ * giving a next role of RSC_ROLE_MASTER. 
++ */ ++static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { ++ /* Current state Next state*/ ++ /* Unknown Stopped Started Slave Master */ + /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, + /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, + /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, +- /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, +- /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, ++ /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, ++ /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, + }; + +-gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(pe_resource_t*,pe_node_t*,gboolean,pe_working_set_t*) = { +-/* Current State */ +-/* Next State: Unknown Stopped Started Slave Master */ +- /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, +- /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, +- /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, +- /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, +- /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, }, ++typedef gboolean (*rsc_transition_fn)(pe_resource_t *rsc, pe_node_t *next, ++ gboolean optional, ++ pe_working_set_t *data_set); ++ ++// This array picks the function needed to transition from one role to another ++static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { ++ /* Current state Next state */ ++ /* Unknown Stopped Started Slave Master */ ++ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, ++ /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, ++ /* 
Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, ++ /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, ++ /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp , }, + }; +-/* *INDENT-ON* */ + + static gboolean + native_choose_node(pe_resource_t * rsc, pe_node_t * prefer, pe_working_set_t * data_set) +-- +1.8.3.1 + + +From 2f1e2df1f5ec67591cddf14f9dda1c52919dd53a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 26 May 2020 17:50:48 -0500 +Subject: [PATCH 09/20] Feature: xml: add on-fail="demote" option to resources + schema + +We don't need an XML schema version bump because it was already bumped since +the last release, for the rsc_expression/op_expression feature. +--- + xml/resources-3.4.rng | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xml/resources-3.4.rng b/xml/resources-3.4.rng +index fbb4b65..887dc1c 100644 +--- a/xml/resources-3.4.rng ++++ b/xml/resources-3.4.rng +@@ -388,6 +388,7 @@ + + ignore + block ++ demote + stop + restart + standby +-- +1.8.3.1 + + +From 874f75e0faad91c634860221d727e51e95d97f19 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 28 May 2020 08:29:37 -0500 +Subject: [PATCH 10/20] Feature: scheduler: new on-fail="demote" recovery + policy for promoted resources + +--- + include/crm/pengine/pe_types.h | 1 + + lib/pacemaker/pcmk_sched_native.c | 25 +++++++++++++++---- + lib/pengine/common.c | 3 +++ + lib/pengine/unpack.c | 51 ++++++++++++++++++++++++++++++++++++--- + lib/pengine/utils.c | 35 +++++++++++++++++++++++---- + 5 files changed, 102 insertions(+), 13 deletions(-) + +diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h +index ba88491..ed5eb12 100644 +--- a/include/crm/pengine/pe_types.h ++++ b/include/crm/pengine/pe_types.h +@@ -246,6 +246,7 @@ struct pe_node_s { + # define pe_rsc_allocating 0x00000200ULL + # define pe_rsc_merging 0x00000400ULL + ++# define pe_rsc_stop 0x00001000ULL + # define pe_rsc_reload 0x00002000ULL + # define 
pe_rsc_allow_remote_remotes 0x00004000ULL + +diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c +index b9bca80..4e3bd7c 100644 +--- a/lib/pacemaker/pcmk_sched_native.c ++++ b/lib/pacemaker/pcmk_sched_native.c +@@ -1205,6 +1205,7 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) + pe_node_t *chosen = NULL; + pe_node_t *current = NULL; + gboolean need_stop = FALSE; ++ bool need_promote = FALSE; + gboolean is_moving = FALSE; + gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? TRUE : FALSE; + +@@ -1309,8 +1310,15 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) + need_stop = TRUE; + + } else if (is_set(rsc->flags, pe_rsc_failed)) { +- pe_rsc_trace(rsc, "Recovering %s", rsc->id); +- need_stop = TRUE; ++ if (is_set(rsc->flags, pe_rsc_stop)) { ++ need_stop = TRUE; ++ pe_rsc_trace(rsc, "Recovering %s", rsc->id); ++ } else { ++ pe_rsc_trace(rsc, "Recovering %s by demotion", rsc->id); ++ if (rsc->next_role == RSC_ROLE_MASTER) { ++ need_promote = TRUE; ++ } ++ } + + } else if (is_set(rsc->flags, pe_rsc_block)) { + pe_rsc_trace(rsc, "Block %s", rsc->id); +@@ -1344,10 +1352,16 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set) + + + while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { ++ bool required = need_stop; ++ + next_role = rsc_state_matrix[role][rsc->role]; ++ if ((next_role == RSC_ROLE_MASTER) && need_promote) { ++ required = true; ++ } + pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), +- rsc->id, need_stop ? " required" : ""); +- if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) { ++ rsc->id, (required? 
" required" : "")); ++ if (rsc_action_matrix[role][next_role](rsc, chosen, !required, ++ data_set) == FALSE) { + break; + } + role = next_role; +@@ -2631,7 +2645,8 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) + + free(key); + +- } else if (stop && is_set(rsc->flags, pe_rsc_failed)) { ++ } else if (stop && is_set(rsc->flags, pe_rsc_failed) ++ && is_set(rsc->flags, pe_rsc_stop)) { + /* 'stop' may be NULL if the failure was ignored */ + LogAction("Recover", rsc, current, next, stop, start, terminal); + STOP_SANITY_ASSERT(__LINE__); +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index ded6df8..f4f2106 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -326,6 +326,9 @@ fail2text(enum action_fail_response fail) + case action_fail_ignore: + result = "ignore"; + break; ++ case action_fail_demote: ++ result = "demote"; ++ break; + case action_fail_block: + result = "block"; + break; +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 6a350e5..a219805 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -108,6 +108,7 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, + */ + node->details->remote_requires_reset = TRUE; + set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); + } + } + +@@ -117,6 +118,7 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, + "and guest resource no longer exists", + node->details->uname, reason); + set_bit(node->details->remote_rsc->flags, pe_rsc_failed); ++ set_bit(node->details->remote_rsc->flags, pe_rsc_stop); + + } else if (pe__is_remote_node(node)) { + pe_resource_t *rsc = node->details->remote_rsc; +@@ -1914,6 +1916,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + */ + if (pe__is_guest_node(node)) { + set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); + should_fence = TRUE; + + } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { +@@ -1956,6 +1959,11 @@ 
process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* nothing to do */ + break; + ++ case action_fail_demote: ++ set_bit(rsc->flags, pe_rsc_failed); ++ demote_action(rsc, node, FALSE); ++ break; ++ + case action_fail_fence: + /* treat it as if it is still running + * but also mark the node as unclean +@@ -1992,12 +2000,14 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + case action_fail_recover: + if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { + set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); + stop_action(rsc, node, FALSE); + } + break; + + case action_fail_restart_container: + set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); + + if (rsc->container && pe_rsc_is_bundled(rsc)) { + /* A bundle's remote connection can run on a different node than +@@ -2016,6 +2026,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + + case action_fail_reset_remote: + set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); + if (is_set(data_set->flags, pe_flag_stonith_enabled)) { + tmpnode = NULL; + if (rsc->is_remote_node) { +@@ -2071,8 +2082,17 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + } + + native_add_running(rsc, node, data_set); +- if (on_fail != action_fail_ignore) { +- set_bit(rsc->flags, pe_rsc_failed); ++ switch (on_fail) { ++ case action_fail_ignore: ++ break; ++ case action_fail_demote: ++ case action_fail_block: ++ set_bit(rsc->flags, pe_rsc_failed); ++ break; ++ default: ++ set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); ++ break; + } + + } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { +@@ -2595,6 +2615,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + } else { + /* Consider it failed here - forces a restart, prevents migration */ + set_bit(rsc->flags, pe_rsc_failed); ++ set_bit(rsc->flags, pe_rsc_stop); + clear_bit(rsc->flags, pe_rsc_allow_migrate); + 
} + } +@@ -2785,9 +2806,21 @@ static int + cmp_on_fail(enum action_fail_response first, enum action_fail_response second) + { + switch (first) { ++ case action_fail_demote: ++ switch (second) { ++ case action_fail_ignore: ++ return 1; ++ case action_fail_demote: ++ return 0; ++ default: ++ return -1; ++ } ++ break; ++ + case action_fail_reset_remote: + switch (second) { + case action_fail_ignore: ++ case action_fail_demote: + case action_fail_recover: + return 1; + case action_fail_reset_remote: +@@ -2800,6 +2833,7 @@ cmp_on_fail(enum action_fail_response first, enum action_fail_response second) + case action_fail_restart_container: + switch (second) { + case action_fail_ignore: ++ case action_fail_demote: + case action_fail_recover: + case action_fail_reset_remote: + return 1; +@@ -2814,9 +2848,13 @@ cmp_on_fail(enum action_fail_response first, enum action_fail_response second) + break; + } + switch (second) { ++ case action_fail_demote: ++ return (first == action_fail_ignore)? -1 : 1; ++ + case action_fail_reset_remote: + switch (first) { + case action_fail_ignore: ++ case action_fail_demote: + case action_fail_recover: + return -1; + default: +@@ -2827,6 +2865,7 @@ cmp_on_fail(enum action_fail_response first, enum action_fail_response second) + case action_fail_restart_container: + switch (first) { + case action_fail_ignore: ++ case action_fail_demote: + case action_fail_recover: + case action_fail_reset_remote: + return -1; +@@ -3426,7 +3465,11 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c + clear_past_failure = TRUE; + + } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { +- /* Demote from Master does not clear an error */ ++ ++ if (*on_fail == action_fail_demote) { ++ // Demote clears an error only if on-fail=demote ++ clear_past_failure = TRUE; ++ } + rsc->role = RSC_ROLE_SLAVE; + + } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { +@@ -3454,6 +3497,7 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, 
xmlNode * xml_op, c + + case action_fail_block: + case action_fail_ignore: ++ case action_fail_demote: + case action_fail_recover: + case action_fail_restart_container: + *on_fail = action_fail_ignore; +@@ -3714,6 +3758,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * that, ensure the remote connection is considered failed. + */ + set_bit(node->details->remote_rsc->flags, pe_rsc_failed); ++ set_bit(node->details->remote_rsc->flags, pe_rsc_stop); + } + + // fall through +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 3fb7e62..fee9efb 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -720,6 +720,7 @@ static bool + valid_stop_on_fail(const char *value) + { + return safe_str_neq(value, "standby") ++ && safe_str_neq(value, "demote") + && safe_str_neq(value, "stop"); + } + +@@ -727,6 +728,11 @@ static const char * + unpack_operation_on_fail(pe_action_t * action) + { + ++ const char *name = NULL; ++ const char *role = NULL; ++ const char *on_fail = NULL; ++ const char *interval_spec = NULL; ++ const char *enabled = NULL; + const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); + + if (safe_str_eq(action->task, CRMD_ACTION_STOP) +@@ -736,14 +742,10 @@ unpack_operation_on_fail(pe_action_t * action) + "action to default value because '%s' is not " + "allowed for stop", action->rsc->id, value); + return NULL; ++ + } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { + /* demote on_fail defaults to master monitor value if present */ + xmlNode *operation = NULL; +- const char *name = NULL; +- const char *role = NULL; +- const char *on_fail = NULL; +- const char *interval_spec = NULL; +- const char *enabled = NULL; + + CRM_CHECK(action->rsc != NULL, return NULL); + +@@ -766,12 +768,31 @@ unpack_operation_on_fail(pe_action_t * action) + continue; + } else if (crm_parse_interval_spec(interval_spec) == 0) { + continue; ++ } else if (safe_str_eq(on_fail, "demote")) { ++ continue; + } + + 
value = on_fail; + } + } else if (safe_str_eq(action->task, CRM_OP_LRM_DELETE)) { + value = "ignore"; ++ ++ } else if (safe_str_eq(value, "demote")) { ++ name = crm_element_value(action->op_entry, "name"); ++ role = crm_element_value(action->op_entry, "role"); ++ on_fail = crm_element_value(action->op_entry, XML_OP_ATTR_ON_FAIL); ++ interval_spec = crm_element_value(action->op_entry, ++ XML_LRM_ATTR_INTERVAL); ++ ++ if (safe_str_neq(name, CRMD_ACTION_PROMOTE) ++ && (safe_str_neq(name, CRMD_ACTION_STATUS) ++ || safe_str_neq(role, "Master") ++ || (crm_parse_interval_spec(interval_spec) == 0))) { ++ pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s " ++ "action to default value because 'demote' is not " ++ "allowed for it", action->rsc->id, name); ++ return NULL; ++ } + } + + return value; +@@ -1170,6 +1191,10 @@ unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * contai + value = NULL; + } + ++ } else if (safe_str_eq(value, "demote")) { ++ action->on_fail = action_fail_demote; ++ value = "demote instance"; ++ + } else { + pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); + value = NULL; +-- +1.8.3.1 + + +From d29433ea57796de000f4fea8c60f8da1d903108b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Jun 2020 16:03:14 -0500 +Subject: [PATCH 11/20] Test: scheduler: add regression tests for + on-fail="demote" + +--- + cts/cts-scheduler.in | 4 + + cts/scheduler/on_fail_demote1.dot | 64 ++ + cts/scheduler/on_fail_demote1.exp | 360 +++++++ + cts/scheduler/on_fail_demote1.scores | 470 +++++++++ + cts/scheduler/on_fail_demote1.summary | 86 ++ + cts/scheduler/on_fail_demote1.xml | 616 +++++++++++ + cts/scheduler/on_fail_demote2.dot | 22 + + cts/scheduler/on_fail_demote2.exp | 125 +++ + cts/scheduler/on_fail_demote2.scores | 127 +++ + cts/scheduler/on_fail_demote2.summary | 41 + + cts/scheduler/on_fail_demote2.xml | 221 ++++ + cts/scheduler/on_fail_demote3.dot | 12 + + cts/scheduler/on_fail_demote3.exp | 63 ++ 
+ cts/scheduler/on_fail_demote3.scores | 127 +++ + cts/scheduler/on_fail_demote3.summary | 34 + + cts/scheduler/on_fail_demote3.xml | 221 ++++ + cts/scheduler/on_fail_demote4.dot | 383 +++++++ + cts/scheduler/on_fail_demote4.exp | 1818 +++++++++++++++++++++++++++++++++ + cts/scheduler/on_fail_demote4.scores | 470 +++++++++ + cts/scheduler/on_fail_demote4.summary | 187 ++++ + cts/scheduler/on_fail_demote4.xml | 625 ++++++++++++ + 21 files changed, 6076 insertions(+) + create mode 100644 cts/scheduler/on_fail_demote1.dot + create mode 100644 cts/scheduler/on_fail_demote1.exp + create mode 100644 cts/scheduler/on_fail_demote1.scores + create mode 100644 cts/scheduler/on_fail_demote1.summary + create mode 100644 cts/scheduler/on_fail_demote1.xml + create mode 100644 cts/scheduler/on_fail_demote2.dot + create mode 100644 cts/scheduler/on_fail_demote2.exp + create mode 100644 cts/scheduler/on_fail_demote2.scores + create mode 100644 cts/scheduler/on_fail_demote2.summary + create mode 100644 cts/scheduler/on_fail_demote2.xml + create mode 100644 cts/scheduler/on_fail_demote3.dot + create mode 100644 cts/scheduler/on_fail_demote3.exp + create mode 100644 cts/scheduler/on_fail_demote3.scores + create mode 100644 cts/scheduler/on_fail_demote3.summary + create mode 100644 cts/scheduler/on_fail_demote3.xml + create mode 100644 cts/scheduler/on_fail_demote4.dot + create mode 100644 cts/scheduler/on_fail_demote4.exp + create mode 100644 cts/scheduler/on_fail_demote4.scores + create mode 100644 cts/scheduler/on_fail_demote4.summary + create mode 100644 cts/scheduler/on_fail_demote4.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index ae8247e..0e68e73 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -478,6 +478,10 @@ TESTS = [ + [ "master-score-startup", "Use permanent master scores without LRM history" ], + [ "failed-demote-recovery", "Recover resource in slave role after demote fails" ], + [ "failed-demote-recovery-master", "Recover resource 
in master role after demote fails" ], ++ [ "on_fail_demote1", "Recovery with on-fail=\"demote\" on healthy cluster, remote, guest, and bundle nodes" ], ++ [ "on_fail_demote2", "Recovery with on-fail=\"demote\" with promotion on different node" ], ++ [ "on_fail_demote3", "Recovery with on-fail=\"demote\" with no promotion" ], ++ [ "on_fail_demote4", "Recovery with on-fail=\"demote\" on failed cluster, remote, guest, and bundle nodes" ], + ], + [ + [ "history-1", "Correctly parse stateful-1 resource state" ], +diff --git a/cts/scheduler/on_fail_demote1.dot b/cts/scheduler/on_fail_demote1.dot +new file mode 100644 +index 0000000..d11c1c1 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote1.dot +@@ -0,0 +1,64 @@ ++ digraph "g" { ++"bundled_demote_0 stateful-bundle-0" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"bundled_demote_0 stateful-bundle-0" -> "stateful-bundle-master_demoted_0" [ style = bold] ++"bundled_demote_0 stateful-bundle-0" [ style=bold color="green" fontcolor="black"] ++"bundled_promote_0 stateful-bundle-0" -> "stateful-bundle-master_promoted_0" [ style = bold] ++"bundled_promote_0 stateful-bundle-0" [ style=bold color="green" fontcolor="black"] ++"lxc-ms-master_demote_0" -> "lxc-ms-master_demoted_0" [ style = bold] ++"lxc-ms-master_demote_0" -> "lxc-ms_demote_0 lxc2" [ style = bold] ++"lxc-ms-master_demote_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_demoted_0" -> "lxc-ms-master_promote_0" [ style = bold] ++"lxc-ms-master_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_promote_0" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"lxc-ms-master_promote_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms_demote_0 lxc2" -> "lxc-ms-master_demoted_0" [ style = bold] ++"lxc-ms_demote_0 lxc2" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"lxc-ms_demote_0 lxc2" [ style=bold color="green" fontcolor="black"] 
++"lxc-ms_promote_0 lxc2" -> "lxc-ms-master_promoted_0" [ style = bold] ++"lxc-ms_promote_0 lxc2" [ style=bold color="green" fontcolor="black"] ++"rsc1-clone_demote_0" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1-clone_demote_0" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"rsc1-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_demoted_0" -> "rsc1-clone_promote_0" [ style = bold] ++"rsc1-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_promote_0" -> "rsc1_promote_0 rhel7-4" [ style = bold] ++"rsc1-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1_demote_0 rhel7-4" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1_demote_0 rhel7-4" -> "rsc1_promote_0 rhel7-4" [ style = bold] ++"rsc1_demote_0 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"rsc1_promote_0 rhel7-4" -> "rsc1-clone_promoted_0" [ style = bold] ++"rsc1_promote_0 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"rsc2-master_demote_0" -> "rsc2-master_demoted_0" [ style = bold] ++"rsc2-master_demote_0" -> "rsc2_demote_0 remote-rhel7-2" [ style = bold] ++"rsc2-master_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_demoted_0" -> "rsc2-master_promote_0" [ style = bold] ++"rsc2-master_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_promote_0" -> "rsc2_promote_0 remote-rhel7-2" [ style = bold] ++"rsc2-master_promote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2_demote_0 remote-rhel7-2" -> "rsc2-master_demoted_0" [ style = bold] ++"rsc2_demote_0 remote-rhel7-2" -> "rsc2_promote_0 remote-rhel7-2" [ style = bold] ++"rsc2_demote_0 remote-rhel7-2" [ style=bold color="green" fontcolor="black"] ++"rsc2_promote_0 remote-rhel7-2" -> "rsc2-master_promoted_0" [ style = bold] ++"rsc2_promote_0 remote-rhel7-2" [ style=bold 
color="green" fontcolor="black"] ++"stateful-bundle-master_demote_0" -> "bundled_demote_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-master_demote_0" -> "stateful-bundle-master_demoted_0" [ style = bold] ++"stateful-bundle-master_demote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_demoted_0" -> "stateful-bundle-master_promote_0" [ style = bold] ++"stateful-bundle-master_demoted_0" -> "stateful-bundle_demoted_0" [ style = bold] ++"stateful-bundle-master_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_promote_0" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_promoted_0" -> "stateful-bundle_promoted_0" [ style = bold] ++"stateful-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_demote_0" -> "stateful-bundle-master_demote_0" [ style = bold] ++"stateful-bundle_demote_0" -> "stateful-bundle_demoted_0" [ style = bold] ++"stateful-bundle_demote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_demoted_0" -> "stateful-bundle_promote_0" [ style = bold] ++"stateful-bundle_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_promote_0" -> "stateful-bundle-master_promote_0" [ style = bold] ++"stateful-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/cts/scheduler/on_fail_demote1.exp b/cts/scheduler/on_fail_demote1.exp +new file mode 100644 +index 0000000..ebe1dd5 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote1.exp +@@ -0,0 +1,360 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote1.scores b/cts/scheduler/on_fail_demote1.scores +new file mode 100644 +index 0000000..7df582f +--- /dev/null ++++ b/cts/scheduler/on_fail_demote1.scores +@@ -0,0 +1,470 @@ ++Allocation scores: ++Using the original execution date of: 2020-06-16 19:23:21Z ++bundled:0 promotion score on stateful-bundle-0: 10 ++bundled:1 promotion score on stateful-bundle-1: 5 ++bundled:2 promotion score on stateful-bundle-2: 5 ++lxc-ms:0 promotion score on lxc2: INFINITY ++lxc-ms:1 promotion score on lxc1: INFINITY ++pcmk__bundle_allocate: bundled:0 allocation score on stateful-bundle-0: 501 ++pcmk__bundle_allocate: bundled:1 allocation score on stateful-bundle-1: 501 ++pcmk__bundle_allocate: bundled:2 allocation score on stateful-bundle-2: 501 ++pcmk__bundle_allocate: stateful-bundle allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: 
stateful-bundle allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on lxc1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on lxc2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on lxc1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on lxc2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on lxc1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on lxc2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on remote-rhel7-2: 0 
++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on 
rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on 
stateful-bundle-0: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-master allocation score on stateful-bundle-1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-master allocation score on stateful-bundle-2: -INFINITY ++pcmk__clone_allocate: bundled:0 allocation score on stateful-bundle-0: INFINITY ++pcmk__clone_allocate: bundled:1 allocation score on stateful-bundle-1: INFINITY ++pcmk__clone_allocate: bundled:2 allocation score on stateful-bundle-2: INFINITY ++pcmk__clone_allocate: lxc-ms-master allocation score on lxc1: INFINITY ++pcmk__clone_allocate: lxc-ms-master allocation score on lxc2: INFINITY ++pcmk__clone_allocate: lxc-ms-master allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-1: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-3: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-4: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-5: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on lxc1: INFINITY ++pcmk__clone_allocate: lxc-ms:0 allocation score on lxc2: INFINITY ++pcmk__clone_allocate: lxc-ms:0 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on lxc1: INFINITY ++pcmk__clone_allocate: lxc-ms:1 allocation score on lxc2: INFINITY ++pcmk__clone_allocate: lxc-ms:1 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1-clone 
allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-4: 11 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-3: 0 
++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on remote-rhel7-2: 6 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on lxc2: 6 ++pcmk__clone_allocate: rsc1:5 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on lxc1: 6 ++pcmk__clone_allocate: rsc1:6 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2-master allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2-master allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2-master allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:0 allocation 
score on lxc2: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-4: 11 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on remote-rhel7-2: 11 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:4 allocation score 
on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on lxc2: 6 ++pcmk__clone_allocate: rsc2:5 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on lxc1: 6 ++pcmk__clone_allocate: rsc2:6 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: stateful-bundle-master allocation score on lxc1: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on lxc2: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on remote-rhel7-2: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-1: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-3: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-4: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-5: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on stateful-bundle-0: 0 ++pcmk__clone_allocate: stateful-bundle-master allocation score on stateful-bundle-1: 0 ++pcmk__clone_allocate: stateful-bundle-master allocation score on stateful-bundle-2: 0 ++pcmk__native_allocate: Fencing allocation score on lxc1: -INFINITY ++pcmk__native_allocate: Fencing allocation score on lxc2: -INFINITY ++pcmk__native_allocate: Fencing allocation 
score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: Fencing allocation score on rhel7-1: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-3: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-4: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-5: 0 ++pcmk__native_allocate: bundled:0 allocation score on stateful-bundle-0: INFINITY ++pcmk__native_allocate: bundled:1 allocation score on stateful-bundle-1: INFINITY ++pcmk__native_allocate: bundled:2 allocation score on stateful-bundle-2: INFINITY ++pcmk__native_allocate: container1 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: container1 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: container1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: container1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: container1 allocation score on rhel7-3: INFINITY ++pcmk__native_allocate: container1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: container1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: container2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: container2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: container2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: container2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: container2 allocation score on rhel7-3: INFINITY ++pcmk__native_allocate: container2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: container2 allocation score on rhel7-5: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: lxc-ms:0 allocation score on lxc2: INFINITY ++pcmk__native_allocate: lxc-ms:0 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-4: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score 
on rhel7-5: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on lxc1: INFINITY ++pcmk__native_allocate: lxc-ms:1 allocation score on lxc2: INFINITY ++pcmk__native_allocate: lxc-ms:1 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: lxc1 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:0 
allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-4: 11 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:1 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:1 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:2 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:2 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:2 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc1:3 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:3 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:3 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:4 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:4 allocation score on remote-rhel7-2: 6 
++pcmk__native_allocate: rsc1:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:5 allocation score on lxc2: 6 ++pcmk__native_allocate: rsc1:5 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on lxc1: 6 ++pcmk__native_allocate: rsc1:6 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:0 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:0 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-4: 11 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:1 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:1 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:1 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-3: 6 
++pcmk__native_allocate: rsc2:1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:2 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:2 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:2 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc2:3 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:3 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:3 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on remote-rhel7-2: 11 ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:5 allocation score on lxc2: 6 ++pcmk__native_allocate: rsc2:5 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation 
score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on lxc1: 6 ++pcmk__native_allocate: rsc2:6 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on remote-rhel7-2: 0 ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-5: 10000 ++pcmk__native_allocate: stateful-bundle-1 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-1 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-1: 10000 ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-3: 0 
++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-4: 10000 ++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on lxc1: -10000 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on lxc2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on remote-rhel7-2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on lxc1: -10000 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on lxc2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on remote-rhel7-2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on lxc1: -10000 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on lxc2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on remote-rhel7-2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation 
score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-5: -INFINITY ++rsc1:0 
promotion score on rhel7-4: 10 ++rsc1:1 promotion score on rhel7-3: 5 ++rsc1:2 promotion score on rhel7-5: 5 ++rsc1:3 promotion score on rhel7-1: 5 ++rsc1:4 promotion score on remote-rhel7-2: 5 ++rsc1:5 promotion score on lxc2: 5 ++rsc1:6 promotion score on lxc1: 5 ++rsc2:0 promotion score on rhel7-4: 10 ++rsc2:1 promotion score on rhel7-3: 5 ++rsc2:2 promotion score on rhel7-5: 5 ++rsc2:3 promotion score on rhel7-1: 5 ++rsc2:4 promotion score on remote-rhel7-2: 110 ++rsc2:5 promotion score on lxc2: 5 ++rsc2:6 promotion score on lxc1: 5 +diff --git a/cts/scheduler/on_fail_demote1.summary b/cts/scheduler/on_fail_demote1.summary +new file mode 100644 +index 0000000..b173582 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote1.summary +@@ -0,0 +1,86 @@ ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Current cluster status: ++Online: [ rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] ++RemoteOnline: [ remote-rhel7-2 ] ++GuestOnline: [ lxc1:container1 lxc2:container2 stateful-bundle-0:stateful-bundle-docker-0 stateful-bundle-1:stateful-bundle-docker-1 stateful-bundle-2:stateful-bundle-docker-2 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-4 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ rsc1 (ocf::pacemaker:Stateful): FAILED Master rhel7-4 ++ Slaves: [ lxc1 lxc2 remote-rhel7-2 rhel7-1 rhel7-3 rhel7-5 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ rsc2 (ocf::pacemaker:Stateful): FAILED Master remote-rhel7-2 ++ Slaves: [ lxc1 lxc2 rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] ++ remote-rhel7-2 (ocf::pacemaker:remote): Started rhel7-1 ++ container1 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ container2 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ Clone Set: lxc-ms-master [lxc-ms] (promotable) ++ lxc-ms (ocf::pacemaker:Stateful): FAILED Master lxc2 ++ Slaves: [ lxc1 ] ++ Stopped: [ remote-rhel7-2 rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] ++ Container bundle set: stateful-bundle [pcmktest:http] ++ stateful-bundle-0 (192.168.122.131) (ocf::pacemaker:Stateful): FAILED 
Master rhel7-5 ++ stateful-bundle-1 (192.168.122.132) (ocf::pacemaker:Stateful): Slave rhel7-1 ++ stateful-bundle-2 (192.168.122.133) (ocf::pacemaker:Stateful): Slave rhel7-4 ++ ++Transition Summary: ++ * Re-promote rsc1:0 ( Master rhel7-4 ) ++ * Re-promote rsc2:4 ( Master remote-rhel7-2 ) ++ * Re-promote lxc-ms:0 ( Master lxc2 ) ++ * Re-promote bundled:0 ( Master stateful-bundle-0 ) ++ ++Executing cluster transition: ++ * Pseudo action: rsc1-clone_demote_0 ++ * Pseudo action: rsc2-master_demote_0 ++ * Pseudo action: lxc-ms-master_demote_0 ++ * Pseudo action: stateful-bundle_demote_0 ++ * Resource action: rsc1 demote on rhel7-4 ++ * Pseudo action: rsc1-clone_demoted_0 ++ * Pseudo action: rsc1-clone_promote_0 ++ * Resource action: rsc2 demote on remote-rhel7-2 ++ * Pseudo action: rsc2-master_demoted_0 ++ * Pseudo action: rsc2-master_promote_0 ++ * Resource action: lxc-ms demote on lxc2 ++ * Pseudo action: lxc-ms-master_demoted_0 ++ * Pseudo action: lxc-ms-master_promote_0 ++ * Pseudo action: stateful-bundle-master_demote_0 ++ * Resource action: rsc1 promote on rhel7-4 ++ * Pseudo action: rsc1-clone_promoted_0 ++ * Resource action: rsc2 promote on remote-rhel7-2 ++ * Pseudo action: rsc2-master_promoted_0 ++ * Resource action: lxc-ms promote on lxc2 ++ * Pseudo action: lxc-ms-master_promoted_0 ++ * Resource action: bundled demote on stateful-bundle-0 ++ * Pseudo action: stateful-bundle-master_demoted_0 ++ * Pseudo action: stateful-bundle_demoted_0 ++ * Pseudo action: stateful-bundle_promote_0 ++ * Pseudo action: stateful-bundle-master_promote_0 ++ * Resource action: bundled promote on stateful-bundle-0 ++ * Pseudo action: stateful-bundle-master_promoted_0 ++ * Pseudo action: stateful-bundle_promoted_0 ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Revised cluster status: ++Online: [ rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] ++RemoteOnline: [ remote-rhel7-2 ] ++GuestOnline: [ lxc1:container1 lxc2:container2 stateful-bundle-0:stateful-bundle-docker-0 
stateful-bundle-1:stateful-bundle-docker-1 stateful-bundle-2:stateful-bundle-docker-2 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-4 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ Masters: [ rhel7-4 ] ++ Slaves: [ lxc1 lxc2 remote-rhel7-2 rhel7-1 rhel7-3 rhel7-5 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ Masters: [ remote-rhel7-2 ] ++ Slaves: [ lxc1 lxc2 rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] ++ remote-rhel7-2 (ocf::pacemaker:remote): Started rhel7-1 ++ container1 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ container2 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ Clone Set: lxc-ms-master [lxc-ms] (promotable) ++ Masters: [ lxc2 ] ++ Slaves: [ lxc1 ] ++ Container bundle set: stateful-bundle [pcmktest:http] ++ stateful-bundle-0 (192.168.122.131) (ocf::pacemaker:Stateful): Master rhel7-5 ++ stateful-bundle-1 (192.168.122.132) (ocf::pacemaker:Stateful): Slave rhel7-1 ++ stateful-bundle-2 (192.168.122.133) (ocf::pacemaker:Stateful): Slave rhel7-4 ++ +diff --git a/cts/scheduler/on_fail_demote1.xml b/cts/scheduler/on_fail_demote1.xml +new file mode 100644 +index 0000000..9f3ff20 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote1.xml +@@ -0,0 +1,616 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote2.dot b/cts/scheduler/on_fail_demote2.dot +new file mode 100644 +index 0000000..06193cb +--- /dev/null ++++ b/cts/scheduler/on_fail_demote2.dot +@@ -0,0 +1,22 @@ ++ digraph "g" { ++"Cancel rsc1_monitor_10000 rhel7-4" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"Cancel rsc1_monitor_10000 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"Cancel rsc1_monitor_11000 rhel7-3" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"Cancel rsc1_monitor_11000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"rsc1-clone_demote_0" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1-clone_demote_0" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"rsc1-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_demoted_0" -> "rsc1-clone_promote_0" [ style = bold] ++"rsc1-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_promote_0" -> "rsc1_promote_0 rhel7-3" [ style = bold] 
++"rsc1-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1_demote_0 rhel7-4" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1_demote_0 rhel7-4" -> "rsc1_monitor_11000 rhel7-4" [ style = bold] ++"rsc1_demote_0 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_10000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_11000 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"rsc1_promote_0 rhel7-3" -> "rsc1-clone_promoted_0" [ style = bold] ++"rsc1_promote_0 rhel7-3" -> "rsc1_monitor_10000 rhel7-3" [ style = bold] ++"rsc1_promote_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/on_fail_demote2.exp b/cts/scheduler/on_fail_demote2.exp +new file mode 100644 +index 0000000..492e86f +--- /dev/null ++++ b/cts/scheduler/on_fail_demote2.exp +@@ -0,0 +1,125 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote2.scores b/cts/scheduler/on_fail_demote2.scores +new file mode 100644 +index 0000000..25aea90 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote2.scores +@@ -0,0 +1,127 @@ ++Allocation scores: ++Using the original execution date of: 2020-06-16 19:23:21Z ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on 
rhel7-1: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-4: 11 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-2: 6 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:0 
allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-4: 11 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-2: 6 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-5: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-1: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-2: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-3: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-4: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-4: 11 
++pcmk__native_allocate: rsc1:0 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-2: 6 ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-4: 11 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:1 allocation 
score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-2: 6 ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-5: -INFINITY ++rsc1:0 promotion score on rhel7-4: -INFINITY ++rsc1:1 promotion score on rhel7-3: 5 ++rsc1:2 promotion score on rhel7-5: 5 ++rsc1:3 promotion score on rhel7-1: 5 ++rsc1:4 promotion score on rhel7-2: 5 ++rsc2:0 promotion score on rhel7-4: 10 ++rsc2:1 promotion score on rhel7-3: 5 ++rsc2:2 promotion score on rhel7-5: 5 ++rsc2:3 promotion score on rhel7-1: 5 ++rsc2:4 promotion score on rhel7-2: 5 +diff --git a/cts/scheduler/on_fail_demote2.summary b/cts/scheduler/on_fail_demote2.summary +new file mode 100644 +index 0000000..795a11d +--- /dev/null ++++ b/cts/scheduler/on_fail_demote2.summary +@@ -0,0 +1,41 @@ ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Current cluster status: ++Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-1 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ rsc1 (ocf::pacemaker:Stateful): FAILED Master rhel7-4 ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-5 ] ++ 
Clone Set: rsc2-master [rsc2] (promotable) ++ Masters: [ rhel7-4 ] ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-5 ] ++ ++Transition Summary: ++ * Demote rsc1:0 ( Master -> Slave rhel7-4 ) ++ * Promote rsc1:1 ( Slave -> Master rhel7-3 ) ++ ++Executing cluster transition: ++ * Resource action: rsc1 cancel=10000 on rhel7-4 ++ * Resource action: rsc1 cancel=11000 on rhel7-3 ++ * Pseudo action: rsc1-clone_demote_0 ++ * Resource action: rsc1 demote on rhel7-4 ++ * Pseudo action: rsc1-clone_demoted_0 ++ * Pseudo action: rsc1-clone_promote_0 ++ * Resource action: rsc1 monitor=11000 on rhel7-4 ++ * Resource action: rsc1 promote on rhel7-3 ++ * Pseudo action: rsc1-clone_promoted_0 ++ * Resource action: rsc1 monitor=10000 on rhel7-3 ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Revised cluster status: ++Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-1 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ Masters: [ rhel7-3 ] ++ Slaves: [ rhel7-1 rhel7-2 rhel7-4 rhel7-5 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ Masters: [ rhel7-4 ] ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-5 ] ++ +diff --git a/cts/scheduler/on_fail_demote2.xml b/cts/scheduler/on_fail_demote2.xml +new file mode 100644 +index 0000000..ae91633 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote2.xml +@@ -0,0 +1,221 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote3.dot b/cts/scheduler/on_fail_demote3.dot +new file mode 100644 +index 0000000..e78325b +--- /dev/null ++++ b/cts/scheduler/on_fail_demote3.dot +@@ -0,0 +1,12 @@ ++ digraph "g" { ++"Cancel rsc1_monitor_10000 rhel7-4" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"Cancel rsc1_monitor_10000 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"rsc1-clone_demote_0" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1-clone_demote_0" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"rsc1-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1_demote_0 rhel7-4" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1_demote_0 rhel7-4" -> "rsc1_monitor_11000 rhel7-4" [ style = bold] ++"rsc1_demote_0 rhel7-4" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_11000 rhel7-4" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/on_fail_demote3.exp b/cts/scheduler/on_fail_demote3.exp +new file mode 100644 +index 0000000..ed6bd6d +--- /dev/null ++++ b/cts/scheduler/on_fail_demote3.exp +@@ -0,0 +1,63 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote3.scores b/cts/scheduler/on_fail_demote3.scores +new file mode 100644 +index 0000000..a85639a +--- /dev/null ++++ b/cts/scheduler/on_fail_demote3.scores +@@ -0,0 +1,127 @@ ++Allocation scores: ++Using the original execution date of: 2020-06-16 19:23:21Z ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on 
rhel7-5: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-4: 11 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-2: 6 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:0 
allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-4: 11 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-2: 6 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-5: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-1: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-2: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-3: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-4: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-3: -INFINITY 
++pcmk__native_allocate: rsc1:0 allocation score on rhel7-4: 11 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-2: 6 ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-4: 11 ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc2:1 allocation score on 
rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-2: 6 ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-5: -INFINITY ++rsc1:0 promotion score on rhel7-4: -INFINITY ++rsc1:1 promotion score on rhel7-3: -INFINITY ++rsc1:2 promotion score on rhel7-5: -INFINITY ++rsc1:3 promotion score on rhel7-1: -INFINITY ++rsc1:4 promotion score on rhel7-2: -INFINITY ++rsc2:0 promotion score on rhel7-4: 10 ++rsc2:1 promotion score on rhel7-3: 5 ++rsc2:2 promotion score on rhel7-5: 5 ++rsc2:3 promotion score on rhel7-1: 5 ++rsc2:4 promotion score on rhel7-2: 5 +diff --git a/cts/scheduler/on_fail_demote3.summary b/cts/scheduler/on_fail_demote3.summary +new file mode 100644 +index 0000000..f1173fd +--- /dev/null ++++ b/cts/scheduler/on_fail_demote3.summary +@@ -0,0 +1,34 @@ ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Current cluster status: ++Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-1 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ rsc1 
(ocf::pacemaker:Stateful): FAILED Master rhel7-4 ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-5 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ Masters: [ rhel7-4 ] ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-5 ] ++ ++Transition Summary: ++ * Demote rsc1:0 ( Master -> Slave rhel7-4 ) ++ ++Executing cluster transition: ++ * Resource action: rsc1 cancel=10000 on rhel7-4 ++ * Pseudo action: rsc1-clone_demote_0 ++ * Resource action: rsc1 demote on rhel7-4 ++ * Pseudo action: rsc1-clone_demoted_0 ++ * Resource action: rsc1 monitor=11000 on rhel7-4 ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Revised cluster status: ++Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-1 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ Masters: [ rhel7-4 ] ++ Slaves: [ rhel7-1 rhel7-2 rhel7-3 rhel7-5 ] ++ +diff --git a/cts/scheduler/on_fail_demote3.xml b/cts/scheduler/on_fail_demote3.xml +new file mode 100644 +index 0000000..a7b6806 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote3.xml +@@ -0,0 +1,221 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote4.dot b/cts/scheduler/on_fail_demote4.dot +new file mode 100644 +index 0000000..4715cd3 +--- /dev/null ++++ 
b/cts/scheduler/on_fail_demote4.dot +@@ -0,0 +1,383 @@ ++ digraph "g" { ++"Cancel rsc1_monitor_11000 rhel7-3" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"Cancel rsc1_monitor_11000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"Cancel rsc2_monitor_11000 rhel7-3" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"Cancel rsc2_monitor_11000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"Fencing_monitor_120000 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"Fencing_start_0 rhel7-5" -> "Fencing_monitor_120000 rhel7-5" [ style = bold] ++"Fencing_start_0 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"Fencing_stop_0 rhel7-4" -> "Fencing_start_0 rhel7-5" [ style = bold] ++"Fencing_stop_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"bundled_demote_0 stateful-bundle-0" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"bundled_demote_0 stateful-bundle-0" -> "bundled_stop_0 stateful-bundle-0" [ style = bold] ++"bundled_demote_0 stateful-bundle-0" -> "stateful-bundle-master_demoted_0" [ style = bold] ++"bundled_demote_0 stateful-bundle-0" [ style=bold color="green" fontcolor="orange"] ++"bundled_monitor_10000 stateful-bundle-0" [ style=bold color="green" fontcolor="black"] ++"bundled_monitor_11000 stateful-bundle-2" [ style=bold color="green" fontcolor="black"] ++"bundled_promote_0 stateful-bundle-0" -> "bundled_monitor_10000 stateful-bundle-0" [ style = bold] ++"bundled_promote_0 stateful-bundle-0" -> "stateful-bundle-master_promoted_0" [ style = bold] ++"bundled_promote_0 stateful-bundle-0" [ style=bold color="green" fontcolor="black"] ++"bundled_start_0 stateful-bundle-0" -> "bundled_monitor_10000 stateful-bundle-0" [ style = bold] ++"bundled_start_0 stateful-bundle-0" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"bundled_start_0 stateful-bundle-0" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"bundled_start_0 stateful-bundle-0" -> "stateful-bundle-master_running_0" [ style = bold] 
++"bundled_start_0 stateful-bundle-0" [ style=bold color="green" fontcolor="black"] ++"bundled_start_0 stateful-bundle-2" -> "bundled_monitor_11000 stateful-bundle-2" [ style = bold] ++"bundled_start_0 stateful-bundle-2" -> "stateful-bundle-master_running_0" [ style = bold] ++"bundled_start_0 stateful-bundle-2" [ style=bold color="green" fontcolor="black"] ++"bundled_stop_0 stateful-bundle-0" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"bundled_stop_0 stateful-bundle-0" -> "stateful-bundle-master_stopped_0" [ style = bold] ++"bundled_stop_0 stateful-bundle-0" [ style=bold color="green" fontcolor="orange"] ++"bundled_stop_0 stateful-bundle-2" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"bundled_stop_0 stateful-bundle-2" -> "bundled_stop_0 stateful-bundle-0" [ style = bold] ++"bundled_stop_0 stateful-bundle-2" -> "stateful-bundle-master_stopped_0" [ style = bold] ++"bundled_stop_0 stateful-bundle-2" [ style=bold color="green" fontcolor="orange"] ++"container2_monitor_20000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"container2_start_0 rhel7-3" -> "container2_monitor_20000 rhel7-3" [ style = bold] ++"container2_start_0 rhel7-3" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"container2_start_0 rhel7-3" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"container2_start_0 rhel7-3" -> "lxc2_start_0 rhel7-3" [ style = bold] ++"container2_start_0 rhel7-3" -> "rsc1_start_0 lxc2" [ style = bold] ++"container2_start_0 rhel7-3" -> "rsc2_start_0 lxc2" [ style = bold] ++"container2_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"container2_stop_0 rhel7-3" -> "container2_start_0 rhel7-3" [ style = bold] ++"container2_stop_0 rhel7-3" -> "stonith 'reboot' lxc2" [ style = bold] ++"container2_stop_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"lxc-ms-master_demote_0" -> "lxc-ms-master_demoted_0" [ style = bold] ++"lxc-ms-master_demote_0" -> "lxc-ms_demote_0 lxc2" [ style = bold] ++"lxc-ms-master_demote_0" [ style=bold 
color="green" fontcolor="orange"] ++"lxc-ms-master_demoted_0" -> "lxc-ms-master_promote_0" [ style = bold] ++"lxc-ms-master_demoted_0" -> "lxc-ms-master_start_0" [ style = bold] ++"lxc-ms-master_demoted_0" -> "lxc-ms-master_stop_0" [ style = bold] ++"lxc-ms-master_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_promote_0" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"lxc-ms-master_promote_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_running_0" -> "lxc-ms-master_promote_0" [ style = bold] ++"lxc-ms-master_running_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_start_0" -> "lxc-ms-master_running_0" [ style = bold] ++"lxc-ms-master_start_0" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"lxc-ms-master_start_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_stop_0" -> "lxc-ms-master_stopped_0" [ style = bold] ++"lxc-ms-master_stop_0" -> "lxc-ms_stop_0 lxc2" [ style = bold] ++"lxc-ms-master_stop_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms-master_stopped_0" -> "lxc-ms-master_promote_0" [ style = bold] ++"lxc-ms-master_stopped_0" -> "lxc-ms-master_start_0" [ style = bold] ++"lxc-ms-master_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms_demote_0 lxc2" -> "lxc-ms-master_demoted_0" [ style = bold] ++"lxc-ms_demote_0 lxc2" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"lxc-ms_demote_0 lxc2" -> "lxc-ms_stop_0 lxc2" [ style = bold] ++"lxc-ms_demote_0 lxc2" [ style=bold color="green" fontcolor="orange"] ++"lxc-ms_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] ++"lxc-ms_promote_0 lxc2" -> "lxc-ms-master_promoted_0" [ style = bold] ++"lxc-ms_promote_0 lxc2" -> "lxc-ms_monitor_10000 lxc2" [ style = bold] ++"lxc-ms_promote_0 lxc2" [ style=bold color="green" fontcolor="black"] ++"lxc-ms_start_0 lxc2" -> "lxc-ms-master_running_0" [ style = bold] ++"lxc-ms_start_0 lxc2" -> 
"lxc-ms_monitor_10000 lxc2" [ style = bold] ++"lxc-ms_start_0 lxc2" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"lxc-ms_start_0 lxc2" [ style=bold color="green" fontcolor="black"] ++"lxc-ms_stop_0 lxc2" -> "lxc-ms-master_stopped_0" [ style = bold] ++"lxc-ms_stop_0 lxc2" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"lxc-ms_stop_0 lxc2" [ style=bold color="green" fontcolor="orange"] ++"lxc2_monitor_30000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"lxc2_start_0 rhel7-3" -> "lxc-ms_monitor_10000 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "lxc2_monitor_30000 rhel7-3" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "rsc1_monitor_11000 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "rsc1_start_0 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "rsc2_monitor_11000 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" -> "rsc2_start_0 lxc2" [ style = bold] ++"lxc2_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"lxc2_stop_0 rhel7-3" -> "container2_stop_0 rhel7-3" [ style = bold] ++"lxc2_stop_0 rhel7-3" -> "lxc2_start_0 rhel7-3" [ style = bold] ++"lxc2_stop_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"remote-rhel7-2_monitor_60000 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"remote-rhel7-2_start_0 rhel7-1" -> "remote-rhel7-2_monitor_60000 rhel7-1" [ style = bold] ++"remote-rhel7-2_start_0 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"remote-rhel7-2_stop_0 rhel7-1" -> "remote-rhel7-2_start_0 rhel7-1" [ style = bold] ++"remote-rhel7-2_stop_0 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"rsc1-clone_demote_0" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1-clone_demote_0" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"rsc1-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_demoted_0" -> "rsc1-clone_promote_0" [ style = bold] 
++"rsc1-clone_demoted_0" -> "rsc1-clone_start_0" [ style = bold] ++"rsc1-clone_demoted_0" -> "rsc1-clone_stop_0" [ style = bold] ++"rsc1-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_promote_0" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"rsc1-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_running_0" -> "rsc1-clone_promote_0" [ style = bold] ++"rsc1-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_start_0" -> "rsc1-clone_running_0" [ style = bold] ++"rsc1-clone_start_0" -> "rsc1_start_0 lxc2" [ style = bold] ++"rsc1-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_stop_0" -> "rsc1-clone_stopped_0" [ style = bold] ++"rsc1-clone_stop_0" -> "rsc1_stop_0 lxc2" [ style = bold] ++"rsc1-clone_stop_0" -> "rsc1_stop_0 remote-rhel7-2" [ style = bold] ++"rsc1-clone_stop_0" -> "rsc1_stop_0 rhel7-4" [ style = bold] ++"rsc1-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_stopped_0" -> "rsc1-clone_promote_0" [ style = bold] ++"rsc1-clone_stopped_0" -> "rsc1-clone_start_0" [ style = bold] ++"rsc1-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1_demote_0 rhel7-4" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1_demote_0 rhel7-4" -> "rsc1_stop_0 rhel7-4" [ style = bold] ++"rsc1_demote_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"rsc1_monitor_10000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_11000 lxc2" [ style=bold color="green" fontcolor="black"] ++"rsc1_promote_0 rhel7-3" -> "rsc1-clone_promoted_0" [ style = bold] ++"rsc1_promote_0 rhel7-3" -> "rsc1_monitor_10000 rhel7-3" [ style = bold] ++"rsc1_promote_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"rsc1_start_0 lxc2" -> "rsc1-clone_running_0" [ style = bold] ++"rsc1_start_0 lxc2" -> "rsc1_monitor_11000 lxc2" [ style = bold] 
++"rsc1_start_0 lxc2" [ style=bold color="green" fontcolor="black"] ++"rsc1_stop_0 lxc2" -> "rsc1-clone_stopped_0" [ style = bold] ++"rsc1_stop_0 lxc2" -> "rsc1_start_0 lxc2" [ style = bold] ++"rsc1_stop_0 lxc2" [ style=bold color="green" fontcolor="orange"] ++"rsc1_stop_0 remote-rhel7-2" -> "remote-rhel7-2_stop_0 rhel7-1" [ style = bold] ++"rsc1_stop_0 remote-rhel7-2" -> "rsc1-clone_stopped_0" [ style = bold] ++"rsc1_stop_0 remote-rhel7-2" [ style=bold color="green" fontcolor="orange"] ++"rsc1_stop_0 rhel7-4" -> "rsc1-clone_stopped_0" [ style = bold] ++"rsc1_stop_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_demote_0" -> "rsc2-master_demoted_0" [ style = bold] ++"rsc2-master_demote_0" -> "rsc2_demote_0 remote-rhel7-2" [ style = bold] ++"rsc2-master_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_demoted_0" -> "rsc2-master_promote_0" [ style = bold] ++"rsc2-master_demoted_0" -> "rsc2-master_start_0" [ style = bold] ++"rsc2-master_demoted_0" -> "rsc2-master_stop_0" [ style = bold] ++"rsc2-master_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_promote_0" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"rsc2-master_promote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_running_0" -> "rsc2-master_promote_0" [ style = bold] ++"rsc2-master_running_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_start_0" -> "rsc2-master_running_0" [ style = bold] ++"rsc2-master_start_0" -> "rsc2_start_0 lxc2" [ style = bold] ++"rsc2-master_start_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2-master_stop_0" -> "rsc2-master_stopped_0" [ style = bold] ++"rsc2-master_stop_0" -> "rsc2_stop_0 lxc2" [ style = bold] ++"rsc2-master_stop_0" -> "rsc2_stop_0 remote-rhel7-2" [ style = bold] ++"rsc2-master_stop_0" -> "rsc2_stop_0 rhel7-4" [ style = bold] ++"rsc2-master_stop_0" [ style=bold color="green" 
fontcolor="orange"] ++"rsc2-master_stopped_0" -> "rsc2-master_promote_0" [ style = bold] ++"rsc2-master_stopped_0" -> "rsc2-master_start_0" [ style = bold] ++"rsc2-master_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"rsc2_demote_0 remote-rhel7-2" -> "rsc2-master_demoted_0" [ style = bold] ++"rsc2_demote_0 remote-rhel7-2" -> "rsc2_stop_0 remote-rhel7-2" [ style = bold] ++"rsc2_demote_0 remote-rhel7-2" [ style=bold color="green" fontcolor="orange"] ++"rsc2_monitor_10000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_11000 lxc2" [ style=bold color="green" fontcolor="black"] ++"rsc2_promote_0 rhel7-3" -> "rsc2-master_promoted_0" [ style = bold] ++"rsc2_promote_0 rhel7-3" -> "rsc2_monitor_10000 rhel7-3" [ style = bold] ++"rsc2_promote_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"rsc2_start_0 lxc2" -> "rsc2-master_running_0" [ style = bold] ++"rsc2_start_0 lxc2" -> "rsc2_monitor_11000 lxc2" [ style = bold] ++"rsc2_start_0 lxc2" [ style=bold color="green" fontcolor="black"] ++"rsc2_stop_0 lxc2" -> "rsc2-master_stopped_0" [ style = bold] ++"rsc2_stop_0 lxc2" -> "rsc2_start_0 lxc2" [ style = bold] ++"rsc2_stop_0 lxc2" [ style=bold color="green" fontcolor="orange"] ++"rsc2_stop_0 remote-rhel7-2" -> "remote-rhel7-2_stop_0 rhel7-1" [ style = bold] ++"rsc2_stop_0 remote-rhel7-2" -> "rsc2-master_stopped_0" [ style = bold] ++"rsc2_stop_0 remote-rhel7-2" [ style=bold color="green" fontcolor="orange"] ++"rsc2_stop_0 rhel7-4" -> "rsc2-master_stopped_0" [ style = bold] ++"rsc2_stop_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-0_monitor_30000 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-0_start_0 rhel7-5" -> "bundled_monitor_10000 stateful-bundle-0" [ style = bold] ++"stateful-bundle-0_start_0 rhel7-5" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-0_start_0 rhel7-5" -> "bundled_start_0 stateful-bundle-0" [ style = bold] 
++"stateful-bundle-0_start_0 rhel7-5" -> "stateful-bundle-0_monitor_30000 rhel7-5" [ style = bold] ++"stateful-bundle-0_start_0 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-0_stop_0 rhel7-5" -> "stateful-bundle-0_start_0 rhel7-5" [ style = bold] ++"stateful-bundle-0_stop_0 rhel7-5" -> "stateful-bundle-docker-0_stop_0 rhel7-5" [ style = bold] ++"stateful-bundle-0_stop_0 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-2_monitor_30000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-2_start_0 rhel7-3" -> "bundled_monitor_11000 stateful-bundle-2" [ style = bold] ++"stateful-bundle-2_start_0 rhel7-3" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stateful-bundle-2_start_0 rhel7-3" -> "stateful-bundle-2_monitor_30000 rhel7-3" [ style = bold] ++"stateful-bundle-2_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-2_stop_0 rhel7-4" -> "stateful-bundle-2_start_0 rhel7-3" [ style = bold] ++"stateful-bundle-2_stop_0 rhel7-4" -> "stateful-bundle-docker-2_stop_0 rhel7-4" [ style = bold] ++"stateful-bundle-2_stop_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-docker-0_monitor_60000 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-docker-0_start_0 rhel7-5" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-docker-0_start_0 rhel7-5" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-docker-0_start_0 rhel7-5" -> "stateful-bundle-0_start_0 rhel7-5" [ style = bold] ++"stateful-bundle-docker-0_start_0 rhel7-5" -> "stateful-bundle-docker-0_monitor_60000 rhel7-5" [ style = bold] ++"stateful-bundle-docker-0_start_0 rhel7-5" -> "stateful-bundle_running_0" [ style = bold] ++"stateful-bundle-docker-0_start_0 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-docker-0_stop_0 rhel7-5" -> "stateful-bundle-docker-0_start_0 rhel7-5" [ style = bold] 
++"stateful-bundle-docker-0_stop_0 rhel7-5" -> "stateful-bundle_stopped_0" [ style = bold] ++"stateful-bundle-docker-0_stop_0 rhel7-5" -> "stonith 'reboot' stateful-bundle-0" [ style = bold] ++"stateful-bundle-docker-0_stop_0 rhel7-5" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-docker-2_monitor_60000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-docker-2_start_0 rhel7-3" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stateful-bundle-docker-2_start_0 rhel7-3" -> "stateful-bundle-2_start_0 rhel7-3" [ style = bold] ++"stateful-bundle-docker-2_start_0 rhel7-3" -> "stateful-bundle-docker-2_monitor_60000 rhel7-3" [ style = bold] ++"stateful-bundle-docker-2_start_0 rhel7-3" -> "stateful-bundle_running_0" [ style = bold] ++"stateful-bundle-docker-2_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-docker-2_stop_0 rhel7-4" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stateful-bundle-docker-2_stop_0 rhel7-4" -> "stateful-bundle-ip-192.168.122.133_stop_0 rhel7-4" [ style = bold] ++"stateful-bundle-docker-2_stop_0 rhel7-4" -> "stateful-bundle_stopped_0" [ style = bold] ++"stateful-bundle-docker-2_stop_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-ip-192.168.122.133_monitor_60000 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" -> "stateful-bundle-ip-192.168.122.133_monitor_60000 rhel7-3" [ style = bold] ++"stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style=bold color="green" fontcolor="black"] ++"stateful-bundle-ip-192.168.122.133_stop_0 rhel7-4" -> "stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style = bold] ++"stateful-bundle-ip-192.168.122.133_stop_0 rhel7-4" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_demote_0" 
-> "bundled_demote_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-master_demote_0" -> "stateful-bundle-master_demoted_0" [ style = bold] ++"stateful-bundle-master_demote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_demoted_0" -> "stateful-bundle-master_promote_0" [ style = bold] ++"stateful-bundle-master_demoted_0" -> "stateful-bundle-master_start_0" [ style = bold] ++"stateful-bundle-master_demoted_0" -> "stateful-bundle-master_stop_0" [ style = bold] ++"stateful-bundle-master_demoted_0" -> "stateful-bundle_demoted_0" [ style = bold] ++"stateful-bundle-master_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_promote_0" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_promoted_0" -> "stateful-bundle_promoted_0" [ style = bold] ++"stateful-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_running_0" -> "stateful-bundle-master_promote_0" [ style = bold] ++"stateful-bundle-master_running_0" -> "stateful-bundle_running_0" [ style = bold] ++"stateful-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_start_0" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-master_start_0" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stateful-bundle-master_start_0" -> "stateful-bundle-master_running_0" [ style = bold] ++"stateful-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle-master_stop_0" -> "bundled_stop_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle-master_stop_0" -> "bundled_stop_0 stateful-bundle-2" [ style = bold] ++"stateful-bundle-master_stop_0" -> "stateful-bundle-master_stopped_0" [ style = bold] ++"stateful-bundle-master_stop_0" [ style=bold color="green" fontcolor="orange"] 
++"stateful-bundle-master_stopped_0" -> "stateful-bundle-master_promote_0" [ style = bold] ++"stateful-bundle-master_stopped_0" -> "stateful-bundle-master_start_0" [ style = bold] ++"stateful-bundle-master_stopped_0" -> "stateful-bundle_stopped_0" [ style = bold] ++"stateful-bundle-master_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_demote_0" -> "stateful-bundle-master_demote_0" [ style = bold] ++"stateful-bundle_demote_0" -> "stateful-bundle_demoted_0" [ style = bold] ++"stateful-bundle_demote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_demoted_0" -> "stateful-bundle_promote_0" [ style = bold] ++"stateful-bundle_demoted_0" -> "stateful-bundle_start_0" [ style = bold] ++"stateful-bundle_demoted_0" -> "stateful-bundle_stop_0" [ style = bold] ++"stateful-bundle_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_promote_0" -> "stateful-bundle-master_promote_0" [ style = bold] ++"stateful-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_running_0" -> "stateful-bundle_promote_0" [ style = bold] ++"stateful-bundle_running_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_start_0" -> "stateful-bundle-docker-0_start_0 rhel7-5" [ style = bold] ++"stateful-bundle_start_0" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stateful-bundle_start_0" -> "stateful-bundle-master_start_0" [ style = bold] ++"stateful-bundle_start_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_stop_0" -> "bundled_stop_0 stateful-bundle-0" [ style = bold] ++"stateful-bundle_stop_0" -> "bundled_stop_0 stateful-bundle-2" [ style = bold] ++"stateful-bundle_stop_0" -> "stateful-bundle-docker-0_stop_0 rhel7-5" [ style = bold] ++"stateful-bundle_stop_0" -> "stateful-bundle-docker-2_stop_0 rhel7-4" [ style = bold] ++"stateful-bundle_stop_0" -> 
"stateful-bundle-master_stop_0" [ style = bold] ++"stateful-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] ++"stateful-bundle_stopped_0" -> "stateful-bundle_promote_0" [ style = bold] ++"stateful-bundle_stopped_0" -> "stateful-bundle_start_0" [ style = bold] ++"stateful-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"stonith 'reboot' lxc2" -> "Fencing_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' lxc2" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' lxc2" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' lxc2" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stonith 'reboot' lxc2" -> "container2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" -> "lxc-ms-master_stop_0" [ style = bold] ++"stonith 'reboot' lxc2" -> "lxc-ms_demote_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "lxc-ms_stop_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "lxc2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" -> "remote-rhel7-2_start_0 rhel7-1" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc1-clone_stop_0" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc1_start_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc1_stop_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc2-master_stop_0" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc2_start_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "rsc2_stop_0 lxc2" [ style = bold] ++"stonith 'reboot' lxc2" -> "stateful-bundle-0_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' lxc2" -> "stateful-bundle-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" -> "stateful-bundle-docker-0_start_0 rhel7-5" [ style = 
bold] ++"stonith 'reboot' lxc2" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" -> "stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' lxc2" [ style=bold color="green" fontcolor="orange"] ++"stonith 'reboot' remote-rhel7-2" -> "Fencing_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "container2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "lxc2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "remote-rhel7-2_start_0 rhel7-1" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc1-clone_stop_0" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc1_start_0 lxc2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc1_stop_0 remote-rhel7-2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc2-master_stop_0" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc2_demote_0 remote-rhel7-2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc2_start_0 lxc2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "rsc2_stop_0 remote-rhel7-2" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "stateful-bundle-0_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "stateful-bundle-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "stateful-bundle-docker-0_start_0 
rhel7-5" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" -> "stonith 'reboot' rhel7-4" [ style = bold] ++"stonith 'reboot' remote-rhel7-2" [ style=bold color="green" fontcolor="black"] ++"stonith 'reboot' rhel7-4" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "container2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc1-clone_stop_0" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc1_demote_0 rhel7-4" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc1_start_0 lxc2" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc1_stop_0 rhel7-4" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc2-master_stop_0" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc2_start_0 lxc2" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "rsc2_stop_0 rhel7-4" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "stateful-bundle-docker-0_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "stateful-bundle-docker-2_stop_0 rhel7-4" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' rhel7-4" -> "stateful-bundle-ip-192.168.122.133_stop_0 rhel7-4" [ style = bold] ++"stonith 'reboot' 
rhel7-4" -> "stonith 'reboot' stateful-bundle-2" [ style = bold] ++"stonith 'reboot' rhel7-4" [ style=bold color="green" fontcolor="black"] ++"stonith 'reboot' stateful-bundle-0" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "container2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "lxc-ms_start_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "rsc1_start_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "rsc2_start_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "stateful-bundle-docker-0_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" -> "stateful-bundle-master_stop_0" [ style = bold] ++"stonith 'reboot' stateful-bundle-0" [ style=bold color="green" fontcolor="orange"] ++"stonith 'reboot' stateful-bundle-2" -> "bundled_promote_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "bundled_start_0 stateful-bundle-0" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "bundled_start_0 stateful-bundle-2" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "container2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "lxc-ms_promote_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "lxc-ms_start_0 
lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "rsc1_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "rsc1_start_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "rsc2_promote_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "rsc2_start_0 lxc2" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "stateful-bundle-docker-0_start_0 rhel7-5" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "stateful-bundle-docker-2_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "stateful-bundle-ip-192.168.122.133_start_0 rhel7-3" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" -> "stateful-bundle-master_stop_0" [ style = bold] ++"stonith 'reboot' stateful-bundle-2" [ style=bold color="green" fontcolor="orange"] ++} +diff --git a/cts/scheduler/on_fail_demote4.exp b/cts/scheduler/on_fail_demote4.exp +new file mode 100644 +index 0000000..0789a12 +--- /dev/null ++++ b/cts/scheduler/on_fail_demote4.exp +@@ -0,0 +1,1818 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/on_fail_demote4.scores b/cts/scheduler/on_fail_demote4.scores +new file mode 100644 +index 0000000..cde3fec +--- /dev/null ++++ b/cts/scheduler/on_fail_demote4.scores +@@ -0,0 +1,470 @@ ++Allocation scores: ++Using the original execution date of: 2020-06-16 19:23:21Z ++bundled:0 promotion score on stateful-bundle-0: 10 ++bundled:1 promotion score on stateful-bundle-1: 5 ++bundled:2 promotion score on stateful-bundle-2: 5 ++lxc-ms:0 promotion score on lxc2: INFINITY ++lxc-ms:1 promotion score on lxc1: INFINITY ++pcmk__bundle_allocate: bundled:0 allocation score on stateful-bundle-0: 501 ++pcmk__bundle_allocate: bundled:1 allocation score on stateful-bundle-1: 501 ++pcmk__bundle_allocate: bundled:2 allocation score on stateful-bundle-2: 501 ++pcmk__bundle_allocate: stateful-bundle allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on lxc1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on lxc2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-0 allocation 
score on remote-rhel7-2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-0 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on lxc1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on lxc2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-1 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on lxc1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on lxc2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-2 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-0 allocation score on rhel7-5: 0 
++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-1 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-docker-2 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc2: 0 ++pcmk__bundle_allocate: 
stateful-bundle-ip-192.168.122.132 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on lxc1: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on lxc2: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on remote-rhel7-2: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-1: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-3: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-4: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on rhel7-5: 0 ++pcmk__bundle_allocate: stateful-bundle-master allocation score on stateful-bundle-0: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-master allocation score on stateful-bundle-1: -INFINITY ++pcmk__bundle_allocate: stateful-bundle-master allocation score on stateful-bundle-2: -INFINITY ++pcmk__clone_allocate: bundled:0 allocation score on stateful-bundle-0: 
INFINITY ++pcmk__clone_allocate: bundled:1 allocation score on stateful-bundle-1: INFINITY ++pcmk__clone_allocate: bundled:2 allocation score on stateful-bundle-2: INFINITY ++pcmk__clone_allocate: lxc-ms-master allocation score on lxc1: INFINITY ++pcmk__clone_allocate: lxc-ms-master allocation score on lxc2: INFINITY ++pcmk__clone_allocate: lxc-ms-master allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-1: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-3: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-4: 0 ++pcmk__clone_allocate: lxc-ms-master allocation score on rhel7-5: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on lxc1: INFINITY ++pcmk__clone_allocate: lxc-ms:0 allocation score on lxc2: INFINITY ++pcmk__clone_allocate: lxc-ms:0 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: lxc-ms:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on lxc1: INFINITY ++pcmk__clone_allocate: lxc-ms:1 allocation score on lxc2: INFINITY ++pcmk__clone_allocate: lxc-ms:1 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: lxc-ms:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-4: 0 
++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-4: 1 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on remote-rhel7-2: 1 
++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on lxc2: 6 ++pcmk__clone_allocate: rsc1:5 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:5 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on lxc1: 6 ++pcmk__clone_allocate: rsc1:6 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:6 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2-master allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2-master allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2-master allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2-master allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:0 allocation score on rhel7-4: 1 ++pcmk__clone_allocate: rsc2:0 
allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on remote-rhel7-2: 1 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:4 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on lxc1: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on lxc2: 6 ++pcmk__clone_allocate: rsc2:5 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:5 allocation 
score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:5 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on lxc1: 6 ++pcmk__clone_allocate: rsc2:6 allocation score on lxc2: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on remote-rhel7-2: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc2:6 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: stateful-bundle-master allocation score on lxc1: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on lxc2: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on remote-rhel7-2: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-1: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-3: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-4: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on rhel7-5: -INFINITY ++pcmk__clone_allocate: stateful-bundle-master allocation score on stateful-bundle-0: 0 ++pcmk__clone_allocate: stateful-bundle-master allocation score on stateful-bundle-1: 0 ++pcmk__clone_allocate: stateful-bundle-master allocation score on stateful-bundle-2: 0 ++pcmk__native_allocate: Fencing allocation score on lxc1: -INFINITY ++pcmk__native_allocate: Fencing allocation score on lxc2: -INFINITY ++pcmk__native_allocate: Fencing allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: Fencing allocation score on rhel7-1: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-3: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-4: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-5: 0 
++pcmk__native_allocate: bundled:0 allocation score on stateful-bundle-0: INFINITY ++pcmk__native_allocate: bundled:1 allocation score on stateful-bundle-1: INFINITY ++pcmk__native_allocate: bundled:2 allocation score on stateful-bundle-2: INFINITY ++pcmk__native_allocate: container1 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: container1 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: container1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: container1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: container1 allocation score on rhel7-3: INFINITY ++pcmk__native_allocate: container1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: container1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: container2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: container2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: container2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: container2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: container2 allocation score on rhel7-3: INFINITY ++pcmk__native_allocate: container2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: container2 allocation score on rhel7-5: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: lxc-ms:0 allocation score on lxc2: INFINITY ++pcmk__native_allocate: lxc-ms:0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: lxc-ms:0 allocation score on rhel7-5: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on lxc1: INFINITY ++pcmk__native_allocate: lxc-ms:1 allocation score on lxc2: INFINITY ++pcmk__native_allocate: lxc-ms:1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: 
lxc-ms:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: lxc-ms:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: lxc1 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: lxc1 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: lxc2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: lxc2 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: remote-rhel7-2 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:0 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-1: 
-INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:1 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:2 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:2 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc1:3 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc1:3 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:3 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-3: -INFINITY 
++pcmk__native_allocate: rsc1:4 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on lxc2: 6 ++pcmk__native_allocate: rsc1:5 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:5 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on lxc1: 6 ++pcmk__native_allocate: rsc1:6 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc1:6 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:6 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:0 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:1 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:1 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-3: 6 ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-4: 
-INFINITY ++pcmk__native_allocate: rsc2:1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc2:2 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:2 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:2 allocation score on rhel7-5: 6 ++pcmk__native_allocate: rsc2:3 allocation score on lxc1: 0 ++pcmk__native_allocate: rsc2:3 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:3 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-1: 6 ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on lxc2: 6 ++pcmk__native_allocate: rsc2:5 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:5 allocation score on rhel7-5: 
-INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on lxc1: 6 ++pcmk__native_allocate: rsc2:6 allocation score on lxc2: 0 ++pcmk__native_allocate: rsc2:6 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc2:6 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-0 allocation score on rhel7-5: 10000 ++pcmk__native_allocate: stateful-bundle-1 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-1 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-1: 10000 ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-2 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-2 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-3: 10000 
++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: stateful-bundle-2 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on lxc1: -10000 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on lxc2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-0 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on lxc1: -10000 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on lxc2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-1 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on lxc1: -10000 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on lxc2: -10000 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-docker-2 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: 
stateful-bundle-ip-192.168.122.131 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.131 allocation score on rhel7-5: 0 ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-1: 0 ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.132 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on lxc2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on remote-rhel7-2: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-3: 0 ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: stateful-bundle-ip-192.168.122.133 
allocation score on rhel7-5: -INFINITY ++rsc1:0 promotion score on none: 0 ++rsc1:1 promotion score on rhel7-3: 5 ++rsc1:2 promotion score on rhel7-5: 5 ++rsc1:3 promotion score on rhel7-1: 5 ++rsc1:4 promotion score on none: 0 ++rsc1:5 promotion score on lxc2: 5 ++rsc1:6 promotion score on lxc1: 5 ++rsc2:0 promotion score on none: 0 ++rsc2:1 promotion score on rhel7-3: 5 ++rsc2:2 promotion score on rhel7-5: 5 ++rsc2:3 promotion score on rhel7-1: 5 ++rsc2:4 promotion score on none: 0 ++rsc2:5 promotion score on lxc2: 5 ++rsc2:6 promotion score on lxc1: 5 +diff --git a/cts/scheduler/on_fail_demote4.summary b/cts/scheduler/on_fail_demote4.summary +new file mode 100644 +index 0000000..20520ff +--- /dev/null ++++ b/cts/scheduler/on_fail_demote4.summary +@@ -0,0 +1,187 @@ ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Current cluster status: ++RemoteNode remote-rhel7-2: UNCLEAN (offline) ++Node rhel7-4 (4): UNCLEAN (offline) ++Online: [ rhel7-1 rhel7-3 rhel7-5 ] ++GuestOnline: [ lxc1:container1 stateful-bundle-1:stateful-bundle-docker-1 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-4 (UNCLEAN) ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ rsc1 (ocf::pacemaker:Stateful): Master rhel7-4 (UNCLEAN) ++ rsc1 (ocf::pacemaker:Stateful): Slave remote-rhel7-2 (UNCLEAN) ++ Slaves: [ lxc1 rhel7-1 rhel7-3 rhel7-5 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ rsc2 (ocf::pacemaker:Stateful): Slave rhel7-4 (UNCLEAN) ++ rsc2 (ocf::pacemaker:Stateful): Master remote-rhel7-2 (UNCLEAN) ++ Slaves: [ lxc1 rhel7-1 rhel7-3 rhel7-5 ] ++ remote-rhel7-2 (ocf::pacemaker:remote): FAILED rhel7-1 ++ container1 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ container2 (ocf::heartbeat:VirtualDomain): FAILED rhel7-3 ++ Clone Set: lxc-ms-master [lxc-ms] (promotable) ++ Slaves: [ lxc1 ] ++ Stopped: [ remote-rhel7-2 rhel7-1 rhel7-3 rhel7-4 rhel7-5 ] ++ Container bundle set: stateful-bundle [pcmktest:http] ++ stateful-bundle-0 (192.168.122.131) (ocf::pacemaker:Stateful): 
FAILED Master rhel7-5 ++ stateful-bundle-1 (192.168.122.132) (ocf::pacemaker:Stateful): Slave rhel7-1 ++ stateful-bundle-2 (192.168.122.133) (ocf::pacemaker:Stateful): FAILED rhel7-4 (UNCLEAN) ++ ++Transition Summary: ++ * Fence (reboot) stateful-bundle-2 (resource: stateful-bundle-docker-2) 'guest is unclean' ++ * Fence (reboot) stateful-bundle-0 (resource: stateful-bundle-docker-0) 'guest is unclean' ++ * Fence (reboot) lxc2 (resource: container2) 'guest is unclean' ++ * Fence (reboot) remote-rhel7-2 'remote connection is unrecoverable' ++ * Fence (reboot) rhel7-4 'peer is no longer part of the cluster' ++ * Move Fencing ( rhel7-4 -> rhel7-5 ) ++ * Stop rsc1:0 ( Master rhel7-4 ) due to node availability ++ * Promote rsc1:1 ( Slave -> Master rhel7-3 ) ++ * Stop rsc1:4 ( Slave remote-rhel7-2 ) due to node availability ++ * Recover rsc1:5 ( Slave lxc2 ) ++ * Stop rsc2:0 ( Slave rhel7-4 ) due to node availability ++ * Promote rsc2:1 ( Slave -> Master rhel7-3 ) ++ * Stop rsc2:4 ( Master remote-rhel7-2 ) due to node availability ++ * Recover rsc2:5 ( Slave lxc2 ) ++ * Recover remote-rhel7-2 ( rhel7-1 ) ++ * Recover container2 ( rhel7-3 ) ++ * Recover lxc-ms:0 ( Master lxc2 ) ++ * Recover stateful-bundle-docker-0 ( rhel7-5 ) ++ * Restart stateful-bundle-0 ( rhel7-5 ) due to required stateful-bundle-docker-0 start ++ * Recover bundled:0 ( Master stateful-bundle-0 ) ++ * Move stateful-bundle-ip-192.168.122.133 ( rhel7-4 -> rhel7-3 ) ++ * Recover stateful-bundle-docker-2 ( rhel7-4 -> rhel7-3 ) ++ * Move stateful-bundle-2 ( rhel7-4 -> rhel7-3 ) ++ * Recover bundled:2 ( Slave stateful-bundle-2 ) ++ * Restart lxc2 ( rhel7-3 ) due to required container2 start ++ ++Executing cluster transition: ++ * Pseudo action: Fencing_stop_0 ++ * Resource action: rsc1 cancel=11000 on rhel7-3 ++ * Pseudo action: rsc1-clone_demote_0 ++ * Resource action: rsc2 cancel=11000 on rhel7-3 ++ * Pseudo action: rsc2-master_demote_0 ++ * Pseudo action: lxc-ms-master_demote_0 ++ * Resource action: 
stateful-bundle-0 stop on rhel7-5 ++ * Pseudo action: stateful-bundle-2_stop_0 ++ * Resource action: lxc2 stop on rhel7-3 ++ * Pseudo action: stateful-bundle_demote_0 ++ * Fencing remote-rhel7-2 (reboot) ++ * Fencing rhel7-4 (reboot) ++ * Pseudo action: rsc1_demote_0 ++ * Pseudo action: rsc1-clone_demoted_0 ++ * Pseudo action: rsc2_demote_0 ++ * Pseudo action: rsc2-master_demoted_0 ++ * Resource action: container2 stop on rhel7-3 ++ * Pseudo action: stateful-bundle-master_demote_0 ++ * Pseudo action: stonith-stateful-bundle-2-reboot on stateful-bundle-2 ++ * Pseudo action: stonith-lxc2-reboot on lxc2 ++ * Resource action: Fencing start on rhel7-5 ++ * Pseudo action: rsc1-clone_stop_0 ++ * Pseudo action: rsc2-master_stop_0 ++ * Pseudo action: lxc-ms_demote_0 ++ * Pseudo action: lxc-ms-master_demoted_0 ++ * Pseudo action: lxc-ms-master_stop_0 ++ * Pseudo action: bundled_demote_0 ++ * Pseudo action: stateful-bundle-master_demoted_0 ++ * Pseudo action: stateful-bundle_demoted_0 ++ * Pseudo action: stateful-bundle_stop_0 ++ * Resource action: Fencing monitor=120000 on rhel7-5 ++ * Pseudo action: rsc1_stop_0 ++ * Pseudo action: rsc1_stop_0 ++ * Pseudo action: rsc1_stop_0 ++ * Pseudo action: rsc1-clone_stopped_0 ++ * Pseudo action: rsc1-clone_start_0 ++ * Pseudo action: rsc2_stop_0 ++ * Pseudo action: rsc2_stop_0 ++ * Pseudo action: rsc2_stop_0 ++ * Pseudo action: rsc2-master_stopped_0 ++ * Pseudo action: rsc2-master_start_0 ++ * Resource action: remote-rhel7-2 stop on rhel7-1 ++ * Pseudo action: lxc-ms_stop_0 ++ * Pseudo action: lxc-ms-master_stopped_0 ++ * Pseudo action: lxc-ms-master_start_0 ++ * Resource action: stateful-bundle-docker-0 stop on rhel7-5 ++ * Pseudo action: stateful-bundle-docker-2_stop_0 ++ * Pseudo action: stonith-stateful-bundle-0-reboot on stateful-bundle-0 ++ * Resource action: remote-rhel7-2 start on rhel7-1 ++ * Resource action: remote-rhel7-2 monitor=60000 on rhel7-1 ++ * Resource action: container2 start on rhel7-3 ++ * Resource action: 
container2 monitor=20000 on rhel7-3 ++ * Pseudo action: stateful-bundle-master_stop_0 ++ * Pseudo action: stateful-bundle-ip-192.168.122.133_stop_0 ++ * Resource action: lxc2 start on rhel7-3 ++ * Resource action: lxc2 monitor=30000 on rhel7-3 ++ * Resource action: rsc1 start on lxc2 ++ * Pseudo action: rsc1-clone_running_0 ++ * Resource action: rsc2 start on lxc2 ++ * Pseudo action: rsc2-master_running_0 ++ * Resource action: lxc-ms start on lxc2 ++ * Pseudo action: lxc-ms-master_running_0 ++ * Pseudo action: bundled_stop_0 ++ * Resource action: stateful-bundle-ip-192.168.122.133 start on rhel7-3 ++ * Resource action: rsc1 monitor=11000 on lxc2 ++ * Pseudo action: rsc1-clone_promote_0 ++ * Resource action: rsc2 monitor=11000 on lxc2 ++ * Pseudo action: rsc2-master_promote_0 ++ * Pseudo action: lxc-ms-master_promote_0 ++ * Pseudo action: bundled_stop_0 ++ * Pseudo action: stateful-bundle-master_stopped_0 ++ * Resource action: stateful-bundle-ip-192.168.122.133 monitor=60000 on rhel7-3 ++ * Pseudo action: stateful-bundle_stopped_0 ++ * Pseudo action: stateful-bundle_start_0 ++ * Resource action: rsc1 promote on rhel7-3 ++ * Pseudo action: rsc1-clone_promoted_0 ++ * Resource action: rsc2 promote on rhel7-3 ++ * Pseudo action: rsc2-master_promoted_0 ++ * Resource action: lxc-ms promote on lxc2 ++ * Pseudo action: lxc-ms-master_promoted_0 ++ * Pseudo action: stateful-bundle-master_start_0 ++ * Resource action: stateful-bundle-docker-0 start on rhel7-5 ++ * Resource action: stateful-bundle-docker-0 monitor=60000 on rhel7-5 ++ * Resource action: stateful-bundle-0 start on rhel7-5 ++ * Resource action: stateful-bundle-0 monitor=30000 on rhel7-5 ++ * Resource action: stateful-bundle-docker-2 start on rhel7-3 ++ * Resource action: stateful-bundle-2 start on rhel7-3 ++ * Resource action: rsc1 monitor=10000 on rhel7-3 ++ * Resource action: rsc2 monitor=10000 on rhel7-3 ++ * Resource action: lxc-ms monitor=10000 on lxc2 ++ * Resource action: bundled start on stateful-bundle-0 
++ * Resource action: bundled start on stateful-bundle-2 ++ * Pseudo action: stateful-bundle-master_running_0 ++ * Resource action: stateful-bundle-docker-2 monitor=60000 on rhel7-3 ++ * Resource action: stateful-bundle-2 monitor=30000 on rhel7-3 ++ * Pseudo action: stateful-bundle_running_0 ++ * Resource action: bundled monitor=11000 on stateful-bundle-2 ++ * Pseudo action: stateful-bundle_promote_0 ++ * Pseudo action: stateful-bundle-master_promote_0 ++ * Resource action: bundled promote on stateful-bundle-0 ++ * Pseudo action: stateful-bundle-master_promoted_0 ++ * Pseudo action: stateful-bundle_promoted_0 ++ * Resource action: bundled monitor=10000 on stateful-bundle-0 ++Using the original execution date of: 2020-06-16 19:23:21Z ++ ++Revised cluster status: ++Online: [ rhel7-1 rhel7-3 rhel7-5 ] ++OFFLINE: [ rhel7-4 ] ++RemoteOnline: [ remote-rhel7-2 ] ++GuestOnline: [ lxc1:container1 lxc2:container2 stateful-bundle-0:stateful-bundle-docker-0 stateful-bundle-1:stateful-bundle-docker-1 stateful-bundle-2:stateful-bundle-docker-2 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-5 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ Masters: [ rhel7-3 ] ++ Slaves: [ lxc1 lxc2 rhel7-1 rhel7-5 ] ++ Stopped: [ remote-rhel7-2 rhel7-4 ] ++ Clone Set: rsc2-master [rsc2] (promotable) ++ Masters: [ rhel7-3 ] ++ Slaves: [ lxc1 lxc2 rhel7-1 rhel7-5 ] ++ Stopped: [ remote-rhel7-2 rhel7-4 ] ++ remote-rhel7-2 (ocf::pacemaker:remote): Started rhel7-1 ++ container1 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ container2 (ocf::heartbeat:VirtualDomain): Started rhel7-3 ++ Clone Set: lxc-ms-master [lxc-ms] (promotable) ++ Masters: [ lxc2 ] ++ Slaves: [ lxc1 ] ++ Container bundle set: stateful-bundle [pcmktest:http] ++ stateful-bundle-0 (192.168.122.131) (ocf::pacemaker:Stateful): Master rhel7-5 ++ stateful-bundle-1 (192.168.122.132) (ocf::pacemaker:Stateful): Slave rhel7-1 ++ stateful-bundle-2 (192.168.122.133) (ocf::pacemaker:Stateful): Slave rhel7-3 ++ +diff --git 
a/cts/scheduler/on_fail_demote4.xml b/cts/scheduler/on_fail_demote4.xml +new file mode 100644 +index 0000000..eb4c4cc +--- /dev/null ++++ b/cts/scheduler/on_fail_demote4.xml +@@ -0,0 +1,625 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From 204961e95d9de140d998d71a0e53b5b9baa5d39e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 26 May 2020 18:04:32 -0500 +Subject: [PATCH 12/20] Doc: Pacemaker Explained: document new on-fail="demote" + option + +--- + doc/Pacemaker_Explained/en-US/Ch-Resources.txt | 36 ++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt +index d8e7115..9df9243 100644 +--- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt ++++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt +@@ -676,6 +676,10 @@ a|The action to take if this action ever fails. Allowed values: + * +ignore:+ Pretend the resource did not fail. + * +block:+ Don't perform any further operations on the resource. + * +stop:+ Stop the resource and do not start it elsewhere. ++* +demote:+ Demote the resource, without a full restart. This is valid only for ++ +promote+ actions, and for +monitor+ actions with both a nonzero +interval+ ++ and +role+ set to +Master+; for any other action, a configuration error will ++ be logged, and the default behavior will be used. + * +restart:+ Stop the resource and start it again (possibly on a different node). + * +fence:+ STONITH the node on which the resource failed. + * +standby:+ Move _all_ resources away from the node on which the resource failed. +@@ -714,6 +718,38 @@ indexterm:[Action,Property,on-fail] + + |========================================================= + ++[NOTE] ++==== ++When +on-fail+ is set to +demote+, recovery from failure by a successful demote ++causes the cluster to recalculate whether and where a new instance should be ++promoted. The node with the failure is eligible, so if master scores have not ++changed, it will be promoted again. 
++ ++There is no direct equivalent of +migration-threshold+ for the master role, but ++the same effect can be achieved with a location constraint using a ++<> with a node attribute expression for the resource's fail ++count. ++ ++For example, to immediately ban the master role from a node with any failed ++promote or master monitor: ++[source,XML] ++---- ++ ++ ++ ++ ++ ++ ++---- ++ ++This example assumes that there is a promotable clone of the +my_primitive+ ++resource (note that the primitive name, not the clone name, is used in the ++rule), and that there is a recurring 10-second-interval monitor configured for ++the master role (fail count attributes specify the interval in milliseconds). ++==== ++ + [[s-resource-monitoring]] + === Monitoring Resources for Failure === + +-- +1.8.3.1 + + +From d4b9117e72b178bb6f4458cd89bee13060f78dcb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 26 May 2020 18:10:33 -0500 +Subject: [PATCH 13/20] Doc: Pacemaker Explained: correct on-fail default + +--- + doc/Pacemaker_Explained/en-US/Ch-Resources.txt | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt +index 9df9243..88892db 100644 +--- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt ++++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt +@@ -669,8 +669,13 @@ XML attributes take precedence over +nvpair+ elements if both are specified. + indexterm:[Action,Property,timeout] + + |on-fail +-|restart '(except for +stop+ operations, which default to' fence 'when +- STONITH is enabled and' block 'otherwise)' ++a|Varies by action: ++ ++* +stop+: +fence+ if +stonith-enabled+ is true or +block+ otherwise ++* +demote+: +on-fail+ of the +monitor+ action with +role+ set to +Master+, if ++ present, enabled, and configured to a value other than +demote+, or +restart+ ++ otherwise ++* all other actions: +restart+ + a|The action to take if this action ever fails. 
Allowed values: + + * +ignore:+ Pretend the resource did not fail. +-- +1.8.3.1 + + +From 0b683445318c783ecef8d6f023b35a6c056ee321 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 2 Jun 2020 15:05:56 -0500 +Subject: [PATCH 14/20] Refactor: scheduler: functionize checking quorum policy + in effect + +... for readability and ease of future changes +--- + lib/pengine/utils.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index fee9efb..5d6b836 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -481,6 +481,17 @@ sort_rsc_priority(gconstpointer a, gconstpointer b) + return 0; + } + ++static enum pe_quorum_policy ++effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set) ++{ ++ enum pe_quorum_policy policy = data_set->no_quorum_policy; ++ ++ if (is_set(data_set->flags, pe_flag_have_quorum)) { ++ policy = no_quorum_ignore; ++ } ++ return policy; ++} ++ + pe_action_t * + custom_action(pe_resource_t * rsc, char *key, const char *task, + pe_node_t * on_node, gboolean optional, gboolean save_action, +@@ -593,6 +604,7 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, + + if (rsc != NULL) { + enum action_tasks a_task = text2task(action->task); ++ enum pe_quorum_policy quorum_policy = effective_quorum_policy(rsc, data_set); + int warn_level = LOG_TRACE; + + if (save_action) { +@@ -675,13 +687,11 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, + crm_trace("Action %s requires only stonith", action->uuid); + action->runnable = TRUE; + #endif +- } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE +- && data_set->no_quorum_policy == no_quorum_stop) { ++ } else if (quorum_policy == no_quorum_stop) { + pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "no quorum", pe_action_runnable, TRUE); + crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); + +- } else if 
(is_set(data_set->flags, pe_flag_have_quorum) == FALSE +- && data_set->no_quorum_policy == no_quorum_freeze) { ++ } else if (quorum_policy == no_quorum_freeze) { + pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); + if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { + pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, NULL, "quorum freeze", pe_action_runnable, TRUE); +-- +1.8.3.1 + + +From b1ae359382f15e28e90d9144ca7b1d5f04820c10 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 2 Jun 2020 15:06:32 -0500 +Subject: [PATCH 15/20] Feature: scheduler: support "demote" choice for + no-quorum-policy option + +If quorum is lost, promotable resources in the master role will be demoted but +left running, and all other resources will be stopped. +--- + daemons/controld/controld_control.c | 2 +- + include/crm/pengine/pe_types.h | 3 ++- + lib/common/options.c | 1 + + lib/pengine/common.c | 2 +- + lib/pengine/pe_output.c | 14 ++++++++++++++ + lib/pengine/unpack.c | 7 +++++++ + lib/pengine/utils.c | 14 ++++++++++++++ + 7 files changed, 40 insertions(+), 3 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 7d29205..059eb7b 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -626,7 +626,7 @@ static pcmk__cluster_option_t crmd_opts[] = { + + // Already documented in libpe_status (other values must be kept identical) + { +- "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", ++ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, demote, suicide", + "stop", pcmk__valid_quorum, NULL, NULL + }, + { +diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h +index ed5eb12..f3cb4ef 100644 +--- a/include/crm/pengine/pe_types.h ++++ b/include/crm/pengine/pe_types.h +@@ -61,7 +61,8 @@ enum pe_quorum_policy { + 
no_quorum_freeze, + no_quorum_stop, + no_quorum_ignore, +- no_quorum_suicide ++ no_quorum_suicide, ++ no_quorum_demote + }; + + enum node_type { +diff --git a/lib/common/options.c b/lib/common/options.c +index 9399642..9e041c9 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -407,6 +407,7 @@ pcmk__valid_quorum(const char *value) + return safe_str_eq(value, "stop") + || safe_str_eq(value, "freeze") + || safe_str_eq(value, "ignore") ++ || safe_str_eq(value, "demote") + || safe_str_eq(value, "suicide"); + } + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index f4f2106..37f287b 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -54,7 +54,7 @@ static pcmk__cluster_option_t pe_opts[] = { + * long description + */ + { +- "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", ++ "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, demote, suicide", + "stop", pcmk__valid_quorum, + "What to do when the cluster does not have quorum", + NULL +diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c +index 75bf0d5..ad469ab 100644 +--- a/lib/pengine/pe_output.c ++++ b/lib/pengine/pe_output.c +@@ -729,6 +729,11 @@ pe__cluster_options_html(pcmk__output_t *out, va_list args) { + out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); + break; + ++ case no_quorum_demote: ++ out->list_item(out, NULL, "No quorum policy: Demote promotable " ++ "resources and stop all other resources"); ++ break; ++ + case no_quorum_ignore: + out->list_item(out, NULL, "No quorum policy: Ignore"); + break; +@@ -785,6 +790,11 @@ pe__cluster_options_text(pcmk__output_t *out, va_list args) { + out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); + break; + ++ case no_quorum_demote: ++ out->list_item(out, NULL, "No quorum policy: Demote promotable " ++ "resources and stop all other resources"); ++ break; ++ + case no_quorum_ignore: + out->list_item(out, NULL, "No quorum policy: Ignore"); + break; +@@ -817,6 +827,10 
@@ pe__cluster_options_xml(pcmk__output_t *out, va_list args) { + xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "stop"); + break; + ++ case no_quorum_demote: ++ xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "demote"); ++ break; ++ + case no_quorum_ignore: + xmlSetProp(node, (pcmkXmlStr) "no-quorum-policy", (pcmkXmlStr) "ignore"); + break; +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index a219805..a480680 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -268,6 +268,9 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) + } else if (safe_str_eq(value, "freeze")) { + data_set->no_quorum_policy = no_quorum_freeze; + ++ } else if (safe_str_eq(value, "demote")) { ++ data_set->no_quorum_policy = no_quorum_demote; ++ + } else if (safe_str_eq(value, "suicide")) { + if (is_set(data_set->flags, pe_flag_stonith_enabled)) { + int do_panic = 0; +@@ -297,6 +300,10 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) + case no_quorum_stop: + crm_debug("On loss of quorum: Stop ALL resources"); + break; ++ case no_quorum_demote: ++ crm_debug("On loss of quorum: " ++ "Demote promotable resources and stop other resources"); ++ break; + case no_quorum_suicide: + crm_notice("On loss of quorum: Fence all remaining nodes"); + break; +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 5d6b836..f8b631a 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -488,6 +488,20 @@ effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set) + + if (is_set(data_set->flags, pe_flag_have_quorum)) { + policy = no_quorum_ignore; ++ ++ } else if (data_set->no_quorum_policy == no_quorum_demote) { ++ switch (rsc->role) { ++ case RSC_ROLE_MASTER: ++ case RSC_ROLE_SLAVE: ++ if (rsc->next_role > RSC_ROLE_SLAVE) { ++ rsc->next_role = RSC_ROLE_SLAVE; ++ } ++ policy = no_quorum_ignore; ++ break; ++ default: ++ policy = no_quorum_stop; ++ break; ++ } + } + return policy; + } +-- +1.8.3.1 + + 
+From 5d809e136f2927259ad570e409e3bbb68f7ce7b4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 17 Jun 2020 12:29:50 -0500 +Subject: [PATCH 16/20] Test: scheduler: add regression test for + no-quorum-policy="demote" + +--- + cts/cts-scheduler.in | 1 + + cts/scheduler/no_quorum_demote.dot | 22 ++++ + cts/scheduler/no_quorum_demote.exp | 81 ++++++++++++ + cts/scheduler/no_quorum_demote.scores | 72 +++++++++++ + cts/scheduler/no_quorum_demote.summary | 38 ++++++ + cts/scheduler/no_quorum_demote.xml | 224 +++++++++++++++++++++++++++++++++ + 6 files changed, 438 insertions(+) + create mode 100644 cts/scheduler/no_quorum_demote.dot + create mode 100644 cts/scheduler/no_quorum_demote.exp + create mode 100644 cts/scheduler/no_quorum_demote.scores + create mode 100644 cts/scheduler/no_quorum_demote.summary + create mode 100644 cts/scheduler/no_quorum_demote.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index 0e68e73..9e34379 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -482,6 +482,7 @@ TESTS = [ + [ "on_fail_demote2", "Recovery with on-fail=\"demote\" with promotion on different node" ], + [ "on_fail_demote3", "Recovery with on-fail=\"demote\" with no promotion" ], + [ "on_fail_demote4", "Recovery with on-fail=\"demote\" on failed cluster, remote, guest, and bundle nodes" ], ++ [ "no_quorum_demote", "Promotable demotion and primitive stop with no-quorum-policy=\"demote\"" ], + ], + [ + [ "history-1", "Correctly parse stateful-1 resource state" ], +diff --git a/cts/scheduler/no_quorum_demote.dot b/cts/scheduler/no_quorum_demote.dot +new file mode 100644 +index 0000000..ea5b30c +--- /dev/null ++++ b/cts/scheduler/no_quorum_demote.dot +@@ -0,0 +1,22 @@ ++ digraph "g" { ++"Cancel rsc1_monitor_10000 rhel7-1" -> "rsc1_demote_0 rhel7-1" [ style = bold] ++"Cancel rsc1_monitor_10000 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"Fencing_monitor_120000 rhel7-1" [ style=dashed color="red" fontcolor="black"] ++"Fencing_start_0 
rhel7-1" -> "Fencing_monitor_120000 rhel7-1" [ style = dashed] ++"Fencing_start_0 rhel7-1" [ style=dashed color="red" fontcolor="black"] ++"Fencing_stop_0 rhel7-1" -> "Fencing_start_0 rhel7-1" [ style = dashed] ++"Fencing_stop_0 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"rsc1-clone_demote_0" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1-clone_demote_0" -> "rsc1_demote_0 rhel7-1" [ style = bold] ++"rsc1-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1_demote_0 rhel7-1" -> "rsc1-clone_demoted_0" [ style = bold] ++"rsc1_demote_0 rhel7-1" -> "rsc1_monitor_11000 rhel7-1" [ style = bold] ++"rsc1_demote_0 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_11000 rhel7-1" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_10000 rhel7-2" [ style=dashed color="red" fontcolor="black"] ++"rsc2_start_0 rhel7-2" -> "rsc2_monitor_10000 rhel7-2" [ style = dashed] ++"rsc2_start_0 rhel7-2" [ style=dashed color="red" fontcolor="black"] ++"rsc2_stop_0 rhel7-2" -> "rsc2_start_0 rhel7-2" [ style = dashed] ++"rsc2_stop_0 rhel7-2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/no_quorum_demote.exp b/cts/scheduler/no_quorum_demote.exp +new file mode 100644 +index 0000000..245574c +--- /dev/null ++++ b/cts/scheduler/no_quorum_demote.exp +@@ -0,0 +1,81 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/no_quorum_demote.scores b/cts/scheduler/no_quorum_demote.scores +new file mode 100644 +index 0000000..dddc57b +--- /dev/null ++++ b/cts/scheduler/no_quorum_demote.scores +@@ -0,0 +1,72 @@ ++Allocation scores: ++Using the original execution date of: 2020-06-17 17:26:35Z ++pcmk__clone_allocate: rsc1-clone 
allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1-clone allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-1: 11 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-2: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:0 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-1: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-2: 6 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:1 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-1: 10 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-2: 5 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:2 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-1: 10 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-2: 5 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:3 allocation score on rhel7-5: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-1: 10 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-2: 5 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-3: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-4: 0 ++pcmk__clone_allocate: rsc1:4 allocation score on rhel7-5: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-1: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-2: 0 
++pcmk__native_allocate: Fencing allocation score on rhel7-3: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-4: 0 ++pcmk__native_allocate: Fencing allocation score on rhel7-5: 0 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-1: 11 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:0 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-2: 6 ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:1 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:2 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:3 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-1: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-2: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-3: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-4: -INFINITY ++pcmk__native_allocate: rsc1:4 allocation score on rhel7-5: -INFINITY ++pcmk__native_allocate: rsc2 allocation score on 
rhel7-1: 0 ++pcmk__native_allocate: rsc2 allocation score on rhel7-2: 0 ++pcmk__native_allocate: rsc2 allocation score on rhel7-3: 0 ++pcmk__native_allocate: rsc2 allocation score on rhel7-4: 0 ++pcmk__native_allocate: rsc2 allocation score on rhel7-5: 0 ++rsc1:0 promotion score on rhel7-1: 10 ++rsc1:1 promotion score on rhel7-2: 5 ++rsc1:2 promotion score on none: 0 ++rsc1:3 promotion score on none: 0 ++rsc1:4 promotion score on none: 0 +diff --git a/cts/scheduler/no_quorum_demote.summary b/cts/scheduler/no_quorum_demote.summary +new file mode 100644 +index 0000000..9b69ca1 +--- /dev/null ++++ b/cts/scheduler/no_quorum_demote.summary +@@ -0,0 +1,38 @@ ++Using the original execution date of: 2020-06-17 17:26:35Z ++ ++Current cluster status: ++Online: [ rhel7-1 rhel7-2 ] ++OFFLINE: [ rhel7-3 rhel7-4 rhel7-5 ] ++ ++ Fencing (stonith:fence_xvm): Started rhel7-1 ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ Masters: [ rhel7-1 ] ++ Slaves: [ rhel7-2 ] ++ Stopped: [ rhel7-3 rhel7-4 rhel7-5 ] ++ rsc2 (ocf::pacemaker:Dummy): Started rhel7-2 ++ ++Transition Summary: ++ * Stop Fencing ( rhel7-1 ) due to no quorum ++ * Demote rsc1:0 ( Master -> Slave rhel7-1 ) ++ * Stop rsc2 ( rhel7-2 ) due to no quorum ++ ++Executing cluster transition: ++ * Resource action: Fencing stop on rhel7-1 ++ * Resource action: rsc1 cancel=10000 on rhel7-1 ++ * Pseudo action: rsc1-clone_demote_0 ++ * Resource action: rsc2 stop on rhel7-2 ++ * Resource action: rsc1 demote on rhel7-1 ++ * Pseudo action: rsc1-clone_demoted_0 ++ * Resource action: rsc1 monitor=11000 on rhel7-1 ++Using the original execution date of: 2020-06-17 17:26:35Z ++ ++Revised cluster status: ++Online: [ rhel7-1 rhel7-2 ] ++OFFLINE: [ rhel7-3 rhel7-4 rhel7-5 ] ++ ++ Fencing (stonith:fence_xvm): Stopped ++ Clone Set: rsc1-clone [rsc1] (promotable) ++ Slaves: [ rhel7-1 rhel7-2 ] ++ Stopped: [ rhel7-3 rhel7-4 rhel7-5 ] ++ rsc2 (ocf::pacemaker:Dummy): Stopped ++ +diff --git a/cts/scheduler/no_quorum_demote.xml 
b/cts/scheduler/no_quorum_demote.xml +new file mode 100644 +index 0000000..8497f0a +--- /dev/null ++++ b/cts/scheduler/no_quorum_demote.xml +@@ -0,0 +1,224 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + + +From 015b5c012ce41a8035260522f67127135937baa2 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 28 May 2020 12:13:20 -0500 +Subject: [PATCH 17/20] Doc: Pacemaker Explained: document + no-quorum-policy=demote + +--- + doc/Pacemaker_Explained/en-US/Ch-Options.txt | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt +index faefe7c..b158f00 100644 +--- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt ++++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt +@@ -181,6 +181,8 @@ What to do when the cluster does not have quorum. 
Allowed values: + * +ignore:+ continue all resource management + * +freeze:+ continue resource management, but don't recover resources from nodes not in the affected partition + * +stop:+ stop all resources in the affected cluster partition ++* +demote:+ demote promotable resources and stop all other resources in the ++ affected cluster partition + * +suicide:+ fence all nodes in the affected cluster partition + + | batch-limit | 0 | +-- +1.8.3.1 + + +From 01c5ec67e0a6ee1395d771f8fbaf619a44ab2ca2 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 2 Jun 2020 19:23:11 -0500 +Subject: [PATCH 18/20] Low: scheduler: match initial no-quorum-policy struct + value to actual default + +It doesn't matter in practice since the actual default is parsed from the +option definition via pe_pref(), but it's confusing to have them different. +--- + lib/pengine/status.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/pengine/status.c b/lib/pengine/status.c +index 8dc5095..ca34639 100644 +--- a/lib/pengine/status.c ++++ b/lib/pengine/status.c +@@ -360,7 +360,7 @@ set_working_set_defaults(pe_working_set_t * data_set) + + data_set->order_id = 1; + data_set->action_id = 1; +- data_set->no_quorum_policy = no_quorum_freeze; ++ data_set->no_quorum_policy = no_quorum_stop; + + data_set->flags = 0x0ULL; + set_bit(data_set->flags, pe_flag_stop_rsc_orphans); +-- +1.8.3.1 + + +From 7eec572dbba3ade059e5206a2ba496f9da3a68bc Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 5 Jun 2020 10:02:05 -0500 +Subject: [PATCH 19/20] Build: libcrmcommon: bump CRM feature set + +... 
for op_expression/rsc_expression rules, on-fail=demote, and +no-quorum-policy=demote +--- + include/crm/crm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/crm/crm.h b/include/crm/crm.h +index d2ffb61..dc2adc1 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -51,7 +51,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.3.0" ++# define CRM_FEATURE_SET "3.4.0" + + # define EOS '\0' + # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) +-- +1.8.3.1 + + +From c4429d86ef00bb1749adc476f9c6874e3f5d95b9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Jun 2020 14:38:35 -0500 +Subject: [PATCH 20/20] Log: scheduler: downgrade "active on" messages to trace + +... now that they're logged more often via pcmk__rsc_is_filtered() +--- + lib/pengine/native.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index f0d83d7..20658a0 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -359,22 +359,22 @@ native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const c + gboolean + native_active(pe_resource_t * rsc, gboolean all) + { +- GListPtr gIter = rsc->running_on; +- +- for (; gIter != NULL; gIter = gIter->next) { ++ for (GList *gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { + pe_node_t *a_node = (pe_node_t *) gIter->data; + + if (a_node->details->unclean) { +- crm_debug("Resource %s: node %s is unclean", rsc->id, a_node->details->uname); ++ pe_rsc_trace(rsc, "Resource %s: node %s is unclean", ++ rsc->id, a_node->details->uname); + return TRUE; + } else if (a_node->details->online == FALSE) { +- crm_debug("Resource %s: node %s is offline", rsc->id, a_node->details->uname); ++ pe_rsc_trace(rsc, "Resource %s: node %s is offline", ++ rsc->id, a_node->details->uname); + 
} else { +- crm_debug("Resource %s active on %s", rsc->id, a_node->details->uname); ++ pe_rsc_trace(rsc, "Resource %s active on %s", ++ rsc->id, a_node->details->uname); + return TRUE; + } + } +- + return FALSE; + } + +-- +1.8.3.1 + diff --git a/SOURCES/002-status-deletion.patch b/SOURCES/002-status-deletion.patch deleted file mode 100644 index 1a31cdc..0000000 --- a/SOURCES/002-status-deletion.patch +++ /dev/null @@ -1,2064 +0,0 @@ -From 9e4addbcb67ea8e36ba853f1e401d8a6cb6a0aa3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 20 Dec 2019 11:34:06 -0600 -Subject: [PATCH 1/8] Refactor: scheduler: reduce code duplication when - displaying resources - -Refactor native_output_string() to use GString, for readability and -maintainability. Refactor common_print() to use it, to reduce duplication and -ensure displays are consistent. - -This makes a couple small changes in how things are shown: - -* If pe_print_dev is enabled (a debugging flag not actually used by anything), - the additional resource fields are shown with the resource flags rather than - their own parenthesized list. - -* The new output model is now consistent with the legacy print model in - displaying resource flags with commas (not spaces) between them. ---- - include/crm/pengine/common.h | 24 +-- - lib/pengine/native.c | 410 +++++++++++++++++-------------------------- - 2 files changed, 168 insertions(+), 266 deletions(-) - -diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h -index e497f9c..48c2b66 100644 ---- a/include/crm/pengine/common.h -+++ b/include/crm/pengine/common.h -@@ -1,22 +1,12 @@ --/* -- * Copyright 2004-2018 the Pacemaker project contributors -+/* -+ * Copyright 2004-2019 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
-- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU Lesser General Public -- * License as published by the Free Software Foundation; either -- * version 2 of the License, or (at your option) any later version. -- * -- * This software is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * General Public License for more details. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with this library; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * This source code is licensed under the GNU Lesser General Public License -+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. - */ -+ - #ifndef PE_COMMON__H - # define PE_COMMON__H - -@@ -104,7 +94,7 @@ enum pe_print_options { - pe_print_html = 0x0002, - pe_print_ncurses = 0x0004, - pe_print_printf = 0x0008, -- pe_print_dev = 0x0010, -+ pe_print_dev = 0x0010, // Debugging (@COMPAT probably not useful) - pe_print_details = 0x0020, - pe_print_max_details = 0x0040, - pe_print_rsconly = 0x0080, -diff --git a/lib/pengine/native.c b/lib/pengine/native.c -index fdb98e0..8fd98bc 100644 ---- a/lib/pengine/native.c -+++ b/lib/pengine/native.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -490,165 +490,172 @@ native_print_xml(resource_t * rsc, const char *pre_text, long options, void *pri - } - } - --/* making this inline rather than a macro prevents a coverity "unreachable" -- * warning on the first usage -- */ --static inline const char * --comma_if(int i) -+// Append a flag to resource description string's flags list -+static bool -+add_output_flag(GString *s, const char *flag_desc, bool have_flags) - { -- return i? ", " : ""; -+ g_string_append(s, (have_flags? ", " : " (")); -+ g_string_append(s, flag_desc); -+ return true; - } - --static char * --flags_string(pe_resource_t *rsc, pe_node_t *node, long options, -- const char *target_role) -+// Append a node name to resource description string's node list -+static bool -+add_output_node(GString *s, const char *node, bool have_nodes) - { -- char *flags[6] = { NULL, }; -- char *result = NULL; -- int ndx = 0; -+ g_string_append(s, (have_nodes? " " : " [ ")); -+ g_string_append(s, node); -+ return true; -+} -+ -+/*! -+ * \internal -+ * \brief Create a string description of a resource -+ * -+ * \param[in] rsc Resource to describe -+ * \param[in] name Desired identifier for the resource -+ * \param[in] node If not NULL, node that resource is "on" -+ * \param[in] options Bitmask of pe_print_* -+ * \param[in] target_role Resource's target role -+ * \param[in] show_nodes Whether to display nodes when multiply active -+ * -+ * \return Newly allocated string description of resource -+ * \note Caller must free the result with g_free(). 
-+ */ -+static gchar * -+native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node, -+ long options, const char *target_role, bool show_nodes) -+{ -+ const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ const char *provider = NULL; -+ const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); -+ char *retval = NULL; -+ GString *outstr = NULL; -+ bool have_flags = false; -+ -+ CRM_CHECK(name != NULL, name = "unknown"); -+ CRM_CHECK(kind != NULL, kind = "unknown"); -+ CRM_CHECK(class != NULL, class = "unknown"); -+ -+ if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { -+ provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -+ } - -- if (node && node->details->online == FALSE && node->details->unclean) { -- flags[ndx++] = strdup("UNCLEAN"); -+ if (is_set(options, pe_print_rsconly) -+ || pcmk__list_of_multiple(rsc->running_on)) { -+ node = NULL; - } - -+ // We need a string of at least this size -+ outstr = g_string_sized_new(strlen(name) + strlen(class) + strlen(kind) -+ + (provider? (strlen(provider) + 2) : 0) -+ + (node? strlen(node->details->uname) + 1 : 0) -+ + 11); -+ -+ // Resource name and agent -+ g_string_printf(outstr, "%s\t(%s%s%s:%s):\t", name, class, -+ /* @COMPAT This should be a single ':' (see CLBZ#5395) but -+ * to avoid breaking anything relying on it, we're keeping -+ * it like this until the next minor version bump. -+ */ -+ (provider? "::" : ""), (provider? 
provider : ""), kind); -+ -+ // State on node -+ if (is_set(rsc->flags, pe_rsc_orphan)) { -+ g_string_append(outstr, " ORPHANED"); -+ } -+ if (is_set(rsc->flags, pe_rsc_failed)) { -+ enum rsc_role_e role = native_displayable_role(rsc); -+ -+ if (role > RSC_ROLE_SLAVE) { -+ g_string_append_printf(outstr, " FAILED %s", role2text(role)); -+ } else { -+ g_string_append(outstr, " FAILED"); -+ } -+ } else { -+ g_string_append(outstr, native_displayable_state(rsc, options)); -+ } -+ if (node) { -+ g_string_append_printf(outstr, " %s", node->details->uname); -+ } -+ -+ // Flags, as: ( [...]) -+ if (node && !(node->details->online) && node->details->unclean) { -+ have_flags = add_output_flag(outstr, "UNCLEAN", have_flags); -+ } - if (is_set(options, pe_print_pending)) { - const char *pending_task = native_pending_task(rsc); - - if (pending_task) { -- flags[ndx++] = strdup(pending_task); -+ have_flags = add_output_flag(outstr, pending_task, have_flags); - } - } -- - if (target_role) { - enum rsc_role_e target_role_e = text2role(target_role); - -- /* Ignore target role Started, as it is the default anyways -- * (and would also allow a Master to be Master). -- * Show if target role limits our abilities. */ -+ /* Only show target role if it limits our abilities (i.e. ignore -+ * Started, as it is the default anyways, and doesn't prevent the -+ * resource from becoming Master). 
-+ */ - if (target_role_e == RSC_ROLE_STOPPED) { -- flags[ndx++] = strdup("disabled"); -+ have_flags = add_output_flag(outstr, "disabled", have_flags); - - } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable) - && target_role_e == RSC_ROLE_SLAVE) { -- flags[ndx++] = crm_strdup_printf("target-role:%s", target_role); -+ have_flags = add_output_flag(outstr, "target-role:", have_flags); -+ g_string_append(outstr, target_role); - } - } -- - if (is_set(rsc->flags, pe_rsc_block)) { -- flags[ndx++] = strdup("blocked"); -- -+ have_flags = add_output_flag(outstr, "blocked", have_flags); - } else if (is_not_set(rsc->flags, pe_rsc_managed)) { -- flags[ndx++] = strdup("unmanaged"); -+ have_flags = add_output_flag(outstr, "unmanaged", have_flags); - } -- - if (is_set(rsc->flags, pe_rsc_failure_ignored)) { -- flags[ndx++] = strdup("failure ignored"); -+ have_flags = add_output_flag(outstr, "failure ignored", have_flags); - } -- -- if (ndx > 0) { -- char *total = g_strjoinv(" ", flags); -- -- result = crm_strdup_printf(" (%s)", total); -- g_free(total); -- } -- -- while (--ndx >= 0) { -- free(flags[ndx]); -- } -- return result; --} -- --static char * --native_output_string(resource_t *rsc, const char *name, node_t *node, long options, -- const char *target_role) { -- const char *desc = NULL; -- const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -- const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); -- enum rsc_role_e role = native_displayable_role(rsc); -- -- char *retval = NULL; -- -- char *unames = NULL; -- char *provider = NULL; -- const char *orphan = NULL; -- char *role_s = NULL; -- char *node_s = NULL; -- char *print_dev_s = NULL; -- char *flags_s = NULL; -- -- CRM_ASSERT(kind != NULL); -- -- if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { -- provider = crm_strdup_printf("::%s", crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER)); -+ if (is_set(options, pe_print_dev)) { -+ if (is_set(options, pe_rsc_provisional)) { -+ 
have_flags = add_output_flag(outstr, "provisional", have_flags); -+ } -+ if (is_not_set(options, pe_rsc_runnable)) { -+ have_flags = add_output_flag(outstr, "non-startable", have_flags); -+ } -+ have_flags = add_output_flag(outstr, "variant:", have_flags); -+ g_string_append_printf(outstr, "%s priority:%f", -+ crm_element_name(rsc->xml), -+ (double) (rsc->priority)); - } -- -- if (is_set(rsc->flags, pe_rsc_orphan)) { -- orphan = " ORPHANED"; -+ if (have_flags) { -+ g_string_append(outstr, ")"); - } - -- if (role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) { -- role_s = crm_strdup_printf(" FAILED %s", role2text(role)); -- } else if (is_set(rsc->flags, pe_rsc_failed)) { -- role_s = crm_strdup_printf(" FAILED"); -- } else { -- role_s = crm_strdup_printf(" %s", native_displayable_state(rsc, options)); -- } -+ // User-supplied description -+ if (is_set(options, pe_print_rsconly) -+ || pcmk__list_of_multiple(rsc->running_on)) { -+ const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC); - -- if (node) { -- node_s = crm_strdup_printf(" %s", node->details->uname); -+ if (desc) { -+ g_string_append_printf(outstr, " %s", desc); -+ } - } - -- if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- desc = crm_element_value(rsc->xml, XML_ATTR_DESC); -- } -+ if (show_nodes && is_not_set(options, pe_print_rsconly) -+ && pcmk__list_of_multiple(rsc->running_on)) { -+ bool have_nodes = false; - -- if (is_not_set(options, pe_print_rsconly) && g_list_length(rsc->running_on) > 1) { -- GListPtr gIter = rsc->running_on; -- gchar **arr = calloc(g_list_length(rsc->running_on)+1, sizeof(gchar *)); -- int i = 0; -- char *total = NULL; -+ for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) { -+ pe_node_t *n = (pe_node_t *) iter->data; - -- for (; gIter != NULL; gIter = gIter->next) { -- node_t *n = (node_t *) gIter->data; -- arr[i] = (gchar *) strdup(n->details->uname); -- i++; -+ have_nodes = add_output_node(outstr, 
n->details->uname, have_nodes); -+ } -+ if (have_nodes) { -+ g_string_append(outstr, " ]"); - } -- -- total = g_strjoinv(" ", arr); -- unames = crm_strdup_printf(" [ %s ]", total); -- -- g_free(total); -- g_strfreev(arr); - } - -- if (is_set(options, pe_print_dev)) { -- print_dev_s = crm_strdup_printf(" (%s%svariant=%s, priority=%f)", -- is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "", -- is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ", -- crm_element_name(rsc->xml), (double)rsc->priority); -- } -- -- flags_s = flags_string(rsc, node, options, target_role); -- -- retval = crm_strdup_printf("%s\t(%s%s:%s):\t%s%s%s%s%s%s%s%s", -- name, class, -- provider ? provider : "", -- kind, -- orphan ? orphan : "", -- role_s, -- node_s ? node_s : "", -- print_dev_s ? print_dev_s : "", -- flags_s ? flags_s : "", -- desc ? " " : "", desc ? desc : "", -- unames ? unames : ""); -- -- free(provider); -- free(role_s); -- free(node_s); -- free(unames); -- free(print_dev_s); -- free(flags_s); -- -+ retval = outstr->str; -+ g_string_free(outstr, FALSE); - return retval; - } - -@@ -656,7 +663,6 @@ void - pe__common_output_html(pcmk__output_t *out, resource_t * rsc, - const char *name, node_t *node, long options) - { -- char *s = NULL; - const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); - const char *target_role = NULL; - -@@ -675,10 +681,6 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc, - target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); - } - -- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- node = NULL; -- } -- - if (is_not_set(rsc->flags, pe_rsc_managed)) { - cl = "rsc-managed"; - -@@ -698,10 +700,14 @@ pe__common_output_html(pcmk__output_t *out, resource_t * rsc, - cl = "rsc-ok"; - } - -- s = native_output_string(rsc, name, node, options, target_role); -- list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL); -- pcmk_create_html_node(list_node, "span", NULL, 
cl, s); -- free(s); -+ { -+ gchar *s = native_output_string(rsc, name, node, options, target_role, -+ true); -+ -+ list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL); -+ pcmk_create_html_node(list_node, "span", NULL, cl, s); -+ g_free(s); -+ } - - if (is_set(options, pe_print_details)) { - GHashTableIter iter; -@@ -744,7 +750,6 @@ void - pe__common_output_text(pcmk__output_t *out, resource_t * rsc, - const char *name, node_t *node, long options) - { -- char *s = NULL; - const char *target_role = NULL; - - CRM_ASSERT(rsc->variant == pe_native); -@@ -758,13 +763,13 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc, - target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); - } - -- if (is_set(options, pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- node = NULL; -- } -+ { -+ gchar *s = native_output_string(rsc, name, node, options, target_role, -+ true); - -- s = native_output_string(rsc, name, node, options, target_role); -- out->list_item(out, NULL, "%s", s); -- free(s); -+ out->list_item(out, NULL, "%s", s); -+ g_free(s); -+ } - - if (is_set(options, pe_print_details)) { - GHashTableIter iter; -@@ -806,22 +811,14 @@ pe__common_output_text(pcmk__output_t *out, resource_t * rsc, - void - common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data) - { -- const char *desc = NULL; -- const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -- const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); - const char *target_role = NULL; -- enum rsc_role_e role = native_displayable_role(rsc); -- -- int offset = 0; -- int flagOffset = 0; -- char buffer[LINE_MAX]; -- char flagBuffer[LINE_MAX]; - - CRM_ASSERT(rsc->variant == pe_native); -- CRM_ASSERT(kind != NULL); - - if (rsc->meta) { -- const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC); -+ const char *is_internal = g_hash_table_lookup(rsc->meta, -+ 
XML_RSC_ATTR_INTERNAL_RSC); -+ - if (crm_is_true(is_internal) && is_not_set(options, pe_print_implicit)) { - crm_trace("skipping print of internal resource %s", rsc->id); - return; -@@ -829,17 +826,13 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n - target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); - } - -- if (pre_text == NULL && (options & pe_print_printf)) { -- pre_text = " "; -- } -- - if (options & pe_print_xml) { - native_print_xml(rsc, pre_text, options, print_data); - return; - } - -- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- node = NULL; -+ if ((pre_text == NULL) && (options & pe_print_printf)) { -+ pre_text = " "; - } - - if (options & pe_print_html) { -@@ -849,10 +842,10 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n - } else if (is_set(rsc->flags, pe_rsc_failed)) { - status_print(""); - -- } else if (rsc->variant == pe_native && (rsc->running_on == NULL)) { -+ } else if (rsc->running_on == NULL) { - status_print(""); - -- } else if (g_list_length(rsc->running_on) > 1) { -+ } else if (pcmk__list_of_multiple(rsc->running_on)) { - status_print(""); - - } else if (is_set(rsc->flags, pe_rsc_failure_ignored)) { -@@ -863,106 +856,29 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n - } - } - -- if(pre_text) { -- offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", pre_text); -- } -- offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", name); -- offset += snprintf(buffer + offset, LINE_MAX - offset, "\t(%s", class); -- if (is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) { -- const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); -- offset += snprintf(buffer + offset, LINE_MAX - offset, "::%s", prov); -- } -- offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s):\t", kind); -- if(is_set(rsc->flags, pe_rsc_orphan)) { -- offset += snprintf(buffer + 
offset, LINE_MAX - offset, " ORPHANED "); -- } -- if(role > RSC_ROLE_SLAVE && is_set(rsc->flags, pe_rsc_failed)) { -- offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED %s", role2text(role)); -- } else if(is_set(rsc->flags, pe_rsc_failed)) { -- offset += snprintf(buffer + offset, LINE_MAX - offset, "FAILED"); -- } else { -- const char *rsc_state = native_displayable_state(rsc, options); -- -- offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_state); -- } -- -- if(node) { -- offset += snprintf(buffer + offset, LINE_MAX - offset, " %s", node->details->uname); -- -- if (node->details->online == FALSE && node->details->unclean) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%sUNCLEAN", comma_if(flagOffset)); -- } -- } -- -- if (options & pe_print_pending) { -- const char *pending_task = native_pending_task(rsc); -- -- if (pending_task) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%s%s", comma_if(flagOffset), pending_task); -- } -- } -- -- if (target_role) { -- enum rsc_role_e target_role_e = text2role(target_role); -- -- /* Ignore target role Started, as it is the default anyways -- * (and would also allow a Master to be Master). -- * Show if target role limits our abilities. 
*/ -- if (target_role_e == RSC_ROLE_STOPPED) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%sdisabled", comma_if(flagOffset)); -- -- } else if (is_set(uber_parent(rsc)->flags, pe_rsc_promotable) -- && target_role_e == RSC_ROLE_SLAVE) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%starget-role:%s", comma_if(flagOffset), target_role); -- } -- } -- -- if (is_set(rsc->flags, pe_rsc_block)) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%sblocked", comma_if(flagOffset)); -- -- } else if (is_not_set(rsc->flags, pe_rsc_managed)) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%sunmanaged", comma_if(flagOffset)); -- } -- -- if(is_set(rsc->flags, pe_rsc_failure_ignored)) { -- flagOffset += snprintf(flagBuffer + flagOffset, LINE_MAX - flagOffset, -- "%sfailure ignored", comma_if(flagOffset)); -- } -- -- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- desc = crm_element_value(rsc->xml, XML_ATTR_DESC); -- } -- -- CRM_LOG_ASSERT(offset > 0); -- if(flagOffset > 0) { -- status_print("%s (%s)%s%s", buffer, flagBuffer, desc?" ":"", desc?desc:""); -- } else { -- status_print("%s%s%s", buffer, desc?" ":"", desc?desc:""); -+ { -+ gchar *resource_s = native_output_string(rsc, name, node, options, -+ target_role, false); -+ status_print("%s%s", (pre_text? 
pre_text : ""), resource_s); -+ g_free(resource_s); - } - - #if CURSES_ENABLED -- if ((options & pe_print_rsconly) || g_list_length(rsc->running_on) > 1) { -- /* Done */ -- -- } else if (options & pe_print_ncurses) { -+ if (is_set(options, pe_print_ncurses) -+ && is_not_set(options, pe_print_rsconly) -+ && !pcmk__list_of_multiple(rsc->running_on)) { - /* coverity[negative_returns] False positive */ - move(-1, 0); - } - #endif - -- if (options & pe_print_html) { -+ if (is_set(options, pe_print_html)) { - status_print(" "); - } - -- if ((options & pe_print_rsconly)) { -+ if (is_not_set(options, pe_print_rsconly) -+ && pcmk__list_of_multiple(rsc->running_on)) { - -- } else if (g_list_length(rsc->running_on) > 1) { - GListPtr gIter = rsc->running_on; - int counter = 0; - -@@ -1025,10 +941,6 @@ common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *n - GHashTableIter iter; - node_t *n = NULL; - -- status_print("%s\t(%s%svariant=%s, priority=%f)", pre_text, -- is_set(rsc->flags, pe_rsc_provisional) ? "provisional, " : "", -- is_set(rsc->flags, pe_rsc_runnable) ? "" : "non-startable, ", -- crm_element_name(rsc->xml), (double)rsc->priority); - status_print("%s\tAllowed Nodes", pre_text); - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&n)) { --- -1.8.3.1 - - -From 41e911be8ea9151b3f0758c2c22c0e69b8b78d93 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 19 Dec 2019 17:18:41 -0600 -Subject: [PATCH 2/8] Log: scheduler: drop redundant trace messages - -We logged "applying placement constraints" three times. 
---- - lib/pacemaker/pcmk_sched_allocate.c | 17 ++++------------- - 1 file changed, 4 insertions(+), 13 deletions(-) - -diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c -index ca43c71..dde8b69 100644 ---- a/lib/pacemaker/pcmk_sched_allocate.c -+++ b/lib/pacemaker/pcmk_sched_allocate.c -@@ -623,21 +623,15 @@ check_actions(pe_working_set_t * data_set) - } - } - --static gboolean -+static void - apply_placement_constraints(pe_working_set_t * data_set) - { -- GListPtr gIter = NULL; -- -- crm_trace("Applying constraints..."); -- -- for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { -+ for (GList *gIter = data_set->placement_constraints; -+ gIter != NULL; gIter = gIter->next) { - pe__location_t *cons = gIter->data; - - cons->rsc_lh->cmds->rsc_location(cons->rsc_lh, cons); - } -- -- return TRUE; -- - } - - static gboolean -@@ -994,10 +988,7 @@ stage2(pe_working_set_t * data_set) - { - GListPtr gIter = NULL; - -- crm_trace("Applying placement constraints"); -- -- gIter = data_set->nodes; -- for (; gIter != NULL; gIter = gIter->next) { -+ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; - - if (node == NULL) { --- -1.8.3.1 - - -From 7fe136e19b5018d609beb8bad4e34234739572c9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Sat, 7 Dec 2019 12:13:11 -0600 -Subject: [PATCH 3/8] Refactor: libcrmcommon: convenience functions for list - length comparisons - -... 
for efficiency and readability ---- - include/crm/common/internal.h | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h -index da2c7d7..484c836 100644 ---- a/include/crm/common/internal.h -+++ b/include/crm/common/internal.h -@@ -126,6 +126,20 @@ crm_getpid_s() - return crm_strdup_printf("%lu", (unsigned long) getpid()); - } - -+// More efficient than g_list_length(list) == 1 -+static inline bool -+pcmk__list_of_1(GList *list) -+{ -+ return list && (list->next == NULL); -+} -+ -+// More efficient than g_list_length(list) > 1 -+static inline bool -+pcmk__list_of_multiple(GList *list) -+{ -+ return list && (list->next != NULL); -+} -+ - /* convenience functions for failure-related node attributes */ - - #define CRM_FAIL_COUNT_PREFIX "fail-count" --- -1.8.3.1 - - -From 9ff4f6bca540576f0a3333c959e8014ed168353f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 16 Dec 2019 14:13:30 -0600 -Subject: [PATCH 4/8] Refactor: libcrmcommon: add convenience macros for - plurals - -I've avoided making s_if_plural() an official API due to its hackiness, but -it really is the best solution for now. Promote it to pcmk__plural_s(), along -with a companion macro pcmk__plural_alt() for more complicated plurals. ---- - include/crm/common/internal.h | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h -index 484c836..ee560c9 100644 ---- a/include/crm/common/internal.h -+++ b/include/crm/common/internal.h -@@ -107,6 +107,29 @@ bool crm_compress_string(const char *data, int length, int max, char **result, - unsigned int *result_len); - gint crm_alpha_sort(gconstpointer a, gconstpointer b); - -+/* Correctly displaying singular or plural is complicated; consider "1 node has" -+ * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g. 
-+ * -+ * if (a == 1) { -+ * crm_info("singular message"): -+ * } else { -+ * crm_info("plural message"); -+ * } -+ * -+ * though even that's not sufficient for all languages besides English (if we -+ * ever desire to do translations of output and log messages). But the following -+ * convenience macros are "good enough" and more concise for many cases. -+ */ -+ -+/* Example: -+ * crm_info("Found %d %s", nentries, -+ * pcmk__plural_alt(nentries, "entry", "entries")); -+ */ -+#define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2)) -+ -+// Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes)); -+#define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s") -+ - static inline char * - crm_concat(const char *prefix, const char *suffix, char join) - { --- -1.8.3.1 - - -From 0378db5030400202e59b2bae0dabd65d00a3e9c8 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 12 Dec 2019 20:50:50 -0600 -Subject: [PATCH 5/8] Log: controller: improve join messages - ---- - daemons/controld/controld_fsa.c | 81 ++++---- - daemons/controld/controld_join_dc.c | 383 +++++++++++++++++++++--------------- - 2 files changed, 268 insertions(+), 196 deletions(-) - -diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c -index 6760224..b985fa9 100644 ---- a/daemons/controld/controld_fsa.c -+++ b/daemons/controld/controld_fsa.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -460,12 +460,53 @@ log_fsa_input(fsa_data_t * stored_msg) - } - } - -+static void -+check_join_counts(fsa_data_t *msg_data) -+{ -+ int count; -+ guint npeers; -+ -+ count = crmd_join_phase_count(crm_join_finalized); -+ if (count > 0) { -+ crm_err("%d cluster node%s failed to confirm join", -+ count, pcmk__plural_s(count)); -+ crmd_join_phase_log(LOG_NOTICE); -+ return; -+ } -+ -+ npeers = crm_active_peers(); -+ count = crmd_join_phase_count(crm_join_confirmed); -+ if (count == npeers) { -+ if (npeers == 1) { -+ crm_debug("Sole active cluster node is fully joined"); -+ } else { -+ crm_debug("All %d active cluster nodes are fully joined", count); -+ } -+ -+ } else if (count > npeers) { -+ crm_err("New election needed because more nodes confirmed join " -+ "than are in membership (%d > %u)", count, npeers); -+ register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); -+ -+ } else if (saved_ccm_membership_id != crm_peer_seq) { -+ crm_info("New join needed because membership changed (%llu -> %llu)", -+ saved_ccm_membership_id, crm_peer_seq); -+ register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); -+ -+ } else { -+ crm_warn("Only %d of %u active cluster nodes fully joined " -+ "(%d did not respond to offer)", -+ count, npeers, crmd_join_phase_count(crm_join_welcomed)); -+ } -+} -+ - long long - do_state_transition(long long actions, - enum crmd_fsa_state cur_state, - enum crmd_fsa_state next_state, fsa_data_t * msg_data) - { - int level = LOG_INFO; -+ int count = 0; - long long tmp = actions; - gboolean clear_recovery_bit = TRUE; - -@@ -563,13 +604,14 @@ do_state_transition(long long actions, - crm_warn("Progressed to state %s after %s", - fsa_state2string(next_state), fsa_cause2string(cause)); - } -- if (crmd_join_phase_count(crm_join_welcomed) > 0) { -- crm_warn("%u cluster nodes failed to respond" -- " to the join offer.", crmd_join_phase_count(crm_join_welcomed)); -+ count = crmd_join_phase_count(crm_join_welcomed); -+ if (count > 0) { -+ crm_warn("%d 
cluster node%s failed to respond to join offer", -+ count, pcmk__plural_s(count)); - crmd_join_phase_log(LOG_NOTICE); - - } else { -- crm_debug("All %d cluster nodes responded to the join offer.", -+ crm_debug("All cluster nodes (%d) responded to join offer", - crmd_join_phase_count(crm_join_integrated)); - } - break; -@@ -581,34 +623,7 @@ do_state_transition(long long actions, - crm_info("Progressed to state %s after %s", - fsa_state2string(next_state), fsa_cause2string(cause)); - } -- -- if (crmd_join_phase_count(crm_join_finalized) > 0) { -- crm_err("%u cluster nodes failed to confirm their join.", -- crmd_join_phase_count(crm_join_finalized)); -- crmd_join_phase_log(LOG_NOTICE); -- -- } else if (crmd_join_phase_count(crm_join_confirmed) -- == crm_active_peers()) { -- crm_debug("All %u cluster nodes are" -- " eligible to run resources.", crm_active_peers()); -- -- } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) { -- crm_err("We have more confirmed nodes than our membership does: %d vs. 
%d", -- crmd_join_phase_count(crm_join_confirmed), crm_active_peers()); -- register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); -- -- } else if (saved_ccm_membership_id != crm_peer_seq) { -- crm_info("Membership changed: %llu -> %llu - join restart", -- saved_ccm_membership_id, crm_peer_seq); -- register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); -- -- } else { -- crm_warn("Only %u of %u cluster " -- "nodes are eligible to run resources - continue %d", -- crmd_join_phase_count(crm_join_confirmed), -- crm_active_peers(), crmd_join_phase_count(crm_join_welcomed)); -- } --/* initialize_join(FALSE); */ -+ check_join_counts(msg_data); - break; - - case S_STOPPING: -diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c -index 988aaa6..54324b2 100644 ---- a/daemons/controld/controld_join_dc.c -+++ b/daemons/controld/controld_join_dc.c -@@ -26,7 +26,11 @@ void finalize_join_for(gpointer key, gpointer value, gpointer user_data); - void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); - gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); - -+/* Numeric counter used to identify join rounds (an unsigned int would be -+ * appropriate, except we get and set it in XML as int) -+ */ - static int current_join_id = 0; -+ - unsigned long long saved_ccm_membership_id = 0; - - void -@@ -34,12 +38,7 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase - { - enum crm_join_phase last = 0; - -- if(node == NULL) { -- crm_err("Could not update join because node not specified" -- CRM_XS " join-%u source=%s phase=%s", -- current_join_id, source, crm_join_phase_str(phase)); -- return; -- } -+ CRM_CHECK(node != NULL, return); - - /* Remote nodes do not participate in joins */ - if (is_set(node->flags, crm_remote_node)) { -@@ -49,21 +48,23 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase - last = node->join; - - 
if(phase == last) { -- crm_trace("%s: Node %s[%u] - join-%u phase still %s", -- source, node->uname, node->id, current_join_id, -- crm_join_phase_str(last)); -+ crm_trace("Node %s join-%d phase is still %s " -+ CRM_XS " nodeid=%u source=%s", -+ node->uname, current_join_id, crm_join_phase_str(last), -+ node->id, source); - - } else if ((phase <= crm_join_none) || (phase == (last + 1))) { - node->join = phase; -- crm_info("%s: Node %s[%u] - join-%u phase %s -> %s", -- source, node->uname, node->id, current_join_id, -- crm_join_phase_str(last), crm_join_phase_str(phase)); -+ crm_trace("Node %s join-%d phase is now %s (was %s) " -+ CRM_XS " nodeid=%u source=%s", -+ node->uname, current_join_id, crm_join_phase_str(phase), -+ crm_join_phase_str(last), node->id, source); - - } else { -- crm_err("Could not update join for node %s because phase transition invalid " -- CRM_XS " join-%u source=%s node_id=%u last=%s new=%s", -- node->uname, current_join_id, source, node->id, -- crm_join_phase_str(last), crm_join_phase_str(phase)); -+ crm_warn("Rejecting join-%d phase update for node %s because " -+ "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", -+ current_join_id, node->uname, crm_join_phase_str(last), -+ crm_join_phase_str(phase), node->id, source); - } - } - -@@ -73,9 +74,7 @@ initialize_join(gboolean before) - GHashTableIter iter; - crm_node_t *peer = NULL; - -- /* clear out/reset a bunch of stuff */ -- crm_debug("join-%d: Initializing join data (flag=%s)", -- current_join_id, before ? 
"true" : "false"); -+ crm_debug("Starting new join round join-%d", current_join_id); - - g_hash_table_iter_init(&iter, crm_peer_cache); - while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { -@@ -128,7 +127,9 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) - - CRM_ASSERT(member != NULL); - if (crm_is_peer_active(member) == FALSE) { -- crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state); -+ crm_info("Not making join-%d offer to inactive node %s", -+ current_join_id, -+ (member->uname? member->uname : "with unknown name")); - if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) { - /* You would think this unsafe, but in fact this plus an - * active resource is what causes it to be fenced. -@@ -145,17 +146,21 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) - } - - if (member->uname == NULL) { -- crm_info("No recipient for welcome message.(Node uuid:%s)", member->uuid); -+ crm_info("Not making join-%d offer to node uuid %s with unknown name", -+ current_join_id, member->uuid); - return; - } - - if (saved_ccm_membership_id != crm_peer_seq) { - saved_ccm_membership_id = crm_peer_seq; -- crm_info("Making join offers based on membership %llu", crm_peer_seq); -+ crm_info("Making join-%d offers based on membership event %llu", -+ current_join_id, crm_peer_seq); - } - - if(user_data && member->join > crm_join_none) { -- crm_info("Skipping %s: already known %d", member->uname, member->join); -+ crm_info("Not making join-%d offer to already known node %s (%s)", -+ current_join_id, member->uname, -+ crm_join_phase_str(member->join)); - return; - } - -@@ -166,14 +171,11 @@ join_make_offer(gpointer key, gpointer value, gpointer user_data) - // Advertise our feature set so the joining node can bail if not compatible - crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - -- /* send the welcome */ -- crm_info("join-%d: Sending offer to %s", current_join_id, 
member->uname); -- -+ crm_info("Sending join-%d offer to %s", current_join_id, member->uname); - send_cluster_message(member, crm_msg_crmd, offer, TRUE); - free_xml(offer); - - crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed); -- /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */ - } - - /* A_DC_JOIN_OFFER_ALL */ -@@ -183,6 +185,8 @@ do_dc_join_offer_all(long long action, - enum crmd_fsa_state cur_state, - enum crmd_fsa_input current_input, fsa_data_t * msg_data) - { -+ int count; -+ - /* Reset everyone's status back to down or in_ccm in the CIB. - * Any nodes that are active in the CIB but not in the cluster membership - * will be seen as offline by the scheduler anyway. -@@ -197,9 +201,11 @@ do_dc_join_offer_all(long long action, - } - g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); - -+ count = crmd_join_phase_count(crm_join_welcomed); -+ crm_info("Waiting on join-%d requests from %d outstanding node%s", -+ current_join_id, count, pcmk__plural_s(count)); -+ - // Don't waste time by invoking the scheduler yet -- crm_info("join-%d: Waiting on %d outstanding join acks", -- current_join_id, crmd_join_phase_count(crm_join_welcomed)); - } - - /* A_DC_JOIN_OFFER_ONE */ -@@ -211,50 +217,40 @@ do_dc_join_offer_one(long long action, - { - crm_node_t *member; - ha_msg_input_t *welcome = NULL; -- -- const char *op = NULL; -+ int count; - const char *join_to = NULL; - -- if (msg_data->data) { -- welcome = fsa_typed_data(fsa_dt_ha_msg); -- -- } else { -- crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes"); -+ if (msg_data->data == NULL) { -+ crm_info("Making join-%d offers to any unconfirmed nodes " -+ "because an unknown node joined", current_join_id); - g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); - check_join_state(cur_state, __FUNCTION__); - return; - } - -+ welcome = fsa_typed_data(fsa_dt_ha_msg); - if (welcome == NULL) { -- crm_err("Attempt to send welcome message without a 
message to reply to!"); -+ // fsa_typed_data() already logged an error - return; - } - - join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); - if (join_to == NULL) { -- crm_err("Attempt to send welcome message without a host to reply to!"); -+ crm_err("Can't make join-%d offer to unknown node", current_join_id); - return; - } -- - member = crm_get_peer(0, join_to); -- op = crm_element_value(welcome->msg, F_CRM_TASK); -- if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { -- /* note: it _is_ possible that a node will have been -- * sick or starting up when the original offer was made. -- * however, it will either re-announce itself in due course -- * _or_ we can re-store the original offer on the client. -- */ -- crm_trace("(Re-)offering membership to %s...", join_to); -- } - -- crm_info("join-%d: Processing %s request from %s in state %s", -- current_join_id, op, join_to, fsa_state2string(cur_state)); -+ /* It is possible that a node will have been sick or starting up when the -+ * original offer was made. However, it will either re-announce itself in -+ * due course, or we can re-store the original offer on the client. -+ */ - - crm_update_peer_join(__FUNCTION__, member, crm_join_none); - join_make_offer(NULL, member, NULL); - -- /* always offer to the DC (ourselves) -- * this ensures the correct value for max_generation_from -+ /* If the offer isn't to the local node, make an offer to the local node as -+ * well, to ensure the correct value for max_generation_from. 
- */ - if (strcmp(join_to, fsa_our_uname) != 0) { - member = crm_get_peer(0, fsa_our_uname); -@@ -266,9 +262,11 @@ do_dc_join_offer_one(long long action, - */ - abort_transition(INFINITY, tg_restart, "Node join", NULL); - -+ count = crmd_join_phase_count(crm_join_welcomed); -+ crm_info("Waiting on join-%d requests from %d outstanding node%s", -+ current_join_id, count, pcmk__plural_s(count)); -+ - // Don't waste time by invoking the scheduler yet -- crm_debug("Waiting on %d outstanding join acks for join-%d", -- crmd_join_phase_count(crm_join_welcomed), current_join_id); - } - - static int -@@ -301,22 +299,31 @@ do_dc_join_filter_offer(long long action, - - int cmp = 0; - int join_id = -1; -+ int count = 0; - gboolean ack_nack_bool = TRUE; -- const char *ack_nack = CRMD_JOINSTATE_MEMBER; - ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); - - const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); - const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); - const char *join_version = crm_element_value(join_ack->msg, - XML_ATTR_CRM_VERSION); -+ crm_node_t *join_node = NULL; - -- crm_node_t *join_node = crm_get_peer(0, join_from); -- -- crm_debug("Processing req from %s", join_from); -+ if (join_from == NULL) { -+ crm_err("Ignoring invalid join request without node name"); -+ return; -+ } -+ join_node = crm_get_peer(0, join_from); - -- generation = join_ack->xml; - crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); -+ if (join_id != current_join_id) { -+ crm_debug("Ignoring join-%d request from %s because we are on join-%d", -+ join_id, join_from, current_join_id); -+ check_join_state(cur_state, __FUNCTION__); -+ return; -+ } - -+ generation = join_ack->xml; - if (max_generation_xml != NULL && generation != NULL) { - int lpc = 0; - -@@ -331,68 +338,71 @@ do_dc_join_filter_offer(long long action, - } - } - -- if (join_id != current_join_id) { -- crm_debug("Invalid response from %s: join-%d vs. 
join-%d", -- join_from, join_id, current_join_id); -- check_join_state(cur_state, __FUNCTION__); -- return; -+ if (ref == NULL) { -+ ref = "none"; // for logging only -+ } - -- } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) { -- crm_err("Node %s is not a member", join_from); -+ if (crm_is_peer_active(join_node) == FALSE) { -+ crm_err("Rejecting join-%d request from inactive node %s " -+ CRM_XS " ref=%s", join_id, join_from, ref); - ack_nack_bool = FALSE; - - } else if (generation == NULL) { -- crm_err("Generation was NULL"); -+ crm_err("Rejecting invalid join-%d request from node %s " -+ "missing CIB generation " CRM_XS " ref=%s", -+ join_id, join_from, ref); - ack_nack_bool = FALSE; - - } else if ((join_version == NULL) - || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { -- crm_err("Node %s feature set (%s) is incompatible with ours (%s)", -- join_from, (join_version? join_version : "pre-3.1.0"), -- CRM_FEATURE_SET); -+ crm_err("Rejecting join-%d request from node %s because feature set %s" -+ " is incompatible with ours (%s) " CRM_XS " ref=%s", -+ join_id, join_from, (join_version? 
join_version : "pre-3.1.0"), -+ CRM_FEATURE_SET, ref); - ack_nack_bool = FALSE; - - } else if (max_generation_xml == NULL) { -+ crm_debug("Accepting join-%d request from %s " -+ "(with first CIB generation) " CRM_XS " ref=%s", -+ join_id, join_from, ref); - max_generation_xml = copy_xml(generation); - max_generation_from = strdup(join_from); - - } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { -- crm_debug("%s has a better generation number than" -- " the current max %s", join_from, max_generation_from); -- if (max_generation_xml) { -- crm_log_xml_debug(max_generation_xml, "Max generation"); -- } -- crm_log_xml_debug(generation, "Their generation"); -+ crm_debug("Accepting join-%d request from %s (with better " -+ "CIB generation than current best from %s) " CRM_XS " ref=%s", -+ join_id, join_from, max_generation_from, ref); -+ crm_log_xml_debug(max_generation_xml, "Old max generation"); -+ crm_log_xml_debug(generation, "New max generation"); - - free(max_generation_from); - free_xml(max_generation_xml); - - max_generation_from = strdup(join_from); - max_generation_xml = copy_xml(join_ack->xml); -+ -+ } else { -+ crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", -+ join_id, join_from, ref); - } - - if (ack_nack_bool == FALSE) { -- /* NACK this client */ -- ack_nack = CRMD_JOINSTATE_NACK; - crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack); -- crm_err("Rejecting cluster join request from %s " CRM_XS -- " NACK join-%d ref=%s", join_from, join_id, ref); -- -+ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK); - } else { -- crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); - crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated); -+ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); - } - -- crm_update_peer_expected(__FUNCTION__, join_node, ack_nack); -- -- crm_debug("%u nodes have been integrated into join-%d", -- 
crmd_join_phase_count(crm_join_integrated), join_id); -- -+ count = crmd_join_phase_count(crm_join_integrated); -+ crm_debug("%d node%s currently integrated in join-%d", -+ count, pcmk__plural_s(count), join_id); - - if (check_join_state(cur_state, __FUNCTION__) == FALSE) { - // Don't waste time by invoking the scheduler yet -- crm_debug("join-%d: Still waiting on %d outstanding offers", -- join_id, crmd_join_phase_count(crm_join_welcomed)); -+ count = crmd_join_phase_count(crm_join_welcomed); -+ crm_debug("Waiting on join-%d requests from %d outstanding node%s", -+ join_id, count, pcmk__plural_s(count)); - } - } - -@@ -405,21 +415,24 @@ do_dc_join_finalize(long long action, - { - char *sync_from = NULL; - int rc = pcmk_ok; -+ int count_welcomed = crmd_join_phase_count(crm_join_welcomed); -+ int count_integrated = crmd_join_phase_count(crm_join_integrated); - - /* This we can do straight away and avoid clients timing us out - * while we compute the latest CIB - */ -- crm_debug("Finalizing join-%d for %d clients", -- current_join_id, crmd_join_phase_count(crm_join_integrated)); -- -- crmd_join_phase_log(LOG_INFO); -- if (crmd_join_phase_count(crm_join_welcomed) != 0) { -- crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed)); -+ if (count_welcomed != 0) { -+ crm_debug("Waiting on join-%d requests from %d outstanding node%s " -+ "before finalizing join", current_join_id, count_welcomed, -+ pcmk__plural_s(count_welcomed)); -+ crmd_join_phase_log(LOG_DEBUG); - /* crmd_fsa_stall(FALSE); Needed? 
*/ - return; - -- } else if (crmd_join_phase_count(crm_join_integrated) == 0) { -- /* Nothing to do */ -+ } else if (count_integrated == 0) { -+ crm_debug("Finalization not needed for join-%d at the current time", -+ current_join_id); -+ crmd_join_phase_log(LOG_DEBUG); - check_join_state(fsa_state, __FUNCTION__); - return; - } -@@ -430,8 +443,9 @@ do_dc_join_finalize(long long action, - } - - if (is_set(fsa_input_register, R_IN_TRANSITION)) { -- crm_warn("Delaying response to cluster join offer while transition in progress " -- CRM_XS " join-%d", current_join_id); -+ crm_warn("Delaying join-%d finalization while transition in progress", -+ current_join_id); -+ crmd_join_phase_log(LOG_DEBUG); - crmd_fsa_stall(FALSE); - return; - } -@@ -440,18 +454,20 @@ do_dc_join_finalize(long long action, - /* ask for the agreed best CIB */ - sync_from = strdup(max_generation_from); - set_bit(fsa_input_register, R_CIB_ASKED); -- crm_notice("Syncing the Cluster Information Base from %s to rest of cluster " -- CRM_XS " join-%d", sync_from, current_join_id); -- crm_log_xml_notice(max_generation_xml, "Requested version"); -+ crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", -+ current_join_id, count_integrated, -+ pcmk__plural_s(count_integrated), sync_from); -+ crm_log_xml_notice(max_generation_xml, "Requested CIB version"); - - } else { - /* Send _our_ CIB out to everyone */ - sync_from = strdup(fsa_our_uname); -- crm_info("join-%d: Syncing our CIB to the rest of the cluster", -- current_join_id); -- crm_log_xml_debug(max_generation_xml, "Requested version"); -+ crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", -+ current_join_id, count_integrated, -+ pcmk__plural_s(count_integrated)); -+ crm_log_xml_debug(max_generation_xml, "Requested CIB version"); - } -- -+ crmd_join_phase_log(LOG_DEBUG); - - rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); - fsa_register_cib_callback(rc, FALSE, sync_from, 
finalize_sync_callback); -@@ -463,26 +479,33 @@ finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, voi - CRM_LOG_ASSERT(-EPERM != rc); - clear_bit(fsa_input_register, R_CIB_ASKED); - if (rc != pcmk_ok) { -- do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR), -- "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc)); -+ do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), -+ "Could not sync CIB from %s in join-%d: %s", -+ (char *) user_data, current_join_id, pcmk_strerror(rc)); - - /* restart the whole join process */ - register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); - -- } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) { -+ } else if (!AM_I_DC) { -+ crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); -+ -+ } else if (fsa_state != S_FINALIZE_JOIN) { -+ crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)", -+ current_join_id, fsa_state2string(fsa_state)); -+ -+ } else { - set_bit(fsa_input_register, R_HAVE_CIB); - clear_bit(fsa_input_register, R_CIB_ASKED); - - /* make sure dc_uuid is re-set to us */ - if (check_join_state(fsa_state, __FUNCTION__) == FALSE) { -- crm_debug("Notifying %d clients of join-%d results", -- crmd_join_phase_count(crm_join_integrated), current_join_id); -+ int count_integrated = crmd_join_phase_count(crm_join_integrated); -+ -+ crm_debug("Notifying %d node%s of join-%d results", -+ count_integrated, pcmk__plural_s(count_integrated), -+ current_join_id); - g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); - } -- -- } else { -- crm_debug("No longer the DC in S_FINALIZE_JOIN: %s in %s", -- AM_I_DC ? 
"DC" : "controller", fsa_state2string(fsa_state)); - } - } - -@@ -492,11 +515,14 @@ join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * outp - fsa_data_t *msg_data = NULL; - - if (rc == pcmk_ok) { -- crm_debug("Join update %d complete", call_id); -+ crm_debug("join-%d node history update (via CIB call %d) complete", -+ current_join_id, call_id); - check_join_state(fsa_state, __FUNCTION__); - - } else { -- crm_err("Join update %d failed", call_id); -+ crm_err("join-%d node history update (via CIB call %d) failed: %s " -+ "(next transition may determine resource status incorrectly)", -+ current_join_id, call_id, pcmk_strerror(rc)); - crm_log_xml_debug(msg, "failed"); - register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); - } -@@ -515,61 +541,75 @@ do_dc_join_ack(long long action, - - const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); - const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); -- crm_node_t *peer = crm_get_peer(0, join_from); -+ crm_node_t *peer = NULL; - -- if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) { -- crm_debug("Ignoring op=%s message from %s", op, join_from); -+ // Sanity checks -+ if (join_from == NULL) { -+ crm_warn("Ignoring message received without node identification"); -+ return; -+ } -+ if (op == NULL) { -+ crm_warn("Ignoring message received from %s without task", join_from); - return; - } - -- crm_trace("Processing ack from %s", join_from); -- crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); -+ if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { -+ crm_debug("Ignoring '%s' message from %s while waiting for '%s'", -+ op, join_from, CRM_OP_JOIN_CONFIRM); -+ return; -+ } - -+ if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { -+ crm_warn("Ignoring join confirmation from %s without valid join ID", -+ join_from); -+ return; -+ } -+ -+ peer = crm_get_peer(0, join_from); - if (peer->join != crm_join_finalized) { -- crm_info("Join not in progress: ignoring 
join-%d from %s (phase = %d)", -- join_id, join_from, peer->join); -+ crm_info("Ignoring out-of-sequence join-%d confirmation from %s " -+ "(currently %s not %s)", -+ join_id, join_from, crm_join_phase_str(peer->join), -+ crm_join_phase_str(crm_join_finalized)); - return; -+ } - -- } else if (join_id != current_join_id) { -- crm_err("Invalid response from %s: join-%d vs. join-%d", -- join_from, join_id, current_join_id); -+ if (join_id != current_join_id) { -+ crm_err("Rejecting join-%d confirmation from %s " -+ "because currently on join-%d", -+ join_id, join_from, current_join_id); - crm_update_peer_join(__FUNCTION__, peer, crm_join_nack); - return; - } - - crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed); - -- crm_info("join-%d: Updating node state to %s for %s", -- join_id, CRMD_JOINSTATE_MEMBER, join_from); -- -- /* update CIB with the current LRM status from the node -- * We don't need to notify the TE of these updates, a transition will -- * be started in due time -+ /* Update CIB with node's current executor state. A new transition will be -+ * triggered later, when the CIB notifies us of the change. 
- */ - erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); -- - if (safe_str_eq(join_from, fsa_our_uname)) { - xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); - - if (now_dc_lrmd_state != NULL) { -- crm_debug("Local executor state updated from query"); - fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, - cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); - free_xml(now_dc_lrmd_state); -+ crm_debug("Updating local node history for join-%d " -+ "from query result (via CIB call %d)", join_id, call_id); - } else { -- crm_warn("Local executor state updated from join acknowledgement because query failed"); - fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, - cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); -+ crm_warn("Updating local node history from join-%d confirmation " -+ "because query failed (via CIB call %d)", join_id, call_id); - } - } else { -- crm_debug("Executor state for %s updated from join acknowledgement", -- join_from); - fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, - cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); -+ crm_debug("Updating node history for %s from join-%d confirmation " -+ "(via CIB call %d)", join_from, join_id, call_id); - } -- - fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback); -- crm_debug("join-%d: Registered callback for CIB status update %d", join_id, call_id); - } - - void -@@ -581,17 +621,16 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) - const char *join_to = join_node->uname; - - if(join_node->join != crm_join_integrated) { -- crm_trace("Skipping %s in state %d", join_to, join_node->join); -+ crm_trace("Not updating non-integrated node %s (%s) for join-%d", -+ join_to, crm_join_phase_str(join_node->join), -+ current_join_id); - return; - } - -- /* make sure a node entry exists for the new node */ -- crm_trace("Creating node entry for %s", join_to); -- -+ 
crm_trace("Updating node state for %s", join_to); - tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); - set_uuid(tmp1, XML_ATTR_UUID, join_node); - crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); -- - fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); - free_xml(tmp1); - -@@ -610,11 +649,10 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) - return; - } - -- /* send the ack/nack to the node */ -- acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); -- -- crm_debug("join-%d: ACK'ing join request from %s", -+ // Acknowledge node's join request -+ crm_debug("Acknowledging join-%d request from %s", - current_join_id, join_to); -+ acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); - crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); - crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized); - crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); -@@ -629,11 +667,11 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source) - { - static unsigned long long highest_seq = 0; - -- crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); -- - if (saved_ccm_membership_id != crm_peer_seq) { -- crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)", -- source, saved_ccm_membership_id, crm_peer_seq, highest_seq); -+ crm_debug("join-%d: Membership changed from %llu to %llu " -+ CRM_XS " highest=%llu state=%s for=%s", -+ current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq, -+ fsa_state2string(cur_state), source); - if(highest_seq < crm_peer_seq) { - /* Don't spam the FSA with duplicates */ - highest_seq = crm_peer_seq; -@@ -642,34 +680,53 @@ check_join_state(enum crmd_fsa_state cur_state, const char *source) - - } else if (cur_state == S_INTEGRATION) { - if (crmd_join_phase_count(crm_join_welcomed) == 0) { -- crm_debug("join-%d: Integration of %d peers complete: %s", -- current_join_id, crmd_join_phase_count(crm_join_integrated), source); -+ int count = 
crmd_join_phase_count(crm_join_integrated); -+ -+ crm_debug("join-%d: Integration of %d peer%s complete " -+ CRM_XS " state=%s for=%s", -+ current_join_id, count, pcmk__plural_s(count), -+ fsa_state2string(cur_state), source); - register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); - return TRUE; - } - - } else if (cur_state == S_FINALIZE_JOIN) { - if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { -- crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); -+ crm_debug("join-%d: Delaying finalization until we have CIB " -+ CRM_XS " state=%s for=%s", -+ current_join_id, fsa_state2string(cur_state), source); - return TRUE; - - } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { -- crm_debug("join-%d: Still waiting on %d welcomed nodes", -- current_join_id, crmd_join_phase_count(crm_join_welcomed)); -+ int count = crmd_join_phase_count(crm_join_welcomed); -+ -+ crm_debug("join-%d: Still waiting on %d welcomed node%s " -+ CRM_XS " state=%s for=%s", -+ current_join_id, count, pcmk__plural_s(count), -+ fsa_state2string(cur_state), source); - crmd_join_phase_log(LOG_DEBUG); - - } else if (crmd_join_phase_count(crm_join_integrated) != 0) { -- crm_debug("join-%d: Still waiting on %d integrated nodes", -- current_join_id, crmd_join_phase_count(crm_join_integrated)); -+ int count = crmd_join_phase_count(crm_join_integrated); -+ -+ crm_debug("join-%d: Still waiting on %d integrated node%s " -+ CRM_XS " state=%s for=%s", -+ current_join_id, count, pcmk__plural_s(count), -+ fsa_state2string(cur_state), source); - crmd_join_phase_log(LOG_DEBUG); - - } else if (crmd_join_phase_count(crm_join_finalized) != 0) { -- crm_debug("join-%d: Still waiting on %d finalized nodes", -- current_join_id, crmd_join_phase_count(crm_join_finalized)); -+ int count = crmd_join_phase_count(crm_join_finalized); -+ -+ crm_debug("join-%d: Still waiting on %d finalized node%s " -+ CRM_XS " state=%s for=%s", -+ current_join_id, count, pcmk__plural_s(count), 
-+ fsa_state2string(cur_state), source); - crmd_join_phase_log(LOG_DEBUG); - - } else { -- crm_debug("join-%d complete: %s", current_join_id, source); -+ crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", -+ current_join_id, fsa_state2string(cur_state), source); - register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); - return TRUE; - } --- -1.8.3.1 - - -From 034b27734d05e8aeddb586f2daaede8314f9516f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 13 Dec 2019 10:39:34 -0600 -Subject: [PATCH 6/8] Log: controller: improve CIB status deletion messages - ---- - daemons/controld/controld_utils.c | 25 +++++++++++++++++-------- - 1 file changed, 17 insertions(+), 8 deletions(-) - -diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c -index 3acd488..bb8ace9 100644 ---- a/daemons/controld/controld_utils.c -+++ b/daemons/controld/controld_utils.c -@@ -751,14 +751,18 @@ update_dc(xmlNode * msg) - return TRUE; - } - --#define STATUS_PATH_MAX 512 - static void - erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) - { - char *xpath = user_data; - -- do_crm_log_unlikely(rc == 0 ? 
LOG_DEBUG : LOG_NOTICE, -- "Deletion of \"%s\": %s (rc=%d)", xpath, pcmk_strerror(rc), rc); -+ if (rc == 0) { -+ crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", -+ xpath, call_id); -+ } else { -+ crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " -+ CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); -+ } - } - - #define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" -@@ -766,14 +770,19 @@ erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void - void - erase_status_tag(const char *uname, const char *tag, int options) - { -- if (fsa_cib_conn && uname) { -+ CRM_CHECK(uname != NULL, return); -+ -+ if (fsa_cib_conn == NULL) { -+ crm_warn("Unable to delete CIB '%s' section for node %s: " -+ "no CIB connection", tag, uname); -+ } else { - int call_id; - char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); - -- crm_info("Deleting %s status entries for %s " CRM_XS " xpath=%s", -- tag, uname, xpath); -- call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, -- cib_quorum_override | cib_xpath | options); -+ options |= cib_quorum_override|cib_xpath; -+ call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); -+ crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " -+ CRM_XS " xpath=%s", tag, uname, call_id, xpath); - fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); - // CIB library handles freeing xpath - } --- -1.8.3.1 - - -From 73510818bc9905dcc130893198590b10c0067425 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 13 Dec 2019 10:36:56 -0600 -Subject: [PATCH 7/8] Refactor: controller: move erase_status_tag() to - controld_based.c - ---- - daemons/controld/controld_based.c | 38 ++++++++++++++++++++++++++++++++++++++ - daemons/controld/controld_utils.c | 37 ------------------------------------- - 2 files changed, 38 insertions(+), 37 deletions(-) - -diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c 
-index e6a4612..1db5650 100644 ---- a/daemons/controld/controld_based.c -+++ b/daemons/controld/controld_based.c -@@ -168,3 +168,41 @@ controld_action_is_recordable(const char *action) - } - return TRUE; - } -+ -+static void -+erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, -+ void *user_data) -+{ -+ char *xpath = user_data; -+ -+ if (rc == 0) { -+ crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", -+ xpath, call_id); -+ } else { -+ crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " -+ CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); -+ } -+} -+ -+#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" -+ -+void -+erase_status_tag(const char *uname, const char *tag, int options) -+{ -+ CRM_CHECK(uname != NULL, return); -+ -+ if (fsa_cib_conn == NULL) { -+ crm_warn("Unable to delete CIB '%s' section for node %s: " -+ "no CIB connection", tag, uname); -+ } else { -+ int call_id; -+ char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); -+ -+ options |= cib_quorum_override|cib_xpath; -+ call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); -+ crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " -+ CRM_XS " xpath=%s", tag, uname, call_id, xpath); -+ fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); -+ // CIB library handles freeing xpath -+ } -+} -diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c -index bb8ace9..4ed6aeb 100644 ---- a/daemons/controld/controld_utils.c -+++ b/daemons/controld/controld_utils.c -@@ -751,43 +751,6 @@ update_dc(xmlNode * msg) - return TRUE; - } - --static void --erase_xpath_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) --{ -- char *xpath = user_data; -- -- if (rc == 0) { -- crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", -- xpath, call_id); -- } else { -- crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " -- 
CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); -- } --} -- --#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" -- --void --erase_status_tag(const char *uname, const char *tag, int options) --{ -- CRM_CHECK(uname != NULL, return); -- -- if (fsa_cib_conn == NULL) { -- crm_warn("Unable to delete CIB '%s' section for node %s: " -- "no CIB connection", tag, uname); -- } else { -- int call_id; -- char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); -- -- options |= cib_quorum_override|cib_xpath; -- call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); -- crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " -- CRM_XS " xpath=%s", tag, uname, call_id, xpath); -- fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); -- // CIB library handles freeing xpath -- } --} -- - void crmd_peer_down(crm_node_t *peer, bool full) - { - if(full && peer->state == NULL) { --- -1.8.3.1 - - -From c4cc759e733db894957d039f65572cc21704224f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 13 Dec 2019 11:16:25 -0600 -Subject: [PATCH 8/8] Refactor: controller: improve efficiency when deleting - node state - -Rename erase_status_xpath() to controld_delete_node_state() to follow current -naming practice. - -Instead of passing it a node_state subsection name, pass a new enum value -indicating what to erase (resource history, transient node attributes, or -both). This allows us to improve the log messages further, as well as improving -efficiency when both need to be cleared. 
---- - daemons/controld/controld_based.c | 69 +++++++++++++++++++++++++++-------- - daemons/controld/controld_callbacks.c | 8 +++- - daemons/controld/controld_execd.c | 3 +- - daemons/controld/controld_fencing.c | 5 +-- - daemons/controld/controld_join_dc.c | 3 +- - daemons/controld/controld_remote_ra.c | 24 ++++++------ - daemons/controld/controld_utils.h | 11 +++++- - 7 files changed, 87 insertions(+), 36 deletions(-) - -diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c -index 1db5650..008a02d 100644 ---- a/daemons/controld/controld_based.c -+++ b/daemons/controld/controld_based.c -@@ -170,39 +170,76 @@ controld_action_is_recordable(const char *action) - } - - static void --erase_xpath_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, -- void *user_data) -+cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, -+ void *user_data) - { -- char *xpath = user_data; -+ char *desc = user_data; - - if (rc == 0) { -- crm_debug("Deletion of '%s' from CIB (via CIB call %d) succeeded", -- xpath, call_id); -+ crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); - } else { -- crm_warn("Deletion of '%s' from CIB (via CIB call %d) failed: %s " -- CRM_XS " rc=%d", xpath, call_id, pcmk_strerror(rc), rc); -+ crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", -+ desc, call_id, pcmk_strerror(rc), rc); - } - } - --#define XPATH_STATUS_TAG "//node_state[@uname='%s']/%s" -+// Searches for various portions of node_state to delete - -+// Match a particular node's node_state (takes node name 1x) -+#define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" -+ -+// Node's lrm section (name 1x) -+#define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM -+ -+// Node's transient_attributes section (name 1x) -+#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS -+ -+// Everything under node_state (name 1x) -+#define XPATH_NODE_ALL XPATH_NODE_STATE "/*" 
-+ -+/*! -+ * \internal -+ * \brief Delete subsection of a node's CIB node_state -+ * -+ * \param[in] uname Desired node -+ * \param[in] section Subsection of node_state to delete -+ * \param[in] options CIB call options to use -+ */ - void --erase_status_tag(const char *uname, const char *tag, int options) -+controld_delete_node_state(const char *uname, enum controld_section_e section, -+ int options) - { -+ char *xpath = NULL; -+ char *desc = NULL; -+ - CRM_CHECK(uname != NULL, return); -+ switch (section) { -+ case controld_section_lrm: -+ xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); -+ desc = crm_strdup_printf("resource history for node %s", uname); -+ break; -+ case controld_section_attrs: -+ xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); -+ desc = crm_strdup_printf("transient attributes for node %s", uname); -+ break; -+ case controld_section_all: -+ xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); -+ desc = crm_strdup_printf("all state for node %s", uname); -+ break; -+ } - - if (fsa_cib_conn == NULL) { -- crm_warn("Unable to delete CIB '%s' section for node %s: " -- "no CIB connection", tag, uname); -+ crm_warn("Unable to delete %s: no CIB connection", desc); -+ free(desc); - } else { - int call_id; -- char *xpath = crm_strdup_printf(XPATH_STATUS_TAG, uname, tag); - - options |= cib_quorum_override|cib_xpath; - call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); -- crm_info("Deleting CIB '%s' section for node %s (via CIB call %d) " -- CRM_XS " xpath=%s", tag, uname, call_id, xpath); -- fsa_register_cib_callback(call_id, FALSE, xpath, erase_xpath_callback); -- // CIB library handles freeing xpath -+ crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", -+ desc, call_id, xpath); -+ fsa_register_cib_callback(call_id, FALSE, desc, cib_delete_callback); -+ // CIB library handles freeing desc - } -+ free(xpath); - } -diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c -index 
5cbd392..f7e3db2 100644 ---- a/daemons/controld/controld_callbacks.c -+++ b/daemons/controld/controld_callbacks.c -@@ -200,14 +200,18 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - * transient attributes intact until it rejoins. - */ - if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { -- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); -+ controld_delete_node_state(node->uname, -+ controld_section_attrs, -+ cib_scope_local); - } - - } else if(AM_I_DC) { - if (appeared) { - te_trigger_stonith_history_sync(FALSE); - } else { -- erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); -+ controld_delete_node_state(node->uname, -+ controld_section_attrs, -+ cib_scope_local); - } - } - break; -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 46c1958..b7deeae 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1411,7 +1411,8 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys, - } - - /* Now delete the copy in the CIB */ -- erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); -+ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, -+ cib_scope_local); - - /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE - * would result in the scheduler sending us back here again -diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c -index d9b1e1e..9897cf3 100644 ---- a/daemons/controld/controld_fencing.c -+++ b/daemons/controld/controld_fencing.c -@@ -229,9 +229,8 @@ send_stonith_update(crm_action_t *action, const char *target, const char *uuid) - /* Make sure it sticks */ - /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ - -- erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local); -- erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); 
-- -+ controld_delete_node_state(peer->uname, controld_section_all, -+ cib_scope_local); - free_xml(node_state); - return; - } -diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c -index 54324b2..ac6b430 100644 ---- a/daemons/controld/controld_join_dc.c -+++ b/daemons/controld/controld_join_dc.c -@@ -587,7 +587,8 @@ do_dc_join_ack(long long action, - /* Update CIB with node's current executor state. A new transition will be - * triggered later, when the CIB notifies us of the change. - */ -- erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); -+ controld_delete_node_state(join_from, controld_section_lrm, -+ cib_scope_local); - if (safe_str_eq(join_from, fsa_our_uname)) { - xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); - -diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c -index 4fbae45..2d3dfa7 100644 ---- a/daemons/controld/controld_remote_ra.c -+++ b/daemons/controld/controld_remote_ra.c -@@ -181,13 +181,13 @@ remote_node_up(const char *node_name) - CRM_CHECK(node_name != NULL, return); - crm_info("Announcing pacemaker_remote node %s", node_name); - -- /* Clear node's operation history. The node's transient attributes should -- * and normally will be cleared when the node leaves, but since remote node -- * state has a number of corner cases, clear them here as well, to be sure. -+ /* Clear node's entire state (resource history and transient attributes). -+ * The transient attributes should and normally will be cleared when the -+ * node leaves, but since remote node state has a number of corner cases, -+ * clear them here as well, to be sure. 
- */ - call_opt = crmd_cib_smart_opt(); -- erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); -- erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); -+ controld_delete_node_state(node_name, controld_section_all, call_opt); - - /* Clear node's probed attribute */ - update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); -@@ -252,15 +252,15 @@ remote_node_down(const char *node_name, const enum down_opts opts) - /* Purge node from attrd's memory */ - update_attrd_remote_node_removed(node_name, NULL); - -- /* Purge node's transient attributes */ -- erase_status_tag(node_name, XML_TAG_TRANSIENT_NODEATTRS, call_opt); -- -- /* Normally, the LRM operation history should be kept until the node comes -- * back up. However, after a successful fence, we want to clear it, so we -- * don't think resources are still running on the node. -+ /* Normally, only node attributes should be erased, and the resource history -+ * should be kept until the node comes back up. However, after a successful -+ * fence, we want to clear the history as well, so we don't think resources -+ * are still running on the node. 
- */ - if (opts == DOWN_ERASE_LRM) { -- erase_status_tag(node_name, XML_CIB_TAG_LRM, call_opt); -+ controld_delete_node_state(node_name, controld_section_all, call_opt); -+ } else { -+ controld_delete_node_state(node_name, controld_section_attrs, call_opt); - } - - /* Ensure node is in the remote peer cache with lost state */ -diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h -index cf04f13..f902361 100644 ---- a/daemons/controld/controld_utils.h -+++ b/daemons/controld/controld_utils.h -@@ -70,7 +70,6 @@ xmlNode *create_node_state_update(crm_node_t *node, int flags, - xmlNode *parent, const char *source); - void populate_cib_nodes(enum node_update_flags flags, const char *source); - void crm_update_quorum(gboolean quorum, gboolean force_update); --void erase_status_tag(const char *uname, const char *tag, int options); - void controld_close_attrd_ipc(void); - void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); - void update_attrd_remote_node_removed(const char *host, const char *user_name); -@@ -87,6 +86,16 @@ unsigned int cib_op_timeout(void); - bool feature_set_compatible(const char *dc_version, const char *join_version); - bool controld_action_is_recordable(const char *action); - -+// Subsections of node_state -+enum controld_section_e { -+ controld_section_lrm, -+ controld_section_attrs, -+ controld_section_all, -+}; -+ -+void controld_delete_node_state(const char *uname, -+ enum controld_section_e section, int options); -+ - const char *get_node_id(xmlNode *lrm_rsc_op); - - /* Convenience macro for registering a CIB callback --- -1.8.3.1 - diff --git a/SOURCES/003-return-codes.patch b/SOURCES/003-return-codes.patch deleted file mode 100644 index e4448af..0000000 --- a/SOURCES/003-return-codes.patch +++ /dev/null @@ -1,908 +0,0 @@ -From 55ebd895ba2c64713c3db2590ffe22c15b8563e3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 13 Dec 2019 16:05:05 -0600 
-Subject: [PATCH] Refactor: libcrmcommon: introduce new set of return codes - -Since we plan to introduce a high-level public API, it's a good time to -introduce some best practices. - -Most Pacemaker API functions currently return an integer return code, such that -its absolute value is either a system error number or a custom pcmk_err_* -number. This is less than ideal because system error numbers are constrained -only to the positive int range, so there's the possibility (though not noticed -in the wild) that system errors and custom errors could collide. - -The new method being introduced here still uses an integer return code, -but negative values are from a new enumeration, and positive values are -system error numbers. 0 still represents success. - -It is expected that the new method will be used with new functions, and -existing internal functions will be gradually refactored to use it as well. -Existing public API functions can be addressed at the next backward -compatibility break (2.1.0). ---- - include/crm/common/results.h | 59 ++++- - lib/common/results.c | 536 ++++++++++++++++++++++++++++++------------- - tools/crm_error.c | 100 +++++--- - 3 files changed, 493 insertions(+), 202 deletions(-) - -diff --git a/include/crm/common/results.h b/include/crm/common/results.h -index 7a32110..b29a016 100644 ---- a/include/crm/common/results.h -+++ b/include/crm/common/results.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2019 the Pacemaker project contributors -+ * Copyright 2012-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -49,11 +49,21 @@ extern "C" { - /* - * Function return codes - * -+ * Most Pacemaker API functions return an integer return code. There are two -+ * alternative interpretations. The legacy interpration is that the absolute -+ * value of the return code is either a system error number or a custom -+ * pcmk_err_* number. 
This is less than ideal because system error numbers are -+ * constrained only to the positive int range, so there's the possibility -+ * (though not noticed in the wild) that system errors and custom errors could -+ * collide. The new intepretation is that negative values are from the pcmk_rc_e -+ * enum, and positive values are system error numbers. Both use 0 for success. -+ * - * For system error codes, see: - * - /usr/include/asm-generic/errno.h - * - /usr/include/asm-generic/errno-base.h - */ - -+// Legacy custom return codes for Pacemaker API functions (deprecated) - # define pcmk_ok 0 - # define PCMK_ERROR_OFFSET 190 /* Replacements on non-linux systems, see include/portability.h */ - # define PCMK_CUSTOM_OFFSET 200 /* Purely custom codes */ -@@ -75,6 +85,48 @@ extern "C" { - # define pcmk_err_bad_nvpair 216 - # define pcmk_err_unknown_format 217 - -+/*! -+ * \enum pcmk_rc_e -+ * \brief Return codes for Pacemaker API functions -+ * -+ * Any Pacemaker API function documented as returning a "standard Pacemaker -+ * return code" will return pcmk_rc_ok (0) on success, and one of this -+ * enumeration's other (negative) values or a (positive) system error number -+ * otherwise. The custom codes are at -1001 and lower, so that the caller may -+ * use -1 through -1000 for their own custom values if desired. While generally -+ * referred to as "errors", nonzero values simply indicate a result, which might -+ * or might not be an error depending on the calling context. -+ */ -+enum pcmk_rc_e { -+ /* When adding new values, use consecutively lower numbers, update the array -+ * in lib/common/results.c, and test with crm_error. 
-+ */ -+ pcmk_rc_no_quorum = -1017, -+ pcmk_rc_schema_validation = -1016, -+ pcmk_rc_schema_unchanged = -1015, -+ pcmk_rc_transform_failed = -1014, -+ pcmk_rc_old_data = -1013, -+ pcmk_rc_diff_failed = -1012, -+ pcmk_rc_diff_resync = -1011, -+ pcmk_rc_cib_modified = -1010, -+ pcmk_rc_cib_backup = -1009, -+ pcmk_rc_cib_save = -1008, -+ pcmk_rc_cib_corrupt = -1007, -+ pcmk_rc_multiple = -1006, -+ pcmk_rc_node_unknown = -1005, -+ pcmk_rc_already = -1004, -+ pcmk_rc_bad_nvpair = -1003, -+ pcmk_rc_unknown_format = -1002, -+ // Developers: Use a more specific code than pcmk_rc_error whenever possible -+ pcmk_rc_error = -1001, -+ -+ // Values -1 through -1000 reserved for caller use -+ -+ pcmk_rc_ok = 0 -+ -+ // Positive values reserved for system error numbers -+}; -+ - /* - * Exit status codes - * -@@ -150,6 +202,11 @@ typedef enum crm_exit_e { - CRM_EX_MAX = 255, // ensure crm_exit_t can hold this - } crm_exit_t; - -+const char *pcmk_rc_name(int rc); -+const char *pcmk_rc_str(int rc); -+crm_exit_t pcmk_rc2exitc(int rc); -+int pcmk_rc2legacy(int rc); -+int pcmk_legacy2rc(int legacy_rc); - const char *pcmk_strerror(int rc); - const char *pcmk_errorname(int rc); - const char *bz2_strerror(int rc); -diff --git a/lib/common/results.c b/lib/common/results.c -index b80191c..189648f 100644 ---- a/lib/common/results.c -+++ b/lib/common/results.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -22,148 +22,14 @@ - #include - #include - -+// @COMPAT Legacy function return codes -+ -+//! 
\deprecated Use standard return codes and pcmk_rc_name() instead - const char * - pcmk_errorname(int rc) - { -- int error = abs(rc); -- -- switch (error) { -- case E2BIG: return "E2BIG"; -- case EACCES: return "EACCES"; -- case EADDRINUSE: return "EADDRINUSE"; -- case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; -- case EAFNOSUPPORT: return "EAFNOSUPPORT"; -- case EAGAIN: return "EAGAIN"; -- case EALREADY: return "EALREADY"; -- case EBADF: return "EBADF"; -- case EBADMSG: return "EBADMSG"; -- case EBUSY: return "EBUSY"; -- case ECANCELED: return "ECANCELED"; -- case ECHILD: return "ECHILD"; -- case ECOMM: return "ECOMM"; -- case ECONNABORTED: return "ECONNABORTED"; -- case ECONNREFUSED: return "ECONNREFUSED"; -- case ECONNRESET: return "ECONNRESET"; -- /* case EDEADLK: return "EDEADLK"; */ -- case EDESTADDRREQ: return "EDESTADDRREQ"; -- case EDOM: return "EDOM"; -- case EDQUOT: return "EDQUOT"; -- case EEXIST: return "EEXIST"; -- case EFAULT: return "EFAULT"; -- case EFBIG: return "EFBIG"; -- case EHOSTDOWN: return "EHOSTDOWN"; -- case EHOSTUNREACH: return "EHOSTUNREACH"; -- case EIDRM: return "EIDRM"; -- case EILSEQ: return "EILSEQ"; -- case EINPROGRESS: return "EINPROGRESS"; -- case EINTR: return "EINTR"; -- case EINVAL: return "EINVAL"; -- case EIO: return "EIO"; -- case EISCONN: return "EISCONN"; -- case EISDIR: return "EISDIR"; -- case ELIBACC: return "ELIBACC"; -- case ELOOP: return "ELOOP"; -- case EMFILE: return "EMFILE"; -- case EMLINK: return "EMLINK"; -- case EMSGSIZE: return "EMSGSIZE"; --#ifdef EMULTIHOP // Not available on OpenBSD -- case EMULTIHOP: return "EMULTIHOP"; --#endif -- case ENAMETOOLONG: return "ENAMETOOLONG"; -- case ENETDOWN: return "ENETDOWN"; -- case ENETRESET: return "ENETRESET"; -- case ENETUNREACH: return "ENETUNREACH"; -- case ENFILE: return "ENFILE"; -- case ENOBUFS: return "ENOBUFS"; -- case ENODATA: return "ENODATA"; -- case ENODEV: return "ENODEV"; -- case ENOENT: return "ENOENT"; -- case ENOEXEC: return "ENOEXEC"; -- case ENOKEY: 
return "ENOKEY"; -- case ENOLCK: return "ENOLCK"; --#ifdef ENOLINK // Not available on OpenBSD -- case ENOLINK: return "ENOLINK"; --#endif -- case ENOMEM: return "ENOMEM"; -- case ENOMSG: return "ENOMSG"; -- case ENOPROTOOPT: return "ENOPROTOOPT"; -- case ENOSPC: return "ENOSPC"; -- case ENOSR: return "ENOSR"; -- case ENOSTR: return "ENOSTR"; -- case ENOSYS: return "ENOSYS"; -- case ENOTBLK: return "ENOTBLK"; -- case ENOTCONN: return "ENOTCONN"; -- case ENOTDIR: return "ENOTDIR"; -- case ENOTEMPTY: return "ENOTEMPTY"; -- case ENOTSOCK: return "ENOTSOCK"; -- /* case ENOTSUP: return "ENOTSUP"; */ -- case ENOTTY: return "ENOTTY"; -- case ENOTUNIQ: return "ENOTUNIQ"; -- case ENXIO: return "ENXIO"; -- case EOPNOTSUPP: return "EOPNOTSUPP"; -- case EOVERFLOW: return "EOVERFLOW"; -- case EPERM: return "EPERM"; -- case EPFNOSUPPORT: return "EPFNOSUPPORT"; -- case EPIPE: return "EPIPE"; -- case EPROTO: return "EPROTO"; -- case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; -- case EPROTOTYPE: return "EPROTOTYPE"; -- case ERANGE: return "ERANGE"; -- case EREMOTE: return "EREMOTE"; -- case EREMOTEIO: return "EREMOTEIO"; -- -- case EROFS: return "EROFS"; -- case ESHUTDOWN: return "ESHUTDOWN"; -- case ESPIPE: return "ESPIPE"; -- case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; -- case ESRCH: return "ESRCH"; -- case ESTALE: return "ESTALE"; -- case ETIME: return "ETIME"; -- case ETIMEDOUT: return "ETIMEDOUT"; -- case ETXTBSY: return "ETXTBSY"; -- case EUNATCH: return "EUNATCH"; -- case EUSERS: return "EUSERS"; -- /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ -- case EXDEV: return "EXDEV"; -- --#ifdef EBADE -- /* Not available on OSX */ -- case EBADE: return "EBADE"; -- case EBADFD: return "EBADFD"; -- case EBADSLT: return "EBADSLT"; -- case EDEADLOCK: return "EDEADLOCK"; -- case EBADR: return "EBADR"; -- case EBADRQC: return "EBADRQC"; -- case ECHRNG: return "ECHRNG"; --#ifdef EISNAM /* Not available on Illumos/Solaris */ -- case EISNAM: return "EISNAM"; -- case EKEYEXPIRED: return 
"EKEYEXPIRED"; -- case EKEYREJECTED: return "EKEYREJECTED"; -- case EKEYREVOKED: return "EKEYREVOKED"; --#endif -- case EL2HLT: return "EL2HLT"; -- case EL2NSYNC: return "EL2NSYNC"; -- case EL3HLT: return "EL3HLT"; -- case EL3RST: return "EL3RST"; -- case ELIBBAD: return "ELIBBAD"; -- case ELIBMAX: return "ELIBMAX"; -- case ELIBSCN: return "ELIBSCN"; -- case ELIBEXEC: return "ELIBEXEC"; --#ifdef ENOMEDIUM /* Not available on Illumos/Solaris */ -- case ENOMEDIUM: return "ENOMEDIUM"; -- case EMEDIUMTYPE: return "EMEDIUMTYPE"; --#endif -- case ENONET: return "ENONET"; -- case ENOPKG: return "ENOPKG"; -- case EREMCHG: return "EREMCHG"; -- case ERESTART: return "ERESTART"; -- case ESTRPIPE: return "ESTRPIPE"; --#ifdef EUCLEAN /* Not available on Illumos/Solaris */ -- case EUCLEAN: return "EUCLEAN"; --#endif -- case EXFULL: return "EXFULL"; --#endif -- -+ rc = abs(rc); -+ switch (rc) { - case pcmk_err_generic: return "pcmk_err_generic"; - case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; - case pcmk_err_schema_validation: return "pcmk_err_schema_validation"; -@@ -180,24 +46,26 @@ pcmk_errorname(int rc) - case pcmk_err_already: return "pcmk_err_already"; - case pcmk_err_bad_nvpair: return "pcmk_err_bad_nvpair"; - case pcmk_err_unknown_format: return "pcmk_err_unknown_format"; -+ default: return pcmk_rc_name(rc); // system errno - } -- return "Unknown"; - } - -+//! \deprecated Use standard return codes and pcmk_rc_str() instead - const char * - pcmk_strerror(int rc) - { -- int error = abs(rc); -- -- if (error == 0) { -+ if (rc == 0) { - return "OK"; -+ } - -- // Of course error > 0 ... unless someone passed INT_MIN as rc -- } else if ((error > 0) && (error < PCMK_ERROR_OFFSET)) { -- return strerror(error); -+ rc = abs(rc); -+ -+ // Of course rc > 0 ... 
unless someone passed INT_MIN as rc -+ if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) { -+ return strerror(rc); - } - -- switch (error) { -+ switch (rc) { - case pcmk_err_generic: - return "Generic Pacemaker error"; - case pcmk_err_no_quorum: -@@ -253,11 +121,313 @@ pcmk_strerror(int rc) - case ENOKEY: - return "Required key not available"; - } -- - crm_err("Unknown error code: %d", rc); - return "Unknown error"; - } - -+// Standard Pacemaker API return codes -+ -+/* This array is used only for nonzero values of pcmk_rc_e. Its values must be -+ * kept in the exact reverse order of the enum value numbering (i.e. add new -+ * values to the end of the array). -+ */ -+static struct pcmk__rc_info { -+ const char *name; -+ const char *desc; -+ int legacy_rc; -+} pcmk__rcs[] = { -+ { "pcmk_rc_error", -+ "Error", -+ -pcmk_err_generic, -+ }, -+ { "pcmk_rc_unknown_format", -+ "Unknown output format", -+ -pcmk_err_unknown_format, -+ }, -+ { "pcmk_rc_bad_nvpair", -+ "Bad name/value pair given", -+ -pcmk_err_bad_nvpair, -+ }, -+ { "pcmk_rc_already", -+ "Already in requested state", -+ -pcmk_err_already, -+ }, -+ { "pcmk_rc_node_unknown", -+ "Node not found", -+ -pcmk_err_node_unknown, -+ }, -+ { "pcmk_rc_multiple", -+ "Resource active on multiple nodes", -+ -pcmk_err_multiple, -+ }, -+ { "pcmk_rc_cib_corrupt", -+ "Could not parse on-disk configuration", -+ -pcmk_err_cib_corrupt, -+ }, -+ { "pcmk_rc_cib_save", -+ "Could not save new configuration to disk", -+ -pcmk_err_cib_save, -+ }, -+ { "pcmk_rc_cib_backup", -+ "Could not archive previous configuration", -+ -pcmk_err_cib_backup, -+ }, -+ { "pcmk_rc_cib_modified", -+ "On-disk configuration was manually modified", -+ -pcmk_err_cib_modified, -+ }, -+ { "pcmk_rc_diff_resync", -+ "Application of update diff failed, requesting full refresh", -+ -pcmk_err_diff_resync, -+ }, -+ { "pcmk_rc_diff_failed", -+ "Application of update diff failed", -+ -pcmk_err_diff_failed, -+ }, -+ { "pcmk_rc_old_data", -+ "Update was older than existing 
configuration", -+ -pcmk_err_old_data, -+ }, -+ { "pcmk_rc_transform_failed", -+ "Schema transform failed", -+ -pcmk_err_transform_failed, -+ }, -+ { "pcmk_rc_schema_unchanged", -+ "Schema is already the latest available", -+ -pcmk_err_schema_unchanged, -+ }, -+ { "pcmk_rc_schema_validation", -+ "Update does not conform to the configured schema", -+ -pcmk_err_schema_validation, -+ }, -+ { "pcmk_rc_no_quorum", -+ "Operation requires quorum", -+ -pcmk_err_no_quorum, -+ }, -+}; -+ -+#define PCMK__N_RC (sizeof(pcmk__rcs) / sizeof(struct pcmk__rc_info)) -+ -+/*! -+ * \brief Get a return code constant name as a string -+ * -+ * \param[in] rc Integer return code to convert -+ * -+ * \return String of constant name corresponding to rc -+ */ -+const char * -+pcmk_rc_name(int rc) -+{ -+ if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < PCMK__N_RC)) { -+ return pcmk__rcs[pcmk_rc_error - rc].name; -+ } -+ switch (rc) { -+ case pcmk_rc_ok: return "pcmk_rc_ok"; -+ case E2BIG: return "E2BIG"; -+ case EACCES: return "EACCES"; -+ case EADDRINUSE: return "EADDRINUSE"; -+ case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; -+ case EAFNOSUPPORT: return "EAFNOSUPPORT"; -+ case EAGAIN: return "EAGAIN"; -+ case EALREADY: return "EALREADY"; -+ case EBADF: return "EBADF"; -+ case EBADMSG: return "EBADMSG"; -+ case EBUSY: return "EBUSY"; -+ case ECANCELED: return "ECANCELED"; -+ case ECHILD: return "ECHILD"; -+ case ECOMM: return "ECOMM"; -+ case ECONNABORTED: return "ECONNABORTED"; -+ case ECONNREFUSED: return "ECONNREFUSED"; -+ case ECONNRESET: return "ECONNRESET"; -+ /* case EDEADLK: return "EDEADLK"; */ -+ case EDESTADDRREQ: return "EDESTADDRREQ"; -+ case EDOM: return "EDOM"; -+ case EDQUOT: return "EDQUOT"; -+ case EEXIST: return "EEXIST"; -+ case EFAULT: return "EFAULT"; -+ case EFBIG: return "EFBIG"; -+ case EHOSTDOWN: return "EHOSTDOWN"; -+ case EHOSTUNREACH: return "EHOSTUNREACH"; -+ case EIDRM: return "EIDRM"; -+ case EILSEQ: return "EILSEQ"; -+ case EINPROGRESS: return "EINPROGRESS"; 
-+ case EINTR: return "EINTR"; -+ case EINVAL: return "EINVAL"; -+ case EIO: return "EIO"; -+ case EISCONN: return "EISCONN"; -+ case EISDIR: return "EISDIR"; -+ case ELIBACC: return "ELIBACC"; -+ case ELOOP: return "ELOOP"; -+ case EMFILE: return "EMFILE"; -+ case EMLINK: return "EMLINK"; -+ case EMSGSIZE: return "EMSGSIZE"; -+#ifdef EMULTIHOP // Not available on OpenBSD -+ case EMULTIHOP: return "EMULTIHOP"; -+#endif -+ case ENAMETOOLONG: return "ENAMETOOLONG"; -+ case ENETDOWN: return "ENETDOWN"; -+ case ENETRESET: return "ENETRESET"; -+ case ENETUNREACH: return "ENETUNREACH"; -+ case ENFILE: return "ENFILE"; -+ case ENOBUFS: return "ENOBUFS"; -+ case ENODATA: return "ENODATA"; -+ case ENODEV: return "ENODEV"; -+ case ENOENT: return "ENOENT"; -+ case ENOEXEC: return "ENOEXEC"; -+ case ENOKEY: return "ENOKEY"; -+ case ENOLCK: return "ENOLCK"; -+#ifdef ENOLINK // Not available on OpenBSD -+ case ENOLINK: return "ENOLINK"; -+#endif -+ case ENOMEM: return "ENOMEM"; -+ case ENOMSG: return "ENOMSG"; -+ case ENOPROTOOPT: return "ENOPROTOOPT"; -+ case ENOSPC: return "ENOSPC"; -+ case ENOSR: return "ENOSR"; -+ case ENOSTR: return "ENOSTR"; -+ case ENOSYS: return "ENOSYS"; -+ case ENOTBLK: return "ENOTBLK"; -+ case ENOTCONN: return "ENOTCONN"; -+ case ENOTDIR: return "ENOTDIR"; -+ case ENOTEMPTY: return "ENOTEMPTY"; -+ case ENOTSOCK: return "ENOTSOCK"; -+#if ENOTSUP != EOPNOTSUPP -+ case ENOTSUP: return "ENOTSUP"; -+#endif -+ case ENOTTY: return "ENOTTY"; -+ case ENOTUNIQ: return "ENOTUNIQ"; -+ case ENXIO: return "ENXIO"; -+ case EOPNOTSUPP: return "EOPNOTSUPP"; -+ case EOVERFLOW: return "EOVERFLOW"; -+ case EPERM: return "EPERM"; -+ case EPFNOSUPPORT: return "EPFNOSUPPORT"; -+ case EPIPE: return "EPIPE"; -+ case EPROTO: return "EPROTO"; -+ case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; -+ case EPROTOTYPE: return "EPROTOTYPE"; -+ case ERANGE: return "ERANGE"; -+ case EREMOTE: return "EREMOTE"; -+ case EREMOTEIO: return "EREMOTEIO"; -+ case EROFS: return "EROFS"; -+ case 
ESHUTDOWN: return "ESHUTDOWN"; -+ case ESPIPE: return "ESPIPE"; -+ case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; -+ case ESRCH: return "ESRCH"; -+ case ESTALE: return "ESTALE"; -+ case ETIME: return "ETIME"; -+ case ETIMEDOUT: return "ETIMEDOUT"; -+ case ETXTBSY: return "ETXTBSY"; -+ case EUNATCH: return "EUNATCH"; -+ case EUSERS: return "EUSERS"; -+ /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ -+ case EXDEV: return "EXDEV"; -+ -+#ifdef EBADE // Not available on OS X -+ case EBADE: return "EBADE"; -+ case EBADFD: return "EBADFD"; -+ case EBADSLT: return "EBADSLT"; -+ case EDEADLOCK: return "EDEADLOCK"; -+ case EBADR: return "EBADR"; -+ case EBADRQC: return "EBADRQC"; -+ case ECHRNG: return "ECHRNG"; -+#ifdef EISNAM // Not available on OS X, Illumos, Solaris -+ case EISNAM: return "EISNAM"; -+ case EKEYEXPIRED: return "EKEYEXPIRED"; -+ case EKEYREJECTED: return "EKEYREJECTED"; -+ case EKEYREVOKED: return "EKEYREVOKED"; -+#endif -+ case EL2HLT: return "EL2HLT"; -+ case EL2NSYNC: return "EL2NSYNC"; -+ case EL3HLT: return "EL3HLT"; -+ case EL3RST: return "EL3RST"; -+ case ELIBBAD: return "ELIBBAD"; -+ case ELIBMAX: return "ELIBMAX"; -+ case ELIBSCN: return "ELIBSCN"; -+ case ELIBEXEC: return "ELIBEXEC"; -+#ifdef ENOMEDIUM // Not available on OS X, Illumos, Solaris -+ case ENOMEDIUM: return "ENOMEDIUM"; -+ case EMEDIUMTYPE: return "EMEDIUMTYPE"; -+#endif -+ case ENONET: return "ENONET"; -+ case ENOPKG: return "ENOPKG"; -+ case EREMCHG: return "EREMCHG"; -+ case ERESTART: return "ERESTART"; -+ case ESTRPIPE: return "ESTRPIPE"; -+#ifdef EUCLEAN // Not available on OS X, Illumos, Solaris -+ case EUCLEAN: return "EUCLEAN"; -+#endif -+ case EXFULL: return "EXFULL"; -+#endif // EBADE -+ default: return "Unknown"; -+ } -+} -+ -+/*! 
-+ * \brief Get a user-friendly description of a return code -+ * -+ * \param[in] rc Integer return code to convert -+ * -+ * \return String description of rc -+ */ -+const char * -+pcmk_rc_str(int rc) -+{ -+ if (rc == pcmk_rc_ok) { -+ return "OK"; -+ } -+ if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < PCMK__N_RC)) { -+ return pcmk__rcs[pcmk_rc_error - rc].desc; -+ } -+ if (rc < 0) { -+ return "Unknown error"; -+ } -+ return strerror(rc); -+} -+ -+// This returns negative values for errors -+//! \deprecated Use standard return codes instead -+int -+pcmk_rc2legacy(int rc) -+{ -+ if (rc >= 0) { -+ return -rc; // OK or system errno -+ } -+ if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < PCMK__N_RC)) { -+ return pcmk__rcs[pcmk_rc_error - rc].legacy_rc; -+ } -+ return -pcmk_err_generic; -+} -+ -+//! \deprecated Use standard return codes instead -+int -+pcmk_legacy2rc(int legacy_rc) -+{ -+ legacy_rc = abs(legacy_rc); -+ switch (legacy_rc) { -+ case pcmk_err_no_quorum: return pcmk_rc_no_quorum; -+ case pcmk_err_schema_validation: return pcmk_rc_schema_validation; -+ case pcmk_err_schema_unchanged: return pcmk_rc_schema_unchanged; -+ case pcmk_err_transform_failed: return pcmk_rc_transform_failed; -+ case pcmk_err_old_data: return pcmk_rc_old_data; -+ case pcmk_err_diff_failed: return pcmk_rc_diff_failed; -+ case pcmk_err_diff_resync: return pcmk_rc_diff_resync; -+ case pcmk_err_cib_modified: return pcmk_rc_cib_modified; -+ case pcmk_err_cib_backup: return pcmk_rc_cib_backup; -+ case pcmk_err_cib_save: return pcmk_rc_cib_save; -+ case pcmk_err_cib_corrupt: return pcmk_rc_cib_corrupt; -+ case pcmk_err_multiple: return pcmk_rc_multiple; -+ case pcmk_err_node_unknown: return pcmk_rc_node_unknown; -+ case pcmk_err_already: return pcmk_rc_already; -+ case pcmk_err_bad_nvpair: return pcmk_rc_bad_nvpair; -+ case pcmk_err_unknown_format: return pcmk_rc_unknown_format; -+ case pcmk_err_generic: return pcmk_rc_error; -+ case pcmk_ok: return pcmk_rc_ok; -+ default: return 
legacy_rc; // system errno -+ } -+} -+ -+// Exit status codes -+ - const char * - crm_exit_name(crm_exit_t exit_code) - { -@@ -347,26 +517,17 @@ crm_exit_str(crm_exit_t exit_code) - case CRM_EX_TIMEOUT: return "Timeout occurred"; - case CRM_EX_MAX: return "Error occurred"; - } -- if (exit_code > 128) { -+ if ((exit_code > 128) && (exit_code < CRM_EX_MAX)) { - return "Interrupted by signal"; - } - return "Unknown exit status"; - } - --/*! -- * \brief Map an errno to a similar exit status -- * -- * \param[in] errno Error number to map -- * -- * \return Exit status corresponding to errno -- */ -+//! \deprecated Use standard return codes and pcmk_rc2exitc() instead - crm_exit_t - crm_errno2exit(int rc) - { - rc = abs(rc); // Convenience for functions that return -errno -- if (rc == EOPNOTSUPP) { -- rc = ENOTSUP; // Values are same on Linux, can't use both in case -- } - switch (rc) { - case pcmk_ok: - return CRM_EX_OK; -@@ -384,6 +545,48 @@ crm_errno2exit(int rc) - case pcmk_err_bad_nvpair: - return CRM_EX_INVALID_PARAM; - -+ case pcmk_err_already: -+ return CRM_EX_EXISTS; -+ -+ case pcmk_err_multiple: -+ return CRM_EX_MULTIPLE; -+ -+ case pcmk_err_node_unknown: -+ case pcmk_err_unknown_format: -+ return CRM_EX_NOSUCH; -+ -+ default: -+ return pcmk_rc2exitc(rc); // system errno -+ } -+} -+ -+/*! 
-+ * \brief Map a function return code to the most similar exit code -+ * -+ * \param[in] rc Function return code -+ * -+ * \return Most similar exit code -+ */ -+crm_exit_t -+pcmk_rc2exitc(int rc) -+{ -+ switch (rc) { -+ case pcmk_rc_ok: -+ return CRM_EX_OK; -+ -+ case pcmk_rc_no_quorum: -+ return CRM_EX_QUORUM; -+ -+ case pcmk_rc_old_data: -+ return CRM_EX_OLD; -+ -+ case pcmk_rc_schema_validation: -+ case pcmk_rc_transform_failed: -+ return CRM_EX_CONFIG; -+ -+ case pcmk_rc_bad_nvpair: -+ return CRM_EX_INVALID_PARAM; -+ - case EACCES: - return CRM_EX_INSUFFICIENT_PRIV; - -@@ -414,22 +617,25 @@ crm_errno2exit(int rc) - return CRM_EX_DISCONNECT; - - case EEXIST: -- case pcmk_err_already: -+ case pcmk_rc_already: - return CRM_EX_EXISTS; - - case EIO: - return CRM_EX_IOERR; - - case ENOTSUP: -+#if EOPNOTSUPP != ENOTSUP -+ case EOPNOTSUPP: -+#endif - return CRM_EX_UNIMPLEMENT_FEATURE; - - case ENOTUNIQ: -- case pcmk_err_multiple: -+ case pcmk_rc_multiple: - return CRM_EX_MULTIPLE; - - case ENXIO: -- case pcmk_err_node_unknown: -- case pcmk_err_unknown_format: -+ case pcmk_rc_node_unknown: -+ case pcmk_rc_unknown_format: - return CRM_EX_NOSUCH; - - case ETIME: -@@ -441,6 +647,8 @@ crm_errno2exit(int rc) - } - } - -+// Other functions -+ - const char * - bz2_strerror(int rc) - { -diff --git a/tools/crm_error.c b/tools/crm_error.c -index f6dc73c..0dcae05 100644 ---- a/tools/crm_error.c -+++ b/tools/crm_error.c -@@ -1,21 +1,10 @@ --/* -- * Copyright 2012-2018 the Pacemaker project contributors -+/* -+ * Copyright 2012-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. -- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public -- * License as published by the Free Software Foundation; either -- * version 2 of the License, or (at your option) any later version. 
-- * -- * This software is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * General Public License for more details. -- * -- * You should have received a copy of the GNU General Public -- * License along with this library; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * This source code is licensed under the GNU General Public License version 2 -+ * or later (GPLv2+) WITHOUT ANY WARRANTY. - */ - - #include -@@ -33,12 +22,31 @@ static struct crm_option long_options[] = { - "\n\t\t\tUseful for looking for sources of the error in source code"}, - - {"list", 0, 0, 'l', "\tShow all known errors."}, -- {"exit", 0, 0, 'X', "\tInterpret as exit code rather than function return value"}, -+ {"exit", 0, 0, 'X', "\tInterpret as exit code rather than legacy function return value"}, -+ {"rc", 0, 0, 'r', "\tInterpret as return code rather than legacy function return value"}, - - {0, 0, 0, 0} - }; - /* *INDENT-ON* */ - -+static bool as_exit_code = false; -+static bool as_rc = false; -+ -+static void -+get_strings(int rc, const char **name, const char **str) -+{ -+ if (as_exit_code) { -+ *str = crm_exit_str((crm_exit_t) rc); -+ *name = crm_exit_name(rc); -+ } else if (as_rc) { -+ *str = pcmk_rc_str(rc); -+ *name = pcmk_rc_name(rc); -+ } else { -+ *str = pcmk_strerror(rc); -+ *name = pcmk_errorname(rc); -+ } -+} -+ - int - main(int argc, char **argv) - { -@@ -49,10 +57,12 @@ main(int argc, char **argv) - - bool do_list = FALSE; - bool with_name = FALSE; -- bool as_exit_code = FALSE; -+ -+ const char *name = NULL; -+ const char *desc = NULL; - - crm_log_cli_init("crm_error"); -- crm_set_options(NULL, "[options] -- rc", long_options, -+ crm_set_options(NULL, "[options] -- [...]", long_options, - "Tool for displaying the textual name or description of a reported error 
code"); - - while (flag >= 0) { -@@ -73,6 +83,9 @@ main(int argc, char **argv) - case 'l': - do_list = TRUE; - break; -+ case 'r': -+ as_rc = true; -+ break; - case 'X': - as_exit_code = TRUE; - break; -@@ -83,30 +96,43 @@ main(int argc, char **argv) - } - - if(do_list) { -- for (rc = 0; rc < 256; rc++) { -- const char *name = as_exit_code? crm_exit_name(rc) : pcmk_errorname(rc); -- const char *desc = as_exit_code? crm_exit_str(rc) : pcmk_strerror(rc); -+ int start, end, width; -+ -+ // 256 is a hacky magic number that "should" be enough -+ if (as_rc) { -+ start = pcmk_rc_error - 256; -+ end = PCMK_CUSTOM_OFFSET; -+ width = 4; -+ } else { -+ start = 0; -+ end = 256; -+ width = 3; -+ } -+ -+ for (rc = start; rc < end; rc++) { -+ if (rc == (pcmk_rc_error + 1)) { -+ // Values in between are reserved for callers, no use iterating -+ rc = pcmk_rc_ok; -+ } -+ get_strings(rc, &name, &desc); - if (!name || !strcmp(name, "Unknown") || !strcmp(name, "CRM_EX_UNKNOWN")) { -- /* Unknown */ -+ // Undefined - } else if(with_name) { -- printf("%.3d: %-26s %s\n", rc, name, desc); -+ printf("% .*d: %-26s %s\n", width, rc, name, desc); - } else { -- printf("%.3d: %s\n", rc, desc); -+ printf("% .*d: %s\n", width, rc, desc); - } - } -- return CRM_EX_OK; -- } - -- for (lpc = optind; lpc < argc; lpc++) { -- const char *str, *name; -- -- rc = crm_atoi(argv[lpc], NULL); -- str = as_exit_code? crm_exit_str(rc) : pcmk_strerror(rc); -- if(with_name) { -- name = as_exit_code? 
crm_exit_name(rc) : pcmk_errorname(rc); -- printf("%s - %s\n", name, str); -- } else { -- printf("%s\n", str); -+ } else { -+ for (lpc = optind; lpc < argc; lpc++) { -+ rc = crm_atoi(argv[lpc], NULL); -+ get_strings(rc, &name, &desc); -+ if (with_name) { -+ printf("%s - %s\n", name, desc); -+ } else { -+ printf("%s\n", desc); -+ } - } - } - return CRM_EX_OK; --- -1.8.3.1 - diff --git a/SOURCES/003-trace.patch b/SOURCES/003-trace.patch new file mode 100644 index 0000000..e56e644 --- /dev/null +++ b/SOURCES/003-trace.patch @@ -0,0 +1,30 @@ +From 47c3e06b098c7e148c54675588d03b4d2bea40b5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 22 Jun 2020 16:20:01 -0400 +Subject: [PATCH] Fix: libpacemaker: Don't allow a potential NULL in a format + string. + +This is only tripping up F32 s390x builds, but I don't suppose there's +any reason it couldn't come up elsewhere later. +--- + lib/pacemaker/pcmk_sched_constraints.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_constraints.c b/lib/pacemaker/pcmk_sched_constraints.c +index 9c3a88d..d8c3e69 100644 +--- a/lib/pacemaker/pcmk_sched_constraints.c ++++ b/lib/pacemaker/pcmk_sched_constraints.c +@@ -1595,8 +1595,8 @@ custom_action_order(pe_resource_t * lh_rsc, char *lh_action_task, pe_action_t * + order = calloc(1, sizeof(pe__ordering_t)); + + crm_trace("Creating[%d] %s %s %s - %s %s %s", data_set->order_id, +- lh_rsc?lh_rsc->id:"NA", lh_action_task, lh_action?lh_action->uuid:"NA", +- rh_rsc?rh_rsc->id:"NA", rh_action_task, rh_action?rh_action->uuid:"NA"); ++ lh_rsc?lh_rsc->id:"NA", lh_action_task?lh_action_task:"NA", lh_action?lh_action->uuid:"NA", ++ rh_rsc?rh_rsc->id:"NA", rh_action_task?rh_action_task:"NA", rh_action?rh_action->uuid:"NA"); + + /* CRM_ASSERT(data_set->order_id != 291); */ + +-- +1.8.3.1 + diff --git a/SOURCES/004-test.patch b/SOURCES/004-test.patch new file mode 100644 index 0000000..e17850b --- /dev/null +++ b/SOURCES/004-test.patch @@ -0,0 +1,27 
@@ +From 7ed7675615ada7d0be5654e0dcb26de60cf5b5e9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 22 Jun 2020 20:03:56 -0500 +Subject: [PATCH] Test: scheduler: explicitly disable concurrent-fencing in + on_fail_demote4 + +... so the expected output is the same regardless of what default the build was +compiled with +--- + cts/scheduler/on_fail_demote4.xml | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/cts/scheduler/on_fail_demote4.xml b/cts/scheduler/on_fail_demote4.xml +index eb4c4cc..1082266 100644 +--- a/cts/scheduler/on_fail_demote4.xml ++++ b/cts/scheduler/on_fail_demote4.xml +@@ -8,6 +8,7 @@ + + + ++ + + + +-- +1.8.3.1 + diff --git a/SOURCES/004-unused.patch b/SOURCES/004-unused.patch deleted file mode 100644 index e732b42..0000000 --- a/SOURCES/004-unused.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 6df10102c02f93890c1994136b3ce6a60b33a05e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 6 Jan 2020 11:01:38 -0600 -Subject: [PATCH] Refactor: controller: remove unused function arguments - -... 
and rename affected functions ---- - daemons/controld/controld_execd.c | 2 +- - daemons/controld/controld_fsa.c | 1 - - daemons/controld/controld_fsa.h | 4 ++-- - daemons/controld/controld_join_client.c | 4 ++-- - daemons/controld/controld_join_dc.c | 32 ++++++++++++++------------------ - 5 files changed, 19 insertions(+), 24 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index f068413..16751b9 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -839,7 +839,7 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) - } - - xmlNode * --do_lrm_query(gboolean is_replace, const char *node_name) -+controld_query_executor_state(const char *node_name) - { - lrm_state_t *lrm_state = lrm_state_find(node_name); - -diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c -index bd732bc..db2b3f3 100644 ---- a/daemons/controld/controld_fsa.c -+++ b/daemons/controld/controld_fsa.c -@@ -41,7 +41,6 @@ enum crmd_fsa_state fsa_state = S_STARTING; - - extern uint highest_born_on; - extern uint num_join_invites; --extern void initialize_join(gboolean before); - - #define DOT_PREFIX "actions:trace: " - #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) -diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h -index 06794cb..8aaaadf 100644 ---- a/daemons/controld/controld_fsa.h -+++ b/daemons/controld/controld_fsa.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -484,7 +484,7 @@ extern gboolean ever_had_quorum; - // These should be moved elsewhere - void do_update_cib_nodes(gboolean overwrite, const char *caller); - int crmd_cib_smart_opt(void); --xmlNode *do_lrm_query(gboolean, const char *node_name); -+xmlNode *controld_query_executor_state(const char *node_name); - - const char *fsa_input2string(enum crmd_fsa_input input); - const char *fsa_state2string(enum crmd_fsa_state state); -diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c -index 4ac0d2a..383ee29 100644 ---- a/daemons/controld/controld_join_client.c -+++ b/daemons/controld/controld_join_client.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -264,7 +264,7 @@ do_cl_join_finalize_respond(long long action, - update_dc_expected(input->msg); - - /* send our status section to the DC */ -- tmp1 = do_lrm_query(TRUE, fsa_our_uname); -+ tmp1 = controld_query_executor_state(fsa_our_uname); - if (tmp1 != NULL) { - xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, - CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); -diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c -index ac6b430..885b2a9 100644 ---- a/daemons/controld/controld_join_dc.c -+++ b/daemons/controld/controld_join_dc.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -21,7 +21,6 @@ char *max_epoch = NULL; - char *max_generation_from = NULL; - xmlNode *max_generation_xml = NULL; - --void initialize_join(gboolean before); - void finalize_join_for(gpointer key, gpointer value, gpointer user_data); - void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); - gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); -@@ -68,8 +67,8 @@ crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase - } - } - --void --initialize_join(gboolean before) -+static void -+start_join_round() - { - GHashTableIter iter; - crm_node_t *peer = NULL; -@@ -80,19 +79,16 @@ initialize_join(gboolean before) - while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { - crm_update_peer_join(__FUNCTION__, peer, crm_join_none); - } -- -- if (before) { -- if (max_generation_from != NULL) { -- free(max_generation_from); -- max_generation_from = NULL; -- } -- if (max_generation_xml != NULL) { -- free_xml(max_generation_xml); -- max_generation_xml = NULL; -- } -- clear_bit(fsa_input_register, R_HAVE_CIB); -- clear_bit(fsa_input_register, R_CIB_ASKED); -+ if (max_generation_from != NULL) { -+ free(max_generation_from); -+ max_generation_from = NULL; -+ } -+ if (max_generation_xml != NULL) { -+ free_xml(max_generation_xml); -+ max_generation_xml = NULL; - } -+ clear_bit(fsa_input_register, R_HAVE_CIB); -+ clear_bit(fsa_input_register, R_CIB_ASKED); - } - - /*! -@@ -192,7 +188,7 @@ do_dc_join_offer_all(long long action, - * will be seen as offline by the scheduler anyway. 
- */ - current_join_id++; -- initialize_join(TRUE); -+ start_join_round(); - /* do_update_cib_nodes(TRUE, __FUNCTION__); */ - - update_dc(NULL); -@@ -590,7 +586,7 @@ do_dc_join_ack(long long action, - controld_delete_node_state(join_from, controld_section_lrm, - cib_scope_local); - if (safe_str_eq(join_from, fsa_our_uname)) { -- xmlNode *now_dc_lrmd_state = do_lrm_query(TRUE, fsa_our_uname); -+ xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); - - if (now_dc_lrmd_state != NULL) { - fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, --- -1.8.3.1 - diff --git a/SOURCES/005-shutdown-lock.patch b/SOURCES/005-shutdown-lock.patch deleted file mode 100644 index 9a4fe46..0000000 --- a/SOURCES/005-shutdown-lock.patch +++ /dev/null @@ -1,207 +0,0 @@ -From 4bdda97ff76d0e682f4f58bc632cd2cbd417c423 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 14 Jan 2020 12:52:21 -0600 -Subject: [PATCH 01/18] Log: controller: improve messages when deleting CIB - resource history - -This also moves delete_rsc_status() to controld_based.c and renames it. ---- - daemons/controld/controld_based.c | 71 +++++++++++++++++++++++++++++++++++++++ - daemons/controld/controld_execd.c | 47 +++++--------------------- - daemons/controld/controld_utils.h | 4 ++- - 3 files changed, 83 insertions(+), 39 deletions(-) - -diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c -index 42e321f..f3a7c4f 100644 ---- a/daemons/controld/controld_based.c -+++ b/daemons/controld/controld_based.c -@@ -243,3 +243,74 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, - } - free(xpath); - } -+ -+// Takes node name and resource ID -+#define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ -+ "[@" XML_ATTR_UNAME "='%s'] /" \ -+ XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ -+ "/" XML_LRM_TAG_RESOURCE \ -+ "[@" XML_ATTR_ID "='%s']" -+// @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks -+ -+/*! 
-+ * \internal -+ * \brief Clear resource history from CIB for a given resource and node -+ * -+ * \param[in] rsc_id ID of resource to be cleared -+ * \param[in] node Node whose resource history should be cleared -+ * \param[in] user_name ACL user name to use -+ * \param[in] call_options CIB call options -+ * -+ * \return Standard Pacemaker return code -+ */ -+int -+controld_delete_resource_history(const char *rsc_id, const char *node, -+ const char *user_name, int call_options) -+{ -+ char *desc = NULL; -+ char *xpath = NULL; -+ int rc = pcmk_rc_ok; -+ -+ CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); -+ -+ desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); -+ if (fsa_cib_conn == NULL) { -+ crm_err("Unable to clear %s: no CIB connection", desc); -+ free(desc); -+ return ENOTCONN; -+ } -+ -+ // Ask CIB to delete the entry -+ xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); -+ rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, xpath, NULL, -+ NULL, call_options|cib_xpath, user_name); -+ -+ if (rc < 0) { -+ rc = pcmk_legacy2rc(rc); -+ crm_err("Could not delete resource status of %s on %s%s%s: %s " -+ CRM_XS " rc=%d", rsc_id, node, -+ (user_name? " for user " : ""), (user_name? 
user_name : ""), -+ pcmk_rc_str(rc), rc); -+ free(desc); -+ free(xpath); -+ return rc; -+ } -+ -+ if (is_set(call_options, cib_sync_call)) { -+ if (is_set(call_options, cib_dryrun)) { -+ crm_debug("Deletion of %s would succeed", desc); -+ } else { -+ crm_debug("Deletion of %s succeeded", desc); -+ } -+ free(desc); -+ -+ } else { -+ crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", -+ desc, rc, xpath); -+ fsa_register_cib_callback(rc, FALSE, desc, cib_delete_callback); -+ // CIB library handles freeing desc -+ } -+ -+ free(xpath); -+ return pcmk_rc_ok; -+} -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 16751b9..212739e 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -36,8 +36,6 @@ struct delete_event_s { - static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); - static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); - static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); --static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, -- const char *user_name); - - static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, - const char *rsc_id, const char *operation); -@@ -169,7 +167,8 @@ update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_ - - if (op->rsc_deleted) { - crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); -- delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); -+ controld_delete_resource_history(op->rsc_id, lrm_state->node_name, -+ NULL, crmd_cib_smart_opt()); - return; - } - -@@ -917,31 +916,6 @@ lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) - return FALSE; - } - --/* -- * Remove the rsc from the CIB -- * -- * Avoids refreshing the entire LRM section of this host -- */ --#define RSC_TEMPLATE 
"//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" -- --static int --delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, -- const char *user_name) --{ -- char *rsc_xpath = NULL; -- int rc = pcmk_ok; -- -- CRM_CHECK(rsc_id != NULL, return -ENXIO); -- -- rsc_xpath = crm_strdup_printf(RSC_TEMPLATE, lrm_state->node_name, rsc_id); -- -- rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, -- NULL, NULL, call_options | cib_xpath, user_name); -- -- free(rsc_xpath); -- return rc; --} -- - static void - delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, - GHashTableIter * rsc_gIter, int rc, const char *user_name) -@@ -958,7 +932,8 @@ delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rs - else - g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); - crm_debug("sync: Sending delete op for %s", rsc_id_copy); -- delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); -+ controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, -+ user_name, crmd_cib_smart_opt()); - - g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); - free(rsc_id_copy); -@@ -1694,21 +1669,17 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, - gboolean unregister = TRUE; - - #if ENABLE_ACL -- int cib_rc = delete_rsc_status(lrm_state, rsc->id, -- cib_dryrun|cib_sync_call, user_name); -+ int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, -+ user_name, -+ cib_dryrun|cib_sync_call); - -- if (cib_rc != pcmk_ok) { -+ if (cib_rc != pcmk_rc_ok) { - lrmd_event_data_t *op = NULL; - -- crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" -- CRM_XS " rc=%d", -- rsc->id, from_sys, (user_name? 
user_name : "unknown"), -- from_host, pcmk_strerror(cib_rc), cib_rc); -- - op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); - op->op_status = PCMK_LRM_OP_ERROR; - -- if (cib_rc == -EACCES) { -+ if (cib_rc == EACCES) { - op->rc = PCMK_OCF_INSUFFICIENT_PRIV; - } else { - op->rc = PCMK_OCF_UNKNOWN_ERROR; -diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h -index f902361..ca8cddb 100644 ---- a/daemons/controld/controld_utils.h -+++ b/daemons/controld/controld_utils.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -95,6 +95,8 @@ enum controld_section_e { - - void controld_delete_node_state(const char *uname, - enum controld_section_e section, int options); -+int controld_delete_resource_history(const char *rsc_id, const char *node, -+ const char *user_name, int call_options); - - const char *get_node_id(xmlNode *lrm_rsc_op); - --- -1.8.3.1 - diff --git a/SOURCES/005-sysconfig.patch b/SOURCES/005-sysconfig.patch new file mode 100644 index 0000000..4e49cab --- /dev/null +++ b/SOURCES/005-sysconfig.patch @@ -0,0 +1,32 @@ +From 85040eb19b9405464b01a7e67eb6769d2a03c611 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 19 Jun 2020 17:49:22 -0500 +Subject: [PATCH] Doc: sysconfig: remove outdated reference to wildcards in + PCMK_trace_files + +Wildcards stopped working when the log filtering implementation changed in +1.1.8 to support PCMK_trace_tags. It's not worth the effort to fix at this +point, so just update the comment in the sysconfig file. 
+--- + daemons/pacemakerd/pacemaker.sysconfig | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/daemons/pacemakerd/pacemaker.sysconfig b/daemons/pacemakerd/pacemaker.sysconfig +index c7745d8..e4a5c4d 100644 +--- a/daemons/pacemakerd/pacemaker.sysconfig ++++ b/daemons/pacemakerd/pacemaker.sysconfig +@@ -34,9 +34,8 @@ + # Log all messages from a comma-separated list of functions. + # PCMK_trace_functions=function1,function2,function3 + +-# Log all messages from a comma-separated list of files (no path). +-# Wildcards are supported, e.g. PCMK_trace_files=prefix*.c +-# PCMK_trace_files=file.c,other.h ++# Log all messages from a comma-separated list of file names (without path). ++# PCMK_trace_files=file1.c,file2.c + + # Log all messages matching comma-separated list of formats. + # PCMK_trace_formats="Sent delete %d" +-- +1.8.3.1 + diff --git a/SOURCES/006-ipc_refactor.patch b/SOURCES/006-ipc_refactor.patch new file mode 100644 index 0000000..55e7b95 --- /dev/null +++ b/SOURCES/006-ipc_refactor.patch @@ -0,0 +1,5198 @@ +From f0cfb7b02202b832f5655ee80580d053638e0be1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 3 Apr 2020 10:27:29 -0500 +Subject: [PATCH 1/8] Refactor: libcrmcommon: combine IPC internal headers + +Combine ipcs_internal.h into ipc_internal.h. 
+--- + daemons/attrd/attrd_utils.c | 2 +- + daemons/attrd/pacemaker-attrd.c | 2 +- + daemons/based/based_messages.c | 2 +- + daemons/based/based_remote.c | 2 +- + daemons/based/pacemaker-based.h | 2 +- + daemons/controld/controld_control.c | 2 +- + daemons/controld/controld_fsa.h | 2 +- + daemons/controld/controld_messages.c | 2 +- + daemons/controld/controld_messages.h | 2 +- + daemons/execd/execd_alerts.c | 2 +- + daemons/execd/execd_commands.c | 2 +- + daemons/execd/pacemaker-execd.c | 2 +- + daemons/execd/pacemaker-execd.h | 2 +- + daemons/execd/remoted_proxy.c | 2 +- + daemons/fenced/fenced_commands.c | 2 +- + daemons/fenced/fenced_history.c | 2 +- + daemons/fenced/fenced_remote.c | 2 +- + daemons/fenced/pacemaker-fenced.c | 2 +- + daemons/pacemakerd/pacemakerd.c | 4 +- + daemons/schedulerd/pacemaker-schedulerd.c | 2 +- + include/crm/cib/internal.h | 2 +- + include/crm/common/Makefile.am | 2 +- + include/crm/common/ipc_internal.h | 144 ++++++++++++++++++++++++++++- + include/crm/common/ipcs_internal.h | 149 ------------------------------ + include/crm_internal.h | 2 +- + lib/cib/cib_remote.c | 2 +- + lib/common/ipc.c | 4 +- + lib/common/mainloop.c | 2 +- + lib/common/remote.c | 2 +- + lib/lrmd/lrmd_client.c | 2 +- + lib/pacemaker/pcmk_sched_messages.c | 2 +- + maint/mocked/based.c | 2 +- + maint/mocked/based.h | 2 +- + 33 files changed, 172 insertions(+), 187 deletions(-) + delete mode 100644 include/crm/common/ipcs_internal.h + +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index b60b452..c311ddc 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -17,7 +17,7 @@ + #include + + #include +-#include ++#include + #include + + #include "pacemaker-attrd.h" +diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c +index 61e5493..0e944ed 100644 +--- a/daemons/attrd/pacemaker-attrd.c ++++ b/daemons/attrd/pacemaker-attrd.c +@@ -25,7 +25,7 @@ + #include + #include + #include +-#include 
++#include + #include + #include + +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index 4438e28..af0a3a2 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -24,7 +24,7 @@ + #include + + #include +-#include ++#include + #include + + #include +diff --git a/daemons/based/based_remote.c b/daemons/based/based_remote.c +index ca75b73..70261c3 100644 +--- a/daemons/based/based_remote.c ++++ b/daemons/based/based_remote.c +@@ -26,7 +26,7 @@ + + #include + #include +-#include ++#include + #include + #include + #include +diff --git a/daemons/based/pacemaker-based.h b/daemons/based/pacemaker-based.h +index 0d7a5b9..c88ce7c 100644 +--- a/daemons/based/pacemaker-based.h ++++ b/daemons/based/pacemaker-based.h +@@ -22,7 +22,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 1ddcada..7d29205 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -18,7 +18,7 @@ + #include + #include + #include +-#include ++#include + + #include + +diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h +index b76a7d2..28eea56 100644 +--- a/daemons/controld/controld_fsa.h ++++ b/daemons/controld/controld_fsa.h +@@ -16,7 +16,7 @@ + # include + # include + # include +-# include ++# include + + /*! 
States the controller can be in */ + enum crmd_fsa_state { +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 62719ad..59b2069 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -18,7 +18,7 @@ + #include + #include + #include +-#include ++#include + + #include + +diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h +index db3ade3..4018deb 100644 +--- a/daemons/controld/controld_messages.h ++++ b/daemons/controld/controld_messages.h +@@ -11,7 +11,7 @@ + # define XML_CRM_MESSAGES__H + + # include +-# include ++# include + # include + # include + # include +diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c +index 10eca36..ffe60b5 100644 +--- a/daemons/execd/execd_alerts.c ++++ b/daemons/execd/execd_alerts.c +@@ -14,7 +14,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index 4d0e457..8da63b4 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -25,7 +25,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include "pacemaker-execd.h" +diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c +index df27d1a..c06da7a 100644 +--- a/daemons/execd/pacemaker-execd.c ++++ b/daemons/execd/pacemaker-execd.c +@@ -18,7 +18,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h +index 7ba3e78..d86894b 100644 +--- a/daemons/execd/pacemaker-execd.h ++++ b/daemons/execd/pacemaker-execd.h +@@ -11,7 +11,7 @@ + # define PACEMAKER_EXECD__H + + # include +-# include ++# include + # include + # include + +diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c +index ef0d108..dda7eed 100644 +--- 
a/daemons/execd/remoted_proxy.c ++++ b/daemons/execd/remoted_proxy.c +@@ -18,7 +18,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index d13d1a1..5df472f 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -25,7 +25,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 710d6fe..b48662c 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -16,7 +16,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index b89c40a..bf24acb 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -26,7 +26,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index 450814c..6a2935a 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -24,7 +24,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index 5ed4626..64c30e2 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -24,13 +24,11 @@ + #include /* indirectly: CRM_EX_* */ + #include /* cib_channel_ro */ + #include +-#include ++#include + #include + #include + #include + +-#include /* PCMK__SPECIAL_PID*, ... 
*/ +- + #ifdef SUPPORT_COROSYNC + #include + #endif +diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c +index 0146ca2..885386d 100644 +--- a/daemons/schedulerd/pacemaker-schedulerd.c ++++ b/daemons/schedulerd/pacemaker-schedulerd.c +@@ -21,7 +21,7 @@ + + #include + +-#include ++#include + #include + #include + #include +diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h +index df16280..b43cf08 100644 +--- a/include/crm/cib/internal.h ++++ b/include/crm/cib/internal.h +@@ -10,7 +10,7 @@ + #ifndef CIB_INTERNAL__H + # define CIB_INTERNAL__H + # include +-# include ++# include + + # define CIB_OP_SLAVE "cib_slave" + # define CIB_OP_SLAVEALL "cib_slave_all" +diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am +index b38a5c5..776e4a7 100644 +--- a/include/crm/common/Makefile.am ++++ b/include/crm/common/Makefile.am +@@ -13,7 +13,7 @@ headerdir=$(pkgincludedir)/crm/common + + header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \ + nvpair.h acl.h +-noinst_HEADERS = ipcs_internal.h internal.h alerts_internal.h \ ++noinst_HEADERS = internal.h alerts_internal.h \ + iso8601_internal.h remote_internal.h xml_internal.h \ + ipc_internal.h output.h cmdline_internal.h curses_internal.h \ + attrd_internal.h options_internal.h +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +index 7113d78..a85210d 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2019 the Pacemaker project contributors ++ * Copyright 2013-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -10,10 +10,26 @@ + #ifndef PCMK__IPC_INTERNAL_H + #define PCMK__IPC_INTERNAL_H + +-#include ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#include // bool ++#include // uint32_t ++#include // struct iovec ++#include // uid_t, gid_t, pid_t, size_t ++ ++#ifdef HAVE_GNUTLS_GNUTLS_H ++# include // gnutls_session_t ++#endif + +-#include /* US_AUTH_GETPEEREID */ ++#include // guint, gpointer, GQueue, ... ++#include // xmlNode ++#include // qb_ipcs_connection_t, ... + ++#include // US_AUTH_GETPEEREID ++#include ++#include // mainloop_io_t + + /* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that + require a delicate handling anyway (socket-based activation with systemd); +@@ -69,4 +85,126 @@ + int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, + gid_t refgid, pid_t *gotpid); + ++typedef struct pcmk__client_s pcmk__client_t; ++ ++enum pcmk__client_type { ++ PCMK__CLIENT_IPC = 1, ++ PCMK__CLIENT_TCP = 2, ++# ifdef HAVE_GNUTLS_GNUTLS_H ++ PCMK__CLIENT_TLS = 3, ++# endif ++}; ++ ++struct pcmk__remote_s { ++ /* Shared */ ++ char *buffer; ++ size_t buffer_size; ++ size_t buffer_offset; ++ int auth_timeout; ++ int tcp_socket; ++ mainloop_io_t *source; ++ ++ /* CIB-only */ ++ bool authenticated; ++ char *token; ++ ++ /* TLS only */ ++# ifdef HAVE_GNUTLS_GNUTLS_H ++ gnutls_session_t *tls_session; ++ bool tls_handshake_complete; ++# endif ++}; ++ ++enum pcmk__client_flags { ++ pcmk__client_proxied = 0x00001, /* ipc_proxy code only */ ++ pcmk__client_privileged = 0x00002, /* root or cluster user */ ++}; ++ ++struct pcmk__client_s { ++ unsigned int pid; ++ ++ uid_t uid; ++ gid_t gid; ++ ++ char *id; ++ char *name; ++ char *user; ++ ++ /* Provided for server use (not used by library) */ ++ /* @TODO merge options, flags, and kind (reserving lower bits for server) */ ++ long long options; ++ ++ int request_id; ++ uint32_t flags; ++ void *userdata; ++ ++ int event_timer; ++ GQueue *event_queue; ++ ++ /* Depending on the value of 
kind, only some of the following ++ * will be populated/valid ++ */ ++ enum pcmk__client_type kind; ++ ++ qb_ipcs_connection_t *ipcs; /* IPC */ ++ ++ struct pcmk__remote_s *remote; /* TCP/TLS */ ++ ++ unsigned int queue_backlog; /* IPC queue length after last flush */ ++ unsigned int queue_max; /* Evict client whose queue grows this big */ ++}; ++ ++guint pcmk__ipc_client_count(void); ++void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data); ++void pcmk__foreach_ipc_client_remove(GHRFunc func, gpointer user_data); ++ ++void pcmk__client_cleanup(void); ++ ++pcmk__client_t *pcmk__find_client(qb_ipcs_connection_t *c); ++pcmk__client_t *pcmk__find_client_by_id(const char *id); ++const char *pcmk__client_name(pcmk__client_t *c); ++const char *pcmk__client_type_str(enum pcmk__client_type client_type); ++ ++pcmk__client_t *pcmk__new_unauth_client(void *key); ++pcmk__client_t *pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid); ++void pcmk__free_client(pcmk__client_t *c); ++void pcmk__drop_all_clients(qb_ipcs_service_t *s); ++bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax); ++ ++void pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, ++ uint32_t request, uint32_t flags, const char *tag); ++#define pcmk__ipc_send_ack(c, req, flags, tag) \ ++ pcmk__ipc_send_ack_as(__FUNCTION__, __LINE__, (c), (req), (flags), (tag)) ++ ++int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, ++ uint32_t max_send_size, ++ struct iovec **result, ssize_t *bytes); ++int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, ++ uint32_t flags); ++int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags); ++xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, ++ uint32_t *id, uint32_t *flags); ++ ++int pcmk__client_pid(qb_ipcs_connection_t *c); ++ ++void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, ++ struct qb_ipcs_service_handlers *cb); ++void 
pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, ++ struct qb_ipcs_service_handlers *cb); ++qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb); ++ ++void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, ++ qb_ipcs_service_t **ipcs_rw, ++ qb_ipcs_service_t **ipcs_shm, ++ struct qb_ipcs_service_handlers *ro_cb, ++ struct qb_ipcs_service_handlers *rw_cb); ++ ++void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, ++ qb_ipcs_service_t *ipcs_rw, ++ qb_ipcs_service_t *ipcs_shm); ++ ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/include/crm/common/ipcs_internal.h b/include/crm/common/ipcs_internal.h +deleted file mode 100644 +index c631dfc..0000000 +--- a/include/crm/common/ipcs_internal.h ++++ /dev/null +@@ -1,149 +0,0 @@ +-/* +- * Copyright 2013-2020 the Pacemaker project contributors +- * +- * The version control history for this file may have further details. +- * +- * This source code is licensed under the GNU Lesser General Public License +- * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+- */ +- +-#ifndef CRM_COMMON_IPCS__H +-# define CRM_COMMON_IPCS__H +- +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-# include +-# include +-# ifdef HAVE_GNUTLS_GNUTLS_H +-# undef KEYFILE +-# include +-# endif +- +-# include +-# include +- +-typedef struct pcmk__client_s pcmk__client_t; +- +-enum pcmk__client_type { +- PCMK__CLIENT_IPC = 1, +- PCMK__CLIENT_TCP = 2, +-# ifdef HAVE_GNUTLS_GNUTLS_H +- PCMK__CLIENT_TLS = 3, +-# endif +-}; +- +-struct pcmk__remote_s { +- /* Shared */ +- char *buffer; +- size_t buffer_size; +- size_t buffer_offset; +- int auth_timeout; +- int tcp_socket; +- mainloop_io_t *source; +- +- /* CIB-only */ +- bool authenticated; +- char *token; +- +- /* TLS only */ +-# ifdef HAVE_GNUTLS_GNUTLS_H +- gnutls_session_t *tls_session; +- bool tls_handshake_complete; +-# endif +-}; +- +-enum pcmk__client_flags { +- pcmk__client_proxied = 0x00001, /* ipc_proxy code only */ +- pcmk__client_privileged = 0x00002, /* root or cluster user */ +-}; +- +-struct pcmk__client_s { +- uint pid; +- +- uid_t uid; +- gid_t gid; +- +- char *id; +- char *name; +- char *user; +- +- /* Provided for server use (not used by library) */ +- /* @TODO merge options, flags, and kind (reserving lower bits for server) */ +- long long options; +- +- int request_id; +- uint32_t flags; +- void *userdata; +- +- int event_timer; +- GQueue *event_queue; +- +- /* Depending on the value of kind, only some of the following +- * will be populated/valid +- */ +- enum pcmk__client_type kind; +- +- qb_ipcs_connection_t *ipcs; /* IPC */ +- +- struct pcmk__remote_s *remote; /* TCP/TLS */ +- +- unsigned int queue_backlog; /* IPC queue length after last flush */ +- unsigned int queue_max; /* Evict client whose queue grows this big */ +-}; +- +-guint pcmk__ipc_client_count(void); +-void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data); +-void pcmk__foreach_ipc_client_remove(GHRFunc func, gpointer user_data); +- +-void pcmk__client_cleanup(void); +- +-pcmk__client_t 
*pcmk__find_client(qb_ipcs_connection_t *c); +-pcmk__client_t *pcmk__find_client_by_id(const char *id); +-const char *pcmk__client_name(pcmk__client_t *c); +-const char *pcmk__client_type_str(enum pcmk__client_type client_type); +- +-pcmk__client_t *pcmk__new_unauth_client(void *key); +-pcmk__client_t *pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid); +-void pcmk__free_client(pcmk__client_t *c); +-void pcmk__drop_all_clients(qb_ipcs_service_t *s); +-bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax); +- +-void pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, +- uint32_t request, uint32_t flags, const char *tag); +-#define pcmk__ipc_send_ack(c, req, flags, tag) \ +- pcmk__ipc_send_ack_as(__FUNCTION__, __LINE__, (c), (req), (flags), (tag)) +- +-int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, +- uint32_t max_send_size, +- struct iovec **result, ssize_t *bytes); +-int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, +- uint32_t flags); +-int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags); +-xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, +- uint32_t *id, uint32_t *flags); +- +-int pcmk__client_pid(qb_ipcs_connection_t *c); +- +-void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, +- struct qb_ipcs_service_handlers *cb); +-void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, +- struct qb_ipcs_service_handlers *cb); +-qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb); +- +-void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, +- qb_ipcs_service_t **ipcs_rw, +- qb_ipcs_service_t **ipcs_shm, +- struct qb_ipcs_service_handlers *ro_cb, +- struct qb_ipcs_service_handlers *rw_cb); +- +-void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, +- qb_ipcs_service_t *ipcs_rw, +- qb_ipcs_service_t *ipcs_shm); +- +-#ifdef __cplusplus +-} +-#endif +- +-#endif +diff --git a/include/crm_internal.h 
b/include/crm_internal.h +index 882cad8..15f9d2b 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -26,7 +26,7 @@ + + # include + # include +-# include ++# include + # include + # include + +diff --git a/lib/cib/cib_remote.c b/lib/cib/cib_remote.c +index ed93700..a011810 100644 +--- a/lib/cib/cib_remote.c ++++ b/lib/cib/cib_remote.c +@@ -23,7 +23,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +index 34bd594..defaa7e 100644 +--- a/lib/common/ipc.c ++++ b/lib/common/ipc.c +@@ -35,9 +35,7 @@ + #include /* indirectly: pcmk_err_generic */ + #include + #include +-#include +- +-#include /* PCMK__SPECIAL_PID* */ ++#include + + #define PCMK_IPC_VERSION 1 + +diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c +index e3640f5..10450e4 100644 +--- a/lib/common/mainloop.c ++++ b/lib/common/mainloop.c +@@ -23,7 +23,7 @@ + #include + #include + #include +-#include ++#include + + #include + +diff --git a/lib/common/remote.c b/lib/common/remote.c +index 94e06dd..3a97e54 100644 +--- a/lib/common/remote.c ++++ b/lib/common/remote.c +@@ -28,7 +28,7 @@ + #include + #include + +-#include ++#include + #include + #include + #include +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index a2c7200..a6c023b 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -27,7 +27,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/lib/pacemaker/pcmk_sched_messages.c b/lib/pacemaker/pcmk_sched_messages.c +index 9d013be..3d09a5e 100644 +--- a/lib/pacemaker/pcmk_sched_messages.c ++++ b/lib/pacemaker/pcmk_sched_messages.c +@@ -20,7 +20,7 @@ + + #include + #include +-#include ++#include + + gboolean show_scores = FALSE; + gboolean show_utilization = FALSE; +diff --git a/maint/mocked/based.c b/maint/mocked/based.c +index 0d5fd2d..2cfad9f 100644 +--- a/maint/mocked/based.c ++++ b/maint/mocked/based.c +@@ -23,7 +23,7 @@ + 
+ #include + #if 0 +-#include "crm/common/ipcs_internal.h" /* pcmk__client_t */ ++#include "crm/common/ipc_internal.h" /* pcmk__client_t */ + #include "crm/common/xml.h" /* crm_xml_add */ + #endif + #include "crm/msg_xml.h" /* F_SUBTYPE */ +diff --git a/maint/mocked/based.h b/maint/mocked/based.h +index ef1dc95..c214c08 100644 +--- a/maint/mocked/based.h ++++ b/maint/mocked/based.h +@@ -11,7 +11,7 @@ + #include /* size_t */ + #include /* bool */ + +-#include /* pcmk__client_t */ ++#include /* pcmk__client_t */ + + + struct module_s; +-- +1.8.3.1 + + +From 6743aee47c7b4cfa107ef02512fda7bebdd29efc Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Apr 2020 17:01:50 -0500 +Subject: [PATCH 2/8] Refactor: libcrmcommon: separate IPC code into multiple + files + +A significant amount of new IPC code will be added soon, so avoid the file +sizes becoming ridiculous. + +Before: + 210 include/crm/common/ipc_internal.h + 1846 lib/common/ipc.c + 2056 total + +After: + 215 include/crm/common/ipc_internal.h + 755 lib/common/ipc_client.c + 103 lib/common/ipc_common.c + 903 lib/common/ipc_server.c + 146 lib/common/messages.c + 2122 total + +Rename newly exposed symbols per current style guidelines. 
+--- + include/crm/common/ipc.h | 25 +- + include/crm/common/ipc_internal.h | 5 + + lib/common/Makefile.am | 5 +- + lib/common/crmcommon_private.h | 37 +- + lib/common/ipc.c | 1846 ------------------------------------- + lib/common/ipc_client.c | 755 +++++++++++++++ + lib/common/ipc_common.c | 103 +++ + lib/common/ipc_server.c | 903 ++++++++++++++++++ + lib/common/messages.c | 146 +++ + 9 files changed, 1969 insertions(+), 1856 deletions(-) + delete mode 100644 lib/common/ipc.c + create mode 100644 lib/common/ipc_client.c + create mode 100644 lib/common/ipc_common.c + create mode 100644 lib/common/ipc_server.c + create mode 100644 lib/common/messages.c + +diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h +index 79763f6..a0df956 100644 +--- a/include/crm/common/ipc.h ++++ b/include/crm/common/ipc.h +@@ -24,17 +24,30 @@ extern "C" { + #include + #include + +-/* clplumbing based IPC */ ++/* ++ * Message creation utilities ++ * ++ * These are used for both IPC messages and cluster layer messages. However, ++ * since this is public API, they stay in this header for backward ++ * compatibility. 
++ */ + +-# define create_reply(request, xml_response_data) create_reply_adv(request, xml_response_data, __FUNCTION__); +-xmlNode *create_reply_adv(xmlNode * request, xmlNode * xml_response_data, const char *origin); ++#define create_reply(request, xml_response_data) \ ++ create_reply_adv(request, xml_response_data, __FUNCTION__) + +-# define create_request(task, xml_data, host_to, sys_to, sys_from, uuid_from) create_request_adv(task, xml_data, host_to, sys_to, sys_from, uuid_from, __FUNCTION__) ++xmlNode *create_reply_adv(xmlNode *request, xmlNode *xml_response_data, ++ const char *origin); + +-xmlNode *create_request_adv(const char *task, xmlNode * xml_data, const char *host_to, +- const char *sys_to, const char *sys_from, const char *uuid_from, ++#define create_request(task, xml_data, host_to, sys_to, sys_from, uuid_from) \ ++ create_request_adv(task, xml_data, host_to, sys_to, sys_from, uuid_from, \ ++ __FUNCTION__) ++ ++xmlNode *create_request_adv(const char *task, xmlNode *xml_data, ++ const char *host_to, const char *sys_to, ++ const char *sys_from, const char *uuid_from, + const char *origin); + ++ + /* *INDENT-OFF* */ + enum crm_ipc_flags + { +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +index a85210d..6a1fcf3 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -85,6 +85,11 @@ extern "C" { + int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, + gid_t refgid, pid_t *gotpid); + ++ ++/* ++ * Server-related ++ */ ++ + typedef struct pcmk__client_s pcmk__client_t; + + enum pcmk__client_type { +diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am +index fae17f5..29404a6 100644 +--- a/lib/common/Makefile.am ++++ b/lib/common/Makefile.am +@@ -47,10 +47,13 @@ endif + libcrmcommon_la_SOURCES += cmdline.c + libcrmcommon_la_SOURCES += digest.c + libcrmcommon_la_SOURCES += io.c +-libcrmcommon_la_SOURCES += ipc.c ++libcrmcommon_la_SOURCES += ipc_client.c 
++libcrmcommon_la_SOURCES += ipc_common.c ++libcrmcommon_la_SOURCES += ipc_server.c + libcrmcommon_la_SOURCES += iso8601.c + libcrmcommon_la_SOURCES += logging.c + libcrmcommon_la_SOURCES += mainloop.c ++libcrmcommon_la_SOURCES += messages.c + libcrmcommon_la_SOURCES += nvpair.c + libcrmcommon_la_SOURCES += operations.c + libcrmcommon_la_SOURCES += options.c +diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h +index dfb1e54..d06fa20 100644 +--- a/lib/common/crmcommon_private.h ++++ b/lib/common/crmcommon_private.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2018-2019 the Pacemaker project contributors ++ * Copyright 2018-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -14,6 +14,17 @@ + * declared with G_GNUC_INTERNAL for efficiency. + */ + ++#include // uint8_t, uint32_t ++#include // bool ++#include // size_t ++#include // GList ++#include // xmlNode, xmlAttr ++#include // struct qb_ipc_response_header ++ ++/* ++ * XML and ACLs ++ */ ++ + enum xml_private_flags { + xpf_none = 0x0000, + xpf_dirty = 0x0001, +@@ -40,8 +51,8 @@ typedef struct xml_private_s { + long check; + uint32_t flags; + char *user; +- GListPtr acls; +- GListPtr deleted_objs; ++ GList *acls; ++ GList *deleted_objs; + } xml_private_t; + + G_GNUC_INTERNAL +@@ -86,4 +97,24 @@ pcmk__xml_attr_value(const xmlAttr *attr) + : (const char *) attr->children->content; + } + ++/* ++ * IPC ++ */ ++ ++#define PCMK__IPC_VERSION 1 ++ ++typedef struct pcmk__ipc_header_s { ++ struct qb_ipc_response_header qb; ++ uint32_t size_uncompressed; ++ uint32_t size_compressed; ++ uint32_t flags; ++ uint8_t version; ++} pcmk__ipc_header_t; ++ ++G_GNUC_INTERNAL ++unsigned int pcmk__ipc_buffer_size(unsigned int max); ++ ++G_GNUC_INTERNAL ++bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); ++ + #endif // CRMCOMMON_PRIVATE__H +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +deleted file mode 100644 +index 
defaa7e..0000000 +--- a/lib/common/ipc.c ++++ /dev/null +@@ -1,1846 +0,0 @@ +-/* +- * Copyright 2004-2020 the Pacemaker project contributors +- * +- * The version control history for this file may have further details. +- * +- * This source code is licensed under the GNU Lesser General Public License +- * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +- */ +- +-#include +- +-#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) +-# ifdef US_AUTH_PEERCRED_UCRED +-# ifndef _GNU_SOURCE +-# define _GNU_SOURCE +-# endif +-# endif +-# include +-#elif defined(US_AUTH_GETPEERUCRED) +-# include +-#endif +- +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-#include /* indirectly: pcmk_err_generic */ +-#include +-#include +-#include +- +-#define PCMK_IPC_VERSION 1 +- +-/* Evict clients whose event queue grows this large (by default) */ +-#define PCMK_IPC_DEFAULT_QUEUE_MAX 500 +- +-struct crm_ipc_response_header { +- struct qb_ipc_response_header qb; +- uint32_t size_uncompressed; +- uint32_t size_compressed; +- uint32_t flags; +- uint8_t version; /* Protect against version changes for anyone that might bother to statically link us */ +-}; +- +-static int hdr_offset = 0; +-static unsigned int ipc_buffer_max = 0; +-static unsigned int pick_ipc_buffer(unsigned int max); +- +-static inline void +-crm_ipc_init(void) +-{ +- if (hdr_offset == 0) { +- hdr_offset = sizeof(struct crm_ipc_response_header); +- } +- if (ipc_buffer_max == 0) { +- ipc_buffer_max = pick_ipc_buffer(0); +- } +-} +- +-unsigned int +-crm_ipc_default_buffer_size(void) +-{ +- return pick_ipc_buffer(0); +-} +- +-static char * +-generateReference(const char *custom1, const char *custom2) +-{ +- static uint ref_counter = 0; +- +- return crm_strdup_printf("%s-%s-%lld-%u", +- (custom1? custom1 : "_empty_"), +- (custom2? 
custom2 : "_empty_"), +- (long long) time(NULL), ref_counter++); +-} +- +-xmlNode * +-create_request_adv(const char *task, xmlNode * msg_data, +- const char *host_to, const char *sys_to, +- const char *sys_from, const char *uuid_from, const char *origin) +-{ +- char *true_from = NULL; +- xmlNode *request = NULL; +- char *reference = generateReference(task, sys_from); +- +- if (uuid_from != NULL) { +- true_from = generate_hash_key(sys_from, uuid_from); +- } else if (sys_from != NULL) { +- true_from = strdup(sys_from); +- } else { +- crm_err("No sys from specified"); +- } +- +- // host_from will get set for us if necessary by the controller when routed +- request = create_xml_node(NULL, __FUNCTION__); +- crm_xml_add(request, F_CRM_ORIGIN, origin); +- crm_xml_add(request, F_TYPE, T_CRM); +- crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET); +- crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); +- crm_xml_add(request, F_CRM_REFERENCE, reference); +- crm_xml_add(request, F_CRM_TASK, task); +- crm_xml_add(request, F_CRM_SYS_TO, sys_to); +- crm_xml_add(request, F_CRM_SYS_FROM, true_from); +- +- /* HOSTTO will be ignored if it is to the DC anyway. 
*/ +- if (host_to != NULL && strlen(host_to) > 0) { +- crm_xml_add(request, F_CRM_HOST_TO, host_to); +- } +- +- if (msg_data != NULL) { +- add_message_xml(request, F_CRM_DATA, msg_data); +- } +- free(reference); +- free(true_from); +- +- return request; +-} +- +-/* +- * This method adds a copy of xml_response_data +- */ +-xmlNode * +-create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin) +-{ +- xmlNode *reply = NULL; +- +- const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM); +- const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM); +- const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO); +- const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE); +- const char *operation = crm_element_value(original_request, F_CRM_TASK); +- const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE); +- +- if (type == NULL) { +- crm_err("Cannot create new_message, no message type in original message"); +- CRM_ASSERT(type != NULL); +- return NULL; +-#if 0 +- } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { +- crm_err("Cannot create new_message, original message was not a request"); +- return NULL; +-#endif +- } +- reply = create_xml_node(NULL, __FUNCTION__); +- if (reply == NULL) { +- crm_err("Cannot create new_message, malloc failed"); +- return NULL; +- } +- +- crm_xml_add(reply, F_CRM_ORIGIN, origin); +- crm_xml_add(reply, F_TYPE, T_CRM); +- crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); +- crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE); +- crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference); +- crm_xml_add(reply, F_CRM_TASK, operation); +- +- /* since this is a reply, we reverse the from and to */ +- crm_xml_add(reply, F_CRM_SYS_TO, sys_from); +- crm_xml_add(reply, F_CRM_SYS_FROM, sys_to); +- +- /* HOSTTO will be ignored if it is to the DC anyway. 
*/ +- if (host_from != NULL && strlen(host_from) > 0) { +- crm_xml_add(reply, F_CRM_HOST_TO, host_from); +- } +- +- if (xml_response_data != NULL) { +- add_message_xml(reply, F_CRM_DATA, xml_response_data); +- } +- +- return reply; +-} +- +-/* Libqb based IPC */ +- +-/* Server... */ +- +-static GHashTable *client_connections = NULL; +- +-/*! +- * \internal +- * \brief Count IPC clients +- * +- * \return Number of active IPC client connections +- */ +-guint +-pcmk__ipc_client_count() +-{ +- return client_connections? g_hash_table_size(client_connections) : 0; +-} +- +-/*! +- * \internal +- * \brief Execute a function for each active IPC client connection +- * +- * \param[in] func Function to call +- * \param[in] user_data Pointer to pass to function +- * +- * \note The parameters are the same as for g_hash_table_foreach(). +- */ +-void +-pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) +-{ +- if ((func != NULL) && (client_connections != NULL)) { +- g_hash_table_foreach(client_connections, func, user_data); +- } +-} +- +-/*! +- * \internal +- * \brief Remote IPC clients based on iterative function result +- * +- * \param[in] func Function to call for each active IPC client +- * \param[in] user_data Pointer to pass to function +- * +- * \note The parameters are the same as for g_hash_table_foreach_remove(). 
+- */ +-void +-pcmk__foreach_ipc_client_remove(GHRFunc func, gpointer user_data) +-{ +- if ((func != NULL) && (client_connections != NULL)) { +- g_hash_table_foreach_remove(client_connections, func, user_data); +- } +-} +- +-pcmk__client_t * +-pcmk__find_client(qb_ipcs_connection_t *c) +-{ +- if (client_connections) { +- return g_hash_table_lookup(client_connections, c); +- } +- +- crm_trace("No client found for %p", c); +- return NULL; +-} +- +-pcmk__client_t * +-pcmk__find_client_by_id(const char *id) +-{ +- gpointer key; +- pcmk__client_t *client; +- GHashTableIter iter; +- +- if (client_connections && id) { +- g_hash_table_iter_init(&iter, client_connections); +- while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { +- if (strcmp(client->id, id) == 0) { +- return client; +- } +- } +- } +- +- crm_trace("No client found with id=%s", id); +- return NULL; +-} +- +-const char * +-pcmk__client_name(pcmk__client_t *c) +-{ +- if (c == NULL) { +- return "null"; +- } else if (c->name == NULL && c->id == NULL) { +- return "unknown"; +- } else if (c->name == NULL) { +- return c->id; +- } else { +- return c->name; +- } +-} +- +-const char * +-pcmk__client_type_str(enum pcmk__client_type client_type) +-{ +- switch (client_type) { +- case PCMK__CLIENT_IPC: +- return "IPC"; +- case PCMK__CLIENT_TCP: +- return "TCP"; +-#ifdef HAVE_GNUTLS_GNUTLS_H +- case PCMK__CLIENT_TLS: +- return "TLS"; +-#endif +- default: +- return "unknown"; +- } +-} +- +-void +-pcmk__client_cleanup(void) +-{ +- if (client_connections != NULL) { +- int active = g_hash_table_size(client_connections); +- +- if (active) { +- crm_err("Exiting with %d active IPC client%s", +- active, pcmk__plural_s(active)); +- } +- g_hash_table_destroy(client_connections); client_connections = NULL; +- } +-} +- +-void +-pcmk__drop_all_clients(qb_ipcs_service_t *service) +-{ +- qb_ipcs_connection_t *c = NULL; +- +- if (service == NULL) { +- return; +- } +- +- c = qb_ipcs_connection_first_get(service); +- +- while 
(c != NULL) { +- qb_ipcs_connection_t *last = c; +- +- c = qb_ipcs_connection_next_get(service, last); +- +- /* There really shouldn't be anyone connected at this point */ +- crm_notice("Disconnecting client %p, pid=%d...", +- last, pcmk__client_pid(last)); +- qb_ipcs_disconnect(last); +- qb_ipcs_connection_unref(last); +- } +-} +- +-/*! +- * \internal +- * \brief Allocate a new pcmk__client_t object based on an IPC connection +- * +- * \param[in] c IPC connection (or NULL to allocate generic client) +- * \param[in] key Connection table key (or NULL to use sane default) +- * \param[in] uid_client UID corresponding to c (ignored if c is NULL) +- * +- * \return Pointer to new pcmk__client_t (or NULL on error) +- */ +-static pcmk__client_t * +-client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) +-{ +- pcmk__client_t *client = calloc(1, sizeof(pcmk__client_t)); +- +- if (client == NULL) { +- crm_perror(LOG_ERR, "Allocating client"); +- return NULL; +- } +- +- if (c) { +-#if ENABLE_ACL +- client->user = pcmk__uid2username(uid_client); +- if (client->user == NULL) { +- client->user = strdup("#unprivileged"); +- CRM_CHECK(client->user != NULL, free(client); return NULL); +- crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", +- uid_client); +- } +-#endif +- client->ipcs = c; +- client->kind = PCMK__CLIENT_IPC; +- client->pid = pcmk__client_pid(c); +- if (key == NULL) { +- key = c; +- } +- } +- +- client->id = crm_generate_uuid(); +- if (client->id == NULL) { +- crm_err("Could not generate UUID for client"); +- free(client->user); +- free(client); +- return NULL; +- } +- if (key == NULL) { +- key = client->id; +- } +- if (client_connections == NULL) { +- crm_trace("Creating IPC client table"); +- client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); +- } +- g_hash_table_insert(client_connections, key, client); +- return client; +-} +- +-/*! 
+- * \brief Allocate a new pcmk__client_t object and generate its ID +- * +- * \param[in] key What to use as connections hash table key (NULL to use ID) +- * +- * \return Pointer to new pcmk__client_t (asserts on failure) +- */ +-pcmk__client_t * +-pcmk__new_unauth_client(void *key) +-{ +- pcmk__client_t *client = client_from_connection(NULL, key, 0); +- +- CRM_ASSERT(client != NULL); +- return client; +-} +- +-pcmk__client_t * +-pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) +-{ +- gid_t uid_cluster = 0; +- gid_t gid_cluster = 0; +- +- pcmk__client_t *client = NULL; +- +- CRM_CHECK(c != NULL, return NULL); +- +- if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { +- static bool need_log = TRUE; +- +- if (need_log) { +- crm_warn("Could not find user and group IDs for user %s", +- CRM_DAEMON_USER); +- need_log = FALSE; +- } +- } +- +- if (uid_client != 0) { +- crm_trace("Giving group %u access to new IPC connection", gid_cluster); +- /* Passing -1 to chown(2) means don't change */ +- qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); +- } +- +- /* TODO: Do our own auth checking, return NULL if unauthorized */ +- client = client_from_connection(c, NULL, uid_client); +- if (client == NULL) { +- return NULL; +- } +- +- if ((uid_client == 0) || (uid_client == uid_cluster)) { +- /* Remember when a connection came from root or hacluster */ +- set_bit(client->flags, pcmk__client_privileged); +- } +- +- crm_debug("New IPC client %s for PID %u with uid %d and gid %d", +- client->id, client->pid, uid_client, gid_client); +- return client; +-} +- +-static struct iovec * +-pcmk__new_ipc_event(void) +-{ +- struct iovec *iov = calloc(2, sizeof(struct iovec)); +- +- CRM_ASSERT(iov != NULL); +- return iov; +-} +- +-/*! 
+- * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() +- * +- * \param[in] event I/O vector to free +- */ +-void +-pcmk_free_ipc_event(struct iovec *event) +-{ +- if (event != NULL) { +- free(event[0].iov_base); +- free(event[1].iov_base); +- free(event); +- } +-} +- +-static void +-free_event(gpointer data) +-{ +- pcmk_free_ipc_event((struct iovec *) data); +-} +- +-static void +-add_event(pcmk__client_t *c, struct iovec *iov) +-{ +- if (c->event_queue == NULL) { +- c->event_queue = g_queue_new(); +- } +- g_queue_push_tail(c->event_queue, iov); +-} +- +-void +-pcmk__free_client(pcmk__client_t *c) +-{ +- if (c == NULL) { +- return; +- } +- +- if (client_connections) { +- if (c->ipcs) { +- crm_trace("Destroying %p/%p (%d remaining)", +- c, c->ipcs, g_hash_table_size(client_connections) - 1); +- g_hash_table_remove(client_connections, c->ipcs); +- +- } else { +- crm_trace("Destroying remote connection %p (%d remaining)", +- c, g_hash_table_size(client_connections) - 1); +- g_hash_table_remove(client_connections, c->id); +- } +- } +- +- if (c->event_timer) { +- g_source_remove(c->event_timer); +- } +- +- if (c->event_queue) { +- crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); +- g_queue_free_full(c->event_queue, free_event); +- } +- +- free(c->id); +- free(c->name); +- free(c->user); +- if (c->remote) { +- if (c->remote->auth_timeout) { +- g_source_remove(c->remote->auth_timeout); +- } +- free(c->remote->buffer); +- free(c->remote); +- } +- free(c); +-} +- +-/*! 
+- * \internal +- * \brief Raise IPC eviction threshold for a client, if allowed +- * +- * \param[in,out] client Client to modify +- * \param[in] qmax New threshold (as non-NULL string) +- * +- * \return TRUE if change was allowed, FALSE otherwise +- */ +-bool +-pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) +-{ +- if (is_set(client->flags, pcmk__client_privileged)) { +- long long qmax_int; +- +- errno = 0; +- qmax_int = crm_parse_ll(qmax, NULL); +- if ((errno == 0) && (qmax_int > 0)) { +- client->queue_max = (unsigned int) qmax_int; +- return TRUE; +- } +- } +- return FALSE; +-} +- +-int +-pcmk__client_pid(qb_ipcs_connection_t *c) +-{ +- struct qb_ipcs_connection_stats stats; +- +- stats.client_pid = 0; +- qb_ipcs_connection_stats_get(c, &stats, 0); +- return stats.client_pid; +-} +- +-/*! +- * \internal +- * \brief Retrieve message XML from data read from client IPC +- * +- * \param[in] c IPC client connection +- * \param[in] data Data read from client connection +- * \param[out] id Where to store message ID from libqb header +- * \param[out] flags Where to store flags from libqb header +- * +- * \return Message XML on success, NULL otherwise +- */ +-xmlNode * +-pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, +- uint32_t *flags) +-{ +- xmlNode *xml = NULL; +- char *uncompressed = NULL; +- char *text = ((char *)data) + sizeof(struct crm_ipc_response_header); +- struct crm_ipc_response_header *header = data; +- +- if (id) { +- *id = ((struct qb_ipc_response_header *)data)->id; +- } +- if (flags) { +- *flags = header->flags; +- } +- +- if (is_set(header->flags, crm_ipc_proxied)) { +- /* Mark this client as being the endpoint of a proxy connection. +- * Proxy connections responses are sent on the event channel, to avoid +- * blocking the controller serving as proxy. 
+- */ +- c->flags |= pcmk__client_proxied; +- } +- +- if(header->version > PCMK_IPC_VERSION) { +- crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d", +- header->version, PCMK_IPC_VERSION); +- return NULL; +- } +- +- if (header->size_compressed) { +- int rc = 0; +- unsigned int size_u = 1 + header->size_uncompressed; +- uncompressed = calloc(1, size_u); +- +- crm_trace("Decompressing message data %u bytes into %u bytes", +- header->size_compressed, size_u); +- +- rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); +- text = uncompressed; +- +- if (rc != BZ_OK) { +- crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", +- bz2_strerror(rc), rc); +- free(uncompressed); +- return NULL; +- } +- } +- +- CRM_ASSERT(text[header->size_uncompressed - 1] == 0); +- +- xml = string2xml(text); +- crm_log_xml_trace(xml, "[IPC received]"); +- +- free(uncompressed); +- return xml; +-} +- +-static int crm_ipcs_flush_events(pcmk__client_t *c); +- +-static gboolean +-crm_ipcs_flush_events_cb(gpointer data) +-{ +- pcmk__client_t *c = data; +- +- c->event_timer = 0; +- crm_ipcs_flush_events(c); +- return FALSE; +-} +- +-/*! +- * \internal +- * \brief Add progressive delay before next event queue flush +- * +- * \param[in,out] c Client connection to add delay to +- * \param[in] queue_len Current event queue length +- */ +-static inline void +-delay_next_flush(pcmk__client_t *c, unsigned int queue_len) +-{ +- /* Delay a maximum of 1.5 seconds */ +- guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; +- +- c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c); +-} +- +-/*! 
+- * \internal +- * \brief Send client any messages in its queue +- * +- * \param[in] c Client to flush +- * +- * \return Standard Pacemaker return value +- */ +-static int +-crm_ipcs_flush_events(pcmk__client_t *c) +-{ +- int rc = pcmk_rc_ok; +- ssize_t qb_rc = 0; +- unsigned int sent = 0; +- unsigned int queue_len = 0; +- +- if (c == NULL) { +- return rc; +- +- } else if (c->event_timer) { +- /* There is already a timer, wait until it goes off */ +- crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); +- return rc; +- } +- +- if (c->event_queue) { +- queue_len = g_queue_get_length(c->event_queue); +- } +- while (sent < 100) { +- struct crm_ipc_response_header *header = NULL; +- struct iovec *event = NULL; +- +- if (c->event_queue) { +- // We don't pop unless send is successful +- event = g_queue_peek_head(c->event_queue); +- } +- if (event == NULL) { // Queue is empty +- break; +- } +- +- qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); +- if (qb_rc < 0) { +- rc = (int) -qb_rc; +- break; +- } +- event = g_queue_pop_head(c->event_queue); +- +- sent++; +- header = event[0].iov_base; +- if (header->size_compressed) { +- crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", +- header->qb.id, c->ipcs, c->pid, (long long) qb_rc); +- } else { +- crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", +- header->qb.id, c->ipcs, c->pid, (long long) qb_rc, +- (char *) (event[1].iov_base)); +- } +- pcmk_free_ipc_event(event); +- } +- +- queue_len -= sent; +- if (sent > 0 || queue_len) { +- crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", +- sent, queue_len, c->ipcs, c->pid, +- pcmk_rc_str(rc), (long long) qb_rc); +- } +- +- if (queue_len) { +- +- /* Allow clients to briefly fall behind on processing incoming messages, +- * but drop completely unresponsive clients so the connection doesn't +- * consume resources indefinitely. 
+- */ +- if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { +- if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { +- /* Don't evict for a new or shrinking backlog */ +- crm_warn("Client with process ID %u has a backlog of %u messages " +- CRM_XS " %p", c->pid, queue_len, c->ipcs); +- } else { +- crm_err("Evicting client with process ID %u due to backlog of %u messages " +- CRM_XS " %p", c->pid, queue_len, c->ipcs); +- c->queue_backlog = 0; +- qb_ipcs_disconnect(c->ipcs); +- return rc; +- } +- } +- +- c->queue_backlog = queue_len; +- delay_next_flush(c, queue_len); +- +- } else { +- /* Event queue is empty, there is no backlog */ +- c->queue_backlog = 0; +- } +- +- return rc; +-} +- +-/*! +- * \internal +- * \brief Create an I/O vector for sending an IPC XML message +- * +- * \param[in] request Identifier for libqb response header +- * \param[in] message XML message to send +- * \param[in] max_send_size If 0, default IPC buffer size is used +- * \param[out] result Where to store prepared I/O vector +- * \param[out] bytes Size of prepared data in bytes +- * +- * \return Standard Pacemaker return code +- */ +-int +-pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, +- uint32_t max_send_size, struct iovec **result, +- ssize_t *bytes) +-{ +- static unsigned int biggest = 0; +- struct iovec *iov; +- unsigned int total = 0; +- char *compressed = NULL; +- char *buffer = NULL; +- struct crm_ipc_response_header *header = NULL; +- +- if ((message == NULL) || (result == NULL)) { +- return EINVAL; +- } +- +- header = calloc(1, sizeof(struct crm_ipc_response_header)); +- if (header == NULL) { +- return ENOMEM; /* errno mightn't be set by allocator */ +- } +- +- buffer = dump_xml_unformatted(message); +- crm_ipc_init(); +- +- if (max_send_size == 0) { +- max_send_size = ipc_buffer_max; +- } +- CRM_LOG_ASSERT(max_send_size != 0); +- +- *result = NULL; +- iov = pcmk__new_ipc_event(); +- iov[0].iov_len = hdr_offset; +- iov[0].iov_base = header; 
+- +- header->version = PCMK_IPC_VERSION; +- header->size_uncompressed = 1 + strlen(buffer); +- total = iov[0].iov_len + header->size_uncompressed; +- +- if (total < max_send_size) { +- iov[1].iov_base = buffer; +- iov[1].iov_len = header->size_uncompressed; +- +- } else { +- unsigned int new_size = 0; +- +- if (pcmk__compress(buffer, (unsigned int) header->size_uncompressed, +- (unsigned int) max_send_size, &compressed, +- &new_size) == pcmk_rc_ok) { +- +- header->flags |= crm_ipc_compressed; +- header->size_compressed = new_size; +- +- iov[1].iov_len = header->size_compressed; +- iov[1].iov_base = compressed; +- +- free(buffer); +- +- biggest = QB_MAX(header->size_compressed, biggest); +- +- } else { +- crm_log_xml_trace(message, "EMSGSIZE"); +- biggest = QB_MAX(header->size_uncompressed, biggest); +- +- crm_err("Could not compress %u-byte message into less than IPC " +- "limit of %u bytes; set PCMK_ipc_buffer to higher value " +- "(%u bytes suggested)", +- header->size_uncompressed, max_send_size, 4 * biggest); +- +- free(compressed); +- free(buffer); +- pcmk_free_ipc_event(iov); +- return EMSGSIZE; +- } +- } +- +- header->qb.size = iov[0].iov_len + iov[1].iov_len; +- header->qb.id = (int32_t)request; /* Replying to a specific request */ +- +- *result = iov; +- CRM_ASSERT(header->qb.size > 0); +- if (bytes != NULL) { +- *bytes = header->qb.size; +- } +- return pcmk_rc_ok; +-} +- +-int +-pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) +-{ +- int rc = pcmk_rc_ok; +- static uint32_t id = 1; +- struct crm_ipc_response_header *header = iov[0].iov_base; +- +- if (c->flags & pcmk__client_proxied) { +- /* _ALL_ replies to proxied connections need to be sent as events */ +- if (is_not_set(flags, crm_ipc_server_event)) { +- flags |= crm_ipc_server_event; +- /* this flag lets us know this was originally meant to be a response. +- * even though we're sending it over the event channel. 
*/ +- flags |= crm_ipc_proxied_relay_response; +- } +- } +- +- header->flags |= flags; +- if (flags & crm_ipc_server_event) { +- header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ +- +- if (flags & crm_ipc_server_free) { +- crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); +- add_event(c, iov); +- +- } else { +- struct iovec *iov_copy = pcmk__new_ipc_event(); +- +- crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); +- iov_copy[0].iov_len = iov[0].iov_len; +- iov_copy[0].iov_base = malloc(iov[0].iov_len); +- memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); +- +- iov_copy[1].iov_len = iov[1].iov_len; +- iov_copy[1].iov_base = malloc(iov[1].iov_len); +- memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); +- +- add_event(c, iov_copy); +- } +- +- } else { +- ssize_t qb_rc; +- +- CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ +- +- qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); +- if (qb_rc < header->qb.size) { +- if (qb_rc < 0) { +- rc = (int) -qb_rc; +- } +- crm_notice("Response %d to pid %d failed: %s " +- CRM_XS " bytes=%u rc=%lld ipcs=%p", +- header->qb.id, c->pid, pcmk_rc_str(rc), +- header->qb.size, (long long) qb_rc, c->ipcs); +- +- } else { +- crm_trace("Response %d sent, %lld bytes to %p[%d]", +- header->qb.id, (long long) qb_rc, c->ipcs, c->pid); +- } +- +- if (flags & crm_ipc_server_free) { +- pcmk_free_ipc_event(iov); +- } +- } +- +- if (flags & crm_ipc_server_event) { +- rc = crm_ipcs_flush_events(c); +- } else { +- crm_ipcs_flush_events(c); +- } +- +- if ((rc == EPIPE) || (rc == ENOTCONN)) { +- crm_trace("Client %p disconnected", c->ipcs); +- } +- return rc; +-} +- +-int +-pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, +- uint32_t flags) +-{ +- struct iovec *iov = NULL; +- int rc = pcmk_rc_ok; +- +- if (c == NULL) { +- return EINVAL; +- } +- crm_ipc_init(); +- rc = pcmk__ipc_prepare_iov(request, message, ipc_buffer_max, &iov, NULL); 
+- if (rc == pcmk_rc_ok) { +- rc = pcmk__ipc_send_iov(c, iov, flags | crm_ipc_server_free); +- } else { +- pcmk_free_ipc_event(iov); +- crm_notice("IPC message to pid %d failed: %s " CRM_XS " rc=%d", +- c->pid, pcmk_rc_str(rc), rc); +- } +- return rc; +-} +- +-void +-pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, +- uint32_t request, uint32_t flags, const char *tag) +-{ +- if (flags & crm_ipc_client_response) { +- xmlNode *ack = create_xml_node(NULL, tag); +- +- crm_trace("Ack'ing IPC message from %s", pcmk__client_name(c)); +- c->request_id = 0; +- crm_xml_add(ack, "function", function); +- crm_xml_add_int(ack, "line", line); +- pcmk__ipc_send_xml(c, request, ack, flags); +- free_xml(ack); +- } +-} +- +-/*! +- * \internal +- * \brief Add an IPC server to the main loop for the pacemaker-based API +- * +- * \param[out] ipcs_ro New IPC server for read-only pacemaker-based API +- * \param[out] ipcs_rw New IPC server for read/write pacemaker-based API +- * \param[out] ipcs_shm New IPC server for shared-memory pacemaker-based API +- * \param[in] ro_cb IPC callbacks for read-only API +- * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs +- * +- * \note This function exits fatally if unable to create the servers. 
+- */ +-void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, +- qb_ipcs_service_t **ipcs_rw, +- qb_ipcs_service_t **ipcs_shm, +- struct qb_ipcs_service_handlers *ro_cb, +- struct qb_ipcs_service_handlers *rw_cb) +-{ +- *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, +- QB_IPC_NATIVE, ro_cb); +- +- *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, +- QB_IPC_NATIVE, rw_cb); +- +- *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, +- QB_IPC_SHM, rw_cb); +- +- if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { +- crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); +- crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); +- crm_exit(CRM_EX_FATAL); +- } +-} +- +-/*! +- * \internal +- * \brief Destroy IPC servers for pacemaker-based API +- * +- * \param[out] ipcs_ro IPC server for read-only pacemaker-based API +- * \param[out] ipcs_rw IPC server for read/write pacemaker-based API +- * \param[out] ipcs_shm IPC server for shared-memory pacemaker-based API +- * +- * \note This is a convenience function for calling qb_ipcs_destroy() for each +- * argument. +- */ +-void +-pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, +- qb_ipcs_service_t *ipcs_rw, +- qb_ipcs_service_t *ipcs_shm) +-{ +- qb_ipcs_destroy(ipcs_ro); +- qb_ipcs_destroy(ipcs_rw); +- qb_ipcs_destroy(ipcs_shm); +-} +- +-/*! +- * \internal +- * \brief Add an IPC server to the main loop for the pacemaker-controld API +- * +- * \param[in] cb IPC callbacks +- * +- * \return Newly created IPC server +- */ +-qb_ipcs_service_t * +-pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) +-{ +- return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); +-} +- +-/*! +- * \internal +- * \brief Add an IPC server to the main loop for the pacemaker-attrd API +- * +- * \param[in] cb IPC callbacks +- * +- * \note This function exits fatally if unable to create the servers. 
+- */ +-void +-pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, +- struct qb_ipcs_service_handlers *cb) +-{ +- *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); +- +- if (*ipcs == NULL) { +- crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn"); +- crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); +- crm_exit(CRM_EX_FATAL); +- } +-} +- +-/*! +- * \internal +- * \brief Add an IPC server to the main loop for the pacemaker-fenced API +- * +- * \param[in] cb IPC callbacks +- * +- * \note This function exits fatally if unable to create the servers. +- */ +-void +-pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, +- struct qb_ipcs_service_handlers *cb) +-{ +- *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, +- QB_LOOP_HIGH); +- +- if (*ipcs == NULL) { +- crm_err("Failed to create fencer: exiting and inhibiting respawn."); +- crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); +- crm_exit(CRM_EX_FATAL); +- } +-} +- +-/* Client... */ +- +-#define MIN_MSG_SIZE 12336 /* sizeof(struct qb_ipc_connection_response) */ +-#define MAX_MSG_SIZE 128*1024 /* 128k default */ +- +-struct crm_ipc_s { +- struct pollfd pfd; +- +- /* the max size we can send/receive over ipc */ +- unsigned int max_buf_size; +- /* Size of the allocated 'buffer' */ +- unsigned int buf_size; +- int msg_size; +- int need_reply; +- char *buffer; +- char *name; +- +- qb_ipcc_connection_t *ipc; +- +-}; +- +-static unsigned int +-pick_ipc_buffer(unsigned int max) +-{ +- static unsigned int global_max = 0; +- +- if (global_max == 0) { +- const char *env = getenv("PCMK_ipc_buffer"); +- +- if (env) { +- int env_max = crm_parse_int(env, "0"); +- +- global_max = (env_max > 0)? 
QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE; +- +- } else { +- global_max = MAX_MSG_SIZE; +- } +- } +- +- return QB_MAX(max, global_max); +-} +- +-crm_ipc_t * +-crm_ipc_new(const char *name, size_t max_size) +-{ +- crm_ipc_t *client = NULL; +- +- client = calloc(1, sizeof(crm_ipc_t)); +- +- client->name = strdup(name); +- client->buf_size = pick_ipc_buffer(max_size); +- client->buffer = malloc(client->buf_size); +- +- /* Clients initiating connection pick the max buf size */ +- client->max_buf_size = client->buf_size; +- +- client->pfd.fd = -1; +- client->pfd.events = POLLIN; +- client->pfd.revents = 0; +- +- return client; +-} +- +-/*! +- * \brief Establish an IPC connection to a Pacemaker component +- * +- * \param[in] client Connection instance obtained from crm_ipc_new() +- * +- * \return TRUE on success, FALSE otherwise (in which case errno will be set; +- * specifically, in case of discovering the remote side is not +- * authentic, its value is set to ECONNABORTED). +- */ +-bool +-crm_ipc_connect(crm_ipc_t * client) +-{ +- uid_t cl_uid = 0; +- gid_t cl_gid = 0; +- pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; +- int rv; +- +- client->need_reply = FALSE; +- client->ipc = qb_ipcc_connect(client->name, client->buf_size); +- +- if (client->ipc == NULL) { +- crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); +- return FALSE; +- } +- +- client->pfd.fd = crm_ipc_get_fd(client); +- if (client->pfd.fd < 0) { +- rv = errno; +- /* message already omitted */ +- crm_ipc_close(client); +- errno = rv; +- return FALSE; +- } +- +- rv = pcmk_daemon_user(&cl_uid, &cl_gid); +- if (rv < 0) { +- /* message already omitted */ +- crm_ipc_close(client); +- errno = -rv; +- return FALSE; +- } +- +- if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid, +- &found_pid, &found_uid, +- &found_gid))) { +- crm_err("Daemon (IPC %s) is not authentic:" +- " process %lld (uid: %lld, gid: %lld)", +- client->name, 
(long long) PCMK__SPECIAL_PID_AS_0(found_pid), +- (long long) found_uid, (long long) found_gid); +- crm_ipc_close(client); +- errno = ECONNABORTED; +- return FALSE; +- +- } else if (rv < 0) { +- errno = -rv; +- crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)", +- client->name); +- crm_ipc_close(client); +- errno = -rv; +- return FALSE; +- } +- +- qb_ipcc_context_set(client->ipc, client); +- +-#ifdef HAVE_IPCS_GET_BUFFER_SIZE +- client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc); +- if (client->max_buf_size > client->buf_size) { +- free(client->buffer); +- client->buffer = calloc(1, client->max_buf_size); +- client->buf_size = client->max_buf_size; +- } +-#endif +- +- return TRUE; +-} +- +-void +-crm_ipc_close(crm_ipc_t * client) +-{ +- if (client) { +- crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc); +- +- if (client->ipc) { +- qb_ipcc_connection_t *ipc = client->ipc; +- +- client->ipc = NULL; +- qb_ipcc_disconnect(ipc); +- } +- } +-} +- +-void +-crm_ipc_destroy(crm_ipc_t * client) +-{ +- if (client) { +- if (client->ipc && qb_ipcc_is_connected(client->ipc)) { +- crm_notice("Destroying an active IPC connection to %s", client->name); +- /* The next line is basically unsafe +- * +- * If this connection was attached to mainloop and mainloop is active, +- * the 'disconnected' callback will end up back here and we'll end +- * up free'ing the memory twice - something that can still happen +- * even without this if we destroy a connection and it closes before +- * we call exit +- */ +- /* crm_ipc_close(client); */ +- } +- crm_trace("Destroying IPC connection to %s: %p", client->name, client); +- free(client->buffer); +- free(client->name); +- free(client); +- } +-} +- +-int +-crm_ipc_get_fd(crm_ipc_t * client) +-{ +- int fd = 0; +- +- if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) { +- return fd; +- } +- errno = EINVAL; +- crm_perror(LOG_ERR, "Could not obtain file IPC descriptor 
for %s", +- (client? client->name : "unspecified client")); +- return -errno; +-} +- +-bool +-crm_ipc_connected(crm_ipc_t * client) +-{ +- bool rc = FALSE; +- +- if (client == NULL) { +- crm_trace("No client"); +- return FALSE; +- +- } else if (client->ipc == NULL) { +- crm_trace("No connection"); +- return FALSE; +- +- } else if (client->pfd.fd < 0) { +- crm_trace("Bad descriptor"); +- return FALSE; +- } +- +- rc = qb_ipcc_is_connected(client->ipc); +- if (rc == FALSE) { +- client->pfd.fd = -EINVAL; +- } +- return rc; +-} +- +-/*! +- * \brief Check whether an IPC connection is ready to be read +- * +- * \param[in] client Connection to check +- * +- * \return Positive value if ready to be read, 0 if not ready, -errno on error +- */ +-int +-crm_ipc_ready(crm_ipc_t *client) +-{ +- int rc; +- +- CRM_ASSERT(client != NULL); +- +- if (crm_ipc_connected(client) == FALSE) { +- return -ENOTCONN; +- } +- +- client->pfd.revents = 0; +- rc = poll(&(client->pfd), 1, 0); +- return (rc < 0)? -errno : rc; +-} +- +-// \return Standard Pacemaker return code +-static int +-crm_ipc_decompress(crm_ipc_t * client) +-{ +- struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer; +- +- if (header->size_compressed) { +- int rc = 0; +- unsigned int size_u = 1 + header->size_uncompressed; +- /* never let buf size fall below our max size required for ipc reads. */ +- unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size); +- char *uncompressed = calloc(1, new_buf_size); +- +- crm_trace("Decompressing message data %u bytes into %u bytes", +- header->size_compressed, size_u); +- +- rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u, +- client->buffer + hdr_offset, header->size_compressed, 1, 0); +- +- if (rc != BZ_OK) { +- crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", +- bz2_strerror(rc), rc); +- free(uncompressed); +- return EILSEQ; +- } +- +- /* +- * This assert no longer holds true. 
For an identical msg, some clients may +- * require compression, and others may not. If that same msg (event) is sent +- * to multiple clients, it could result in some clients receiving a compressed +- * msg even though compression was not explicitly required for them. +- * +- * CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max); +- */ +- CRM_ASSERT(size_u == header->size_uncompressed); +- +- memcpy(uncompressed, client->buffer, hdr_offset); /* Preserve the header */ +- header = (struct crm_ipc_response_header *)(void*)uncompressed; +- +- free(client->buffer); +- client->buf_size = new_buf_size; +- client->buffer = uncompressed; +- } +- +- CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0); +- return pcmk_rc_ok; +-} +- +-long +-crm_ipc_read(crm_ipc_t * client) +-{ +- struct crm_ipc_response_header *header = NULL; +- +- CRM_ASSERT(client != NULL); +- CRM_ASSERT(client->ipc != NULL); +- CRM_ASSERT(client->buffer != NULL); +- +- crm_ipc_init(); +- +- client->buffer[0] = 0; +- client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, +- client->buf_size, 0); +- if (client->msg_size >= 0) { +- int rc = crm_ipc_decompress(client); +- +- if (rc != pcmk_rc_ok) { +- return pcmk_rc2legacy(rc); +- } +- +- header = (struct crm_ipc_response_header *)(void*)client->buffer; +- if(header->version > PCMK_IPC_VERSION) { +- crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d", +- header->version, PCMK_IPC_VERSION); +- return -EBADMSG; +- } +- +- crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s", +- client->name, header->qb.id, header->qb.size, client->msg_size, +- client->buffer + hdr_offset); +- +- } else { +- crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size)); +- } +- +- if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { +- crm_err("Connection to %s failed", client->name); +- } +- +- if (header) { +- /* Data excluding the 
header */ +- return header->size_uncompressed; +- } +- return -ENOMSG; +-} +- +-const char * +-crm_ipc_buffer(crm_ipc_t * client) +-{ +- CRM_ASSERT(client != NULL); +- return client->buffer + sizeof(struct crm_ipc_response_header); +-} +- +-uint32_t +-crm_ipc_buffer_flags(crm_ipc_t * client) +-{ +- struct crm_ipc_response_header *header = NULL; +- +- CRM_ASSERT(client != NULL); +- if (client->buffer == NULL) { +- return 0; +- } +- +- header = (struct crm_ipc_response_header *)(void*)client->buffer; +- return header->flags; +-} +- +-const char * +-crm_ipc_name(crm_ipc_t * client) +-{ +- CRM_ASSERT(client != NULL); +- return client->name; +-} +- +-// \return Standard Pacemaker return code +-static int +-internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, +- ssize_t *bytes) +-{ +- time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); +- int rc = pcmk_rc_ok; +- +- crm_ipc_init(); +- +- /* get the reply */ +- crm_trace("client %s waiting on reply to msg id %d", client->name, request_id); +- do { +- +- *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); +- if (*bytes > 0) { +- struct crm_ipc_response_header *hdr = NULL; +- +- rc = crm_ipc_decompress(client); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- hdr = (struct crm_ipc_response_header *)(void*)client->buffer; +- if (hdr->qb.id == request_id) { +- /* Got it */ +- break; +- } else if (hdr->qb.id < request_id) { +- xmlNode *bad = string2xml(crm_ipc_buffer(client)); +- +- crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); +- crm_log_xml_notice(bad, "OldIpcReply"); +- +- } else { +- xmlNode *bad = string2xml(crm_ipc_buffer(client)); +- +- crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); +- crm_log_xml_notice(bad, "ImpossibleReply"); +- CRM_ASSERT(hdr->qb.id <= request_id); +- } +- } else if (crm_ipc_connected(client) == FALSE) { +- crm_err("Server disconnected client %s while waiting for msg id %d", client->name, +- 
request_id); +- break; +- } +- +- } while (time(NULL) < timeout); +- +- if (*bytes < 0) { +- rc = (int) -*bytes; // System errno +- } +- return rc; +-} +- +-/*! +- * \brief Send an IPC XML message +- * +- * \param[in] client Connection to IPC server +- * \param[in] message XML message to send +- * \param[in] flags Bitmask of crm_ipc_flags +- * \param[in] ms_timeout Give up if not sent within this much time +- * (5 seconds if 0, or no timeout if negative) +- * \param[out] reply Reply from server (or NULL if none) +- * +- * \return Negative errno on error, otherwise size of reply received in bytes +- * if reply was needed, otherwise number of bytes sent +- */ +-int +-crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, +- xmlNode ** reply) +-{ +- int rc = 0; +- ssize_t qb_rc = 0; +- ssize_t bytes = 0; +- struct iovec *iov; +- static uint32_t id = 0; +- static int factor = 8; +- struct crm_ipc_response_header *header; +- +- crm_ipc_init(); +- +- if (client == NULL) { +- crm_notice("Can't send IPC request without connection (bug?): %.100s", +- message); +- return -ENOTCONN; +- +- } else if (crm_ipc_connected(client) == FALSE) { +- /* Don't even bother */ +- crm_notice("Can't send IPC request to %s: Connection closed", +- client->name); +- return -ENOTCONN; +- } +- +- if (ms_timeout == 0) { +- ms_timeout = 5000; +- } +- +- if (client->need_reply) { +- qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); +- if (qb_rc < 0) { +- crm_warn("Sending IPC to %s disabled until pending reply received", +- client->name); +- return -EALREADY; +- +- } else { +- crm_notice("Sending IPC to %s re-enabled after pending reply received", +- client->name); +- client->need_reply = FALSE; +- } +- } +- +- id++; +- CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ +- rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); +- if (rc != pcmk_rc_ok) { +- crm_warn("Couldn't prepare IPC request to 
%s: %s " CRM_XS " rc=%d", +- client->name, pcmk_rc_str(rc), rc); +- return pcmk_rc2legacy(rc); +- } +- +- header = iov[0].iov_base; +- header->flags |= flags; +- +- if(is_set(flags, crm_ipc_proxied)) { +- /* Don't look for a synchronous response */ +- clear_bit(flags, crm_ipc_client_response); +- } +- +- if(header->size_compressed) { +- if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { +- crm_notice("Compressed message exceeds %d0%% of configured IPC " +- "limit (%u bytes); consider setting PCMK_ipc_buffer to " +- "%u or higher", +- factor, client->max_buf_size, 2 * client->max_buf_size); +- factor++; +- } +- } +- +- crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", +- client->name, header->qb.id, header->qb.size, ms_timeout); +- +- if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) { +- +- time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); +- +- do { +- /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return +- * an error if it's not connected? +- */ +- if (!crm_ipc_connected(client)) { +- goto send_cleanup; +- } +- +- qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); +- } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); +- +- rc = (int) qb_rc; // Negative of system errno, or bytes sent +- if (qb_rc <= 0) { +- goto send_cleanup; +- +- } else if (is_not_set(flags, crm_ipc_client_response)) { +- crm_trace("Not waiting for reply to %s IPC request %d", +- client->name, header->qb.id); +- goto send_cleanup; +- } +- +- rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); +- if (rc != pcmk_rc_ok) { +- /* We didn't get the reply in time, so disable future sends for now. +- * The only alternative would be to close the connection since we +- * don't know how to detect and discard out-of-sequence replies. 
+- * +- * @TODO Implement out-of-sequence detection +- */ +- client->need_reply = TRUE; +- } +- rc = (int) bytes; // Negative system errno, or size of reply received +- +- } else { +- // No timeout, and client response needed +- do { +- qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, +- client->buf_size, -1); +- } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); +- rc = (int) qb_rc; // Negative system errno, or size of reply received +- } +- +- if (rc > 0) { +- struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer; +- +- crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", +- rc, hdr->qb.id, client->name, header->qb.id, +- crm_ipc_buffer(client)); +- +- if (reply) { +- *reply = string2xml(crm_ipc_buffer(client)); +- } +- +- } else { +- crm_trace("No reply to %s IPC %d: rc=%d", +- client->name, header->qb.id, rc); +- } +- +- send_cleanup: +- if (crm_ipc_connected(client) == FALSE) { +- crm_notice("Couldn't send %s IPC request %d: Connection closed " +- CRM_XS " rc=%d", client->name, header->qb.id, rc); +- +- } else if (rc == -ETIMEDOUT) { +- crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", +- client->name, header->qb.id, pcmk_strerror(rc), ms_timeout, +- rc); +- crm_write_blackbox(0, NULL); +- +- } else if (rc <= 0) { +- crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", +- client->name, header->qb.id, +- ((rc == 0)? 
"No bytes sent" : pcmk_strerror(rc)), rc); +- } +- +- pcmk_free_ipc_event(iov); +- return rc; +-} +- +-int +-crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, +- pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { +- int ret = 0; +- pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; +-#if defined(US_AUTH_PEERCRED_UCRED) +- struct ucred ucred; +- socklen_t ucred_len = sizeof(ucred); +- +- if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, +- &ucred, &ucred_len) +- && ucred_len == sizeof(ucred)) { +- found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; +- +-#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) +- struct sockpeercred sockpeercred; +- socklen_t sockpeercred_len = sizeof(sockpeercred); +- +- if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, +- &sockpeercred, &sockpeercred_len) +- && sockpeercred_len == sizeof(sockpeercred_len)) { +- found_pid = sockpeercred.pid; +- found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; +- +-#elif defined(US_AUTH_GETPEEREID) +- if (!getpeereid(sock, &found_uid, &found_gid)) { +- found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ +- +-#elif defined(US_AUTH_GETPEERUCRED) +- ucred_t *ucred; +- if (!getpeerucred(sock, &ucred)) { +- errno = 0; +- found_pid = ucred_getpid(ucred); +- found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); +- ret = -errno; +- ucred_free(ucred); +- if (ret) { +- return (ret < 0) ? ret : -pcmk_err_generic; +- } +- +-#else +-# error "No way to authenticate a Unix socket peer" +- errno = 0; +- if (0) { +-#endif +- if (gotpid != NULL) { +- *gotpid = found_pid; +- } +- if (gotuid != NULL) { +- *gotuid = found_uid; +- } +- if (gotgid != NULL) { +- *gotgid = found_gid; +- } +- ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid); +- } else { +- ret = (errno > 0) ? 
-errno : -pcmk_err_generic; +- } +- +- return ret; +-} +- +-int +-pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, +- gid_t refgid, pid_t *gotpid) +-{ +- static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ +- int fd; +- int rc = pcmk_rc_ipc_unresponsive; +- int auth_rc = 0; +- int32_t qb_rc; +- pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; +- qb_ipcc_connection_t *c; +- +- c = qb_ipcc_connect(name, 0); +- if (c == NULL) { +- crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); +- rc = pcmk_rc_ipc_unresponsive; +- goto bail; +- } +- +- qb_rc = qb_ipcc_fd_get(c, &fd); +- if (qb_rc != 0) { +- rc = (int) -qb_rc; // System errno +- crm_err("Could not get fd from %s IPC: %s " CRM_XS " rc=%d", +- name, pcmk_rc_str(rc), rc); +- goto bail; +- } +- +- auth_rc = crm_ipc_is_authentic_process(fd, refuid, refgid, &found_pid, +- &found_uid, &found_gid); +- if (auth_rc < 0) { +- rc = pcmk_legacy2rc(auth_rc); +- crm_err("Could not get peer credentials from %s IPC: %s " +- CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); +- goto bail; +- } +- +- if (gotpid != NULL) { +- *gotpid = found_pid; +- } +- +- if (auth_rc == 0) { +- crm_err("Daemon (IPC %s) effectively blocked with unauthorized" +- " process %lld (uid: %lld, gid: %lld)", +- name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), +- (long long) found_uid, (long long) found_gid); +- rc = pcmk_rc_ipc_unauthorized; +- goto bail; +- } +- +- rc = pcmk_rc_ok; +- if ((found_uid != refuid || found_gid != refgid) +- && strncmp(last_asked_name, name, sizeof(last_asked_name))) { +- if ((found_uid == 0) && (refuid != 0)) { +- crm_warn("Daemon (IPC %s) runs as root, whereas the expected" +- " credentials are %lld:%lld, hazard of violating" +- " the least privilege principle", +- name, (long long) refuid, (long long) refgid); +- } else { +- crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" +- " expected credentials are %lld:%lld, which may" +- " mean a different 
set of privileges than expected", +- name, (long long) found_uid, (long long) found_gid, +- (long long) refuid, (long long) refgid); +- } +- memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); +- } +- +-bail: +- if (c != NULL) { +- qb_ipcc_disconnect(c); +- } +- return rc; +-} +- +- +-/* Utils */ +- +-xmlNode * +-create_hello_message(const char *uuid, +- const char *client_name, const char *major_version, const char *minor_version) +-{ +- xmlNode *hello_node = NULL; +- xmlNode *hello = NULL; +- +- if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) +- || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { +- crm_err("Could not create IPC hello message from %s (UUID %s): " +- "missing information", +- client_name? client_name : "unknown client", +- uuid? uuid : "unknown"); +- return NULL; +- } +- +- hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); +- if (hello_node == NULL) { +- crm_err("Could not create IPC hello message from %s (UUID %s): " +- "Message data creation failed", client_name, uuid); +- return NULL; +- } +- +- crm_xml_add(hello_node, "major_version", major_version); +- crm_xml_add(hello_node, "minor_version", minor_version); +- crm_xml_add(hello_node, "client_name", client_name); +- crm_xml_add(hello_node, "client_uuid", uuid); +- +- hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); +- if (hello == NULL) { +- crm_err("Could not create IPC hello message from %s (UUID %s): " +- "Request creation failed", client_name, uuid); +- return NULL; +- } +- free_xml(hello_node); +- +- crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); +- return hello; +-} +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +new file mode 100644 +index 0000000..7737588 +--- /dev/null ++++ b/lib/common/ipc_client.c +@@ -0,0 +1,755 @@ ++/* ++ * Copyright 2004-2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. 
++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) ++# ifdef US_AUTH_PEERCRED_UCRED ++# ifndef _GNU_SOURCE ++# define _GNU_SOURCE ++# endif ++# endif ++# include ++#elif defined(US_AUTH_GETPEERUCRED) ++# include ++#endif ++ ++#include ++#include ++#include ++#include ++ ++#include /* indirectly: pcmk_err_generic */ ++#include ++#include ++#include ++#include "crmcommon_private.h" ++ ++struct crm_ipc_s { ++ struct pollfd pfd; ++ unsigned int max_buf_size; // maximum bytes we can send or receive over IPC ++ unsigned int buf_size; // size of allocated buffer ++ int msg_size; ++ int need_reply; ++ char *buffer; ++ char *name; ++ qb_ipcc_connection_t *ipc; ++}; ++ ++crm_ipc_t * ++crm_ipc_new(const char *name, size_t max_size) ++{ ++ crm_ipc_t *client = NULL; ++ ++ client = calloc(1, sizeof(crm_ipc_t)); ++ ++ client->name = strdup(name); ++ client->buf_size = pcmk__ipc_buffer_size(max_size); ++ client->buffer = malloc(client->buf_size); ++ ++ /* Clients initiating connection pick the max buf size */ ++ client->max_buf_size = client->buf_size; ++ ++ client->pfd.fd = -1; ++ client->pfd.events = POLLIN; ++ client->pfd.revents = 0; ++ ++ return client; ++} ++ ++/*! ++ * \brief Establish an IPC connection to a Pacemaker component ++ * ++ * \param[in] client Connection instance obtained from crm_ipc_new() ++ * ++ * \return TRUE on success, FALSE otherwise (in which case errno will be set; ++ * specifically, in case of discovering the remote side is not ++ * authentic, its value is set to ECONNABORTED). 
++ */ ++bool ++crm_ipc_connect(crm_ipc_t * client) ++{ ++ uid_t cl_uid = 0; ++ gid_t cl_gid = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++ int rv; ++ ++ client->need_reply = FALSE; ++ client->ipc = qb_ipcc_connect(client->name, client->buf_size); ++ ++ if (client->ipc == NULL) { ++ crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); ++ return FALSE; ++ } ++ ++ client->pfd.fd = crm_ipc_get_fd(client); ++ if (client->pfd.fd < 0) { ++ rv = errno; ++ /* message already omitted */ ++ crm_ipc_close(client); ++ errno = rv; ++ return FALSE; ++ } ++ ++ rv = pcmk_daemon_user(&cl_uid, &cl_gid); ++ if (rv < 0) { ++ /* message already omitted */ ++ crm_ipc_close(client); ++ errno = -rv; ++ return FALSE; ++ } ++ ++ if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid, ++ &found_pid, &found_uid, ++ &found_gid))) { ++ crm_err("Daemon (IPC %s) is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ crm_ipc_close(client); ++ errno = ECONNABORTED; ++ return FALSE; ++ ++ } else if (rv < 0) { ++ errno = -rv; ++ crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)", ++ client->name); ++ crm_ipc_close(client); ++ errno = -rv; ++ return FALSE; ++ } ++ ++ qb_ipcc_context_set(client->ipc, client); ++ ++#ifdef HAVE_IPCS_GET_BUFFER_SIZE ++ client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc); ++ if (client->max_buf_size > client->buf_size) { ++ free(client->buffer); ++ client->buffer = calloc(1, client->max_buf_size); ++ client->buf_size = client->max_buf_size; ++ } ++#endif ++ ++ return TRUE; ++} ++ ++void ++crm_ipc_close(crm_ipc_t * client) ++{ ++ if (client) { ++ crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc); ++ ++ if (client->ipc) { ++ qb_ipcc_connection_t *ipc = client->ipc; ++ ++ client->ipc = NULL; ++ 
qb_ipcc_disconnect(ipc); ++ } ++ } ++} ++ ++void ++crm_ipc_destroy(crm_ipc_t * client) ++{ ++ if (client) { ++ if (client->ipc && qb_ipcc_is_connected(client->ipc)) { ++ crm_notice("Destroying an active IPC connection to %s", client->name); ++ /* The next line is basically unsafe ++ * ++ * If this connection was attached to mainloop and mainloop is active, ++ * the 'disconnected' callback will end up back here and we'll end ++ * up free'ing the memory twice - something that can still happen ++ * even without this if we destroy a connection and it closes before ++ * we call exit ++ */ ++ /* crm_ipc_close(client); */ ++ } ++ crm_trace("Destroying IPC connection to %s: %p", client->name, client); ++ free(client->buffer); ++ free(client->name); ++ free(client); ++ } ++} ++ ++int ++crm_ipc_get_fd(crm_ipc_t * client) ++{ ++ int fd = 0; ++ ++ if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) { ++ return fd; ++ } ++ errno = EINVAL; ++ crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s", ++ (client? client->name : "unspecified client")); ++ return -errno; ++} ++ ++bool ++crm_ipc_connected(crm_ipc_t * client) ++{ ++ bool rc = FALSE; ++ ++ if (client == NULL) { ++ crm_trace("No client"); ++ return FALSE; ++ ++ } else if (client->ipc == NULL) { ++ crm_trace("No connection"); ++ return FALSE; ++ ++ } else if (client->pfd.fd < 0) { ++ crm_trace("Bad descriptor"); ++ return FALSE; ++ } ++ ++ rc = qb_ipcc_is_connected(client->ipc); ++ if (rc == FALSE) { ++ client->pfd.fd = -EINVAL; ++ } ++ return rc; ++} ++ ++/*! 
++ * \brief Check whether an IPC connection is ready to be read ++ * ++ * \param[in] client Connection to check ++ * ++ * \return Positive value if ready to be read, 0 if not ready, -errno on error ++ */ ++int ++crm_ipc_ready(crm_ipc_t *client) ++{ ++ int rc; ++ ++ CRM_ASSERT(client != NULL); ++ ++ if (crm_ipc_connected(client) == FALSE) { ++ return -ENOTCONN; ++ } ++ ++ client->pfd.revents = 0; ++ rc = poll(&(client->pfd), 1, 0); ++ return (rc < 0)? -errno : rc; ++} ++ ++// \return Standard Pacemaker return code ++static int ++crm_ipc_decompress(crm_ipc_t * client) ++{ ++ pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer; ++ ++ if (header->size_compressed) { ++ int rc = 0; ++ unsigned int size_u = 1 + header->size_uncompressed; ++ /* never let buf size fall below our max size required for ipc reads. */ ++ unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size); ++ char *uncompressed = calloc(1, new_buf_size); ++ ++ crm_trace("Decompressing message data %u bytes into %u bytes", ++ header->size_compressed, size_u); ++ ++ rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u, ++ client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0); ++ ++ if (rc != BZ_OK) { ++ crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", ++ bz2_strerror(rc), rc); ++ free(uncompressed); ++ return EILSEQ; ++ } ++ ++ /* ++ * This assert no longer holds true. For an identical msg, some clients may ++ * require compression, and others may not. If that same msg (event) is sent ++ * to multiple clients, it could result in some clients receiving a compressed ++ * msg even though compression was not explicitly required for them. 
++ * ++ * CRM_ASSERT((header->size_uncompressed + sizeof(pcmk__ipc_header_t)) >= ipc_buffer_max); ++ */ ++ CRM_ASSERT(size_u == header->size_uncompressed); ++ ++ memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t)); /* Preserve the header */ ++ header = (pcmk__ipc_header_t *)(void*)uncompressed; ++ ++ free(client->buffer); ++ client->buf_size = new_buf_size; ++ client->buffer = uncompressed; ++ } ++ ++ CRM_ASSERT(client->buffer[sizeof(pcmk__ipc_header_t) + header->size_uncompressed - 1] == 0); ++ return pcmk_rc_ok; ++} ++ ++long ++crm_ipc_read(crm_ipc_t * client) ++{ ++ pcmk__ipc_header_t *header = NULL; ++ ++ CRM_ASSERT(client != NULL); ++ CRM_ASSERT(client->ipc != NULL); ++ CRM_ASSERT(client->buffer != NULL); ++ ++ client->buffer[0] = 0; ++ client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, ++ client->buf_size, 0); ++ if (client->msg_size >= 0) { ++ int rc = crm_ipc_decompress(client); ++ ++ if (rc != pcmk_rc_ok) { ++ return pcmk_rc2legacy(rc); ++ } ++ ++ header = (pcmk__ipc_header_t *)(void*)client->buffer; ++ if (!pcmk__valid_ipc_header(header)) { ++ return -EBADMSG; ++ } ++ ++ crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s", ++ client->name, header->qb.id, header->qb.size, client->msg_size, ++ client->buffer + sizeof(pcmk__ipc_header_t)); ++ ++ } else { ++ crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size)); ++ } ++ ++ if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { ++ crm_err("Connection to %s failed", client->name); ++ } ++ ++ if (header) { ++ /* Data excluding the header */ ++ return header->size_uncompressed; ++ } ++ return -ENOMSG; ++} ++ ++const char * ++crm_ipc_buffer(crm_ipc_t * client) ++{ ++ CRM_ASSERT(client != NULL); ++ return client->buffer + sizeof(pcmk__ipc_header_t); ++} ++ ++uint32_t ++crm_ipc_buffer_flags(crm_ipc_t * client) ++{ ++ pcmk__ipc_header_t *header = NULL; ++ ++ CRM_ASSERT(client != NULL); ++ if (client->buffer == NULL) { ++ 
return 0; ++ } ++ ++ header = (pcmk__ipc_header_t *)(void*)client->buffer; ++ return header->flags; ++} ++ ++const char * ++crm_ipc_name(crm_ipc_t * client) ++{ ++ CRM_ASSERT(client != NULL); ++ return client->name; ++} ++ ++// \return Standard Pacemaker return code ++static int ++internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ++ ssize_t *bytes) ++{ ++ time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); ++ int rc = pcmk_rc_ok; ++ ++ /* get the reply */ ++ crm_trace("client %s waiting on reply to msg id %d", client->name, request_id); ++ do { ++ ++ *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); ++ if (*bytes > 0) { ++ pcmk__ipc_header_t *hdr = NULL; ++ ++ rc = crm_ipc_decompress(client); ++ if (rc != pcmk_rc_ok) { ++ return rc; ++ } ++ ++ hdr = (pcmk__ipc_header_t *)(void*)client->buffer; ++ if (hdr->qb.id == request_id) { ++ /* Got it */ ++ break; ++ } else if (hdr->qb.id < request_id) { ++ xmlNode *bad = string2xml(crm_ipc_buffer(client)); ++ ++ crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); ++ crm_log_xml_notice(bad, "OldIpcReply"); ++ ++ } else { ++ xmlNode *bad = string2xml(crm_ipc_buffer(client)); ++ ++ crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); ++ crm_log_xml_notice(bad, "ImpossibleReply"); ++ CRM_ASSERT(hdr->qb.id <= request_id); ++ } ++ } else if (crm_ipc_connected(client) == FALSE) { ++ crm_err("Server disconnected client %s while waiting for msg id %d", client->name, ++ request_id); ++ break; ++ } ++ ++ } while (time(NULL) < timeout); ++ ++ if (*bytes < 0) { ++ rc = (int) -*bytes; // System errno ++ } ++ return rc; ++} ++ ++/*! 
++ * \brief Send an IPC XML message ++ * ++ * \param[in] client Connection to IPC server ++ * \param[in] message XML message to send ++ * \param[in] flags Bitmask of crm_ipc_flags ++ * \param[in] ms_timeout Give up if not sent within this much time ++ * (5 seconds if 0, or no timeout if negative) ++ * \param[out] reply Reply from server (or NULL if none) ++ * ++ * \return Negative errno on error, otherwise size of reply received in bytes ++ * if reply was needed, otherwise number of bytes sent ++ */ ++int ++crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, ++ xmlNode ** reply) ++{ ++ int rc = 0; ++ ssize_t qb_rc = 0; ++ ssize_t bytes = 0; ++ struct iovec *iov; ++ static uint32_t id = 0; ++ static int factor = 8; ++ pcmk__ipc_header_t *header; ++ ++ if (client == NULL) { ++ crm_notice("Can't send IPC request without connection (bug?): %.100s", ++ message); ++ return -ENOTCONN; ++ ++ } else if (crm_ipc_connected(client) == FALSE) { ++ /* Don't even bother */ ++ crm_notice("Can't send IPC request to %s: Connection closed", ++ client->name); ++ return -ENOTCONN; ++ } ++ ++ if (ms_timeout == 0) { ++ ms_timeout = 5000; ++ } ++ ++ if (client->need_reply) { ++ qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); ++ if (qb_rc < 0) { ++ crm_warn("Sending IPC to %s disabled until pending reply received", ++ client->name); ++ return -EALREADY; ++ ++ } else { ++ crm_notice("Sending IPC to %s re-enabled after pending reply received", ++ client->name); ++ client->need_reply = FALSE; ++ } ++ } ++ ++ id++; ++ CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ ++ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); ++ if (rc != pcmk_rc_ok) { ++ crm_warn("Couldn't prepare IPC request to %s: %s " CRM_XS " rc=%d", ++ client->name, pcmk_rc_str(rc), rc); ++ return pcmk_rc2legacy(rc); ++ } ++ ++ header = iov[0].iov_base; ++ header->flags |= flags; ++ ++ if(is_set(flags, 
crm_ipc_proxied)) { ++ /* Don't look for a synchronous response */ ++ clear_bit(flags, crm_ipc_client_response); ++ } ++ ++ if(header->size_compressed) { ++ if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { ++ crm_notice("Compressed message exceeds %d0%% of configured IPC " ++ "limit (%u bytes); consider setting PCMK_ipc_buffer to " ++ "%u or higher", ++ factor, client->max_buf_size, 2 * client->max_buf_size); ++ factor++; ++ } ++ } ++ ++ crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", ++ client->name, header->qb.id, header->qb.size, ms_timeout); ++ ++ if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) { ++ ++ time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); ++ ++ do { ++ /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return ++ * an error if it's not connected? ++ */ ++ if (!crm_ipc_connected(client)) { ++ goto send_cleanup; ++ } ++ ++ qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); ++ } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); ++ ++ rc = (int) qb_rc; // Negative of system errno, or bytes sent ++ if (qb_rc <= 0) { ++ goto send_cleanup; ++ ++ } else if (is_not_set(flags, crm_ipc_client_response)) { ++ crm_trace("Not waiting for reply to %s IPC request %d", ++ client->name, header->qb.id); ++ goto send_cleanup; ++ } ++ ++ rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); ++ if (rc != pcmk_rc_ok) { ++ /* We didn't get the reply in time, so disable future sends for now. ++ * The only alternative would be to close the connection since we ++ * don't know how to detect and discard out-of-sequence replies. 
++ * ++ * @TODO Implement out-of-sequence detection ++ */ ++ client->need_reply = TRUE; ++ } ++ rc = (int) bytes; // Negative system errno, or size of reply received ++ ++ } else { ++ // No timeout, and client response needed ++ do { ++ qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, ++ client->buf_size, -1); ++ } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); ++ rc = (int) qb_rc; // Negative system errno, or size of reply received ++ } ++ ++ if (rc > 0) { ++ pcmk__ipc_header_t *hdr = (pcmk__ipc_header_t *)(void*)client->buffer; ++ ++ crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", ++ rc, hdr->qb.id, client->name, header->qb.id, ++ crm_ipc_buffer(client)); ++ ++ if (reply) { ++ *reply = string2xml(crm_ipc_buffer(client)); ++ } ++ ++ } else { ++ crm_trace("No reply to %s IPC %d: rc=%d", ++ client->name, header->qb.id, rc); ++ } ++ ++ send_cleanup: ++ if (crm_ipc_connected(client) == FALSE) { ++ crm_notice("Couldn't send %s IPC request %d: Connection closed " ++ CRM_XS " rc=%d", client->name, header->qb.id, rc); ++ ++ } else if (rc == -ETIMEDOUT) { ++ crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", ++ client->name, header->qb.id, pcmk_strerror(rc), ms_timeout, ++ rc); ++ crm_write_blackbox(0, NULL); ++ ++ } else if (rc <= 0) { ++ crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", ++ client->name, header->qb.id, ++ ((rc == 0)? 
"No bytes sent" : pcmk_strerror(rc)), rc); ++ } ++ ++ pcmk_free_ipc_event(iov); ++ return rc; ++} ++ ++int ++crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, ++ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { ++ int ret = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++#if defined(US_AUTH_PEERCRED_UCRED) ++ struct ucred ucred; ++ socklen_t ucred_len = sizeof(ucred); ++ ++ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, ++ &ucred, &ucred_len) ++ && ucred_len == sizeof(ucred)) { ++ found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; ++ ++#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) ++ struct sockpeercred sockpeercred; ++ socklen_t sockpeercred_len = sizeof(sockpeercred); ++ ++ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, ++ &sockpeercred, &sockpeercred_len) ++ && sockpeercred_len == sizeof(sockpeercred_len)) { ++ found_pid = sockpeercred.pid; ++ found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; ++ ++#elif defined(US_AUTH_GETPEEREID) ++ if (!getpeereid(sock, &found_uid, &found_gid)) { ++ found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ ++ ++#elif defined(US_AUTH_GETPEERUCRED) ++ ucred_t *ucred; ++ if (!getpeerucred(sock, &ucred)) { ++ errno = 0; ++ found_pid = ucred_getpid(ucred); ++ found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ++ ret = -errno; ++ ucred_free(ucred); ++ if (ret) { ++ return (ret < 0) ? ret : -pcmk_err_generic; ++ } ++ ++#else ++# error "No way to authenticate a Unix socket peer" ++ errno = 0; ++ if (0) { ++#endif ++ if (gotpid != NULL) { ++ *gotpid = found_pid; ++ } ++ if (gotuid != NULL) { ++ *gotuid = found_uid; ++ } ++ if (gotgid != NULL) { ++ *gotgid = found_gid; ++ } ++ ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid); ++ } else { ++ ret = (errno > 0) ? 
-errno : -pcmk_err_generic; ++ } ++ ++ return ret; ++} ++ ++int ++pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, ++ gid_t refgid, pid_t *gotpid) ++{ ++ static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ ++ int fd; ++ int rc = pcmk_rc_ipc_unresponsive; ++ int auth_rc = 0; ++ int32_t qb_rc; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++ qb_ipcc_connection_t *c; ++ ++ c = qb_ipcc_connect(name, 0); ++ if (c == NULL) { ++ crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); ++ rc = pcmk_rc_ipc_unresponsive; ++ goto bail; ++ } ++ ++ qb_rc = qb_ipcc_fd_get(c, &fd); ++ if (qb_rc != 0) { ++ rc = (int) -qb_rc; // System errno ++ crm_err("Could not get fd from %s IPC: %s " CRM_XS " rc=%d", ++ name, pcmk_rc_str(rc), rc); ++ goto bail; ++ } ++ ++ auth_rc = crm_ipc_is_authentic_process(fd, refuid, refgid, &found_pid, ++ &found_uid, &found_gid); ++ if (auth_rc < 0) { ++ rc = pcmk_legacy2rc(auth_rc); ++ crm_err("Could not get peer credentials from %s IPC: %s " ++ CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); ++ goto bail; ++ } ++ ++ if (gotpid != NULL) { ++ *gotpid = found_pid; ++ } ++ ++ if (auth_rc == 0) { ++ crm_err("Daemon (IPC %s) effectively blocked with unauthorized" ++ " process %lld (uid: %lld, gid: %lld)", ++ name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ rc = pcmk_rc_ipc_unauthorized; ++ goto bail; ++ } ++ ++ rc = pcmk_rc_ok; ++ if ((found_uid != refuid || found_gid != refgid) ++ && strncmp(last_asked_name, name, sizeof(last_asked_name))) { ++ if ((found_uid == 0) && (refuid != 0)) { ++ crm_warn("Daemon (IPC %s) runs as root, whereas the expected" ++ " credentials are %lld:%lld, hazard of violating" ++ " the least privilege principle", ++ name, (long long) refuid, (long long) refgid); ++ } else { ++ crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" ++ " expected credentials are %lld:%lld, which may" ++ " mean a different 
set of privileges than expected", ++ name, (long long) found_uid, (long long) found_gid, ++ (long long) refuid, (long long) refgid); ++ } ++ memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); ++ } ++ ++bail: ++ if (c != NULL) { ++ qb_ipcc_disconnect(c); ++ } ++ return rc; ++} ++ ++xmlNode * ++create_hello_message(const char *uuid, ++ const char *client_name, const char *major_version, const char *minor_version) ++{ ++ xmlNode *hello_node = NULL; ++ xmlNode *hello = NULL; ++ ++ if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) ++ || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { ++ crm_err("Could not create IPC hello message from %s (UUID %s): " ++ "missing information", ++ client_name? client_name : "unknown client", ++ uuid? uuid : "unknown"); ++ return NULL; ++ } ++ ++ hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); ++ if (hello_node == NULL) { ++ crm_err("Could not create IPC hello message from %s (UUID %s): " ++ "Message data creation failed", client_name, uuid); ++ return NULL; ++ } ++ ++ crm_xml_add(hello_node, "major_version", major_version); ++ crm_xml_add(hello_node, "minor_version", minor_version); ++ crm_xml_add(hello_node, "client_name", client_name); ++ crm_xml_add(hello_node, "client_uuid", uuid); ++ ++ hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); ++ if (hello == NULL) { ++ crm_err("Could not create IPC hello message from %s (UUID %s): " ++ "Request creation failed", client_name, uuid); ++ return NULL; ++ } ++ free_xml(hello_node); ++ ++ crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); ++ return hello; ++} +diff --git a/lib/common/ipc_common.c b/lib/common/ipc_common.c +new file mode 100644 +index 0000000..78360aa +--- /dev/null ++++ b/lib/common/ipc_common.c +@@ -0,0 +1,103 @@ ++/* ++ * Copyright 2004-2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. 
++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include ++#include "crmcommon_private.h" ++ ++#define MIN_MSG_SIZE 12336 // sizeof(struct qb_ipc_connection_response) ++#define MAX_MSG_SIZE 128*1024 // 128k default ++ ++/*! ++ * \internal ++ * \brief Choose an IPC buffer size in bytes ++ * ++ * \param[in] max Use this value if environment/default is lower ++ * ++ * \return Maximum of max and value of PCMK_ipc_buffer (default 128KB) ++ */ ++unsigned int ++pcmk__ipc_buffer_size(unsigned int max) ++{ ++ static unsigned int global_max = 0; ++ ++ if (global_max == 0) { ++ const char *env = getenv("PCMK_ipc_buffer"); ++ ++ if (env) { ++ int env_max = crm_parse_int(env, "0"); ++ ++ global_max = (env_max > 0)? QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE; ++ ++ } else { ++ global_max = MAX_MSG_SIZE; ++ } ++ } ++ return QB_MAX(max, global_max); ++} ++ ++/*! ++ * \brief Return pacemaker's default IPC buffer size ++ * ++ * \return IPC buffer size in bytes ++ */ ++unsigned int ++crm_ipc_default_buffer_size(void) ++{ ++ static unsigned int default_size = 0; ++ ++ if (default_size == 0) { ++ default_size = pcmk__ipc_buffer_size(0); ++ } ++ return default_size; ++} ++ ++/*! 
++ * \internal ++ * \brief Check whether an IPC header is valid ++ * ++ * \param[in] header IPC header to check ++ * ++ * \return true if IPC header has a supported version, false otherwise ++ */ ++bool ++pcmk__valid_ipc_header(const pcmk__ipc_header_t *header) ++{ ++ if (header == NULL) { ++ crm_err("IPC message without header"); ++ return false; ++ ++ } else if (header->version > PCMK__IPC_VERSION) { ++ crm_err("Filtering incompatible v%d IPC message (only versions <= %d supported)", ++ header->version, PCMK__IPC_VERSION); ++ return false; ++ } ++ return true; ++} ++ ++const char * ++pcmk__client_type_str(enum pcmk__client_type client_type) ++{ ++ switch (client_type) { ++ case PCMK__CLIENT_IPC: ++ return "IPC"; ++ case PCMK__CLIENT_TCP: ++ return "TCP"; ++#ifdef HAVE_GNUTLS_GNUTLS_H ++ case PCMK__CLIENT_TLS: ++ return "TLS"; ++#endif ++ default: ++ return "unknown"; ++ } ++} +diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c +new file mode 100644 +index 0000000..b747be3 +--- /dev/null ++++ b/lib/common/ipc_server.c +@@ -0,0 +1,903 @@ ++/* ++ * Copyright 2004-2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include "crmcommon_private.h" ++ ++/* Evict clients whose event queue grows this large (by default) */ ++#define PCMK_IPC_DEFAULT_QUEUE_MAX 500 ++ ++static GHashTable *client_connections = NULL; ++ ++/*! ++ * \internal ++ * \brief Count IPC clients ++ * ++ * \return Number of active IPC client connections ++ */ ++guint ++pcmk__ipc_client_count() ++{ ++ return client_connections? g_hash_table_size(client_connections) : 0; ++} ++ ++/*! 
++ * \internal ++ * \brief Execute a function for each active IPC client connection ++ * ++ * \param[in] func Function to call ++ * \param[in] user_data Pointer to pass to function ++ * ++ * \note The parameters are the same as for g_hash_table_foreach(). ++ */ ++void ++pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) ++{ ++ if ((func != NULL) && (client_connections != NULL)) { ++ g_hash_table_foreach(client_connections, func, user_data); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Remote IPC clients based on iterative function result ++ * ++ * \param[in] func Function to call for each active IPC client ++ * \param[in] user_data Pointer to pass to function ++ * ++ * \note The parameters are the same as for g_hash_table_foreach_remove(). ++ */ ++void ++pcmk__foreach_ipc_client_remove(GHRFunc func, gpointer user_data) ++{ ++ if ((func != NULL) && (client_connections != NULL)) { ++ g_hash_table_foreach_remove(client_connections, func, user_data); ++ } ++} ++ ++pcmk__client_t * ++pcmk__find_client(qb_ipcs_connection_t *c) ++{ ++ if (client_connections) { ++ return g_hash_table_lookup(client_connections, c); ++ } ++ ++ crm_trace("No client found for %p", c); ++ return NULL; ++} ++ ++pcmk__client_t * ++pcmk__find_client_by_id(const char *id) ++{ ++ gpointer key; ++ pcmk__client_t *client; ++ GHashTableIter iter; ++ ++ if (client_connections && id) { ++ g_hash_table_iter_init(&iter, client_connections); ++ while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { ++ if (strcmp(client->id, id) == 0) { ++ return client; ++ } ++ } ++ } ++ ++ crm_trace("No client found with id=%s", id); ++ return NULL; ++} ++ ++const char * ++pcmk__client_name(pcmk__client_t *c) ++{ ++ if (c == NULL) { ++ return "null"; ++ } else if (c->name == NULL && c->id == NULL) { ++ return "unknown"; ++ } else if (c->name == NULL) { ++ return c->id; ++ } else { ++ return c->name; ++ } ++} ++ ++void ++pcmk__client_cleanup(void) ++{ ++ if (client_connections != NULL) { ++ int active = 
g_hash_table_size(client_connections); ++ ++ if (active) { ++ crm_err("Exiting with %d active IPC client%s", ++ active, pcmk__plural_s(active)); ++ } ++ g_hash_table_destroy(client_connections); client_connections = NULL; ++ } ++} ++ ++void ++pcmk__drop_all_clients(qb_ipcs_service_t *service) ++{ ++ qb_ipcs_connection_t *c = NULL; ++ ++ if (service == NULL) { ++ return; ++ } ++ ++ c = qb_ipcs_connection_first_get(service); ++ ++ while (c != NULL) { ++ qb_ipcs_connection_t *last = c; ++ ++ c = qb_ipcs_connection_next_get(service, last); ++ ++ /* There really shouldn't be anyone connected at this point */ ++ crm_notice("Disconnecting client %p, pid=%d...", ++ last, pcmk__client_pid(last)); ++ qb_ipcs_disconnect(last); ++ qb_ipcs_connection_unref(last); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Allocate a new pcmk__client_t object based on an IPC connection ++ * ++ * \param[in] c IPC connection (or NULL to allocate generic client) ++ * \param[in] key Connection table key (or NULL to use sane default) ++ * \param[in] uid_client UID corresponding to c (ignored if c is NULL) ++ * ++ * \return Pointer to new pcmk__client_t (or NULL on error) ++ */ ++static pcmk__client_t * ++client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) ++{ ++ pcmk__client_t *client = calloc(1, sizeof(pcmk__client_t)); ++ ++ if (client == NULL) { ++ crm_perror(LOG_ERR, "Allocating client"); ++ return NULL; ++ } ++ ++ if (c) { ++#if ENABLE_ACL ++ client->user = pcmk__uid2username(uid_client); ++ if (client->user == NULL) { ++ client->user = strdup("#unprivileged"); ++ CRM_CHECK(client->user != NULL, free(client); return NULL); ++ crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", ++ uid_client); ++ } ++#endif ++ client->ipcs = c; ++ client->kind = PCMK__CLIENT_IPC; ++ client->pid = pcmk__client_pid(c); ++ if (key == NULL) { ++ key = c; ++ } ++ } ++ ++ client->id = crm_generate_uuid(); ++ if (client->id == NULL) { ++ crm_err("Could not generate UUID for 
client"); ++ free(client->user); ++ free(client); ++ return NULL; ++ } ++ if (key == NULL) { ++ key = client->id; ++ } ++ if (client_connections == NULL) { ++ crm_trace("Creating IPC client table"); ++ client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); ++ } ++ g_hash_table_insert(client_connections, key, client); ++ return client; ++} ++ ++/*! ++ * \brief Allocate a new pcmk__client_t object and generate its ID ++ * ++ * \param[in] key What to use as connections hash table key (NULL to use ID) ++ * ++ * \return Pointer to new pcmk__client_t (asserts on failure) ++ */ ++pcmk__client_t * ++pcmk__new_unauth_client(void *key) ++{ ++ pcmk__client_t *client = client_from_connection(NULL, key, 0); ++ ++ CRM_ASSERT(client != NULL); ++ return client; ++} ++ ++pcmk__client_t * ++pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) ++{ ++ gid_t uid_cluster = 0; ++ gid_t gid_cluster = 0; ++ ++ pcmk__client_t *client = NULL; ++ ++ CRM_CHECK(c != NULL, return NULL); ++ ++ if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { ++ static bool need_log = TRUE; ++ ++ if (need_log) { ++ crm_warn("Could not find user and group IDs for user %s", ++ CRM_DAEMON_USER); ++ need_log = FALSE; ++ } ++ } ++ ++ if (uid_client != 0) { ++ crm_trace("Giving group %u access to new IPC connection", gid_cluster); ++ /* Passing -1 to chown(2) means don't change */ ++ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); ++ } ++ ++ /* TODO: Do our own auth checking, return NULL if unauthorized */ ++ client = client_from_connection(c, NULL, uid_client); ++ if (client == NULL) { ++ return NULL; ++ } ++ ++ if ((uid_client == 0) || (uid_client == uid_cluster)) { ++ /* Remember when a connection came from root or hacluster */ ++ set_bit(client->flags, pcmk__client_privileged); ++ } ++ ++ crm_debug("New IPC client %s for PID %u with uid %d and gid %d", ++ client->id, client->pid, uid_client, gid_client); ++ return client; ++} ++ 
++static struct iovec * ++pcmk__new_ipc_event(void) ++{ ++ struct iovec *iov = calloc(2, sizeof(struct iovec)); ++ ++ CRM_ASSERT(iov != NULL); ++ return iov; ++} ++ ++/*! ++ * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() ++ * ++ * \param[in] event I/O vector to free ++ */ ++void ++pcmk_free_ipc_event(struct iovec *event) ++{ ++ if (event != NULL) { ++ free(event[0].iov_base); ++ free(event[1].iov_base); ++ free(event); ++ } ++} ++ ++static void ++free_event(gpointer data) ++{ ++ pcmk_free_ipc_event((struct iovec *) data); ++} ++ ++static void ++add_event(pcmk__client_t *c, struct iovec *iov) ++{ ++ if (c->event_queue == NULL) { ++ c->event_queue = g_queue_new(); ++ } ++ g_queue_push_tail(c->event_queue, iov); ++} ++ ++void ++pcmk__free_client(pcmk__client_t *c) ++{ ++ if (c == NULL) { ++ return; ++ } ++ ++ if (client_connections) { ++ if (c->ipcs) { ++ crm_trace("Destroying %p/%p (%d remaining)", ++ c, c->ipcs, g_hash_table_size(client_connections) - 1); ++ g_hash_table_remove(client_connections, c->ipcs); ++ ++ } else { ++ crm_trace("Destroying remote connection %p (%d remaining)", ++ c, g_hash_table_size(client_connections) - 1); ++ g_hash_table_remove(client_connections, c->id); ++ } ++ } ++ ++ if (c->event_timer) { ++ g_source_remove(c->event_timer); ++ } ++ ++ if (c->event_queue) { ++ crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); ++ g_queue_free_full(c->event_queue, free_event); ++ } ++ ++ free(c->id); ++ free(c->name); ++ free(c->user); ++ if (c->remote) { ++ if (c->remote->auth_timeout) { ++ g_source_remove(c->remote->auth_timeout); ++ } ++ free(c->remote->buffer); ++ free(c->remote); ++ } ++ free(c); ++} ++ ++/*! 
++ * \internal ++ * \brief Raise IPC eviction threshold for a client, if allowed ++ * ++ * \param[in,out] client Client to modify ++ * \param[in] qmax New threshold (as non-NULL string) ++ * ++ * \return TRUE if change was allowed, FALSE otherwise ++ */ ++bool ++pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) ++{ ++ if (is_set(client->flags, pcmk__client_privileged)) { ++ long long qmax_int; ++ ++ errno = 0; ++ qmax_int = crm_parse_ll(qmax, NULL); ++ if ((errno == 0) && (qmax_int > 0)) { ++ client->queue_max = (unsigned int) qmax_int; ++ return TRUE; ++ } ++ } ++ return FALSE; ++} ++ ++int ++pcmk__client_pid(qb_ipcs_connection_t *c) ++{ ++ struct qb_ipcs_connection_stats stats; ++ ++ stats.client_pid = 0; ++ qb_ipcs_connection_stats_get(c, &stats, 0); ++ return stats.client_pid; ++} ++ ++/*! ++ * \internal ++ * \brief Retrieve message XML from data read from client IPC ++ * ++ * \param[in] c IPC client connection ++ * \param[in] data Data read from client connection ++ * \param[out] id Where to store message ID from libqb header ++ * \param[out] flags Where to store flags from libqb header ++ * ++ * \return Message XML on success, NULL otherwise ++ */ ++xmlNode * ++pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, ++ uint32_t *flags) ++{ ++ xmlNode *xml = NULL; ++ char *uncompressed = NULL; ++ char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); ++ pcmk__ipc_header_t *header = data; ++ ++ if (!pcmk__valid_ipc_header(header)) { ++ return NULL; ++ } ++ ++ if (id) { ++ *id = ((struct qb_ipc_response_header *)data)->id; ++ } ++ if (flags) { ++ *flags = header->flags; ++ } ++ ++ if (is_set(header->flags, crm_ipc_proxied)) { ++ /* Mark this client as being the endpoint of a proxy connection. ++ * Proxy connections responses are sent on the event channel, to avoid ++ * blocking the controller serving as proxy. 
++ */ ++ c->flags |= pcmk__client_proxied; ++ } ++ ++ if (header->size_compressed) { ++ int rc = 0; ++ unsigned int size_u = 1 + header->size_uncompressed; ++ uncompressed = calloc(1, size_u); ++ ++ crm_trace("Decompressing message data %u bytes into %u bytes", ++ header->size_compressed, size_u); ++ ++ rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); ++ text = uncompressed; ++ ++ if (rc != BZ_OK) { ++ crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", ++ bz2_strerror(rc), rc); ++ free(uncompressed); ++ return NULL; ++ } ++ } ++ ++ CRM_ASSERT(text[header->size_uncompressed - 1] == 0); ++ ++ xml = string2xml(text); ++ crm_log_xml_trace(xml, "[IPC received]"); ++ ++ free(uncompressed); ++ return xml; ++} ++ ++static int crm_ipcs_flush_events(pcmk__client_t *c); ++ ++static gboolean ++crm_ipcs_flush_events_cb(gpointer data) ++{ ++ pcmk__client_t *c = data; ++ ++ c->event_timer = 0; ++ crm_ipcs_flush_events(c); ++ return FALSE; ++} ++ ++/*! ++ * \internal ++ * \brief Add progressive delay before next event queue flush ++ * ++ * \param[in,out] c Client connection to add delay to ++ * \param[in] queue_len Current event queue length ++ */ ++static inline void ++delay_next_flush(pcmk__client_t *c, unsigned int queue_len) ++{ ++ /* Delay a maximum of 1.5 seconds */ ++ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; ++ ++ c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c); ++} ++ ++/*! 
++ * \internal ++ * \brief Send client any messages in its queue ++ * ++ * \param[in] c Client to flush ++ * ++ * \return Standard Pacemaker return value ++ */ ++static int ++crm_ipcs_flush_events(pcmk__client_t *c) ++{ ++ int rc = pcmk_rc_ok; ++ ssize_t qb_rc = 0; ++ unsigned int sent = 0; ++ unsigned int queue_len = 0; ++ ++ if (c == NULL) { ++ return rc; ++ ++ } else if (c->event_timer) { ++ /* There is already a timer, wait until it goes off */ ++ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); ++ return rc; ++ } ++ ++ if (c->event_queue) { ++ queue_len = g_queue_get_length(c->event_queue); ++ } ++ while (sent < 100) { ++ pcmk__ipc_header_t *header = NULL; ++ struct iovec *event = NULL; ++ ++ if (c->event_queue) { ++ // We don't pop unless send is successful ++ event = g_queue_peek_head(c->event_queue); ++ } ++ if (event == NULL) { // Queue is empty ++ break; ++ } ++ ++ qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); ++ if (qb_rc < 0) { ++ rc = (int) -qb_rc; ++ break; ++ } ++ event = g_queue_pop_head(c->event_queue); ++ ++ sent++; ++ header = event[0].iov_base; ++ if (header->size_compressed) { ++ crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", ++ header->qb.id, c->ipcs, c->pid, (long long) qb_rc); ++ } else { ++ crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", ++ header->qb.id, c->ipcs, c->pid, (long long) qb_rc, ++ (char *) (event[1].iov_base)); ++ } ++ pcmk_free_ipc_event(event); ++ } ++ ++ queue_len -= sent; ++ if (sent > 0 || queue_len) { ++ crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", ++ sent, queue_len, c->ipcs, c->pid, ++ pcmk_rc_str(rc), (long long) qb_rc); ++ } ++ ++ if (queue_len) { ++ ++ /* Allow clients to briefly fall behind on processing incoming messages, ++ * but drop completely unresponsive clients so the connection doesn't ++ * consume resources indefinitely. 
++ */ ++ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { ++ if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { ++ /* Don't evict for a new or shrinking backlog */ ++ crm_warn("Client with process ID %u has a backlog of %u messages " ++ CRM_XS " %p", c->pid, queue_len, c->ipcs); ++ } else { ++ crm_err("Evicting client with process ID %u due to backlog of %u messages " ++ CRM_XS " %p", c->pid, queue_len, c->ipcs); ++ c->queue_backlog = 0; ++ qb_ipcs_disconnect(c->ipcs); ++ return rc; ++ } ++ } ++ ++ c->queue_backlog = queue_len; ++ delay_next_flush(c, queue_len); ++ ++ } else { ++ /* Event queue is empty, there is no backlog */ ++ c->queue_backlog = 0; ++ } ++ ++ return rc; ++} ++ ++/*! ++ * \internal ++ * \brief Create an I/O vector for sending an IPC XML message ++ * ++ * \param[in] request Identifier for libqb response header ++ * \param[in] message XML message to send ++ * \param[in] max_send_size If 0, default IPC buffer size is used ++ * \param[out] result Where to store prepared I/O vector ++ * \param[out] bytes Size of prepared data in bytes ++ * ++ * \return Standard Pacemaker return code ++ */ ++int ++pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, ++ uint32_t max_send_size, struct iovec **result, ++ ssize_t *bytes) ++{ ++ static unsigned int biggest = 0; ++ struct iovec *iov; ++ unsigned int total = 0; ++ char *compressed = NULL; ++ char *buffer = NULL; ++ pcmk__ipc_header_t *header = NULL; ++ ++ if ((message == NULL) || (result == NULL)) { ++ return EINVAL; ++ } ++ ++ header = calloc(1, sizeof(pcmk__ipc_header_t)); ++ if (header == NULL) { ++ return ENOMEM; /* errno mightn't be set by allocator */ ++ } ++ ++ buffer = dump_xml_unformatted(message); ++ ++ if (max_send_size == 0) { ++ max_send_size = crm_ipc_default_buffer_size(); ++ } ++ CRM_LOG_ASSERT(max_send_size != 0); ++ ++ *result = NULL; ++ iov = pcmk__new_ipc_event(); ++ iov[0].iov_len = sizeof(pcmk__ipc_header_t); ++ iov[0].iov_base = header; ++ ++ 
header->version = PCMK__IPC_VERSION; ++ header->size_uncompressed = 1 + strlen(buffer); ++ total = iov[0].iov_len + header->size_uncompressed; ++ ++ if (total < max_send_size) { ++ iov[1].iov_base = buffer; ++ iov[1].iov_len = header->size_uncompressed; ++ ++ } else { ++ unsigned int new_size = 0; ++ ++ if (pcmk__compress(buffer, (unsigned int) header->size_uncompressed, ++ (unsigned int) max_send_size, &compressed, ++ &new_size) == pcmk_rc_ok) { ++ ++ header->flags |= crm_ipc_compressed; ++ header->size_compressed = new_size; ++ ++ iov[1].iov_len = header->size_compressed; ++ iov[1].iov_base = compressed; ++ ++ free(buffer); ++ ++ biggest = QB_MAX(header->size_compressed, biggest); ++ ++ } else { ++ crm_log_xml_trace(message, "EMSGSIZE"); ++ biggest = QB_MAX(header->size_uncompressed, biggest); ++ ++ crm_err("Could not compress %u-byte message into less than IPC " ++ "limit of %u bytes; set PCMK_ipc_buffer to higher value " ++ "(%u bytes suggested)", ++ header->size_uncompressed, max_send_size, 4 * biggest); ++ ++ free(compressed); ++ free(buffer); ++ pcmk_free_ipc_event(iov); ++ return EMSGSIZE; ++ } ++ } ++ ++ header->qb.size = iov[0].iov_len + iov[1].iov_len; ++ header->qb.id = (int32_t)request; /* Replying to a specific request */ ++ ++ *result = iov; ++ CRM_ASSERT(header->qb.size > 0); ++ if (bytes != NULL) { ++ *bytes = header->qb.size; ++ } ++ return pcmk_rc_ok; ++} ++ ++int ++pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) ++{ ++ int rc = pcmk_rc_ok; ++ static uint32_t id = 1; ++ pcmk__ipc_header_t *header = iov[0].iov_base; ++ ++ if (c->flags & pcmk__client_proxied) { ++ /* _ALL_ replies to proxied connections need to be sent as events */ ++ if (is_not_set(flags, crm_ipc_server_event)) { ++ flags |= crm_ipc_server_event; ++ /* this flag lets us know this was originally meant to be a response. ++ * even though we're sending it over the event channel. 
*/ ++ flags |= crm_ipc_proxied_relay_response; ++ } ++ } ++ ++ header->flags |= flags; ++ if (flags & crm_ipc_server_event) { ++ header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ ++ ++ if (flags & crm_ipc_server_free) { ++ crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); ++ add_event(c, iov); ++ ++ } else { ++ struct iovec *iov_copy = pcmk__new_ipc_event(); ++ ++ crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); ++ iov_copy[0].iov_len = iov[0].iov_len; ++ iov_copy[0].iov_base = malloc(iov[0].iov_len); ++ memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); ++ ++ iov_copy[1].iov_len = iov[1].iov_len; ++ iov_copy[1].iov_base = malloc(iov[1].iov_len); ++ memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); ++ ++ add_event(c, iov_copy); ++ } ++ ++ } else { ++ ssize_t qb_rc; ++ ++ CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ ++ ++ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); ++ if (qb_rc < header->qb.size) { ++ if (qb_rc < 0) { ++ rc = (int) -qb_rc; ++ } ++ crm_notice("Response %d to pid %d failed: %s " ++ CRM_XS " bytes=%u rc=%lld ipcs=%p", ++ header->qb.id, c->pid, pcmk_rc_str(rc), ++ header->qb.size, (long long) qb_rc, c->ipcs); ++ ++ } else { ++ crm_trace("Response %d sent, %lld bytes to %p[%d]", ++ header->qb.id, (long long) qb_rc, c->ipcs, c->pid); ++ } ++ ++ if (flags & crm_ipc_server_free) { ++ pcmk_free_ipc_event(iov); ++ } ++ } ++ ++ if (flags & crm_ipc_server_event) { ++ rc = crm_ipcs_flush_events(c); ++ } else { ++ crm_ipcs_flush_events(c); ++ } ++ ++ if ((rc == EPIPE) || (rc == ENOTCONN)) { ++ crm_trace("Client %p disconnected", c->ipcs); ++ } ++ return rc; ++} ++ ++int ++pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, ++ uint32_t flags) ++{ ++ struct iovec *iov = NULL; ++ int rc = pcmk_rc_ok; ++ ++ if (c == NULL) { ++ return EINVAL; ++ } ++ rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), ++ &iov, NULL); 
++ if (rc == pcmk_rc_ok) { ++ rc = pcmk__ipc_send_iov(c, iov, flags | crm_ipc_server_free); ++ } else { ++ pcmk_free_ipc_event(iov); ++ crm_notice("IPC message to pid %d failed: %s " CRM_XS " rc=%d", ++ c->pid, pcmk_rc_str(rc), rc); ++ } ++ return rc; ++} ++ ++void ++pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, ++ uint32_t request, uint32_t flags, const char *tag) ++{ ++ if (flags & crm_ipc_client_response) { ++ xmlNode *ack = create_xml_node(NULL, tag); ++ ++ crm_trace("Ack'ing IPC message from %s", pcmk__client_name(c)); ++ c->request_id = 0; ++ crm_xml_add(ack, "function", function); ++ crm_xml_add_int(ack, "line", line); ++ pcmk__ipc_send_xml(c, request, ack, flags); ++ free_xml(ack); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Add an IPC server to the main loop for the pacemaker-based API ++ * ++ * \param[out] ipcs_ro New IPC server for read-only pacemaker-based API ++ * \param[out] ipcs_rw New IPC server for read/write pacemaker-based API ++ * \param[out] ipcs_shm New IPC server for shared-memory pacemaker-based API ++ * \param[in] ro_cb IPC callbacks for read-only API ++ * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs ++ * ++ * \note This function exits fatally if unable to create the servers. 
++ */ ++void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, ++ qb_ipcs_service_t **ipcs_rw, ++ qb_ipcs_service_t **ipcs_shm, ++ struct qb_ipcs_service_handlers *ro_cb, ++ struct qb_ipcs_service_handlers *rw_cb) ++{ ++ *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, ++ QB_IPC_NATIVE, ro_cb); ++ ++ *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, ++ QB_IPC_NATIVE, rw_cb); ++ ++ *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, ++ QB_IPC_SHM, rw_cb); ++ ++ if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { ++ crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); ++ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); ++ crm_exit(CRM_EX_FATAL); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Destroy IPC servers for pacemaker-based API ++ * ++ * \param[out] ipcs_ro IPC server for read-only pacemaker-based API ++ * \param[out] ipcs_rw IPC server for read/write pacemaker-based API ++ * \param[out] ipcs_shm IPC server for shared-memory pacemaker-based API ++ * ++ * \note This is a convenience function for calling qb_ipcs_destroy() for each ++ * argument. ++ */ ++void ++pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, ++ qb_ipcs_service_t *ipcs_rw, ++ qb_ipcs_service_t *ipcs_shm) ++{ ++ qb_ipcs_destroy(ipcs_ro); ++ qb_ipcs_destroy(ipcs_rw); ++ qb_ipcs_destroy(ipcs_shm); ++} ++ ++/*! ++ * \internal ++ * \brief Add an IPC server to the main loop for the pacemaker-controld API ++ * ++ * \param[in] cb IPC callbacks ++ * ++ * \return Newly created IPC server ++ */ ++qb_ipcs_service_t * ++pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) ++{ ++ return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); ++} ++ ++/*! ++ * \internal ++ * \brief Add an IPC server to the main loop for the pacemaker-attrd API ++ * ++ * \param[in] cb IPC callbacks ++ * ++ * \note This function exits fatally if unable to create the servers. 
++ */ ++void ++pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, ++ struct qb_ipcs_service_handlers *cb) ++{ ++ *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); ++ ++ if (*ipcs == NULL) { ++ crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn"); ++ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); ++ crm_exit(CRM_EX_FATAL); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Add an IPC server to the main loop for the pacemaker-fenced API ++ * ++ * \param[in] cb IPC callbacks ++ * ++ * \note This function exits fatally if unable to create the servers. ++ */ ++void ++pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, ++ struct qb_ipcs_service_handlers *cb) ++{ ++ *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, ++ QB_LOOP_HIGH); ++ ++ if (*ipcs == NULL) { ++ crm_err("Failed to create fencer: exiting and inhibiting respawn."); ++ crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); ++ crm_exit(CRM_EX_FATAL); ++ } ++} +diff --git a/lib/common/messages.c b/lib/common/messages.c +new file mode 100644 +index 0000000..c5b5739 +--- /dev/null ++++ b/lib/common/messages.c +@@ -0,0 +1,146 @@ ++/* ++ * Copyright 2004-2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include ++ ++/*! 
++ * \brief Create a Pacemaker request (for IPC or cluster layer) ++ * ++ * \param[in] task What to set as the request's task ++ * \param[in] msg_data What to add as the request's data contents ++ * \param[in] host_to What to set as the request's destination host ++ * \param[in] sys_to What to set as the request's destination system ++ * \param[in] sys_from If not NULL, set as request's origin system ++ * \param[in] uuid_from If not NULL, use in request's origin system ++ * \param[in] origin Name of function that called this one ++ * ++ * \return XML of new request ++ * ++ * \note One of sys_from or uuid_from must be non-NULL ++ * \note This function should not be called directly, but via the ++ * create_request() wrapper. ++ * \note The caller is responsible for freeing the result using free_xml(). ++ */ ++xmlNode * ++create_request_adv(const char *task, xmlNode * msg_data, ++ const char *host_to, const char *sys_to, ++ const char *sys_from, const char *uuid_from, ++ const char *origin) ++{ ++ static uint ref_counter = 0; ++ ++ char *true_from = NULL; ++ xmlNode *request = NULL; ++ char *reference = crm_strdup_printf("%s-%s-%lld-%u", ++ (task? task : "_empty_"), ++ (sys_from? 
sys_from : "_empty_"), ++ (long long) time(NULL), ref_counter++); ++ ++ if (uuid_from != NULL) { ++ true_from = generate_hash_key(sys_from, uuid_from); ++ } else if (sys_from != NULL) { ++ true_from = strdup(sys_from); ++ } else { ++ crm_err("No sys from specified"); ++ } ++ ++ // host_from will get set for us if necessary by the controller when routed ++ request = create_xml_node(NULL, __FUNCTION__); ++ crm_xml_add(request, F_CRM_ORIGIN, origin); ++ crm_xml_add(request, F_TYPE, T_CRM); ++ crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET); ++ crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); ++ crm_xml_add(request, F_CRM_REFERENCE, reference); ++ crm_xml_add(request, F_CRM_TASK, task); ++ crm_xml_add(request, F_CRM_SYS_TO, sys_to); ++ crm_xml_add(request, F_CRM_SYS_FROM, true_from); ++ ++ /* HOSTTO will be ignored if it is to the DC anyway. */ ++ if (host_to != NULL && strlen(host_to) > 0) { ++ crm_xml_add(request, F_CRM_HOST_TO, host_to); ++ } ++ ++ if (msg_data != NULL) { ++ add_message_xml(request, F_CRM_DATA, msg_data); ++ } ++ free(reference); ++ free(true_from); ++ ++ return request; ++} ++ ++/*! ++ * \brief Create a Pacemaker reply (for IPC or cluster layer) ++ * ++ * \param[in] original_request XML of request this is a reply to ++ * \param[in] xml_response_data XML to copy as data section of reply ++ * \param[in] origin Name of function that called this one ++ * ++ * \return XML of new reply ++ * ++ * \note This function should not be called directly, but via the ++ * create_reply() wrapper. ++ * \note The caller is responsible for freeing the result using free_xml(). 
++ */ ++xmlNode * ++create_reply_adv(xmlNode *original_request, xmlNode *xml_response_data, ++ const char *origin) ++{ ++ xmlNode *reply = NULL; ++ ++ const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM); ++ const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM); ++ const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO); ++ const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE); ++ const char *operation = crm_element_value(original_request, F_CRM_TASK); ++ const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE); ++ ++ if (type == NULL) { ++ crm_err("Cannot create new_message, no message type in original message"); ++ CRM_ASSERT(type != NULL); ++ return NULL; ++#if 0 ++ } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { ++ crm_err("Cannot create new_message, original message was not a request"); ++ return NULL; ++#endif ++ } ++ reply = create_xml_node(NULL, __FUNCTION__); ++ if (reply == NULL) { ++ crm_err("Cannot create new_message, malloc failed"); ++ return NULL; ++ } ++ ++ crm_xml_add(reply, F_CRM_ORIGIN, origin); ++ crm_xml_add(reply, F_TYPE, T_CRM); ++ crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); ++ crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE); ++ crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference); ++ crm_xml_add(reply, F_CRM_TASK, operation); ++ ++ /* since this is a reply, we reverse the from and to */ ++ crm_xml_add(reply, F_CRM_SYS_TO, sys_from); ++ crm_xml_add(reply, F_CRM_SYS_FROM, sys_to); ++ ++ /* HOSTTO will be ignored if it is to the DC anyway. 
*/ ++ if (host_from != NULL && strlen(host_from) > 0) { ++ crm_xml_add(reply, F_CRM_HOST_TO, host_from); ++ } ++ ++ if (xml_response_data != NULL) { ++ add_message_xml(reply, F_CRM_DATA, xml_response_data); ++ } ++ ++ return reply; ++} +-- +1.8.3.1 + + +From 8cfa76d840aff62b2376136a7ddb1cb54d070458 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 7 Apr 2020 11:39:31 -0500 +Subject: [PATCH 3/8] Refactor: libcrmcommon: move get_message_xml() and + add_message_xml() definition + +recently added messages.c is more logical place than xml.c +--- + lib/common/messages.c | 20 ++++++++++++++++++++ + lib/common/xml.c | 17 ----------------- + 2 files changed, 20 insertions(+), 17 deletions(-) + +diff --git a/lib/common/messages.c b/lib/common/messages.c +index c5b5739..25569db 100644 +--- a/lib/common/messages.c ++++ b/lib/common/messages.c +@@ -12,6 +12,9 @@ + #include + #include + ++#include ++#include ++ + #include + + /*! +@@ -144,3 +147,20 @@ create_reply_adv(xmlNode *original_request, xmlNode *xml_response_data, + + return reply; + } ++ ++xmlNode * ++get_message_xml(xmlNode *msg, const char *field) ++{ ++ xmlNode *tmp = first_named_child(msg, field); ++ ++ return __xml_first_child(tmp); ++} ++ ++gboolean ++add_message_xml(xmlNode *msg, const char *field, xmlNode *xml) ++{ ++ xmlNode *holder = create_xml_node(msg, field); ++ ++ add_node_copy(holder, xml); ++ return TRUE; ++} +diff --git a/lib/common/xml.c b/lib/common/xml.c +index de0c508..e071f8d 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -2613,23 +2613,6 @@ write_xml_file(xmlNode * xml_node, const char *filename, gboolean compress) + return write_xml_stream(xml_node, filename, stream, compress); + } + +-xmlNode * +-get_message_xml(xmlNode * msg, const char *field) +-{ +- xmlNode *tmp = first_named_child(msg, field); +- +- return __xml_first_child(tmp); +-} +- +-gboolean +-add_message_xml(xmlNode * msg, const char *field, xmlNode * xml) +-{ +- xmlNode *holder = create_xml_node(msg, field); +- +- 
add_node_copy(holder, xml); +- return TRUE; +-} +- + static char * + crm_xml_escape_shuffle(char *text, int start, int *length, const char *replace) + { +-- +1.8.3.1 + + +From a123da0a978a45b08f0723fd651059a13d26235c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 8 Apr 2020 11:09:00 -0500 +Subject: [PATCH 4/8] Refactor: libcrmcommon: drop generate_hash_key() + +The internal one-liner was only used in one location, and that wasn't even to +generate a hash key. +--- + include/crm_internal.h | 2 -- + lib/common/messages.c | 5 +++-- + lib/common/utils.c | 11 ----------- + 3 files changed, 3 insertions(+), 15 deletions(-) + +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 15f9d2b..fd56fc6 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -68,8 +68,6 @@ crm_set_bit(const char *function, int line, const char *target, long long word, + # define set_bit(word, bit) word = crm_set_bit(__FUNCTION__, __LINE__, NULL, word, bit) + # define clear_bit(word, bit) word = crm_clear_bit(__FUNCTION__, __LINE__, NULL, word, bit) + +-char *generate_hash_key(const char *crm_msg_reference, const char *sys); +- + void strip_text_nodes(xmlNode * xml); + void pcmk_panic(const char *origin); + pid_t pcmk_locate_sbd(void); +diff --git a/lib/common/messages.c b/lib/common/messages.c +index 25569db..d3fa894 100644 +--- a/lib/common/messages.c ++++ b/lib/common/messages.c +@@ -51,11 +51,12 @@ create_request_adv(const char *task, xmlNode * msg_data, + (long long) time(NULL), ref_counter++); + + if (uuid_from != NULL) { +- true_from = generate_hash_key(sys_from, uuid_from); ++ true_from = crm_strdup_printf("%s_%s", uuid_from, ++ (sys_from? 
sys_from : "none")); + } else if (sys_from != NULL) { + true_from = strdup(sys_from); + } else { +- crm_err("No sys from specified"); ++ crm_err("Cannot create IPC request: No originating system specified"); + } + + // host_from will get set for us if necessary by the controller when routed +diff --git a/lib/common/utils.c b/lib/common/utils.c +index 13e7cb2..0ac96b8 100644 +--- a/lib/common/utils.c ++++ b/lib/common/utils.c +@@ -116,17 +116,6 @@ score2char(int score) + return crm_itoa(score); + } + +-char * +-generate_hash_key(const char *crm_msg_reference, const char *sys) +-{ +- char *hash_key = crm_strdup_printf("%s_%s", (sys? sys : "none"), +- crm_msg_reference); +- +- crm_trace("created hash key: (%s)", hash_key); +- return hash_key; +-} +- +- + int + crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) + { +-- +1.8.3.1 + + +From dd81ff4b826c662654d6760f20f3ec74f6ae6020 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 16 Apr 2020 16:47:53 -0500 +Subject: [PATCH 5/8] Low: libcrmcommon: new function for draining and quitting + a main loop + +We have an existing drain function to drain events based on a check function +and a timeout, which is better suited to daemons. This one drains up to N +events and then quits the main loop, which is better suited to tools. +--- + include/crm/common/mainloop.h | 3 ++- + lib/common/mainloop.c | 22 ++++++++++++++++++++++ + 2 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h +index b443b4e..9957b25 100644 +--- a/include/crm/common/mainloop.h ++++ b/include/crm/common/mainloop.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2009-2019 the Pacemaker project contributors ++ * Copyright 2009-2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -146,6 +146,7 @@ pid_t mainloop_child_pid(mainloop_child_t * child); + void mainloop_clear_child_userdata(mainloop_child_t * child); + gboolean mainloop_child_kill(pid_t pid); + ++void pcmk_quit_main_loop(GMainLoop *mloop, unsigned int n); + void pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, + bool (*check)(guint)); + +diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c +index 10450e4..634eead 100644 +--- a/lib/common/mainloop.c ++++ b/lib/common/mainloop.c +@@ -1345,6 +1345,28 @@ drain_timeout_cb(gpointer user_data) + } + + /*! ++ * \brief Drain some remaining main loop events then quit it ++ * ++ * \param[in] mloop Main loop to drain and quit ++ * \param[in] n Drain up to this many pending events ++ */ ++void ++pcmk_quit_main_loop(GMainLoop *mloop, unsigned int n) ++{ ++ if ((mloop != NULL) && g_main_loop_is_running(mloop)) { ++ GMainContext *ctx = g_main_loop_get_context(mloop); ++ ++ /* Drain up to n events in case some memory clean-up is pending ++ * (helpful to reduce noise in valgrind output). ++ */ ++ for (int i = 0; (i < n) && g_main_context_pending(ctx); ++i) { ++ g_main_context_dispatch(ctx); ++ } ++ g_main_loop_quit(mloop); ++ } ++} ++ ++/*! + * \brief Process main loop events while a certain condition is met + * + * \param[in] mloop Main loop to process +-- +1.8.3.1 + + +From 771a0c56df363eb91fa7eb6ac4b3ee833c5dcd5e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Apr 2020 13:04:45 -0500 +Subject: [PATCH 6/8] Low: tools: handle memory cleanup better in crm_node + +crm_node had paths that would exit without returning to the main(), +missing the memory cleanup at the end. 
+--- + tools/crm_node.c | 30 ++++++++++++++---------------- + 1 file changed, 14 insertions(+), 16 deletions(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index 34511f3..db31f20 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -187,15 +187,6 @@ new_mainloop_for_ipc(const char *system, ipc_dispatch_fn dispatch) + return ipc; + } + +-static void +-run_mainloop_and_exit(void) +-{ +- g_main_loop_run(mainloop); +- g_main_loop_unref(mainloop); +- mainloop = NULL; +- crm_node_exit(exit_code); +-} +- + static int + send_controller_hello(crm_ipc_t *controller) + { +@@ -328,7 +319,9 @@ run_controller_mainloop(uint32_t nodeid) + } + + // Run main loop to get controller reply via dispatch_controller() +- run_mainloop_and_exit(); ++ g_main_loop_run(mainloop); ++ g_main_loop_unref(mainloop); ++ mainloop = NULL; + } + + static void +@@ -339,7 +332,8 @@ print_node_name(void) + + if (name != NULL) { + printf("%s\n", name); +- crm_node_exit(CRM_EX_OK); ++ exit_code = CRM_EX_OK; ++ return; + + } else { + // Otherwise ask the controller +@@ -486,11 +480,11 @@ remove_node(const char *target_uname) + if (tools_remove_node_cache(node_name, nodeid, daemons[d])) { + crm_err("Failed to connect to %s to remove node '%s'", + daemons[d], target_uname); +- crm_node_exit(CRM_EX_ERROR); ++ exit_code = CRM_EX_ERROR; + return; + } + } +- crm_node_exit(CRM_EX_OK); ++ exit_code = CRM_EX_OK; + } + + static gint +@@ -513,7 +507,8 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) + + if (msg == NULL) { + fprintf(stderr, "error: Could not understand pacemakerd response\n"); +- crm_node_exit(CRM_EX_PROTOCOL); ++ exit_code = CRM_EX_PROTOCOL; ++ g_main_loop_quit(mainloop); + return 0; + } + +@@ -544,7 +539,8 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) + } + + free_xml(msg); +- crm_node_exit(CRM_EX_OK); ++ exit_code = CRM_EX_OK; ++ g_main_loop_quit(mainloop); + return 0; + } + +@@ -562,7 +558,9 @@ run_pacemakerd_mainloop(void) + 
free_xml(poke); + + // Handle reply via node_mcp_dispatch() +- run_mainloop_and_exit(); ++ g_main_loop_run(mainloop); ++ g_main_loop_unref(mainloop); ++ mainloop = NULL; + } + + static GOptionContext * +-- +1.8.3.1 + + +From ba7abdbe6cd7f2fb73ce096c87f0599e27000bee Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 20 Apr 2020 15:30:40 -0500 +Subject: [PATCH 7/8] Refactor: tools: use proper type for glib timeout value + in crmadmin + +--- + tools/crmadmin.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/tools/crmadmin.c b/tools/crmadmin.c +index c58de59..bd4bfe5 100644 +--- a/tools/crmadmin.c ++++ b/tools/crmadmin.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include // gboolean, GMainLoop, etc. + + #include + #include +@@ -28,9 +29,10 @@ + + #include + +-static int message_timer_id = -1; +-static int message_timeout_ms = 30 * 1000; ++#define DEFAULT_MESSAGE_TIMEOUT_MS 30000 + ++static guint message_timer_id = 0; ++static guint message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; + static GMainLoop *mainloop = NULL; + static crm_ipc_t *crmd_channel = NULL; + static char *admin_uuid = NULL; +@@ -172,9 +174,9 @@ main(int argc, char **argv) + crm_bump_log_level(argc, argv); + break; + case 't': +- message_timeout_ms = atoi(optarg); ++ message_timeout_ms = (guint) atoi(optarg); + if (message_timeout_ms < 1) { +- message_timeout_ms = 30 * 1000; ++ message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; + } + break; + +-- +1.8.3.1 + + +From 27d763920a1e12b9b5747c1b64a5dc1395c58768 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 20 Apr 2020 15:44:20 -0500 +Subject: [PATCH 8/8] Refactor: tools: functionize listing nodes from CIB in + crmadmin + +--- + tools/crmadmin.c | 47 +++++++++++++++++++++++++++-------------------- + 1 file changed, 27 insertions(+), 20 deletions(-) + +diff --git a/tools/crmadmin.c b/tools/crmadmin.c +index bd4bfe5..3e9e959 100644 +--- a/tools/crmadmin.c ++++ b/tools/crmadmin.c +@@ -149,6 +149,32 @@ 
static pcmk__cli_option_t long_options[] = { + { 0, 0, 0, 0 } + }; + ++// \return Standard Pacemaker return code ++static int ++list_nodes() ++{ ++ cib_t *the_cib = cib_new(); ++ xmlNode *output = NULL; ++ int rc; ++ ++ if (the_cib == NULL) { ++ return ENOMEM; ++ } ++ rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); ++ if (rc != pcmk_ok) { ++ return pcmk_legacy2rc(rc); ++ } ++ ++ rc = the_cib->cmds->query(the_cib, NULL, &output, ++ cib_scope_local | cib_sync_call); ++ if (rc == pcmk_ok) { ++ do_find_node_list(output); ++ free_xml(output); ++ } ++ the_cib->cmds->signoff(the_cib); ++ return pcmk_legacy2rc(rc); ++} ++ + int + main(int argc, char **argv) + { +@@ -304,26 +330,7 @@ do_work(void) + crmd_operation = CRM_OP_PING; + + } else if (DO_NODE_LIST) { +- +- cib_t *the_cib = cib_new(); +- xmlNode *output = NULL; +- +- int rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); +- +- if (rc != pcmk_ok) { +- fprintf(stderr, "Could not connect to CIB: %s\n", +- pcmk_strerror(rc)); +- return -1; +- } +- +- rc = the_cib->cmds->query(the_cib, NULL, &output, cib_scope_local | cib_sync_call); +- if(rc == pcmk_ok) { +- do_find_node_list(output); +- +- free_xml(output); +- } +- the_cib->cmds->signoff(the_cib); +- crm_exit(crm_errno2exit(rc)); ++ crm_exit(pcmk_rc2exitc(list_nodes())); + + } else if (DO_RESET) { + /* tell dest_node to initiate the shutdown procedure +-- +1.8.3.1 + diff --git a/SOURCES/006-shutdown-lock.patch b/SOURCES/006-shutdown-lock.patch deleted file mode 100644 index 357a2e8..0000000 --- a/SOURCES/006-shutdown-lock.patch +++ /dev/null @@ -1,252 +0,0 @@ -From 3d8a7dc405e98cd8fe637d3e283bc0468d50bc71 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 15 Jan 2020 17:56:44 -0600 -Subject: [PATCH 02/18] Refactor: controller: functionize parts of resource - deletion notification - -... 
for future reuse ---- - daemons/controld/controld_execd.c | 116 +++++++++++++++++++++++++------------- - daemons/controld/controld_lrm.h | 11 +++- - 2 files changed, 88 insertions(+), 39 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 212739e..82f2bf1 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -42,9 +42,6 @@ static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op - static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - const char *operation, xmlNode *msg); - --void send_direct_ack(const char *to_host, const char *to_sys, -- lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); -- - static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - int log_level); - static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); -@@ -278,7 +275,7 @@ send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, - - op->rc = PCMK_OCF_OK; - op->op_status = PCMK_LRM_OP_DONE; -- send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); -+ controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); - lrmd_free_event(op); - } - -@@ -850,6 +847,57 @@ controld_query_executor_state(const char *node_name) - node_update_cluster|node_update_peer); - } - -+/*! 
-+ * \internal -+ * \brief Map standard Pacemaker return code to operation status and OCF code -+ * -+ * \param[out] event Executor event whose status and return code should be set -+ * \param[in] rc Standard Pacemaker return code -+ */ -+void -+controld_rc2event(lrmd_event_data_t *event, int rc) -+{ -+ switch (rc) { -+ case pcmk_rc_ok: -+ event->rc = PCMK_OCF_OK; -+ event->op_status = PCMK_LRM_OP_DONE; -+ break; -+ case EACCES: -+ event->rc = PCMK_OCF_INSUFFICIENT_PRIV; -+ event->op_status = PCMK_LRM_OP_ERROR; -+ break; -+ default: -+ event->rc = PCMK_OCF_UNKNOWN_ERROR; -+ event->op_status = PCMK_LRM_OP_ERROR; -+ break; -+ } -+} -+ -+/*! -+ * \internal -+ * \brief Trigger a new transition after CIB status was deleted -+ * -+ * If a CIB status delete was not expected (as part of the transition graph), -+ * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" -+ * cluster property. -+ * -+ * \param[in] from_sys IPC name that requested the delete -+ * \param[in] rsc_id Resource whose status was deleted (for logging only) -+ */ -+void -+controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) -+{ -+ if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { -+ char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); -+ -+ crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); -+ update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, -+ NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, -+ FALSE, NULL, NULL); -+ free(now_s); -+ } -+} -+ - static void - notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) - { -@@ -860,33 +908,11 @@ notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_ - crm_info("Notifying %s on %s that %s was%s deleted", - from_sys, (from_host? from_host : "localhost"), rsc_id, - ((rc == pcmk_ok)? 
"" : " not")); -- - op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); -- -- if (rc == pcmk_ok) { -- op->op_status = PCMK_LRM_OP_DONE; -- op->rc = PCMK_OCF_OK; -- } else { -- op->op_status = PCMK_LRM_OP_ERROR; -- op->rc = PCMK_OCF_UNKNOWN_ERROR; -- } -- -- send_direct_ack(from_host, from_sys, NULL, op, rsc_id); -+ controld_rc2event(op, pcmk_legacy2rc(rc)); -+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); - lrmd_free_event(op); -- -- if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { -- /* this isn't expected - trigger a new transition */ -- time_t now = time(NULL); -- char *now_s = crm_itoa(now); -- -- crm_debug("Triggering a refresh after %s deleted %s from the executor", -- from_sys, rsc_id); -- -- update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, -- "last-lrm-refresh", now_s, FALSE, NULL, NULL); -- -- free(now_s); -- } -+ controld_trigger_delete_refresh(from_sys, rsc_id); - } - - static gboolean -@@ -1495,7 +1521,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, - #if ENABLE_ACL - if (user_name && is_privileged(user_name) == FALSE) { - crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); -- send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); -+ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); - lrmd_free_event(op); - return; - } -@@ -1514,7 +1540,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, - crm_log_xml_warn(xml, "bad input"); - } - -- send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); -+ controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); - lrmd_free_event(op); - } - -@@ -1684,7 +1710,7 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, - } else { - op->rc = PCMK_OCF_UNKNOWN_ERROR; - } -- send_direct_ack(from_host, from_sys, NULL, op, rsc->id); -+ controld_ack_event_directly(from_host, from_sys, NULL, op, 
rsc->id); - lrmd_free_event(op); - return; - } -@@ -2000,9 +2026,23 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons - return op; - } - -+/*! -+ * \internal -+ * \brief Send a (synthesized) event result -+ * -+ * Reply with a synthesized event result directly, as opposed to going through -+ * the executor. -+ * -+ * \param[in] to_host Host to send result to -+ * \param[in] to_sys IPC name to send result to (NULL for transition engine) -+ * \param[in] rsc Type information about resource the result is for -+ * \param[in] op Event with result to send -+ * \param[in] rsc_id ID of resource the result is for -+ */ - void --send_direct_ack(const char *to_host, const char *to_sys, -- lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) -+controld_ack_event_directly(const char *to_host, const char *to_sys, -+ lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, -+ const char *rsc_id) - { - xmlNode *reply = NULL; - xmlNode *update, *iter; -@@ -2221,7 +2261,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - - op->rc = PCMK_OCF_UNKNOWN_ERROR; - op->op_status = PCMK_LRM_OP_INVALID; -- send_direct_ack(NULL, NULL, rsc, op, rsc->id); -+ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); - lrmd_free_event(op); - free(op_id); - return; -@@ -2288,7 +2328,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); - op->rc = target_rc; - op->op_status = PCMK_LRM_OP_DONE; -- send_direct_ack(NULL, NULL, rsc, op, rsc->id); -+ controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); - } - - pending->params = op->params; -@@ -2388,7 +2428,7 @@ do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data - - } else { - crm_warn("Resource %s no longer exists in the executor", op->rsc_id); -- send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); -+ controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); - goto cleanup; - } - -@@ 
-2660,7 +2700,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - } - - if (need_direct_ack) { -- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); -+ controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); - } - - if(remove == FALSE) { -diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h -index 3ab7048..7acac2a 100644 ---- a/daemons/controld/controld_lrm.h -+++ b/daemons/controld/controld_lrm.h -@@ -1,11 +1,13 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. - */ -+#ifndef CONTROLD_LRM__H -+# define CONTROLD_LRM__H - - #include - #include -@@ -169,3 +171,10 @@ gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); - - void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - active_op_t *pending, xmlNode *action_xml); -+void controld_ack_event_directly(const char *to_host, const char *to_sys, -+ lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, -+ const char *rsc_id); -+void controld_rc2event(lrmd_event_data_t *event, int rc); -+void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id); -+ -+#endif --- -1.8.3.1 - diff --git a/SOURCES/007-ipc_model.patch b/SOURCES/007-ipc_model.patch new file mode 100644 index 0000000..55fed07 --- /dev/null +++ b/SOURCES/007-ipc_model.patch @@ -0,0 +1,4146 @@ +From e34421b2608f235c6a77eb6de4596d93a2128be1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 3 Apr 2020 11:13:52 -0500 +Subject: [PATCH 1/6] Refactor: libcrmcommon: new model for daemon IPC API + +This is based on tools/crm_resource_controller.[ch] and the node removal code +in tools/crm_node.c, but generalized for any daemon. As of this commit, no +specific daemon is supported. 
+--- + include/crm/common/internal.h | 8 + + include/crm/common/ipc.h | 95 +++++- + lib/common/crmcommon_private.h | 86 +++++ + lib/common/ipc_client.c | 703 ++++++++++++++++++++++++++++++++++++++++- + lib/common/mainloop.c | 70 ++-- + 5 files changed, 940 insertions(+), 22 deletions(-) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index 28b20b4..13c29ea 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -19,6 +19,7 @@ + #include // xmlNode + + #include // crm_strdup_printf() ++#include // mainloop_io_t, struct ipc_client_callbacks + + // Internal ACL-related utilities (from acl.c) + +@@ -103,6 +104,13 @@ pcmk__open_devnull(int flags) + } while (0) + + ++/* internal main loop utilities (from mainloop.c) */ ++ ++int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, ++ struct ipc_client_callbacks *callbacks, ++ mainloop_io_t **source); ++ ++ + /* internal procfs utilities (from procfs.c) */ + + pid_t pcmk__procfs_pid_of(const char *name); +diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h +index a0df956..8dee1b1 100644 +--- a/include/crm/common/ipc.h ++++ b/include/crm/common/ipc.h +@@ -16,7 +16,8 @@ extern "C" { + + /** + * \file +- * \brief Wrappers for and extensions to libqb IPC ++ * \brief IPC interface to Pacemaker daemons ++ * + * \ingroup core + */ + +@@ -48,6 +49,96 @@ xmlNode *create_request_adv(const char *task, xmlNode *xml_data, + const char *origin); + + ++/* ++ * The library supports two methods of creating IPC connections. The older code ++ * allows connecting to any arbitrary IPC name. The newer code only allows ++ * connecting to one of the Pacemaker daemons. ++ * ++ * As daemons are converted to use the new model, the old functions should be ++ * considered deprecated for use with those daemons. Once all daemons are ++ * converted, the old functions should be officially deprecated as public API ++ * and eventually made internal API. 
++ */ ++ ++/* ++ * Pacemaker daemon IPC ++ */ ++ ++//! Available IPC interfaces ++enum pcmk_ipc_server { ++ pcmk_ipc_attrd, //!< Attribute manager ++ pcmk_ipc_based, //!< CIB manager ++ pcmk_ipc_controld, //!< Controller ++ pcmk_ipc_execd, //!< Executor ++ pcmk_ipc_fenced, //!< Fencer ++ pcmk_ipc_pacemakerd, //!< Launcher ++ pcmk_ipc_schedulerd, //!< Scheduler ++}; ++ ++//! Possible event types that an IPC event callback can be called for ++enum pcmk_ipc_event { ++ pcmk_ipc_event_connect, //!< Result of asynchronous connection attempt ++ pcmk_ipc_event_disconnect, //!< Termination of IPC connection ++ pcmk_ipc_event_reply, //!< Daemon's reply to client IPC request ++ pcmk_ipc_event_notify, //!< Notification from daemon ++}; ++ ++//! How IPC replies should be dispatched ++enum pcmk_ipc_dispatch { ++ pcmk_ipc_dispatch_main, //!< Attach IPC to GMainLoop for dispatch ++ pcmk_ipc_dispatch_poll, //!< Caller will poll and dispatch IPC ++ pcmk_ipc_dispatch_sync, //!< Sending a command will wait for any reply ++}; ++ ++//! Client connection to Pacemaker IPC ++typedef struct pcmk_ipc_api_s pcmk_ipc_api_t; ++ ++/*! ++ * \brief Callback function type for Pacemaker daemon IPC APIs ++ * ++ * \param[in] api IPC API connection ++ * \param[in] event_type The type of event that occurred ++ * \param[in] status Event status ++ * \param[in] event_data Event-specific data ++ * \param[in] user_data Caller data provided when callback was registered ++ * ++ * \note For connection and disconnection events, event_data may be NULL (for ++ * local IPC) or the name of the connected node (for remote IPC, for ++ * daemons that support that). For reply and notify events, event_data is ++ * defined by the specific daemon API. 
++ */ ++typedef void (*pcmk_ipc_callback_t)(pcmk_ipc_api_t *api, ++ enum pcmk_ipc_event event_type, ++ crm_exit_t status, ++ void *event_data, void *user_data); ++ ++int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server); ++ ++void pcmk_free_ipc_api(pcmk_ipc_api_t *api); ++ ++int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type); ++ ++void pcmk_disconnect_ipc(pcmk_ipc_api_t *api); ++ ++int pcmk_poll_ipc(pcmk_ipc_api_t *api, int timeout_ms); ++ ++void pcmk_dispatch_ipc(pcmk_ipc_api_t *api); ++ ++void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, ++ void *user_data); ++ ++const char *pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log); ++ ++bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api); ++ ++int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, ++ uint32_t nodeid); ++ ++ ++/* ++ * Generic IPC API (to eventually be deprecated as public API and made internal) ++ */ ++ + /* *INDENT-OFF* */ + enum crm_ipc_flags + { +@@ -58,7 +149,7 @@ enum crm_ipc_flags + crm_ipc_proxied = 0x00000100, /* _ALL_ replies to proxied connections need to be sent as events */ + crm_ipc_client_response = 0x00000200, /* A Response is expected in reply */ + +- // These are options only for pcmk__ipc_send_iov() ++ // These are options for Pacemaker's internal use only (pcmk__ipc_send_*()) + crm_ipc_server_event = 0x00010000, /* Send an Event instead of a Response */ + crm_ipc_server_free = 0x00020000, /* Free the iovec after sending */ + crm_ipc_proxied_relay_response = 0x00040000, /* all replies to proxied connections are sent as events, this flag preserves whether the event should be treated as an actual event, or a response.*/ +diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h +index d06fa20..f9df27d 100644 +--- a/lib/common/crmcommon_private.h ++++ b/lib/common/crmcommon_private.h +@@ -103,6 +103,84 @@ pcmk__xml_attr_value(const xmlAttr *attr) + + #define PCMK__IPC_VERSION 1 + ++// IPC 
behavior that varies by daemon ++typedef struct pcmk__ipc_methods_s { ++ /*! ++ * \internal ++ * \brief Allocate any private data needed by daemon IPC ++ * ++ * \param[in] api IPC API connection ++ * ++ * \return Standard Pacemaker return code ++ */ ++ int (*new_data)(pcmk_ipc_api_t *api); ++ ++ /*! ++ * \internal ++ * \brief Free any private data used by daemon IPC ++ * ++ * \param[in] api_data Data allocated by new_data() method ++ */ ++ void (*free_data)(void *api_data); ++ ++ /*! ++ * \internal ++ * \brief Perform daemon-specific handling after successful connection ++ * ++ * Some daemons require clients to register before sending any other ++ * commands. The controller requires a CRM_OP_HELLO (with no reply), and ++ * the CIB manager, executor, and fencer require a CRM_OP_REGISTER (with a ++ * reply). Ideally this would be consistent across all daemons, but for now ++ * this allows each to do its own authorization. ++ * ++ * \param[in] api IPC API connection ++ * ++ * \return Standard Pacemaker return code ++ */ ++ int (*post_connect)(pcmk_ipc_api_t *api); ++ ++ /*! ++ * \internal ++ * \brief Check whether an IPC request results in a reply ++ * ++ * \param[in] api IPC API connection ++ * \param[in] request IPC request XML ++ * ++ * \return true if request would result in an IPC reply, false otherwise ++ */ ++ bool (*reply_expected)(pcmk_ipc_api_t *api, xmlNode *request); ++ ++ /*! ++ * \internal ++ * \brief Perform daemon-specific handling of an IPC message ++ * ++ * \param[in] api IPC API connection ++ * \param[in] msg Message read from IPC connection ++ */ ++ void (*dispatch)(pcmk_ipc_api_t *api, xmlNode *msg); ++ ++ /*! 
++ * \internal ++ * \brief Perform daemon-specific handling of an IPC disconnect ++ * ++ * \param[in] api IPC API connection ++ */ ++ void (*post_disconnect)(pcmk_ipc_api_t *api); ++} pcmk__ipc_methods_t; ++ ++// Implementation of pcmk_ipc_api_t ++struct pcmk_ipc_api_s { ++ enum pcmk_ipc_server server; // Daemon this IPC API instance is for ++ enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched ++ crm_ipc_t *ipc; // IPC connection ++ mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC ++ bool free_on_disconnect; // Whether disconnect should free object ++ pcmk_ipc_callback_t cb; // Caller-registered callback (if any) ++ void *user_data; // Caller-registered data (if any) ++ void *api_data; // For daemon-specific use ++ pcmk__ipc_methods_t *cmds; // Behavior that varies by daemon ++}; ++ + typedef struct pcmk__ipc_header_s { + struct qb_ipc_response_header qb; + uint32_t size_uncompressed; +@@ -112,6 +190,14 @@ typedef struct pcmk__ipc_header_s { + } pcmk__ipc_header_t; + + G_GNUC_INTERNAL ++int pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request); ++ ++G_GNUC_INTERNAL ++void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, ++ enum pcmk_ipc_event event_type, ++ crm_exit_t status, void *event_data); ++ ++G_GNUC_INTERNAL + unsigned int pcmk__ipc_buffer_size(unsigned int max); + + G_GNUC_INTERNAL +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +index 7737588..16dc9b5 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -31,6 +31,679 @@ + #include + #include "crmcommon_private.h" + ++/*! ++ * \brief Create a new object for using Pacemaker daemon IPC ++ * ++ * \param[out] api Where to store new IPC object ++ * \param[in] server Which Pacemaker daemon the object is for ++ * ++ * \return Standard Pacemaker result code ++ * ++ * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). ++ * \note This is intended to supersede crm_ipc_new() but is not yet usable. 
++ */ ++int ++pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) ++{ ++ size_t max_size = 0; ++ ++ if (api == NULL) { ++ return EINVAL; ++ } ++ ++ *api = calloc(1, sizeof(pcmk_ipc_api_t)); ++ if (*api == NULL) { ++ return errno; ++ } ++ ++ (*api)->server = server; ++ if (pcmk_ipc_name(*api, false) == NULL) { ++ pcmk_free_ipc_api(*api); ++ *api = NULL; ++ return EOPNOTSUPP; ++ } ++ ++ // Set server methods and max_size (if not default) ++ switch (server) { ++ case pcmk_ipc_attrd: ++ break; ++ ++ case pcmk_ipc_based: ++ max_size = 512 * 1024; // 512KB ++ break; ++ ++ case pcmk_ipc_controld: ++ break; ++ ++ case pcmk_ipc_execd: ++ break; ++ ++ case pcmk_ipc_fenced: ++ break; ++ ++ case pcmk_ipc_pacemakerd: ++ break; ++ ++ case pcmk_ipc_schedulerd: ++ // @TODO max_size could vary by client, maybe take as argument? ++ max_size = 5 * 1024 * 1024; // 5MB ++ break; ++ } ++ if ((*api)->cmds == NULL) { ++ pcmk_free_ipc_api(*api); ++ *api = NULL; ++ return ENOMEM; ++ } ++ ++ (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size); ++ if ((*api)->ipc == NULL) { ++ pcmk_free_ipc_api(*api); ++ *api = NULL; ++ return ENOMEM; ++ } ++ ++ // If daemon API has its own data to track, allocate it ++ if ((*api)->cmds->new_data != NULL) { ++ if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { ++ pcmk_free_ipc_api(*api); ++ *api = NULL; ++ return ENOMEM; ++ } ++ } ++ crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); ++ return pcmk_rc_ok; ++} ++ ++static void ++free_daemon_specific_data(pcmk_ipc_api_t *api) ++{ ++ if ((api != NULL) && (api->cmds != NULL)) { ++ if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { ++ api->cmds->free_data(api->api_data); ++ api->api_data = NULL; ++ } ++ free(api->cmds); ++ api->cmds = NULL; ++ } ++} ++ ++/*! 
++ * \internal ++ * \brief Call an IPC API event callback, if one is registered ++ * ++ * \param[in] api IPC API connection ++ * \param[in] event_type The type of event that occurred ++ * \param[in] status Event status ++ * \param[in] event_data Event-specific data ++ */ ++void ++pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, ++ crm_exit_t status, void *event_data) ++{ ++ if ((api != NULL) && (api->cb != NULL)) { ++ api->cb(api, event_type, status, event_data, api->user_data); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Clean up after an IPC disconnect ++ * ++ * \param[in] user_data IPC API connection that disconnected ++ * ++ * \note This function can be used as a main loop IPC destroy callback. ++ */ ++static void ++ipc_post_disconnect(gpointer user_data) ++{ ++ pcmk_ipc_api_t *api = user_data; ++ ++ crm_info("Disconnected from %s IPC API", pcmk_ipc_name(api, true)); ++ ++ // Perform any daemon-specific handling needed ++ if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { ++ api->cmds->post_disconnect(api); ++ } ++ ++ // Call client's registered event callback ++ pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, ++ NULL); ++ ++ /* If this is being called from a running main loop, mainloop_gio_destroy() ++ * will free ipc and mainloop_io immediately after calling this function. ++ * If this is called from a stopped main loop, these will leak, so the best ++ * practice is to close the connection before stopping the main loop. ++ */ ++ api->ipc = NULL; ++ api->mainloop_io = NULL; ++ ++ if (api->free_on_disconnect) { ++ /* pcmk_free_ipc_api() has already been called, but did not free api ++ * or api->cmds because this function needed them. Do that now. ++ */ ++ free_daemon_specific_data(api); ++ crm_trace("Freeing IPC API object after disconnect"); ++ free(api); ++ } ++} ++ ++/*! 
++ * \brief Free the contents of an IPC API object ++ * ++ * \param[in] api IPC API object to free ++ */ ++void ++pcmk_free_ipc_api(pcmk_ipc_api_t *api) ++{ ++ bool free_on_disconnect = false; ++ ++ if (api == NULL) { ++ return; ++ } ++ crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); ++ ++ if (api->ipc != NULL) { ++ if (api->mainloop_io != NULL) { ++ /* We need to keep the api pointer itself around, because it is the ++ * user data for the IPC client destroy callback. That will be ++ * triggered by the pcmk_disconnect_ipc() call below, but it might ++ * happen later in the main loop (if still running). ++ * ++ * This flag tells the destroy callback to free the object. It can't ++ * do that unconditionally, because the application might call this ++ * function after a disconnect that happened by other means. ++ */ ++ free_on_disconnect = api->free_on_disconnect = true; ++ } ++ pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true ++ } ++ if (!free_on_disconnect) { ++ free_daemon_specific_data(api); ++ crm_trace("Freeing IPC API object"); ++ free(api); ++ } ++} ++ ++/*! ++ * \brief Get the IPC name used with an IPC API connection ++ * ++ * \param[in] api IPC API connection ++ * \param[in] for_log If true, return human-friendly name instead of IPC name ++ * ++ * \return IPC API's human-friendly or connection name, or if none is available, ++ * "Pacemaker" if for_log is true and NULL if for_log is false ++ */ ++const char * ++pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log) ++{ ++ if (api == NULL) { ++ return for_log? "Pacemaker" : NULL; ++ } ++ switch (api->server) { ++ case pcmk_ipc_attrd: ++ return for_log? "attribute manager" : NULL /* T_ATTRD */; ++ ++ case pcmk_ipc_based: ++ return for_log? "CIB manager" : NULL /* PCMK__SERVER_BASED_RW */; ++ ++ case pcmk_ipc_controld: ++ return for_log? "controller" : NULL /* CRM_SYSTEM_CRMD */; ++ ++ case pcmk_ipc_execd: ++ return for_log? 
"executor" : NULL /* CRM_SYSTEM_LRMD */; ++ ++ case pcmk_ipc_fenced: ++ return for_log? "fencer" : NULL /* "stonith-ng" */; ++ ++ case pcmk_ipc_pacemakerd: ++ return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */; ++ ++ case pcmk_ipc_schedulerd: ++ return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */; ++ ++ default: ++ return for_log? "Pacemaker" : NULL; ++ } ++} ++ ++/*! ++ * \brief Check whether an IPC API connection is active ++ * ++ * \param[in] api IPC API connection ++ * ++ * \return true if IPC is connected, false otherwise ++ */ ++bool ++pcmk_ipc_is_connected(pcmk_ipc_api_t *api) ++{ ++ return (api != NULL) && crm_ipc_connected(api->ipc); ++} ++ ++/*! ++ * \internal ++ * \brief Call the daemon-specific API's dispatch function ++ * ++ * Perform daemon-specific handling of IPC reply dispatch. It is the daemon ++ * method's responsibility to call the client's registered event callback, as ++ * well as allocate and free any event data. ++ * ++ * \param[in] api IPC API connection ++ */ ++static void ++call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) ++{ ++ crm_log_xml_trace(message, "ipc-received"); ++ if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { ++ api->cmds->dispatch(api, message); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Dispatch data read from IPC source ++ * ++ * \param[in] buffer Data read from IPC ++ * \param[in] length Number of bytes of data in buffer (ignored) ++ * \param[in] user_data IPC object ++ * ++ * \return Always 0 (meaning connection is still required) ++ * ++ * \note This function can be used as a main loop IPC dispatch callback. 
++ */ ++static int ++dispatch_ipc_data(const char *buffer, ssize_t length, gpointer user_data) ++{ ++ pcmk_ipc_api_t *api = user_data; ++ xmlNode *msg; ++ ++ CRM_CHECK(api != NULL, return 0); ++ ++ if (buffer == NULL) { ++ crm_warn("Empty message received from %s IPC", ++ pcmk_ipc_name(api, true)); ++ return 0; ++ } ++ ++ msg = string2xml(buffer); ++ if (msg == NULL) { ++ crm_warn("Malformed message received from %s IPC", ++ pcmk_ipc_name(api, true)); ++ return 0; ++ } ++ call_api_dispatch(api, msg); ++ free_xml(msg); ++ return 0; ++} ++ ++/*! ++ * \brief Check whether an IPC connection has data available (without main loop) ++ * ++ * \param[in] api IPC API connection ++ * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once ++ * and return immediately; otherwise, poll for up to ++ * this many milliseconds ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call ++ * this function to check whether IPC data is available. Return values of ++ * interest include pcmk_rc_ok meaning data is available, and EAGAIN ++ * meaning no data is available; all other values indicate errors. ++ * \todo This does not allow the caller to poll multiple file descriptors at ++ * once. If there is demand for that, we could add a wrapper for ++ * crm_ipc_get_fd(api->ipc), so the caller can call poll() themselves. ++ */ ++int ++pcmk_poll_ipc(pcmk_ipc_api_t *api, int timeout_ms) ++{ ++ int rc; ++ struct pollfd pollfd = { 0, }; ++ ++ if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { ++ return EINVAL; ++ } ++ pollfd.fd = crm_ipc_get_fd(api->ipc); ++ pollfd.events = POLLIN; ++ rc = poll(&pollfd, 1, timeout_ms); ++ if (rc < 0) { ++ return errno; ++ } else if (rc == 0) { ++ return EAGAIN; ++ } ++ return pcmk_rc_ok; ++} ++ ++/*! 
++ * \brief Dispatch available messages on an IPC connection (without main loop) ++ * ++ * \param[in] api IPC API connection ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call ++ * this function when IPC data is available. ++ */ ++void ++pcmk_dispatch_ipc(pcmk_ipc_api_t *api) ++{ ++ if (api == NULL) { ++ return; ++ } ++ while (crm_ipc_ready(api->ipc)) { ++ if (crm_ipc_read(api->ipc) > 0) { ++ dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api); ++ } ++ } ++} ++ ++// \return Standard Pacemaker return code ++static int ++connect_with_main_loop(pcmk_ipc_api_t *api) ++{ ++ int rc; ++ ++ struct ipc_client_callbacks callbacks = { ++ .dispatch = dispatch_ipc_data, ++ .destroy = ipc_post_disconnect, ++ }; ++ ++ rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, ++ &callbacks, &(api->mainloop_io)); ++ if (rc != pcmk_rc_ok) { ++ return rc; ++ } ++ crm_debug("Connected to %s IPC (attached to main loop)", ++ pcmk_ipc_name(api, true)); ++ /* After this point, api->mainloop_io owns api->ipc, so api->ipc ++ * should not be explicitly freed. ++ */ ++ return pcmk_rc_ok; ++} ++ ++// \return Standard Pacemaker return code ++static int ++connect_without_main_loop(pcmk_ipc_api_t *api) ++{ ++ int rc; ++ ++ if (!crm_ipc_connect(api->ipc)) { ++ rc = errno; ++ crm_ipc_close(api->ipc); ++ return rc; ++ } ++ crm_debug("Connected to %s IPC (without main loop)", ++ pcmk_ipc_name(api, true)); ++ return pcmk_rc_ok; ++} ++ ++/*! 
++ * \brief Connect to a Pacemaker daemon via IPC ++ * ++ * \param[in] api IPC API instance ++ * \param[in] dispatch_type How IPC replies should be dispatched ++ * ++ * \return Standard Pacemaker return code ++ */ ++int ++pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) ++{ ++ int rc = pcmk_rc_ok; ++ ++ if ((api == NULL) || (api->ipc == NULL)) { ++ crm_err("Cannot connect to uninitialized API object"); ++ return EINVAL; ++ } ++ ++ if (crm_ipc_connected(api->ipc)) { ++ crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true)); ++ return pcmk_rc_ok; ++ } ++ ++ api->dispatch_type = dispatch_type; ++ switch (dispatch_type) { ++ case pcmk_ipc_dispatch_main: ++ rc = connect_with_main_loop(api); ++ break; ++ ++ case pcmk_ipc_dispatch_sync: ++ case pcmk_ipc_dispatch_poll: ++ rc = connect_without_main_loop(api); ++ break; ++ } ++ if (rc != pcmk_rc_ok) { ++ return rc; ++ } ++ ++ if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { ++ rc = api->cmds->post_connect(api); ++ if (rc != pcmk_rc_ok) { ++ crm_ipc_close(api->ipc); ++ } ++ } ++ return rc; ++} ++ ++/*! ++ * \brief Disconnect an IPC API instance ++ * ++ * \param[in] api IPC API connection ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note If the connection is attached to a main loop, this function should be ++ * called before quitting the main loop, to ensure that all memory is ++ * freed. 
++ */ ++void ++pcmk_disconnect_ipc(pcmk_ipc_api_t *api) ++{ ++ if ((api == NULL) || (api->ipc == NULL)) { ++ return; ++ } ++ switch (api->dispatch_type) { ++ case pcmk_ipc_dispatch_main: ++ { ++ mainloop_io_t *mainloop_io = api->mainloop_io; ++ ++ // Make sure no code with access to api can use these again ++ api->mainloop_io = NULL; ++ api->ipc = NULL; ++ ++ mainloop_del_ipc_client(mainloop_io); ++ // After this point api might have already been freed ++ } ++ break; ++ ++ case pcmk_ipc_dispatch_poll: ++ case pcmk_ipc_dispatch_sync: ++ { ++ crm_ipc_t *ipc = api->ipc; ++ ++ // Make sure no code with access to api can use ipc again ++ api->ipc = NULL; ++ ++ // This should always be the case already, but to be safe ++ api->free_on_disconnect = false; ++ ++ crm_ipc_destroy(ipc); ++ ipc_post_disconnect(api); ++ } ++ break; ++ } ++} ++ ++/*! ++ * \brief Register a callback for IPC API events ++ * ++ * \param[in] api IPC API connection ++ * \param[in] cb Callback to register ++ * \param[in] user_data Caller data to pass to callback ++ * ++ * \note This function may be called multiple times to update the callback ++ * and/or user data. The caller remains responsible for freeing ++ * user_data in any case (after the IPC is disconnected, if the ++ * user data is still registered with the IPC). ++ */ ++void ++pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, ++ void *user_data) ++{ ++ if (api == NULL) { ++ return; ++ } ++ api->cb = cb; ++ api->user_data = user_data; ++} ++ ++/*! ++ * \internal ++ * \brief Send an XML request across an IPC API connection ++ * ++ * \param[in] api IPC API connection ++ * \param[in] request XML request to send ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note Daemon-specific IPC API functions should call this function to send ++ * requests, because it handles different dispatch types appropriately. 
++ */ ++int ++pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request) ++{ ++ int rc; ++ xmlNode *reply = NULL; ++ enum crm_ipc_flags flags = crm_ipc_flags_none; ++ ++ if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { ++ return EINVAL; ++ } ++ crm_log_xml_trace(request, "ipc-sent"); ++ ++ // Synchronous dispatch requires waiting for a reply ++ if ((api->dispatch_type == pcmk_ipc_dispatch_sync) ++ && (api->cmds != NULL) ++ && (api->cmds->reply_expected != NULL) ++ && (api->cmds->reply_expected(api, request))) { ++ flags = crm_ipc_client_response; ++ } ++ ++ // The 0 here means a default timeout of 5 seconds ++ rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); ++ ++ if (rc < 0) { ++ return pcmk_legacy2rc(rc); ++ } else if (rc == 0) { ++ return ENODATA; ++ } ++ ++ // With synchronous dispatch, we dispatch any reply now ++ if (reply != NULL) { ++ call_api_dispatch(api, reply); ++ free_xml(reply); ++ } ++ return pcmk_rc_ok; ++} ++ ++/*! ++ * \internal ++ * \brief Create the XML for an IPC request to purge a node from the peer cache ++ * ++ * \param[in] api IPC API connection ++ * \param[in] node_name If not NULL, name of node to purge ++ * \param[in] nodeid If not 0, node ID of node to purge ++ * ++ * \return Newly allocated IPC request XML ++ * ++ * \note The controller, fencer, and pacemakerd use the same request syntax, but ++ * the attribute manager uses a different one. The CIB manager doesn't ++ * have any syntax for it. The executor and scheduler don't connect to the ++ * cluster layer and thus don't have or need any syntax for it. ++ * ++ * \todo Modify the attribute manager to accept the common syntax (as well ++ * as its current one, for compatibility with older clients). Modify ++ * the CIB manager to accept and honor the common syntax. Modify the ++ * executor and scheduler to accept the syntax (immediately returning ++ * success), just for consistency. 
Modify this function to use the ++ * common syntax with all daemons if their version supports it. ++ */ ++static xmlNode * ++create_purge_node_request(pcmk_ipc_api_t *api, const char *node_name, ++ uint32_t nodeid) ++{ ++ xmlNode *request = NULL; ++ const char *client = crm_system_name? crm_system_name : "client"; ++ ++ switch (api->server) { ++ case pcmk_ipc_attrd: ++ request = create_xml_node(NULL, __FUNCTION__); ++ crm_xml_add(request, F_TYPE, T_ATTRD); ++ crm_xml_add(request, F_ORIG, crm_system_name); ++ crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); ++ crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node_name); ++ if (nodeid > 0) { ++ crm_xml_add_int(request, PCMK__XA_ATTR_NODE_ID, (int) nodeid); ++ } ++ break; ++ ++ case pcmk_ipc_controld: ++ case pcmk_ipc_fenced: ++ case pcmk_ipc_pacemakerd: ++ request = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, ++ pcmk_ipc_name(api, false), client, NULL); ++ if (nodeid > 0) { ++ crm_xml_set_id(request, "%lu", (unsigned long) nodeid); ++ } ++ crm_xml_add(request, XML_ATTR_UNAME, node_name); ++ break; ++ ++ case pcmk_ipc_based: ++ case pcmk_ipc_execd: ++ case pcmk_ipc_schedulerd: ++ break; ++ } ++ return request; ++} ++ ++/*! ++ * \brief Ask a Pacemaker daemon to purge a node from its peer cache ++ * ++ * \param[in] api IPC API connection ++ * \param[in] node_name If not NULL, name of node to purge ++ * \param[in] nodeid If not 0, node ID of node to purge ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note At least one of node_name or nodeid must be specified. 
++ */ ++int ++pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) ++{ ++ int rc = 0; ++ xmlNode *request = NULL; ++ ++ if (api == NULL) { ++ return EINVAL; ++ } ++ if ((node_name == NULL) && (nodeid == 0)) { ++ return EINVAL; ++ } ++ ++ request = create_purge_node_request(api, node_name, nodeid); ++ if (request == NULL) { ++ return EOPNOTSUPP; ++ } ++ rc = pcmk__send_ipc_request(api, request); ++ free_xml(request); ++ ++ crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", ++ pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); ++ return rc; ++} ++ ++/* ++ * Generic IPC API (to eventually be deprecated as public API and made internal) ++ */ ++ + struct crm_ipc_s { + struct pollfd pfd; + unsigned int max_buf_size; // maximum bytes we can send or receive over IPC +@@ -42,16 +715,44 @@ struct crm_ipc_s { + qb_ipcc_connection_t *ipc; + }; + ++/*! ++ * \brief Create a new (legacy) object for using Pacemaker daemon IPC ++ * ++ * \param[in] name IPC system name to connect to ++ * \param[in] max_size Use a maximum IPC buffer size of at least this size ++ * ++ * \return Newly allocated IPC object on success, NULL otherwise ++ * ++ * \note The caller is responsible for freeing the result using ++ * crm_ipc_destroy(). ++ * \note This should be considered deprecated for use with daemons supported by ++ * pcmk_new_ipc_api(). 
++ */ + crm_ipc_t * + crm_ipc_new(const char *name, size_t max_size) + { + crm_ipc_t *client = NULL; + + client = calloc(1, sizeof(crm_ipc_t)); ++ if (client == NULL) { ++ crm_err("Could not create IPC connection: %s", strerror(errno)); ++ return NULL; ++ } + + client->name = strdup(name); ++ if (client->name == NULL) { ++ crm_err("Could not create IPC connection: %s", strerror(errno)); ++ free(client); ++ return NULL; ++ } + client->buf_size = pcmk__ipc_buffer_size(max_size); + client->buffer = malloc(client->buf_size); ++ if (client->buffer == NULL) { ++ crm_err("Could not create IPC connection: %s", strerror(errno)); ++ free(client->name); ++ free(client); ++ return NULL; ++ } + + /* Clients initiating connection pick the max buf size */ + client->max_buf_size = client->buf_size; +@@ -143,8 +844,6 @@ void + crm_ipc_close(crm_ipc_t * client) + { + if (client) { +- crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc); +- + if (client->ipc) { + qb_ipcc_connection_t *ipc = client->ipc; + +diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c +index 634eead..e942e57 100644 +--- a/lib/common/mainloop.c ++++ b/lib/common/mainloop.c +@@ -834,32 +834,66 @@ mainloop_gio_destroy(gpointer c) + free(c_name); + } + +-mainloop_io_t * +-mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, +- struct ipc_client_callbacks *callbacks) ++/*! ++ * \brief Connect to IPC and add it as a main loop source ++ * ++ * \param[in] ipc IPC connection to add ++ * \param[in] priority Event source priority to use for connection ++ * \param[in] userdata Data to register with callbacks ++ * \param[in] callbacks Dispatch and destroy callbacks for connection ++ * \param[out] source Newly allocated event source ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note On failure, the caller is still responsible for ipc. 
On success, the ++ * caller should call mainloop_del_ipc_client() when source is no longer ++ * needed, which will lead to the disconnection of the IPC later in the ++ * main loop if it is connected. However the IPC disconnects, ++ * mainloop_gio_destroy() will free ipc and source after calling the ++ * destroy callback. ++ */ ++int ++pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, ++ struct ipc_client_callbacks *callbacks, ++ mainloop_io_t **source) + { +- mainloop_io_t *client = NULL; +- crm_ipc_t *conn = crm_ipc_new(name, max_size); ++ CRM_CHECK((ipc != NULL) && (callbacks != NULL), return EINVAL); + +- if (conn && crm_ipc_connect(conn)) { +- int32_t fd = crm_ipc_get_fd(conn); ++ if (!crm_ipc_connect(ipc)) { ++ return ENOTCONN; ++ } ++ *source = mainloop_add_fd(crm_ipc_name(ipc), priority, crm_ipc_get_fd(ipc), ++ userdata, NULL); ++ if (*source == NULL) { ++ int rc = errno; + +- client = mainloop_add_fd(name, priority, fd, userdata, NULL); ++ crm_ipc_close(ipc); ++ return rc; + } ++ (*source)->ipc = ipc; ++ (*source)->destroy_fn = callbacks->destroy; ++ (*source)->dispatch_fn_ipc = callbacks->dispatch; ++ return pcmk_rc_ok; ++} + +- if (client == NULL) { +- crm_perror(LOG_TRACE, "Connection to %s failed", name); +- if (conn) { +- crm_ipc_close(conn); +- crm_ipc_destroy(conn); ++mainloop_io_t * ++mainloop_add_ipc_client(const char *name, int priority, size_t max_size, ++ void *userdata, struct ipc_client_callbacks *callbacks) ++{ ++ crm_ipc_t *ipc = crm_ipc_new(name, max_size); ++ mainloop_io_t *source = NULL; ++ int rc = pcmk__add_mainloop_ipc(ipc, priority, userdata, callbacks, ++ &source); ++ ++ if (rc != pcmk_rc_ok) { ++ if (crm_log_level == LOG_STDOUT) { ++ fprintf(stderr, "Connection to %s failed: %s", ++ name, pcmk_rc_str(rc)); + } ++ crm_ipc_destroy(ipc); + return NULL; + } +- +- client->ipc = conn; +- client->destroy_fn = callbacks->destroy; +- client->dispatch_fn_ipc = callbacks->dispatch; +- return client; ++ return source; + } + + 
void +-- +1.8.3.1 + + +From b9539da27998ff5e6c8b681f39603550a923ca33 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 6 Apr 2020 17:40:55 -0500 +Subject: [PATCH 2/6] Refactor: libcrmcommon: add C API for controller IPC + +Implement a C API for controller IPC using the new IPC API model. +--- + include/crm/common/Makefile.am | 2 +- + include/crm/common/ipc.h | 4 +- + include/crm/common/ipc_controld.h | 99 +++++++ + lib/common/Makefile.am | 1 + + lib/common/crmcommon_private.h | 6 + + lib/common/ipc_client.c | 46 +-- + lib/common/ipc_controld.c | 609 ++++++++++++++++++++++++++++++++++++++ + 7 files changed, 723 insertions(+), 44 deletions(-) + create mode 100644 include/crm/common/ipc_controld.h + create mode 100644 lib/common/ipc_controld.c + +diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am +index 776e4a7..f29d105 100644 +--- a/include/crm/common/Makefile.am ++++ b/include/crm/common/Makefile.am +@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in + headerdir=$(pkgincludedir)/crm/common + + header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \ +- nvpair.h acl.h ++ nvpair.h acl.h ipc_controld.h + noinst_HEADERS = internal.h alerts_internal.h \ + iso8601_internal.h remote_internal.h xml_internal.h \ + ipc_internal.h output.h cmdline_internal.h curses_internal.h \ +diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h +index 8dee1b1..c67aaea 100644 +--- a/include/crm/common/ipc.h ++++ b/include/crm/common/ipc.h +@@ -217,7 +217,9 @@ unsigned int crm_ipc_default_buffer_size(void); + int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, + pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); + +-/* Utils */ ++/* This is controller-specific but is declared in this header for C API ++ * backward compatibility. 
++ */ + xmlNode *create_hello_message(const char *uuid, const char *client_name, + const char *major_version, const char *minor_version); + +diff --git a/include/crm/common/ipc_controld.h b/include/crm/common/ipc_controld.h +new file mode 100644 +index 0000000..0ebabfc +--- /dev/null ++++ b/include/crm/common/ipc_controld.h +@@ -0,0 +1,99 @@ ++/* ++ * Copyright 2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#ifndef PCMK__IPC_CONTROLD__H ++# define PCMK__IPC_CONTROLD__H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * \file ++ * \brief IPC commands for Pacemaker controller ++ * ++ * \ingroup core ++ */ ++ ++#include // bool ++#include // xmlNode ++#include // pcmk_ipc_api_t ++ ++//! Possible types of controller replies ++enum pcmk_controld_api_reply { ++ pcmk_controld_reply_unknown, ++ pcmk_controld_reply_reprobe, ++ pcmk_controld_reply_info, ++ pcmk_controld_reply_resource, ++ pcmk_controld_reply_ping, ++}; ++ ++/*! ++ * Controller reply passed to event callback ++ * ++ * \note Shutdown and election calls have no reply. Reprobe calls are ++ * acknowledged but contain no data (reply_type will be the only item ++ * set). Node info and ping calls have their own reply data. Fail and ++ * refresh calls use the resource reply type and reply data. ++ * \note The pointers in the reply are only guaranteed to be meaningful for the ++ * execution of the callback; if the values are needed for later, the ++ * callback should copy them. 
++ */ ++typedef struct { ++ enum pcmk_controld_api_reply reply_type; ++ const char *feature_set; //!< CRM feature set advertised by controller ++ const char *host_from; //!< Name of node that sent reply ++ ++ union { ++ // pcmk_controld_reply_info ++ struct { ++ bool have_quorum; ++ bool is_remote; ++ int id; ++ const char *uuid; ++ const char *uname; ++ const char *state; ++ } node_info; ++ ++ // pcmk_controld_reply_resource ++ struct { ++ xmlNode *node_state; //cmds = pcmk__controld_api_methods(); + break; + + case pcmk_ipc_execd: +@@ -247,7 +249,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log) + return for_log? "CIB manager" : NULL /* PCMK__SERVER_BASED_RW */; + + case pcmk_ipc_controld: +- return for_log? "controller" : NULL /* CRM_SYSTEM_CRMD */; ++ return for_log? "controller" : CRM_SYSTEM_CRMD; + + case pcmk_ipc_execd: + return for_log? "executor" : NULL /* CRM_SYSTEM_LRMD */; +@@ -1412,43 +1414,3 @@ bail: + } + return rc; + } +- +-xmlNode * +-create_hello_message(const char *uuid, +- const char *client_name, const char *major_version, const char *minor_version) +-{ +- xmlNode *hello_node = NULL; +- xmlNode *hello = NULL; +- +- if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) +- || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { +- crm_err("Could not create IPC hello message from %s (UUID %s): " +- "missing information", +- client_name? client_name : "unknown client", +- uuid? 
uuid : "unknown"); +- return NULL; +- } +- +- hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); +- if (hello_node == NULL) { +- crm_err("Could not create IPC hello message from %s (UUID %s): " +- "Message data creation failed", client_name, uuid); +- return NULL; +- } +- +- crm_xml_add(hello_node, "major_version", major_version); +- crm_xml_add(hello_node, "minor_version", minor_version); +- crm_xml_add(hello_node, "client_name", client_name); +- crm_xml_add(hello_node, "client_uuid", uuid); +- +- hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); +- if (hello == NULL) { +- crm_err("Could not create IPC hello message from %s (UUID %s): " +- "Request creation failed", client_name, uuid); +- return NULL; +- } +- free_xml(hello_node); +- +- crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); +- return hello; +-} +diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c +new file mode 100644 +index 0000000..22bb733 +--- /dev/null ++++ b/lib/common/ipc_controld.c +@@ -0,0 +1,609 @@ ++/* ++ * Copyright 2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "crmcommon_private.h" ++ ++struct controld_api_private_s { ++ char *client_uuid; ++ unsigned int replies_expected; ++}; ++ ++// \return Standard Pacemaker return code ++static int ++new_data(pcmk_ipc_api_t *api) ++{ ++ struct controld_api_private_s *private = NULL; ++ ++ api->api_data = calloc(1, sizeof(struct controld_api_private_s)); ++ ++ if (api->api_data == NULL) { ++ return errno; ++ } ++ ++ private = api->api_data; ++ ++ /* This is set to the PID because that's how it was always done, but PIDs ++ * are not unique because clients can be remote. The value appears to be ++ * unused other than as part of F_CRM_SYS_FROM in IPC requests, which is ++ * only compared against the internal system names (CRM_SYSTEM_TENGINE, ++ * etc.), so it shouldn't be a problem. ++ */ ++ private->client_uuid = pcmk__getpid_s(); ++ ++ /* @TODO Implement a call ID model similar to the CIB, executor, and fencer ++ * IPC APIs, so that requests and replies can be matched, and ++ * duplicate replies can be discarded. ++ */ ++ return pcmk_rc_ok; ++} ++ ++static void ++free_data(void *data) ++{ ++ free(((struct controld_api_private_s *) data)->client_uuid); ++ free(data); ++} ++ ++// \return Standard Pacemaker return code ++static int ++post_connect(pcmk_ipc_api_t *api) ++{ ++ /* The controller currently requires clients to register via a hello ++ * request, but does not reply back. ++ */ ++ struct controld_api_private_s *private = api->api_data; ++ const char *client_name = crm_system_name? 
crm_system_name : "client"; ++ xmlNode *hello; ++ int rc; ++ ++ hello = create_hello_message(private->client_uuid, client_name, ++ PCMK__CONTROLD_API_MAJOR, ++ PCMK__CONTROLD_API_MINOR); ++ rc = pcmk__send_ipc_request(api, hello); ++ free_xml(hello); ++ if (rc != pcmk_rc_ok) { ++ crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s", ++ pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc); ++ } else { ++ crm_debug("Sent IPC hello to %s", pcmk_ipc_name(api, true)); ++ } ++ return rc; ++} ++ ++#define xml_true(xml, field) crm_is_true(crm_element_value(xml, field)) ++ ++static void ++set_node_info_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) ++{ ++ data->reply_type = pcmk_controld_reply_info; ++ if (msg_data == NULL) { ++ return; ++ } ++ data->data.node_info.have_quorum = xml_true(msg_data, XML_ATTR_HAVE_QUORUM); ++ data->data.node_info.is_remote = xml_true(msg_data, XML_NODE_IS_REMOTE); ++ crm_element_value_int(msg_data, XML_ATTR_ID, &(data->data.node_info.id)); ++ data->data.node_info.uuid = crm_element_value(msg_data, XML_ATTR_UUID); ++ data->data.node_info.uname = crm_element_value(msg_data, XML_ATTR_UNAME); ++ data->data.node_info.state = crm_element_value(msg_data, XML_NODE_IS_PEER); ++} ++ ++static void ++set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) ++{ ++ data->reply_type = pcmk_controld_reply_ping; ++ if (msg_data == NULL) { ++ return; ++ } ++ data->data.ping.sys_from = crm_element_value(msg_data, ++ XML_PING_ATTR_SYSFROM); ++ data->data.ping.fsa_state = crm_element_value(msg_data, ++ XML_PING_ATTR_CRMDSTATE); ++ data->data.ping.result = crm_element_value(msg_data, XML_PING_ATTR_STATUS); ++} ++ ++static bool ++reply_expected(pcmk_ipc_api_t *api, xmlNode *request) ++{ ++ const char *command = crm_element_value(request, F_CRM_TASK); ++ ++ if (command == NULL) { ++ return false; ++ } ++ ++ // We only need to handle commands that functions in this file can send ++ return !strcmp(command, CRM_OP_REPROBE) ++ || !strcmp(command, 
CRM_OP_NODE_INFO) ++ || !strcmp(command, CRM_OP_PING) ++ || !strcmp(command, CRM_OP_LRM_FAIL) ++ || !strcmp(command, CRM_OP_LRM_DELETE); ++} ++ ++static void ++dispatch(pcmk_ipc_api_t *api, xmlNode *reply) ++{ ++ struct controld_api_private_s *private = api->api_data; ++ crm_exit_t status = CRM_EX_OK; ++ xmlNode *msg_data = NULL; ++ const char *value = NULL; ++ pcmk_controld_api_reply_t reply_data = { ++ pcmk_controld_reply_unknown, NULL, NULL, ++ }; ++ ++ if (private->replies_expected > 0) { ++ private->replies_expected--; ++ } ++ ++ // Do some basic validation of the reply ++ ++ /* @TODO We should be able to verify that value is always a response, but ++ * currently the controller doesn't always properly set the type. Even ++ * if we fix the controller, we'll still need to handle replies from ++ * old versions (feature set could be used to differentiate). ++ */ ++ value = crm_element_value(reply, F_CRM_MSG_TYPE); ++ if ((value == NULL) || (strcmp(value, XML_ATTR_REQUEST) ++ && strcmp(value, XML_ATTR_RESPONSE))) { ++ crm_debug("Unrecognizable controller message: invalid message type '%s'", ++ crm_str(value)); ++ status = CRM_EX_PROTOCOL; ++ reply = NULL; ++ } ++ ++ if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { ++ crm_debug("Unrecognizable controller message: no reference"); ++ status = CRM_EX_PROTOCOL; ++ reply = NULL; ++ } ++ ++ value = crm_element_value(reply, F_CRM_TASK); ++ if (value == NULL) { ++ crm_debug("Unrecognizable controller message: no command name"); ++ status = CRM_EX_PROTOCOL; ++ reply = NULL; ++ } ++ ++ // Parse useful info from reply ++ ++ if (reply != NULL) { ++ reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION); ++ reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM); ++ msg_data = get_message_xml(reply, F_CRM_DATA); ++ ++ if (!strcmp(value, CRM_OP_REPROBE)) { ++ reply_data.reply_type = pcmk_controld_reply_reprobe; ++ ++ } else if (!strcmp(value, CRM_OP_NODE_INFO)) { ++ set_node_info_data(&reply_data, 
msg_data); ++ ++ } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) { ++ reply_data.reply_type = pcmk_controld_reply_resource; ++ reply_data.data.resource.node_state = msg_data; ++ ++ } else if (!strcmp(value, CRM_OP_PING)) { ++ set_ping_data(&reply_data, msg_data); ++ ++ } else { ++ crm_debug("Unrecognizable controller message: unknown command '%s'", ++ value); ++ status = CRM_EX_PROTOCOL; ++ reply = NULL; ++ } ++ } ++ ++ pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); ++} ++ ++pcmk__ipc_methods_t * ++pcmk__controld_api_methods() ++{ ++ pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); ++ ++ if (cmds != NULL) { ++ cmds->new_data = new_data; ++ cmds->free_data = free_data; ++ cmds->post_connect = post_connect; ++ cmds->reply_expected = reply_expected; ++ cmds->dispatch = dispatch; ++ } ++ return cmds; ++} ++ ++/*! ++ * \internal ++ * \brief Create XML for a controller IPC request ++ * ++ * \param[in] api Controller connection ++ * \param[in] op Controller IPC command name ++ * \param[in] node Node name to set as destination host ++ * \param[in] msg_data XML to attach to request as message data ++ * ++ * \return Newly allocated XML for request ++ */ ++static xmlNode * ++create_controller_request(pcmk_ipc_api_t *api, const char *op, ++ const char *node, xmlNode *msg_data) ++{ ++ struct controld_api_private_s *private = api->api_data; ++ const char *sys_to = NULL; ++ ++ if ((node == NULL) && !strcmp(op, CRM_OP_PING)) { ++ sys_to = CRM_SYSTEM_DC; ++ } else { ++ sys_to = CRM_SYSTEM_CRMD; ++ } ++ return create_request(op, msg_data, node, sys_to, ++ (crm_system_name? 
crm_system_name : "client"), ++ private->client_uuid); ++} ++ ++// \return Standard Pacemaker return code ++static int ++send_controller_request(pcmk_ipc_api_t *api, xmlNode *request, ++ bool reply_is_expected) ++{ ++ int rc; ++ ++ if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) { ++ return EINVAL; ++ } ++ rc = pcmk__send_ipc_request(api, request); ++ if ((rc == pcmk_rc_ok) && reply_is_expected) { ++ struct controld_api_private_s *private = api->api_data; ++ ++ private->replies_expected++; ++ } ++ return rc; ++} ++ ++static xmlNode * ++create_reprobe_message_data(const char *target_node, const char *router_node) ++{ ++ xmlNode *msg_data; ++ ++ msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); ++ crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); ++ if ((router_node != NULL) && safe_str_neq(router_node, target_node)) { ++ crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); ++ } ++ return msg_data; ++} ++ ++/*! ++ * \brief Send a reprobe controller operation ++ * ++ * \param[in] api Controller connection ++ * \param[in] target_node Name of node to reprobe ++ * \param[in] router_node Router node for host ++ * ++ * \return Standard Pacemaker return code ++ * \note Event callback will get a reply of type pcmk_controld_reply_reprobe. 
++ */ ++int ++pcmk_controld_api_reprobe(pcmk_ipc_api_t *api, const char *target_node, ++ const char *router_node) ++{ ++ xmlNode *request; ++ xmlNode *msg_data; ++ int rc = pcmk_rc_ok; ++ ++ if (api == NULL) { ++ return EINVAL; ++ } ++ if (router_node == NULL) { ++ router_node = target_node; ++ } ++ crm_debug("Sending %s IPC request to reprobe %s via %s", ++ pcmk_ipc_name(api, true), crm_str(target_node), ++ crm_str(router_node)); ++ msg_data = create_reprobe_message_data(target_node, router_node); ++ request = create_controller_request(api, CRM_OP_REPROBE, router_node, ++ msg_data); ++ rc = send_controller_request(api, request, true); ++ free_xml(msg_data); ++ free_xml(request); ++ return rc; ++} ++ ++/*! ++ * \brief Send a "node info" controller operation ++ * ++ * \param[in] api Controller connection ++ * \param[in] nodeid ID of node to get info for (or 0 for local node) ++ * ++ * \return Standard Pacemaker return code ++ * \note Event callback will get a reply of type pcmk_controld_reply_info. ++ */ ++int ++pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid) ++{ ++ xmlNode *request; ++ int rc = pcmk_rc_ok; ++ ++ request = create_controller_request(api, CRM_OP_NODE_INFO, NULL, NULL); ++ if (request == NULL) { ++ return EINVAL; ++ } ++ if (nodeid > 0) { ++ crm_xml_set_id(request, "%lu", (unsigned long) nodeid); ++ } ++ ++ rc = send_controller_request(api, request, true); ++ free_xml(request); ++ return rc; ++} ++ ++/*! ++ * \brief Ask the controller for status ++ * ++ * \param[in] api Controller connection ++ * \param[in] node_name Name of node whose status is desired (or NULL for DC) ++ * ++ * \return Standard Pacemaker return code ++ * \note Event callback will get a reply of type pcmk_controld_reply_ping. 
++ */ ++int ++pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name) ++{ ++ xmlNode *request; ++ int rc = pcmk_rc_ok; ++ ++ request = create_controller_request(api, CRM_OP_PING, node_name, NULL); ++ if (request == NULL) { ++ return EINVAL; ++ } ++ rc = send_controller_request(api, request, true); ++ free_xml(request); ++ return rc; ++} ++ ++/*! ++ * \internal ++ * \brief Ask the controller to shut down ++ * ++ * \param[in] api Controller connection ++ * \param[in] node_name Name of node whose controller should shut down ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note This capability currently does not work, so the function is considered ++ * internal. It will likely be removed. ++ * \note Event callback will not get a reply. ++ */ ++int ++pcmk_controld_api_shutdown(pcmk_ipc_api_t *api, const char *node_name) ++{ ++ xmlNode *request; ++ int rc = pcmk_rc_ok; ++ ++ request = create_controller_request(api, CRM_OP_SHUTDOWN, NULL, NULL); ++ if (request == NULL) { ++ return EINVAL; ++ } ++ rc = send_controller_request(api, request, false); ++ free_xml(request); ++ return rc; ++} ++ ++/*! ++ * \internal ++ * \brief Ask the controller to start a DC election ++ * ++ * \param[in] api Controller connection ++ * ++ * \return Standard Pacemaker return code ++ * ++ * \note This capability currently does not work, so the function is considered ++ * internal. It will likely be removed. ++ * \note Event callback will not get a reply. 
++ */ ++int ++pcmk_controld_api_start_election(pcmk_ipc_api_t *api) ++{ ++ xmlNode *request; ++ int rc = pcmk_rc_ok; ++ ++ request = create_controller_request(api, CRM_OP_VOTE, NULL, NULL); ++ if (request == NULL) { ++ return EINVAL; ++ } ++ rc = send_controller_request(api, request, false); ++ free_xml(request); ++ return rc; ++} ++ ++// \return Standard Pacemaker return code ++static int ++controller_resource_op(pcmk_ipc_api_t *api, const char *op, ++ const char *target_node, const char *router_node, ++ bool cib_only, const char *rsc_id, ++ const char *rsc_long_id, const char *standard, ++ const char *provider, const char *type) ++{ ++ int rc = pcmk_rc_ok; ++ char *key; ++ xmlNode *request, *msg_data, *xml_rsc, *params; ++ ++ if (api == NULL) { ++ return EINVAL; ++ } ++ if (router_node == NULL) { ++ router_node = target_node; ++ } ++ ++ msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); ++ ++ /* The controller logs the transition key from resource op requests, so we ++ * need to have *something* for it. 
++ * @TODO don't use "crm-resource" ++ */ ++ key = pcmk__transition_key(0, getpid(), 0, ++ "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); ++ crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); ++ free(key); ++ ++ crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); ++ if (safe_str_neq(router_node, target_node)) { ++ crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); ++ } ++ ++ if (cib_only) { ++ // Indicate that only the CIB needs to be cleaned ++ crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); ++ } ++ ++ xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); ++ crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); ++ crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); ++ crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); ++ crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); ++ crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); ++ ++ params = create_xml_node(msg_data, XML_TAG_ATTRS); ++ crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); ++ ++ // The controller parses the timeout from the request ++ key = crm_meta_name(XML_ATTR_TIMEOUT); ++ crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg ++ free(key); ++ ++ request = create_controller_request(api, op, router_node, msg_data); ++ rc = send_controller_request(api, request, true); ++ free_xml(msg_data); ++ free_xml(request); ++ return rc; ++} ++ ++/*! ++ * \brief Ask the controller to fail a resource ++ * ++ * \param[in] api Controller connection ++ * \param[in] target_node Name of node resource is on ++ * \param[in] router_node Router node for target ++ * \param[in] rsc_id ID of resource to fail ++ * \param[in] rsc_long_id Long ID of resource (if any) ++ * \param[in] standard Standard of resource ++ * \param[in] provider Provider of resource (if any) ++ * \param[in] type Type of resource to fail ++ * ++ * \return Standard Pacemaker return code ++ * \note Event callback will get a reply of type pcmk_controld_reply_resource. 
++ */ ++int ++pcmk_controld_api_fail(pcmk_ipc_api_t *api, ++ const char *target_node, const char *router_node, ++ const char *rsc_id, const char *rsc_long_id, ++ const char *standard, const char *provider, ++ const char *type) ++{ ++ crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s", ++ pcmk_ipc_name(api, true), crm_str(rsc_id), crm_str(rsc_long_id), ++ crm_str(target_node), crm_str(router_node)); ++ return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, ++ router_node, false, rsc_id, rsc_long_id, ++ standard, provider, type); ++} ++ ++/*! ++ * \brief Ask the controller to refresh a resource ++ * ++ * \param[in] api Controller connection ++ * \param[in] target_node Name of node resource is on ++ * \param[in] router_node Router node for target ++ * \param[in] rsc_id ID of resource to refresh ++ * \param[in] rsc_long_id Long ID of resource (if any) ++ * \param[in] standard Standard of resource ++ * \param[in] provider Provider of resource (if any) ++ * \param[in] type Type of resource ++ * \param[in] cib_only If true, clean resource from CIB only ++ * ++ * \return Standard Pacemaker return code ++ * \note Event callback will get a reply of type pcmk_controld_reply_resource. ++ */ ++int ++pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node, ++ const char *router_node, ++ const char *rsc_id, const char *rsc_long_id, ++ const char *standard, const char *provider, ++ const char *type, bool cib_only) ++{ ++ crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", ++ pcmk_ipc_name(api, true), crm_str(rsc_id), crm_str(rsc_long_id), ++ crm_str(target_node), crm_str(router_node)); ++ return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, ++ router_node, cib_only, rsc_id, rsc_long_id, ++ standard, provider, type); ++} ++ ++/*! 
++ * \brief Get the number of IPC replies currently expected from the controller ++ * ++ * \param[in] api Controller IPC API connection ++ * ++ * \return Number of replies expected ++ */ ++unsigned int ++pcmk_controld_api_replies_expected(pcmk_ipc_api_t *api) ++{ ++ struct controld_api_private_s *private = api->api_data; ++ ++ return private->replies_expected; ++} ++ ++xmlNode * ++create_hello_message(const char *uuid, const char *client_name, ++ const char *major_version, const char *minor_version) ++{ ++ xmlNode *hello_node = NULL; ++ xmlNode *hello = NULL; ++ ++ if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) ++ || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { ++ crm_err("Could not create IPC hello message from %s (UUID %s): " ++ "missing information", ++ client_name? client_name : "unknown client", ++ uuid? uuid : "unknown"); ++ return NULL; ++ } ++ ++ hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); ++ if (hello_node == NULL) { ++ crm_err("Could not create IPC hello message from %s (UUID %s): " ++ "Message data creation failed", client_name, uuid); ++ return NULL; ++ } ++ ++ crm_xml_add(hello_node, "major_version", major_version); ++ crm_xml_add(hello_node, "minor_version", minor_version); ++ crm_xml_add(hello_node, "client_name", client_name); ++ crm_xml_add(hello_node, "client_uuid", uuid); ++ ++ hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); ++ if (hello == NULL) { ++ crm_err("Could not create IPC hello message from %s (UUID %s): " ++ "Request creation failed", client_name, uuid); ++ return NULL; ++ } ++ free_xml(hello_node); ++ ++ crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); ++ return hello; ++} +-- +1.8.3.1 + + +From 1d1b34664b64f6805aeaf4d8e29e77aa8f59b4fc Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 7 Apr 2020 15:43:01 -0500 +Subject: [PATCH 3/6] Refactor: tools: convert crm_resource to use new + controller IPC model + +--- + tools/Makefile.am | 
3 +- + tools/crm_resource.c | 138 +++++++------ + tools/crm_resource.h | 9 +- + tools/crm_resource_controller.c | 425 ---------------------------------------- + tools/crm_resource_controller.h | 198 ------------------- + tools/crm_resource_runtime.c | 30 +-- + 6 files changed, 96 insertions(+), 707 deletions(-) + delete mode 100644 tools/crm_resource_controller.c + delete mode 100644 tools/crm_resource_controller.h + +diff --git a/tools/Makefile.am b/tools/Makefile.am +index c822a8c..4609b0f 100644 +--- a/tools/Makefile.am ++++ b/tools/Makefile.am +@@ -12,7 +12,7 @@ if BUILD_SYSTEMD + systemdsystemunit_DATA = crm_mon.service + endif + +-noinst_HEADERS = crm_mon.h crm_resource.h crm_resource_controller.h ++noinst_HEADERS = crm_mon.h crm_resource.h + + pcmkdir = $(datadir)/$(PACKAGE) + pcmk_DATA = report.common report.collector +@@ -115,7 +115,6 @@ crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ + + crm_resource_SOURCES = crm_resource.c \ + crm_resource_ban.c \ +- crm_resource_controller.c \ + crm_resource_print.c \ + crm_resource_runtime.c + crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ +diff --git a/tools/crm_resource.c b/tools/crm_resource.c +index 6853ad5..c8c1cfa 100644 +--- a/tools/crm_resource.c ++++ b/tools/crm_resource.c +@@ -11,62 +11,70 @@ + #include + + #include +- +-#include +-#include +- + #include + #include + #include +- + #include + #include + #include + #include + #include + ++#include ++#include ++#include ++ + bool BE_QUIET = FALSE; + bool scope_master = FALSE; + int cib_options = cib_sync_call; +- +-static GMainLoop *mainloop = NULL; ++static crm_exit_t exit_code = CRM_EX_OK; + + // Things that should be cleaned up on exit ++static GMainLoop *mainloop = NULL; + static cib_t *cib_conn = NULL; +-static pcmk_controld_api_t *controld_api = NULL; ++static pcmk_ipc_api_t *controld_api = NULL; + static pe_working_set_t *data_set = NULL; + + #define MESSAGE_TIMEOUT_S 60 + + // Clean up and exit + static crm_exit_t 
+-bye(crm_exit_t exit_code) ++bye(crm_exit_t ec) + { +- static bool crm_resource_shutdown_flag = false; +- +- if (crm_resource_shutdown_flag) { +- // Allow usage like "return bye(...);" +- return exit_code; +- } +- crm_resource_shutdown_flag = true; +- + if (cib_conn != NULL) { + cib_t *save_cib_conn = cib_conn; + +- cib_conn = NULL; ++ cib_conn = NULL; // Ensure we can't free this twice + save_cib_conn->cmds->signoff(save_cib_conn); + cib_delete(save_cib_conn); + } + if (controld_api != NULL) { +- pcmk_controld_api_t *save_controld_api = controld_api; ++ pcmk_ipc_api_t *save_controld_api = controld_api; + +- controld_api = NULL; +- pcmk_free_controld_api(save_controld_api); ++ controld_api = NULL; // Ensure we can't free this twice ++ pcmk_free_ipc_api(save_controld_api); ++ } ++ if (mainloop != NULL) { ++ g_main_loop_unref(mainloop); ++ mainloop = NULL; + } + pe_free_working_set(data_set); + data_set = NULL; +- crm_exit(exit_code); +- return exit_code; ++ crm_exit(ec); ++ return ec; ++} ++ ++static void ++quit_main_loop(crm_exit_t ec) ++{ ++ exit_code = ec; ++ if (mainloop != NULL) { ++ GMainLoop *mloop = mainloop; ++ ++ mainloop = NULL; // Don't re-enter this block ++ pcmk_quit_main_loop(mloop, 10); ++ g_main_loop_unref(mloop); ++ } + } + + static gboolean +@@ -76,39 +84,54 @@ resource_ipc_timeout(gpointer data) + fprintf(stderr, "\nAborting because no messages received in %d seconds\n", + MESSAGE_TIMEOUT_S); + crm_err("No messages received in %d seconds", MESSAGE_TIMEOUT_S); +- bye(CRM_EX_TIMEOUT); ++ quit_main_loop(CRM_EX_TIMEOUT); + return FALSE; + } + + static void +-handle_controller_reply(pcmk_controld_api_t *capi, void *api_data, +- void *user_data) ++controller_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, ++ crm_exit_t status, void *event_data, void *user_data) + { +- fprintf(stderr, "."); +- if ((capi->replies_expected(capi) == 0) +- && mainloop && g_main_loop_is_running(mainloop)) { +- fprintf(stderr, " OK\n"); +- crm_debug("Got 
all the replies we expected"); +- bye(CRM_EX_OK); +- } +-} ++ switch (event_type) { ++ case pcmk_ipc_event_disconnect: ++ if (exit_code == CRM_EX_DISCONNECT) { // Unexpected ++ crm_info("Connection to controller was terminated"); ++ } ++ quit_main_loop(exit_code); ++ break; + +-static void +-handle_controller_drop(pcmk_controld_api_t *capi, void *api_data, +- void *user_data) +-{ +- crm_info("Connection to controller was terminated"); +- bye(CRM_EX_DISCONNECT); ++ case pcmk_ipc_event_reply: ++ if (status != CRM_EX_OK) { ++ fprintf(stderr, "\nError: bad reply from controller: %s\n", ++ crm_exit_str(status)); ++ pcmk_disconnect_ipc(api); ++ quit_main_loop(status); ++ } else { ++ fprintf(stderr, "."); ++ if ((pcmk_controld_api_replies_expected(api) == 0) ++ && mainloop && g_main_loop_is_running(mainloop)) { ++ fprintf(stderr, " OK\n"); ++ crm_debug("Got all the replies we expected"); ++ pcmk_disconnect_ipc(api); ++ quit_main_loop(CRM_EX_OK); ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } + } + + static void +-start_mainloop(pcmk_controld_api_t *capi) ++start_mainloop(pcmk_ipc_api_t *capi) + { +- if (capi->replies_expected(capi) > 0) { +- unsigned int count = capi->replies_expected(capi); ++ unsigned int count = pcmk_controld_api_replies_expected(capi); + ++ if (count > 0) { + fprintf(stderr, "Waiting for %d %s from the controller", + count, pcmk__plural_alt(count, "reply", "replies")); ++ exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects + mainloop = g_main_loop_new(NULL, FALSE); + g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); + g_main_loop_run(mainloop); +@@ -664,7 +687,6 @@ main(int argc, char **argv) + int argerr = 0; + int flag; + int find_flags = 0; // Flags to use when searching for resource +- crm_exit_t exit_code = CRM_EX_OK; + + crm_log_cli_init("crm_resource"); + pcmk__set_cli_options(NULL, "| [options]", long_options, +@@ -1151,21 +1173,15 @@ main(int argc, char **argv) + + // Establish a connection to the controller if 
needed + if (require_crmd) { +- char *client_uuid; +- pcmk_controld_api_cb_t dispatch_cb = { +- handle_controller_reply, NULL +- }; +- pcmk_controld_api_cb_t destroy_cb = { +- handle_controller_drop, NULL +- }; +- +- +- client_uuid = pcmk__getpid_s(); +- controld_api = pcmk_new_controld_api(crm_system_name, client_uuid); +- free(client_uuid); +- +- rc = controld_api->connect(controld_api, true, &dispatch_cb, +- &destroy_cb); ++ rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); ++ if (rc != pcmk_rc_ok) { ++ CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); ++ rc = pcmk_rc2legacy(rc); ++ goto bail; ++ } ++ pcmk_register_ipc_callback(controld_api, controller_event_callback, ++ NULL); ++ rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); + rc = pcmk_rc2legacy(rc); +@@ -1525,8 +1541,8 @@ main(int argc, char **argv) + NULL, NULL, NULL, + NULL, attr_options)); + +- if (controld_api->reprobe(controld_api, host_uname, +- router_node) == pcmk_rc_ok) { ++ if (pcmk_controld_api_reprobe(controld_api, host_uname, ++ router_node) == pcmk_rc_ok) { + start_mainloop(controld_api); + } + +diff --git a/tools/crm_resource.h b/tools/crm_resource.h +index 0bf7bee..cb7f506 100644 +--- a/tools/crm_resource.h ++++ b/tools/crm_resource.h +@@ -21,7 +21,6 @@ + #include + #include + #include +-#include "crm_resource_controller.h" + + extern bool print_pending; + +@@ -36,8 +35,6 @@ extern char *move_lifetime; + + extern const char *attr_set_type; + +-extern pcmk_controld_api_cb_t controld_api_cb; +- + /* ban */ + int cli_resource_prefer(const char *rsc_id, const char *host, cib_t * cib_conn); + int cli_resource_ban(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn); +@@ -63,16 +60,16 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bo + + /* runtime */ + void cli_resource_check(cib_t * cib, pe_resource_t *rsc); 
+-int cli_resource_fail(pcmk_controld_api_t *controld_api, ++int cli_resource_fail(pcmk_ipc_api_t *controld_api, + const char *host_uname, const char *rsc_id, + pe_working_set_t *data_set); + int cli_resource_search(pe_resource_t *rsc, const char *requested_name, + pe_working_set_t *data_set); +-int cli_resource_delete(pcmk_controld_api_t *controld_api, ++int cli_resource_delete(pcmk_ipc_api_t *controld_api, + const char *host_uname, pe_resource_t *rsc, + const char *operation, const char *interval_spec, + bool just_failures, pe_working_set_t *data_set); +-int cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, ++int cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, + const char *operation, const char *interval_spec, + pe_working_set_t *data_set); + int cli_resource_restart(pe_resource_t *rsc, const char *host, int timeout_ms, +diff --git a/tools/crm_resource_controller.c b/tools/crm_resource_controller.c +deleted file mode 100644 +index 994a7be..0000000 +--- a/tools/crm_resource_controller.c ++++ /dev/null +@@ -1,425 +0,0 @@ +-/* +- * Copyright 2020 the Pacemaker project contributors +- * +- * The version control history for this file may have further details. +- * +- * This source code is licensed under the GNU General Public License version 2 +- * or later (GPLv2+) WITHOUT ANY WARRANTY. 
+- */ +- +-#include +-#include +-#include +-#include "crm_resource.h" +- +-// API object's private members +-struct controller_private { +- char *client_name; // Client name to use with IPC +- char *client_uuid; // Client UUID to use with IPC +- mainloop_io_t *source; // If main loop used, I/O source for IPC +- crm_ipc_t *ipc; // IPC connection to controller +- int replies_expected; // How many controller replies are expected +- pcmk_controld_api_cb_t dispatch_cb; // Caller's registered dispatch callback +- pcmk_controld_api_cb_t destroy_cb; // Caller's registered destroy callback +-}; +- +-static void +-call_client_callback(pcmk_controld_api_t *api, pcmk_controld_api_cb_t *cb, +- void *api_data) +-{ +- if ((cb != NULL) && (cb->callback != NULL)) { +- cb->callback(api, api_data, cb->user_data); +- } +-} +- +-/* +- * IPC callbacks when used with main loop +- */ +- +-static void +-controller_ipc_destroy(gpointer user_data) +-{ +- pcmk_controld_api_t *api = user_data; +- struct controller_private *private = api->private; +- +- private->ipc = NULL; +- private->source = NULL; +- call_client_callback(api, &(private->destroy_cb), NULL); +-} +- +-// \return < 0 if connection is no longer required, >= 0 if it is +-static int +-controller_ipc_dispatch(const char *buffer, ssize_t length, gpointer user_data) +-{ +- xmlNode *msg = NULL; +- pcmk_controld_api_t *api = user_data; +- +- CRM_CHECK(buffer && api && api->private, return 0); +- +- msg = string2xml(buffer); +- if (msg == NULL) { +- crm_warn("Received malformed controller IPC message"); +- } else { +- struct controller_private *private = api->private; +- +- crm_log_xml_trace(msg, "controller-reply"); +- private->replies_expected--; +- call_client_callback(api, &(private->dispatch_cb), +- get_message_xml(msg, F_CRM_DATA)); +- free_xml(msg); +- } +- return 0; +-} +- +-/* +- * IPC utilities +- */ +- +-// \return Standard Pacemaker return code +-static int +-send_hello(crm_ipc_t *ipc, const char *client_name, const char 
*client_uuid) +-{ +- xmlNode *hello = create_hello_message(client_uuid, client_name, "0", "1"); +- int rc = crm_ipc_send(ipc, hello, 0, 0, NULL); +- +- free_xml(hello); +- if (rc < 0) { +- rc = pcmk_legacy2rc(rc); +- crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s", +- CRM_SYSTEM_CRMD /* ipc->name */, +- pcmk_rc_str(rc), rc); +- return rc; +- } +- crm_debug("Sent IPC hello to %s", CRM_SYSTEM_CRMD /* ipc->name */); +- return pcmk_rc_ok; +-} +- +-// \return Standard Pacemaker return code +-static int +-send_controller_request(pcmk_controld_api_t *api, const char *op, +- xmlNode *msg_data, const char *node) +-{ +- int rc; +- struct controller_private *private = api->private; +- xmlNode *cmd = create_request(op, msg_data, node, CRM_SYSTEM_CRMD, +- private->client_name, private->client_uuid); +- const char *reference = crm_element_value(cmd, XML_ATTR_REFERENCE); +- +- if ((cmd == NULL) || (reference == NULL)) { +- return EINVAL; +- } +- +- //@TODO pass as args? 0=crm_ipc_flags, 0=timeout_ms (default 5s), NULL=reply +- crm_log_xml_trace(cmd, "controller-request"); +- rc = crm_ipc_send(private->ipc, cmd, 0, 0, NULL); +- free_xml(cmd); +- if (rc < 0) { +- return pcmk_legacy2rc(rc); +- } +- private->replies_expected++; +- return pcmk_rc_ok; +-} +- +-/* +- * pcmk_controld_api_t methods +- */ +- +-static int +-controller_connect_mainloop(pcmk_controld_api_t *api) +-{ +- struct controller_private *private = api->private; +- struct ipc_client_callbacks callbacks = { +- .dispatch = controller_ipc_dispatch, +- .destroy = controller_ipc_destroy, +- }; +- +- private->source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, +- G_PRIORITY_DEFAULT, 0, api, +- &callbacks); +- if (private->source == NULL) { +- return ENOTCONN; +- } +- +- private->ipc = mainloop_get_ipc_client(private->source); +- if (private->ipc == NULL) { +- (void) api->disconnect(api); +- return ENOTCONN; +- } +- +- crm_debug("Connected to %s IPC (attaching to main loop)", CRM_SYSTEM_CRMD); +- return 
pcmk_rc_ok; +-} +- +-static int +-controller_connect_no_mainloop(pcmk_controld_api_t *api) +-{ +- struct controller_private *private = api->private; +- +- private->ipc = crm_ipc_new(CRM_SYSTEM_CRMD, 0); +- if (private->ipc == NULL) { +- return ENOTCONN; +- } +- if (!crm_ipc_connect(private->ipc)) { +- crm_ipc_close(private->ipc); +- crm_ipc_destroy(private->ipc); +- private->ipc = NULL; +- return errno; +- } +- /* @TODO caller needs crm_ipc_get_fd(private->ipc); either add method for +- * that, or replace use_mainloop with int *fd +- */ +- crm_debug("Connected to %s IPC", CRM_SYSTEM_CRMD); +- return pcmk_rc_ok; +-} +- +-static void +-set_callback(pcmk_controld_api_cb_t *dest, pcmk_controld_api_cb_t *source) +-{ +- if (source) { +- dest->callback = source->callback; +- dest->user_data = source->user_data; +- } +-} +- +-static int +-controller_api_connect(pcmk_controld_api_t *api, bool use_mainloop, +- pcmk_controld_api_cb_t *dispatch_cb, +- pcmk_controld_api_cb_t *destroy_cb) +-{ +- int rc = pcmk_rc_ok; +- struct controller_private *private; +- +- if (api == NULL) { +- return EINVAL; +- } +- private = api->private; +- +- set_callback(&(private->dispatch_cb), dispatch_cb); +- set_callback(&(private->destroy_cb), destroy_cb); +- +- if (private->ipc != NULL) { +- return pcmk_rc_ok; // already connected +- } +- +- if (use_mainloop) { +- rc = controller_connect_mainloop(api); +- } else { +- rc = controller_connect_no_mainloop(api); +- } +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = send_hello(private->ipc, private->client_name, private->client_uuid); +- if (rc != pcmk_rc_ok) { +- (void) api->disconnect(api); +- } +- return rc; +-} +- +-static int +-controller_api_disconnect(pcmk_controld_api_t *api) +-{ +- struct controller_private *private = api->private; +- +- if (private->source != NULL) { +- // Attached to main loop +- mainloop_del_ipc_client(private->source); +- private->source = NULL; +- private->ipc = NULL; +- +- } else if (private->ipc != NULL) { +- // 
Not attached to main loop +- crm_ipc_t *ipc = private->ipc; +- +- private->ipc = NULL; +- crm_ipc_close(ipc); +- crm_ipc_destroy(ipc); +- } +- crm_debug("Disconnected from %s IPC", CRM_SYSTEM_CRMD /* ipc->name */); +- return pcmk_rc_ok; +-} +- +-//@TODO dispatch function for non-mainloop a la stonith_dispatch() +-//@TODO convenience retry-connect function a la stonith_api_connect_retry() +- +-static unsigned int +-controller_api_replies_expected(pcmk_controld_api_t *api) +-{ +- if (api != NULL) { +- struct controller_private *private = api->private; +- +- return private->replies_expected; +- } +- return 0; +-} +- +-static xmlNode * +-create_reprobe_message_data(const char *target_node, const char *router_node) +-{ +- xmlNode *msg_data; +- +- msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); +- crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); +- if ((router_node != NULL) && safe_str_neq(router_node, target_node)) { +- crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); +- } +- return msg_data; +-} +- +-static int +-controller_api_reprobe(pcmk_controld_api_t *api, const char *target_node, +- const char *router_node) +-{ +- int rc = EINVAL; +- +- if (api != NULL) { +- xmlNode *msg_data; +- +- crm_debug("Sending %s IPC request to reprobe %s via %s", +- CRM_SYSTEM_CRMD, crm_str(target_node), crm_str(router_node)); +- msg_data = create_reprobe_message_data(target_node, router_node); +- rc = send_controller_request(api, CRM_OP_REPROBE, msg_data, +- (router_node? 
router_node : target_node)); +- free_xml(msg_data); +- } +- return rc; +-} +- +-// \return Standard Pacemaker return code +-static int +-controller_resource_op(pcmk_controld_api_t *api, const char *op, +- const char *target_node, const char *router_node, +- bool cib_only, const char *rsc_id, +- const char *rsc_long_id, const char *standard, +- const char *provider, const char *type) +-{ +- int rc; +- char *key; +- xmlNode *msg_data, *xml_rsc, *params; +- +- if (api == NULL) { +- return EINVAL; +- } +- if (router_node == NULL) { +- router_node = target_node; +- } +- +- msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); +- +- /* The controller logs the transition key from resource op requests, so we +- * need to have *something* for it. +- */ +- key = pcmk__transition_key(0, getpid(), 0, +- "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); +- crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); +- free(key); +- +- crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); +- if (safe_str_neq(router_node, target_node)) { +- crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); +- } +- +- if (cib_only) { +- // Indicate that only the CIB needs to be cleaned +- crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); +- } +- +- xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); +- crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); +- crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); +- crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); +- crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); +- crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); +- +- params = create_xml_node(msg_data, XML_TAG_ATTRS); +- crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); +- +- // The controller parses the timeout from the request +- key = crm_meta_name(XML_ATTR_TIMEOUT); +- crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg +- free(key); +- +- rc = send_controller_request(api, op, msg_data, router_node); +- free_xml(msg_data); +- return rc; +-} +- +-static int 
+-controller_api_fail_resource(pcmk_controld_api_t *api, +- const char *target_node, const char *router_node, +- const char *rsc_id, const char *rsc_long_id, +- const char *standard, const char *provider, +- const char *type) +-{ +- crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s", +- CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), +- crm_str(target_node), crm_str(router_node)); +- return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, +- router_node, false, rsc_id, rsc_long_id, +- standard, provider, type); +-} +- +-static int +-controller_api_refresh_resource(pcmk_controld_api_t *api, +- const char *target_node, +- const char *router_node, +- const char *rsc_id, const char *rsc_long_id, +- const char *standard, const char *provider, +- const char *type, bool cib_only) +-{ +- crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", +- CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), +- crm_str(target_node), crm_str(router_node)); +- return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, +- router_node, cib_only, rsc_id, rsc_long_id, +- standard, provider, type); +-} +- +-pcmk_controld_api_t * +-pcmk_new_controld_api(const char *client_name, const char *client_uuid) +-{ +- struct controller_private *private; +- pcmk_controld_api_t *api = calloc(1, sizeof(pcmk_controld_api_t)); +- +- CRM_ASSERT(api != NULL); +- +- api->private = calloc(1, sizeof(struct controller_private)); +- CRM_ASSERT(api->private != NULL); +- private = api->private; +- +- if (client_name == NULL) { +- client_name = crm_system_name? 
crm_system_name : "client"; +- } +- private->client_name = strdup(client_name); +- CRM_ASSERT(private->client_name != NULL); +- +- if (client_uuid == NULL) { +- private->client_uuid = crm_generate_uuid(); +- } else { +- private->client_uuid = strdup(client_uuid); +- } +- CRM_ASSERT(private->client_uuid != NULL); +- +- api->connect = controller_api_connect; +- api->disconnect = controller_api_disconnect; +- api->replies_expected = controller_api_replies_expected; +- api->reprobe = controller_api_reprobe; +- api->fail_resource = controller_api_fail_resource; +- api->refresh_resource = controller_api_refresh_resource; +- return api; +-} +- +-void +-pcmk_free_controld_api(pcmk_controld_api_t *api) +-{ +- if (api != NULL) { +- struct controller_private *private = api->private; +- +- api->disconnect(api); +- free(private->client_name); +- free(private->client_uuid); +- free(api->private); +- free(api); +- } +-} +diff --git a/tools/crm_resource_controller.h b/tools/crm_resource_controller.h +deleted file mode 100644 +index 50e20b4..0000000 +--- a/tools/crm_resource_controller.h ++++ /dev/null +@@ -1,198 +0,0 @@ +-/* +- * Copyright 2020 the Pacemaker project contributors +- * +- * The version control history for this file may have further details. +- * +- * This source code is licensed under the GNU Lesser General Public License +- * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. +- */ +-#ifndef PCMK__CONTROLD_API_H +-#define PCMK__CONTROLD_API_H +- +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-#include // bool +- +-/* This is a demonstration of an abstracted controller IPC API. It is expected +- * that this will be improved and moved to libcrmcommon. +- * +- * @TODO We could consider whether it's reasonable to have a single type for +- * all daemons' IPC APIs (e.g. pcmk_ipc_api_t instead of pcmk_*_api_t). They +- * could potentially have common connect/disconnect methods and then a void* to +- * a group of API-specific methods. 
+- * +- * In that case, the callback type would also need to be generic, taking +- * (pcmk_ipc_api_t *api, void *api_data, void *user_data), with individual APIs +- * having functions for getting useful info from api_data. If all APIs followed +- * the call_id model, we could use int call_id instead of api_data. +- * +- * A major annoyance is that the controller IPC protocol currently does not have +- * any way to tie a particular reply to a particular request. The current +- * clients (crmadmin, crm_node, and crm_resource) simply know what kind of reply +- * to expect for the kind of request they sent. In crm_resource's case, all it +- * does is count replies, ignoring their content altogether. +- * +- * That really forces us to have a single callback for all events rather than a +- * per-request callback. That in turn implies that callers can only provide a +- * single user data pointer. +- * +- * @TODO Define protocol version constants to use in hello message. +- * @TODO Allow callers to specify timeouts. +- * @TODO Define call IDs for controller ops, while somehow maintaining backward +- * compatibility, since a client running on a Pacemaker Remote node could +- * be older or newer than the controller on the connection's cluster +- * node. +- * @TODO The controller currently does not respond to hello messages. We should +- * establish a common connection handshake protocol for all daemons that +- * involves a hello message and acknowledgement. We should support sync +- * or async connection (i.e. block until the ack is received, or return +- * after the hello is sent and call a connection callback when the hello +- * ack is received). +- */ +- +-//! \internal +-typedef struct pcmk_controld_api_s pcmk_controld_api_t; +- +-//! \internal +-typedef struct pcmk_controld_api_callback_s { +- void (*callback)(pcmk_controld_api_t *api, void *api_data, void *user_data); +- void *user_data; +-} pcmk_controld_api_cb_t; +- +-//! 
\internal +-struct pcmk_controld_api_s { +- //! \internal +- void *private; +- +- /*! +- * \internal +- * \brief Connect to the local controller +- * +- * \param[in] api Controller API instance +- * \param[in] use_mainloop If true, attach IPC to main loop +- * \param[in] dispatch_cb If not NULL, call this when replies are received +- * \param[in] destroy_cb If not NULL, call this if connection drops +- * +- * \return Standard Pacemaker return code +- * \note Only the pointers inside the callback objects need to be +- * persistent, not the callback objects themselves. The destroy_cb +- * will be called only for unrequested disconnects. +- */ +- int (*connect)(pcmk_controld_api_t *api, bool use_mainloop, +- pcmk_controld_api_cb_t *dispatch_cb, +- pcmk_controld_api_cb_t *destroy_cb); +- +- /*! +- * \internal +- * \brief Disconnect from the local controller +- * +- * \param[in] api Controller API instance +- * +- * \return Standard Pacemaker return code +- */ +- int (*disconnect)(pcmk_controld_api_t *api); +- +- /*! +- * \internal +- * \brief Check number of replies still expected from controller +- * +- * \param[in] api Controller API instance +- * +- * \return Number of expected replies +- */ +- unsigned int (*replies_expected)(pcmk_controld_api_t *api); +- +- /*! +- * \internal +- * \brief Send a reprobe controller operation +- * +- * \param[in] api Controller API instance +- * \param[in] target_node Name of node to reprobe +- * \param[in] router_node Router node for host +- * +- * \return Standard Pacemaker return code +- */ +- int (*reprobe)(pcmk_controld_api_t *api, const char *target_node, +- const char *router_node); +- +- /* @TODO These methods have a lot of arguments. One possibility would be to +- * make a struct for agent info (standard/provider/type), which theortically +- * could be used throughout pacemaker code. 
However that would end up being +- * really awkward to use generically, since sometimes you need to allocate +- * those strings (char *) and other times you only have references into XML +- * (const char *). We could make some structs just for this API. +- */ +- +- /*! +- * \internal +- * \brief Ask the controller to fail a resource +- * +- * \param[in] api Controller API instance +- * \param[in] target_node Name of node resource is on +- * \param[in] router_node Router node for target +- * \param[in] rsc_id ID of resource to fail +- * \param[in] rsc_long_id Long ID of resource (if any) +- * \param[in] standard Standard of resource +- * \param[in] provider Provider of resource (if any) +- * \param[in] type Type of resource to fail +- * +- * \return Standard Pacemaker return code +- */ +- int (*fail_resource)(pcmk_controld_api_t *api, const char *target_node, +- const char *router_node, const char *rsc_id, +- const char *rsc_long_id, const char *standard, +- const char *provider, const char *type); +- +- /*! +- * \internal +- * \brief Ask the controller to refresh a resource +- * +- * \param[in] api Controller API instance +- * \param[in] target_node Name of node resource is on +- * \param[in] router_node Router node for target +- * \param[in] rsc_id ID of resource to refresh +- * \param[in] rsc_long_id Long ID of resource (if any) +- * \param[in] standard Standard of resource +- * \param[in] provider Provider of resource (if any) +- * \param[in] type Type of resource +- * \param[in] cib_only If true, clean resource from CIB only +- * +- * \return Standard Pacemaker return code +- */ +- int (*refresh_resource)(pcmk_controld_api_t *api, const char *target_node, +- const char *router_node, const char *rsc_id, +- const char *rsc_long_id, const char *standard, +- const char *provider, const char *type, +- bool cib_only); +-}; +- +-/*! 
+- * \internal +- * \brief Create new controller IPC API object for clients +- * +- * \param[in] client_name Client name to use with IPC +- * \param[in] client_uuid Client UUID to use with IPC +- * +- * \return Newly allocated object +- * \note This function asserts on errors, so it will never return NULL. +- * The caller is responsible for freeing the result with +- * pcmk_free_controld_api(). +- */ +-pcmk_controld_api_t *pcmk_new_controld_api(const char *client_name, +- const char *client_uuid); +- +-/*! +- * \internal +- * \brief Free a controller IPC API object +- * +- * \param[in] api Controller IPC API object to free +- */ +-void pcmk_free_controld_api(pcmk_controld_api_t *api); +- +-#ifdef __cplusplus +-} +-#endif +- +-#endif +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 37789d1..cc2abeb 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -8,6 +8,7 @@ + */ + + #include ++#include + + int resource_verbose = 0; + bool do_force = FALSE; +@@ -460,7 +461,7 @@ cli_resource_delete_attribute(pe_resource_t *rsc, const char *requested_name, + + // \return Standard Pacemaker return code + static int +-send_lrm_rsc_op(pcmk_controld_api_t *controld_api, bool do_fail_resource, ++send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource, + const char *host_uname, const char *rsc_id, + pe_working_set_t *data_set) + { +@@ -528,14 +529,13 @@ send_lrm_rsc_op(pcmk_controld_api_t *controld_api, bool do_fail_resource, + rsc_api_id = rsc->id; + } + if (do_fail_resource) { +- return controld_api->fail_resource(controld_api, host_uname, +- router_node, rsc_api_id, rsc_long_id, +- rsc_class, rsc_provider, rsc_type); ++ return pcmk_controld_api_fail(controld_api, host_uname, router_node, ++ rsc_api_id, rsc_long_id, ++ rsc_class, rsc_provider, rsc_type); + } else { +- return controld_api->refresh_resource(controld_api, host_uname, +- router_node, rsc_api_id, +- rsc_long_id, rsc_class, +- rsc_provider, rsc_type, 
cib_only); ++ return pcmk_controld_api_refresh(controld_api, host_uname, router_node, ++ rsc_api_id, rsc_long_id, rsc_class, ++ rsc_provider, rsc_type, cib_only); + } + } + +@@ -558,7 +558,7 @@ rsc_fail_name(pe_resource_t *rsc) + + // \return Standard Pacemaker return code + static int +-clear_rsc_history(pcmk_controld_api_t *controld_api, const char *host_uname, ++clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname, + const char *rsc_id, pe_working_set_t *data_set) + { + int rc = pcmk_ok; +@@ -574,16 +574,16 @@ clear_rsc_history(pcmk_controld_api_t *controld_api, const char *host_uname, + } + + crm_trace("Processing %d mainloop inputs", +- controld_api->replies_expected(controld_api)); ++ pcmk_controld_api_replies_expected(controld_api)); + while (g_main_context_iteration(NULL, FALSE)) { + crm_trace("Processed mainloop input, %d still remaining", +- controld_api->replies_expected(controld_api)); ++ pcmk_controld_api_replies_expected(controld_api)); + } + return rc; + } + + static int +-clear_rsc_failures(pcmk_controld_api_t *controld_api, const char *node_name, ++clear_rsc_failures(pcmk_ipc_api_t *controld_api, const char *node_name, + const char *rsc_id, const char *operation, + const char *interval_spec, pe_working_set_t *data_set) + { +@@ -683,7 +683,7 @@ clear_rsc_fail_attrs(pe_resource_t *rsc, const char *operation, + } + + int +-cli_resource_delete(pcmk_controld_api_t *controld_api, const char *host_uname, ++cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname, + pe_resource_t *rsc, const char *operation, + const char *interval_spec, bool just_failures, + pe_working_set_t *data_set) +@@ -792,7 +792,7 @@ cli_resource_delete(pcmk_controld_api_t *controld_api, const char *host_uname, + } + + int +-cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, ++cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, + const char *operation, const char *interval_spec, + pe_working_set_t *data_set) + 
{ +@@ -905,7 +905,7 @@ cli_resource_check(cib_t * cib_conn, pe_resource_t *rsc) + + // \return Standard Pacemaker return code + int +-cli_resource_fail(pcmk_controld_api_t *controld_api, const char *host_uname, ++cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, + const char *rsc_id, pe_working_set_t *data_set) + { + crm_notice("Failing %s on %s", rsc_id, host_uname); +-- +1.8.3.1 + + +From ae14fa4a831e45eae0d78b0f42765fcf40c4ce56 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Apr 2020 14:06:02 -0500 +Subject: [PATCH 4/6] Refactor: tools: convert crm_node to use new controller + IPC model + +--- + tools/crm_node.c | 281 +++++++++++++++++++++++++++---------------------------- + 1 file changed, 140 insertions(+), 141 deletions(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index db31f20..1773a36 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + + #define SUMMARY "crm_node - Tool for displaying low-level node information" +@@ -39,7 +40,6 @@ gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data + gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); + gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); + +-static char *pid_s = NULL; + static GMainLoop *mainloop = NULL; + static crm_exit_t exit_code = CRM_EX_OK; + +@@ -140,11 +140,6 @@ remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError * + static void + crm_node_exit(crm_exit_t value) + { +- if (pid_s) { +- free(pid_s); +- pid_s = NULL; +- } +- + exit_code = value; + + if (mainloop && g_main_loop_is_running(mainloop)) { +@@ -155,173 +150,123 @@ crm_node_exit(crm_exit_t value) + } + + static void +-exit_disconnect(gpointer user_data) +-{ +- fprintf(stderr, "error: Lost connection to cluster\n"); +- crm_node_exit(CRM_EX_DISCONNECT); +-} +- +-typedef int 
(*ipc_dispatch_fn) (const char *buffer, ssize_t length, +- gpointer userdata); +- +-static crm_ipc_t * +-new_mainloop_for_ipc(const char *system, ipc_dispatch_fn dispatch) ++controller_event_cb(pcmk_ipc_api_t *controld_api, ++ enum pcmk_ipc_event event_type, crm_exit_t status, ++ void *event_data, void *user_data) + { +- mainloop_io_t *source = NULL; +- crm_ipc_t *ipc = NULL; +- +- struct ipc_client_callbacks ipc_callbacks = { +- .dispatch = dispatch, +- .destroy = exit_disconnect +- }; ++ pcmk_controld_api_reply_t *reply = event_data; + +- mainloop = g_main_loop_new(NULL, FALSE); +- source = mainloop_add_ipc_client(system, G_PRIORITY_DEFAULT, 0, +- NULL, &ipc_callbacks); +- ipc = mainloop_get_ipc_client(source); +- if (ipc == NULL) { +- fprintf(stderr, +- "error: Could not connect to cluster (is it running?)\n"); +- crm_node_exit(CRM_EX_DISCONNECT); +- } +- return ipc; +-} +- +-static int +-send_controller_hello(crm_ipc_t *controller) +-{ +- xmlNode *hello = NULL; +- int rc; +- +- pid_s = pcmk__getpid_s(); +- hello = create_hello_message(pid_s, crm_system_name, "1", "0"); +- rc = crm_ipc_send(controller, hello, 0, 0, NULL); +- free_xml(hello); +- return (rc < 0)? rc : 0; +-} +- +-static int +-send_node_info_request(crm_ipc_t *controller, uint32_t nodeid) +-{ +- xmlNode *ping = NULL; +- int rc; +- +- ping = create_request(CRM_OP_NODE_INFO, NULL, NULL, CRM_SYSTEM_CRMD, +- crm_system_name, pid_s); +- if (nodeid > 0) { +- crm_xml_add_int(ping, XML_ATTR_ID, nodeid); +- } +- rc = crm_ipc_send(controller, ping, 0, 0, NULL); +- free_xml(ping); +- return (rc < 0)? 
rc : 0; +-} ++ switch (event_type) { ++ case pcmk_ipc_event_disconnect: ++ if (exit_code == CRM_EX_DISCONNECT) { // Unexpected ++ fprintf(stderr, "error: Lost connection to controller\n"); ++ } ++ goto done; ++ break; + +-static int +-dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) +-{ +- xmlNode *message = string2xml(buffer); +- xmlNode *data = NULL; +- const char *value = NULL; ++ case pcmk_ipc_event_reply: ++ break; + +- if (message == NULL) { +- fprintf(stderr, "error: Could not understand reply from controller\n"); +- crm_node_exit(CRM_EX_PROTOCOL); +- return 0; ++ default: ++ return; + } +- crm_log_xml_trace(message, "controller reply"); +- +- exit_code = CRM_EX_PROTOCOL; + +- // Validate reply +- value = crm_element_value(message, F_CRM_MSG_TYPE); +- if (safe_str_neq(value, XML_ATTR_RESPONSE)) { +- fprintf(stderr, "error: Message from controller was not a reply\n"); ++ if (status != CRM_EX_OK) { ++ fprintf(stderr, "error: Bad reply from controller: %s\n", ++ crm_exit_str(status)); + goto done; + } +- value = crm_element_value(message, XML_ATTR_REFERENCE); +- if (value == NULL) { +- fprintf(stderr, "error: Controller reply did not specify original message\n"); +- goto done; +- } +- data = get_message_xml(message, F_CRM_DATA); +- if (data == NULL) { +- fprintf(stderr, "error: Controller reply did not contain any data\n"); ++ if (reply->reply_type != pcmk_controld_reply_info) { ++ fprintf(stderr, "error: Unknown reply type %d from controller\n", ++ reply->reply_type); + goto done; + } + ++ // Parse desired info from reply and display to user + switch (options.command) { + case 'i': +- value = crm_element_value(data, XML_ATTR_ID); +- if (value == NULL) { +- fprintf(stderr, "error: Controller reply did not contain node ID\n"); +- } else { +- printf("%s\n", value); +- exit_code = CRM_EX_OK; ++ if (reply->data.node_info.id == 0) { ++ fprintf(stderr, ++ "error: Controller reply did not contain node ID\n"); ++ exit_code = CRM_EX_PROTOCOL; 
++ goto done; + } ++ printf("%d\n", reply->data.node_info.id); + break; + + case 'n': + case 'N': +- value = crm_element_value(data, XML_ATTR_UNAME); +- if (value == NULL) { ++ if (reply->data.node_info.uname == NULL) { + fprintf(stderr, "Node is not known to cluster\n"); + exit_code = CRM_EX_NOHOST; +- } else { +- printf("%s\n", value); +- exit_code = CRM_EX_OK; ++ goto done; + } ++ printf("%s\n", reply->data.node_info.uname); + break; + + case 'q': +- value = crm_element_value(data, XML_ATTR_HAVE_QUORUM); +- if (value == NULL) { +- fprintf(stderr, "error: Controller reply did not contain quorum status\n"); +- } else { +- bool quorum = crm_is_true(value); +- +- printf("%d\n", quorum); +- exit_code = quorum? CRM_EX_OK : CRM_EX_QUORUM; ++ printf("%d\n", reply->data.node_info.have_quorum); ++ if (!(reply->data.node_info.have_quorum)) { ++ exit_code = CRM_EX_QUORUM; ++ goto done; + } + break; + + default: + fprintf(stderr, "internal error: Controller reply not expected\n"); + exit_code = CRM_EX_SOFTWARE; +- break; ++ goto done; + } + ++ // Success ++ exit_code = CRM_EX_OK; + done: +- free_xml(message); +- crm_node_exit(exit_code); +- return 0; ++ pcmk_disconnect_ipc(controld_api); ++ pcmk_quit_main_loop(mainloop, 10); + } + + static void + run_controller_mainloop(uint32_t nodeid) + { +- crm_ipc_t *controller = NULL; ++ pcmk_ipc_api_t *controld_api = NULL; + int rc; + +- controller = new_mainloop_for_ipc(CRM_SYSTEM_CRMD, dispatch_controller); ++ // Set disconnect exit code to handle unexpected disconnects ++ exit_code = CRM_EX_DISCONNECT; ++ ++ // Create controller IPC object ++ rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Could not connect to controller: %s\n", ++ pcmk_rc_str(rc)); ++ return; ++ } ++ pcmk_register_ipc_callback(controld_api, controller_event_cb, NULL); + +- rc = send_controller_hello(controller); +- if (rc < 0) { +- fprintf(stderr, "error: Could not register with controller: %s\n", +- 
pcmk_strerror(rc)); +- crm_node_exit(crm_errno2exit(rc)); ++ // Connect to controller ++ rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Could not connect to controller: %s\n", ++ pcmk_rc_str(rc)); ++ exit_code = pcmk_rc2exitc(rc); ++ return; + } + +- rc = send_node_info_request(controller, nodeid); +- if (rc < 0) { ++ rc = pcmk_controld_api_node_info(controld_api, nodeid); ++ if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not ping controller: %s\n", +- pcmk_strerror(rc)); +- crm_node_exit(crm_errno2exit(rc)); ++ pcmk_rc_str(rc)); ++ pcmk_disconnect_ipc(controld_api); ++ exit_code = pcmk_rc2exitc(rc); ++ return; + } + +- // Run main loop to get controller reply via dispatch_controller() ++ // Run main loop to get controller reply via controller_event_cb() ++ mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); + g_main_loop_unref(mainloop); + mainloop = NULL; ++ pcmk_free_ipc_api(controld_api); + } + + static void +@@ -385,32 +330,56 @@ cib_remove_node(long id, const char *name) + } + + static int ++controller_remove_node(const char *node_name, long nodeid) ++{ ++ pcmk_ipc_api_t *controld_api = NULL; ++ int rc; ++ ++ // Create controller IPC object ++ rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Could not connect to controller: %s\n", ++ pcmk_rc_str(rc)); ++ return ENOTCONN; ++ } ++ ++ // Connect to controller (without main loop) ++ rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_sync); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Could not connect to controller: %s\n", ++ pcmk_rc_str(rc)); ++ pcmk_free_ipc_api(controld_api); ++ return rc; ++ } ++ ++ rc = pcmk_ipc_purge_node(controld_api, node_name, nodeid); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, ++ "error: Could not clear node from controller's cache: %s\n", ++ pcmk_rc_str(rc)); ++ } ++ ++ pcmk_free_ipc_api(controld_api); ++ return 
pcmk_rc_ok; ++} ++ ++static int + tools_remove_node_cache(const char *node_name, long nodeid, const char *target) + { + int rc = -1; +- crm_ipc_t *conn = crm_ipc_new(target, 0); ++ crm_ipc_t *conn = NULL; + xmlNode *cmd = NULL; + ++ conn = crm_ipc_new(target, 0); + if (!conn) { + return -ENOTCONN; + } +- + if (!crm_ipc_connect(conn)) { + crm_perror(LOG_ERR, "Connection to %s failed", target); + crm_ipc_destroy(conn); + return -ENOTCONN; + } + +- if(safe_str_eq(target, CRM_SYSTEM_CRMD)) { +- // The controller requires a hello message before sending a request +- rc = send_controller_hello(conn); +- if (rc < 0) { +- fprintf(stderr, "error: Could not register with controller: %s\n", +- pcmk_strerror(rc)); +- return rc; +- } +- } +- + crm_trace("Removing %s[%ld] from the %s membership cache", + node_name, nodeid, target); + +@@ -427,9 +396,9 @@ tools_remove_node_cache(const char *node_name, long nodeid, const char *target) + crm_xml_add_int(cmd, PCMK__XA_ATTR_NODE_ID, (int) nodeid); + } + +- } else { +- cmd = create_request(CRM_OP_RM_NODE_CACHE, +- NULL, NULL, target, crm_system_name, pid_s); ++ } else { // Fencer or pacemakerd ++ cmd = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, target, ++ crm_system_name, NULL); + if (nodeid > 0) { + crm_xml_set_id(cmd, "%ld", nodeid); + } +@@ -441,6 +410,7 @@ tools_remove_node_cache(const char *node_name, long nodeid, const char *target) + target, node_name, nodeid, rc); + + if (rc > 0) { ++ // @TODO Should this be done just once after all the rest? 
+ rc = cib_remove_node(nodeid, node_name); + } + +@@ -455,12 +425,12 @@ tools_remove_node_cache(const char *node_name, long nodeid, const char *target) + static void + remove_node(const char *target_uname) + { ++ int rc; + int d = 0; + long nodeid = 0; + const char *node_name = NULL; + char *endptr = NULL; + const char *daemons[] = { +- CRM_SYSTEM_CRMD, + "stonith-ng", + T_ATTRD, + CRM_SYSTEM_MCP, +@@ -476,6 +446,12 @@ remove_node(const char *target_uname) + node_name = target_uname; + } + ++ rc = controller_remove_node(node_name, nodeid); ++ if (rc != pcmk_rc_ok) { ++ exit_code = pcmk_rc2exitc(rc); ++ return; ++ } ++ + for (d = 0; d < DIMOF(daemons); d++) { + if (tools_remove_node_cache(node_name, nodeid, daemons[d])) { + crm_err("Failed to connect to %s to remove node '%s'", +@@ -545,12 +521,34 @@ node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) + } + + static void ++lost_pacemakerd(gpointer user_data) ++{ ++ fprintf(stderr, "error: Lost connection to cluster\n"); ++ exit_code = CRM_EX_DISCONNECT; ++ g_main_loop_quit(mainloop); ++} ++ ++static void + run_pacemakerd_mainloop(void) + { + crm_ipc_t *ipc = NULL; + xmlNode *poke = NULL; ++ mainloop_io_t *source = NULL; + +- ipc = new_mainloop_for_ipc(CRM_SYSTEM_MCP, node_mcp_dispatch); ++ struct ipc_client_callbacks ipc_callbacks = { ++ .dispatch = node_mcp_dispatch, ++ .destroy = lost_pacemakerd ++ }; ++ ++ source = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, ++ NULL, &ipc_callbacks); ++ ipc = mainloop_get_ipc_client(source); ++ if (ipc == NULL) { ++ fprintf(stderr, ++ "error: Could not connect to cluster (is it running?)\n"); ++ exit_code = CRM_EX_DISCONNECT; ++ return; ++ } + + // Sending anything will get us a list of nodes + poke = create_xml_node(NULL, "poke"); +@@ -558,6 +556,7 @@ run_pacemakerd_mainloop(void) + free_xml(poke); + + // Handle reply via node_mcp_dispatch() ++ mainloop = g_main_loop_new(NULL, FALSE); + g_main_loop_run(mainloop); + 
g_main_loop_unref(mainloop); + mainloop = NULL; +-- +1.8.3.1 + + +From a361f764cb28630d440eec0f3e04a4f3812825eb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Apr 2020 16:05:15 -0500 +Subject: [PATCH 5/6] Refactor: tools: remove dead code from crm_node + +--- + tools/crm_node.c | 22 +--------------------- + 1 file changed, 1 insertion(+), 21 deletions(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index 1773a36..57c2ee5 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -130,25 +130,6 @@ remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError * + return TRUE; + } + +-/*! +- * \internal +- * \brief Exit crm_node +- * Clean up memory, and either quit mainloop (if running) or exit +- * +- * \param[in] value Exit status +- */ +-static void +-crm_node_exit(crm_exit_t value) +-{ +- exit_code = value; +- +- if (mainloop && g_main_loop_is_running(mainloop)) { +- g_main_loop_quit(mainloop); +- } else { +- crm_exit(exit_code); +- } +-} +- + static void + controller_event_cb(pcmk_ipc_api_t *controld_api, + enum pcmk_ipc_event event_type, crm_exit_t status, +@@ -660,6 +641,5 @@ done: + g_strfreev(processed_args); + g_clear_error(&error); + pcmk__free_arg_context(context); +- crm_node_exit(exit_code); +- return exit_code; ++ return crm_exit(exit_code); + } +-- +1.8.3.1 + + +From 591944539259f6294450517770d95c7a02fc599c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 20 Apr 2020 15:48:15 -0500 +Subject: [PATCH 6/6] Refactor: tools: convert crmadmin to use new controller + IPC model + +--- + tools/crmadmin.c | 484 ++++++++++++++++++++++++------------------------------- + 1 file changed, 208 insertions(+), 276 deletions(-) + +diff --git a/tools/crmadmin.c b/tools/crmadmin.c +index 3e9e959..4688458 100644 +--- a/tools/crmadmin.c ++++ b/tools/crmadmin.c +@@ -9,56 +9,44 @@ + + #include + +-#include +- + #include +-#include +-#include ++#include ++#include // atoi() + +-#include +-#include +-#include +-#include + #include // 
gboolean, GMainLoop, etc. ++#include // xmlNode + + #include ++#include + #include + #include +- ++#include + #include + +-#include +- + #define DEFAULT_MESSAGE_TIMEOUT_MS 30000 + + static guint message_timer_id = 0; + static guint message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; + static GMainLoop *mainloop = NULL; +-static crm_ipc_t *crmd_channel = NULL; +-static char *admin_uuid = NULL; +- +-gboolean do_init(void); +-int do_work(void); +-void crmadmin_ipc_connection_destroy(gpointer user_data); +-int admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata); +-int do_find_node_list(xmlNode * xml_node); ++ ++bool do_work(pcmk_ipc_api_t *api); ++void do_find_node_list(xmlNode *xml_node); + gboolean admin_message_timeout(gpointer data); + ++static enum { ++ cmd_none, ++ cmd_shutdown, ++ cmd_health, ++ cmd_elect_dc, ++ cmd_whois_dc, ++ cmd_list_nodes, ++} command = cmd_none; ++ + static gboolean BE_VERBOSE = FALSE; +-static int expected_responses = 1; + static gboolean BASH_EXPORT = FALSE; +-static gboolean DO_HEALTH = FALSE; +-static gboolean DO_RESET = FALSE; +-static gboolean DO_RESOURCE = FALSE; +-static gboolean DO_ELECT_DC = FALSE; +-static gboolean DO_WHOIS_DC = FALSE; +-static gboolean DO_NODE_LIST = FALSE; + static gboolean BE_SILENT = FALSE; +-static gboolean DO_RESOURCE_LIST = FALSE; +-static const char *crmd_operation = NULL; + static char *dest_node = NULL; + static crm_exit_t exit_code = CRM_EX_OK; +-static const char *sys_to = NULL; + + static pcmk__cli_option_t long_options[] = { + // long option, argument type, storage, short option, description, flags +@@ -133,7 +121,8 @@ static pcmk__cli_option_t long_options[] = { + }, + { + "bash-export", no_argument, NULL, 'B', +- "Create Bash export entries of the form 'export uname=uuid'\n", ++ "Display nodes as shell commands of the form 'export uname=uuid' " ++ "(valid with -N/--nodes)'\n", + pcmk__option_default + }, + { +@@ -142,13 +131,98 @@ static pcmk__cli_option_t long_options[] = { + 
}, + { + "-spacer-", no_argument, NULL, '-', +- " The -K and -E commands are rarely used and may be removed in " +- "future versions.", ++ "The -K and -E commands do not work and may be removed in a future " ++ "version.", + pcmk__option_default + }, + { 0, 0, 0, 0 } + }; + ++static void ++quit_main_loop(crm_exit_t ec) ++{ ++ exit_code = ec; ++ if (mainloop != NULL) { ++ GMainLoop *mloop = mainloop; ++ ++ mainloop = NULL; // Don't re-enter this block ++ pcmk_quit_main_loop(mloop, 10); ++ g_main_loop_unref(mloop); ++ } ++} ++ ++static void ++controller_event_cb(pcmk_ipc_api_t *controld_api, ++ enum pcmk_ipc_event event_type, crm_exit_t status, ++ void *event_data, void *user_data) ++{ ++ pcmk_controld_api_reply_t *reply = event_data; ++ ++ switch (event_type) { ++ case pcmk_ipc_event_disconnect: ++ if (exit_code == CRM_EX_DISCONNECT) { // Unexpected ++ fprintf(stderr, "error: Lost connection to controller\n"); ++ } ++ goto done; ++ break; ++ ++ case pcmk_ipc_event_reply: ++ break; ++ ++ default: ++ return; ++ } ++ ++ if (message_timer_id != 0) { ++ g_source_remove(message_timer_id); ++ message_timer_id = 0; ++ } ++ ++ if (status != CRM_EX_OK) { ++ fprintf(stderr, "error: Bad reply from controller: %s", ++ crm_exit_str(status)); ++ exit_code = status; ++ goto done; ++ } ++ ++ if (reply->reply_type != pcmk_controld_reply_ping) { ++ fprintf(stderr, "error: Unknown reply type %d from controller\n", ++ reply->reply_type); ++ goto done; ++ } ++ ++ // Parse desired information from reply ++ switch (command) { ++ case cmd_health: ++ printf("Status of %s@%s: %s (%s)\n", ++ reply->data.ping.sys_from, ++ reply->host_from, ++ reply->data.ping.fsa_state, ++ reply->data.ping.result); ++ if (BE_SILENT && (reply->data.ping.fsa_state != NULL)) { ++ fprintf(stderr, "%s\n", reply->data.ping.fsa_state); ++ } ++ exit_code = CRM_EX_OK; ++ break; ++ ++ case cmd_whois_dc: ++ printf("Designated Controller is: %s\n", reply->host_from); ++ if (BE_SILENT && (reply->host_from != NULL)) { ++ 
fprintf(stderr, "%s\n", reply->host_from); ++ } ++ exit_code = CRM_EX_OK; ++ break; ++ ++ default: // Not really possible here ++ exit_code = CRM_EX_SOFTWARE; ++ break; ++ } ++ ++done: ++ pcmk_disconnect_ipc(controld_api); ++ quit_main_loop(exit_code); ++} ++ + // \return Standard Pacemaker return code + static int + list_nodes() +@@ -181,6 +255,9 @@ main(int argc, char **argv) + int option_index = 0; + int argerr = 0; + int flag; ++ int rc; ++ pcmk_ipc_api_t *controld_api = NULL; ++ bool need_controld_api = true; + + crm_log_cli_init("crmadmin"); + pcmk__set_cli_options(NULL, " [options]", long_options, +@@ -211,33 +288,40 @@ main(int argc, char **argv) + pcmk__cli_help(flag, CRM_EX_OK); + break; + case 'D': +- DO_WHOIS_DC = TRUE; ++ command = cmd_whois_dc; + break; + case 'B': + BASH_EXPORT = TRUE; + break; + case 'K': +- DO_RESET = TRUE; ++ command = cmd_shutdown; + crm_trace("Option %c => %s", flag, optarg); ++ if (dest_node != NULL) { ++ free(dest_node); ++ } + dest_node = strdup(optarg); +- crmd_operation = CRM_OP_LOCAL_SHUTDOWN; + break; + case 'q': + BE_SILENT = TRUE; + break; + case 'S': +- DO_HEALTH = TRUE; ++ command = cmd_health; + crm_trace("Option %c => %s", flag, optarg); ++ if (dest_node != NULL) { ++ free(dest_node); ++ } + dest_node = strdup(optarg); + break; + case 'E': +- DO_ELECT_DC = TRUE; ++ command = cmd_elect_dc; + break; + case 'N': +- DO_NODE_LIST = TRUE; ++ command = cmd_list_nodes; ++ need_controld_api = false; + break; + case 'H': +- DO_HEALTH = TRUE; ++ fprintf(stderr, "Cluster-wide health option not supported\n"); ++ ++argerr; + break; + default: + printf("Argument code 0%o (%c) is not (?yet?) 
supported\n", flag, flag); +@@ -257,285 +341,133 @@ main(int argc, char **argv) + ++argerr; + } + ++ if (command == cmd_none) { ++ fprintf(stderr, "error: Must specify a command option\n\n"); ++ ++argerr; ++ } ++ + if (argerr) { + pcmk__cli_help('?', CRM_EX_USAGE); + } + +- if (do_init()) { +- int res = 0; +- +- res = do_work(); +- if (res > 0) { +- /* wait for the reply by creating a mainloop and running it until +- * the callbacks are invoked... +- */ +- mainloop = g_main_loop_new(NULL, FALSE); +- crm_trace("Waiting for %d replies from the local CRM", expected_responses); +- +- message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); +- +- g_main_loop_run(mainloop); +- +- } else if (res < 0) { +- crm_err("No message to send"); +- exit_code = CRM_EX_ERROR; ++ // Connect to the controller if needed ++ if (need_controld_api) { ++ rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); ++ if (controld_api == NULL) { ++ fprintf(stderr, "error: Could not connect to controller: %s\n", ++ pcmk_rc_str(rc)); ++ exit_code = pcmk_rc2exitc(rc); ++ goto done; + } +- } else { +- crm_warn("Init failed, could not perform requested operations"); +- exit_code = CRM_EX_UNAVAILABLE; +- } +- +- crm_trace("%s exiting normally", crm_system_name); +- return exit_code; +-} +- +-int +-do_work(void) +-{ +- int ret = 1; +- +- /* construct the request */ +- xmlNode *msg_data = NULL; +- gboolean all_is_good = TRUE; +- +- if (DO_HEALTH == TRUE) { +- crm_trace("Querying the system"); +- +- sys_to = CRM_SYSTEM_DC; +- +- if (dest_node != NULL) { +- sys_to = CRM_SYSTEM_CRMD; +- crmd_operation = CRM_OP_PING; +- +- if (BE_VERBOSE) { +- expected_responses = 1; +- } +- +- } else { +- crm_info("Cluster-wide health not available yet"); +- all_is_good = FALSE; ++ pcmk_register_ipc_callback(controld_api, controller_event_cb, NULL); ++ rc = pcmk_connect_ipc(controld_api, pcmk_ipc_dispatch_main); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Could not connect to controller: 
%s\n", ++ pcmk_rc_str(rc)); ++ exit_code = pcmk_rc2exitc(rc); ++ goto done; + } +- +- } else if (DO_ELECT_DC) { +- /* tell the local node to initiate an election */ +- +- dest_node = NULL; +- sys_to = CRM_SYSTEM_CRMD; +- crmd_operation = CRM_OP_VOTE; +- ret = 0; /* no return message */ +- +- } else if (DO_WHOIS_DC) { +- dest_node = NULL; +- sys_to = CRM_SYSTEM_DC; +- crmd_operation = CRM_OP_PING; +- +- } else if (DO_NODE_LIST) { +- crm_exit(pcmk_rc2exitc(list_nodes())); +- +- } else if (DO_RESET) { +- /* tell dest_node to initiate the shutdown procedure +- * +- * if dest_node is NULL, the request will be sent to the +- * local node +- */ +- sys_to = CRM_SYSTEM_CRMD; +- ret = 0; /* no return message */ +- +- } else { +- crm_err("Unknown options"); +- all_is_good = FALSE; + } + +- if (all_is_good == FALSE) { +- crm_err("Creation of request failed. No message to send"); +- return -1; ++ if (do_work(controld_api)) { ++ // A reply is needed from controller, so run main loop to get it ++ exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects ++ mainloop = g_main_loop_new(NULL, FALSE); ++ message_timer_id = g_timeout_add(message_timeout_ms, ++ admin_message_timeout, NULL); ++ g_main_loop_run(mainloop); + } + +-/* send it */ +- if (crmd_channel == NULL) { +- crm_err("The IPC connection is not valid, cannot send anything"); +- return -1; +- } +- +- if (sys_to == NULL) { +- if (dest_node != NULL) { +- sys_to = CRM_SYSTEM_CRMD; +- } else { +- sys_to = CRM_SYSTEM_DC; +- } +- } +- +- { +- xmlNode *cmd = create_request(crmd_operation, msg_data, dest_node, sys_to, +- crm_system_name, admin_uuid); +- +- crm_ipc_send(crmd_channel, cmd, 0, 0, NULL); +- free_xml(cmd); +- } +- +- return ret; +-} ++done: ++ if (controld_api != NULL) { ++ pcmk_ipc_api_t *capi = controld_api; + +-void +-crmadmin_ipc_connection_destroy(gpointer user_data) +-{ +- crm_err("Connection to controller was terminated"); +- if (mainloop) { +- g_main_loop_quit(mainloop); +- } else { +- 
crm_exit(CRM_EX_DISCONNECT); ++ controld_api = NULL; // Ensure we can't free this twice ++ pcmk_free_ipc_api(capi); + } +-} +- +-struct ipc_client_callbacks crm_callbacks = { +- .dispatch = admin_msg_callback, +- .destroy = crmadmin_ipc_connection_destroy +-}; +- +-gboolean +-do_init(void) +-{ +- mainloop_io_t *source = +- mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); +- +- admin_uuid = pcmk__getpid_s(); +- +- crmd_channel = mainloop_get_ipc_client(source); +- +- if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) { +- return TRUE; +- +- } else if (crmd_channel != NULL) { +- xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1"); +- +- crm_ipc_send(crmd_channel, xml, 0, 0, NULL); +- return TRUE; ++ if (mainloop != NULL) { ++ g_main_loop_unref(mainloop); ++ mainloop = NULL; + } +- return FALSE; ++ return crm_exit(exit_code); + } + +-static bool +-validate_crm_message(xmlNode * msg, const char *sys, const char *uuid, const char *msg_type) ++// \return True if reply from controller is needed ++bool ++do_work(pcmk_ipc_api_t *controld_api) + { +- const char *type = NULL; +- const char *crm_msg_reference = NULL; +- +- if (msg == NULL) { +- return FALSE; +- } ++ bool need_reply = false; ++ int rc = pcmk_rc_ok; + +- type = crm_element_value(msg, F_CRM_MSG_TYPE); +- crm_msg_reference = crm_element_value(msg, XML_ATTR_REFERENCE); +- +- if (type == NULL) { +- crm_info("No message type defined."); +- return FALSE; +- +- } else if (msg_type != NULL && strcasecmp(msg_type, type) != 0) { +- crm_info("Expecting a (%s) message but received a (%s).", msg_type, type); +- return FALSE; +- } +- +- if (crm_msg_reference == NULL) { +- crm_info("No message crm_msg_reference defined."); +- return FALSE; +- } +- +- return TRUE; +-} +- +-int +-admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata) +-{ +- static int received_responses = 0; +- xmlNode *xml = string2xml(buffer); +- +- received_responses++; +- 
g_source_remove(message_timer_id); +- +- crm_log_xml_trace(xml, "ipc"); +- +- if (xml == NULL) { +- crm_info("XML in IPC message was not valid... " "discarding."); +- +- } else if (validate_crm_message(xml, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) { +- crm_trace("Message was not a CRM response. Discarding."); +- +- } else if (DO_HEALTH) { +- xmlNode *data = get_message_xml(xml, F_CRM_DATA); +- const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE); ++ switch (command) { ++ case cmd_shutdown: ++ rc = pcmk_controld_api_shutdown(controld_api, dest_node); ++ break; + +- printf("Status of %s@%s: %s (%s)\n", +- crm_element_value(data, XML_PING_ATTR_SYSFROM), +- crm_element_value(xml, F_CRM_HOST_FROM), +- state, crm_element_value(data, XML_PING_ATTR_STATUS)); ++ case cmd_health: // dest_node != NULL ++ case cmd_whois_dc: // dest_node == NULL ++ rc = pcmk_controld_api_ping(controld_api, dest_node); ++ need_reply = true; ++ break; + +- if (BE_SILENT && state != NULL) { +- fprintf(stderr, "%s\n", state); +- } ++ case cmd_elect_dc: ++ rc = pcmk_controld_api_start_election(controld_api); ++ break; + +- } else if (DO_WHOIS_DC) { +- const char *dc = crm_element_value(xml, F_CRM_HOST_FROM); ++ case cmd_list_nodes: ++ rc = list_nodes(); ++ break; + +- printf("Designated Controller is: %s\n", dc); +- if (BE_SILENT && dc != NULL) { +- fprintf(stderr, "%s\n", dc); +- } +- crm_exit(CRM_EX_OK); ++ case cmd_none: // not actually possible here ++ break; + } +- +- free_xml(xml); +- +- if (received_responses >= expected_responses) { +- crm_trace("Received expected number (%d) of replies, exiting normally", +- expected_responses); +- crm_exit(CRM_EX_OK); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Command failed: %s", pcmk_rc_str(rc)); ++ exit_code = pcmk_rc2exitc(rc); + } +- +- message_timer_id = g_timeout_add(message_timeout_ms, admin_message_timeout, NULL); +- return 0; ++ return need_reply; + } + + gboolean + admin_message_timeout(gpointer data) 
+ { +- fprintf(stderr, "No messages received in %d seconds.. aborting\n", +- (int)message_timeout_ms / 1000); +- crm_err("No messages received in %d seconds", (int)message_timeout_ms / 1000); +- exit_code = CRM_EX_TIMEOUT; +- g_main_loop_quit(mainloop); +- return FALSE; ++ fprintf(stderr, ++ "error: No reply received from controller before timeout (%dms)\n", ++ message_timeout_ms); ++ message_timer_id = 0; ++ quit_main_loop(CRM_EX_TIMEOUT); ++ return FALSE; // Tells glib to remove source + } + +-int ++void + do_find_node_list(xmlNode * xml_node) + { + int found = 0; + xmlNode *node = NULL; + xmlNode *nodes = get_object_root(XML_CIB_TAG_NODES, xml_node); + +- for (node = __xml_first_child_element(nodes); node != NULL; +- node = __xml_next_element(node)) { ++ for (node = first_named_child(nodes, XML_CIB_TAG_NODE); node != NULL; ++ node = crm_next_same_xml(node)) { + +- if (crm_str_eq((const char *)node->name, XML_CIB_TAG_NODE, TRUE)) { ++ if (BASH_EXPORT) { ++ printf("export %s=%s\n", ++ crm_element_value(node, XML_ATTR_UNAME), ++ crm_element_value(node, XML_ATTR_ID)); ++ } else { ++ const char *node_type = crm_element_value(node, XML_ATTR_TYPE); + +- if (BASH_EXPORT) { +- printf("export %s=%s\n", +- crm_element_value(node, XML_ATTR_UNAME), +- crm_element_value(node, XML_ATTR_ID)); +- } else { +- printf("%s node: %s (%s)\n", +- crm_element_value(node, XML_ATTR_TYPE), +- crm_element_value(node, XML_ATTR_UNAME), +- crm_element_value(node, XML_ATTR_ID)); ++ if (node_type == NULL) { ++ node_type = "member"; + } +- found++; ++ printf("%s node: %s (%s)\n", node_type, ++ crm_element_value(node, XML_ATTR_UNAME), ++ crm_element_value(node, XML_ATTR_ID)); + } ++ found++; + } ++ // @TODO List Pacemaker Remote nodes that don't have a entry + + if (found == 0) { +- printf("NO nodes configured\n"); ++ printf("No nodes configured\n"); + } +- +- return found; + } +-- +1.8.3.1 + diff --git a/SOURCES/007-shutdown-lock.patch b/SOURCES/007-shutdown-lock.patch deleted file mode 100644 
index 17e7588..0000000 --- a/SOURCES/007-shutdown-lock.patch +++ /dev/null @@ -1,60 +0,0 @@ -From f17c99492c7ab9e639b940a34d2a48b55937b605 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 14 Jan 2020 16:00:36 -0600 -Subject: [PATCH 03/18] Low: tools: improve crm_resource "why" messages - ---- - tools/crm_resource_runtime.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 9ae24b6..61ceee7 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -878,7 +878,7 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, - void - cli_resource_check(cib_t * cib_conn, resource_t *rsc) - { -- int need_nl = 0; -+ bool printed = false; - char *role_s = NULL; - char *managed = NULL; - resource_t *parent = uber_parent(rsc); -@@ -897,23 +897,26 @@ cli_resource_check(cib_t * cib_conn, resource_t *rsc) - // Treated as if unset - - } else if(role == RSC_ROLE_STOPPED) { -- printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id); -- need_nl++; -+ printf("\n * Configuration specifies '%s' should remain stopped\n", -+ parent->id); -+ printed = true; - - } else if (is_set(parent->flags, pe_rsc_promotable) - && (role == RSC_ROLE_SLAVE)) { -- printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id); -- need_nl++; -+ printf("\n * Configuration specifies '%s' should not be promoted\n", -+ parent->id); -+ printed = true; - } - } - -- if(managed && crm_is_true(managed) == FALSE) { -- printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id); -- need_nl++; -+ if (managed && !crm_is_true(managed)) { -+ printf("%s * Configuration prevents cluster from stopping or starting unmanaged '%s'\n", -+ (printed? 
"" : "\n"), parent->id); -+ printed = true; - } - free(managed); - -- if(need_nl) { -+ if (printed) { - printf("\n"); - } - } --- -1.8.3.1 - diff --git a/SOURCES/008-crm_node.patch b/SOURCES/008-crm_node.patch new file mode 100644 index 0000000..7d7c51a --- /dev/null +++ b/SOURCES/008-crm_node.patch @@ -0,0 +1,685 @@ +From e01a1178fd8f5b99895683b3af9998e9485d12a4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 24 Apr 2020 16:42:02 -0500 +Subject: [PATCH 1/3] Feature: controller: add new IPC API command for getting + node list + +This is based on and will replace the corresponding functionality in +pacemakerd. +--- + daemons/controld/controld_messages.c | 44 ++++++++++++++++++++++++++++-- + include/crm/common/ipc_controld.h | 13 +++++++++ + include/crm_internal.h | 1 + + lib/common/ipc_controld.c | 53 ++++++++++++++++++++++++++++++++++++ + 4 files changed, 109 insertions(+), 2 deletions(-) + +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 0be04d0..423f006 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -375,10 +375,11 @@ relay_message(xmlNode * msg, gboolean originated_locally) + is_local = 0; + + } else if (is_for_crm) { +- if (safe_str_eq(task, CRM_OP_NODE_INFO)) { ++ if (safe_str_eq(task, CRM_OP_NODE_INFO) ++ || safe_str_eq(task, PCMK__CONTROLD_CMD_NODES)) { + /* Node info requests do not specify a host, which is normally + * treated as "all hosts", because the whole point is that the +- * client doesn't know the local node name. Always handle these ++ * client may not know the local node name. Always handle these + * requests locally. + */ + is_local = 1; +@@ -784,6 +785,42 @@ handle_ping(xmlNode *msg) + } + + /*! 
++ * \brief Handle a PCMK__CONTROLD_CMD_NODES message ++ * ++ * \return Next FSA input ++ */ ++static enum crmd_fsa_input ++handle_node_list(xmlNode *request) ++{ ++ GHashTableIter iter; ++ crm_node_t *node = NULL; ++ xmlNode *reply = NULL; ++ xmlNode *reply_data = NULL; ++ ++ // Create message data for reply ++ reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES); ++ g_hash_table_iter_init(&iter, crm_peer_cache); ++ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { ++ xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE); ++ ++ crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t ++ crm_xml_add(xml, XML_ATTR_UNAME, node->uname); ++ crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state); ++ } ++ ++ // Create and send reply ++ reply = create_reply(request, reply_data); ++ free_xml(reply_data); ++ if (reply) { ++ (void) relay_message(reply, TRUE); ++ free_xml(reply); ++ } ++ ++ // Nothing further to do ++ return I_NULL; ++} ++ ++/*! + * \brief Handle a CRM_OP_NODE_INFO request + * + * \param[in] msg Message XML +@@ -1080,6 +1117,9 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) + + remote_ra_process_maintenance_nodes(xml); + ++ } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { ++ return handle_node_list(stored_msg); ++ + /*========== (NOT_DC)-Only Actions ==========*/ + } else if (!AM_I_DC) { + +diff --git a/include/crm/common/ipc_controld.h b/include/crm/common/ipc_controld.h +index 0ebabfc..b817357 100644 +--- a/include/crm/common/ipc_controld.h ++++ b/include/crm/common/ipc_controld.h +@@ -22,6 +22,7 @@ extern "C" { + */ + + #include // bool ++#include // GList + #include // xmlNode + #include // pcmk_ipc_api_t + +@@ -32,8 +33,16 @@ enum pcmk_controld_api_reply { + pcmk_controld_reply_info, + pcmk_controld_reply_resource, + pcmk_controld_reply_ping, ++ pcmk_controld_reply_nodes, + }; + ++// Node information passed with pcmk_controld_reply_nodes ++typedef struct { ++ uint32_t id; ++ const char *uname; ++ 
const char *state; ++} pcmk_controld_api_node_t; ++ + /*! + * Controller reply passed to event callback + * +@@ -72,6 +81,9 @@ typedef struct { + const char *fsa_state; + const char *result; + } ping; ++ ++ // pcmk_controld_reply_nodes ++ GList *nodes; // list of pcmk_controld_api_node_t * + } data; + } pcmk_controld_api_reply_t; + +@@ -88,6 +100,7 @@ int pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node, + const char *provider, const char *type, + bool cib_only); + int pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name); ++int pcmk_controld_api_list_nodes(pcmk_ipc_api_t *api); + int pcmk_controld_api_shutdown(pcmk_ipc_api_t *api, const char *node_name); + int pcmk_controld_api_start_election(pcmk_ipc_api_t *api); + unsigned int pcmk_controld_api_replies_expected(pcmk_ipc_api_t *api); +diff --git a/include/crm_internal.h b/include/crm_internal.h +index fd56fc6..cf8999f 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -122,6 +122,7 @@ pid_t pcmk_locate_sbd(void); + #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" + #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" + ++#define PCMK__CONTROLD_CMD_NODES "list-nodes" + + /* + * Environment variables used by Pacemaker +diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c +index 22bb733..a733dd3 100644 +--- a/lib/common/ipc_controld.c ++++ b/lib/common/ipc_controld.c +@@ -120,6 +120,28 @@ set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) + data->data.ping.result = crm_element_value(msg_data, XML_PING_ATTR_STATUS); + } + ++static void ++set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) ++{ ++ pcmk_controld_api_node_t *node_info; ++ ++ data->reply_type = pcmk_controld_reply_nodes; ++ for (xmlNode *node = first_named_child(msg_data, XML_CIB_TAG_NODE); ++ node != NULL; node = crm_next_same_xml(node)) { ++ ++ long long id_ll = 0; ++ ++ node_info = calloc(1, sizeof(pcmk_controld_api_node_t)); ++ 
crm_element_value_ll(node, XML_ATTR_ID, &id_ll); ++ if (id_ll > 0) { ++ node_info->id = id_ll; ++ } ++ node_info->uname = crm_element_value(node, XML_ATTR_UNAME); ++ node_info->state = crm_element_value(node, XML_NODE_IN_CLUSTER); ++ data->data.nodes = g_list_prepend(data->data.nodes, node_info); ++ } ++} ++ + static bool + reply_expected(pcmk_ipc_api_t *api, xmlNode *request) + { +@@ -201,6 +223,9 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + } else if (!strcmp(value, CRM_OP_PING)) { + set_ping_data(&reply_data, msg_data); + ++ } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) { ++ set_nodes_data(&reply_data, msg_data); ++ + } else { + crm_debug("Unrecognizable controller message: unknown command '%s'", + value); +@@ -210,6 +235,11 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + } + + pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); ++ ++ // Free any reply data that was allocated ++ if (safe_str_eq(value, PCMK__CONTROLD_CMD_NODES)) { ++ g_list_free_full(reply_data.data.nodes, free); ++ } + } + + pcmk__ipc_methods_t * +@@ -376,6 +406,29 @@ pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name) + } + + /*! ++ * \brief Ask the controller for cluster information ++ * ++ * \param[in] api Controller connection ++ * ++ * \return Standard Pacemaker return code ++ * \note Event callback will get a reply of type pcmk_controld_reply_nodes. ++ */ ++int ++pcmk_controld_api_list_nodes(pcmk_ipc_api_t *api) ++{ ++ xmlNode *request; ++ int rc = EINVAL; ++ ++ request = create_controller_request(api, PCMK__CONTROLD_CMD_NODES, NULL, ++ NULL); ++ if (request != NULL) { ++ rc = send_controller_request(api, request, true); ++ free_xml(request); ++ } ++ return rc; ++} ++ ++/*! 
+ * \internal + * \brief Ask the controller to shut down + * +-- +1.8.3.1 + + +From 74e2d8d18bf534c1ec6f0e0f44a90772d393a553 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Jul 2020 11:51:56 -0500 +Subject: [PATCH 2/3] Refactor: functionize numeric comparisons of strings + +This moves the guts of sort_node_uname() from libpe_status to a new function, +pcmk_numeric_strcasecmp(), in libcrmcommon, so it can be used with strings and +not just pe_node_t objects. +--- + include/crm/common/util.h | 1 + + lib/common/strings.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ + lib/pengine/utils.c | 50 ++---------------------------------- + 3 files changed, 68 insertions(+), 48 deletions(-) + +diff --git a/include/crm/common/util.h b/include/crm/common/util.h +index 67d74d2..bb97b0a 100644 +--- a/include/crm/common/util.h ++++ b/include/crm/common/util.h +@@ -59,6 +59,7 @@ gboolean crm_strcase_equal(gconstpointer a, gconstpointer b); + char *crm_strdup_printf(char const *format, ...) __attribute__ ((__format__ (__printf__, 1, 2))); + int pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end); + gboolean pcmk__str_in_list(GList *lst, const gchar *s); ++int pcmk_numeric_strcasecmp(const char *s1, const char *s2); + + # define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) + # define crm_str_hash g_str_hash_traditional +diff --git a/lib/common/strings.c b/lib/common/strings.c +index 4562738..bd68ccf 100644 +--- a/lib/common/strings.c ++++ b/lib/common/strings.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -715,3 +716,67 @@ pcmk__str_none_of(const char *s, ...) + + return g_list_find_custom(lst, s, (GCompareFunc) strcmp) != NULL; + } ++ ++/* ++ * \brief Sort strings, with numeric portions sorted numerically ++ * ++ * Sort two strings case-insensitively like strcasecmp(), but with any numeric ++ * portions of the string sorted numerically. 
This is particularly useful for ++ * node names (for example, "node10" will sort higher than "node9" but lower ++ * than "remotenode9"). ++ * ++ * \param[in] s1 First string to compare (must not be NULL) ++ * \param[in] s2 Second string to compare (must not be NULL) ++ * ++ * \retval -1 \p s1 comes before \p s2 ++ * \retval 0 \p s1 and \p s2 are equal ++ * \retval 1 \p s1 comes after \p s2 ++ */ ++int ++pcmk_numeric_strcasecmp(const char *s1, const char *s2) ++{ ++ while (*s1 && *s2) { ++ if (isdigit(*s1) && isdigit(*s2)) { ++ // If node names contain a number, sort numerically ++ ++ char *end1 = NULL; ++ char *end2 = NULL; ++ long num1 = strtol(s1, &end1, 10); ++ long num2 = strtol(s2, &end2, 10); ++ ++ // allow ordering e.g. 007 > 7 ++ size_t len1 = end1 - s1; ++ size_t len2 = end2 - s2; ++ ++ if (num1 < num2) { ++ return -1; ++ } else if (num1 > num2) { ++ return 1; ++ } else if (len1 < len2) { ++ return -1; ++ } else if (len1 > len2) { ++ return 1; ++ } ++ s1 = end1; ++ s2 = end2; ++ } else { ++ // Compare non-digits case-insensitively ++ int lower1 = tolower(*s1); ++ int lower2 = tolower(*s2); ++ ++ if (lower1 < lower2) { ++ return -1; ++ } else if (lower1 > lower2) { ++ return 1; ++ } ++ ++s1; ++ ++s2; ++ } ++ } ++ if (!*s1 && *s2) { ++ return -1; ++ } else if (*s1 && !*s2) { ++ return 1; ++ } ++ return 0; ++} +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index ce3c260..584def4 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -13,7 +13,6 @@ + #include + #include + +-#include + #include + #include + +@@ -214,53 +213,8 @@ pe__node_list2table(GList *list) + gint + sort_node_uname(gconstpointer a, gconstpointer b) + { +- const char *name_a = ((const pe_node_t *) a)->details->uname; +- const char *name_b = ((const pe_node_t *) b)->details->uname; +- +- while (*name_a && *name_b) { +- if (isdigit(*name_a) && isdigit(*name_b)) { +- // If node names contain a number, sort numerically +- +- char *end_a = NULL; +- char *end_b = NULL; +- long 
num_a = strtol(name_a, &end_a, 10); +- long num_b = strtol(name_b, &end_b, 10); +- +- // allow ordering e.g. 007 > 7 +- size_t len_a = end_a - name_a; +- size_t len_b = end_b - name_b; +- +- if (num_a < num_b) { +- return -1; +- } else if (num_a > num_b) { +- return 1; +- } else if (len_a < len_b) { +- return -1; +- } else if (len_a > len_b) { +- return 1; +- } +- name_a = end_a; +- name_b = end_b; +- } else { +- // Compare non-digits case-insensitively +- int lower_a = tolower(*name_a); +- int lower_b = tolower(*name_b); +- +- if (lower_a < lower_b) { +- return -1; +- } else if (lower_a > lower_b) { +- return 1; +- } +- ++name_a; +- ++name_b; +- } +- } +- if (!*name_a && *name_b) { +- return -1; +- } else if (*name_a && !*name_b) { +- return 1; +- } +- return 0; ++ return pcmk_numeric_strcasecmp(((const pe_node_t *) a)->details->uname, ++ ((const pe_node_t *) b)->details->uname); + } + + /*! +-- +1.8.3.1 + + +From 8461509158e06365122dc741c527c83c94e966ce Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 24 Apr 2020 19:35:19 -0500 +Subject: [PATCH 3/3] Fix: tools: crm_node -l and -p now work from Pacemaker + Remote nodes + +crm_node now asks the controller for the cluster node list, instead of +pacemakerd. This allows it to work from Pacemaker Remote nodes, since +controller IPC is proxied but pacemakerd IPC is not. +--- + tools/crm_node.c | 176 +++++++++++++++++++++---------------------------------- + 1 file changed, 67 insertions(+), 109 deletions(-) + +diff --git a/tools/crm_node.c b/tools/crm_node.c +index 57c2ee5..146454d 100644 +--- a/tools/crm_node.c ++++ b/tools/crm_node.c +@@ -130,6 +130,16 @@ remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError * + return TRUE; + } + ++static gint ++sort_node(gconstpointer a, gconstpointer b) ++{ ++ const pcmk_controld_api_node_t *node_a = a; ++ const pcmk_controld_api_node_t *node_b = b; ++ ++ return pcmk_numeric_strcasecmp((node_a->uname? node_a->uname : ""), ++ (node_b->uname? 
node_b->uname : "")); ++} ++ + static void + controller_event_cb(pcmk_ipc_api_t *controld_api, + enum pcmk_ipc_event event_type, crm_exit_t status, +@@ -157,15 +167,16 @@ controller_event_cb(pcmk_ipc_api_t *controld_api, + crm_exit_str(status)); + goto done; + } +- if (reply->reply_type != pcmk_controld_reply_info) { +- fprintf(stderr, "error: Unknown reply type %d from controller\n", +- reply->reply_type); +- goto done; +- } + + // Parse desired info from reply and display to user + switch (options.command) { + case 'i': ++ if (reply->reply_type != pcmk_controld_reply_info) { ++ fprintf(stderr, ++ "error: Unknown reply type %d from controller\n", ++ reply->reply_type); ++ goto done; ++ } + if (reply->data.node_info.id == 0) { + fprintf(stderr, + "error: Controller reply did not contain node ID\n"); +@@ -177,6 +188,12 @@ controller_event_cb(pcmk_ipc_api_t *controld_api, + + case 'n': + case 'N': ++ if (reply->reply_type != pcmk_controld_reply_info) { ++ fprintf(stderr, ++ "error: Unknown reply type %d from controller\n", ++ reply->reply_type); ++ goto done; ++ } + if (reply->data.node_info.uname == NULL) { + fprintf(stderr, "Node is not known to cluster\n"); + exit_code = CRM_EX_NOHOST; +@@ -186,6 +203,12 @@ controller_event_cb(pcmk_ipc_api_t *controld_api, + break; + + case 'q': ++ if (reply->reply_type != pcmk_controld_reply_info) { ++ fprintf(stderr, ++ "error: Unknown reply type %d from controller\n", ++ reply->reply_type); ++ goto done; ++ } + printf("%d\n", reply->data.node_info.have_quorum); + if (!(reply->data.node_info.have_quorum)) { + exit_code = CRM_EX_QUORUM; +@@ -193,6 +216,36 @@ controller_event_cb(pcmk_ipc_api_t *controld_api, + } + break; + ++ case 'l': ++ case 'p': ++ if (reply->reply_type != pcmk_controld_reply_nodes) { ++ fprintf(stderr, ++ "error: Unknown reply type %d from controller\n", ++ reply->reply_type); ++ goto done; ++ } ++ reply->data.nodes = g_list_sort(reply->data.nodes, sort_node); ++ for (GList *node_iter = reply->data.nodes; ++ 
node_iter != NULL; node_iter = node_iter->next) { ++ ++ pcmk_controld_api_node_t *node = node_iter->data; ++ const char *uname = (node->uname? node->uname : ""); ++ const char *state = (node->state? node->state : ""); ++ ++ if (options.command == 'l') { ++ printf("%lu %s %s\n", ++ (unsigned long) node->id, uname, state); ++ ++ // i.e. CRM_NODE_MEMBER, but we don't want to include cluster.h ++ } else if (!strcmp(state, "member")) { ++ printf("%s ", uname); ++ } ++ } ++ if (options.command == 'p') { ++ printf("\n"); ++ } ++ break; ++ + default: + fprintf(stderr, "internal error: Controller reply not expected\n"); + exit_code = CRM_EX_SOFTWARE; +@@ -207,7 +260,7 @@ done: + } + + static void +-run_controller_mainloop(uint32_t nodeid) ++run_controller_mainloop(uint32_t nodeid, bool list_nodes) + { + pcmk_ipc_api_t *controld_api = NULL; + int rc; +@@ -233,7 +286,11 @@ run_controller_mainloop(uint32_t nodeid) + return; + } + +- rc = pcmk_controld_api_node_info(controld_api, nodeid); ++ if (list_nodes) { ++ rc = pcmk_controld_api_list_nodes(controld_api); ++ } else { ++ rc = pcmk_controld_api_node_info(controld_api, nodeid); ++ } + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not ping controller: %s\n", + pcmk_rc_str(rc)); +@@ -263,7 +320,7 @@ print_node_name(void) + + } else { + // Otherwise ask the controller +- run_controller_mainloop(0); ++ run_controller_mainloop(0, false); + } + } + +@@ -444,105 +501,6 @@ remove_node(const char *target_uname) + exit_code = CRM_EX_OK; + } + +-static gint +-compare_node_xml(gconstpointer a, gconstpointer b) +-{ +- const char *a_name = crm_element_value((xmlNode*) a, "uname"); +- const char *b_name = crm_element_value((xmlNode*) b, "uname"); +- +- return strcmp((a_name? a_name : ""), (b_name? 
b_name : "")); +-} +- +-static int +-node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) +-{ +- GList *nodes = NULL; +- xmlNode *node = NULL; +- xmlNode *msg = string2xml(buffer); +- const char *uname; +- const char *state; +- +- if (msg == NULL) { +- fprintf(stderr, "error: Could not understand pacemakerd response\n"); +- exit_code = CRM_EX_PROTOCOL; +- g_main_loop_quit(mainloop); +- return 0; +- } +- +- crm_log_xml_trace(msg, "message"); +- +- for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { +- nodes = g_list_insert_sorted(nodes, node, compare_node_xml); +- } +- +- for (GList *iter = nodes; iter; iter = iter->next) { +- node = (xmlNode*) iter->data; +- uname = crm_element_value(node, "uname"); +- state = crm_element_value(node, "state"); +- +- if (options.command == 'l') { +- int id = 0; +- +- crm_element_value_int(node, "id", &id); +- printf("%d %s %s\n", id, (uname? uname : ""), (state? state : "")); +- +- // This is CRM_NODE_MEMBER but we don't want to include cluster header +- } else if ((options.command == 'p') && safe_str_eq(state, "member")) { +- printf("%s ", (uname? 
uname : "")); +- } +- } +- if (options.command == 'p') { +- fprintf(stdout, "\n"); +- } +- +- free_xml(msg); +- exit_code = CRM_EX_OK; +- g_main_loop_quit(mainloop); +- return 0; +-} +- +-static void +-lost_pacemakerd(gpointer user_data) +-{ +- fprintf(stderr, "error: Lost connection to cluster\n"); +- exit_code = CRM_EX_DISCONNECT; +- g_main_loop_quit(mainloop); +-} +- +-static void +-run_pacemakerd_mainloop(void) +-{ +- crm_ipc_t *ipc = NULL; +- xmlNode *poke = NULL; +- mainloop_io_t *source = NULL; +- +- struct ipc_client_callbacks ipc_callbacks = { +- .dispatch = node_mcp_dispatch, +- .destroy = lost_pacemakerd +- }; +- +- source = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, +- NULL, &ipc_callbacks); +- ipc = mainloop_get_ipc_client(source); +- if (ipc == NULL) { +- fprintf(stderr, +- "error: Could not connect to cluster (is it running?)\n"); +- exit_code = CRM_EX_DISCONNECT; +- return; +- } +- +- // Sending anything will get us a list of nodes +- poke = create_xml_node(NULL, "poke"); +- crm_ipc_send(ipc, poke, 0, 0, NULL); +- free_xml(poke); +- +- // Handle reply via node_mcp_dispatch() +- mainloop = g_main_loop_new(NULL, FALSE); +- g_main_loop_run(mainloop); +- g_main_loop_unref(mainloop); +- mainloop = NULL; +-} +- + static GOptionContext * + build_arg_context(pcmk__common_args_t *args, GOptionGroup *group) { + GOptionContext *context = NULL; +@@ -627,11 +585,11 @@ main(int argc, char **argv) + case 'i': + case 'q': + case 'N': +- run_controller_mainloop(options.nodeid); ++ run_controller_mainloop(options.nodeid, false); + break; + case 'l': + case 'p': +- run_pacemakerd_mainloop(); ++ run_controller_mainloop(0, true); + break; + default: + break; +-- +1.8.3.1 + diff --git a/SOURCES/008-shutdown-lock.patch b/SOURCES/008-shutdown-lock.patch deleted file mode 100644 index 0592013..0000000 --- a/SOURCES/008-shutdown-lock.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 736f255c18d4c99f1956fbb5ad4ac5bfc15bb841 Mon Sep 17 00:00:00 2001 -From: Ken 
Gaillot -Date: Tue, 14 Jan 2020 16:23:25 -0600 -Subject: [PATCH 04/18] Low: tools: improve error checking for crm_resource - cleanup/fail commands - -Bail earlier for misconfigured resources, and return error (rather than hang) -for unknown or offline node. Also add timeout directly to controller request -rather than rely on the controller using the interval as default timeout. ---- - tools/crm_resource_runtime.c | 54 +++++++++++++++++++++++++++----------------- - 1 file changed, 33 insertions(+), 21 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 61ceee7..2ea8bb3 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -468,8 +468,9 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - int rc = -ECOMM; - xmlNode *cmd = NULL; - xmlNode *xml_rsc = NULL; -- const char *value = NULL; - const char *router_node = host_uname; -+ const char *rsc_class = NULL; -+ const char *rsc_type = NULL; - xmlNode *params = NULL; - xmlNode *msg_data = NULL; - resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); -@@ -481,27 +482,49 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - } else if (rsc->variant != pe_native) { - CMD_ERR("We can only process primitive resources, not %s", rsc_id); - return -EINVAL; -+ } - -- } else if (host_uname == NULL) { -+ rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE); -+ if ((rsc_class == NULL) || (rsc_type == NULL)) { -+ CMD_ERR("Resource %s does not have a class and type", rsc_id); -+ return -EINVAL; -+ } -+ -+ if (host_uname == NULL) { - CMD_ERR("Please specify a node name"); - return -EINVAL; -+ - } else { -- node_t *node = pe_find_node(data_set->nodes, host_uname); -+ pe_node_t *node = pe_find_node(data_set->nodes, host_uname); - -+ if (node == NULL) { -+ CMD_ERR("Node %s not found", host_uname); -+ return -pcmk_err_node_unknown; -+ } -+ -+ if (!(node->details->online)) { 
-+ CMD_ERR("Node %s is not online", host_uname); -+ return -ENOTCONN; -+ } - if (pe__is_guest_or_remote_node(node)) { - node = pe__current_node(node->details->remote_rsc); - if (node == NULL) { - CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", - host_uname); -- return -ENXIO; -+ return -ENOTCONN; - } - router_node = node->details->uname; - } - } - -- key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); -- - msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); -+ -+ /* The controller logs the transition key from requests, so we need to have -+ * *something* for it. -+ */ -+ key = generate_transition_key(0, getpid(), 0, -+ "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); - crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); - free(key); - -@@ -519,31 +542,20 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); - } - -- value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE); -- if (value == NULL) { -- CMD_ERR("%s has no type! Aborting...", rsc_id); -- return -ENXIO; -- } -- -- value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS); -- if (value == NULL) { -- CMD_ERR("%s has no class! 
Aborting...", rsc_id); -- return -ENXIO; -- } -- -+ crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, rsc_class); - crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); -+ crm_xml_add(xml_rsc, XML_ATTR_TYPE, rsc_type); - - params = create_xml_node(msg_data, XML_TAG_ATTRS); - crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - -- key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); -+ // The controller parses the timeout from the request -+ key = crm_meta_name(XML_ATTR_TIMEOUT); - crm_xml_add(params, key, "60000"); /* 1 minute */ - free(key); - - our_pid = crm_getpid_s(); - cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); -- --/* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ - free_xml(msg_data); - - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { --- -1.8.3.1 - diff --git a/SOURCES/009-shutdown-lock.patch b/SOURCES/009-shutdown-lock.patch deleted file mode 100644 index ff73598..0000000 --- a/SOURCES/009-shutdown-lock.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 8a0e19a7702f61622d06b1c473fb3d9a5924c8f4 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 14 Jan 2020 18:07:18 -0600 -Subject: [PATCH 05/18] Refactor: liblrmd: new convenience function for - allocating lrmd_event_data_t - ---- - daemons/controld/controld_execd.c | 7 +------ - include/crm/lrmd.h | 2 ++ - lib/lrmd/lrmd_client.c | 34 +++++++++++++++++++++++++++++++++- - lib/pacemaker/pcmk_sched_transition.c | 7 +------ - lib/pacemaker/pcmk_trans_unpack.c | 9 +++------ - 5 files changed, 40 insertions(+), 19 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 82f2bf1..17cc8d6 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1878,15 +1878,10 @@ construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, cons - - CRM_ASSERT(rsc_id && operation); - -- op = calloc(1, sizeof(lrmd_event_data_t)); -- CRM_ASSERT(op != NULL); -- -+ op = 
lrmd_new_event(rsc_id, operation, 0); - op->type = lrmd_event_exec_complete; -- op->op_type = strdup(operation); - op->op_status = PCMK_LRM_OP_PENDING; - op->rc = -1; -- op->rsc_id = strdup(rsc_id); -- op->interval_ms = 0; - op->timeout = 0; - op->start_delay = 0; - -diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h -index cfa2925..3ad1f05 100644 ---- a/include/crm/lrmd.h -+++ b/include/crm/lrmd.h -@@ -248,6 +248,8 @@ typedef struct lrmd_event_data_s { - const char *exit_reason; - } lrmd_event_data_t; - -+lrmd_event_data_t *lrmd_new_event(const char *rsc_id, const char *task, -+ guint interval_ms); - lrmd_event_data_t *lrmd_copy_event(lrmd_event_data_t * event); - void lrmd_free_event(lrmd_event_data_t * event); - -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index 2469c52..d16743d 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -1,5 +1,7 @@ - /* -- * Copyright 2012-2018 David Vossel -+ * Copyright 2012-2020 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. -@@ -175,6 +177,36 @@ lrmd_key_value_freeall(lrmd_key_value_t * head) - } - } - -+/*! -+ * Create a new lrmd_event_data_t object -+ * -+ * \param[in] rsc_id ID of resource involved in event -+ * \param[in] task Action name -+ * \param[in] interval_ms Action interval -+ * -+ * \return Newly allocated and initialized lrmd_event_data_t -+ * \note This functions asserts on memory errors, so the return value is -+ * guaranteed to be non-NULL. The caller is responsible for freeing the -+ * result with lrmd_free_event(). 
-+ */ -+lrmd_event_data_t * -+lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) -+{ -+ lrmd_event_data_t *event = calloc(1, sizeof(lrmd_event_data_t)); -+ -+ CRM_ASSERT(event != NULL); -+ if (rsc_id != NULL) { -+ event->rsc_id = strdup(rsc_id); -+ CRM_ASSERT(event->rsc_id != NULL); -+ } -+ if (task != NULL) { -+ event->op_type = strdup(task); -+ CRM_ASSERT(event->op_type != NULL); -+ } -+ event->interval_ms = interval_ms; -+ return event; -+} -+ - lrmd_event_data_t * - lrmd_copy_event(lrmd_event_data_t * event) - { -diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c -index c415b75..1698c85 100644 ---- a/lib/pacemaker/pcmk_sched_transition.c -+++ b/lib/pacemaker/pcmk_sched_transition.c -@@ -131,12 +131,7 @@ create_op(xmlNode *cib_resource, const char *task, guint interval_ms, - lrmd_event_data_t *op = NULL; - xmlNode *xop = NULL; - -- op = calloc(1, sizeof(lrmd_event_data_t)); -- -- op->rsc_id = strdup(ID(cib_resource)); -- op->interval_ms = interval_ms; -- op->op_type = strdup(task); -- -+ op = lrmd_new_event(ID(cib_resource), task, interval_ms); - op->rc = outcome; - op->op_status = 0; - op->params = NULL; /* TODO: Fill me in */ -diff --git a/lib/pacemaker/pcmk_trans_unpack.c b/lib/pacemaker/pcmk_trans_unpack.c -index e57f386..3e53289 100644 ---- a/lib/pacemaker/pcmk_trans_unpack.c -+++ b/lib/pacemaker/pcmk_trans_unpack.c -@@ -298,12 +298,9 @@ convert_graph_action(xmlNode * resource, crm_action_t * action, int status, int - CRM_CHECK(action_resource != NULL, crm_log_xml_warn(action->xml, "Bad"); - return NULL); - -- op = calloc(1, sizeof(lrmd_event_data_t)); -- -- op->rsc_id = strdup(ID(action_resource)); -- op->interval_ms = action->interval_ms; -- op->op_type = strdup(crm_element_value(action->xml, XML_LRM_ATTR_TASK)); -- -+ op = lrmd_new_event(ID(action_resource), -+ crm_element_value(action->xml, XML_LRM_ATTR_TASK), -+ action->interval_ms); - op->rc = rc; - op->op_status = status; - op->t_run = 
time(NULL); --- -1.8.3.1 - diff --git a/SOURCES/009-timeout-log.patch b/SOURCES/009-timeout-log.patch new file mode 100644 index 0000000..74447bc --- /dev/null +++ b/SOURCES/009-timeout-log.patch @@ -0,0 +1,34 @@ +From b542a8f667002519fbc07693a796553746c43c12 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 11 Jul 2020 18:31:55 -0700 +Subject: [PATCH] Log: controld: Show action timer plus cluster-delay in + action_timer cb + +`action_timer_callback()` prints a misleading error message. If it +times out waiting for an action result, the error message prints the +timeout value followed by "(action timeout plus cluster-delay)". +However, only the `action->timeout` value is displayed. `cluster-delay` +is not added in. + +Resolves: RHBZ#1856035 +--- + daemons/controld/controld_te_callbacks.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c +index 8506f26..6ddaffe 100644 +--- a/daemons/controld/controld_te_callbacks.c ++++ b/daemons/controld/controld_te_callbacks.c +@@ -697,7 +697,8 @@ action_timer_callback(gpointer data) + crm_err("Node %s did not send %s result (via %s) within %dms " + "(action timeout plus cluster-delay)", + (on_node? on_node : ""), (task? task : "unknown action"), +- (via_node? via_node : "controller"), timer->timeout); ++ (via_node? via_node : "controller"), ++ timer->timeout + transition_graph->network_delay); + print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action); + + timer->action->failed = TRUE; +-- +1.8.3.1 + diff --git a/SOURCES/010-crm_mon.patch b/SOURCES/010-crm_mon.patch new file mode 100644 index 0000000..414b68a --- /dev/null +++ b/SOURCES/010-crm_mon.patch @@ -0,0 +1,476 @@ +From 7056ae08bfa5cafeec9c454cb40aefa7553af6df Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 16 Jul 2020 12:53:24 -0400 +Subject: [PATCH 1/4] Fix: libcrmcommon: Set out->priv to NULL in free_priv. 
+ +init won't do anything if priv is not NULL, so when the private data is +freed, also set it to NULL. This prevents segfaults when reset is +called. +--- + lib/common/output_html.c | 1 + + lib/common/output_log.c | 1 + + lib/common/output_text.c | 1 + + lib/common/output_xml.c | 1 + + tools/crm_mon_curses.c | 1 + + 5 files changed, 5 insertions(+) + +diff --git a/lib/common/output_html.c b/lib/common/output_html.c +index c8f0088..fc06641 100644 +--- a/lib/common/output_html.c ++++ b/lib/common/output_html.c +@@ -72,6 +72,7 @@ html_free_priv(pcmk__output_t *out) { + g_queue_free(priv->parent_q); + g_slist_free(priv->errors); + free(priv); ++ out->priv = NULL; + } + + static bool +diff --git a/lib/common/output_log.c b/lib/common/output_log.c +index 5b45ce4..0208046 100644 +--- a/lib/common/output_log.c ++++ b/lib/common/output_log.c +@@ -44,6 +44,7 @@ log_free_priv(pcmk__output_t *out) { + + g_queue_free(priv->prefixes); + free(priv); ++ out->priv = NULL; + } + + static bool +diff --git a/lib/common/output_text.c b/lib/common/output_text.c +index 54c409a..8f15849 100644 +--- a/lib/common/output_text.c ++++ b/lib/common/output_text.c +@@ -43,6 +43,7 @@ text_free_priv(pcmk__output_t *out) { + + g_queue_free(priv->parent_q); + free(priv); ++ out->priv = NULL; + } + + static bool +diff --git a/lib/common/output_xml.c b/lib/common/output_xml.c +index 8565bfe..858da3f 100644 +--- a/lib/common/output_xml.c ++++ b/lib/common/output_xml.c +@@ -54,6 +54,7 @@ xml_free_priv(pcmk__output_t *out) { + g_queue_free(priv->parent_q); + g_slist_free(priv->errors); + free(priv); ++ out->priv = NULL; + } + + static bool +diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c +index d93b847..e9cc023 100644 +--- a/tools/crm_mon_curses.c ++++ b/tools/crm_mon_curses.c +@@ -46,6 +46,7 @@ curses_free_priv(pcmk__output_t *out) { + + g_queue_free(priv->parent_q); + free(priv); ++ out->priv = NULL; + } + + static bool +-- +1.8.3.1 + + +From 3779152993ca0e88dc407c918882568217f1b630 Mon 
Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 16 Jul 2020 13:50:24 -0400 +Subject: [PATCH 2/4] Fix: libcrmcommon: Make reset and finish work more + similarly. + +When finish is called for HTML and XML output formats, various extra +nodes and headers are added, errors are added, etc. None of this stuff +happens on reset. For the HTML format, this also means things like the +CGI headers and title don't get added when reset is called. Make these +two functions much more similar. + +Regression in 2.0.3. + +See: rhbz#1857728 +--- + lib/common/output_html.c | 26 ++++++++++++++++---------- + lib/common/output_xml.c | 30 ++++++++++++++++-------------- + 2 files changed, 32 insertions(+), 24 deletions(-) + +diff --git a/lib/common/output_html.c b/lib/common/output_html.c +index fc06641..6127df2 100644 +--- a/lib/common/output_html.c ++++ b/lib/common/output_html.c +@@ -113,18 +113,11 @@ add_error_node(gpointer data, gpointer user_data) { + } + + static void +-html_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_dest) { ++finish_reset_common(pcmk__output_t *out, crm_exit_t exit_status, bool print) { + private_data_t *priv = out->priv; + htmlNodePtr head_node = NULL; + htmlNodePtr charset_node = NULL; + +- /* If root is NULL, html_init failed and we are being called from pcmk__output_free +- * in the pcmk__output_new path. +- */ +- if (priv == NULL || priv->root == NULL) { +- return; +- } +- + if (cgi_output && print) { + fprintf(out->dest, "Content-Type: text/html\n\n"); + } +@@ -174,6 +167,20 @@ html_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy + if (print) { + htmlDocDump(out->dest, priv->root->doc); + } ++} ++ ++static void ++html_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_dest) { ++ private_data_t *priv = out->priv; ++ ++ /* If root is NULL, html_init failed and we are being called from pcmk__output_free ++ * in the pcmk__output_new path. 
++ */ ++ if (priv == NULL || priv->root == NULL) { ++ return; ++ } ++ ++ finish_reset_common(out, exit_status, print); + + if (copy_dest != NULL) { + *copy_dest = copy_xml(priv->root); +@@ -185,8 +192,7 @@ html_reset(pcmk__output_t *out) { + CRM_ASSERT(out != NULL); + + if (out->priv != NULL) { +- private_data_t *priv = out->priv; +- htmlDocDump(out->dest, priv->root->doc); ++ finish_reset_common(out, CRM_EX_OK, true); + } + + html_free_priv(out); +diff --git a/lib/common/output_xml.c b/lib/common/output_xml.c +index 858da3f..b64a71d 100644 +--- a/lib/common/output_xml.c ++++ b/lib/common/output_xml.c +@@ -106,17 +106,10 @@ add_error_node(gpointer data, gpointer user_data) { + } + + static void +-xml_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_dest) { ++finish_reset_common(pcmk__output_t *out, crm_exit_t exit_status, bool print) { + xmlNodePtr node; + private_data_t *priv = out->priv; + +- /* If root is NULL, xml_init failed and we are being called from pcmk__output_free +- * in the pcmk__output_new path. +- */ +- if (priv == NULL || priv->root == NULL) { +- return; +- } +- + if (legacy_xml) { + GSList *node = priv->errors; + +@@ -148,6 +141,20 @@ xml_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_ + fprintf(out->dest, "%s", buf); + free(buf); + } ++} ++ ++static void ++xml_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_dest) { ++ private_data_t *priv = out->priv; ++ ++ /* If root is NULL, xml_init failed and we are being called from pcmk__output_free ++ * in the pcmk__output_new path. 
++ */ ++ if (priv == NULL || priv->root == NULL) { ++ return; ++ } ++ ++ finish_reset_common(out, exit_status, print); + + if (copy_dest != NULL) { + *copy_dest = copy_xml(priv->root); +@@ -156,15 +163,10 @@ xml_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy_ + + static void + xml_reset(pcmk__output_t *out) { +- char *buf = NULL; +- + CRM_ASSERT(out != NULL); + + if (out->priv != NULL) { +- private_data_t *priv = out->priv; +- buf = dump_xml_formatted_with_text(priv->root); +- fprintf(out->dest, "%s", buf); +- free(buf); ++ finish_reset_common(out, CRM_EX_OK, true); + } + + xml_free_priv(out); +-- +1.8.3.1 + + +From 0f8e4ca5d9a429c934f1e91a1bdf572efd07e0db Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 16 Jul 2020 16:09:08 -0400 +Subject: [PATCH 3/4] Fix: tools, libcrmcommon: Reopen the output dest on + reset. + +This is needed when running crm_mon as a daemon. When we do a reset, +we need to clear out any existing output destination and start writing +again from the beginning. This really only matters when the destination +is a file. + +The extra freopen at the end of crm_mon is to handle when crm_mon is +killed. We need to reset the output file to its beginning before +calling finish. 
+--- + lib/common/output_html.c | 3 +++ + lib/common/output_log.c | 3 +++ + lib/common/output_text.c | 3 +++ + lib/common/output_xml.c | 3 +++ + tools/crm_mon.c | 9 +++++++++ + 5 files changed, 21 insertions(+) + +diff --git a/lib/common/output_html.c b/lib/common/output_html.c +index 6127df2..6e21031 100644 +--- a/lib/common/output_html.c ++++ b/lib/common/output_html.c +@@ -191,6 +191,9 @@ static void + html_reset(pcmk__output_t *out) { + CRM_ASSERT(out != NULL); + ++ out->dest = freopen(NULL, "w", out->dest); ++ CRM_ASSERT(out->dest != NULL); ++ + if (out->priv != NULL) { + finish_reset_common(out, CRM_EX_OK, true); + } +diff --git a/lib/common/output_log.c b/lib/common/output_log.c +index 0208046..8422ac2 100644 +--- a/lib/common/output_log.c ++++ b/lib/common/output_log.c +@@ -72,6 +72,9 @@ static void + log_reset(pcmk__output_t *out) { + CRM_ASSERT(out != NULL); + ++ out->dest = freopen(NULL, "w", out->dest); ++ CRM_ASSERT(out->dest != NULL); ++ + log_free_priv(out); + log_init(out); + } +diff --git a/lib/common/output_text.c b/lib/common/output_text.c +index 8f15849..2f7e5b0 100644 +--- a/lib/common/output_text.c ++++ b/lib/common/output_text.c +@@ -75,6 +75,9 @@ static void + text_reset(pcmk__output_t *out) { + CRM_ASSERT(out != NULL); + ++ out->dest = freopen(NULL, "w", out->dest); ++ CRM_ASSERT(out->dest != NULL); ++ + text_free_priv(out); + text_init(out); + } +diff --git a/lib/common/output_xml.c b/lib/common/output_xml.c +index b64a71d..9f8e01b 100644 +--- a/lib/common/output_xml.c ++++ b/lib/common/output_xml.c +@@ -165,6 +165,9 @@ static void + xml_reset(pcmk__output_t *out) { + CRM_ASSERT(out != NULL); + ++ out->dest = freopen(NULL, "w", out->dest); ++ CRM_ASSERT(out->dest != NULL); ++ + if (out->priv != NULL) { + finish_reset_common(out, CRM_EX_OK, true); + } +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index b2e143b..10624c1 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -2014,6 +2014,10 @@ mon_refresh_display(gpointer user_data) + 
break; + } + ++ if (options.daemonize) { ++ out->reset(out); ++ } ++ + stonith_history_free(stonith_history); + stonith_history = NULL; + pe_reset_working_set(mon_data_set); +@@ -2179,6 +2183,11 @@ clean_up(crm_exit_t exit_code) + * crm_mon to be able to do so. + */ + if (out != NULL) { ++ if (options.daemonize) { ++ out->dest = freopen(NULL, "w", out->dest); ++ CRM_ASSERT(out->dest != NULL); ++ } ++ + switch (output_format) { + case mon_output_cgi: + case mon_output_html: +-- +1.8.3.1 + + +From b655c039414d2c7af77c3532222b04684ef1f3d0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Jul 2020 10:58:32 -0400 +Subject: [PATCH 4/4] Fix: tools: Add the http-equiv header to crm_mon at the + right time. + +This header is only getting added on termination, which is not a lot of +help if you're running crm_mon in daemonize mode. Putting this header +in at the right time requires a couple changes: + +* pcmk__html_add_header doesn't need a parent argument. It was not +being used in the first place. + +* The extra_headers list in output_html.c should not be freed in the +reset function. This means it would get freed after every time the +daemonized output is refreshed, which means the header would have to be +added every time too. extra_headers will now only be freed when the +program exits. This is a behavior change, but I can't see why it's +going to be a problem. + +* To support that, we need to copy each item in the extra_headers list +when it gets added to the output XML document. This prevents segfaults +when we later free that document. + +* handle_html_output no longer needs to exist. That function only +existed to add the http-equiv header at the end, which is wrong. 
+--- + include/crm/common/output.h | 5 ++--- + lib/common/output_html.c | 7 ++++--- + tools/crm_mon.c | 26 +++++++------------------- + 3 files changed, 13 insertions(+), 25 deletions(-) + +diff --git a/include/crm/common/output.h b/include/crm/common/output.h +index e7c9417..186bcfe 100644 +--- a/include/crm/common/output.h ++++ b/include/crm/common/output.h +@@ -703,15 +703,14 @@ pcmk__output_create_html_node(pcmk__output_t *out, const char *element_name, con + * the following code would generate the tag "<meta http-equiv='refresh' content='19'>": + * + * \code +- * pcmk__html_add_header(parent, "meta", "http-equiv", "refresh", "content", "19", NULL); ++ * pcmk__html_add_header("meta", "http-equiv", "refresh", "content", "19", NULL); + * \endcode + * +- * \param[in,out] parent The node that will be the parent of the new node. + * \param[in] name The HTML tag for the new node. + * \param[in] ... A NULL-terminated key/value list of attributes. + */ + void +-pcmk__html_add_header(xmlNodePtr parent, const char *name, ...) ++pcmk__html_add_header(const char *name, ...) + G_GNUC_NULL_TERMINATED; + + #ifdef __cplusplus +diff --git a/lib/common/output_html.c b/lib/common/output_html.c +index 6e21031..259e412 100644 +--- a/lib/common/output_html.c ++++ b/lib/common/output_html.c +@@ -139,7 +139,7 @@ finish_reset_common(pcmk__output_t *out, crm_exit_t exit_status, bool print) { + + /* Add any extra header nodes the caller might have created. */ + for (int i = 0; i < g_slist_length(extra_headers); i++) { +- xmlAddChild(head_node, g_slist_nth_data(extra_headers, i)); ++ xmlAddChild(head_node, xmlCopyNode(g_slist_nth_data(extra_headers, i), 1)); + } + + /* Stylesheets are included two different ways. 
The first is via a built-in +@@ -185,6 +185,8 @@ html_finish(pcmk__output_t *out, crm_exit_t exit_status, bool print, void **copy + if (copy_dest != NULL) { + *copy_dest = copy_xml(priv->root); + } ++ ++ g_slist_free_full(extra_headers, (GDestroyNotify) xmlFreeNode); + } + + static void +@@ -199,7 +201,6 @@ html_reset(pcmk__output_t *out) { + } + + html_free_priv(out); +- g_slist_free_full(extra_headers, (GDestroyNotify) xmlFreeNode); + html_init(out); + } + +@@ -412,7 +413,7 @@ pcmk__output_create_html_node(pcmk__output_t *out, const char *element_name, con + } + + void +-pcmk__html_add_header(xmlNodePtr parent, const char *name, ...) { ++pcmk__html_add_header(const char *name, ...) { + htmlNodePtr header_node; + va_list ap; + +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index 10624c1..7fd2b9c 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -1346,6 +1346,12 @@ main(int argc, char **argv) + options.mon_ops |= mon_op_print_timing | mon_op_inactive_resources; + } + ++ if ((output_format == mon_output_html || output_format == mon_output_cgi) && ++ out->dest != stdout) { ++ pcmk__html_add_header("meta", "http-equiv", "refresh", "content", ++ crm_itoa(options.reconnect_msec/1000), NULL); ++ } ++ + crm_info("Starting %s", crm_system_name); + + if (cib) { +@@ -2106,15 +2112,6 @@ clean_up_connections(void) + } + } + +-static void +-handle_html_output(crm_exit_t exit_code) { +- xmlNodePtr html = NULL; +- +- pcmk__html_add_header(html, "meta", "http-equiv", "refresh", "content", +- crm_itoa(options.reconnect_msec/1000), NULL); +- out->finish(out, exit_code, true, (void **) &html); +-} +- + /* + * De-init ncurses, disconnect from the CIB manager, disconnect fencing, + * deallocate memory and show usage-message if requested. 
+@@ -2188,16 +2185,7 @@ clean_up(crm_exit_t exit_code) + CRM_ASSERT(out->dest != NULL); + } + +- switch (output_format) { +- case mon_output_cgi: +- case mon_output_html: +- handle_html_output(exit_code); +- break; +- +- default: +- out->finish(out, exit_code, true, NULL); +- break; +- } ++ out->finish(out, exit_code, true, NULL); + + pcmk__output_free(out); + pcmk__unregister_formats(); +-- +1.8.3.1 + diff --git a/SOURCES/010-shutdown-lock.patch b/SOURCES/010-shutdown-lock.patch deleted file mode 100644 index 6304246..0000000 --- a/SOURCES/010-shutdown-lock.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 50b0944c8add3f16b8190e75a6d06c3473c12a8f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 21 Nov 2019 14:48:02 -0600 -Subject: [PATCH 06/18] Feature: scheduler: add shutdown lock cluster options - -This commit adds shutdown-lock and shutdown-lock-limit options (just the -options, not the feature itself). - -shutdown-lock defaults to false, which preserves current behavior. The intended -purpose of setting it to true is to *prevent* recovery of a node's resources -elsewhere when the node is cleanly shut down, until the node rejoins. If -shutdown-lock-limit is set to a nonzero time duration, the cluster will -be allowed to recover the resources if the node has not rejoined within this -time. - -The use case is when rebooting a node (such as for software updates) is done by -cluster-unaware system administrators during scheduled maintenance windows, -resources prefer specific nodes, and resource recovery time is high. 
---- - include/crm/msg_xml.h | 4 +++- - include/crm/pengine/pe_types.h | 2 ++ - lib/pengine/common.c | 24 +++++++++++++++++++++++- - lib/pengine/unpack.c | 10 ++++++++++ - 4 files changed, 38 insertions(+), 2 deletions(-) - -diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h -index d56e40c..d0cdf6c 100644 ---- a/include/crm/msg_xml.h -+++ b/include/crm/msg_xml.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -346,6 +346,8 @@ extern "C" { - # define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation" - # define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval" - # define XML_CONFIG_ATTR_FENCE_REACTION "fence-reaction" -+# define XML_CONFIG_ATTR_SHUTDOWN_LOCK "shutdown-lock" -+# define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT "shutdown-lock-limit" - - # define XML_ALERT_ATTR_PATH "path" - # define XML_ALERT_ATTR_TIMEOUT "timeout" -diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h -index 23e1c46..8a735a3 100644 ---- a/include/crm/pengine/pe_types.h -+++ b/include/crm/pengine/pe_types.h -@@ -102,6 +102,7 @@ enum pe_find { - # define pe_flag_start_failure_fatal 0x00001000ULL - # define pe_flag_remove_after_stop 0x00002000ULL - # define pe_flag_startup_fencing 0x00004000ULL -+# define pe_flag_shutdown_lock 0x00008000ULL - - # define pe_flag_startup_probes 0x00010000ULL - # define pe_flag_have_status 0x00020000ULL -@@ -167,6 +168,7 @@ struct pe_working_set_s { - GList *stop_needed; // Containers that need stop actions - time_t recheck_by; // Hint to controller to re-run scheduler by this time - int ninstances; // Total number of resource instances -+ guint shutdown_lock;// How long (seconds) to lock resources to shutdown node - }; - - enum pe_check_parameters { -diff --git a/lib/pengine/common.c b/lib/pengine/common.c -index da39c99..e72a033 100644 ---- 
a/lib/pengine/common.c -+++ b/lib/pengine/common.c -@@ -1,5 +1,7 @@ - /* -- * Copyright 2004-2018 Andrew Beekhof -+ * Copyright 2004-2020 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. -@@ -85,6 +87,26 @@ static pe_cluster_option pe_opts[] = { - "When set to TRUE, the cluster will immediately ban a resource from a node if it fails to start there. When FALSE, the cluster will instead check the resource's fail count against its migration-threshold." }, - { "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean, - "Should the cluster check for active resources during startup", NULL }, -+ { -+ XML_CONFIG_ATTR_SHUTDOWN_LOCK, -+ NULL, "boolean", NULL, "false", &check_boolean, -+ "Whether to lock resources to a cleanly shut down node", -+ "When true, resources active on a node when it is cleanly shut down " -+ "are kept \"locked\" to that node (not allowed to run elsewhere) " -+ "until they start again on that node after it rejoins (or for at " -+ "most shutdown-lock-limit, if set). Stonith resources and " -+ "Pacemaker Remote connections are never locked. Clone and bundle " -+ "instances and the master role of promotable clones are currently " -+ "never locked, though support could be added in a future release." -+ }, -+ { -+ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, -+ NULL, "time", NULL, "0", &check_timer, -+ "Do not lock resources to a cleanly shut down node longer than this", -+ "If shutdown-lock is true and this is set to a nonzero time duration, " -+ "shutdown locks will expire after this much time has passed since " -+ "the shutdown was initiated, even if the node has not rejoined." 
-+ }, - - /* Stonith Options */ - { "stonith-enabled", NULL, "boolean", NULL, "true", &check_boolean, -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index c9fc672..8c0d72a 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -319,6 +319,16 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) - data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); - crm_trace("Placement strategy: %s", data_set->placement_strategy); - -+ set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock); -+ crm_trace("Resources will%s be locked to cleanly shut down nodes", -+ (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not")); -+ if (is_set(data_set->flags, pe_flag_shutdown_lock)) { -+ value = pe_pref(data_set->config_hash, -+ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); -+ data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000; -+ crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock); -+ } -+ - return TRUE; - } - --- -1.8.3.1 - diff --git a/SOURCES/011-cts.patch b/SOURCES/011-cts.patch new file mode 100644 index 0000000..1b0ec7f --- /dev/null +++ b/SOURCES/011-cts.patch @@ -0,0 +1,270 @@ +From 4e190ebc5460563bae2586b28afb0415f2eb3d1a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Jul 2020 20:38:16 -0500 +Subject: [PATCH 1/4] Test: CTS: libqb shared memory creates directories now + +... 
so use "rm -rf" instead of "rm -f" +--- + cts/CTS.py.in | 2 +- + cts/CTSaudits.py | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cts/CTS.py.in b/cts/CTS.py.in +index c418318..091bb1f 100644 +--- a/cts/CTS.py.in ++++ b/cts/CTS.py.in +@@ -546,7 +546,7 @@ class ClusterManager(UserDict): + if self.rsh(node, self.templates["StopCmd"]) == 0: + # Make sure we can continue even if corosync leaks + # fdata-* is the old name +- #self.rsh(node, "rm -f /dev/shm/qb-* /dev/shm/fdata-*") ++ #self.rsh(node, "rm -rf /dev/shm/qb-* /dev/shm/fdata-*") + self.ShouldBeStatus[node] = "down" + self.cluster_stable(self.Env["DeadTime"]) + return 1 +diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py +index b7e0827..cc82171 100755 +--- a/cts/CTSaudits.py ++++ b/cts/CTSaudits.py +@@ -233,7 +233,7 @@ class FileAudit(ClusterAudit): + for line in lsout: + self.CM.debug("ps[%s]: %s" % (node, line)) + +- self.CM.rsh(node, "rm -f /dev/shm/qb-*") ++ self.CM.rsh(node, "rm -rf /dev/shm/qb-*") + + else: + self.CM.debug("Skipping %s" % node) +-- +1.8.3.1 + + +From 4316507d50d51c7864d8d34aac1da31a232b9f42 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Jul 2020 16:09:20 -0500 +Subject: [PATCH 2/4] Test: CTS: ignore error logged by recent pcs versions + +... 
because it is expected when a node is fenced, and we should already see +pacemaker errors if a node is unexpectedly fenced +--- + cts/patterns.py | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/cts/patterns.py b/cts/patterns.py +index 96d6471..7eed90c 100644 +--- a/cts/patterns.py ++++ b/cts/patterns.py +@@ -21,6 +21,10 @@ class BasePatterns(object): + + # Logging bug in some versions of libvirtd + r"libvirtd.*: internal error: Failed to parse PCI config address", ++ ++ # pcs can log this when node is fenced, but fencing is OK in some ++ # tests (and we will catch it in pacemaker logs when not OK) ++ r"pcs.daemon:No response from: .* request: get_configs, error:", + ] + self.BadNews = [] + self.components = {} +-- +1.8.3.1 + + +From 598ae0f65bad6ed16978d1ab6e24e8e358e0a1a4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Jul 2020 20:40:00 -0500 +Subject: [PATCH 3/4] Low: libcrmcommon: avoid assertion on controller protocol + errors + +Previously, after a protocol error, we would set reply to NULL and then try to +call crm_element_value() on it, which would log an assertion. 
+--- + lib/common/ipc_controld.c | 46 ++++++++++++++++++++++------------------------ + 1 file changed, 22 insertions(+), 24 deletions(-) + +diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c +index 5917cc5..22cb9e0 100644 +--- a/lib/common/ipc_controld.c ++++ b/lib/common/ipc_controld.c +@@ -187,53 +187,51 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + crm_debug("Unrecognizable controller message: invalid message type '%s'", + crm_str(value)); + status = CRM_EX_PROTOCOL; +- reply = NULL; ++ goto done; + } + + if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { + crm_debug("Unrecognizable controller message: no reference"); + status = CRM_EX_PROTOCOL; +- reply = NULL; ++ goto done; + } + + value = crm_element_value(reply, F_CRM_TASK); + if (value == NULL) { + crm_debug("Unrecognizable controller message: no command name"); + status = CRM_EX_PROTOCOL; +- reply = NULL; ++ goto done; + } + + // Parse useful info from reply + +- if (reply != NULL) { +- reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION); +- reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM); +- msg_data = get_message_xml(reply, F_CRM_DATA); ++ reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION); ++ reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM); ++ msg_data = get_message_xml(reply, F_CRM_DATA); + +- if (!strcmp(value, CRM_OP_REPROBE)) { +- reply_data.reply_type = pcmk_controld_reply_reprobe; ++ if (!strcmp(value, CRM_OP_REPROBE)) { ++ reply_data.reply_type = pcmk_controld_reply_reprobe; + +- } else if (!strcmp(value, CRM_OP_NODE_INFO)) { +- set_node_info_data(&reply_data, msg_data); ++ } else if (!strcmp(value, CRM_OP_NODE_INFO)) { ++ set_node_info_data(&reply_data, msg_data); + +- } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) { +- reply_data.reply_type = pcmk_controld_reply_resource; +- reply_data.data.resource.node_state = msg_data; ++ } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) { ++ reply_data.reply_type = 
pcmk_controld_reply_resource; ++ reply_data.data.resource.node_state = msg_data; + +- } else if (!strcmp(value, CRM_OP_PING)) { +- set_ping_data(&reply_data, msg_data); ++ } else if (!strcmp(value, CRM_OP_PING)) { ++ set_ping_data(&reply_data, msg_data); + +- } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) { +- set_nodes_data(&reply_data, msg_data); ++ } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) { ++ set_nodes_data(&reply_data, msg_data); + +- } else { +- crm_debug("Unrecognizable controller message: unknown command '%s'", +- value); +- status = CRM_EX_PROTOCOL; +- reply = NULL; +- } ++ } else { ++ crm_debug("Unrecognizable controller message: unknown command '%s'", ++ value); ++ status = CRM_EX_PROTOCOL; + } + ++done: + pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); + + // Free any reply data that was allocated +-- +1.8.3.1 + + +From 5ae4101b60f8c0cd96eb2097a65a59aaa1750d73 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 17 Jul 2020 17:20:23 -0500 +Subject: [PATCH 4/4] Log: fencer: don't log assertion if unable to create full + request reply + +Previously, we would log an assertion and a warning if asked to create a reply +to a NULL request. However there is a possible sequence for this to happen: + +- Some nodes are up and some down at cluster start-up +- One node is elected DC and schedules fencing of the down nodes +- Fencing is initiated for one of the down nodes +- One of the other down nodes comes up and is elected DC +- The fencing result comes back and all peers (including new DC) are notified +- New DC tries to create a notification for its client (the controller) + but doesn't know anything about the initial request + +For now, just log a warning and drop the assertion. Longer term, maybe we +should synchronize in-flight request information when a fencer joins the +process group. 
+--- + daemons/fenced/fenced_commands.c | 55 +++++++++++++++++++++++----------------- + 1 file changed, 32 insertions(+), 23 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 05c5437..9c27d61 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2336,22 +2336,8 @@ stonith_fence(xmlNode * msg) + xmlNode * + stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) + { +- int lpc = 0; + xmlNode *reply = NULL; + +- const char *name = NULL; +- const char *value = NULL; +- +- const char *names[] = { +- F_STONITH_OPERATION, +- F_STONITH_CALLID, +- F_STONITH_CLIENTID, +- F_STONITH_CLIENTNAME, +- F_STONITH_REMOTE_OP_ID, +- F_STONITH_CALLOPTS +- }; +- +- crm_trace("Creating a basic reply"); + reply = create_xml_node(NULL, T_STONITH_REPLY); + + crm_xml_add(reply, "st_origin", __FUNCTION__); +@@ -2359,16 +2345,39 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i + crm_xml_add(reply, "st_output", output); + crm_xml_add_int(reply, F_STONITH_RC, rc); + +- CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); +- for (lpc = 0; lpc < DIMOF(names); lpc++) { +- name = names[lpc]; +- value = crm_element_value(request, name); +- crm_xml_add(reply, name, value); +- } ++ if (request == NULL) { ++ /* Most likely, this is the result of a stonith operation that was ++ * initiated before we came up. Unfortunately that means we lack enough ++ * information to provide clients with a full result. ++ * ++ * @TODO Maybe synchronize this information at start-up? 
++ */ ++ crm_warn("Missing request information for client notifications for " ++ "operation with result %d (initiated before we came up?)", rc); + +- if (data != NULL) { +- crm_trace("Attaching reply output"); +- add_message_xml(reply, F_STONITH_CALLDATA, data); ++ } else { ++ const char *name = NULL; ++ const char *value = NULL; ++ ++ const char *names[] = { ++ F_STONITH_OPERATION, ++ F_STONITH_CALLID, ++ F_STONITH_CLIENTID, ++ F_STONITH_CLIENTNAME, ++ F_STONITH_REMOTE_OP_ID, ++ F_STONITH_CALLOPTS ++ }; ++ ++ crm_trace("Creating a result reply with%s reply output (rc=%d)", ++ (data? "" : "out"), rc); ++ for (int lpc = 0; lpc < DIMOF(names); lpc++) { ++ name = names[lpc]; ++ value = crm_element_value(request, name); ++ crm_xml_add(reply, name, value); ++ } ++ if (data != NULL) { ++ add_message_xml(reply, F_STONITH_CALLDATA, data); ++ } + } + return reply; + } +-- +1.8.3.1 + diff --git a/SOURCES/011-shutdown-lock.patch b/SOURCES/011-shutdown-lock.patch deleted file mode 100644 index e9f1f5c..0000000 --- a/SOURCES/011-shutdown-lock.patch +++ /dev/null @@ -1,144 +0,0 @@ -From f5d88938955f63935058b7cc2d706a12e6ea1121 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 6 Dec 2019 11:57:59 -0600 -Subject: [PATCH 07/18] Low: scheduler: respect shutdown locks when placing - active resources - -Use new pe_resource_t members to indicate that a resource is locked to a -particular node. - -For active resources (i.e. in the transition where the node is scheduled for -shutdown), these are connected by checking each lockable resource for whether -it is running on a single clean node that is shutting down. - -When applying constraints, place -INFINITY location constraints for locked -resources on all nodes other than the lock node. - -(Inactive resources -- i.e. in later transitions after the node is shut down -- -are not yet locked.) 
---- - include/crm/pengine/pe_types.h | 2 + - lib/pacemaker/pcmk_sched_allocate.c | 87 +++++++++++++++++++++++++++++++++++++ - 2 files changed, 89 insertions(+) - -diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h -index 8a735a3..123d8ef 100644 ---- a/include/crm/pengine/pe_types.h -+++ b/include/crm/pengine/pe_types.h -@@ -354,6 +354,8 @@ struct pe_resource_s { - GListPtr fillers; - - pe_node_t *pending_node; // Node on which pending_task is happening -+ pe_node_t *lock_node; // Resource is shutdown-locked to this node -+ time_t lock_time; // When shutdown lock started - - #if ENABLE_VERSIONED_ATTRS - xmlNode *versioned_parameters; -diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c -index fc2f4cf..0314f1b 100644 ---- a/lib/pacemaker/pcmk_sched_allocate.c -+++ b/lib/pacemaker/pcmk_sched_allocate.c -@@ -977,6 +977,87 @@ rsc_discover_filter(resource_t *rsc, node_t *node) - } - } - -+static time_t -+shutdown_time(pe_node_t *node, pe_working_set_t *data_set) -+{ -+ const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); -+ time_t result = 0; -+ -+ if (shutdown) { -+ errno = 0; -+ result = (time_t) crm_int_helper(shutdown, NULL); -+ if (errno != 0) { -+ result = 0; -+ } -+ } -+ return result? 
result : get_effective_time(data_set); -+} -+ -+static void -+apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) -+{ -+ const char *class; -+ -+ // Only primitives and (uncloned) groups may be locked -+ if (rsc->variant == pe_group) { -+ for (GList *item = rsc->children; item != NULL; -+ item = item->next) { -+ apply_shutdown_lock((pe_resource_t *) item->data, data_set); -+ } -+ } else if (rsc->variant != pe_native) { -+ return; -+ } -+ -+ // Fence devices and remote connections can't be locked -+ class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -+ if ((class == NULL) || !strcmp(class, PCMK_RESOURCE_CLASS_STONITH) -+ || pe__resource_is_remote_conn(rsc, data_set)) { -+ return; -+ } -+ -+ // Only a resource active on exactly one node can be locked -+ if (pcmk__list_of_1(rsc->running_on)) { -+ pe_node_t *node = rsc->running_on->data; -+ -+ if (node->details->shutdown) { -+ if (node->details->unclean) { -+ pe_rsc_debug(rsc, "Not locking %s to unclean %s for shutdown", -+ rsc->id, node->details->uname); -+ } else { -+ rsc->lock_node = node; -+ rsc->lock_time = shutdown_time(node, data_set); -+ } -+ } -+ } -+ -+ if (rsc->lock_node == NULL) { -+ // No lock needed -+ return; -+ } -+ -+ if (data_set->shutdown_lock > 0) { -+ time_t lock_expiration = rsc->lock_time + data_set->shutdown_lock; -+ -+ pe_rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)", -+ rsc->id, rsc->lock_node->details->uname, -+ (long long) lock_expiration); -+ pe__update_recheck_time(++lock_expiration, data_set); -+ } else { -+ pe_rsc_info(rsc, "Locking %s to %s due to shutdown", -+ rsc->id, rsc->lock_node->details->uname); -+ } -+ -+ // If resource is locked to one node, ban it from all other nodes -+ for (GList *item = data_set->nodes; item != NULL; item = item->next) { -+ pe_node_t *node = item->data; -+ -+ if (strcmp(node->details->uname, rsc->lock_node->details->uname)) { -+ resource_location(rsc, node, -CRM_SCORE_INFINITY, -+ XML_CONFIG_ATTR_SHUTDOWN_LOCK, 
data_set); -+ } -+ } -+} -+ - /* - * Count how many valid nodes we have (so we know the maximum number of - * colors we can resolve). -@@ -988,6 +1069,12 @@ stage2(pe_working_set_t * data_set) - { - GListPtr gIter = NULL; - -+ if (is_set(data_set->flags, pe_flag_shutdown_lock)) { -+ for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { -+ apply_shutdown_lock((pe_resource_t *) gIter->data, data_set); -+ } -+ } -+ - for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { - node_t *node = (node_t *) gIter->data; - --- -1.8.3.1 - diff --git a/SOURCES/012-ipc_fix.patch b/SOURCES/012-ipc_fix.patch new file mode 100644 index 0000000..234b215 --- /dev/null +++ b/SOURCES/012-ipc_fix.patch @@ -0,0 +1,85 @@ +From f7389ac6f67804f20393951462a59a0b505dfe03 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Jul 2020 16:41:18 -0500 +Subject: [PATCH] Fix: executor: only send executor notifications to executor + clients + +This bug has existed since Pacemaker Remote was first implemented, but was +hidden until crm_node -l/-p was recently modified to go through controller IPC, +because other command-line IPC API clients either fire-and-forget IPC requests +or merely count replies, rather than parse the content of replies. + +Previously, when the executor sent notifications of results, it broadcast the +notification to all IPC clients. Normally this behavior makes sense, but for +the executor in particular, it may be running as pacemaker-remoted, in which +case its IPC clients include not only clients that connected to the executor +IPC, but clients that connected via proxy to other IPC APIs on the cluster node +hosting the remote connection. + +With crm_node -l/-p, this meant that it was possible for an executor API +notification to arrive while crm_node was waiting for a controller IPC reply. +It would not find the information it needed and would report a protocol +violation error. 
+ +The fix is to send executor notifications only to executor clients. +--- + daemons/execd/execd_commands.c | 9 +++++++++ + daemons/execd/remoted_proxy.c | 5 +++++ + include/crm/common/ipc_internal.h | 5 +++-- + 3 files changed, 17 insertions(+), 2 deletions(-) + +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index aaf2976..685fcc7 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -507,6 +507,15 @@ send_client_notify(gpointer key, gpointer value, gpointer user_data) + crm_trace("Skipping notification to client without name"); + return; + } ++ if (is_set(client->flags, pcmk__client_to_proxy)) { ++ /* We only want to notify clients of the executor IPC API. If we are ++ * running as Pacemaker Remote, we may have clients proxied to other ++ * IPC services in the cluster, so skip those. ++ */ ++ crm_trace("Skipping executor API notification to %s IPC client", ++ client->name); ++ return; ++ } + + rc = lrmd_server_send_notify(client, update_msg); + if (rc == pcmk_rc_ok) { +diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c +index dda7eed..5c58de4 100644 +--- a/daemons/execd/remoted_proxy.c ++++ b/daemons/execd/remoted_proxy.c +@@ -88,6 +88,11 @@ ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc + client->userdata = strdup(ipc_proxy->id); + client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id); + ++ /* Allow remote executor to distinguish between proxied local clients and ++ * actual executor API clients ++ */ ++ set_bit(client->flags, pcmk__client_to_proxy); ++ + g_hash_table_insert(ipc_clients, client->id, client); + + msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +index 6a1fcf3..91b3435 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -121,8 +121,9 @@ struct pcmk__remote_s { + }; + + 
enum pcmk__client_flags { +- pcmk__client_proxied = 0x00001, /* ipc_proxy code only */ +- pcmk__client_privileged = 0x00002, /* root or cluster user */ ++ pcmk__client_proxied = (1 << 0), // Remote client behind proxy ++ pcmk__client_privileged = (1 << 1), // root or cluster user ++ pcmk__client_to_proxy = (1 << 2), // Local client to be proxied + }; + + struct pcmk__client_s { +-- +1.8.3.1 + diff --git a/SOURCES/012-shutdown-lock.patch b/SOURCES/012-shutdown-lock.patch deleted file mode 100644 index c700d96..0000000 --- a/SOURCES/012-shutdown-lock.patch +++ /dev/null @@ -1,202 +0,0 @@ -From 16f57bb79de4f88c2def174e3bb7d8ef312674cd Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 6 Dec 2019 12:17:03 -0600 -Subject: [PATCH 08/18] Low: scheduler: respect shutdown locks when placing - inactive resources - -When shutdown-lock is enabled, and we're either scheduling a resource stop -on a node that's cleanly shutting down or scheduling any action for a -previously locked resource, add "shutdown-lock=" to the -graph action. The controller will be able to use this to know when to preserve -the lock (by adding the lock time to the resource state entry). - -When the scheduler unpacks a resource state entry with a lock, it will remember -the lock node and lock time, which will trigger existing code for applying -shutdown locks. 
---- - lib/pacemaker/pcmk_sched_allocate.c | 17 ++++++++++++- - lib/pacemaker/pcmk_sched_graph.c | 30 ++++++++++++++++++++++- - lib/pengine/unpack.c | 49 +++++++++++++++++++++++++++++++++---- - 3 files changed, 89 insertions(+), 7 deletions(-) - -diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c -index 0314f1b..884e1bd 100644 ---- a/lib/pacemaker/pcmk_sched_allocate.c -+++ b/lib/pacemaker/pcmk_sched_allocate.c -@@ -1015,8 +1015,23 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) - return; - } - -+ if (rsc->lock_node != NULL) { -+ // The lock was obtained from resource history -+ -+ if (rsc->running_on != NULL) { -+ /* The resource was started elsewhere even though it is now -+ * considered locked. This shouldn't be possible, but as a -+ * failsafe, we don't want to disturb the resource now. -+ */ -+ pe_rsc_info(rsc, -+ "Cancelling shutdown lock because %s is already active", -+ rsc->id); -+ rsc->lock_node = NULL; -+ rsc->lock_time = 0; -+ } -+ - // Only a resource active on exactly one node can be locked -- if (pcmk__list_of_1(rsc->running_on)) { -+ } else if (pcmk__list_of_1(rsc->running_on)) { - pe_node_t *node = rsc->running_on->data; - - if (node->details->shutdown) { -diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c -index a6967fe..2861f3d 100644 ---- a/lib/pacemaker/pcmk_sched_graph.c -+++ b/lib/pacemaker/pcmk_sched_graph.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -988,6 +988,26 @@ add_downed_nodes(xmlNode *xml, const action_t *action, - } - } - -+static bool -+should_lock_action(pe_action_t *action) -+{ -+ // Only actions taking place on resource's lock node are locked -+ if ((action->rsc->lock_node == NULL) || (action->node == NULL) -+ || (action->node->details != action->rsc->lock_node->details)) { -+ return false; -+ } -+ -+ /* During shutdown, only stops are locked (otherwise, another action such as -+ * a demote would cause the controller to clear the lock) -+ */ -+ if (action->node->details->shutdown && action->task -+ && strcmp(action->task, RSC_STOP)) { -+ return false; -+ } -+ -+ return true; -+} -+ - static xmlNode * - action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - { -@@ -1097,6 +1117,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - XML_ATTR_TYPE - }; - -+ /* If a resource is locked to a node via shutdown-lock, mark its actions -+ * so the controller can preserve the lock when the action completes. 
-+ */ -+ if (should_lock_action(action)) { -+ crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, -+ (long long) action->rsc->lock_time); -+ } -+ - // List affected resource - - rsc_xml = create_xml_node(action_xml, -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 8c0d72a..5139e60 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -12,6 +12,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1059,7 +1060,8 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) - crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED); - - } else if (!pe__is_guest_node(this_node) -- && rsc->role == RSC_ROLE_STARTED) { -+ && ((rsc->role == RSC_ROLE_STARTED) -+ || is_set(data_set->flags, pe_flag_shutdown_lock))) { - check = TRUE; - crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED); - } -@@ -1075,6 +1077,9 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) - - } else if (fence) { - process = TRUE; -+ -+ } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) { -+ process = TRUE; - } - - if(process) { -@@ -2198,6 +2203,28 @@ calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) - } - } - -+// If resource history entry has shutdown lock, remember lock node and time -+static void -+unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, -+ pe_working_set_t *data_set) -+{ -+ time_t lock_time = 0; // When lock started (i.e. 
node shutdown time) -+ -+ if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK, -+ &lock_time) == pcmk_ok) && (lock_time != 0)) { -+ -+ if ((data_set->shutdown_lock > 0) -+ && (get_effective_time(data_set) -+ > (lock_time + data_set->shutdown_lock))) { -+ pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", -+ rsc->id, node->details->uname); -+ } else { -+ rsc->lock_node = node; -+ rsc->lock_time = lock_time; -+ } -+ } -+} -+ - static resource_t * - unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) - { -@@ -2234,18 +2261,30 @@ unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data - } - } - -- if (op_list == NULL) { -- /* if there are no operations, there is nothing to do */ -- return NULL; -+ if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) { -+ if (op_list == NULL) { -+ // If there are no operations, there is nothing to do -+ return NULL; -+ } - } - - /* find the resource */ - rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); - if (rsc == NULL) { -- rsc = process_orphan_resource(rsc_entry, node, data_set); -+ if (op_list == NULL) { -+ // If there are no operations, there is nothing to do -+ return NULL; -+ } else { -+ rsc = process_orphan_resource(rsc_entry, node, data_set); -+ } - } - CRM_ASSERT(rsc != NULL); - -+ // Check whether the resource is "shutdown-locked" to this node -+ if (is_set(data_set->flags, pe_flag_shutdown_lock)) { -+ unpack_shutdown_lock(rsc_entry, rsc, node, data_set); -+ } -+ - /* process operations */ - saved_role = rsc->role; - on_fail = action_fail_ignore; --- -1.8.3.1 - diff --git a/SOURCES/013-pacemakerd.patch b/SOURCES/013-pacemakerd.patch new file mode 100644 index 0000000..3478450 --- /dev/null +++ b/SOURCES/013-pacemakerd.patch @@ -0,0 +1,1072 @@ +From b49880467c18ade43cc283036949b686d1413118 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 14 Apr 2020 14:12:22 -0500 +Subject: [PATCH 1/5] Low: pacemakerd: remove unneeded IPC 
and CPG commands + +Now that the controller handles crm_node's -l/-p needs, pacemakerd no longer +needs to. Backward compatibility isn't an issue because pacemakerd IPC isn't +proxied for Pacemaker Remote, so only local clients are relevant, and the +pacemakerd IPC never had a C API, which means it was internal only. + +Without those commands, pacemakerd no longer needs to track a process mask, +connect to CPG, or maintain the peer cache. + +The only remaining need for the cluster layer is to use corosync CFG to tell +corosync to initiate or block shutdown. +--- + daemons/pacemakerd/pacemakerd.c | 283 ++---------------------------------- + daemons/pacemakerd/pacemakerd.h | 2 +- + daemons/pacemakerd/pcmkd_corosync.c | 7 +- + 3 files changed, 16 insertions(+), 276 deletions(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index 64c30e2..0f7b2db 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -36,15 +36,12 @@ + #include + #include + +-static gboolean pcmk_quorate = FALSE; + static gboolean fatal_error = FALSE; + static GMainLoop *mainloop = NULL; + static bool global_keep_tracking = false; + + #define PCMK_PROCESS_CHECK_INTERVAL 5 + +-static const char *local_name = NULL; +-static uint32_t local_nodeid = 0; + static crm_trigger_t *shutdown_trigger = NULL; + static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid"; + +@@ -105,23 +102,6 @@ static pcmk_child_t pcmk_children[] = { + static gboolean check_active_before_startup_processes(gpointer user_data); + static int child_liveness(pcmk_child_t *child); + static gboolean start_child(pcmk_child_t * child); +-static gboolean update_node_processes(uint32_t id, const char *uname, +- uint32_t procs); +-void update_process_clients(pcmk__client_t *client); +- +-static uint32_t +-get_process_list(void) +-{ +- int lpc = 0; +- uint32_t procs = crm_get_cluster_proc(); +- +- for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { +- if 
(pcmk_children[lpc].pid != 0) { +- procs |= pcmk_children[lpc].flag; +- } +- } +- return procs; +-} + + static void + pcmk_process_exit(pcmk_child_t * child) +@@ -129,16 +109,6 @@ pcmk_process_exit(pcmk_child_t * child) + child->pid = 0; + child->active_before_startup = FALSE; + +- /* Broadcast the fact that one of our processes died ASAP +- * +- * Try to get some logging of the cause out first though +- * because we're probably about to get fenced +- * +- * Potentially do this only if respawn_count > N +- * to allow for local recovery +- */ +- update_node_processes(local_nodeid, NULL, get_process_list()); +- + child->respawn_count += 1; + if (child->respawn_count > MAX_RESPAWN) { + crm_err("Child respawn count exceeded by %s", child->name); +@@ -148,8 +118,6 @@ pcmk_process_exit(pcmk_child_t * child) + if (shutdown_trigger) { + /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ + mainloop_set_trigger(shutdown_trigger); +- /* intended to speed up propagating expected lay-off of the daemons? */ +- update_node_processes(local_nodeid, NULL, get_process_list()); + + } else if (!child->respawn) { + /* nothing to do */ +@@ -341,7 +309,6 @@ start_child(pcmk_child_t * child) + crm_info("Forked child %lld for process %s%s", + (long long) child->pid, child->name, + use_valgrind ? 
" (valgrind enabled: " VALGRIND_BIN ")" : ""); +- update_node_processes(local_nodeid, NULL, get_process_list()); + return TRUE; + + } else { +@@ -492,7 +459,6 @@ pcmk_shutdown_worker(gpointer user_data) + } + } + +- /* send_cluster_id(); */ + crm_notice("Shutdown complete"); + + { +@@ -567,22 +533,12 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) + pcmk_shutdown(15); + + } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { +- /* Send to everyone */ +- struct iovec *iov; +- int id = 0; +- const char *name = NULL; +- +- crm_element_value_int(msg, XML_ATTR_ID, &id); +- name = crm_element_value(msg, XML_ATTR_UNAME); +- crm_notice("Instructing peers to remove references to node %s/%u", name, id); +- +- iov = calloc(1, sizeof(struct iovec)); +- iov->iov_base = dump_xml_unformatted(msg); +- iov->iov_len = 1 + strlen(iov->iov_base); +- send_cpg_iov(iov); ++ crm_trace("Ignoring IPC request to purge node " ++ "because peer cache is not used"); + + } else { +- update_process_clients(c); ++ crm_debug("Unrecognized IPC command '%s' sent to pacemakerd", ++ crm_str(task)); + } + + free_xml(msg); +@@ -618,113 +574,6 @@ struct qb_ipcs_service_handlers mcp_ipc_callbacks = { + .connection_destroyed = pcmk_ipc_destroy + }; + +-static void +-send_xml_to_client(gpointer key, gpointer value, gpointer user_data) +-{ +- pcmk__ipc_send_xml((pcmk__client_t *) value, 0, (xmlNode *) user_data, +- crm_ipc_server_event); +-} +- +-/*! 
+- * \internal +- * \brief Send an XML message with process list of all known peers to client(s) +- * +- * \param[in] client Send message to this client, or all clients if NULL +- */ +-void +-update_process_clients(pcmk__client_t *client) +-{ +- GHashTableIter iter; +- crm_node_t *node = NULL; +- xmlNode *update = create_xml_node(NULL, "nodes"); +- +- if (is_corosync_cluster()) { +- crm_xml_add_int(update, "quorate", pcmk_quorate); +- } +- +- g_hash_table_iter_init(&iter, crm_peer_cache); +- while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { +- xmlNode *xml = create_xml_node(update, "node"); +- +- crm_xml_add_int(xml, "id", node->id); +- crm_xml_add(xml, "uname", node->uname); +- crm_xml_add(xml, "state", node->state); +- crm_xml_add_int(xml, "processes", node->processes); +- } +- +- if(client) { +- crm_trace("Sending process list to client %s", client->id); +- send_xml_to_client(NULL, client, update); +- +- } else { +- crm_trace("Sending process list to %d clients", +- pcmk__ipc_client_count()); +- pcmk__foreach_ipc_client(send_xml_to_client, update); +- } +- free_xml(update); +-} +- +-/*! +- * \internal +- * \brief Send a CPG message with local node's process list to all peers +- */ +-static void +-update_process_peers(void) +-{ +- /* Do nothing for corosync-2 based clusters */ +- +- struct iovec *iov = calloc(1, sizeof(struct iovec)); +- +- CRM_ASSERT(iov); +- if (local_name) { +- iov->iov_base = crm_strdup_printf("", +- local_name, get_process_list()); +- } else { +- iov->iov_base = crm_strdup_printf("", +- get_process_list()); +- } +- iov->iov_len = strlen(iov->iov_base) + 1; +- crm_trace("Sending %s", (char*) iov->iov_base); +- send_cpg_iov(iov); +-} +- +-/*! 
+- * \internal +- * \brief Update a node's process list, notifying clients and peers if needed +- * +- * \param[in] id Node ID of affected node +- * \param[in] uname Uname of affected node +- * \param[in] procs Affected node's process list mask +- * +- * \return TRUE if the process list changed, FALSE otherwise +- */ +-static gboolean +-update_node_processes(uint32_t id, const char *uname, uint32_t procs) +-{ +- gboolean changed = FALSE; +- crm_node_t *node = crm_get_peer(id, uname); +- +- if (procs != 0) { +- if (procs != node->processes) { +- crm_debug("Node %s now has process list: %.32x (was %.32x)", +- node->uname, procs, node->processes); +- node->processes = procs; +- changed = TRUE; +- +- /* If local node's processes have changed, notify clients/peers */ +- if (id == local_nodeid) { +- update_process_clients(NULL); +- update_process_peers(); +- } +- +- } else { +- crm_trace("Node %s still has process list: %.32x", node->uname, procs); +- } +- } +- return changed; +-} +- +- + static pcmk__cli_option_t long_options[] = { + // long option, argument type, storage, short option, description, flags + { +@@ -1126,91 +975,6 @@ init_children_processes(void) + setenv("PCMK_respawned", "true", 1); + } + +-static void +-mcp_cpg_destroy(gpointer user_data) +-{ +- crm_crit("Lost connection to cluster layer, shutting down"); +- crm_exit(CRM_EX_DISCONNECT); +-} +- +-/*! 
+- * \internal +- * \brief Process a CPG message (process list or manual peer cache removal) +- * +- * \param[in] handle CPG connection (ignored) +- * \param[in] groupName CPG group name (ignored) +- * \param[in] nodeid ID of affected node +- * \param[in] pid Process ID (ignored) +- * \param[in] msg CPG XML message +- * \param[in] msg_len Length of msg in bytes (ignored) +- */ +-static void +-mcp_cpg_deliver(cpg_handle_t handle, +- const struct cpg_name *groupName, +- uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) +-{ +- xmlNode *xml = string2xml(msg); +- const char *task = crm_element_value(xml, F_CRM_TASK); +- +- crm_trace("Received CPG message (%s): %.200s", +- (task? task : "process list"), (char*)msg); +- +- if (task == NULL) { +- if (nodeid == local_nodeid) { +- crm_debug("Ignoring message with local node's process list"); +- } else { +- uint32_t procs = 0; +- const char *uname = crm_element_value(xml, "uname"); +- +- crm_element_value_int(xml, "proclist", (int *)&procs); +- if (update_node_processes(nodeid, uname, procs)) { +- update_process_clients(NULL); +- } +- } +- +- } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { +- int id = 0; +- const char *name = NULL; +- +- crm_element_value_int(xml, XML_ATTR_ID, &id); +- name = crm_element_value(xml, XML_ATTR_UNAME); +- reap_crm_member(id, name); +- } +- +- if (xml != NULL) { +- free_xml(xml); +- } +-} +- +-static void +-mcp_cpg_membership(cpg_handle_t handle, +- const struct cpg_name *groupName, +- const struct cpg_address *member_list, size_t member_list_entries, +- const struct cpg_address *left_list, size_t left_list_entries, +- const struct cpg_address *joined_list, size_t joined_list_entries) +-{ +- /* Update peer cache if needed */ +- pcmk_cpg_membership(handle, groupName, member_list, member_list_entries, +- left_list, left_list_entries, +- joined_list, joined_list_entries); +- +- /* Always broadcast our own presence after any membership change */ +- update_process_peers(); +-} +- 
+-static gboolean +-mcp_quorum_callback(unsigned long long seq, gboolean quorate) +-{ +- pcmk_quorate = quorate; +- return TRUE; +-} +- +-static void +-mcp_quorum_destroy(gpointer user_data) +-{ +- crm_info("connection lost"); +-} +- + int + main(int argc, char **argv) + { +@@ -1226,7 +990,6 @@ main(int argc, char **argv) + struct rlimit cores; + crm_ipc_t *old_instance = NULL; + qb_ipcs_service_t *ipcs = NULL; +- static crm_cluster_t cluster; + + crm_log_preinit(NULL, argc, argv); + pcmk__set_cli_options(NULL, "[options]", long_options, +@@ -1397,7 +1160,7 @@ main(int argc, char **argv) + } + + /* Allows us to block shutdown */ +- if (cluster_connect_cfg(&local_nodeid) == FALSE) { ++ if (!cluster_connect_cfg()) { + crm_err("Couldn't connect to Corosync's CFG service"); + crm_exit(CRM_EX_PROTOCOL); + } +@@ -1417,34 +1180,13 @@ main(int argc, char **argv) + crm_exit(CRM_EX_FATAL); + }; + +- cluster.destroy = mcp_cpg_destroy; +- cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; +- cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; +- +- crm_set_autoreap(FALSE); ++ mainloop_add_signal(SIGTERM, pcmk_shutdown); ++ mainloop_add_signal(SIGINT, pcmk_shutdown); + +- rc = pcmk_ok; ++ init_children_processes(); + +- if (cluster_connect_cpg(&cluster) == FALSE) { +- crm_err("Couldn't connect to Corosync's CPG service"); +- rc = -ENOPROTOOPT; +- +- } else if (cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) +- == FALSE) { +- rc = -ENOTCONN; +- +- } else { +- local_name = get_local_node_name(); +- update_node_processes(local_nodeid, local_name, get_process_list()); +- +- mainloop_add_signal(SIGTERM, pcmk_shutdown); +- mainloop_add_signal(SIGINT, pcmk_shutdown); +- +- init_children_processes(); +- +- crm_notice("Pacemaker daemon successfully started and accepting connections"); +- g_main_loop_run(mainloop); +- } ++ crm_notice("Pacemaker daemon successfully started and accepting connections"); ++ g_main_loop_run(mainloop); + + if (ipcs) { + crm_trace("Closing IPC server"); 
+@@ -1453,9 +1195,6 @@ main(int argc, char **argv) + } + + g_main_loop_unref(mainloop); +- +- cluster_disconnect_cpg(&cluster); + cluster_disconnect_cfg(); +- +- crm_exit(crm_errno2exit(rc)); ++ crm_exit(CRM_EX_OK); + } +diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h +index d66ab10..ac2d842 100644 +--- a/daemons/pacemakerd/pacemakerd.h ++++ b/daemons/pacemakerd/pacemakerd.h +@@ -22,7 +22,7 @@ + + gboolean mcp_read_config(void); + +-gboolean cluster_connect_cfg(uint32_t * nodeid); ++gboolean cluster_connect_cfg(void); + gboolean cluster_disconnect_cfg(void); + + void pcmk_shutdown(int nsig); +diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c +index ec74908..156f965 100644 +--- a/daemons/pacemakerd/pcmkd_corosync.c ++++ b/daemons/pacemakerd/pcmkd_corosync.c +@@ -93,13 +93,14 @@ cluster_disconnect_cfg(void) + } while(counter < max) + + gboolean +-cluster_connect_cfg(uint32_t * nodeid) ++cluster_connect_cfg(void) + { + cs_error_t rc; + int fd = -1, retries = 0, rv; + uid_t found_uid = 0; + gid_t found_gid = 0; + pid_t found_pid = 0; ++ uint32_t nodeid; + + static struct mainloop_fd_callbacks cfg_fd_callbacks = { + .dispatch = pcmk_cfg_dispatch, +@@ -134,14 +135,14 @@ cluster_connect_cfg(uint32_t * nodeid) + } + + retries = 0; +- cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid)); ++ cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid)); + + if (rc != CS_OK) { + crm_err("corosync cfg local_get error %d", rc); + goto bail; + } + +- crm_debug("Our nodeid: %d", *nodeid); ++ crm_debug("Our nodeid: %lu", (unsigned long) nodeid); + mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); + + return TRUE; +-- +1.8.3.1 + + +From 23ad3803a12189a369d188f3d3e606142cf16c52 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Jul 2020 11:25:28 -0500 +Subject: [PATCH 2/5] Refactor: pacemakerd: functionize removing core file + limit + +... 
for readability and code isolation +--- + daemons/pacemakerd/pacemakerd.c | 50 ++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 21 deletions(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index 0f7b2db..ac308e7 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -975,10 +975,37 @@ init_children_processes(void) + setenv("PCMK_respawned", "true", 1); + } + ++static void ++remove_core_file_limit(void) ++{ ++ struct rlimit cores; ++ int rc = getrlimit(RLIMIT_CORE, &cores); ++ ++ if (rc < 0) { ++ crm_perror(LOG_ERR, "Cannot determine current maximum core size."); ++ return; ++ } ++ ++ if ((cores.rlim_max == 0) && (geteuid() == 0)) { ++ cores.rlim_max = RLIM_INFINITY; ++ } else { ++ crm_info("Maximum core file size is %llu bytes", ++ (unsigned long long) cores.rlim_max); ++ } ++ cores.rlim_cur = cores.rlim_max; ++ ++ rc = setrlimit(RLIMIT_CORE, &cores); ++ if (rc < 0) { ++ crm_perror(LOG_ERR, ++ "Core file generation will remain disabled." 
++ " Core files are an important diagnostic tool, so" ++ " please consider enabling them by default."); ++ } ++} ++ + int + main(int argc, char **argv) + { +- int rc; + int flag; + int argerr = 0; + +@@ -987,7 +1014,6 @@ main(int argc, char **argv) + + uid_t pcmk_uid = 0; + gid_t pcmk_gid = 0; +- struct rlimit cores; + crm_ipc_t *old_instance = NULL; + qb_ipcs_service_t *ipcs = NULL; + +@@ -1099,25 +1125,7 @@ main(int argc, char **argv) + PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + mainloop = g_main_loop_new(NULL, FALSE); + +- rc = getrlimit(RLIMIT_CORE, &cores); +- if (rc < 0) { +- crm_perror(LOG_ERR, "Cannot determine current maximum core size."); +- } else { +- if (cores.rlim_max == 0 && geteuid() == 0) { +- cores.rlim_max = RLIM_INFINITY; +- } else { +- crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max); +- } +- cores.rlim_cur = cores.rlim_max; +- +- rc = setrlimit(RLIMIT_CORE, &cores); +- if (rc < 0) { +- crm_perror(LOG_ERR, +- "Core file generation will remain disabled." 
+- " Core files are an important diagnostic tool, so" +- " please consider enabling them by default."); +- } +- } ++ remove_core_file_limit(); + + if (pcmk_daemon_user(&pcmk_uid, &pcmk_gid) < 0) { + crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); +-- +1.8.3.1 + + +From 40b0891dc92767aad8495121afcbd6e68fd3830a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Jul 2020 11:26:09 -0500 +Subject: [PATCH 3/5] Log: pacemakerd: improve messages + +--- + daemons/pacemakerd/pacemakerd.c | 22 +++++----- + daemons/pacemakerd/pcmkd_corosync.c | 85 +++++++++++++++++-------------------- + 2 files changed, 50 insertions(+), 57 deletions(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index ac308e7..0f459c0 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -344,7 +345,7 @@ start_child(pcmk_child_t * child) + + // Drop root group access if not needed + if (!need_root_group && (setgid(gid) < 0)) { +- crm_perror(LOG_ERR, "Could not set group to %d", gid); ++ crm_warn("Could not set group to %d: %s", gid, strerror(errno)); + } + + /* Initialize supplementary groups to only those always granted to +@@ -356,7 +357,8 @@ start_child(pcmk_child_t * child) + } + + if (uid && setuid(uid) < 0) { +- crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid); ++ crm_warn("Could not set user to %s (id %d): %s", ++ child->uid, uid, strerror(errno)); + } + + pcmk__close_fds_in_child(true); +@@ -370,7 +372,7 @@ start_child(pcmk_child_t * child) + } else { + (void)execvp(child->command, opts_default); + } +- crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); ++ crm_crit("Could not execute %s: %s", child->command, strerror(errno)); + crm_exit(CRM_EX_FATAL); + } + return TRUE; /* never reached */ +@@ -527,8 +529,7 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, 
void *data, size_t size) + + task = crm_element_value(msg, F_CRM_TASK); + if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) { +- /* Time to quit */ +- crm_notice("Shutting down in response to ticket %s (%s)", ++ crm_notice("Shutting down in response to IPC request %s from %s", + crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN)); + pcmk_shutdown(15); + +@@ -982,7 +983,8 @@ remove_core_file_limit(void) + int rc = getrlimit(RLIMIT_CORE, &cores); + + if (rc < 0) { +- crm_perror(LOG_ERR, "Cannot determine current maximum core size."); ++ crm_warn("Cannot determine current maximum core file size: %s", ++ strerror(errno)); + return; + } + +@@ -996,10 +998,8 @@ remove_core_file_limit(void) + + rc = setrlimit(RLIMIT_CORE, &cores); + if (rc < 0) { +- crm_perror(LOG_ERR, +- "Core file generation will remain disabled." +- " Core files are an important diagnostic tool, so" +- " please consider enabling them by default."); ++ crm_warn("Cannot raise system limit on core file size " ++ "(consider doing so manually)"); + } + } + +@@ -1108,7 +1108,6 @@ main(int argc, char **argv) + crm_ipc_destroy(old_instance); + + if (mcp_read_config() == FALSE) { +- crm_notice("Could not obtain corosync config data, exiting"); + crm_exit(CRM_EX_UNAVAILABLE); + } + +@@ -1169,7 +1168,6 @@ main(int argc, char **argv) + + /* Allows us to block shutdown */ + if (!cluster_connect_cfg()) { +- crm_err("Couldn't connect to Corosync's CFG service"); + crm_exit(CRM_EX_PROTOCOL); + } + +diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c +index 156f965..6f19803 100644 +--- a/daemons/pacemakerd/pcmkd_corosync.c ++++ b/daemons/pacemakerd/pcmkd_corosync.c +@@ -28,7 +28,6 @@ + + #include /* PCMK__SPECIAL_PID* */ + +-enum cluster_type_e stack = pcmk_cluster_unknown; + static corosync_cfg_handle_t cfg_handle; + + /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ +@@ -63,9 +62,8 @@ pcmk_cfg_dispatch(gpointer user_data) + static void + 
cfg_connection_destroy(gpointer user_data) + { +- crm_err("Connection destroyed"); ++ crm_err("Lost connection to Corosync"); + cfg_handle = 0; +- + pcmk_shutdown(SIGTERM); + } + +@@ -85,7 +83,7 @@ cluster_disconnect_cfg(void) + code; \ + if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ + counter++; \ +- crm_debug("Retrying operation after %ds", counter); \ ++ crm_debug("Retrying Corosync operation after %ds", counter); \ + sleep(counter); \ + } else { \ + break; \ +@@ -110,41 +108,42 @@ cluster_connect_cfg(void) + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { +- crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc); ++ crm_crit("Could not connect to Corosync CFG: %s " CRM_XS " rc=%d", ++ cs_strerror(rc), rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { +- crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc); ++ crm_crit("Could not get Corosync CFG descriptor: %s " CRM_XS " rc=%d", ++ cs_strerror(rc), rc); + goto bail; + } + + /* CFG provider run as root (in given user namespace, anyway)? 
*/ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { +- crm_err("CFG provider is not authentic:" +- " process %lld (uid: %lld, gid: %lld)", +- (long long) PCMK__SPECIAL_PID_AS_0(found_pid), +- (long long) found_uid, (long long) found_gid); ++ crm_crit("Rejecting Corosync CFG provider because process %lld " ++ "is running as uid %lld gid %lld, not root", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); + goto bail; + } else if (rv < 0) { +- crm_err("Could not verify authenticity of CFG provider: %s (%d)", +- strerror(-rv), -rv); ++ crm_crit("Could not authenticate Corosync CFG provider: %s " ++ CRM_XS " rc=%d", strerror(-rv), -rv); + goto bail; + } + + retries = 0; + cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, &nodeid)); +- + if (rc != CS_OK) { +- crm_err("corosync cfg local_get error %d", rc); ++ crm_crit("Could not get local node ID from Corosync: %s " ++ CRM_XS " rc=%d", cs_strerror(rc), rc); + goto bail; + } ++ crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid); + +- crm_debug("Our nodeid: %lu", (unsigned long) nodeid); + mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); +- + return TRUE; + + bail: +@@ -184,14 +183,15 @@ mcp_read_config(void) + gid_t found_gid = 0; + pid_t found_pid = 0; + int rv; ++ enum cluster_type_e stack; + + // There can be only one possibility + do { + rc = cmap_initialize(&local_handle); + if (rc != CS_OK) { + retries++; +- printf("cmap connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); +- crm_info("cmap connection setup failed: %s. 
Retrying in %ds", cs_strerror(rc), retries); ++ crm_info("Could not connect to Corosync CMAP: %s (retrying in %ds) " ++ CRM_XS " rc=%d", cs_strerror(rc), retries, rc); + sleep(retries); + + } else { +@@ -201,15 +201,15 @@ mcp_read_config(void) + } while (retries < 5); + + if (rc != CS_OK) { +- printf("Could not connect to Cluster Configuration Database API, error %d\n", rc); +- crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); ++ crm_crit("Could not connect to Corosync CMAP: %s " ++ CRM_XS " rc=%d", cs_strerror(rc), rc); + return FALSE; + } + + rc = cmap_fd_get(local_handle, &fd); + if (rc != CS_OK) { +- crm_err("Could not obtain the CMAP API connection: %s (%d)", +- cs_strerror(rc), rc); ++ crm_crit("Could not get Corosync CMAP descriptor: %s " CRM_XS " rc=%d", ++ cs_strerror(rc), rc); + cmap_finalize(local_handle); + return FALSE; + } +@@ -217,38 +217,33 @@ mcp_read_config(void) + /* CMAP provider run as root (in given user namespace, anyway)? */ + if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, + &found_uid, &found_gid))) { +- crm_err("CMAP provider is not authentic:" +- " process %lld (uid: %lld, gid: %lld)", +- (long long) PCMK__SPECIAL_PID_AS_0(found_pid), +- (long long) found_uid, (long long) found_gid); ++ crm_crit("Rejecting Corosync CMAP provider because process %lld " ++ "is running as uid %lld gid %lld, not root", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); + cmap_finalize(local_handle); + return FALSE; + } else if (rv < 0) { +- crm_err("Could not verify authenticity of CMAP provider: %s (%d)", +- strerror(-rv), -rv); ++ crm_crit("Could not authenticate Corosync CMAP provider: %s " ++ CRM_XS " rc=%d", strerror(-rv), -rv); + cmap_finalize(local_handle); + return FALSE; + } + + stack = get_cluster_type(); +- crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); +- +- /* =::=::= Should we be here =::=::= */ +- if (stack 
== pcmk_cluster_corosync) { +- pcmk__set_env_option("cluster_type", "corosync"); +- pcmk__set_env_option("quorum_type", "corosync"); +- +- } else { +- crm_err("Unsupported stack type: %s", name_for_cluster_type(stack)); ++ if (stack != pcmk_cluster_corosync) { ++ crm_crit("Expected corosync stack but detected %s " CRM_XS " stack=%d", ++ name_for_cluster_type(stack), stack); + return FALSE; + } + +- /* =::=::= Logging =::=::= */ +- if (pcmk__env_option("debug")) { +- /* Syslog logging is already setup by crm_log_init() */ ++ crm_info("Reading configuration for %s stack", ++ name_for_cluster_type(stack)); ++ pcmk__set_env_option("cluster_type", "corosync"); ++ pcmk__set_env_option("quorum_type", "corosync"); + +- } else { +- /* Check corosync */ ++ // If debug logging is not configured, check whether corosync has it ++ if (pcmk__env_option("debug") == NULL) { + char *debug_enabled = NULL; + + get_config_opt(config, local_handle, "logging.debug", &debug_enabled, "off"); +@@ -269,7 +264,7 @@ mcp_read_config(void) + if(local_handle){ + gid_t gid = 0; + if (pcmk_daemon_user(NULL, &gid) < 0) { +- crm_warn("Could not authorize group with corosync " CRM_XS ++ crm_warn("Could not authorize group with Corosync " CRM_XS + " No group found for user %s", CRM_DAEMON_USER); + + } else { +@@ -277,8 +272,8 @@ mcp_read_config(void) + snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); + rc = cmap_set_uint8(local_handle, key, 1); + if (rc != CS_OK) { +- crm_warn("Could not authorize group with corosync "CRM_XS +- " group=%u rc=%d (%s)", gid, rc, ais_error2text(rc)); ++ crm_warn("Could not authorize group with Corosync: %s " CRM_XS ++ " group=%u rc=%d", ais_error2text(rc), gid, rc); + } + } + } +-- +1.8.3.1 + + +From c1e12aeb58366f508730defcb8eb6f3ebedac469 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 23 Apr 2020 13:30:58 -0500 +Subject: [PATCH 4/5] Refactor: pacemakerd: use existing handle for corosync + shutdown + +--- + daemons/pacemakerd/pacemakerd.c | 31 
++++--------------------------- + daemons/pacemakerd/pacemakerd.h | 1 + + daemons/pacemakerd/pcmkd_corosync.c | 24 +++++++++++++++++++++++- + 3 files changed, 28 insertions(+), 28 deletions(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index 0f459c0..fb35dfc 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -30,10 +30,6 @@ + #include + #include + +-#ifdef SUPPORT_COROSYNC +-#include +-#endif +- + #include + #include + +@@ -145,28 +141,6 @@ pcmk_process_exit(pcmk_child_t * child) + } + } + +-static void pcmk_exit_with_cluster(int exitcode) +-{ +-#ifdef SUPPORT_COROSYNC +- corosync_cfg_handle_t cfg_handle; +- cs_error_t err; +- +- if (exitcode == CRM_EX_FATAL) { +- crm_info("Asking Corosync to shut down"); +- err = corosync_cfg_initialize(&cfg_handle, NULL); +- if (err != CS_OK) { +- crm_warn("Unable to open handle to corosync to close it down. err=%d", err); +- } +- err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); +- if (err != CS_OK) { +- crm_warn("Corosync shutdown failed. 
err=%d", err); +- } +- corosync_cfg_finalize(cfg_handle); +- } +-#endif +- crm_exit(exitcode); +-} +- + static void + pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) + { +@@ -475,7 +449,10 @@ pcmk_shutdown_worker(gpointer user_data) + + if (fatal_error) { + crm_notice("Shutting down and staying down after fatal error"); +- pcmk_exit_with_cluster(CRM_EX_FATAL); ++#ifdef SUPPORT_COROSYNC ++ pcmkd_shutdown_corosync(); ++#endif ++ crm_exit(CRM_EX_FATAL); + } + + return TRUE; +diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h +index ac2d842..5f475fd 100644 +--- a/daemons/pacemakerd/pacemakerd.h ++++ b/daemons/pacemakerd/pacemakerd.h +@@ -24,5 +24,6 @@ gboolean mcp_read_config(void); + + gboolean cluster_connect_cfg(void); + gboolean cluster_disconnect_cfg(void); ++void pcmkd_shutdown_corosync(void); + + void pcmk_shutdown(int nsig); +diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c +index 6f19803..82bd257 100644 +--- a/daemons/pacemakerd/pcmkd_corosync.c ++++ b/daemons/pacemakerd/pcmkd_corosync.c +@@ -28,7 +28,7 @@ + + #include /* PCMK__SPECIAL_PID* */ + +-static corosync_cfg_handle_t cfg_handle; ++static corosync_cfg_handle_t cfg_handle = 0; + + /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ + +@@ -151,6 +151,28 @@ cluster_connect_cfg(void) + return FALSE; + } + ++void ++pcmkd_shutdown_corosync(void) ++{ ++ cs_error_t rc; ++ ++ if (cfg_handle == 0) { ++ crm_warn("Unable to shut down Corosync: No connection"); ++ return; ++ } ++ crm_info("Asking Corosync to shut down"); ++ rc = corosync_cfg_try_shutdown(cfg_handle, ++ COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); ++ if (rc == CS_OK) { ++ corosync_cfg_finalize(cfg_handle); ++ cfg_handle = 0; ++ } else { ++ crm_warn("Corosync shutdown failed: %s " CRM_XS " rc=%d", ++ cs_strerror(rc), rc); ++ } ++} ++ ++ + /* =::=::=::= Configuration =::=::=::= */ + static int + get_config_opt(uint64_t unused, cmap_handle_t object_handle, 
const char *key, char **value, +-- +1.8.3.1 + + +From e93857b25a22045d5db7bd45c3f026fb82e6da8d Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 23 Apr 2020 13:36:08 -0500 +Subject: [PATCH 5/5] Build: pacemakerd: properly conditionalize corosync calls + +Previously, pacemakerd in its entirety was not built unless corosync support +was enabled, a throwback to when CMAN and Corosync 1 were supported. Now, +pacemakerd is built unconditionally, and just its corosync-related calls are +guarded by corosync support. + +This has no effect currently since corosync 2+ is the only supported cluster +layer, but it offers some future-proofing. +--- + daemons/pacemakerd/Makefile.am | 9 ++++----- + daemons/pacemakerd/pacemakerd.c | 6 ++++++ + 2 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/daemons/pacemakerd/Makefile.am b/daemons/pacemakerd/Makefile.am +index b01d8ef..4cc8a7c 100644 +--- a/daemons/pacemakerd/Makefile.am ++++ b/daemons/pacemakerd/Makefile.am +@@ -1,5 +1,5 @@ + # +-# Copyright 2004-2019 the Pacemaker project contributors ++# Copyright 2004-2020 the Pacemaker project contributors + # + # The version control history for this file may have further details. 
+ # +@@ -9,8 +9,6 @@ + + include $(top_srcdir)/mk/common.mk + +-if BUILD_CS_SUPPORT +- + initdir = $(INITDIR) + init_SCRIPTS = pacemaker + sbin_PROGRAMS = pacemakerd +@@ -30,8 +28,9 @@ pacemakerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + + pacemakerd_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(top_builddir)/lib/common/libcrmcommon.la + pacemakerd_LDADD += $(CLUSTERLIBS) +-pacemakerd_SOURCES = pacemakerd.c pcmkd_corosync.c +- ++pacemakerd_SOURCES = pacemakerd.c ++if BUILD_CS_SUPPORT ++pacemakerd_SOURCES += pcmkd_corosync.c + endif + + CLEANFILES = $(man8_MANS) +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index fb35dfc..652d6ca 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -1084,9 +1084,11 @@ main(int argc, char **argv) + crm_ipc_close(old_instance); + crm_ipc_destroy(old_instance); + ++#ifdef SUPPORT_COROSYNC + if (mcp_read_config() == FALSE) { + crm_exit(CRM_EX_UNAVAILABLE); + } ++#endif + + // OCF shell functions and cluster-glue need facility under different name + { +@@ -1143,10 +1145,12 @@ main(int argc, char **argv) + crm_exit(CRM_EX_OSERR); + } + ++#ifdef SUPPORT_COROSYNC + /* Allows us to block shutdown */ + if (!cluster_connect_cfg()) { + crm_exit(CRM_EX_PROTOCOL); + } ++#endif + + if(pcmk_locate_sbd() > 0) { + setenv("PCMK_watchdog", "true", 1); +@@ -1178,6 +1182,8 @@ main(int argc, char **argv) + } + + g_main_loop_unref(mainloop); ++#ifdef SUPPORT_COROSYNC + cluster_disconnect_cfg(); ++#endif + crm_exit(CRM_EX_OK); + } +-- +1.8.3.1 + diff --git a/SOURCES/013-shutdown-lock.patch b/SOURCES/013-shutdown-lock.patch deleted file mode 100644 index 4b9c91f..0000000 --- a/SOURCES/013-shutdown-lock.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 223ab7251adcb8c6f6b96def138be58b1478c42b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 22 Nov 2019 17:03:20 -0600 -Subject: [PATCH 09/18] Low: controller: mark shutdown-locked resources in - resource history - -When a graph action 
indicates that the resource should be shutdown-locked -to its node, remember the shutdown lock time in active_op_t so we can remember -that when the result comes back. When the result does come back, add -"shutdown-lock" to its lrm_resource entry in the CIB status section -- as -the timestamp if it's a successful stop or a probe finding the resource -inactive, or as 0 to clear the lock for any other operation. ---- - daemons/controld/controld_control.c | 9 ++++- - daemons/controld/controld_execd.c | 44 +++++++++++++++++++-- - daemons/controld/controld_lrm.h | 1 + - daemons/controld/controld_te_callbacks.c | 65 ++++++++++++++++++++++---------- - daemons/controld/controld_utils.h | 1 + - 5 files changed, 95 insertions(+), 25 deletions(-) - -diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c -index 6c7f97c..c918a1e 100644 ---- a/daemons/controld/controld_control.c -+++ b/daemons/controld/controld_control.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -35,6 +35,7 @@ gboolean fsa_has_quorum = FALSE; - crm_trigger_t *fsa_source = NULL; - crm_trigger_t *config_read = NULL; - bool no_quorum_suicide_escalation = FALSE; -+bool controld_shutdown_lock_enabled = false; - - /* A_HA_CONNECT */ - void -@@ -587,7 +588,10 @@ static pe_cluster_option crmd_opts[] = { - { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, - "How many times stonith can fail before it will no longer be attempted on a target" - }, -+ -+ // Already documented in libpe_status (other values must be kept identical) - { "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, -+ { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", &check_boolean, NULL, NULL }, - }; - /* *INDENT-ON* */ - -@@ -698,6 +702,9 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void - value = crmd_pref(config_hash, "join-finalization-timeout"); - finalization_timer->period_ms = crm_parse_interval_spec(value); - -+ value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK); -+ controld_shutdown_lock_enabled = crm_is_true(value); -+ - free(fsa_cluster_name); - fsa_cluster_name = NULL; - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 17cc8d6..c0436a2 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -44,7 +44,8 @@ static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - - static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - int log_level); --static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); -+static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, -+ lrmd_event_data_t *op, time_t lock_time); - - static void - lrm_connection_destroy(void) -@@ -2171,7 +2172,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t - 
crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", - op->rsc_id, op->op_type, op->interval_ms, node_name); - -- do_update_resource(node_name, rsc, op); -+ do_update_resource(node_name, rsc, op, 0); - } - - static void -@@ -2313,6 +2314,10 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - pending->rsc_id = strdup(rsc->id); - pending->start_time = time(NULL); - pending->user_data = op->user_data? strdup(op->user_data) : NULL; -+ if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, -+ &(pending->lock_time)) != pcmk_ok) { -+ pending->lock_time = 0; -+ } - g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); - - if ((op->interval_ms > 0) -@@ -2356,8 +2361,28 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use - } - } - -+/* Only successful stops, and probes that found the resource inactive, get locks -+ * recorded in the history. This ensures the resource stays locked to the node -+ * until it is active there again after the node comes back up. 
-+ */ -+static bool -+should_preserve_lock(lrmd_event_data_t *op) -+{ -+ if (!controld_shutdown_lock_enabled) { -+ return false; -+ } -+ if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { -+ return true; -+ } -+ if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { -+ return true; -+ } -+ return false; -+} -+ - static int --do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) -+do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, -+ lrmd_event_data_t *op, time_t lock_time) - { - /* - -@@ -2412,6 +2437,16 @@ do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data - crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); - crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); - crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); -+ if (lock_time != 0) { -+ /* Actions on a locked resource should either preserve the lock by -+ * recording it with the action result, or clear it. -+ */ -+ if (!should_preserve_lock(op)) { -+ lock_time = 0; -+ } -+ crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK, -+ (long long) lock_time); -+ } - - if (op->params) { - container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); -@@ -2616,7 +2651,8 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - if (controld_action_is_recordable(op->op_type)) { - if (node_name && rsc) { - // We should record the result, and happily, we can -- update_id = do_update_resource(node_name, rsc, op); -+ update_id = do_update_resource(node_name, rsc, op, -+ pending? 
pending->lock_time : 0); - need_direct_ack = FALSE; - - } else if (op->rsc_deleted) { -diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h -index 7acac2a..da0582c 100644 ---- a/daemons/controld/controld_lrm.h -+++ b/daemons/controld/controld_lrm.h -@@ -46,6 +46,7 @@ typedef struct active_op_s { - int call_id; - uint32_t flags; // bitmask of active_op_e - time_t start_time; -+ time_t lock_time; - char *rsc_id; - char *op_type; - char *op_key; -diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c -index 25f0ab2..8506f26 100644 ---- a/daemons/controld/controld_te_callbacks.c -+++ b/daemons/controld/controld_te_callbacks.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -28,6 +28,17 @@ crm_trigger_t *transition_trigger = NULL; - /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ - #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" - -+// An explicit shutdown-lock of 0 means the lock has been cleared -+static bool -+shutdown_lock_cleared(xmlNode *lrm_resource) -+{ -+ time_t shutdown_lock = 0; -+ -+ return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, -+ &shutdown_lock) == pcmk_ok) -+ && (shutdown_lock == 0); -+} -+ - static void - te_update_diff_v1(const char *event, xmlNode *diff) - { -@@ -106,33 +117,42 @@ te_update_diff_v1(const char *event, xmlNode *diff) - } - freeXpathObject(xpathObj); - -+ // Check for lrm_resource entries -+ xpathObj = xpath_search(diff, -+ "//" F_CIB_UPDATE_RESULT -+ "//" XML_TAG_DIFF_ADDED -+ "//" XML_LRM_TAG_RESOURCE); -+ max = numXpathResults(xpathObj); -+ - /* -- * Updates by, or in response to, TE actions will never contain updates -- * for 
more than one resource at a time, so such updates indicate an -- * LRM refresh. -- * -- * In that case, start a new transition rather than check each result -- * individually, which can result in _huge_ speedups in large clusters. -+ * Updates by, or in response to, graph actions will never affect more than -+ * one resource at a time, so such updates indicate an LRM refresh. In that -+ * case, start a new transition rather than check each result individually, -+ * which can result in _huge_ speedups in large clusters. - * - * Unfortunately, we can only do so when there are no pending actions. - * Otherwise, we could mistakenly throw away those results here, and - * the cluster will stall waiting for them and time out the operation. - */ -- if (transition_graph->pending == 0) { -- xpathObj = xpath_search(diff, -- "//" F_CIB_UPDATE_RESULT -- "//" XML_TAG_DIFF_ADDED -- "//" XML_LRM_TAG_RESOURCE); -- max = numXpathResults(xpathObj); -- if (max > 1) { -- crm_debug("Ignoring resource operation updates due to history refresh of %d resources", -- max); -- crm_log_xml_trace(diff, "lrm-refresh"); -- abort_transition(INFINITY, tg_restart, "History refresh", NULL); -- goto bail; -+ if ((transition_graph->pending == 0) && (max > 1)) { -+ crm_debug("Ignoring resource operation updates due to history refresh of %d resources", -+ max); -+ crm_log_xml_trace(diff, "lrm-refresh"); -+ abort_transition(INFINITY, tg_restart, "History refresh", NULL); -+ goto bail; -+ } -+ -+ if (max == 1) { -+ xmlNode *lrm_resource = getXpathResult(xpathObj, 0); -+ -+ if (shutdown_lock_cleared(lrm_resource)) { -+ // @TODO would be more efficient to abort once after transition done -+ abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", -+ lrm_resource); -+ // Still process results, so we stop timers and update failcounts - } -- freeXpathObject(xpathObj); - } -+ freeXpathObject(xpathObj); - - /* Process operation updates */ - xpathObj = -@@ -205,6 +225,11 @@ process_lrm_resource_diff(xmlNode 
*lrm_resource, const char *node) - rsc_op = __xml_next(rsc_op)) { - process_graph_event(rsc_op, node); - } -+ if (shutdown_lock_cleared(lrm_resource)) { -+ // @TODO would be more efficient to abort once after transition done -+ abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", -+ lrm_resource); -+ } - } - - static void -diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h -index ca8cddb..8e31007 100644 ---- a/daemons/controld/controld_utils.h -+++ b/daemons/controld/controld_utils.h -@@ -41,6 +41,7 @@ fsa_cib_anon_update(const char *section, xmlNode *data) { - } - - extern gboolean fsa_has_quorum; -+extern bool controld_shutdown_lock_enabled; - extern int last_peer_update; - extern int last_resource_update; - --- -1.8.3.1 - diff --git a/SOURCES/014-sbd.patch b/SOURCES/014-sbd.patch new file mode 100644 index 0000000..c830bfe --- /dev/null +++ b/SOURCES/014-sbd.patch @@ -0,0 +1,1187 @@ +From 17d5ceac78f610aabf6a3678813706faf252c2fb Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 20 Jul 2020 17:56:29 +0200 +Subject: [PATCH 1/6] Fix: ipc-api: allow calling connect after disconnection + +--- + lib/common/crmcommon_private.h | 1 + + lib/common/ipc_client.c | 22 ++++++++++++++++------ + 2 files changed, 17 insertions(+), 6 deletions(-) + +diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h +index 49dae6c..d55df99 100644 +--- a/lib/common/crmcommon_private.h ++++ b/lib/common/crmcommon_private.h +@@ -175,6 +175,7 @@ typedef struct pcmk__ipc_methods_s { + struct pcmk_ipc_api_s { + enum pcmk_ipc_server server; // Daemon this IPC API instance is for + enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched ++ size_t ipc_size_max; // maximum IPC buffer size + crm_ipc_t *ipc; // IPC connection + mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC + bool free_on_disconnect; // Whether disconnect should free object +diff --git a/lib/common/ipc_client.c 
b/lib/common/ipc_client.c +index 4077d61..df687da 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -46,8 +46,6 @@ + int + pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) + { +- size_t max_size = 0; +- + if (api == NULL) { + return EINVAL; + } +@@ -64,13 +62,15 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) + return EOPNOTSUPP; + } + ++ (*api)->ipc_size_max = 0; ++ + // Set server methods and max_size (if not default) + switch (server) { + case pcmk_ipc_attrd: + break; + + case pcmk_ipc_based: +- max_size = 512 * 1024; // 512KB ++ (*api)->ipc_size_max = 512 * 1024; // 512KB + break; + + case pcmk_ipc_controld: +@@ -88,7 +88,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) + + case pcmk_ipc_schedulerd: + // @TODO max_size could vary by client, maybe take as argument? +- max_size = 5 * 1024 * 1024; // 5MB ++ (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB + break; + } + if ((*api)->cmds == NULL) { +@@ -97,7 +97,8 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) + return ENOMEM; + } + +- (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size); ++ (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), ++ (*api)->ipc_size_max); + if ((*api)->ipc == NULL) { + pcmk_free_ipc_api(*api); + *api = NULL; +@@ -451,11 +452,20 @@ pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) + { + int rc = pcmk_rc_ok; + +- if ((api == NULL) || (api->ipc == NULL)) { ++ if (api == NULL) { + crm_err("Cannot connect to uninitialized API object"); + return EINVAL; + } + ++ if (api->ipc == NULL) { ++ api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), ++ api->ipc_size_max); ++ if (api->ipc == NULL) { ++ crm_err("Failed to re-create IPC API"); ++ return ENOMEM; ++ } ++ } ++ + if (crm_ipc_connected(api->ipc)) { + crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true)); + return pcmk_rc_ok; +-- +1.8.3.1 + + +From 
e5ad1a6c54da48c86c8ab262abd4921cb37e998d Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 20 Jul 2020 18:18:01 +0200 +Subject: [PATCH 2/6] Fix: ipc-api: avoid infinite loop when disconnected + +Happens when using pcmk_dispatch_ipc when dispatching without +mainloop. +--- + lib/common/ipc_client.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +index df687da..aa032fe 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -392,7 +392,7 @@ pcmk_dispatch_ipc(pcmk_ipc_api_t *api) + if (api == NULL) { + return; + } +- while (crm_ipc_ready(api->ipc)) { ++ while (crm_ipc_ready(api->ipc) > 0) { + if (crm_ipc_read(api->ipc) > 0) { + dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api); + } +-- +1.8.3.1 + + +From 927b43a57d5e8256fbce8fe0792f8ea228c57687 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 9 Dec 2019 15:13:11 +0100 +Subject: [PATCH 3/6] Fix: sbd-integration: sync pacemakerd with sbd + +Make pacemakerd wait to be pinged by sbd before starting +sub-daemons. Pings further reply health-state and timestamp +of last successful check. On shutdown bring down all the +sub-daemons and wait to be polled for state by sbd before +finally exiting pacemakerd. +Add new api as not to make the xml-structure an external interface. 
+--- + daemons/pacemakerd/pacemakerd.c | 100 ++++++++++++++-- + include/crm/common/Makefile.am | 2 +- + include/crm/common/ipc_pacemakerd.h | 71 +++++++++++ + include/crm/msg_xml.h | 7 ++ + lib/common/Makefile.am | 1 + + lib/common/crmcommon_private.h | 3 + + lib/common/ipc_client.c | 5 +- + lib/common/ipc_pacemakerd.c | 232 ++++++++++++++++++++++++++++++++++++ + 8 files changed, 410 insertions(+), 11 deletions(-) + create mode 100644 include/crm/common/ipc_pacemakerd.h + create mode 100644 lib/common/ipc_pacemakerd.c + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index 652d6ca..ccfae66 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -40,8 +40,25 @@ static bool global_keep_tracking = false; + #define PCMK_PROCESS_CHECK_INTERVAL 5 + + static crm_trigger_t *shutdown_trigger = NULL; ++static crm_trigger_t *startup_trigger = NULL; + static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid"; + ++/* state we report when asked via pacemakerd-api status-ping */ ++static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; ++static gboolean running_with_sbd = FALSE; /* local copy */ ++/* When contacted via pacemakerd-api by a client having sbd in ++ * the name we assume it is sbd-daemon which wants to know ++ * if pacemakerd shutdown gracefully. ++ * Thus when everything is shutdown properly pacemakerd ++ * waits till it has reported the graceful completion of ++ * shutdown to sbd and just when sbd-client closes the ++ * connection we can assume that the report has arrived ++ * properly so that pacemakerd can finally exit. ++ * Following two variables are used to track that handshake. 
++ */ ++static unsigned int shutdown_complete_state_reported_to = 0; ++static gboolean shutdown_complete_state_reported_client_closed = FALSE; ++ + typedef struct pcmk_child_s { + pid_t pid; + long flag; +@@ -374,21 +391,20 @@ escalate_shutdown(gpointer data) + static gboolean + pcmk_shutdown_worker(gpointer user_data) + { +- static int phase = 0; ++ static int phase = SIZEOF(pcmk_children); + static time_t next_log = 0; +- static int max = SIZEOF(pcmk_children); + + int lpc = 0; + +- if (phase == 0) { ++ if (phase == SIZEOF(pcmk_children)) { + crm_notice("Shutting down Pacemaker"); +- phase = max; ++ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN; + } + + for (; phase > 0; phase--) { + /* Don't stop anything with start_seq < 1 */ + +- for (lpc = max - 1; lpc >= 0; lpc--) { ++ for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) { + pcmk_child_t *child = &(pcmk_children[lpc]); + + if (phase != child->start_seq) { +@@ -436,6 +452,11 @@ pcmk_shutdown_worker(gpointer user_data) + } + + crm_notice("Shutdown complete"); ++ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; ++ if (!fatal_error && running_with_sbd && ++ !shutdown_complete_state_reported_client_closed) { ++ return TRUE; ++ } + + { + const char *delay = pcmk__env_option("shutdown_delay"); +@@ -489,6 +510,51 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) + return 0; + } + ++static void ++pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id) ++{ ++ const char *value = NULL; ++ xmlNode *ping = NULL; ++ xmlNode *reply = NULL; ++ time_t pinged = time(NULL); ++ const char *from = crm_element_value(msg, F_CRM_SYS_FROM); ++ ++ /* Pinged for status */ ++ crm_trace("Pinged from %s.%s", ++ crm_str(crm_element_value(msg, F_CRM_ORIGIN)), ++ from?from:"unknown"); ++ ping = create_xml_node(NULL, XML_CRM_TAG_PING); ++ value = crm_element_value(msg, F_CRM_SYS_TO); ++ crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); ++ crm_xml_add(ping, 
XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state); ++ crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged); ++ crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); ++ reply = create_reply(msg, ping); ++ free_xml(ping); ++ if (reply) { ++ if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) != ++ pcmk_rc_ok) { ++ crm_err("Failed sending ping-reply"); ++ } ++ free_xml(reply); ++ } else { ++ crm_err("Failed building ping-reply"); ++ } ++ /* just proceed state on sbd pinging us */ ++ if (from && strstr(from, "sbd")) { ++ if (crm_str_eq(pacemakerd_state, ++ XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, ++ TRUE)) { ++ shutdown_complete_state_reported_to = c->pid; ++ } else if (crm_str_eq(pacemakerd_state, ++ XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, ++ TRUE)) { ++ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; ++ mainloop_set_trigger(startup_trigger); ++ } ++ } ++} ++ + /* Exit code means? */ + static int32_t + pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) +@@ -514,6 +580,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) + crm_trace("Ignoring IPC request to purge node " + "because peer cache is not used"); + ++ } else if (crm_str_eq(task, CRM_OP_PING, TRUE)) { ++ pcmk_handle_ping_request(c, msg, id); ++ + } else { + crm_debug("Unrecognized IPC command '%s' sent to pacemakerd", + crm_str(task)); +@@ -533,6 +602,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c) + return 0; + } + crm_trace("Connection %p", c); ++ if (shutdown_complete_state_reported_to == client->pid) { ++ shutdown_complete_state_reported_client_closed = TRUE; ++ if (shutdown_trigger) { ++ mainloop_set_trigger(shutdown_trigger); ++ } ++ } + pcmk__free_client(client); + return 0; + } +@@ -924,8 +999,8 @@ find_and_track_existing_processes(void) + return pcmk_rc_ok; + } + +-static void +-init_children_processes(void) ++static gboolean ++init_children_processes(void *user_data) + { + int start_seq = 1, lpc = 0; + static int max = 
SIZEOF(pcmk_children); +@@ -951,6 +1026,8 @@ init_children_processes(void) + * This may be useful for the daemons to know + */ + setenv("PCMK_respawned", "true", 1); ++ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; ++ return TRUE; + } + + static void +@@ -1154,6 +1231,7 @@ main(int argc, char **argv) + + if(pcmk_locate_sbd() > 0) { + setenv("PCMK_watchdog", "true", 1); ++ running_with_sbd = TRUE; + } else { + setenv("PCMK_watchdog", "false", 1); + } +@@ -1170,7 +1248,13 @@ main(int argc, char **argv) + mainloop_add_signal(SIGTERM, pcmk_shutdown); + mainloop_add_signal(SIGINT, pcmk_shutdown); + +- init_children_processes(); ++ if (running_with_sbd) { ++ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; ++ startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); ++ } else { ++ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; ++ init_children_processes(NULL); ++ } + + crm_notice("Pacemaker daemon successfully started and accepting connections"); + g_main_loop_run(mainloop); +diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am +index f29d105..1b5730a 100644 +--- a/include/crm/common/Makefile.am ++++ b/include/crm/common/Makefile.am +@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in + headerdir=$(pkgincludedir)/crm/common + + header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \ +- nvpair.h acl.h ipc_controld.h ++ nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h + noinst_HEADERS = internal.h alerts_internal.h \ + iso8601_internal.h remote_internal.h xml_internal.h \ + ipc_internal.h output.h cmdline_internal.h curses_internal.h \ +diff --git a/include/crm/common/ipc_pacemakerd.h b/include/crm/common/ipc_pacemakerd.h +new file mode 100644 +index 0000000..00e3edd +--- /dev/null ++++ b/include/crm/common/ipc_pacemakerd.h +@@ -0,0 +1,71 @@ ++/* ++ * Copyright 2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may 
have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#ifndef PCMK__IPC_PACEMAKERD__H ++# define PCMK__IPC_PACEMAKERD__H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * \file ++ * \brief IPC commands for Pacemakerd ++ * ++ * \ingroup core ++ */ ++ ++#include // time_t ++#include // pcmk_ipc_api_t ++ ++enum pcmk_pacemakerd_state { ++ pcmk_pacemakerd_state_invalid = -1, ++ pcmk_pacemakerd_state_init = 0, ++ pcmk_pacemakerd_state_starting_daemons, ++ pcmk_pacemakerd_state_wait_for_ping, ++ pcmk_pacemakerd_state_running, ++ pcmk_pacemakerd_state_shutting_down, ++ pcmk_pacemakerd_state_shutdown_complete, ++ pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete, ++}; ++ ++//! Possible types of pacemakerd replies ++enum pcmk_pacemakerd_api_reply { ++ pcmk_pacemakerd_reply_unknown, ++ pcmk_pacemakerd_reply_ping, ++}; ++ ++/*! ++ * Pacemakerd reply passed to event callback ++ */ ++typedef struct { ++ enum pcmk_pacemakerd_api_reply reply_type; ++ ++ union { ++ // pcmk_pacemakerd_reply_ping ++ struct { ++ const char *sys_from; ++ enum pcmk_pacemakerd_state state; ++ time_t last_good; ++ int status; ++ } ping; ++ } data; ++} pcmk_pacemakerd_api_reply_t; ++ ++int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name); ++enum pcmk_pacemakerd_state ++ pcmk_pacemakerd_api_daemon_state_text2enum(const char *state); ++const char ++ *pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif // PCMK__IPC_PACEMAKERD__H +diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h +index af3f33e..1fcb72d 100644 +--- a/include/crm/msg_xml.h ++++ b/include/crm/msg_xml.h +@@ -123,6 +123,13 @@ extern "C" { + # define XML_PING_ATTR_STATUS "result" + # define XML_PING_ATTR_SYSFROM "crm_subsystem" + # define XML_PING_ATTR_CRMDSTATE "crmd_state" ++# define 
XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state" ++# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init" ++# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons" ++# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping" ++# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running" ++# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down" ++# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete" + + # define XML_TAG_FRAGMENT "cib_fragment" + +diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am +index db66a6e..e0249b9 100644 +--- a/lib/common/Makefile.am ++++ b/lib/common/Makefile.am +@@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES += io.c + libcrmcommon_la_SOURCES += ipc_client.c + libcrmcommon_la_SOURCES += ipc_common.c + libcrmcommon_la_SOURCES += ipc_controld.c ++libcrmcommon_la_SOURCES += ipc_pacemakerd.c + libcrmcommon_la_SOURCES += ipc_server.c + libcrmcommon_la_SOURCES += iso8601.c + libcrmcommon_la_SOURCES += logging.c +diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h +index d55df99..68e3390 100644 +--- a/lib/common/crmcommon_private.h ++++ b/lib/common/crmcommon_private.h +@@ -210,4 +210,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); + G_GNUC_INTERNAL + pcmk__ipc_methods_t *pcmk__controld_api_methods(void); + ++G_GNUC_INTERNAL ++pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void); ++ + #endif // CRMCOMMON_PRIVATE__H +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +index aa032fe..033199d 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -41,7 +41,7 @@ + * + * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). + * \note This is intended to supersede crm_ipc_new() but currently only +- * supports the controller IPC API. ++ * supports the controller & pacemakerd IPC API. 
+ */ + int + pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) +@@ -84,6 +84,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) + break; + + case pcmk_ipc_pacemakerd: ++ (*api)->cmds = pcmk__pacemakerd_api_methods(); + break; + + case pcmk_ipc_schedulerd: +@@ -259,7 +260,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log) + return for_log? "fencer" : NULL /* "stonith-ng" */; + + case pcmk_ipc_pacemakerd: +- return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */; ++ return for_log? "launcher" : CRM_SYSTEM_MCP; + + case pcmk_ipc_schedulerd: + return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */; +diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c +new file mode 100644 +index 0000000..241722e +--- /dev/null ++++ b/lib/common/ipc_pacemakerd.c +@@ -0,0 +1,232 @@ ++/* ++ * Copyright 2020 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "crmcommon_private.h" ++ ++typedef struct pacemakerd_api_private_s { ++ enum pcmk_pacemakerd_state state; ++ char *client_uuid; ++} pacemakerd_api_private_t; ++ ++static const char *pacemakerd_state_str[] = { ++ XML_PING_ATTR_PACEMAKERDSTATE_INIT, ++ XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS, ++ XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, ++ XML_PING_ATTR_PACEMAKERDSTATE_RUNNING, ++ XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN, ++ XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE ++}; ++ ++enum pcmk_pacemakerd_state ++pcmk_pacemakerd_api_daemon_state_text2enum(const char *state) ++{ ++ int i; ++ ++ if (state == NULL) { ++ return pcmk_pacemakerd_state_invalid; ++ } ++ for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max; ++ i++) { ++ if (crm_str_eq(state, pacemakerd_state_str[i], TRUE)) { ++ return i; ++ } ++ } ++ return pcmk_pacemakerd_state_invalid; ++} ++ ++const char * ++pcmk_pacemakerd_api_daemon_state_enum2text( ++ enum pcmk_pacemakerd_state state) ++{ ++ if ((state >= pcmk_pacemakerd_state_init) && ++ (state <= pcmk_pacemakerd_state_max)) { ++ return pacemakerd_state_str[state]; ++ } ++ return "invalid"; ++} ++ ++// \return Standard Pacemaker return code ++static int ++new_data(pcmk_ipc_api_t *api) ++{ ++ struct pacemakerd_api_private_s *private = NULL; ++ ++ api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s)); ++ ++ if (api->api_data == NULL) { ++ return errno; ++ } ++ ++ private = api->api_data; ++ private->state = pcmk_pacemakerd_state_invalid; ++ /* other as with cib, controld, ... 
we are addressing pacemakerd just ++ from the local node -> pid is unique and thus sufficient as an ID ++ */ ++ private->client_uuid = pcmk__getpid_s(); ++ ++ return pcmk_rc_ok; ++} ++ ++static void ++free_data(void *data) ++{ ++ free(((struct pacemakerd_api_private_s *) data)->client_uuid); ++ free(data); ++} ++ ++// \return Standard Pacemaker return code ++static int ++post_connect(pcmk_ipc_api_t *api) ++{ ++ struct pacemakerd_api_private_s *private = NULL; ++ ++ if (api->api_data == NULL) { ++ return EINVAL; ++ } ++ private = api->api_data; ++ private->state = pcmk_pacemakerd_state_invalid; ++ ++ return pcmk_rc_ok; ++} ++ ++static void ++post_disconnect(pcmk_ipc_api_t *api) ++{ ++ struct pacemakerd_api_private_s *private = NULL; ++ ++ if (api->api_data == NULL) { ++ return; ++ } ++ private = api->api_data; ++ private->state = pcmk_pacemakerd_state_invalid; ++ ++ return; ++} ++ ++static bool ++reply_expected(pcmk_ipc_api_t *api, xmlNode *request) ++{ ++ const char *command = crm_element_value(request, F_CRM_TASK); ++ ++ if (command == NULL) { ++ return false; ++ } ++ ++ // We only need to handle commands that functions in this file can send ++ return !strcmp(command, CRM_OP_PING); ++} ++ ++static void ++dispatch(pcmk_ipc_api_t *api, xmlNode *reply) ++{ ++ crm_exit_t status = CRM_EX_OK; ++ xmlNode *msg_data = NULL; ++ pcmk_pacemakerd_api_reply_t reply_data = { ++ pcmk_pacemakerd_reply_unknown ++ }; ++ const char *value = NULL; ++ long long value_ll = 0; ++ ++ value = crm_element_value(reply, F_CRM_MSG_TYPE); ++ if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) { ++ crm_debug("Unrecognizable pacemakerd message: invalid message type '%s'", ++ crm_str(value)); ++ status = CRM_EX_PROTOCOL; ++ goto done; ++ } ++ ++ if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { ++ crm_debug("Unrecognizable pacemakerd message: no reference"); ++ status = CRM_EX_PROTOCOL; ++ goto done; ++ } ++ ++ value = crm_element_value(reply, F_CRM_TASK); ++ if ((value == NULL) 
|| strcmp(value, CRM_OP_PING)) { ++ crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value)); ++ status = CRM_EX_PROTOCOL; ++ goto done; ++ } ++ ++ // Parse useful info from reply ++ ++ msg_data = get_message_xml(reply, F_CRM_DATA); ++ crm_element_value_ll(msg_data, XML_ATTR_TSTAMP, &value_ll); ++ ++ reply_data.reply_type = pcmk_pacemakerd_reply_ping; ++ reply_data.data.ping.state = ++ pcmk_pacemakerd_api_daemon_state_text2enum( ++ crm_element_value(msg_data, XML_PING_ATTR_PACEMAKERDSTATE)); ++ reply_data.data.ping.status = ++ crm_str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS), ++ "ok", FALSE)?pcmk_rc_ok:pcmk_rc_error; ++ reply_data.data.ping.last_good = (time_t) value_ll; ++ reply_data.data.ping.sys_from = crm_element_value(msg_data, ++ XML_PING_ATTR_SYSFROM); ++ ++done: ++ pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); ++} ++ ++pcmk__ipc_methods_t * ++pcmk__pacemakerd_api_methods() ++{ ++ pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); ++ ++ if (cmds != NULL) { ++ cmds->new_data = new_data; ++ cmds->free_data = free_data; ++ cmds->post_connect = post_connect; ++ cmds->reply_expected = reply_expected; ++ cmds->dispatch = dispatch; ++ cmds->post_disconnect = post_disconnect; ++ } ++ return cmds; ++} ++ ++int ++pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name) ++{ ++ pacemakerd_api_private_t *private; ++ xmlNode *cmd; ++ int rc; ++ ++ CRM_CHECK(api != NULL, return -EINVAL); ++ private = api->api_data; ++ CRM_ASSERT(private != NULL); ++ ++ cmd = create_request(CRM_OP_PING, NULL, NULL, CRM_SYSTEM_MCP, ++ ipc_name?ipc_name:((crm_system_name? 
crm_system_name : "client")), ++ private->client_uuid); ++ ++ if (cmd) { ++ rc = pcmk__send_ipc_request(api, cmd); ++ if (rc != pcmk_rc_ok) { ++ crm_debug("Couldn't ping pacemakerd: %s rc=%d", ++ pcmk_rc_str(rc), rc); ++ rc = ECOMM; ++ } ++ free_xml(cmd); ++ } else { ++ rc = ENOMSG; ++ } ++ ++ return rc; ++} +-- +1.8.3.1 + + +From 06da3c3685b0bdf093a13067cc399e782115e39c Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 20 Jul 2020 23:28:32 +0200 +Subject: [PATCH 4/6] Feature: tools: Add -P to crmadmin to ping via + pacemakerd-api + +--- + include/crm/crm.h | 2 +- + tools/crmadmin.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 152 insertions(+), 11 deletions(-) + +diff --git a/include/crm/crm.h b/include/crm/crm.h +index dc2adc1..ce2074b 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -51,7 +51,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.4.0" ++# define CRM_FEATURE_SET "3.4.1" + + # define EOS '\0' + # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) +diff --git a/tools/crmadmin.c b/tools/crmadmin.c +index 4688458..2ebdd14 100644 +--- a/tools/crmadmin.c ++++ b/tools/crmadmin.c +@@ -20,7 +20,9 @@ + #include + #include + #include ++#include + #include ++#include + #include + + #define DEFAULT_MESSAGE_TIMEOUT_MS 30000 +@@ -31,6 +33,8 @@ static GMainLoop *mainloop = NULL; + + bool do_work(pcmk_ipc_api_t *api); + void do_find_node_list(xmlNode *xml_node); ++static char *ipc_name = NULL; ++ + gboolean admin_message_timeout(gpointer data); + + static enum { +@@ -40,6 +44,7 @@ static enum { + cmd_elect_dc, + cmd_whois_dc, + cmd_list_nodes, ++ cmd_pacemakerd_health, + } command = cmd_none; + + static gboolean BE_VERBOSE = FALSE; +@@ -82,6 +87,15 @@ static pcmk__cli_option_t long_options[] = { + pcmk__option_default + }, + { ++ "pacemakerd", no_argument, NULL, 'P', ++ 
"Display the status of local pacemakerd.", pcmk__option_default ++ }, ++ { ++ "-spacer-", no_argument, NULL, '-', ++ "\n\tResult is the state of the sub-daemons watched by pacemakerd.\n", ++ pcmk__option_default ++ }, ++ { + "dc_lookup", no_argument, NULL, 'D', + "Display the uname of the node co-ordinating the cluster.", + pcmk__option_default +@@ -122,16 +136,21 @@ static pcmk__cli_option_t long_options[] = { + { + "bash-export", no_argument, NULL, 'B', + "Display nodes as shell commands of the form 'export uname=uuid' " +- "(valid with -N/--nodes)'\n", ++ "(valid with -N/--nodes)", ++ pcmk__option_default ++ }, ++ { ++ "ipc-name", required_argument, NULL, 'i', ++ "Name to use for ipc instead of 'crmadmin' (with -P/--pacemakerd).", + pcmk__option_default + }, + { + "-spacer-", no_argument, NULL, '-', +- "Notes:", pcmk__option_default ++ "\nNotes:", pcmk__option_default + }, + { + "-spacer-", no_argument, NULL, '-', +- "The -K and -E commands do not work and may be removed in a future " ++ "\nThe -K and -E commands do not work and may be removed in a future " + "version.", + pcmk__option_default + }, +@@ -223,6 +242,88 @@ done: + quit_main_loop(exit_code); + } + ++static void ++pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, ++ enum pcmk_ipc_event event_type, crm_exit_t status, ++ void *event_data, void *user_data) ++{ ++ pcmk_pacemakerd_api_reply_t *reply = event_data; ++ ++ switch (event_type) { ++ case pcmk_ipc_event_disconnect: ++ if (exit_code == CRM_EX_DISCONNECT) { // Unexpected ++ fprintf(stderr, "error: Lost connection to pacemakerd\n"); ++ } ++ goto done; ++ break; ++ ++ case pcmk_ipc_event_reply: ++ break; ++ ++ default: ++ return; ++ } ++ ++ if (message_timer_id != 0) { ++ g_source_remove(message_timer_id); ++ message_timer_id = 0; ++ } ++ ++ if (status != CRM_EX_OK) { ++ fprintf(stderr, "error: Bad reply from pacemakerd: %s", ++ crm_exit_str(status)); ++ exit_code = status; ++ goto done; ++ } ++ ++ if (reply->reply_type != 
pcmk_pacemakerd_reply_ping) { ++ fprintf(stderr, "error: Unknown reply type %d from pacemakerd\n", ++ reply->reply_type); ++ goto done; ++ } ++ ++ // Parse desired information from reply ++ switch (command) { ++ case cmd_pacemakerd_health: ++ { ++ crm_time_t *crm_when = crm_time_new(NULL); ++ char *pinged_buf = NULL; ++ ++ crm_time_set_timet(crm_when, &reply->data.ping.last_good); ++ pinged_buf = crm_time_as_string(crm_when, ++ crm_time_log_date | crm_time_log_timeofday | ++ crm_time_log_with_timezone); ++ ++ printf("Status of %s: '%s' %s %s\n", ++ reply->data.ping.sys_from, ++ (reply->data.ping.status == pcmk_rc_ok)? ++ pcmk_pacemakerd_api_daemon_state_enum2text( ++ reply->data.ping.state):"query failed", ++ (reply->data.ping.status == pcmk_rc_ok)?"last updated":"", ++ (reply->data.ping.status == pcmk_rc_ok)?pinged_buf:""); ++ if (BE_SILENT && ++ (reply->data.ping.state != pcmk_pacemakerd_state_invalid)) { ++ fprintf(stderr, "%s\n", ++ (reply->data.ping.status == pcmk_rc_ok)? ++ pcmk_pacemakerd_api_daemon_state_enum2text( ++ reply->data.ping.state): ++ "query failed"); ++ } ++ exit_code = CRM_EX_OK; ++ free(pinged_buf); ++ } ++ break; ++ ++ default: // Not really possible here ++ exit_code = CRM_EX_SOFTWARE; ++ break; ++ } ++ ++done: ++ pcmk_disconnect_ipc(pacemakerd_api); ++ quit_main_loop(exit_code); ++} ++ + // \return Standard Pacemaker return code + static int + list_nodes() +@@ -257,7 +358,9 @@ main(int argc, char **argv) + int flag; + int rc; + pcmk_ipc_api_t *controld_api = NULL; ++ pcmk_ipc_api_t *pacemakerd_api = NULL; + bool need_controld_api = true; ++ bool need_pacemakerd_api = false; + + crm_log_cli_init("crmadmin"); + pcmk__set_cli_options(NULL, " [options]", long_options, +@@ -282,7 +385,9 @@ main(int argc, char **argv) + message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; + } + break; +- ++ case 'i': ++ ipc_name = strdup(optarg); ++ break; + case '$': + case '?': + pcmk__cli_help(flag, CRM_EX_OK); +@@ -304,6 +409,11 @@ main(int argc, char **argv) + 
case 'q': + BE_SILENT = TRUE; + break; ++ case 'P': ++ command = cmd_pacemakerd_health; ++ need_pacemakerd_api = true; ++ need_controld_api = false; ++ break; + case 'S': + command = cmd_health; + crm_trace("Option %c => %s", flag, optarg); +@@ -369,7 +479,26 @@ main(int argc, char **argv) + } + } + +- if (do_work(controld_api)) { ++ // Connect to pacemakerd if needed ++ if (need_pacemakerd_api) { ++ rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); ++ if (pacemakerd_api == NULL) { ++ fprintf(stderr, "error: Could not connect to pacemakerd: %s\n", ++ pcmk_rc_str(rc)); ++ exit_code = pcmk_rc2exitc(rc); ++ goto done; ++ } ++ pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL); ++ rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main); ++ if (rc != pcmk_rc_ok) { ++ fprintf(stderr, "error: Could not connect to pacemakerd: %s\n", ++ pcmk_rc_str(rc)); ++ exit_code = pcmk_rc2exitc(rc); ++ goto done; ++ } ++ } ++ ++ if (do_work(controld_api?controld_api:pacemakerd_api)) { + // A reply is needed from controller, so run main loop to get it + exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects + mainloop = g_main_loop_new(NULL, FALSE); +@@ -379,12 +508,19 @@ main(int argc, char **argv) + } + + done: ++ + if (controld_api != NULL) { + pcmk_ipc_api_t *capi = controld_api; +- + controld_api = NULL; // Ensure we can't free this twice + pcmk_free_ipc_api(capi); + } ++ ++ if (pacemakerd_api != NULL) { ++ pcmk_ipc_api_t *capi = pacemakerd_api; ++ pacemakerd_api = NULL; // Ensure we can't free this twice ++ pcmk_free_ipc_api(capi); ++ } ++ + if (mainloop != NULL) { + g_main_loop_unref(mainloop); + mainloop = NULL; +@@ -394,30 +530,35 @@ done: + + // \return True if reply from controller is needed + bool +-do_work(pcmk_ipc_api_t *controld_api) ++do_work(pcmk_ipc_api_t *api) + { + bool need_reply = false; + int rc = pcmk_rc_ok; + + switch (command) { + case cmd_shutdown: +- rc = pcmk_controld_api_shutdown(controld_api, dest_node); ++ rc = 
pcmk_controld_api_shutdown(api, dest_node); + break; + + case cmd_health: // dest_node != NULL + case cmd_whois_dc: // dest_node == NULL +- rc = pcmk_controld_api_ping(controld_api, dest_node); ++ rc = pcmk_controld_api_ping(api, dest_node); + need_reply = true; + break; + + case cmd_elect_dc: +- rc = pcmk_controld_api_start_election(controld_api); ++ rc = pcmk_controld_api_start_election(api); + break; + + case cmd_list_nodes: + rc = list_nodes(); + break; + ++ case cmd_pacemakerd_health: ++ rc = pcmk_pacemakerd_api_ping(api, ipc_name); ++ need_reply = true; ++ break; ++ + case cmd_none: // not actually possible here + break; + } +-- +1.8.3.1 + + +From 6ce5bb0d6fd30a204468ea245209d34f2682d7c9 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 21 Jul 2020 18:12:53 +0200 +Subject: [PATCH 5/6] Fix: pacemakerd: interworking with sbd not using + pacemakerd-api + +--- + daemons/pacemakerd/pacemakerd.c | 8 +++++++- + include/crm/common/options_internal.h | 1 + + lib/common/watchdog.c | 15 +++++++++++++++ + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index ccfae66..e91982a 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -454,6 +454,7 @@ pcmk_shutdown_worker(gpointer user_data) + crm_notice("Shutdown complete"); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; + if (!fatal_error && running_with_sbd && ++ pcmk__get_sbd_sync_resource_startup() && + !shutdown_complete_state_reported_client_closed) { + return TRUE; + } +@@ -1248,10 +1249,15 @@ main(int argc, char **argv) + mainloop_add_signal(SIGTERM, pcmk_shutdown); + mainloop_add_signal(SIGINT, pcmk_shutdown); + +- if (running_with_sbd) { ++ if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; + startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); + } else { ++ 
if (running_with_sbd) { ++ crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " ++ "by your sbd version) improve reliability of " ++ "interworking between SBD & pacemaker."); ++ } + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + init_children_processes(NULL); + } +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index db54da4..d0429c9 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -111,6 +111,7 @@ bool pcmk__valid_utilization(const char *value); + + // from watchdog.c + long pcmk__get_sbd_timeout(void); ++bool pcmk__get_sbd_sync_resource_startup(void); + long pcmk__auto_watchdog_timeout(void); + bool pcmk__valid_sbd_timeout(const char *value); + +diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c +index 9d8896b..8838be6 100644 +--- a/lib/common/watchdog.c ++++ b/lib/common/watchdog.c +@@ -227,6 +227,21 @@ pcmk__get_sbd_timeout(void) + return sbd_timeout; + } + ++bool ++pcmk__get_sbd_sync_resource_startup(void) ++{ ++ static bool sync_resource_startup = false; ++ static bool checked_sync_resource_startup = false; ++ ++ if (!checked_sync_resource_startup) { ++ sync_resource_startup = ++ crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP")); ++ checked_sync_resource_startup = true; ++ } ++ ++ return sync_resource_startup; ++} ++ + long + pcmk__auto_watchdog_timeout() + { +-- +1.8.3.1 + + +From 567cb6ec6f317af9e973321633950ef26f43c486 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Thu, 23 Jul 2020 23:00:23 +0200 +Subject: [PATCH 6/6] Fix: pacemakerd: improve logging when synced with SBD + +--- + daemons/pacemakerd/pacemakerd.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c +index e91982a..c888b73 100644 +--- a/daemons/pacemakerd/pacemakerd.c ++++ b/daemons/pacemakerd/pacemakerd.c +@@ -456,6 +456,7 @@ pcmk_shutdown_worker(gpointer 
user_data) + if (!fatal_error && running_with_sbd && + pcmk__get_sbd_sync_resource_startup() && + !shutdown_complete_state_reported_client_closed) { ++ crm_notice("Waiting for SBD to pick up shutdown-complete-state."); + return TRUE; + } + +@@ -546,10 +547,14 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id) + if (crm_str_eq(pacemakerd_state, + XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, + TRUE)) { ++ if (pcmk__get_sbd_sync_resource_startup()) { ++ crm_notice("Shutdown-complete-state passed to SBD."); ++ } + shutdown_complete_state_reported_to = c->pid; + } else if (crm_str_eq(pacemakerd_state, + XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, + TRUE)) { ++ crm_notice("Received startup-trigger from SBD."); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + mainloop_set_trigger(startup_trigger); + } +@@ -1250,12 +1255,13 @@ main(int argc, char **argv) + mainloop_add_signal(SIGINT, pcmk_shutdown); + + if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { ++ crm_notice("Waiting for startup-trigger from SBD."); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; + startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); + } else { + if (running_with_sbd) { + crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " +- "by your sbd version) improve reliability of " ++ "by your SBD version) improve reliability of " + "interworking between SBD & pacemaker."); + } + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; +-- +1.8.3.1 + diff --git a/SOURCES/014-shutdown-lock.patch b/SOURCES/014-shutdown-lock.patch deleted file mode 100644 index b464947..0000000 --- a/SOURCES/014-shutdown-lock.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 8270e8aed46f6e672b94f00fe0bde07cd2b6ddd7 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 13 Dec 2019 11:38:49 -0600 -Subject: [PATCH 10/18] Low: controller: don't clear shutdown locks when node - rejoins - -Add new 
controld_delete_node_state() values for clearing resource history -while preserving shutdown locks. This is accomplished by deleting all -unlocked lrm_resource entries and all lrm_rsc_op entries, instead of the entire -lrm subsection. ---- - daemons/controld/controld_based.c | 22 +++++++++++++++++++++- - daemons/controld/controld_join_dc.c | 7 +++++-- - daemons/controld/controld_remote_ra.c | 16 ++++++++++------ - daemons/controld/controld_utils.h | 2 ++ - 4 files changed, 38 insertions(+), 9 deletions(-) - -diff --git a/daemons/controld/controld_based.c b/daemons/controld/controld_based.c -index f3a7c4f..0ffc1e8 100644 ---- a/daemons/controld/controld_based.c -+++ b/daemons/controld/controld_based.c -@@ -191,12 +191,21 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, - // Node's lrm section (name 1x) - #define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM - -+// Node's lrm_rsc_op entries and lrm_resource entries without lock (name 2x) -+#define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ -+ "|" XPATH_NODE_STATE \ -+ "//" XML_LRM_TAG_RESOURCE \ -+ "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ")]" -+ - // Node's transient_attributes section (name 1x) - #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS - - // Everything under node_state (name 1x) - #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" - -+// Unlocked history + transient attributes (name 3x) -+#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS -+ - /*! 
- * \internal - * \brief Delete subsection of a node's CIB node_state -@@ -218,6 +227,11 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, - xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); - desc = crm_strdup_printf("resource history for node %s", uname); - break; -+ case controld_section_lrm_unlocked: -+ xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname); -+ desc = crm_strdup_printf("resource history (other than shutdown " -+ "locks) for node %s", uname); -+ break; - case controld_section_attrs: - xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); - desc = crm_strdup_printf("transient attributes for node %s", uname); -@@ -226,6 +240,12 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, - xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); - desc = crm_strdup_printf("all state for node %s", uname); - break; -+ case controld_section_all_unlocked: -+ xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, -+ uname, uname, uname); -+ desc = crm_strdup_printf("all state (other than shutdown locks) " -+ "for node %s", uname); -+ break; - } - - if (fsa_cib_conn == NULL) { -@@ -234,7 +254,7 @@ controld_delete_node_state(const char *uname, enum controld_section_e section, - } else { - int call_id; - -- options |= cib_quorum_override|cib_xpath; -+ options |= cib_quorum_override|cib_xpath|cib_multiple; - call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, xpath, NULL, options); - crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", - desc, call_id, xpath); -diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c -index 885b2a9..f0eb2a2 100644 ---- a/daemons/controld/controld_join_dc.c -+++ b/daemons/controld/controld_join_dc.c -@@ -534,6 +534,7 @@ do_dc_join_ack(long long action, - int join_id = -1; - int call_id = 0; - ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); -+ enum controld_section_e section = controld_section_lrm; - - const char *op = 
crm_element_value(join_ack->msg, F_CRM_TASK); - const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); -@@ -583,8 +584,10 @@ do_dc_join_ack(long long action, - /* Update CIB with node's current executor state. A new transition will be - * triggered later, when the CIB notifies us of the change. - */ -- controld_delete_node_state(join_from, controld_section_lrm, -- cib_scope_local); -+ if (controld_shutdown_lock_enabled) { -+ section = controld_section_lrm_unlocked; -+ } -+ controld_delete_node_state(join_from, section, cib_scope_local); - if (safe_str_eq(join_from, fsa_our_uname)) { - xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); - -diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c -index 2d3dfa7..a81c354 100644 ---- a/daemons/controld/controld_remote_ra.c -+++ b/daemons/controld/controld_remote_ra.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2013-2019 the Pacemaker project contributors -+ * Copyright 2013-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -177,17 +177,21 @@ remote_node_up(const char *node_name) - int call_opt, call_id = 0; - xmlNode *update, *state; - crm_node_t *node; -+ enum controld_section_e section = controld_section_all; - - CRM_CHECK(node_name != NULL, return); - crm_info("Announcing pacemaker_remote node %s", node_name); - -- /* Clear node's entire state (resource history and transient attributes). -- * The transient attributes should and normally will be cleared when the -- * node leaves, but since remote node state has a number of corner cases, -- * clear them here as well, to be sure. -+ /* Clear node's entire state (resource history and transient attributes) -+ * other than shutdown locks. The transient attributes should and normally -+ * will be cleared when the node leaves, but since remote node state has a -+ * number of corner cases, clear them here as well, to be sure. 
- */ - call_opt = crmd_cib_smart_opt(); -- controld_delete_node_state(node_name, controld_section_all, call_opt); -+ if (controld_shutdown_lock_enabled) { -+ section = controld_section_all_unlocked; -+ } -+ controld_delete_node_state(node_name, section, call_opt); - - /* Clear node's probed attribute */ - update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE); -diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h -index 8e31007..5549636 100644 ---- a/daemons/controld/controld_utils.h -+++ b/daemons/controld/controld_utils.h -@@ -90,8 +90,10 @@ bool controld_action_is_recordable(const char *action); - // Subsections of node_state - enum controld_section_e { - controld_section_lrm, -+ controld_section_lrm_unlocked, - controld_section_attrs, - controld_section_all, -+ controld_section_all_unlocked - }; - - void controld_delete_node_state(const char *uname, --- -1.8.3.1 - diff --git a/SOURCES/015-cibsecret.patch b/SOURCES/015-cibsecret.patch new file mode 100644 index 0000000..d64cc65 --- /dev/null +++ b/SOURCES/015-cibsecret.patch @@ -0,0 +1,123 @@ +From 2e7a40570d6b21534ec0215ac5ebc174796cf17c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 20 Aug 2020 10:02:20 -0500 +Subject: [PATCH 1/2] Refactor: tools: rename function in cibsecret to be more + clear + +It led me to initially misdiagnose a problem. 
+--- + tools/cibsecret.in | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/tools/cibsecret.in b/tools/cibsecret.in +index 9b74ba3..dabbfc0 100644 +--- a/tools/cibsecret.in ++++ b/tools/cibsecret.in +@@ -162,28 +162,28 @@ check_env() { + } + + # This must be called (and return success) before calling $rsh or $rcp_to_from +-get_live_nodes() { +- # Get a list of all cluster nodes ++get_live_peers() { ++ # Get a list of all other cluster nodes + GLN_ALL_NODES="$(crm_node -l | awk '{print $2}' | grep -v "$(uname -n)")" + + # Make a list of those that respond to pings + if [ "$(id -u)" = "0" ] && which fping >/dev/null 2>&1; then +- LIVE_NODES=$(fping -a $GLN_ALL_NODES 2>/dev/null) ++ LIVE_NODES=$(fping -a $GLP_ALL_PEERS 2>/dev/null) + else + LIVE_NODES="" +- for GLN_NODE in $GLN_ALL_NODES; do \ +- ping -c 2 -q "$GLN_NODE" >/dev/null 2>&1 && +- LIVE_NODES="$LIVE_NODES $GLN_NODE" ++ for GLP_NODE in $GLP_ALL_PEERS; do \ ++ ping -c 2 -q "$GLP_NODE" >/dev/null 2>&1 && ++ LIVE_NODES="$LIVE_NODES $GLP_NODE" + done + fi + + # Warn the user about any that didn't respond to pings +- GLN_DOWN="$( (for GLN_NODE in $LIVE_NODES $GLN_ALL_NODES; do echo "$GLN_NODE"; done) | sort | uniq -u)" +- if [ "$(echo "$GLN_DOWN" | wc -w)" = "1" ]; then +- warn "node $GLN_DOWN is down" ++ GLP_DOWN="$( (for GLP_NODE in $LIVE_NODES $GLP_ALL_PEERS; do echo "$GLP_NODE"; done) | sort | uniq -u)" ++ if [ "$(echo "$GLP_DOWN" | wc -w)" = "1" ]; then ++ warn "node $GLP_DOWN is down" + warn "you'll need to update it using \"$PROG sync\" later" +- elif [ -n "$GLN_DOWN" ]; then +- warn "nodes $(echo "$GLN_DOWN" | tr '\n' ' ')are down" ++ elif [ -n "$GLP_DOWN" ]; then ++ warn "nodes $(echo "$GLP_DOWN" | tr '\n' ' ')are down" + warn "you'll need to update them using \"$PROG sync\" later" + fi + +@@ -235,7 +235,7 @@ scp_fun() { + # TODO: this procedure should be replaced with csync2 + # provided that csync2 has already been configured + sync_files() { +- 
get_live_nodes || return ++ get_live_peers || return + info "syncing $LRM_CIBSECRETS to $(echo "$LIVE_NODES" | tr '\n' ' ') ..." + $rsh rm -rf "$LRM_CIBSECRETS" && + $rsh mkdir -p "$(dirname "$LRM_CIBSECRETS")" && +@@ -244,7 +244,7 @@ sync_files() { + + sync_one() { + SO_FILE="$1" +- get_live_nodes || return ++ get_live_peers || return + info "syncing $SO_FILE to $(echo "$LIVE_NODES" | tr '\n' ' ') ..." + $rsh mkdir -p "$(dirname "$SO_FILE")" && + if [ -f "$SO_FILE" ]; then +-- +1.8.3.1 + + +From 9c1517e6a681f35d62b4714e854b258c17ab5e59 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 20 Aug 2020 10:03:23 -0500 +Subject: [PATCH 2/2] Fix: tools: properly detect local node name + +cibsecret had two serious problems when generating a list of other nodes to +sync secrets to: + +* It used `uname -n` to remove the local node from the list. If the local node + name is different from its uname, this could cause local secrets to be + removed from the local node rather than synced to other nodes. + +* It removed not just the local node name, but any node name that contained + the local node name as a substring (e.g. "node1" and "node10"). This could + cause secrets to not be synced to such nodes. + +Now, use `crm_node -n` to determine the local node name, check crm_node for +errors to get better error messages, and remove only the node name that matches +the local node name in its entirety. +--- + tools/cibsecret.in | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tools/cibsecret.in b/tools/cibsecret.in +index dabbfc0..568833c 100644 +--- a/tools/cibsecret.in ++++ b/tools/cibsecret.in +@@ -163,8 +163,14 @@ check_env() { + + # This must be called (and return success) before calling $rsh or $rcp_to_from + get_live_peers() { ++ # Get local node name ++ GLP_LOCAL_NODE="$(crm_node -n)" ++ [ $? 
-eq 0 ] || fatal "couldn't get local node name" ++ + # Get a list of all other cluster nodes +- GLN_ALL_NODES="$(crm_node -l | awk '{print $2}' | grep -v "$(uname -n)")" ++ GLP_ALL_PEERS="$(crm_node -l)" ++ [ $? -eq 0 ] || fatal "couldn't determine cluster nodes" ++ GLP_ALL_PEERS="$(echo "$GLP_ALL_PEERS" | awk '{print $2}' | grep -v "^${GLP_LOCAL_NODE}$")" + + # Make a list of those that respond to pings + if [ "$(id -u)" = "0" ] && which fping >/dev/null 2>&1; then +-- +1.8.3.1 + diff --git a/SOURCES/015-shutdown-lock.patch b/SOURCES/015-shutdown-lock.patch deleted file mode 100644 index 364b2aa..0000000 --- a/SOURCES/015-shutdown-lock.patch +++ /dev/null @@ -1,38 +0,0 @@ -From d70d90367c898bcb62fd6c7dd8d641ca56be04ae Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 20 Dec 2019 11:46:37 -0600 -Subject: [PATCH 11/18] Low: scheduler: display when a resource is - shutdown-locked to a node - -... so it shows up in logs and cluster status displays ---- - lib/pengine/native.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/lib/pengine/native.c b/lib/pengine/native.c -index b064115..5a6fd80 100644 ---- a/lib/pengine/native.c -+++ b/lib/pengine/native.c -@@ -541,6 +541,9 @@ native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node, - provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); - } - -+ if ((node == NULL) && (rsc->lock_node != NULL)) { -+ node = rsc->lock_node; -+ } - if (is_set(options, pe_print_rsconly) - || pcmk__list_of_multiple(rsc->running_on)) { - node = NULL; -@@ -583,6 +586,9 @@ native_output_string(pe_resource_t *rsc, const char *name, pe_node_t *node, - if (node && !(node->details->online) && node->details->unclean) { - have_flags = add_output_flag(outstr, "UNCLEAN", have_flags); - } -+ if (node && (node == rsc->lock_node)) { -+ have_flags = add_output_flag(outstr, "LOCKED", have_flags); -+ } - if (is_set(options, pe_print_pending)) { - const char *pending_task = native_pending_task(rsc); - --- -1.8.3.1 - 
diff --git a/SOURCES/016-shutdown-lock.patch b/SOURCES/016-shutdown-lock.patch deleted file mode 100644 index b8f8e5d..0000000 --- a/SOURCES/016-shutdown-lock.patch +++ /dev/null @@ -1,29 +0,0 @@ -From bc9c07951cb9c411324056b4d5322016153fee20 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 14 Jan 2020 16:01:16 -0600 -Subject: [PATCH 12/18] Low: tools: crm_resource resource checks should show - shutdown locks - ---- - tools/crm_resource_runtime.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 2ea8bb3..ed5fb03 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -928,6 +928,11 @@ cli_resource_check(cib_t * cib_conn, resource_t *rsc) - } - free(managed); - -+ if (rsc->lock_node) { -+ printf("%s * '%s' is locked to node %s due to shutdown\n", -+ (printed? "" : "\n"), parent->id, rsc->lock_node->details->uname); -+ } -+ - if (printed) { - printf("\n"); - } --- -1.8.3.1 - diff --git a/SOURCES/017-shutdown-lock.patch b/SOURCES/017-shutdown-lock.patch deleted file mode 100644 index 8dc7dd9..0000000 --- a/SOURCES/017-shutdown-lock.patch +++ /dev/null @@ -1,191 +0,0 @@ -From 45a6f0b051743c266c13f3ffd365baf3a9d730f6 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 14 Jan 2020 12:53:39 -0600 -Subject: [PATCH 13/18] Low: controller: allow CRM_OP_LRM_DELETE to clear CIB - only - -Normally, CRM_OP_LRM_DELETE is relayed to the affected node's controller, which -clears the resource from the executor and CIB as well the its own bookkeeping. - -Now, we want to be able to use it to clear shutdown locks for nodes that are -down. Let it take a new "mode" attribute, and if it is "cib", clear the -resource from the CIB locally without relaying the operation or doing anything -else. 
---- - daemons/controld/controld_execd.c | 4 +- - daemons/controld/controld_messages.c | 97 ++++++++++++++++++++++++++++++++-- - daemons/controld/controld_te_actions.c | 7 +++ - include/crm_internal.h | 2 + - 4 files changed, 106 insertions(+), 4 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index c0436a2..8d25fb8 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1769,7 +1769,9 @@ do_lrm_invoke(long long action, - crm_trace("Executor %s command from %s", crm_op, from_sys); - - if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { -- crm_rsc_delete = TRUE; // Only crm_resource uses this op -+ if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { -+ crm_rsc_delete = TRUE; // from crm_resource -+ } - operation = CRMD_ACTION_DELETE; - - } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { -diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c -index 466c64c..689e4a0 100644 ---- a/daemons/controld/controld_messages.c -+++ b/daemons/controld/controld_messages.c -@@ -410,6 +410,14 @@ relay_message(xmlNode * msg, gboolean originated_locally) - - } else if (safe_str_eq(fsa_our_uname, host_to)) { - is_local = 1; -+ } else if (is_for_crm && safe_str_eq(task, CRM_OP_LRM_DELETE)) { -+ xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA); -+ const char *mode = crm_element_value(msg_data, PCMK__XA_MODE); -+ -+ if (safe_str_eq(mode, XML_TAG_CIB)) { -+ // Local delete of an offline node's resource history -+ is_local = 1; -+ } - } - - if (is_for_dc || is_for_dcib || is_for_te) { -@@ -654,6 +662,86 @@ handle_failcount_op(xmlNode * stored_msg) - return I_NULL; - } - -+static enum crmd_fsa_input -+handle_lrm_delete(xmlNode *stored_msg) -+{ -+ const char *mode = NULL; -+ xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA); -+ -+ CRM_CHECK(msg_data != NULL, return I_NULL); -+ -+ /* CRM_OP_LRM_DELETE has two distinct modes. 
The default behavior is to -+ * relay the operation to the affected node, which will unregister the -+ * resource from the local executor, clear the resource's history from the -+ * CIB, and do some bookkeeping in the controller. -+ * -+ * However, if the affected node is offline, the client will specify -+ * mode="cib" which means the controller receiving the operation should -+ * clear the resource's history from the CIB and nothing else. This is used -+ * to clear shutdown locks. -+ */ -+ mode = crm_element_value(msg_data, PCMK__XA_MODE); -+ if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) { -+ // Relay to affected node -+ crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); -+ return I_ROUTER; -+ -+ } else { -+ // Delete CIB history locally (compare with do_lrm_delete()) -+ const char *from_sys = NULL; -+ const char *user_name = NULL; -+ const char *rsc_id = NULL; -+ const char *node = NULL; -+ xmlNode *rsc_xml = NULL; -+ int rc = pcmk_rc_ok; -+ -+ rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE); -+ CRM_CHECK(rsc_xml != NULL, return I_NULL); -+ -+ rsc_id = ID(rsc_xml); -+ from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM); -+ node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET); -+#if ENABLE_ACL -+ user_name = crm_acl_get_set_user(stored_msg, F_CRM_USER, NULL); -+#endif -+ crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s " -+ "(clearing CIB resource history only)", rsc_id, node, -+ (user_name? " for user " : ""), (user_name? 
user_name : "")); -+#if ENABLE_ACL -+ rc = controld_delete_resource_history(rsc_id, node, user_name, -+ cib_dryrun|cib_sync_call); -+#endif -+ if (rc == pcmk_rc_ok) { -+ rc = controld_delete_resource_history(rsc_id, node, user_name, -+ crmd_cib_smart_opt()); -+ } -+ -+ // Notify client if not from graph (compare with notify_deleted()) -+ if (from_sys && strcmp(from_sys, CRM_SYSTEM_TENGINE)) { -+ lrmd_event_data_t *op = NULL; -+ const char *from_host = crm_element_value(stored_msg, -+ F_CRM_HOST_FROM); -+ const char *transition = crm_element_value(msg_data, -+ XML_ATTR_TRANSITION_KEY); -+ -+ crm_info("Notifying %s on %s that %s was%s deleted", -+ from_sys, (from_host? from_host : "local node"), rsc_id, -+ ((rc == pcmk_rc_ok)? "" : " not")); -+ op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0); -+ op->type = lrmd_event_exec_complete; -+ op->user_data = strdup(transition? transition : FAKE_TE_ID); -+ op->params = crm_str_table_new(); -+ g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), -+ strdup(CRM_FEATURE_SET)); -+ controld_rc2event(op, rc); -+ controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); -+ lrmd_free_event(op); -+ controld_trigger_delete_refresh(from_sys, rsc_id); -+ } -+ return I_NULL; -+ } -+} -+ - /*! 
- * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache - * -@@ -913,9 +1001,12 @@ handle_request(xmlNode * stored_msg, enum crmd_fsa_cause cause) - crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); - return I_JOIN_RESULT; - -- } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0 -- || strcmp(op, CRM_OP_LRM_FAIL) == 0 -- || strcmp(op, CRM_OP_LRM_REFRESH) == 0 || strcmp(op, CRM_OP_REPROBE) == 0) { -+ } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { -+ return handle_lrm_delete(stored_msg); -+ -+ } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) -+ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) -+ || (strcmp(op, CRM_OP_REPROBE) == 0)) { - - crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); - return I_ROUTER; -diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c -index 948bd64..59e0b5a 100644 ---- a/daemons/controld/controld_te_actions.c -+++ b/daemons/controld/controld_te_actions.c -@@ -107,6 +107,13 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action) - - if (!router_node) { - router_node = on_node; -+ if (safe_str_eq(task, CRM_OP_LRM_DELETE)) { -+ const char *mode = crm_element_value(action->xml, PCMK__XA_MODE); -+ -+ if (safe_str_eq(mode, XML_TAG_CIB)) { -+ router_node = fsa_our_uname; -+ } -+ } - } - - CRM_CHECK(on_node != NULL && strlen(on_node) != 0, -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 1f25686..2fa53dd 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -216,6 +216,8 @@ pid_t pcmk_locate_sbd(void); - # define ATTRD_OP_SYNC_RESPONSE "sync-response" - # define ATTRD_OP_CLEAR_FAILURE "clear-failure" - -+# define PCMK__XA_MODE "mode" -+ - # define PCMK_ENV_PHYSICAL_HOST "physical_host" - - --- -1.8.3.1 - diff --git a/SOURCES/018-shutdown-lock.patch b/SOURCES/018-shutdown-lock.patch deleted file mode 100644 index 99ad90e..0000000 --- a/SOURCES/018-shutdown-lock.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 
457e231256feb0bdcf206209e03f0875f50d03b3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 14 Jan 2020 16:24:08 -0600 -Subject: [PATCH 14/18] Low: tools: for down nodes, crm_resource --refresh - should clear CIB only - -This provides a mechanism to manually clear shutdown locks. ---- - tools/crm_resource_runtime.c | 16 +++++++++++++--- - 1 file changed, 13 insertions(+), 3 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index ed5fb03..e89b572 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -473,6 +473,7 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - const char *rsc_type = NULL; - xmlNode *params = NULL; - xmlNode *msg_data = NULL; -+ bool cib_only = false; - resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); - - if (rsc == NULL) { -@@ -504,10 +505,14 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - } - - if (!(node->details->online)) { -- CMD_ERR("Node %s is not online", host_uname); -- return -ENOTCONN; -+ if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { -+ cib_only = true; -+ } else { -+ CMD_ERR("Node %s is not online", host_uname); -+ return -ENOTCONN; -+ } - } -- if (pe__is_guest_or_remote_node(node)) { -+ if (!cib_only && pe__is_guest_or_remote_node(node)) { - node = pe__current_node(node->details->remote_rsc); - if (node == NULL) { - CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", -@@ -533,6 +538,11 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - -+ if (cib_only) { -+ // Indicate that only the CIB needs to be cleaned -+ crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); -+ } -+ - xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); - if (rsc->clone_name) { - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); --- -1.8.3.1 - diff --git a/SOURCES/019-shutdown-lock.patch b/SOURCES/019-shutdown-lock.patch deleted file mode 100644 index 
f94dc58..0000000 --- a/SOURCES/019-shutdown-lock.patch +++ /dev/null @@ -1,221 +0,0 @@ -From cf1e90ffe764f3639799206db9444ae32821386b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 10 Jan 2020 18:18:07 -0600 -Subject: [PATCH 15/18] Low: scheduler: clear resource history when appropriate - -Tell the controller to clear resource history from the CIB when a resource has -a shutdown lock that expired or was cancelled because the resource is already -active elsewhere. ---- - include/crm/pengine/internal.h | 4 +++- - include/crm/pengine/pe_types.h | 4 +++- - lib/pacemaker/pcmk_sched_allocate.c | 1 + - lib/pacemaker/pcmk_sched_graph.c | 16 ++++++++++++++-- - lib/pacemaker/pcmk_sched_native.c | 6 ++++++ - lib/pengine/unpack.c | 1 + - lib/pengine/utils.c | 34 ++++++++++++++++++++++++++++++++-- - 7 files changed, 60 insertions(+), 6 deletions(-) - -diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h -index 119624d..bc2c70e 100644 ---- a/include/crm/pengine/internal.h -+++ b/include/crm/pengine/internal.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -435,5 +435,7 @@ void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, - pe_working_set_t *data_set); - - bool pe__resource_is_disabled(pe_resource_t *rsc); -+pe_action_t *pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, -+ pe_working_set_t *data_set); - - #endif -diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h -index 123d8ef..572787b 100644 ---- a/include/crm/pengine/pe_types.h -+++ b/include/crm/pengine/pe_types.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -287,6 +287,8 @@ enum pe_action_flags { - pe_action_reschedule = 0x02000, - pe_action_tracking = 0x04000, - pe_action_dedup = 0x08000, //! Internal state tracking when creating graph -+ -+ pe_action_dc = 0x10000, //! Action may run on DC instead of target - }; - /* *INDENT-ON* */ - -diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c -index 884e1bd..195d055 100644 ---- a/lib/pacemaker/pcmk_sched_allocate.c -+++ b/lib/pacemaker/pcmk_sched_allocate.c -@@ -1026,6 +1026,7 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set) - pe_rsc_info(rsc, - "Cancelling shutdown lock because %s is already active", - rsc->id); -+ pe__clear_resource_history(rsc, rsc->lock_node, data_set); - rsc->lock_node = NULL; - rsc->lock_time = 0; - } -diff --git a/lib/pacemaker/pcmk_sched_graph.c b/lib/pacemaker/pcmk_sched_graph.c -index 2861f3d..355ffca 100644 ---- a/lib/pacemaker/pcmk_sched_graph.c -+++ b/lib/pacemaker/pcmk_sched_graph.c -@@ -586,10 +586,11 @@ update_action(pe_action_t *then, pe_working_set_t *data_set) - - /* 'then' is required, so we must abandon 'first' - * (e.g. a required stop cancels any reload). -- * Only used with reload actions as 'first'. 
- */ - set_bit(other->action->flags, pe_action_optional); -- clear_bit(first->rsc->flags, pe_rsc_reload); -+ if (!strcmp(first->task, CRMD_ACTION_RELOAD)) { -+ clear_bit(first->rsc->flags, pe_rsc_reload); -+ } - } - - if (first->rsc && then->rsc && (first->rsc != then->rsc) -@@ -1039,6 +1040,11 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - } else if (safe_str_eq(action->task, CRM_OP_LRM_REFRESH)) { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); - -+ } else if (safe_str_eq(action->task, CRM_OP_LRM_DELETE)) { -+ // CIB-only clean-up for shutdown locks -+ action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); -+ crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB); -+ - /* } else if(safe_str_eq(action->task, RSC_PROBED)) { */ - /* action_xml = create_xml_node(NULL, XML_GRAPH_TAG_CRM_EVENT); */ - -@@ -1051,6 +1057,7 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set) - - } else { - action_xml = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); -+ - #if ENABLE_VERSIONED_ATTRS - rsc_details = pe_rsc_action_details(action); - #endif -@@ -1392,6 +1399,11 @@ should_dump_action(pe_action_t *action) - log_action(LOG_DEBUG, "Unallocated action", action, false); - return false; - -+ } else if (is_set(action->flags, pe_action_dc)) { -+ crm_trace("Action %s (%d) should be dumped: " -+ "can run on DC instead of %s", -+ action->uuid, action->id, action->node->details->uname); -+ - } else if (pe__is_guest_node(action->node) - && !action->node->details->remote_requires_reset) { - crm_trace("Action %s (%d) should be dumped: " -diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c -index 9ebdd35..714a7a0 100644 ---- a/lib/pacemaker/pcmk_sched_native.c -+++ b/lib/pacemaker/pcmk_sched_native.c -@@ -1403,6 +1403,12 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - pe_order_runnable_left, data_set); - } - -+ // Don't clear resource history if probing on 
same node -+ custom_action_order(rsc, generate_op_key(rsc->id, CRM_OP_LRM_DELETE, 0), -+ NULL, rsc, generate_op_key(rsc->id, RSC_STATUS, 0), -+ NULL, pe_order_same_node|pe_order_then_cancels_first, -+ data_set); -+ - // Certain checks need allowed nodes - if (check_unfencing || check_utilization || rsc->container) { - allowed_nodes = allowed_nodes_as_list(rsc, data_set); -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 5139e60..87edc83 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2218,6 +2218,7 @@ unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, - > (lock_time + data_set->shutdown_lock))) { - pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", - rsc->id, node->details->uname); -+ pe__clear_resource_history(rsc, node, data_set); - } else { - rsc->lock_node = node; - rsc->lock_time = lock_time; -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 586d92c..b61455d 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -520,6 +520,11 @@ custom_action(resource_t * rsc, char *key, const char *task, - } - action->uuid = strdup(key); - -+ if (safe_str_eq(task, CRM_OP_LRM_DELETE)) { -+ // Resource history deletion for a node can be done on the DC -+ pe_set_action_bit(action, pe_action_dc); -+ } -+ - pe_set_action_bit(action, pe_action_runnable); - if (optional) { - pe_set_action_bit(action, pe_action_optional); -@@ -588,7 +593,8 @@ custom_action(resource_t * rsc, char *key, const char *task, - pe_set_action_bit(action, pe_action_optional); - /* action->runnable = FALSE; */ - -- } else if (action->node->details->online == FALSE -+ } else if (is_not_set(action->flags, pe_action_dc) -+ && !(action->node->details->online) - && (!pe__is_guest_node(action->node) - || action->node->details->remote_requires_reset)) { - pe_clear_action_bit(action, pe_action_runnable); -@@ -600,7 +606,8 @@ custom_action(resource_t * rsc, char *key, const char *task, - pe_fence_node(data_set, action->node, 
"resource actions are unrunnable"); - } - -- } else if (action->node->details->pending) { -+ } else if (is_not_set(action->flags, pe_action_dc) -+ && action->node->details->pending) { - pe_clear_action_bit(action, pe_action_runnable); - do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", - action->uuid, action->node->details->uname); -@@ -714,6 +721,8 @@ unpack_operation_on_fail(action_t * action) - - value = on_fail; - } -+ } else if (safe_str_eq(action->task, CRM_OP_LRM_DELETE)) { -+ value = "ignore"; - } - - return value; -@@ -2595,3 +2604,24 @@ pe__resource_is_disabled(pe_resource_t *rsc) - } - return false; - } -+ -+/*! -+ * \internal -+ * \brief Create an action to clear a resource's history from CIB -+ * -+ * \param[in] rsc Resource to clear -+ * \param[in] node Node to clear history on -+ * -+ * \return New action to clear resource history -+ */ -+pe_action_t * -+pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, -+ pe_working_set_t *data_set) -+{ -+ char *key = NULL; -+ -+ CRM_ASSERT(rsc && node); -+ key = generate_op_key(rsc->id, CRM_OP_LRM_DELETE, 0); -+ return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE, -+ data_set); -+} --- -1.8.3.1 - diff --git a/SOURCES/020-shutdown-lock.patch b/SOURCES/020-shutdown-lock.patch deleted file mode 100644 index f650b81..0000000 --- a/SOURCES/020-shutdown-lock.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 16bcad136dc004b7c7bb9f5044c7ef488c441701 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 21 Nov 2019 15:39:42 -0600 -Subject: [PATCH 16/18] Feature: controller: bump feature set for shutdown-lock - ---- - include/crm/crm.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/include/crm/crm.h b/include/crm/crm.h -index cbf72d3..d2ffb61 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version 
control history for this file may have further details. - * -@@ -51,7 +51,7 @@ extern "C" { - * >=3.0.13: Fail counts include operation name and interval - * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED - */ --# define CRM_FEATURE_SET "3.2.0" -+# define CRM_FEATURE_SET "3.3.0" - - # define EOS '\0' - # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) --- -1.8.3.1 - diff --git a/SOURCES/021-shutdown-lock.patch b/SOURCES/021-shutdown-lock.patch deleted file mode 100644 index cdd9dba..0000000 --- a/SOURCES/021-shutdown-lock.patch +++ /dev/null @@ -1,738 +0,0 @@ -From a9fdae8b3acd9a271d04f98f9c4e230bfa74efd3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 6 Jan 2020 16:19:12 -0600 -Subject: [PATCH 17/18] Test: scheduler: add regression tests for shutdown - locks - ---- - cts/cts-scheduler.in | 4 +- - cts/scheduler/shutdown-lock-expiration.dot | 11 ++ - cts/scheduler/shutdown-lock-expiration.exp | 68 +++++++++ - cts/scheduler/shutdown-lock-expiration.scores | 17 +++ - cts/scheduler/shutdown-lock-expiration.summary | 31 ++++ - cts/scheduler/shutdown-lock-expiration.xml | 187 +++++++++++++++++++++++++ - cts/scheduler/shutdown-lock.dot | 11 ++ - cts/scheduler/shutdown-lock.exp | 64 +++++++++ - cts/scheduler/shutdown-lock.scores | 17 +++ - cts/scheduler/shutdown-lock.summary | 31 ++++ - cts/scheduler/shutdown-lock.xml | 186 ++++++++++++++++++++++++ - 11 files changed, 626 insertions(+), 1 deletion(-) - create mode 100644 cts/scheduler/shutdown-lock-expiration.dot - create mode 100644 cts/scheduler/shutdown-lock-expiration.exp - create mode 100644 cts/scheduler/shutdown-lock-expiration.scores - create mode 100644 cts/scheduler/shutdown-lock-expiration.summary - create mode 100644 cts/scheduler/shutdown-lock-expiration.xml - create mode 100644 cts/scheduler/shutdown-lock.dot - create mode 100644 cts/scheduler/shutdown-lock.exp - create mode 100644 cts/scheduler/shutdown-lock.scores - create mode 100644 cts/scheduler/shutdown-lock.summary - 
create mode 100644 cts/scheduler/shutdown-lock.xml - -diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in -index 8fa16fb..f2957ba 100644 ---- a/cts/cts-scheduler.in -+++ b/cts/cts-scheduler.in -@@ -5,7 +5,7 @@ - # Pacemaker targets compatibility with Python 2.7 and 3.2+ - from __future__ import print_function, unicode_literals, absolute_import, division - --__copyright__ = "Copyright 2004-2019 the Pacemaker project contributors" -+__copyright__ = "Copyright 2004-2020 the Pacemaker project contributors" - __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" - - import io -@@ -956,6 +956,8 @@ TESTS = [ - [ - [ "resource-discovery", "Exercises resource-discovery location constraint option" ], - [ "rsc-discovery-per-node", "Disable resource discovery per node" ], -+ [ "shutdown-lock", "Ensure shutdown lock works properly" ], -+ [ "shutdown-lock-expiration", "Ensure shutdown lock expiration works properly" ], - ], - - # @TODO: If pacemaker implements versioned attributes, uncomment these tests -diff --git a/cts/scheduler/shutdown-lock-expiration.dot b/cts/scheduler/shutdown-lock-expiration.dot -new file mode 100644 -index 0000000..ee99079 ---- /dev/null -+++ b/cts/scheduler/shutdown-lock-expiration.dot -@@ -0,0 +1,11 @@ -+ digraph "g" { -+"Fencing_monitor_120000 node3" [ style=bold color="green" fontcolor="black"] -+"Fencing_start_0 node3" -> "Fencing_monitor_120000 node3" [ style = bold] -+"Fencing_start_0 node3" [ style=bold color="green" fontcolor="black"] -+"Fencing_stop_0 node3" -> "Fencing_start_0 node3" [ style = bold] -+"Fencing_stop_0 node3" [ style=bold color="green" fontcolor="black"] -+"rsc2_lrm_delete_0 node2" [ style=bold color="green" fontcolor="black"] -+"rsc2_monitor_10000 node4" [ style=bold color="green" fontcolor="black"] -+"rsc2_start_0 node4" -> "rsc2_monitor_10000 node4" [ style = bold] -+"rsc2_start_0 node4" [ style=bold color="green" fontcolor="black"] -+} -diff --git 
a/cts/scheduler/shutdown-lock-expiration.exp b/cts/scheduler/shutdown-lock-expiration.exp -new file mode 100644 -index 0000000..465f12b ---- /dev/null -+++ b/cts/scheduler/shutdown-lock-expiration.exp -@@ -0,0 +1,68 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/cts/scheduler/shutdown-lock-expiration.scores b/cts/scheduler/shutdown-lock-expiration.scores -new file mode 100644 -index 0000000..e5d435d ---- /dev/null -+++ b/cts/scheduler/shutdown-lock-expiration.scores -@@ -0,0 +1,17 @@ -+Allocation scores: -+Using the original execution date of: 2020-01-06 22:11:40Z -+native_color: Fencing allocation score on node1: 0 -+native_color: Fencing allocation score on node2: 0 -+native_color: Fencing allocation score on node3: 0 -+native_color: Fencing allocation score on node4: 0 -+native_color: Fencing allocation score on node5: 0 -+native_color: rsc1 allocation score on node1: INFINITY -+native_color: rsc1 allocation score on node2: -INFINITY -+native_color: rsc1 allocation score on node3: -INFINITY -+native_color: rsc1 allocation score on node4: -INFINITY -+native_color: rsc1 allocation score on node5: -INFINITY -+native_color: rsc2 allocation score on node1: 0 -+native_color: rsc2 allocation score on node2: INFINITY -+native_color: rsc2 allocation score on node3: 0 -+native_color: rsc2 allocation score on node4: 0 -+native_color: rsc2 allocation score on node5: 0 -diff --git a/cts/scheduler/shutdown-lock-expiration.summary b/cts/scheduler/shutdown-lock-expiration.summary -new file mode 100644 -index 0000000..08c93aa ---- /dev/null -+++ b/cts/scheduler/shutdown-lock-expiration.summary -@@ -0,0 +1,31 @@ -+Using the original execution date of: 2020-01-06 22:11:40Z -+ -+Current cluster status: -+Online: [ node3 node4 node5 ] -+OFFLINE: [ node1 node2 ] -+ -+ Fencing (stonith:fence_xvm): Started node3 -+ 
rsc1 (ocf::pacemaker:Dummy): Stopped node1 (LOCKED) -+ rsc2 (ocf::pacemaker:Dummy): Stopped -+ -+Transition Summary: -+ * Restart Fencing ( node3 ) due to resource definition change -+ * Start rsc2 ( node4 ) -+ -+Executing cluster transition: -+ * Resource action: Fencing stop on node3 -+ * Resource action: Fencing start on node3 -+ * Resource action: Fencing monitor=120000 on node3 -+ * Resource action: rsc2 start on node4 -+ * Cluster action: lrm_delete for rsc2 on node2 -+ * Resource action: rsc2 monitor=10000 on node4 -+Using the original execution date of: 2020-01-06 22:11:40Z -+ -+Revised cluster status: -+Online: [ node3 node4 node5 ] -+OFFLINE: [ node1 node2 ] -+ -+ Fencing (stonith:fence_xvm): Started node3 -+ rsc1 (ocf::pacemaker:Dummy): Stopped node1 (LOCKED) -+ rsc2 (ocf::pacemaker:Dummy): Started node4 -+ -diff --git a/cts/scheduler/shutdown-lock-expiration.xml b/cts/scheduler/shutdown-lock-expiration.xml -new file mode 100644 -index 0000000..26f720e ---- /dev/null -+++ b/cts/scheduler/shutdown-lock-expiration.xml -@@ -0,0 +1,187 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/cts/scheduler/shutdown-lock.dot b/cts/scheduler/shutdown-lock.dot -new file mode 100644 -index 0000000..0a7d8c3 ---- /dev/null -+++ b/cts/scheduler/shutdown-lock.dot -@@ -0,0 +1,11 @@ -+ digraph "g" { -+"Fencing_monitor_120000 node3" [ style=bold color="green" fontcolor="black"] -+"Fencing_start_0 node3" -> "Fencing_monitor_120000 node3" [ style = bold] 
-+"Fencing_start_0 node3" [ style=bold color="green" fontcolor="black"] -+"Fencing_stop_0 node1" -> "Fencing_start_0 node3" [ style = bold] -+"Fencing_stop_0 node1" -> "do_shutdown node1" [ style = bold] -+"Fencing_stop_0 node1" [ style=bold color="green" fontcolor="black"] -+"do_shutdown node1" [ style=bold color="green" fontcolor="black"] -+"rsc1_stop_0 node1" -> "do_shutdown node1" [ style = bold] -+"rsc1_stop_0 node1" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/cts/scheduler/shutdown-lock.exp b/cts/scheduler/shutdown-lock.exp -new file mode 100644 -index 0000000..e8bf9d8 ---- /dev/null -+++ b/cts/scheduler/shutdown-lock.exp -@@ -0,0 +1,64 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/cts/scheduler/shutdown-lock.scores b/cts/scheduler/shutdown-lock.scores -new file mode 100644 -index 0000000..e09ebfb ---- /dev/null -+++ b/cts/scheduler/shutdown-lock.scores -@@ -0,0 +1,17 @@ -+Allocation scores: -+Using the original execution date of: 2020-01-06 21:59:11Z -+native_color: Fencing allocation score on node1: 0 -+native_color: Fencing allocation score on node2: 0 -+native_color: Fencing allocation score on node3: 0 -+native_color: Fencing allocation score on node4: 0 -+native_color: Fencing allocation score on node5: 0 -+native_color: rsc1 allocation score on node1: INFINITY -+native_color: rsc1 allocation score on node2: -INFINITY -+native_color: rsc1 allocation score on node3: -INFINITY -+native_color: rsc1 allocation score on node4: -INFINITY -+native_color: rsc1 allocation score on node5: -INFINITY -+native_color: rsc2 allocation score on node1: -INFINITY -+native_color: rsc2 allocation score on node2: INFINITY -+native_color: rsc2 allocation score on node3: -INFINITY -+native_color: rsc2 allocation score on node4: -INFINITY -+native_color: rsc2 allocation score on node5: -INFINITY 
-diff --git a/cts/scheduler/shutdown-lock.summary b/cts/scheduler/shutdown-lock.summary -new file mode 100644 -index 0000000..6ed56d1 ---- /dev/null -+++ b/cts/scheduler/shutdown-lock.summary -@@ -0,0 +1,31 @@ -+Using the original execution date of: 2020-01-06 21:59:11Z -+ -+Current cluster status: -+Online: [ node1 node3 node4 node5 ] -+OFFLINE: [ node2 ] -+ -+ Fencing (stonith:fence_xvm): Started node1 -+ rsc1 (ocf::pacemaker:Dummy): Started node1 -+ rsc2 (ocf::pacemaker:Dummy): Stopped node2 (LOCKED) -+ -+Transition Summary: -+ * Shutdown node1 -+ * Move Fencing ( node1 -> node3 ) -+ * Stop rsc1 ( node1 ) due to node availability -+ -+Executing cluster transition: -+ * Resource action: Fencing stop on node1 -+ * Resource action: rsc1 stop on node1 -+ * Cluster action: do_shutdown on node1 -+ * Resource action: Fencing start on node3 -+ * Resource action: Fencing monitor=120000 on node3 -+Using the original execution date of: 2020-01-06 21:59:11Z -+ -+Revised cluster status: -+Online: [ node1 node3 node4 node5 ] -+OFFLINE: [ node2 ] -+ -+ Fencing (stonith:fence_xvm): Started node3 -+ rsc1 (ocf::pacemaker:Dummy): Stopped -+ rsc2 (ocf::pacemaker:Dummy): Stopped node2 (LOCKED) -+ -diff --git a/cts/scheduler/shutdown-lock.xml b/cts/scheduler/shutdown-lock.xml -new file mode 100644 -index 0000000..ec6db30 ---- /dev/null -+++ b/cts/scheduler/shutdown-lock.xml -@@ -0,0 +1,186 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - diff --git 
a/SOURCES/022-shutdown-lock.patch b/SOURCES/022-shutdown-lock.patch deleted file mode 100644 index cfcef11..0000000 --- a/SOURCES/022-shutdown-lock.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 5656b7d486569702ea6f3fe695c2fba366c970ac Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 12 Dec 2019 09:26:00 -0600 -Subject: [PATCH 18/18] Doc: Pacemaker Explained: document shutdown lock - options - ---- - doc/Pacemaker_Explained/en-US/Ch-Options.txt | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt -index f864987..35856aa 100644 ---- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt -+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt -@@ -389,6 +389,33 @@ rules with +date_spec+ are only guaranteed to be checked this often, and it - also serves as a fail-safe for certain classes of scheduler bugs. A value of 0 - disables this polling; positive values are a time interval. - -+| shutdown-lock | false | -+The default of false allows active resources to be recovered elsewhere when -+their node is cleanly shut down, which is what the vast majority of users will -+want. However, some users prefer to make resources highly available only for -+failures, with no recovery for clean shutdowns. If this option is true, -+resources active on a node when it is cleanly shut down are kept "locked" to -+that node (not allowed to run elsewhere) until they start again on that node -+after it rejoins (or for at most shutdown-lock-limit, if set). Stonith -+resources and Pacemaker Remote connections are never locked. Clone and bundle -+instances and the master role of promotable clones are currently never locked, -+though support could be added in a future release. 
Locks may be manually -+cleared using the `--refresh` option of `crm_resource` (both the resource and -+node must be specified; this works with remote nodes if their connection -+resource's target-role is set to Stopped, but not if Pacemaker Remote is -+stopped on the remote node without disabling the connection resource). -+indexterm:[shutdown-lock,Cluster Option] -+indexterm:[Cluster,Option,shutdown-lock] -+ -+| shutdown-lock-limit | 0 | -+If shutdown-lock is true, and this is set to a nonzero time duration, locked -+resources will be allowed to start after this much time has passed since the -+node shutdown was initiated, even if the node has not rejoined. (This works -+with remote nodes only if their connection resource's target-role is set to -+Stopped.) -+indexterm:[shutdown-lock-limit,Cluster Option] -+indexterm:[Cluster,Option,shutdown-lock-limit] -+ - | remove-after-stop | FALSE | - indexterm:[remove-after-stop,Cluster Option] - indexterm:[Cluster,Option,remove-after-stop] --- -1.8.3.1 - diff --git a/SOURCES/023-curses.patch b/SOURCES/023-curses.patch deleted file mode 100644 index c1d9a91..0000000 --- a/SOURCES/023-curses.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 426f06cc088d11d6db0c45b434e5ce6b69da78b4 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 2 Jan 2020 15:08:58 -0500 -Subject: [PATCH] Fix: tools: Fix definition of curses_indented_printf. - -The placeholder version that is built if curses is not enabled does not -have a type that matches the header file. Correct that. 
---- - tools/crm_mon_curses.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c -index c0dbedb..ecd0584 100644 ---- a/tools/crm_mon_curses.c -+++ b/tools/crm_mon_curses.c -@@ -368,7 +368,7 @@ curses_indented_vprintf(pcmk__output_t *out, const char *format, va_list args) { - - G_GNUC_PRINTF(2, 3) - void --curses_indented_printf(pcmk__output_t *out, const char *format, va_list args) { -+curses_indented_printf(pcmk__output_t *out, const char *format, ...) { - return; - } - --- -1.8.3.1 - diff --git a/SOURCES/024-crm_mon-cgi.patch b/SOURCES/024-crm_mon-cgi.patch deleted file mode 100644 index c6743eb..0000000 --- a/SOURCES/024-crm_mon-cgi.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 5b98dd71cef867a115a1b07fca2351ba430baf08 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 10 Jan 2020 09:54:59 -0500 -Subject: [PATCH] Fix: tools: Re-enable CGI output from crm_mon. - -The CGI header was not being written out because "false" was being -passed to the finish function. That was being passed because we didn't -want the HTML to be printed out without the refresh header. The fix is -just to s/false/true, and change the order so the extra header is added -first. 
---- - tools/crm_mon.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/tools/crm_mon.c b/tools/crm_mon.c -index c1dcf29..4b28bef 100644 ---- a/tools/crm_mon.c -+++ b/tools/crm_mon.c -@@ -1854,10 +1854,9 @@ static void - handle_html_output(crm_exit_t exit_code) { - xmlNodePtr html = NULL; - -- out->finish(out, exit_code, false, (void **) &html); - pcmk__html_add_header(html, "meta", "http-equiv", "refresh", "content", - crm_itoa(options.reconnect_msec/1000), NULL); -- htmlDocDump(out->dest, html->doc); -+ out->finish(out, exit_code, true, (void **) &html); - } - - /* --- -1.8.3.1 - diff --git a/SOURCES/025-clear-attrs.patch b/SOURCES/025-clear-attrs.patch deleted file mode 100644 index 842656c..0000000 --- a/SOURCES/025-clear-attrs.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 01b463bd715d48dde5bf76ca3a2e78e31f0ffaa1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 21 Jan 2020 17:25:57 -0600 -Subject: [PATCH] Fix: controller: clear leaving node's transient attributes - even if there is no DC - ---- - daemons/controld/controld_callbacks.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c -index f7e3db2..21f831a 100644 ---- a/daemons/controld/controld_callbacks.c -+++ b/daemons/controld/controld_callbacks.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2019 the Pacemaker project contributors -+ * Copyright 2004-2020 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -205,7 +205,11 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - cib_scope_local); - } - -- } else if(AM_I_DC) { -+ } else if (AM_I_DC || (fsa_our_dc == NULL)) { -+ /* This only needs to be done once, so normally the DC should do -+ * it. However if there is no DC, every node must do it, since -+ * there is no other way to ensure some one node does it. 
-+ */ - if (appeared) { - te_trigger_stonith_history_sync(FALSE); - } else { --- -1.8.3.1 - diff --git a/SOURCES/026-initialize-var.patch b/SOURCES/026-initialize-var.patch deleted file mode 100644 index 235c564..0000000 --- a/SOURCES/026-initialize-var.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 40fbb833dfd0efa482303f741211c61536f9e4e2 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 May 2020 15:54:28 -0500 -Subject: [PATCH] Fix: controller: properly detect remote node info requests - -This fixes a regression introduced in 1.1.19 / 2.0.0 - -Ironically the original change was intended to make crm_node -n work reliably -on Pacemaker Remote nodes (even when the node name in the cluster differed from -the local hostname). However, the remote request might not be detected -appropriately depending on what the value of an uninitialized variable happened -to be. - -The fix is to initialize the variable. ---- - daemons/controld/controld_execd_state.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c -index 9f30a4e..2d3268a 100644 ---- a/daemons/controld/controld_execd_state.c -+++ b/daemons/controld/controld_execd_state.c -@@ -540,7 +540,7 @@ crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) - */ - if (safe_str_eq(crm_element_value(request, F_CRM_TASK), - CRM_OP_NODE_INFO)) { -- int node_id; -+ int node_id = 0; - - crm_element_value_int(request, XML_ATTR_ID, &node_id); - if ((node_id <= 0) --- -1.8.3.1 - diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index fc3a4ba..b33bb5f 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -1,4 +1,5 @@ -# Globals and defines to control package behavior (configure these as desired) +# User-configurable globals and defines to control package behavior +# (these should not test {with X} values, which are declared later) ## User and group to use for nonprivileged services %global uname hacluster @@ 
-21,12 +22,11 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 2.0.3 -%global specversion 5 +%global pcmkversion 2.0.4 +%global specversion 6 -## Upstream commit (or git tag, such as "Pacemaker-" plus the -## {pcmkversion} macro for an official release) to use for this package -%global commit 4b1f869f0f64ef0d248b6aa4781d38ecccf83318 +## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build +%global commit 2deceaa3ae1fbadd844f5c5b47fd33129fa2c227 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. %global commit_abbrev 7 @@ -39,24 +39,58 @@ %global nagios_hash 105ab8a +# Define conditionals so that "rpmbuild --with " and +# "rpmbuild --without " can enable and disable specific features + +## Add option to enable support for stonith/external fencing agents +%bcond_with stonithd + +## Add option to disable support for storing sensitive information outside CIB +%bcond_without cibsecrets + +## Add option to create binaries suitable for use with profiling tools +%bcond_with profiling + +## Add option to create binaries with coverage analysis +%bcond_with coverage + +## Add option to generate documentation (requires Publican, Asciidoc and Inkscape) +%bcond_with doc + +## Add option to prefix package version with "0." 
+## (so later "official" packages will be considered updates) +%bcond_with pre_release + +## Add option to ship Upstart job files +%bcond_with upstart_job + +## Add option to turn off hardening of libraries and daemon executables +%bcond_without hardening + +## Add option to disable links for legacy daemon names +%bcond_without legacy_links + + # Define globals for convenient use later ## Workaround to use parentheses in other globals %global lparen ( %global rparen ) -## Short version of git commit -%define shortcommit %(c=%{commit}; case ${c} in - Pacemaker-*%{rparen} echo ${c:10};; - *%{rparen} echo ${c:0:%{commit_abbrev}};; esac) +## Whether this is a tagged release (final or release candidate) +%define tag_release %(c=%{commit}; case ${c} in Pacemaker-*%{rparen} echo 1 ;; + *%{rparen} echo 0 ;; esac) -## Whether this is a tagged release -%define tag_release %([ %{commit} != Pacemaker-%{shortcommit} ]; echo $?) - -## Whether this is a release candidate (in case of a tagged release) -%define pre_release %([ "%{tag_release}" -eq 0 ] || { - case "%{shortcommit}" in *-rc[[:digit:]]*%{rparen} false;; - esac; }; echo $?) 
+## Portion of export/dist tarball name after "pacemaker-", and release version +%if 0%{tag_release} +%define archive_version %(c=%{commit}; echo ${c:10}) +%define archive_github_url %{commit}#/%{name}-%{archive_version}.tar.gz +%else +%define archive_version %(c=%{commit}; echo ${c:0:%{commit_abbrev}}) +%define archive_github_url %{archive_version}#/%{name}-%{archive_version}.tar.gz +%endif +# RHEL always uses a simple release number +%define pcmk_release %{specversion} ## Heuristic used to infer bleeding-edge deployments that are ## less likely to have working versions of the documentation tools @@ -181,35 +215,6 @@ %endif -# Define conditionals so that "rpmbuild --with " and -# "rpmbuild --without " can enable and disable specific features - -## Add option to enable support for stonith/external fencing agents -%bcond_with stonithd - -## Add option to create binaries suitable for use with profiling tools -%bcond_with profiling - -## Add option to create binaries with coverage analysis -%bcond_with coverage - -## Add option to generate documentation (requires Publican, Asciidoc and Inkscape) -%bcond_with doc - -## Add option to prefix package version with "0." 
-## (so later "official" packages will be considered updates) -%bcond_with pre_release - -## Add option to ship Upstart job files -%bcond_with upstart_job - -## Add option to turn off hardening of libraries and daemon executables -%bcond_without hardening - -## Add option to disable links for legacy daemon names -%bcond_without legacy_links - - # Keep sane profiling data if requested %if %{with profiling} @@ -219,28 +224,10 @@ %endif -# Define the release version -# (do not look at externally enforced pre-release flag for tagged releases -# as only -rc tags, captured with the second condition, implies that then) -%if (!%{tag_release} && %{with pre_release}) || 0%{pre_release} -%if 0%{pre_release} -%define pcmk_release 0.%{specversion}.%(s=%{shortcommit}; echo ${s: -3}) -%else -%define pcmk_release 0.%{specversion}.%{shortcommit}.git -%endif -%else -%if 0%{tag_release} -%define pcmk_release %{specversion} -%else -# Never use the short commit in a RHEL release number -%define pcmk_release %{specversion} -%endif -%endif - Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist}.1 +Release: %{pcmk_release}%{?dist} %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else @@ -250,38 +237,33 @@ License: GPLv2+ and LGPLv2+ and BSD Url: http://www.clusterlabs.org Group: System Environment/Daemons -# Hint: use "spectool -s 0 pacemaker.spec" (rpmdevtools) to check the final URL: -# https://github.com/ClusterLabs/pacemaker/archive/e91769e5a39f5cb2f7b097d3c612368f0530535e/pacemaker-e91769e.tar.gz -Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{shortcommit}.tar.gz +# Example: https://codeload.github.com/ClusterLabs/pacemaker/tar.gz/e91769e +# will download pacemaker-e91769e.tar.gz +# +# The ending part starting with '#' is ignored by github but necessary for +# rpmbuild to know what the tar archive name is. 
(The downloaded file will be +# named correctly only for commit IDs, not tagged releases.) +# +# You can use "spectool -s 0 pacemaker.spec" (rpmdevtools) to show final URL. +Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{archive_github_url} Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz # upstream commits -Patch1: 001-status-deletion.patch -Patch2: 002-status-deletion.patch -Patch3: 003-return-codes.patch -Patch4: 004-unused.patch -Patch5: 005-shutdown-lock.patch -Patch6: 006-shutdown-lock.patch -Patch7: 007-shutdown-lock.patch -Patch8: 008-shutdown-lock.patch -Patch9: 009-shutdown-lock.patch -Patch10: 010-shutdown-lock.patch -Patch11: 011-shutdown-lock.patch -Patch12: 012-shutdown-lock.patch -Patch13: 013-shutdown-lock.patch -Patch14: 014-shutdown-lock.patch -Patch15: 015-shutdown-lock.patch -Patch16: 016-shutdown-lock.patch -Patch17: 017-shutdown-lock.patch -Patch18: 018-shutdown-lock.patch -Patch19: 019-shutdown-lock.patch -Patch20: 020-shutdown-lock.patch -Patch21: 021-shutdown-lock.patch -Patch22: 022-shutdown-lock.patch -Patch23: 023-curses.patch -Patch24: 024-crm_mon-cgi.patch -Patch25: 025-clear-attrs.patch -Patch26: 026-initialize-var.patch +Patch1: 001-rules.patch +Patch2: 002-demote.patch +Patch3: 003-trace.patch +Patch4: 004-test.patch +Patch5: 005-sysconfig.patch +Patch6: 006-ipc_refactor.patch +Patch7: 007-ipc_model.patch +Patch8: 008-crm_node.patch +Patch9: 009-timeout-log.patch +Patch10: 010-crm_mon.patch +Patch11: 011-cts.patch +Patch12: 012-ipc_fix.patch +Patch13: 013-pacemakerd.patch +Patch14: 014-sbd.patch +Patch15: 015-cibsecret.patch # downstream-only commits #Patch100: xxx.patch @@ -296,7 +278,11 @@ Requires: psmisc %endif %{?systemd_requires} +%if %{defined centos} ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 %{arm} +%else +ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 +%endif Requires: %{python_path} BuildRequires: %{python_name}-devel @@ -360,7 +346,8 @@ when related resources fail and can be configured 
to periodically check resource health. Available rpmbuild rebuild options: - --with(out) : coverage doc stonithd hardening pre_release profiling + --with(out) : cibsecrets coverage doc stonithd hardening pre_release + profiling %package cli License: GPLv2+ and LGPLv2+ @@ -534,7 +521,7 @@ The metadata files required for Pacemaker to execute the nagios plugin monitor resources. %prep -%autosetup -a 1 -n %{name}-%{commit} -S git_am -p 1 +%autosetup -a 1 -n %{name}-%{archive_version} -S git_am -p 1 %build @@ -566,6 +553,7 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" %{!?with_legacy_links: --disable-legacy-links} \ %{?with_profiling: --with-profiling} \ %{?with_coverage: --with-coverage} \ + %{?with_cibsecrets: --with-cibsecrets} \ %{!?with_doc: --with-brand=} \ %{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \ --with-initdir=%{_initrddir} \ @@ -586,6 +574,7 @@ sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool make %{_smp_mflags} V=1 %check +make %{_smp_mflags} check { cts/cts-scheduler --run load-stopped-loop \ && cts/cts-cli \ && touch .CHECKED @@ -765,7 +754,7 @@ getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u %{hacluster_id} - exit 0 %if %{defined ldconfig_scriptlets} -%ldconfig_scriptlets libs +%ldconfig_scriptlets -n %{pkgname_pcmk_libs} %ldconfig_scriptlets cluster-libs %else %post -n %{pkgname_pcmk_libs} -p /sbin/ldconfig @@ -838,6 +827,9 @@ exit 0 %{_sbindir}/attrd_updater %{_sbindir}/cibadmin +%if %{with cibsecrets} +%{_sbindir}/cibsecret +%endif %{_sbindir}/crm_diff %{_sbindir}/crm_error %{_sbindir}/crm_failcount @@ -969,9 +961,58 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog -* Wed May 13 2020 Ken Gaillot - 2.0.3-5.1 -- Fix regression when running "crm_node -n" on remote nodes -- Resolves: rhbz1833384 +* Thu Aug 20 2020 Ken Gaillot - 2.0.4-6 +- Fix cibsecret bug when node name is different from hostname +- Resolves: rhbz1870873 + +* Fri Jul 24 2020 Ken Gaillot - 
2.0.4-5 +- Synchronize start-up and shutdown with SBD +- Resolves: rhbz1718324 + +* Wed Jul 22 2020 Ken Gaillot - 2.0.4-4 +- Allow crm_node -l/-p options to work from Pacemaker Remote nodes +- Correct action timeout value listed in log message +- Fix regression in crm_mon --daemonize with HTML output +- Resolves: rhbz1796824 +- Resolves: rhbz1856035 +- Resolves: rhbz1857728 + +* Thu Jun 25 2020 Ken Gaillot - 2.0.4-3 +- Allow resource and operation defaults per resource or operation type +- Rebase on upstream 2.0.4 final release +- Support on-fail="demote" and no-quorum-policy="demote" options +- Remove incorrect comment from sysconfig file +- Resolves: rhbz1628701 +- Resolves: rhbz1828488 +- Resolves: rhbz1837747 +- Resolves: rhbz1848789 + +* Wed Jun 10 2020 Ken Gaillot - 2.0.4-2 +- Improve cibsecret help and clean up code per static analysis +- Resolves: rhbz1793860 + +* Mon Jun 8 2020 Ken Gaillot - 2.0.4-1 +- Clear leaving node's attributes if there is no DC +- Add crm_mon --node option to limit display to particular node or tagged nodes +- Add crm_mon --include/--exclude options to select what sections are shown +- priority-fencing-delay option bases delay on where resources are active +- Pending DC fencing gets 'stuck' in status display +- crm_rule can now check rule expiration when "years" is specified +- crm_mon now formats error messages better +- Support for CIB secrets is enabled +- Rebase on latest upstream Pacemaker release +- Fix regression introduced in 8.2 so crm_node -n works on remote nodes +- Avoid infinite loop when topology is removed while unfencing is in progress +- Resolves: rhbz1300604 +- Resolves: rhbz1363907 +- Resolves: rhbz1784601 +- Resolves: rhbz1787751 +- Resolves: rhbz1790591 +- Resolves: rhbz1793653 +- Resolves: rhbz1793860 +- Resolves: rhbz1828488 +- Resolves: rhbz1830535 +- Resolves: rhbz1831775 * Mon Jan 27 2020 Ken Gaillot - 2.0.3-5 - Clear leaving node's attributes if there is no DC