diff --git a/.gitignore b/.gitignore index d90e995..86d121d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ -SOURCES/016-regression-tests.patch.gz SOURCES/nagios-agents-metadata-105ab8a.tar.gz -SOURCES/pacemaker-c3c624e.tar.gz +SOURCES/pacemaker-3c4c782.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index 77a2d9d..4a34c74 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,3 +1,2 @@ -3b251742f2e6ef37faae17e905612a43635b07ae SOURCES/016-regression-tests.patch.gz ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz -572f66e455beeb43106974ed547118a26834d099 SOURCES/pacemaker-c3c624e.tar.gz +1a5dd220a120eba50048913807dd40c4fcde967c SOURCES/pacemaker-3c4c782.tar.gz diff --git a/SOURCES/001-constraint-fix.patch b/SOURCES/001-constraint-fix.patch new file mode 100644 index 0000000..0461b3f --- /dev/null +++ b/SOURCES/001-constraint-fix.patch @@ -0,0 +1,3135 @@ +From 31ff280494d415b0f4f599b4a3551065daf5f7b1 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Fri, 22 Feb 2019 11:49:30 +0100 +Subject: [PATCH 1/3] Fix: scheduler: cl#5301 - respect order constraints when + relevant resources are being probed + +This fixes violations of order constraints introduced by faf44d811 and +8f76b7821. + +Given the typical scenario with an order constraint "A.stop -> B.stop", +if A has been cleaned up and is being reprobed while B is stopping, +B.stop should wait for A.probe to complete. Since the pseudo action +"probe_complete" has been dropped by 8f76b7821, the solution here is to +optionally order "A.probe -> B.stop" as the possible alternative of +"A.stop -> B.stop". + +This also addresses the cases where actions of B are other actions +than "stop", including order constraints like "A.stop -> B.start" +implied by anti-colocations. 
+--- + pengine/allocate.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 164 insertions(+), 1 deletion(-) + +diff --git a/pengine/allocate.c b/pengine/allocate.c +index 9177fb1..578db2f 100644 +--- a/pengine/allocate.c ++++ b/pengine/allocate.c +@@ -2267,8 +2267,164 @@ apply_remote_node_ordering(pe_working_set_t *data_set) + } + } + ++static gboolean ++order_first_probe_unneeded(pe_action_t * probe, pe_action_t * rh_action) ++{ ++ /* No need to probe the resource on the node that is being ++ * unfenced. Otherwise it might introduce transition loop ++ * since probe will be performed after the node is ++ * unfenced. ++ */ ++ if (safe_str_eq(rh_action->task, CRM_OP_FENCE) ++ && probe->node && rh_action->node ++ && probe->node->details == rh_action->node->details) { ++ const char *op = g_hash_table_lookup(rh_action->meta, "stonith_action"); ++ ++ if (safe_str_eq(op, "on")) { ++ return TRUE; ++ } ++ } ++ ++ // Shutdown waits for probe to complete only if it's on the same node ++ if ((safe_str_eq(rh_action->task, CRM_OP_SHUTDOWN)) ++ && probe->node && rh_action->node ++ && probe->node->details != rh_action->node->details) { ++ return TRUE; ++ } ++ return FALSE; ++} ++ ++ ++static void ++order_first_probes(pe_working_set_t * data_set) ++{ ++ GListPtr gIter = NULL; ++ ++ for (gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { ++ pe__ordering_t *order = gIter->data; ++ enum pe_ordering order_type = pe_order_optional; ++ ++ pe_resource_t *lh_rsc = order->lh_rsc; ++ pe_resource_t *rh_rsc = order->rh_rsc; ++ pe_action_t *lh_action = order->lh_action; ++ pe_action_t *rh_action = order->rh_action; ++ const char *lh_action_task = order->lh_action_task; ++ const char *rh_action_task = order->rh_action_task; ++ ++ char *key = NULL; ++ GListPtr probes = NULL; ++ GListPtr rh_actions = NULL; ++ ++ GListPtr pIter = NULL; ++ ++ if (lh_rsc == NULL) { ++ continue; ++ ++ } else if (rh_rsc && lh_rsc == rh_rsc) { ++ continue; ++ } ++ 
++ if (lh_action == NULL && lh_action_task == NULL) { ++ continue; ++ } ++ ++ if (rh_action == NULL && rh_action_task == NULL) { ++ continue; ++ } ++ ++ /* Technically probe is expected to return "not running", which could be ++ * the alternative of stop action if the status of the resource is ++ * unknown yet. ++ */ ++ if (lh_action && safe_str_neq(lh_action->task, RSC_STOP)) { ++ continue; ++ ++ } else if (lh_action == NULL ++ && lh_action_task ++ && crm_ends_with(lh_action_task, "_" RSC_STOP "_0") == FALSE) { ++ continue; ++ } ++ ++ /* Do not probe the resource inside of a stopping container. Otherwise ++ * it might introduce transition loop since probe will be performed ++ * after the container starts again. ++ */ ++ if (rh_rsc && lh_rsc->container == rh_rsc) { ++ if (rh_action && safe_str_eq(rh_action->task, RSC_STOP)) { ++ continue; ++ ++ } else if (rh_action == NULL && rh_action_task ++ && crm_ends_with(rh_action_task,"_" RSC_STOP "_0")) { ++ continue; ++ } ++ } ++ ++ if (order->type == pe_order_none) { ++ continue; ++ } ++ ++ // Preserve the order options for future filtering ++ if (is_set(order->type, pe_order_apply_first_non_migratable)) { ++ set_bit(order_type, pe_order_apply_first_non_migratable); ++ } ++ ++ if (is_set(order->type, pe_order_same_node)) { ++ set_bit(order_type, pe_order_same_node); ++ } ++ ++ // Keep the order types for future filtering ++ if (order->type == pe_order_anti_colocation ++ || order->type == pe_order_load) { ++ order_type = order->type; ++ } ++ ++ key = generate_op_key(lh_rsc->id, RSC_STATUS, 0); ++ probes = find_actions(lh_rsc->actions, key, NULL); ++ free(key); ++ ++ if (probes == NULL) { ++ continue; ++ } ++ ++ if (rh_action) { ++ rh_actions = g_list_prepend(rh_actions, rh_action); ++ ++ } else if (rh_rsc && rh_action_task) { ++ rh_actions = find_actions(rh_rsc->actions, rh_action_task, NULL); ++ } ++ ++ if (rh_actions == NULL) { ++ g_list_free(probes); ++ continue; ++ } ++ ++ crm_trace("Processing for LH probe based on 
ordering constraint %s -> %s" ++ " (id=%d, type=%.6x)", ++ lh_action ? lh_action->uuid : lh_action_task, ++ rh_action ? rh_action->uuid : rh_action_task, ++ order->id, order->type); ++ ++ for (pIter = probes; pIter != NULL; pIter = pIter->next) { ++ pe_action_t *probe = (pe_action_t *) pIter->data; ++ GListPtr rIter = NULL; ++ ++ for (rIter = rh_actions; rIter != NULL; rIter = rIter->next) { ++ pe_action_t *rh_action_iter = (pe_action_t *) rIter->data; ++ ++ if (order_first_probe_unneeded(probe, rh_action_iter)) { ++ continue; ++ } ++ order_actions(probe, rh_action_iter, order_type); ++ } ++ } ++ ++ g_list_free(rh_actions); ++ g_list_free(probes); ++ } ++} ++ + static void +-order_probes(pe_working_set_t * data_set) ++order_then_probes(pe_working_set_t * data_set) + { + #if 0 + GListPtr gIter = NULL; +@@ -2389,6 +2545,13 @@ order_probes(pe_working_set_t * data_set) + #endif + } + ++static void ++order_probes(pe_working_set_t * data_set) ++{ ++ order_first_probes(data_set); ++ order_then_probes(data_set); ++} ++ + gboolean + stage7(pe_working_set_t * data_set) + { +-- +1.8.3.1 + + +From 62c73cf7c115d79aa72b8533ee8a820c90d2618b Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Fri, 1 Mar 2019 16:34:23 +0100 +Subject: [PATCH 2/3] Test: scheduler: cl#5301 - respect order constraints when + relevant resources are being probed (update tests) + +--- + .../11-a-then-bm-b-move-a-clone-starting.dot | 1 + + .../11-a-then-bm-b-move-a-clone-starting.exp | 3 ++ + pengine/test10/594.dot | 2 + + pengine/test10/594.exp | 6 +++ + pengine/test10/662.dot | 9 ++++ + pengine/test10/662.exp | 27 ++++++++++++ + pengine/test10/797.dot | 4 ++ + pengine/test10/797.exp | 12 ++++++ + pengine/test10/829.dot | 6 +++ + pengine/test10/829.exp | 18 ++++++++ + pengine/test10/bug-cl-5247.dot | 3 ++ + pengine/test10/bug-cl-5247.exp | 9 ++++ + pengine/test10/bug-lf-2435.dot | 1 + + pengine/test10/bug-lf-2435.exp | 3 ++ + pengine/test10/bug-n-387749.dot | 3 ++ + pengine/test10/bug-n-387749.exp | 9 ++++ + 
pengine/test10/bug-rh-1097457.dot | 8 ++++ + pengine/test10/bug-rh-1097457.exp | 24 +++++++++++ + pengine/test10/bundle-replicas-change.dot | 2 + + pengine/test10/bundle-replicas-change.exp | 6 +++ + pengine/test10/clone-no-shuffle.dot | 2 + + pengine/test10/clone-no-shuffle.exp | 6 +++ + pengine/test10/group5.dot | 6 +++ + pengine/test10/group5.exp | 18 ++++++++ + pengine/test10/group6.dot | 10 +++++ + pengine/test10/group6.exp | 30 ++++++++++++++ + pengine/test10/group9.dot | 9 ++++ + pengine/test10/group9.exp | 27 ++++++++++++ + pengine/test10/inc2.dot | 5 +++ + pengine/test10/inc2.exp | 15 +++++++ + pengine/test10/inc3.dot | 10 +++++ + pengine/test10/inc3.exp | 30 ++++++++++++++ + pengine/test10/inc4.dot | 13 ++++++ + pengine/test10/inc4.exp | 39 ++++++++++++++++++ + pengine/test10/inc5.dot | 16 ++++++++ + pengine/test10/inc5.exp | 48 ++++++++++++++++++++++ + pengine/test10/master-7.dot | 13 ++++++ + pengine/test10/master-7.exp | 39 ++++++++++++++++++ + pengine/test10/master-8.dot | 14 +++++++ + pengine/test10/master-8.exp | 42 +++++++++++++++++++ + pengine/test10/master-9.dot | 5 +++ + pengine/test10/master-9.exp | 15 +++++++ + pengine/test10/notify-0.dot | 1 + + pengine/test10/notify-0.exp | 3 ++ + pengine/test10/notify-1.dot | 1 + + pengine/test10/notify-1.exp | 3 ++ + pengine/test10/notify-2.dot | 1 + + pengine/test10/notify-2.exp | 3 ++ + pengine/test10/notify-3.dot | 5 +++ + pengine/test10/notify-3.exp | 15 +++++++ + pengine/test10/novell-252693-3.dot | 2 + + pengine/test10/novell-252693-3.exp | 6 +++ + pengine/test10/order3.dot | 3 ++ + pengine/test10/order3.exp | 9 ++++ + pengine/test10/rec-node-11.dot | 3 ++ + pengine/test10/rec-node-11.exp | 9 ++++ + pengine/test10/reload-becomes-restart.dot | 1 + + pengine/test10/reload-becomes-restart.exp | 3 ++ + pengine/test10/remote-connection-unrecoverable.dot | 2 + + pengine/test10/remote-connection-unrecoverable.exp | 6 +++ + pengine/test10/rsc_dep1.dot | 2 + + pengine/test10/rsc_dep1.exp | 6 +++ + 
pengine/test10/rsc_dep5.dot | 4 ++ + pengine/test10/rsc_dep5.exp | 12 ++++++ + pengine/test10/unfence-definition.dot | 4 ++ + pengine/test10/unfence-definition.exp | 12 ++++++ + pengine/test10/unfence-parameters.dot | 4 ++ + pengine/test10/unfence-parameters.exp | 12 ++++++ + pengine/test10/unrunnable-1.dot | 3 ++ + pengine/test10/unrunnable-1.exp | 9 ++++ + pengine/test10/whitebox-imply-stop-on-fence.dot | 6 +++ + pengine/test10/whitebox-imply-stop-on-fence.exp | 18 ++++++++ + pengine/test10/whitebox-migrate1.dot | 1 + + pengine/test10/whitebox-migrate1.exp | 6 ++- + pengine/test10/whitebox-migrate1.summary | 6 +-- + pengine/test10/whitebox-move.dot | 1 + + pengine/test10/whitebox-move.exp | 3 ++ + pengine/test10/whitebox-ms-ordering.dot | 3 ++ + pengine/test10/whitebox-ms-ordering.exp | 9 ++++ + pengine/test10/whitebox-orphaned.dot | 1 + + pengine/test10/whitebox-orphaned.exp | 3 ++ + pengine/test10/whitebox-stop.dot | 1 + + pengine/test10/whitebox-stop.exp | 3 ++ + 83 files changed, 769 insertions(+), 4 deletions(-) + +diff --git a/pengine/test10/11-a-then-bm-b-move-a-clone-starting.dot b/pengine/test10/11-a-then-bm-b-move-a-clone-starting.dot +index 2b45d58..4a89db6 100644 +--- a/pengine/test10/11-a-then-bm-b-move-a-clone-starting.dot ++++ b/pengine/test10/11-a-then-bm-b-move-a-clone-starting.dot +@@ -11,6 +11,7 @@ + "myclone-clone_stopped_0" -> "myclone-clone_start_0" [ style = bold] + "myclone-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] + "myclone_monitor_0 f20node2" -> "myclone-clone_start_0" [ style = bold] ++"myclone_monitor_0 f20node2" -> "myclone-clone_stopped_0" [ style = bold] + "myclone_monitor_0 f20node2" [ style=bold color="green" fontcolor="black"] + "myclone_start_0 f20node2" -> "myclone-clone_running_0" [ style = bold] + "myclone_start_0 f20node2" [ style=bold color="green" fontcolor="black"] +diff --git a/pengine/test10/11-a-then-bm-b-move-a-clone-starting.exp b/pengine/test10/11-a-then-bm-b-move-a-clone-starting.exp +index 
b391b42..4eeb086 100644 +--- a/pengine/test10/11-a-then-bm-b-move-a-clone-starting.exp ++++ b/pengine/test10/11-a-then-bm-b-move-a-clone-starting.exp +@@ -45,6 +45,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/594.dot b/pengine/test10/594.dot +index 4f3ea64..8a24440 100644 +--- a/pengine/test10/594.dot ++++ b/pengine/test10/594.dot +@@ -12,10 +12,12 @@ digraph "g" { + "DoFencing_stop_0" -> "child_DoFencing:2_stop_0 hadev1" [ style = bold] + "DoFencing_stop_0" [ style=bold color="green" fontcolor="orange" ] + "DoFencing_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:0_monitor_0 hadev1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_monitor_0 hadev1" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:0_stop_0 hadev2" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_stop_0 hadev2" -> "do_shutdown hadev2" [ style = bold] + "child_DoFencing:0_stop_0 hadev2" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 hadev2" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 hadev2" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:2_stop_0 hadev1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_stop_0 hadev1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/594.exp b/pengine/test10/594.exp +index 0bbf746..c0025f0 100644 +--- a/pengine/test10/594.exp ++++ b/pengine/test10/594.exp +@@ -160,6 +160,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/662.dot b/pengine/test10/662.dot +index 9848322..173103c 100644 +--- a/pengine/test10/662.dot ++++ b/pengine/test10/662.dot +@@ -6,17 +6,26 @@ + "DoFencing_stop_0" -> "child_DoFencing:0_stop_0 c001n02" [ style = bold] + "DoFencing_stop_0" [ style=bold color="green" fontcolor="orange" ] + "DoFencing_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:0_monitor_0 c001n04" -> "DoFencing_stopped_0" [ style = bold] + 
"child_DoFencing:0_monitor_0 c001n04" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:0_monitor_0 c001n09" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_monitor_0 c001n09" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:0_stop_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_stop_0 c001n02" -> "do_shutdown c001n02" [ style = bold] + "child_DoFencing:0_stop_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:1_monitor_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:1_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:1_monitor_0 c001n04" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:1_monitor_0 c001n04" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n09" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n09" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n04" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n04" [ style=bold color="green" fontcolor="black" ] + "do_shutdown c001n02" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n02_monitor_0 c001n03" -> "rsc_c001n02_start_0 c001n03" [ style = bold] +diff --git a/pengine/test10/662.exp b/pengine/test10/662.exp +index 3751565..f0a0560 100644 +--- a/pengine/test10/662.exp ++++ b/pengine/test10/662.exp +@@ -271,6 +271,33 @@ + 
+ + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/797.dot b/pengine/test10/797.dot +index 9ef868a..ff049d9 100644 +--- a/pengine/test10/797.dot ++++ b/pengine/test10/797.dot +@@ -36,12 +36,16 @@ + "child_DoFencing:1_stop_0 c001n02" -> "do_shutdown c001n02" [ style = bold] + "child_DoFencing:1_stop_0 c001n02" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:2_monitor_0 c001n01" -> "DoFencing_start_0" [ style = bold] ++"child_DoFencing:2_monitor_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n01" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:3_monitor_0 c001n01" -> "DoFencing_start_0" [ style = bold] ++"child_DoFencing:3_monitor_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n01" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:3_monitor_0 c001n02" -> "DoFencing_start_0" [ style = bold] ++"child_DoFencing:3_monitor_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_start_0" [ style = bold] ++"child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "do_shutdown c001n02" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_0 c001n03" -> "rsc_c001n01_start_0 c001n01" [ style = dashed] +diff --git a/pengine/test10/797.exp b/pengine/test10/797.exp +index 62a01ae..62d82a9 100644 +--- a/pengine/test10/797.exp ++++ b/pengine/test10/797.exp +@@ -253,6 +253,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/829.dot b/pengine/test10/829.dot +index 16ba24c..f01f1ad 100644 +--- a/pengine/test10/829.dot ++++ b/pengine/test10/829.dot +@@ -5,13 +5,19 @@ digraph "g" { + 
"DoFencing_stop_0" -> "child_DoFencing:0_stop_0 c001n02" [ style = bold] + "DoFencing_stop_0" [ style=bold color="green" fontcolor="orange" ] + "DoFencing_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:0_monitor_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_monitor_0 c001n01" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:0_stop_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_stop_0 c001n02" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:1_monitor_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:1_monitor_0 c001n01" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n08" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n01" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/829.exp b/pengine/test10/829.exp +index 74afbcc..247486a 100644 +--- a/pengine/test10/829.exp ++++ b/pengine/test10/829.exp +@@ -225,6 +225,24 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/bug-cl-5247.dot b/pengine/test10/bug-cl-5247.dot +index a978467..c125dac 100644 +--- a/pengine/test10/bug-cl-5247.dot ++++ b/pengine/test10/bug-cl-5247.dot +@@ -115,6 +115,7 @@ 
digraph "g" { + "stonith 'off' pgsr02" -> "vip-rep_start_0 pgsr01" [ style = bold] + "stonith 'off' pgsr02" -> "vip-rep_stop_0 pgsr02" [ style = bold] + "stonith 'off' pgsr02" [ style=bold color="green" fontcolor="orange"] ++"vip-master_monitor_0 pgsr01" -> "master-group_stopped_0" [ style = bold] + "vip-master_monitor_0 pgsr01" -> "vip-master_start_0 pgsr01" [ style = bold] + "vip-master_monitor_0 pgsr01" [ style=bold color="green" fontcolor="black"] + "vip-master_monitor_10000 pgsr01" [ style=bold color="green" fontcolor="black"] +@@ -125,6 +126,8 @@ digraph "g" { + "vip-master_stop_0 pgsr02" -> "master-group_stopped_0" [ style = bold] + "vip-master_stop_0 pgsr02" -> "vip-master_start_0 pgsr01" [ style = bold] + "vip-master_stop_0 pgsr02" [ style=bold color="green" fontcolor="orange"] ++"vip-rep_monitor_0 pgsr01" -> "master-group_stopped_0" [ style = bold] ++"vip-rep_monitor_0 pgsr01" -> "vip-master_stop_0 pgsr02" [ style = bold] + "vip-rep_monitor_0 pgsr01" -> "vip-rep_start_0 pgsr01" [ style = bold] + "vip-rep_monitor_0 pgsr01" [ style=bold color="green" fontcolor="black"] + "vip-rep_monitor_10000 pgsr01" [ style=bold color="green" fontcolor="black"] +diff --git a/pengine/test10/bug-cl-5247.exp b/pengine/test10/bug-cl-5247.exp +index 14c9d91..d08214d 100644 +--- a/pengine/test10/bug-cl-5247.exp ++++ b/pengine/test10/bug-cl-5247.exp +@@ -210,6 +210,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -308,6 +314,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/bug-lf-2435.dot b/pengine/test10/bug-lf-2435.dot +index 0407439..76d8f48 100644 +--- a/pengine/test10/bug-lf-2435.dot ++++ b/pengine/test10/bug-lf-2435.dot +@@ -6,5 +6,6 @@ digraph "g" { + "dummy3_stop_0 c21.chepkov.lan" [ style=bold color="green" fontcolor="black" ] + "dummy4_monitor_0 c19.chepkov.lan" [ style=bold color="green" fontcolor="black" ] + "dummy4_monitor_0 c20.chepkov.lan" [ style=bold color="green" fontcolor="black" ] ++"dummy4_monitor_0 c21.chepkov.lan" -> "dummy2_start_0 c21.chepkov.lan" [ style = 
bold] + "dummy4_monitor_0 c21.chepkov.lan" [ style=bold color="green" fontcolor="black" ] + } +diff --git a/pengine/test10/bug-lf-2435.exp b/pengine/test10/bug-lf-2435.exp +index 387d266..69a4a18 100644 +--- a/pengine/test10/bug-lf-2435.exp ++++ b/pengine/test10/bug-lf-2435.exp +@@ -8,6 +8,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/bug-n-387749.dot b/pengine/test10/bug-n-387749.dot +index 4a978ce..5095351 100644 +--- a/pengine/test10/bug-n-387749.dot ++++ b/pengine/test10/bug-n-387749.dot +@@ -41,6 +41,7 @@ digraph "g" { + "group_nfs_stop_0" [ style=bold color="green" fontcolor="orange" ] + "group_nfs_stopped_0" -> "group_nfs_start_0" [ style = bold] + "group_nfs_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"resource_ipaddr1_single_monitor_0 power720-1" -> "group_nfs_stopped_0" [ style = bold] + "resource_ipaddr1_single_monitor_0 power720-1" -> "resource_ipaddr1_single_start_0 power720-1" [ style = bold] + "resource_ipaddr1_single_monitor_0 power720-1" [ style=bold color="green" fontcolor="black" ] + "resource_ipaddr1_single_monitor_5000 power720-1" [ style=bold color="green" fontcolor="black" ] +@@ -51,6 +52,8 @@ digraph "g" { + "resource_ipaddr1_single_stop_0 power720-2" -> "group_nfs_stopped_0" [ style = bold] + "resource_ipaddr1_single_stop_0 power720-2" -> "resource_ipaddr1_single_start_0 power720-1" [ style = bold] + "resource_ipaddr1_single_stop_0 power720-2" [ style=bold color="green" fontcolor="black" ] ++"resource_nfsserver_single_monitor_0 power720-1" -> "group_nfs_stopped_0" [ style = bold] ++"resource_nfsserver_single_monitor_0 power720-1" -> "resource_ipaddr1_single_stop_0 power720-2" [ style = bold] + "resource_nfsserver_single_monitor_0 power720-1" -> "resource_nfsserver_single_start_0 power720-1" [ style = bold] + "resource_nfsserver_single_monitor_0 power720-1" [ style=bold color="green" fontcolor="black" ] + "resource_nfsserver_single_monitor_15000 power720-1" [ style=bold color="green" fontcolor="black" ] +diff --git 
a/pengine/test10/bug-n-387749.exp b/pengine/test10/bug-n-387749.exp +index 9f1f22d..d6fe8e4 100644 +--- a/pengine/test10/bug-n-387749.exp ++++ b/pengine/test10/bug-n-387749.exp +@@ -178,6 +178,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -270,6 +276,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/bug-rh-1097457.dot b/pengine/test10/bug-rh-1097457.dot +index 3dc24b6..9658ef6 100644 +--- a/pengine/test10/bug-rh-1097457.dot ++++ b/pengine/test10/bug-rh-1097457.dot +@@ -6,8 +6,12 @@ digraph "g" { + "FAKE3-IP_monitor_0 lamaVM3" [ style=bold color="green" fontcolor="black"] + "FAKE3_monitor_0 lamaVM3" [ style=bold color="green" fontcolor="black"] + "FAKE4-IP_monitor_0 lamaVM1" -> "FAKE4-IP_start_0 lamaVM2" [ style = bold] ++"FAKE4-IP_monitor_0 lamaVM1" -> "FAKE4_stop_0 lamaVM2" [ style = bold] ++"FAKE4-IP_monitor_0 lamaVM1" -> "lamaVM2-G4_stopped_0" [ style = bold] + "FAKE4-IP_monitor_0 lamaVM1" [ style=bold color="green" fontcolor="black"] + "FAKE4-IP_monitor_0 lamaVM3" -> "FAKE4-IP_start_0 lamaVM2" [ style = bold] ++"FAKE4-IP_monitor_0 lamaVM3" -> "FAKE4_stop_0 lamaVM2" [ style = bold] ++"FAKE4-IP_monitor_0 lamaVM3" -> "lamaVM2-G4_stopped_0" [ style = bold] + "FAKE4-IP_monitor_0 lamaVM3" [ style=bold color="green" fontcolor="black"] + "FAKE4-IP_monitor_30000 lamaVM2" [ style=bold color="green" fontcolor="black"] + "FAKE4-IP_start_0 lamaVM2" -> "FAKE4-IP_monitor_30000 lamaVM2" [ style = bold] +@@ -18,8 +22,10 @@ digraph "g" { + "FAKE4-IP_stop_0 lamaVM2" -> "lamaVM2-G4_stopped_0" [ style = bold] + "FAKE4-IP_stop_0 lamaVM2" [ style=bold color="green" fontcolor="orange"] + "FAKE4_monitor_0 lamaVM1" -> "FAKE4_start_0 lamaVM2" [ style = bold] ++"FAKE4_monitor_0 lamaVM1" -> "lamaVM2-G4_stopped_0" [ style = bold] + "FAKE4_monitor_0 lamaVM1" [ style=bold color="green" fontcolor="black"] + "FAKE4_monitor_0 lamaVM3" -> "FAKE4_start_0 lamaVM2" [ style = bold] ++"FAKE4_monitor_0 lamaVM3" -> "lamaVM2-G4_stopped_0" [ style = bold] + "FAKE4_monitor_0 lamaVM3" [ style=bold 
color="green" fontcolor="black"] + "FAKE4_monitor_30000 lamaVM2" [ style=bold color="green" fontcolor="black"] + "FAKE4_start_0 lamaVM2" -> "FAKE4-IP_start_0 lamaVM2" [ style = bold] +@@ -53,8 +59,10 @@ digraph "g" { + "FSlun1_monitor_0 lamaVM3" [ style=bold color="green" fontcolor="black"] + "FSlun2_monitor_0 lamaVM3" [ style=bold color="green" fontcolor="black"] + "FSlun3_monitor_0 lamaVM1" -> "FSlun3_start_0 lama2" [ style = bold] ++"FSlun3_monitor_0 lamaVM1" -> "VM2_stop_0 lama3" [ style = bold] + "FSlun3_monitor_0 lamaVM1" [ style=bold color="green" fontcolor="black"] + "FSlun3_monitor_0 lamaVM3" -> "FSlun3_start_0 lama2" [ style = bold] ++"FSlun3_monitor_0 lamaVM3" -> "VM2_stop_0 lama3" [ style = bold] + "FSlun3_monitor_0 lamaVM3" [ style=bold color="green" fontcolor="black"] + "FSlun3_monitor_10000 lama2" [ style=bold color="green" fontcolor="black"] + "FSlun3_monitor_10000 lamaVM2" [ style=bold color="green" fontcolor="black"] +diff --git a/pengine/test10/bug-rh-1097457.exp b/pengine/test10/bug-rh-1097457.exp +index 677c24b..0f60fa6 100644 +--- a/pengine/test10/bug-rh-1097457.exp ++++ b/pengine/test10/bug-rh-1097457.exp +@@ -42,6 +42,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -259,6 +265,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -353,6 +371,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/bundle-replicas-change.dot b/pengine/test10/bundle-replicas-change.dot +index 23264d9..c991371 100644 +--- a/pengine/test10/bundle-replicas-change.dot ++++ b/pengine/test10/bundle-replicas-change.dot +@@ -44,6 +44,7 @@ digraph "g" { + "httpd-bundle-docker-0_stop_0 rh74-test" [ style=bold color="green" fontcolor="black"] + "httpd-bundle-docker-1_monitor_0 rh74-test" -> "httpd-bundle-clone_start_0" [ style = bold] + "httpd-bundle-docker-1_monitor_0 rh74-test" -> "httpd-bundle-docker-1_start_0 rh74-test" [ style = bold] ++"httpd-bundle-docker-1_monitor_0 rh74-test" -> "httpd-bundle_stopped_0" [ style = bold] + "httpd-bundle-docker-1_monitor_0 
rh74-test" [ style=bold color="green" fontcolor="black"] + "httpd-bundle-docker-1_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] + "httpd-bundle-docker-1_start_0 rh74-test" -> "httpd-bundle-1_monitor_0 rh74-test" [ style = bold] +@@ -54,6 +55,7 @@ digraph "g" { + "httpd-bundle-docker-1_start_0 rh74-test" [ style=bold color="green" fontcolor="black"] + "httpd-bundle-docker-2_monitor_0 rh74-test" -> "httpd-bundle-clone_start_0" [ style = bold] + "httpd-bundle-docker-2_monitor_0 rh74-test" -> "httpd-bundle-docker-2_start_0 rh74-test" [ style = bold] ++"httpd-bundle-docker-2_monitor_0 rh74-test" -> "httpd-bundle_stopped_0" [ style = bold] + "httpd-bundle-docker-2_monitor_0 rh74-test" [ style=bold color="green" fontcolor="black"] + "httpd-bundle-docker-2_monitor_60000 rh74-test" [ style=bold color="green" fontcolor="black"] + "httpd-bundle-docker-2_start_0 rh74-test" -> "httpd-bundle-2_monitor_0 rh74-test" [ style = bold] +diff --git a/pengine/test10/bundle-replicas-change.exp b/pengine/test10/bundle-replicas-change.exp +index 8d1c67f..2e8042e 100644 +--- a/pengine/test10/bundle-replicas-change.exp ++++ b/pengine/test10/bundle-replicas-change.exp +@@ -537,6 +537,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/clone-no-shuffle.dot b/pengine/test10/clone-no-shuffle.dot +index 9ac9e13..5174ecb 100644 +--- a/pengine/test10/clone-no-shuffle.dot ++++ b/pengine/test10/clone-no-shuffle.dot +@@ -11,6 +11,7 @@ digraph "g" { + "drbd1:0_stop_0 dktest2sles10" -> "ms-drbd1_stopped_0" [ style = bold] + "drbd1:0_stop_0 dktest2sles10" [ style=bold color="green" fontcolor="black" ] + "drbd1:1_monitor_0 dktest1sles10" -> "ms-drbd1_start_0" [ style = bold] ++"drbd1:1_monitor_0 dktest1sles10" -> "ms-drbd1_stopped_0" [ style = bold] + "drbd1:1_monitor_0 dktest1sles10" [ style=bold color="green" fontcolor="black" ] + "drbd1:1_monitor_11000 dktest1sles10" [ style=bold color="green" fontcolor="black" ] + "drbd1:1_post_notify_start_0 dktest1sles10" -> 
"ms-drbd1_confirmed-post_notify_running_0" [ style = bold] +@@ -73,6 +74,7 @@ digraph "g" { + "stonith-1_monitor_0 dktest2sles10" -> "stonith-1_start_0 dktest1sles10" [ style = bold] + "stonith-1_monitor_0 dktest2sles10" [ style=bold color="green" fontcolor="black" ] + "stonith-1_start_0 dktest1sles10" [ style=bold color="green" fontcolor="black" ] ++"testip_monitor_0 dktest1sles10" -> "ms-drbd1_demote_0" [ style = bold] + "testip_monitor_0 dktest1sles10" [ style=bold color="green" fontcolor="black" ] + "testip_stop_0 dktest2sles10" -> "ms-drbd1_demote_0" [ style = bold] + "testip_stop_0 dktest2sles10" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/clone-no-shuffle.exp b/pengine/test10/clone-no-shuffle.exp +index 615709c..554d8e2 100644 +--- a/pengine/test10/clone-no-shuffle.exp ++++ b/pengine/test10/clone-no-shuffle.exp +@@ -228,6 +228,9 @@ + + + ++ ++ ++ + + + +@@ -297,6 +300,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/group5.dot b/pengine/test10/group5.dot +index 6f524bd..3fe0193 100644 +--- a/pengine/test10/group5.dot ++++ b/pengine/test10/group5.dot +@@ -1,5 +1,6 @@ + digraph "g" { + "child_rsc1_monitor_0 node2" -> "child_rsc1_start_0 node2" [ style = bold] ++"child_rsc1_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1_start_0 node2" -> "child_rsc2_start_0 node2" [ style = bold] + "child_rsc1_start_0 node2" -> "rsc2_running_0" [ style = bold] +@@ -7,7 +8,9 @@ + "child_rsc1_stop_0 node1" -> "child_rsc1_start_0 node2" [ style = bold] + "child_rsc1_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc1_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2_monitor_0 node2" -> "child_rsc1_stop_0 node1" [ style = bold] + "child_rsc2_monitor_0 node2" -> "child_rsc2_start_0 node2" [ style = bold] ++"child_rsc2_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2_monitor_0 node2" [ style=bold 
color="green" fontcolor="black" ] + "child_rsc2_start_0 node2" -> "child_rsc3_start_0 node2" [ style = bold] + "child_rsc2_start_0 node2" -> "rsc2_running_0" [ style = bold] +@@ -16,7 +19,9 @@ + "child_rsc2_stop_0 node1" -> "child_rsc2_start_0 node2" [ style = bold] + "child_rsc2_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc3_monitor_0 node2" -> "child_rsc2_stop_0 node1" [ style = bold] + "child_rsc3_monitor_0 node2" -> "child_rsc3_start_0 node2" [ style = bold] ++"child_rsc3_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc3_start_0 node2" -> "rsc2_running_0" [ style = bold] + "child_rsc3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -45,6 +50,7 @@ + "rsc2_stopped_0" -> "rsc1_stop_0 node1" [ style = bold] + "rsc2_stopped_0" -> "rsc2_start_0" [ style = bold] + "rsc2_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"rsc3_monitor_0 node2" -> "rsc2_stop_0" [ style = bold] + "rsc3_monitor_0 node2" -> "rsc3_start_0 node2" [ style = bold] + "rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/group5.exp b/pengine/test10/group5.exp +index cb3480d..4ea2b08 100644 +--- a/pengine/test10/group5.exp ++++ b/pengine/test10/group5.exp +@@ -45,6 +45,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -66,6 +75,9 @@ + + + ++ ++ ++ + + + +@@ -134,6 +146,9 @@ + + + ++ ++ ++ + + + +@@ -181,6 +196,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/group6.dot b/pengine/test10/group6.dot +index 101763f..a563e05 100644 +--- a/pengine/test10/group6.dot ++++ b/pengine/test10/group6.dot +@@ -1,5 +1,6 @@ + digraph "g" { + "child_rsc1_monitor_0 node2" -> "child_rsc1_start_0 node2" [ style = bold] ++"child_rsc1_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + 
"child_rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1_start_0 node2" -> "child_rsc2_start_0 node2" [ style = bold] + "child_rsc1_start_0 node2" -> "rsc1_running_0" [ style = bold] +@@ -7,7 +8,9 @@ + "child_rsc1_stop_0 node1" -> "child_rsc1_start_0 node2" [ style = bold] + "child_rsc1_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2_monitor_0 node2" -> "child_rsc1_stop_0 node1" [ style = bold] + "child_rsc2_monitor_0 node2" -> "child_rsc2_start_0 node2" [ style = bold] ++"child_rsc2_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc2_start_0 node2" -> "child_rsc3_start_0 node2" [ style = bold] + "child_rsc2_start_0 node2" -> "rsc1_running_0" [ style = bold] +@@ -16,7 +19,9 @@ + "child_rsc2_stop_0 node1" -> "child_rsc2_start_0 node2" [ style = bold] + "child_rsc2_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc2_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc3_monitor_0 node2" -> "child_rsc2_stop_0 node1" [ style = bold] + "child_rsc3_monitor_0 node2" -> "child_rsc3_start_0 node2" [ style = bold] ++"child_rsc3_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc3_start_0 node2" -> "rsc1_running_0" [ style = bold] + "child_rsc3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -25,6 +30,7 @@ + "child_rsc3_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc3_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc4_monitor_0 node2" -> "child_rsc4_start_0 node2" [ style = bold] ++"child_rsc4_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc4_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc4_start_0 node2" -> "child_rsc5_start_0 node2" [ 
style = bold] + "child_rsc4_start_0 node2" -> "rsc2_running_0" [ style = bold] +@@ -32,7 +38,9 @@ + "child_rsc4_stop_0 node1" -> "child_rsc4_start_0 node2" [ style = bold] + "child_rsc4_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc4_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc5_monitor_0 node2" -> "child_rsc4_stop_0 node1" [ style = bold] + "child_rsc5_monitor_0 node2" -> "child_rsc5_start_0 node2" [ style = bold] ++"child_rsc5_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc5_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc5_start_0 node2" -> "child_rsc6_start_0 node2" [ style = bold] + "child_rsc5_start_0 node2" -> "rsc2_running_0" [ style = bold] +@@ -41,7 +49,9 @@ + "child_rsc5_stop_0 node1" -> "child_rsc5_start_0 node2" [ style = bold] + "child_rsc5_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc5_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc6_monitor_0 node2" -> "child_rsc5_stop_0 node1" [ style = bold] + "child_rsc6_monitor_0 node2" -> "child_rsc6_start_0 node2" [ style = bold] ++"child_rsc6_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc6_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc6_start_0 node2" -> "rsc2_running_0" [ style = bold] + "child_rsc6_start_0 node2" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/group6.exp b/pengine/test10/group6.exp +index a74b155..cddd6f4 100644 +--- a/pengine/test10/group6.exp ++++ b/pengine/test10/group6.exp +@@ -7,6 +7,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -93,6 +102,9 @@ + + + ++ ++ ++ + + + +@@ -140,6 +152,9 @@ + + + ++ ++ ++ + + + +@@ -208,6 +223,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -293,6 +317,9 @@ + + + ++ ++ ++ + + + +@@ -340,6 +367,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/group9.dot b/pengine/test10/group9.dot +index d3c3d0e..610fe93 100644 +--- 
a/pengine/test10/group9.dot ++++ b/pengine/test10/group9.dot +@@ -25,7 +25,9 @@ + "foo_stopped_0" [ style=bold color="green" fontcolor="orange" ] + "rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] ++"rsc3_monitor_0 node2" -> "foo_stopped_0" [ style = bold] + "rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] ++"rsc4_monitor_0 node2" -> "foo_stopped_0" [ style = bold] + "rsc4_monitor_0 node2" -> "rsc4_start_0 node1" [ style = bold] + "rsc4_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc4_start_0 node1" -> "foo_running_0" [ style = bold] +@@ -34,6 +36,8 @@ + "rsc4_stop_0 node1" -> "foo_stopped_0" [ style = bold] + "rsc4_stop_0 node1" -> "rsc4_start_0 node1" [ style = bold] + "rsc4_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc5_monitor_0 node2" -> "foo_stopped_0" [ style = bold] ++"rsc5_monitor_0 node2" -> "rsc4_stop_0 node1" [ style = bold] + "rsc5_monitor_0 node2" -> "rsc5_start_0 node1" [ style = bold] + "rsc5_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc5_start_0 node1" -> "foo_running_0" [ style = bold] +@@ -42,6 +46,7 @@ + "rsc5_stop_0 node1" -> "rsc4_stop_0 node1" [ style = bold] + "rsc5_stop_0 node1" -> "rsc5_start_0 node1" [ style = bold] + "rsc5_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc6_monitor_0 node2" -> "bar_stopped_0" [ style = bold] + "rsc6_monitor_0 node2" -> "rsc6_start_0 node2" [ style = bold] + "rsc6_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc6_start_0 node2" -> "bar_running_0" [ style = bold] +@@ -50,6 +55,8 @@ + "rsc6_stop_0 node1" -> "bar_stopped_0" [ style = bold] + "rsc6_stop_0 node1" -> "rsc6_start_0 node2" [ style = bold] + "rsc6_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc7_monitor_0 node2" -> "bar_stopped_0" [ style = bold] ++"rsc7_monitor_0 node2" -> "rsc6_stop_0 node1" [ style = bold] 
+ "rsc7_monitor_0 node2" -> "rsc7_start_0 node2" [ style = bold] + "rsc7_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc7_start_0 node2" -> "bar_running_0" [ style = bold] +@@ -59,6 +66,8 @@ + "rsc7_stop_0 node1" -> "rsc6_stop_0 node1" [ style = bold] + "rsc7_stop_0 node1" -> "rsc7_start_0 node2" [ style = bold] + "rsc7_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc8_monitor_0 node2" -> "bar_stopped_0" [ style = bold] ++"rsc8_monitor_0 node2" -> "rsc7_stop_0 node1" [ style = bold] + "rsc8_monitor_0 node2" -> "rsc8_start_0 node2" [ style = bold] + "rsc8_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc8_start_0 node2" -> "bar_running_0" [ style = bold] +diff --git a/pengine/test10/group9.exp b/pengine/test10/group9.exp +index cf026e6..f05c2c2 100644 +--- a/pengine/test10/group9.exp ++++ b/pengine/test10/group9.exp +@@ -28,6 +28,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -119,6 +128,9 @@ + + + ++ ++ ++ + + + +@@ -181,6 +193,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -263,6 +284,9 @@ + + + ++ ++ ++ + + + +@@ -316,6 +340,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/inc2.dot b/pengine/test10/inc2.dot +index 456f21f..357536f 100644 +--- a/pengine/test10/inc2.dot ++++ b/pengine/test10/inc2.dot +@@ -1,9 +1,12 @@ + digraph "g" { + "child_rsc1:0_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:0_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:1_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:1_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:2_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:2_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:2_start_0 
node2" -> "rsc1_running_0" [ style = bold] + "child_rsc1:2_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -11,6 +14,7 @@ + "child_rsc1:2_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:2_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:3_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:3_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:3_start_0 node2" -> "rsc1_running_0" [ style = bold] + "child_rsc1:3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -18,6 +22,7 @@ + "child_rsc1:3_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:3_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:4_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:4_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:4_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:4_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:4_stop_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/inc2.exp b/pengine/test10/inc2.exp +index 19b9b1a..10c6f43 100644 +--- a/pengine/test10/inc2.exp ++++ b/pengine/test10/inc2.exp +@@ -123,6 +123,21 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/inc3.dot b/pengine/test10/inc3.dot +index 8e5b063..36ab9b4 100644 +--- a/pengine/test10/inc3.dot ++++ b/pengine/test10/inc3.dot +@@ -1,9 +1,12 @@ + digraph "g" { + "child_rsc1:0_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:0_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:1_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:1_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:1_monitor_0 node2" [ style=bold 
color="green" fontcolor="black" ] + "child_rsc1:2_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:2_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:2_start_0 node2" -> "rsc1_running_0" [ style = bold] + "child_rsc1:2_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -11,6 +14,7 @@ + "child_rsc1:2_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:2_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:3_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:3_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:3_start_0 node2" -> "rsc1_running_0" [ style = bold] + "child_rsc1:3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -18,16 +22,21 @@ + "child_rsc1:3_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:3_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:4_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:4_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:4_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:4_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:4_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:0_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:1_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:1_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:2_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:2_monitor_0 node1" -> "rsc2_stopped_0" [ 
style = bold] + "child_rsc2:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:3_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:3_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:3_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:3_start_0 node1" -> "rsc2_running_0" [ style = bold] + "child_rsc2:3_start_0 node1" [ style=bold color="green" fontcolor="black" ] +@@ -35,6 +44,7 @@ + "child_rsc2:3_stop_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:3_stop_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:4_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:4_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:4_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:4_start_0 node1" -> "rsc2_running_0" [ style = bold] + "child_rsc2:4_start_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/inc3.exp b/pengine/test10/inc3.exp +index 55f01b6..790a934 100644 +--- a/pengine/test10/inc3.exp ++++ b/pengine/test10/inc3.exp +@@ -123,6 +123,21 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -304,6 +319,21 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/inc4.dot b/pengine/test10/inc4.dot +index 250052f..5c2ec9c 100644 +--- a/pengine/test10/inc4.dot ++++ b/pengine/test10/inc4.dot +@@ -1,9 +1,12 @@ + digraph "g" { + "child_rsc1:0_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:0_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:1_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:1_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:2_monitor_0 node2" -> "rsc1_start_0" [ style = bold] 
++"child_rsc1:2_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:2_start_0 node2" -> "child_rsc1:3_start_0 node2" [ style = bold] + "child_rsc1:2_start_0 node2" -> "rsc1_running_0" [ style = bold] +@@ -11,7 +14,9 @@ + "child_rsc1:2_stop_0 node1" -> "child_rsc1:2_start_0 node2" [ style = bold] + "child_rsc1:2_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:2_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc1:3_monitor_0 node2" -> "child_rsc1:2_stop_0 node1" [ style = bold] + "child_rsc1:3_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:3_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:3_start_0 node2" -> "rsc1_running_0" [ style = bold] + "child_rsc1:3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -19,18 +24,24 @@ + "child_rsc1:3_stop_0 node1" -> "child_rsc1:3_start_0 node2" [ style = bold] + "child_rsc1:3_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:3_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc1:4_monitor_0 node2" -> "child_rsc1:3_stop_0 node1" [ style = bold] + "child_rsc1:4_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:4_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:4_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:4_stop_0 node1" -> "child_rsc1:3_stop_0 node1" [ style = bold] + "child_rsc1:4_stop_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:4_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:0_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:1_monitor_0 node1" -> 
"rsc2_start_0" [ style = bold] ++"child_rsc2:1_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:2_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:2_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:3_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:3_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:3_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:3_start_0 node1" -> "child_rsc2:4_start_0 node1" [ style = bold] + "child_rsc2:3_start_0 node1" -> "rsc2_running_0" [ style = bold] +@@ -38,7 +49,9 @@ + "child_rsc2:3_stop_0 node2" -> "child_rsc2:3_start_0 node1" [ style = bold] + "child_rsc2:3_stop_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:3_stop_0 node2" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:4_monitor_0 node1" -> "child_rsc2:3_stop_0 node2" [ style = bold] + "child_rsc2:4_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:4_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:4_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:4_start_0 node1" -> "rsc2_running_0" [ style = bold] + "child_rsc2:4_start_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/inc4.exp b/pengine/test10/inc4.exp +index ccc77d4..a5e9a09 100644 +--- a/pengine/test10/inc4.exp ++++ b/pengine/test10/inc4.exp +@@ -42,6 +42,9 @@ + + + ++ ++ ++ + + + +@@ -86,6 +89,9 @@ + + + ++ ++ ++ + + + +@@ -132,6 +138,21 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -254,6 +275,9 @@ + + + ++ ++ ++ + + + +@@ -319,6 +343,21 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/inc5.dot b/pengine/test10/inc5.dot +index dad7dd3..2a3d9f5 100644 +--- 
a/pengine/test10/inc5.dot ++++ b/pengine/test10/inc5.dot +@@ -4,8 +4,10 @@ + "child_rsc1:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_monitor_0 node2" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:0_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:1_monitor_0 node2" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:1_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:1_start_0 node2" -> "rsc2_running_0" [ style = bold] + "child_rsc2:1_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -13,16 +15,20 @@ + "child_rsc2:1_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:2_monitor_0 node1" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:2_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:2_monitor_0 node2" -> "rsc2_start_0" [ style = bold] ++"child_rsc2:2_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc3:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc3:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc3:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc3:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc4:0_monitor_0 node2" -> "rsc4_start_0" [ style = bold] ++"child_rsc4:0_monitor_0 node2" -> "rsc4_stopped_0" [ style = bold] + "child_rsc4:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc4:1_monitor_0 node2" -> "rsc4_start_0" [ 
style = bold] ++"child_rsc4:1_monitor_0 node2" -> "rsc4_stopped_0" [ style = bold] + "child_rsc4:1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc4:1_start_0 node2" -> "rsc4_running_0" [ style = bold] + "child_rsc4:1_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -30,12 +36,16 @@ + "child_rsc4:1_stop_0 node1" -> "rsc4_stopped_0" [ style = bold] + "child_rsc4:1_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc4:2_monitor_0 node1" -> "rsc4_start_0" [ style = bold] ++"child_rsc4:2_monitor_0 node1" -> "rsc4_stopped_0" [ style = bold] + "child_rsc4:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc4:2_monitor_0 node2" -> "rsc4_start_0" [ style = bold] ++"child_rsc4:2_monitor_0 node2" -> "rsc4_stopped_0" [ style = bold] + "child_rsc4:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc5:0_monitor_0 node1" -> "rsc5_start_0" [ style = bold] ++"child_rsc5:0_monitor_0 node1" -> "rsc5_stopped_0" [ style = bold] + "child_rsc5:0_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc5:1_monitor_0 node1" -> "rsc5_start_0" [ style = bold] ++"child_rsc5:1_monitor_0 node1" -> "rsc5_stopped_0" [ style = bold] + "child_rsc5:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc5:1_start_0 node1" -> "rsc5_running_0" [ style = bold] + "child_rsc5:1_start_0 node1" [ style=bold color="green" fontcolor="black" ] +@@ -43,16 +53,20 @@ + "child_rsc5:1_stop_0 node2" -> "rsc5_stopped_0" [ style = bold] + "child_rsc5:1_stop_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc5:2_monitor_0 node1" -> "rsc5_start_0" [ style = bold] ++"child_rsc5:2_monitor_0 node1" -> "rsc5_stopped_0" [ style = bold] + "child_rsc5:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc5:2_monitor_0 node2" -> "rsc5_start_0" [ style = bold] ++"child_rsc5:2_monitor_0 node2" -> "rsc5_stopped_0" [ style = bold] + 
"child_rsc5:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc6:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc6:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc6:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc6:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc7:0_monitor_0 node1" -> "rsc7_start_0" [ style = bold] ++"child_rsc7:0_monitor_0 node1" -> "rsc7_stopped_0" [ style = bold] + "child_rsc7:0_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc7:1_monitor_0 node1" -> "rsc7_start_0" [ style = bold] ++"child_rsc7:1_monitor_0 node1" -> "rsc7_stopped_0" [ style = bold] + "child_rsc7:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc7:1_start_0 node1" -> "rsc7_running_0" [ style = bold] + "child_rsc7:1_start_0 node1" [ style=bold color="green" fontcolor="black" ] +@@ -60,8 +74,10 @@ + "child_rsc7:1_stop_0 node2" -> "rsc7_stopped_0" [ style = bold] + "child_rsc7:1_stop_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc7:2_monitor_0 node1" -> "rsc7_start_0" [ style = bold] ++"child_rsc7:2_monitor_0 node1" -> "rsc7_stopped_0" [ style = bold] + "child_rsc7:2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc7:2_monitor_0 node2" -> "rsc7_start_0" [ style = bold] ++"child_rsc7:2_monitor_0 node2" -> "rsc7_stopped_0" [ style = bold] + "child_rsc7:2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc8:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc8:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/inc5.exp b/pengine/test10/inc5.exp +index 0d19405..ecf8d3d 100644 +--- a/pengine/test10/inc5.exp ++++ b/pengine/test10/inc5.exp +@@ -108,6 +108,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -271,6 +283,18 @@ + + + ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ + + + +@@ -398,6 +422,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -561,6 +597,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/master-7.dot b/pengine/test10/master-7.dot +index cd89c08..6cf865b 100644 +--- a/pengine/test10/master-7.dot ++++ b/pengine/test10/master-7.dot +@@ -10,9 +10,13 @@ digraph "g" { + "DoFencing_stopped_0" [ style=bold color="green" fontcolor="orange" ] + "child_DoFencing:0_stop_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_stop_0 c001n01" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:2_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n08" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "group-1_running_0" [ style=bold color="green" fontcolor="orange" ] + "group-1_start_0" -> "group-1_running_0" [ style = bold] +@@ -72,16 +76,25 @@ digraph "g" { + "ocf_msdummy:0_demote_0 c001n01" [ style=bold color="green" fontcolor="orange" ] + "ocf_msdummy:0_stop_0 c001n01" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:0_stop_0 c001n01" [ style=bold color="green" fontcolor="orange" ] ++"ocf_msdummy:4_monitor_0 c001n02" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"ocf_msdummy:4_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_monitor_0 c001n03" [ style=bold color="green" 
fontcolor="black" ] ++"ocf_msdummy:4_monitor_0 c001n08" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:4_stop_0 c001n01" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_stop_0 c001n01" [ style=bold color="green" fontcolor="orange" ] ++"ocf_msdummy:5_monitor_0 c001n02" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:5_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"ocf_msdummy:5_monitor_0 c001n08" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:5_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] ++"ocf_msdummy:6_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:6_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"ocf_msdummy:6_monitor_0 c001n08" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:6_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] ++"ocf_msdummy:7_monitor_0 c001n02" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:7_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"ocf_msdummy:7_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:7_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_5000 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_start_0 c001n03" -> "rsc_c001n01_monitor_5000 c001n03" [ style = bold] +diff --git a/pengine/test10/master-7.exp b/pengine/test10/master-7.exp +index 05abae6..d93ebbf 100644 +--- a/pengine/test10/master-7.exp ++++ b/pengine/test10/master-7.exp +@@ -393,6 +393,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -572,6 +584,33 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/master-8.dot b/pengine/test10/master-8.dot +index 58909fe..067f5da 100644 +--- a/pengine/test10/master-8.dot ++++ 
b/pengine/test10/master-8.dot +@@ -10,9 +10,13 @@ digraph "g" { + "DoFencing_stopped_0" [ style=bold color="green" fontcolor="orange" ] + "child_DoFencing:0_stop_0 c001n01" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:0_stop_0 c001n01" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:2_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n08" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n02" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "group-1_running_0" [ style=bold color="green" fontcolor="orange" ] + "group-1_start_0" -> "group-1_running_0" [ style = bold] +@@ -84,24 +88,34 @@ digraph "g" { + "ocf_msdummy:0_stop_0 c001n01" -> "ocf_msdummy:0_start_0 c001n03" [ style = bold] + "ocf_msdummy:0_stop_0 c001n01" [ style=bold color="green" fontcolor="orange" ] + "ocf_msdummy:4_monitor_0 c001n02" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:4_monitor_0 c001n02" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:4_monitor_0 c001n03" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:4_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:4_monitor_0 c001n08" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:4_monitor_0 c001n08" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:4_monitor_0 c001n08" [ style=bold 
color="green" fontcolor="black" ] + "ocf_msdummy:5_monitor_0 c001n02" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:5_monitor_0 c001n02" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:5_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:5_monitor_0 c001n03" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:5_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:5_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:5_monitor_0 c001n08" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:5_monitor_0 c001n08" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:5_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:6_monitor_0 c001n03" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:6_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:6_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:6_monitor_0 c001n08" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:6_monitor_0 c001n08" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:6_monitor_0 c001n08" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:7_monitor_0 c001n02" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:7_monitor_0 c001n02" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:7_monitor_0 c001n02" [ style=bold color="green" fontcolor="black" ] + "ocf_msdummy:7_monitor_0 c001n03" -> "master_rsc_1_start_0" [ style = bold] ++"ocf_msdummy:7_monitor_0 c001n03" -> "master_rsc_1_stopped_0" [ style = bold] + "ocf_msdummy:7_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_5000 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_start_0 c001n03" -> "rsc_c001n01_monitor_5000 c001n03" [ style = bold] +diff --git a/pengine/test10/master-8.exp b/pengine/test10/master-8.exp +index 477dbf8..fb584c5 100644 +--- 
a/pengine/test10/master-8.exp ++++ b/pengine/test10/master-8.exp +@@ -393,6 +393,18 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -601,6 +613,36 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/master-9.dot b/pengine/test10/master-9.dot +index 16e756e..370c214 100644 +--- a/pengine/test10/master-9.dot ++++ b/pengine/test10/master-9.dot +@@ -4,13 +4,18 @@ + "DoFencing_stop_0" -> "child_DoFencing:1_stop_0 ibm1" [ style = bold] + "DoFencing_stop_0" [ style=bold color="green" fontcolor="orange" ] + "DoFencing_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"child_DoFencing:1_monitor_0 va1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:1_monitor_0 va1" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:1_stop_0 ibm1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:1_stop_0 ibm1" -> "do_shutdown ibm1" [ style = bold] + "child_DoFencing:1_stop_0 ibm1" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 ibm1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 ibm1" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 va1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 va1" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 ibm1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 ibm1" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 va1" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 va1" [ style=bold color="green" fontcolor="black" ] + "do_shutdown ibm1" [ style=bold color="green" fontcolor="black" ] + "heartbeat_127.0.0.12_monitor_5000 va1" [ style=dashed color="red" fontcolor="black" ] +diff --git a/pengine/test10/master-9.exp b/pengine/test10/master-9.exp +index f2b0ba5..436284d 100644 +--- a/pengine/test10/master-9.exp ++++ 
b/pengine/test10/master-9.exp +@@ -65,6 +65,21 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/notify-0.dot b/pengine/test10/notify-0.dot +index b0a1355..691cc68 100644 +--- a/pengine/test10/notify-0.dot ++++ b/pengine/test10/notify-0.dot +@@ -5,6 +5,7 @@ + "child_rsc1:1_start_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:1_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "rsc1_running_0" [ style=bold color="green" fontcolor="orange" ] + "rsc1_start_0" -> "child_rsc1:1_start_0 node1" [ style = bold] +diff --git a/pengine/test10/notify-0.exp b/pengine/test10/notify-0.exp +index e5abf68..bb9955f 100644 +--- a/pengine/test10/notify-0.exp ++++ b/pengine/test10/notify-0.exp +@@ -78,6 +78,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/notify-1.dot b/pengine/test10/notify-1.dot +index d2f6183..ecdf8ba 100644 +--- a/pengine/test10/notify-1.dot ++++ b/pengine/test10/notify-1.dot +@@ -13,6 +13,7 @@ + "child_rsc2:0_pre_notify_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:1_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "rsc1_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange" ] + "rsc1_confirmed-pre_notify_start_0" -> "rsc1_post_notify_running_0" [ style = bold] +diff --git a/pengine/test10/notify-1.exp b/pengine/test10/notify-1.exp +index ee763cc..ca7d42a 100644 +--- a/pengine/test10/notify-1.exp ++++ b/pengine/test10/notify-1.exp +@@ -239,6 +239,9 @@ + + + ++ ++ ++ + + + +diff --git 
a/pengine/test10/notify-2.dot b/pengine/test10/notify-2.dot +index d2f6183..ecdf8ba 100644 +--- a/pengine/test10/notify-2.dot ++++ b/pengine/test10/notify-2.dot +@@ -13,6 +13,7 @@ + "child_rsc2:0_pre_notify_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:1_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "rsc1_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange" ] + "rsc1_confirmed-pre_notify_start_0" -> "rsc1_post_notify_running_0" [ style = bold] +diff --git a/pengine/test10/notify-2.exp b/pengine/test10/notify-2.exp +index ee763cc..ca7d42a 100644 +--- a/pengine/test10/notify-2.exp ++++ b/pengine/test10/notify-2.exp +@@ -239,6 +239,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/notify-3.dot b/pengine/test10/notify-3.dot +index 66341d0..a0e1c0b 100644 +--- a/pengine/test10/notify-3.dot ++++ b/pengine/test10/notify-3.dot +@@ -1,5 +1,6 @@ + digraph "g" { + "child_rsc1:0_monitor_0 node2" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:0_monitor_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:0_post_notify_start_0 node1" -> "rsc1_confirmed-post_notify_running_0" [ style = bold] + "child_rsc1:0_post_notify_start_0 node1" [ style=bold color="green" fontcolor="black" ] +@@ -10,6 +11,7 @@ + "child_rsc1:0_pre_notify_stop_0 node1" -> "rsc1_confirmed-pre_notify_stop_0" [ style = bold] + "child_rsc1:0_pre_notify_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc1:1_monitor_0 node1" -> "rsc1_start_0" [ style = bold] ++"child_rsc1:1_monitor_0 node1" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + 
"child_rsc1:1_post_notify_start_0 node1" -> "rsc1_confirmed-post_notify_running_0" [ style = bold] + "child_rsc1:1_post_notify_start_0 node1" [ style=bold color="green" fontcolor="black" ] +@@ -20,12 +22,15 @@ + "child_rsc1:1_stop_0 node2" -> "child_rsc1:1_start_0 node1" [ style = bold] + "child_rsc1:1_stop_0 node2" -> "rsc1_stopped_0" [ style = bold] + "child_rsc1:1_stop_0 node2" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:0_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_pre_notify_stop_0 node1" -> "rsc2_confirmed-pre_notify_stop_0" [ style = bold] + "child_rsc2:0_pre_notify_stop_0 node1" [ style=bold color="green" fontcolor="black" ] + "child_rsc2:0_stop_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:0_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:1_monitor_0 node1" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] ++"child_rsc2:1_monitor_0 node2" -> "rsc2_stopped_0" [ style = bold] + "child_rsc2:1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc1_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange" ] + "rsc1_confirmed-post_notify_stopped_0" -> "rsc1_pre_notify_start_0" [ style = bold] +diff --git a/pengine/test10/notify-3.exp b/pengine/test10/notify-3.exp +index 388e009..7948b38 100644 +--- a/pengine/test10/notify-3.exp ++++ b/pengine/test10/notify-3.exp +@@ -188,6 +188,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -414,6 +420,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/novell-252693-3.dot b/pengine/test10/novell-252693-3.dot +index 6d225f9..ed1d276 100644 +--- a/pengine/test10/novell-252693-3.dot ++++ b/pengine/test10/novell-252693-3.dot +@@ -70,6 +70,7 @@ + "evmsdcloneset_start_0" -> "evmsdcloneset_running_0" [ style = bold] + "evmsdcloneset_start_0" [ style=bold 
color="green" fontcolor="orange" ] + "imagestoreclone:0_monitor_0 node1" -> "imagestorecloneset_start_0" [ style = bold] ++"imagestoreclone:0_monitor_0 node1" -> "imagestorecloneset_stopped_0" [ style = bold] + "imagestoreclone:0_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "imagestoreclone:0_monitor_20000 node1" [ style=bold color="green" fontcolor="black" ] + "imagestoreclone:0_post_notify_start_0 node1" -> "imagestorecloneset_confirmed-post_notify_running_0" [ style = bold] +@@ -129,6 +130,7 @@ + "sles10_migrate_from_0 node1" [ style=bold color="green" fontcolor="black"] + "sles10_migrate_to_0 node2" -> "sles10_migrate_from_0 node1" [ style = bold] + "sles10_migrate_to_0 node2" [ style=bold color="green" fontcolor="black"] ++"sles10_monitor_0 node1" -> "imagestorecloneset_stop_0" [ style = bold] + "sles10_monitor_0 node1" -> "sles10_migrate_to_0 node2" [ style = bold] + "sles10_monitor_0 node1" -> "sles10_start_0 node1" [ style = bold] + "sles10_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/novell-252693-3.exp b/pengine/test10/novell-252693-3.exp +index e7cc1c4..41b43c8 100644 +--- a/pengine/test10/novell-252693-3.exp ++++ b/pengine/test10/novell-252693-3.exp +@@ -456,6 +456,9 @@ + + + ++ ++ ++ + + + +@@ -468,6 +471,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/order3.dot b/pengine/test10/order3.dot +index dd4e427..9c94c40 100644 +--- a/pengine/test10/order3.dot ++++ b/pengine/test10/order3.dot +@@ -5,7 +5,9 @@ + "rsc1_start_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc1_stop_0 node1" -> "rsc1_start_0 node2" [ style = bold] + "rsc1_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc2_monitor_0 node2" -> "rsc1_stop_0 node1" [ style = bold] + "rsc2_monitor_0 node2" -> "rsc2_start_0 node2" [ style = bold] ++"rsc2_monitor_0 node2" -> "rsc4_stop_0 node1" [ style = bold] + "rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc2_start_0 
node2" -> "rsc3_start_0 node2" [ style = bold] + "rsc2_start_0 node2" [ style=bold color="green" fontcolor="black" ] +@@ -13,6 +15,7 @@ + "rsc2_stop_0 node1" -> "rsc2_start_0 node2" [ style = bold] + "rsc2_stop_0 node1" -> "rsc4_stop_0 node1" [ style = bold] + "rsc2_stop_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc3_monitor_0 node2" -> "rsc2_stop_0 node1" [ style = bold] + "rsc3_monitor_0 node2" -> "rsc3_start_0 node2" [ style = bold] + "rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc3_start_0 node2" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/order3.exp b/pengine/test10/order3.exp +index c6ee38e..038e006 100644 +--- a/pengine/test10/order3.exp ++++ b/pengine/test10/order3.exp +@@ -24,6 +24,9 @@ + + + ++ ++ ++ + + + +@@ -68,6 +71,9 @@ + + + ++ ++ ++ + + + +@@ -143,6 +149,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/rec-node-11.dot b/pengine/test10/rec-node-11.dot +index 6749d23..3f48ea8 100644 +--- a/pengine/test10/rec-node-11.dot ++++ b/pengine/test10/rec-node-11.dot +@@ -11,6 +11,7 @@ digraph "g" { + "group1_stopped_0" -> "group1_start_0" [ style = bold] + "group1_stopped_0" -> "rsc3_stop_0 node2" [ style = bold] + "group1_stopped_0" [ style=bold color="green" fontcolor="orange" ] ++"rsc1_monitor_0 node2" -> "group1_stopped_0" [ style = bold] + "rsc1_monitor_0 node2" -> "rsc1_start_0 node2" [ style = bold] + "rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc1_start_0 node2" -> "group1_running_0" [ style = bold] +@@ -19,6 +20,8 @@ digraph "g" { + "rsc1_stop_0 node1" -> "group1_stopped_0" [ style = bold] + "rsc1_stop_0 node1" -> "rsc1_start_0 node2" [ style = bold] + "rsc1_stop_0 node1" [ style=bold color="green" fontcolor="orange" ] ++"rsc2_monitor_0 node2" -> "group1_stopped_0" [ style = bold] ++"rsc2_monitor_0 node2" -> "rsc1_stop_0 node1" [ style = bold] + "rsc2_monitor_0 node2" -> "rsc2_start_0 node2" [ style = bold] + "rsc2_monitor_0 node2" [ 
style=bold color="green" fontcolor="black" ] + "rsc2_start_0 node2" -> "group1_running_0" [ style = bold] +diff --git a/pengine/test10/rec-node-11.exp b/pengine/test10/rec-node-11.exp +index eca2455..d68f392 100644 +--- a/pengine/test10/rec-node-11.exp ++++ b/pengine/test10/rec-node-11.exp +@@ -32,6 +32,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -120,6 +126,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/reload-becomes-restart.dot b/pengine/test10/reload-becomes-restart.dot +index ed3720a..a6616f9 100644 +--- a/pengine/test10/reload-becomes-restart.dot ++++ b/pengine/test10/reload-becomes-restart.dot +@@ -37,6 +37,7 @@ digraph "g" { + "rsc1:1_start_0 node1" -> "rsc2_start_0 node1" [ style = bold] + "rsc1:1_start_0 node1" [ style=bold color="green" fontcolor="black"] + "rsc2:1_monitor_0 node2" -> "cl-rsc2_start_0" [ style = bold] ++"rsc2:1_monitor_0 node2" -> "cl-rsc2_stopped_0" [ style = bold] + "rsc2:1_monitor_0 node2" [ style=bold color="green" fontcolor="black"] + "rsc2:1_monitor_200000 node2" [ style=bold color="green" fontcolor="black"] + "rsc2:1_start_0 node2" -> "cl-rsc2_running_0" [ style = bold] +diff --git a/pengine/test10/reload-becomes-restart.exp b/pengine/test10/reload-becomes-restart.exp +index 63ebff0..c3e3721 100644 +--- a/pengine/test10/reload-becomes-restart.exp ++++ b/pengine/test10/reload-becomes-restart.exp +@@ -240,6 +240,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/remote-connection-unrecoverable.dot b/pengine/test10/remote-connection-unrecoverable.dot +index 4cc243b..0360cd0 100644 +--- a/pengine/test10/remote-connection-unrecoverable.dot ++++ b/pengine/test10/remote-connection-unrecoverable.dot +@@ -7,7 +7,9 @@ digraph "g" { + "remote1_stop_0 node1" [ style=bold color="green" fontcolor="orange"] + "rsc1_delete_0 remote1" -> "rsc1_start_0 node2" [ style = dashed] + "rsc1_delete_0 remote1" [ style=dashed color="red" fontcolor="black"] ++"rsc1_monitor_0 node2" -> "remote1_stop_0 node1" [ style = bold] + "rsc1_monitor_0 node2" -> 
"rsc1_start_0 node2" [ style = bold] ++"rsc1_monitor_0 node2" -> "rsc2-master_demote_0" [ style = bold] + "rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black"] + "rsc1_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] + "rsc1_start_0 node2" -> "rsc1_monitor_10000 node2" [ style = bold] +diff --git a/pengine/test10/remote-connection-unrecoverable.exp b/pengine/test10/remote-connection-unrecoverable.exp +index 59132fd..73fa7a1 100644 +--- a/pengine/test10/remote-connection-unrecoverable.exp ++++ b/pengine/test10/remote-connection-unrecoverable.exp +@@ -12,6 +12,9 @@ + + + ++ ++ ++ + + + +@@ -166,6 +169,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/rsc_dep1.dot b/pengine/test10/rsc_dep1.dot +index 49d686c..da1da7d 100644 +--- a/pengine/test10/rsc_dep1.dot ++++ b/pengine/test10/rsc_dep1.dot +@@ -1,11 +1,13 @@ + digraph "g" { + "rsc1_monitor_0 node1" -> "rsc1_start_0 node2" [ style = bold] ++"rsc1_monitor_0 node1" -> "rsc2_start_0 node1" [ style = bold] + "rsc1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "rsc1_monitor_0 node2" -> "rsc1_start_0 node2" [ style = bold] + "rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc1_start_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc2_monitor_0 node1" -> "rsc2_start_0 node1" [ style = bold] + "rsc2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc2_monitor_0 node2" -> "rsc1_start_0 node2" [ style = bold] + "rsc2_monitor_0 node2" -> "rsc2_start_0 node1" [ style = bold] + "rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc2_start_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/rsc_dep1.exp b/pengine/test10/rsc_dep1.exp +index 50b7506..80a96e9 100644 +--- a/pengine/test10/rsc_dep1.exp ++++ b/pengine/test10/rsc_dep1.exp +@@ -11,6 +11,9 @@ + + + ++ ++ ++ + + + +@@ -45,6 +48,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/rsc_dep5.dot 
b/pengine/test10/rsc_dep5.dot +index ec7c307..8bdc4ed 100644 +--- a/pengine/test10/rsc_dep5.dot ++++ b/pengine/test10/rsc_dep5.dot +@@ -1,13 +1,17 @@ + digraph "g" { ++"rsc1_monitor_0 node1" -> "rsc3_start_0 node1" [ style = bold] + "rsc1_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc1_monitor_0 node2" -> "rsc2_start_0 node2" [ style = bold] + "rsc1_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc2_monitor_0 node1" -> "rsc2_start_0 node2" [ style = bold] ++"rsc2_monitor_0 node1" -> "rsc3_start_0 node1" [ style = bold] + "rsc2_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] + "rsc2_monitor_0 node2" -> "rsc2_start_0 node2" [ style = bold] + "rsc2_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc2_start_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc3_monitor_0 node1" -> "rsc3_start_0 node1" [ style = bold] + "rsc3_monitor_0 node1" [ style=bold color="green" fontcolor="black" ] ++"rsc3_monitor_0 node2" -> "rsc2_start_0 node2" [ style = bold] + "rsc3_monitor_0 node2" -> "rsc3_start_0 node1" [ style = bold] + "rsc3_monitor_0 node2" [ style=bold color="green" fontcolor="black" ] + "rsc3_start_0 node1" [ style=bold color="green" fontcolor="black" ] +diff --git a/pengine/test10/rsc_dep5.exp b/pengine/test10/rsc_dep5.exp +index 5647d46..6944272 100644 +--- a/pengine/test10/rsc_dep5.exp ++++ b/pengine/test10/rsc_dep5.exp +@@ -11,6 +11,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -45,8 +51,14 @@ + + + ++ ++ ++ + + ++ ++ ++ + + + +diff --git a/pengine/test10/unfence-definition.dot b/pengine/test10/unfence-definition.dot +index e2dc564..e899ff3 100644 +--- a/pengine/test10/unfence-definition.dot ++++ b/pengine/test10/unfence-definition.dot +@@ -12,11 +12,14 @@ digraph "g" { + "clvmd-clone_stopped_0" -> "dlm-clone_stop_0" [ style = bold] + "clvmd-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] + "clvmd:1_monitor_0 virt-2" -> "clvmd-clone_start_0" [ style = bold] 
++"clvmd:1_monitor_0 virt-2" -> "clvmd-clone_stopped_0" [ style = bold] ++"clvmd:1_monitor_0 virt-2" -> "clvmd_stop_0 virt-1" [ style = bold] + "clvmd:1_monitor_0 virt-2" [ style=bold color="green" fontcolor="black"] + "clvmd:1_start_0 virt-2" -> "clvmd-clone_running_0" [ style = bold] + "clvmd:1_start_0 virt-2" -> "clvmd:2_start_0 virt-3" [ style = bold] + "clvmd:1_start_0 virt-2" [ style=bold color="green" fontcolor="black"] + "clvmd:2_monitor_0 virt-3" -> "clvmd-clone_start_0" [ style = bold] ++"clvmd:2_monitor_0 virt-3" -> "clvmd-clone_stopped_0" [ style = bold] + "clvmd:2_monitor_0 virt-3" [ style=bold color="green" fontcolor="black"] + "clvmd:2_start_0 virt-3" -> "clvmd-clone_running_0" [ style = bold] + "clvmd:2_start_0 virt-3" [ style=bold color="green" fontcolor="black"] +@@ -40,6 +43,7 @@ digraph "g" { + "dlm-clone_stopped_0" -> "dlm-clone_start_0" [ style = bold] + "dlm-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] + "dlm:2_monitor_0 virt-3" -> "dlm-clone_start_0" [ style = bold] ++"dlm:2_monitor_0 virt-3" -> "dlm-clone_stopped_0" [ style = bold] + "dlm:2_monitor_0 virt-3" [ style=bold color="green" fontcolor="black"] + "dlm:2_start_0 virt-3" -> "clvmd:2_start_0 virt-3" [ style = bold] + "dlm:2_start_0 virt-3" -> "dlm-clone_running_0" [ style = bold] +diff --git a/pengine/test10/unfence-definition.exp b/pengine/test10/unfence-definition.exp +index 019c03d..840a8d2 100644 +--- a/pengine/test10/unfence-definition.exp ++++ b/pengine/test10/unfence-definition.exp +@@ -137,6 +137,9 @@ + + + ++ ++ ++ + + + +@@ -223,6 +226,9 @@ + + + ++ ++ ++ + + + +@@ -301,6 +307,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/unfence-parameters.dot b/pengine/test10/unfence-parameters.dot +index ab3ce37..a1ee969 100644 +--- a/pengine/test10/unfence-parameters.dot ++++ b/pengine/test10/unfence-parameters.dot +@@ -12,11 +12,14 @@ digraph "g" { + "clvmd-clone_stopped_0" -> "dlm-clone_stop_0" [ style = bold] + "clvmd-clone_stopped_0" [ style=bold 
color="green" fontcolor="orange"] + "clvmd:1_monitor_0 virt-2" -> "clvmd-clone_start_0" [ style = bold] ++"clvmd:1_monitor_0 virt-2" -> "clvmd-clone_stopped_0" [ style = bold] ++"clvmd:1_monitor_0 virt-2" -> "clvmd_stop_0 virt-1" [ style = bold] + "clvmd:1_monitor_0 virt-2" [ style=bold color="green" fontcolor="black"] + "clvmd:1_start_0 virt-2" -> "clvmd-clone_running_0" [ style = bold] + "clvmd:1_start_0 virt-2" -> "clvmd:2_start_0 virt-3" [ style = bold] + "clvmd:1_start_0 virt-2" [ style=bold color="green" fontcolor="black"] + "clvmd:2_monitor_0 virt-3" -> "clvmd-clone_start_0" [ style = bold] ++"clvmd:2_monitor_0 virt-3" -> "clvmd-clone_stopped_0" [ style = bold] + "clvmd:2_monitor_0 virt-3" [ style=bold color="green" fontcolor="black"] + "clvmd:2_start_0 virt-3" -> "clvmd-clone_running_0" [ style = bold] + "clvmd:2_start_0 virt-3" [ style=bold color="green" fontcolor="black"] +@@ -40,6 +43,7 @@ digraph "g" { + "dlm-clone_stopped_0" -> "dlm-clone_start_0" [ style = bold] + "dlm-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] + "dlm:2_monitor_0 virt-3" -> "dlm-clone_start_0" [ style = bold] ++"dlm:2_monitor_0 virt-3" -> "dlm-clone_stopped_0" [ style = bold] + "dlm:2_monitor_0 virt-3" [ style=bold color="green" fontcolor="black"] + "dlm:2_start_0 virt-3" -> "clvmd:2_start_0 virt-3" [ style = bold] + "dlm:2_start_0 virt-3" -> "dlm-clone_running_0" [ style = bold] +diff --git a/pengine/test10/unfence-parameters.exp b/pengine/test10/unfence-parameters.exp +index fc3317d..3e70cb8 100644 +--- a/pengine/test10/unfence-parameters.exp ++++ b/pengine/test10/unfence-parameters.exp +@@ -121,6 +121,9 @@ + + + ++ ++ ++ + + + +@@ -207,6 +210,9 @@ + + + ++ ++ ++ + + + +@@ -285,6 +291,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/unrunnable-1.dot b/pengine/test10/unrunnable-1.dot +index 7ead826..6164046 100644 +--- a/pengine/test10/unrunnable-1.dot ++++ b/pengine/test10/unrunnable-1.dot +@@ -11,10 +11,13 @@ + 
"child_192.168.100.182_monitor_5000 c001n03" [ style=dashed color="red" fontcolor="black" ] + "child_192.168.100.183_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "child_192.168.100.183_monitor_5000 c001n03" [ style=dashed color="red" fontcolor="black" ] ++"child_DoFencing:1_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:1_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "child_DoFencing:1_stop_0 c001n02" -> "DoFencing_stopped_0" [ style = dashed] + "child_DoFencing:1_stop_0 c001n02" [ style=dashed color="red" fontcolor="black" ] ++"child_DoFencing:2_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:2_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] ++"child_DoFencing:3_monitor_0 c001n03" -> "DoFencing_stopped_0" [ style = bold] + "child_DoFencing:3_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_0 c001n03" [ style=bold color="green" fontcolor="black" ] + "rsc_c001n01_monitor_5000 c001n03" [ style=dashed color="red" fontcolor="black" ] +diff --git a/pengine/test10/unrunnable-1.exp b/pengine/test10/unrunnable-1.exp +index 56fb4c0..94ab05c 100644 +--- a/pengine/test10/unrunnable-1.exp ++++ b/pengine/test10/unrunnable-1.exp +@@ -106,6 +106,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/whitebox-imply-stop-on-fence.dot b/pengine/test10/whitebox-imply-stop-on-fence.dot +index a58e7c7..62ba699 100644 +--- a/pengine/test10/whitebox-imply-stop-on-fence.dot ++++ b/pengine/test10/whitebox-imply-stop-on-fence.dot +@@ -20,7 +20,9 @@ + "clvmd-clone_stop_0" [ style=bold color="green" fontcolor="orange"] + "clvmd-clone_stopped_0" -> "dlm-clone_stop_0" [ style = bold] + "clvmd-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"clvmd_monitor_0 lxc-01_kiff-02" -> "clvmd-clone_stopped_0" [ style = bold] + "clvmd_monitor_0 lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] 
++"clvmd_monitor_0 lxc-02_kiff-02" -> "clvmd-clone_stopped_0" [ style = bold] + "clvmd_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] + "clvmd_stop_0 kiff-01" -> "clvmd-clone_stopped_0" [ style = bold] + "clvmd_stop_0 kiff-01" -> "dlm_stop_0 kiff-01" [ style = bold] +@@ -29,7 +31,9 @@ + "dlm-clone_stop_0" -> "dlm_stop_0 kiff-01" [ style = bold] + "dlm-clone_stop_0" [ style=bold color="green" fontcolor="orange"] + "dlm-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"dlm_monitor_0 lxc-01_kiff-02" -> "dlm-clone_stopped_0" [ style = bold] + "dlm_monitor_0 lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] ++"dlm_monitor_0 lxc-02_kiff-02" -> "dlm-clone_stopped_0" [ style = bold] + "dlm_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] + "dlm_stop_0 kiff-01" -> "dlm-clone_stopped_0" [ style = bold] + "dlm_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +@@ -57,7 +61,9 @@ + "shared0-clone_stop_0" [ style=bold color="green" fontcolor="orange"] + "shared0-clone_stopped_0" -> "clvmd-clone_stop_0" [ style = bold] + "shared0-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"shared0_monitor_0 lxc-01_kiff-02" -> "shared0-clone_stopped_0" [ style = bold] + "shared0_monitor_0 lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] ++"shared0_monitor_0 lxc-02_kiff-02" -> "shared0-clone_stopped_0" [ style = bold] + "shared0_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] + "shared0_stop_0 kiff-01" -> "clvmd_stop_0 kiff-01" [ style = bold] + "shared0_stop_0 kiff-01" -> "shared0-clone_stopped_0" [ style = bold] +diff --git a/pengine/test10/whitebox-imply-stop-on-fence.exp b/pengine/test10/whitebox-imply-stop-on-fence.exp +index cdba621..3b24768 100644 +--- a/pengine/test10/whitebox-imply-stop-on-fence.exp ++++ b/pengine/test10/whitebox-imply-stop-on-fence.exp +@@ -83,6 +83,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +@@ -149,6 +155,12 @@ + + + ++ ++ ++ ++ 
++ ++ + + + +@@ -212,6 +224,12 @@ + + + ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/whitebox-migrate1.dot b/pengine/test10/whitebox-migrate1.dot +index ae88207..85e465f 100644 +--- a/pengine/test10/whitebox-migrate1.dot ++++ b/pengine/test10/whitebox-migrate1.dot +@@ -23,6 +23,7 @@ + "rhel7-node1_migrate_from_0 rhel7-node3" [ style=bold color="green" fontcolor="black"] + "rhel7-node1_migrate_to_0 rhel7-node2" -> "rhel7-node1_migrate_from_0 rhel7-node3" [ style = bold] + "rhel7-node1_migrate_to_0 rhel7-node2" [ style=bold color="green" fontcolor="black"] ++"rhel7-node1_monitor_0 rhel7-node3" -> "remote-rsc_migrate_to_0 rhel7-node2" [ style = bold] + "rhel7-node1_monitor_0 rhel7-node3" -> "rhel7-node1_migrate_to_0 rhel7-node2" [ style = bold] + "rhel7-node1_monitor_0 rhel7-node3" -> "rhel7-node1_start_0 rhel7-node3" [ style = bold] + "rhel7-node1_monitor_0 rhel7-node3" [ style=bold color="green" fontcolor="black"] +diff --git a/pengine/test10/whitebox-migrate1.exp b/pengine/test10/whitebox-migrate1.exp +index 81c35ec..48c2550 100644 +--- a/pengine/test10/whitebox-migrate1.exp ++++ b/pengine/test10/whitebox-migrate1.exp +@@ -89,7 +89,11 @@ + + + +- ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/whitebox-migrate1.summary b/pengine/test10/whitebox-migrate1.summary +index 58ef985..c45f360 100644 +--- a/pengine/test10/whitebox-migrate1.summary ++++ b/pengine/test10/whitebox-migrate1.summary +@@ -22,14 +22,14 @@ Transition Summary: + Executing cluster transition: + * Resource action: shooter1 stop on rhel7-node3 + * Resource action: FAKE3 stop on rhel7-node3 +- * Resource action: remote-rsc migrate_to on rhel7-node2 + * Resource action: rhel7-node1 monitor on rhel7-node3 + * Resource action: shooter1 start on rhel7-node2 + * Resource action: FAKE3 start on rhel7-node2 +- * Resource action: remote-rsc migrate_from on rhel7-node3 +- * Resource action: rhel7-node1 migrate_to on rhel7-node2 ++ * Resource action: remote-rsc migrate_to on rhel7-node2 + * Resource 
action: shooter1 monitor=60000 on rhel7-node2 + * Resource action: FAKE3 monitor=10000 on rhel7-node2 ++ * Resource action: remote-rsc migrate_from on rhel7-node3 ++ * Resource action: rhel7-node1 migrate_to on rhel7-node2 + * Resource action: rhel7-node1 migrate_from on rhel7-node3 + * Resource action: rhel7-node1 stop on rhel7-node2 + * Resource action: remote-rsc stop on rhel7-node2 +diff --git a/pengine/test10/whitebox-move.dot b/pengine/test10/whitebox-move.dot +index 418e63b..0442f43 100644 +--- a/pengine/test10/whitebox-move.dot ++++ b/pengine/test10/whitebox-move.dot +@@ -1,5 +1,6 @@ + digraph "g" { + "A_monitor_0 lxc2" -> "A_start_0 lxc1" [ style = bold] ++"A_monitor_0 lxc2" -> "lxc1_stop_0 18node1" [ style = bold] + "A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] + "A_monitor_10000 lxc1" [ style=bold color="green" fontcolor="black"] + "A_start_0 lxc1" -> "A_monitor_10000 lxc1" [ style = bold] +diff --git a/pengine/test10/whitebox-move.exp b/pengine/test10/whitebox-move.exp +index 5d17dc6..9f8593c 100644 +--- a/pengine/test10/whitebox-move.exp ++++ b/pengine/test10/whitebox-move.exp +@@ -223,6 +223,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/whitebox-ms-ordering.dot b/pengine/test10/whitebox-ms-ordering.dot +index ba0dd30..59bdbaa 100644 +--- a/pengine/test10/whitebox-ms-ordering.dot ++++ b/pengine/test10/whitebox-ms-ordering.dot +@@ -46,10 +46,13 @@ + "lxc-ms_demote_0 lxc1" -> "lxc-ms_stop_0 lxc1" [ style = bold] + "lxc-ms_demote_0 lxc1" [ style=bold color="green" fontcolor="orange"] + "lxc-ms_monitor_0 18node1" -> "lxc-ms-master_start_0" [ style = bold] ++"lxc-ms_monitor_0 18node1" -> "lxc-ms-master_stopped_0" [ style = bold] + "lxc-ms_monitor_0 18node1" [ style=bold color="green" fontcolor="black"] + "lxc-ms_monitor_0 18node2" -> "lxc-ms-master_start_0" [ style = bold] ++"lxc-ms_monitor_0 18node2" -> "lxc-ms-master_stopped_0" [ style = bold] + "lxc-ms_monitor_0 18node2" [ style=bold color="green" fontcolor="black"] + 
"lxc-ms_monitor_0 18node3" -> "lxc-ms-master_start_0" [ style = bold] ++"lxc-ms_monitor_0 18node3" -> "lxc-ms-master_stopped_0" [ style = bold] + "lxc-ms_monitor_0 18node3" [ style=bold color="green" fontcolor="black"] + "lxc-ms_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] + "lxc-ms_promote_0 lxc1" -> "lxc-ms-master_promoted_0" [ style = bold] +diff --git a/pengine/test10/whitebox-ms-ordering.exp b/pengine/test10/whitebox-ms-ordering.exp +index 80d8e5e..03761cf 100644 +--- a/pengine/test10/whitebox-ms-ordering.exp ++++ b/pengine/test10/whitebox-ms-ordering.exp +@@ -342,6 +342,15 @@ + + + ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/pengine/test10/whitebox-orphaned.dot b/pengine/test10/whitebox-orphaned.dot +index 1511877..fdb1547 100644 +--- a/pengine/test10/whitebox-orphaned.dot ++++ b/pengine/test10/whitebox-orphaned.dot +@@ -1,6 +1,7 @@ + digraph "g" { + "A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] + "B_monitor_0 lxc2" -> "B_start_0 lxc2" [ style = bold] ++"B_monitor_0 lxc2" -> "lxc1_stop_0 18node2" [ style = bold] + "B_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] + "B_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] + "B_start_0 lxc2" -> "B_monitor_10000 lxc2" [ style = bold] +diff --git a/pengine/test10/whitebox-orphaned.exp b/pengine/test10/whitebox-orphaned.exp +index 251aa49..ee7132a 100644 +--- a/pengine/test10/whitebox-orphaned.exp ++++ b/pengine/test10/whitebox-orphaned.exp +@@ -176,6 +176,9 @@ + + + ++ ++ ++ + + + +diff --git a/pengine/test10/whitebox-stop.dot b/pengine/test10/whitebox-stop.dot +index 304f134..8e03f9b 100644 +--- a/pengine/test10/whitebox-stop.dot ++++ b/pengine/test10/whitebox-stop.dot +@@ -1,6 +1,7 @@ + digraph "g" { + "A_monitor_0 lxc2" [ style=bold color="green" fontcolor="black"] + "B_monitor_0 lxc2" -> "B_start_0 lxc2" [ style = bold] ++"B_monitor_0 lxc2" -> "lxc1_stop_0 18node2" [ style = bold] + "B_monitor_0 lxc2" [ style=bold color="green" 
fontcolor="black"] + "B_monitor_10000 lxc2" [ style=bold color="green" fontcolor="black"] + "B_start_0 lxc2" -> "B_monitor_10000 lxc2" [ style = bold] +diff --git a/pengine/test10/whitebox-stop.exp b/pengine/test10/whitebox-stop.exp +index 3640b03..81f55af 100644 +--- a/pengine/test10/whitebox-stop.exp ++++ b/pengine/test10/whitebox-stop.exp +@@ -125,6 +125,9 @@ + + + ++ ++ ++ + + + +-- +1.8.3.1 + + +From e5b657fe11523c9b24a6f6098dbdb1bdba58003e Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Fri, 1 Mar 2019 17:32:50 +0100 +Subject: [PATCH 3/3] Test: scheduler: cl#5301 - respect order constraints when + relevant resources are being probed (new test) + +--- + pengine/regression.sh | 1 + + pengine/test10/order-first-probes.dot | 28 ++++++ + pengine/test10/order-first-probes.exp | 144 ++++++++++++++++++++++++++++++ + pengine/test10/order-first-probes.scores | 12 +++ + pengine/test10/order-first-probes.summary | 35 ++++++++ + pengine/test10/order-first-probes.xml | 96 ++++++++++++++++++++ + 6 files changed, 316 insertions(+) + create mode 100644 pengine/test10/order-first-probes.dot + create mode 100644 pengine/test10/order-first-probes.exp + create mode 100644 pengine/test10/order-first-probes.scores + create mode 100644 pengine/test10/order-first-probes.summary + create mode 100644 pengine/test10/order-first-probes.xml + +diff --git a/pengine/regression.sh b/pengine/regression.sh +index bb63a72..e25990d 100755 +--- a/pengine/regression.sh ++++ b/pengine/regression.sh +@@ -566,6 +566,7 @@ do_test honor_stonith_rsc_order2 "cl#5056- Honor order constraint, stonith clone + do_test honor_stonith_rsc_order3 "cl#5056- Honor order constraint, stonith clones with nested pure stonith group." + do_test honor_stonith_rsc_order4 "cl#5056- Honor order constraint, between two native stonith rscs." 
+ do_test probe-timeout "cl#5099 - Default probe timeout" ++do_test order-first-probes "cl#5301 - respect order constraints when relevant resources are being probed" + + do_test concurrent-fencing "Allow performing fencing operations in parallel" + +diff --git a/pengine/test10/order-first-probes.dot b/pengine/test10/order-first-probes.dot +new file mode 100644 +index 0000000..1251318 +--- /dev/null ++++ b/pengine/test10/order-first-probes.dot +@@ -0,0 +1,28 @@ ++digraph "g" { ++"grpDummy_running_0" [ style=bold color="green" fontcolor="orange"] ++"grpDummy_start_0" -> "grpDummy_running_0" [ style = bold] ++"grpDummy_start_0" -> "prmDummy1_start_0 rh72-02" [ style = bold] ++"grpDummy_start_0" -> "prmDummy2_start_0 rh72-02" [ style = bold] ++"grpDummy_start_0" [ style=bold color="green" fontcolor="orange"] ++"grpDummy_stop_0" -> "grpDummy_stopped_0" [ style = bold] ++"grpDummy_stop_0" -> "prmDummy1_stop_0 rh72-01" [ style = bold] ++"grpDummy_stop_0" [ style=bold color="green" fontcolor="orange"] ++"grpDummy_stopped_0" -> "grpDummy_start_0" [ style = bold] ++"grpDummy_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"prmDummy1_monitor_10000 rh72-02" [ style=bold color="green" fontcolor="black"] ++"prmDummy1_start_0 rh72-02" -> "grpDummy_running_0" [ style = bold] ++"prmDummy1_start_0 rh72-02" -> "prmDummy1_monitor_10000 rh72-02" [ style = bold] ++"prmDummy1_start_0 rh72-02" -> "prmDummy2_start_0 rh72-02" [ style = bold] ++"prmDummy1_start_0 rh72-02" [ style=bold color="green" fontcolor="black"] ++"prmDummy1_stop_0 rh72-01" -> "grpDummy_stopped_0" [ style = bold] ++"prmDummy1_stop_0 rh72-01" -> "prmDummy1_start_0 rh72-02" [ style = bold] ++"prmDummy1_stop_0 rh72-01" [ style=bold color="green" fontcolor="black"] ++"prmDummy2_monitor_0 rh72-01" -> "grpDummy_stopped_0" [ style = bold] ++"prmDummy2_monitor_0 rh72-01" -> "prmDummy1_stop_0 rh72-01" [ style = bold] ++"prmDummy2_monitor_0 rh72-01" -> "prmDummy2_start_0 rh72-02" [ style = bold] ++"prmDummy2_monitor_0 
rh72-01" [ style=bold color="green" fontcolor="black"] ++"prmDummy2_monitor_10000 rh72-02" [ style=bold color="green" fontcolor="black"] ++"prmDummy2_start_0 rh72-02" -> "grpDummy_running_0" [ style = bold] ++"prmDummy2_start_0 rh72-02" -> "prmDummy2_monitor_10000 rh72-02" [ style = bold] ++"prmDummy2_start_0 rh72-02" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/pengine/test10/order-first-probes.exp b/pengine/test10/order-first-probes.exp +new file mode 100644 +index 0000000..3ab8801 +--- /dev/null ++++ b/pengine/test10/order-first-probes.exp +@@ -0,0 +1,144 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/pengine/test10/order-first-probes.scores b/pengine/test10/order-first-probes.scores +new file mode 100644 +index 0000000..555caf4 +--- /dev/null ++++ b/pengine/test10/order-first-probes.scores +@@ -0,0 +1,12 @@ ++Allocation scores: ++Using the original execution date of: 2016-10-05 07:32:34Z ++group_color: grpDummy allocation score on rh72-01: 200 ++group_color: grpDummy allocation score on rh72-02: 100 ++group_color: prmDummy1 allocation score on rh72-01: INFINITY ++group_color: prmDummy1 allocation score on rh72-02: 100 ++group_color: prmDummy2 allocation score on rh72-01: 0 ++group_color: prmDummy2 allocation score on rh72-02: 0 ++native_color: prmDummy1 allocation score on rh72-01: -INFINITY ++native_color: prmDummy1 allocation score on rh72-02: 100 ++native_color: prmDummy2 allocation score on rh72-01: -INFINITY ++native_color: prmDummy2 allocation score on rh72-02: 0 +diff --git a/pengine/test10/order-first-probes.summary 
b/pengine/test10/order-first-probes.summary +new file mode 100644 +index 0000000..54b5bc1 +--- /dev/null ++++ b/pengine/test10/order-first-probes.summary +@@ -0,0 +1,35 @@ ++Using the original execution date of: 2016-10-05 07:32:34Z ++ ++Current cluster status: ++Node rh72-01 (3232238257): standby ++Online: [ rh72-02 ] ++ ++ Resource Group: grpDummy ++ prmDummy1 (ocf::pacemaker:Dummy1): Started rh72-01 ++ prmDummy2 (ocf::pacemaker:Dummy2): Stopped ++ ++Transition Summary: ++ * Move prmDummy1 ( rh72-01 -> rh72-02 ) ++ * Start prmDummy2 ( rh72-02 ) ++ ++Executing cluster transition: ++ * Pseudo action: grpDummy_stop_0 ++ * Resource action: prmDummy2 monitor on rh72-01 ++ * Resource action: prmDummy1 stop on rh72-01 ++ * Pseudo action: grpDummy_stopped_0 ++ * Pseudo action: grpDummy_start_0 ++ * Resource action: prmDummy1 start on rh72-02 ++ * Resource action: prmDummy2 start on rh72-02 ++ * Pseudo action: grpDummy_running_0 ++ * Resource action: prmDummy1 monitor=10000 on rh72-02 ++ * Resource action: prmDummy2 monitor=10000 on rh72-02 ++Using the original execution date of: 2016-10-05 07:32:34Z ++ ++Revised cluster status: ++Node rh72-01 (3232238257): standby ++Online: [ rh72-02 ] ++ ++ Resource Group: grpDummy ++ prmDummy1 (ocf::pacemaker:Dummy1): Started rh72-02 ++ prmDummy2 (ocf::pacemaker:Dummy2): Started rh72-02 ++ +diff --git a/pengine/test10/order-first-probes.xml b/pengine/test10/order-first-probes.xml +new file mode 100644 +index 0000000..6fe1be2 +--- /dev/null ++++ b/pengine/test10/order-first-probes.xml +@@ -0,0 +1,96 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + diff --git a/SOURCES/001-rollup.patch b/SOURCES/001-rollup.patch deleted file mode 100644 index d4f3cf9..0000000 --- 
a/SOURCES/001-rollup.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 28076181217313c7e33bf88cb70eb16c2f8e737f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 19 Jul 2018 12:38:08 -0500 -Subject: [PATCH 1/4] Low: tools: enable file consolidation in crm_report - -Correct a variable misspelling that resulted in file de-duplication being -skipped. Also, remove an unused variable, and avoid the unreliable "-a". -Found by static analysis. ---- - tools/crm_report.in | 21 ++++++++++++++------- - 1 file changed, 14 insertions(+), 7 deletions(-) - -diff --git a/tools/crm_report.in b/tools/crm_report.in -index 43ed646..695c1f1 100755 ---- a/tools/crm_report.in -+++ b/tools/crm_report.in -@@ -27,7 +27,6 @@ eval set -- "$TEMP" - - progname=$(basename "$0") - rsh="ssh -T" --times="" - tests="" - nodes="" - compress=1 -@@ -243,10 +242,18 @@ EOF - # check if files have same content in the cluster - # - cibdiff() { -- d1=`dirname $1` -- d2=`dirname $2` -- if [ -f $d1/RUNNING -a -f $d2/RUNNING ] || -- [ -f $d1/STOPPED -a -f $d2/STOPPED ]; then -+ d1=$(dirname $1) -+ d2=$(dirname $2) -+ -+ if [ -f "$d1/RUNNING" ] && [ ! -f "$d2/RUNNING" ]; then -+ DIFF_OK=0 -+ elif [ -f "$d1/STOPPED" ] && [ ! 
-f "$d2/STOPPED" ]; then -+ DIFF_OK=0 -+ else -+ DIFF_OK=1 -+ fi -+ -+ if [ $DIFF_OK -eq 1 ]; then - if which crm_diff > /dev/null 2>&1; then - crm_diff -c -n $1 -o $2 - else -@@ -277,7 +284,7 @@ esac - # remove duplicates if files are same, make links instead - # - consolidate() { -- for n in $NODES; do -+ for n in $nodes; do - if [ -f $1/$2 ]; then - rm $1/$n/$2 - else -@@ -290,7 +297,7 @@ consolidate() { - analyze_one() { - rc=0 - node0="" -- for n in $NODES; do -+ for n in $nodes; do - if [ "$node0" ]; then - diffcheck $1/$node0/$2 $1/$n/$2 - rc=$(($rc+$?)) --- -1.8.3.1 - - -From 2db3895359beb0f577c142c03ac2c8e6f44c67cf Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 4 Apr 2018 15:47:18 -0500 -Subject: [PATCH 2/4] Low: tools: get sensor lun in ipmiservicelogd before - using it - ---- - tools/ipmiservicelogd.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c -index 47ff220..1047c9c 100644 ---- a/tools/ipmiservicelogd.c -+++ b/tools/ipmiservicelogd.c -@@ -434,14 +434,14 @@ sensor_discrete_event_handler(ipmi_sensor_t * sensor, - instance = ipmi_entity_get_entity_instance(ent); - ipmi_sensor_get_id(sensor, name, sizeof(name)); - -+ ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); -+ - sel_id = ipmi_entity_get_entity_id(ent); - sel_type = ipmi_entity_get_type(ent); - generator = ipmi_entity_get_slave_address(ent) | (sensor_lun << 5); /* LUN (2 bits) | SLAVE ADDRESS (5 bits) */ - version = 0x04; - sensor_type = ipmi_sensor_get_sensor_type(sensor); - -- ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); -- - event_class = 0; /* @TBD - where does this come from? 
*/ - event_type = ipmi_event_get_type(event); - direction = dir; --- -1.8.3.1 - - -From 7a79e4ef8315842d4d1078475dab287d8f3327de Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 25 Jul 2018 15:15:38 -0500 -Subject: [PATCH 3/4] Low: tools: notifyServicelogEvent FTBFS on ppc64le - ---- - tools/notifyServicelogEvent.c | 24 ++++++++---------------- - 1 file changed, 8 insertions(+), 16 deletions(-) - -diff --git a/tools/notifyServicelogEvent.c b/tools/notifyServicelogEvent.c -index b7f672c..700f068 100644 ---- a/tools/notifyServicelogEvent.c -+++ b/tools/notifyServicelogEvent.c -@@ -1,24 +1,15 @@ - /* -- * Copyright (C) 2009 International Business Machines, IBM, Mark Hamzy -+ * Copyright 2009-2018 International Business Machines, IBM, Mark Hamzy - * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public -- * License as published by the Free Software Foundation; either -- * version 2 of the License, or (at your option) any later version. -- * -- * This software is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * General Public License for more details. -- * -- * You should have received a copy of the GNU General Public -- * License along with this library; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * This source code is licensed under the GNU General Public License version 2 -+ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
- */ - - /* gcc -o notifyServicelogEvent `pkg-config --cflags servicelog-1` `pkg-config --libs servicelog-1` notifyServicelogEvent.c - */ - -+#include -+ - #include - #include - #include -@@ -27,9 +18,10 @@ - #include - #include - #include -+ - #include - #include --#include -+#include - - typedef enum { STATUS_GREEN = 1, STATUS_YELLOW, STATUS_RED } STATUS; - -@@ -91,7 +83,7 @@ main(int argc, char *argv[]) - struct sl_event *event = NULL; - uint64_t event_id = 0; - -- crm_log_init_quiet("notifyServicelogEvent", LOG_INFO, FALSE, TRUE, argc, argv); -+ crm_log_cli_init("notifyServicelogEvent"); - crm_set_options(NULL, "event_id ", long_options, - "Gets called upon events written to servicelog database"); - --- -1.8.3.1 - - -From b408a3ead462c8f02b68a164f24ba1b05bb3cad1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 25 Jul 2018 16:07:42 -0500 -Subject: [PATCH 4/4] Low: tools: ipmiservicelogd FTBFS on ppc64le - ---- - tools/ipmiservicelogd.c | 53 +++++++++++++++++++++++-------------------------- - 1 file changed, 25 insertions(+), 28 deletions(-) - -diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c -index 1047c9c..4f52865 100644 ---- a/tools/ipmiservicelogd.c -+++ b/tools/ipmiservicelogd.c -@@ -9,13 +9,10 @@ - * Author: Intel Corporation - * Jeff Zheng - * -- * Copyright 2009 International Business Machines, IBM -- * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU Lesser General Public License -- * as published by the Free Software Foundation; either version 2 of -- * the License, or (at your option) any later version. -+ * Copyright 2009-2018 International Business Machines, IBM - * -+ * This source code is licensed under the GNU Lesser General Public License -+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
- * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -@@ -27,10 +24,6 @@ - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with this program; if not, write to the Free -- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - /* gcc -o ipmiservicelogd -g `pkg-config --cflags --libs OpenIPMI OpenIPMIposix servicelog-1` ipmiservicelogd.c -@@ -38,6 +31,12 @@ - /* ./ipmiservicelogd smi 0 - */ - -+#include -+ -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE -+#endif -+ - #include - #include - #include -@@ -68,7 +67,7 @@ - - static os_handler_t *os_hnd; - --char *getStringExecOutput(char *args[]); -+char *getStringExecOutput(const char *const args[]); - char *getSerialNumber(void); - char *getProductName(void); - static void con_usage(const char *name, const char *help, void *cb_data); -@@ -91,7 +90,7 @@ void setup_done(ipmi_domain_t * domain, int err, unsigned int conn_num, unsigned - int still_connected, void *user_data); - - char * --getStringExecOutput(char *args[]) -+getStringExecOutput(const char *const args[]) - { - int rc; - pid_t pid; -@@ -201,7 +200,11 @@ getStringExecOutput(char *args[]) - crm_err("Error: child close (pipefd[1]) = %d", errno); - } - -- rc = execvp(args[0], args); -+ /* execvp() takes (char *const *) for backward compatibility, -+ * but POSIX guarantees that it will not modify the strings, -+ * so the cast is safe -+ */ -+ rc = execvp(args[0], (char *const *) args); - - if (rc == -1) { - crm_err("Error: child execvp = %d", errno); -@@ -224,7 +227,7 @@ getStringExecOutput(char *args[]) - char * - getSerialNumber(void) - { -- char *dmiArgs[] = { -+ const char *const 
dmiArgs[] = { - "dmidecode", - "--string", - "system-serial-number", -@@ -237,7 +240,7 @@ getSerialNumber(void) - char * - getProductName(void) - { -- char *dmiArgs[] = { -+ const char *dmiArgs[] = { - "dmidecode", - "--string", - "system-product-name", -@@ -313,8 +316,8 @@ ipmi2servicelog(struct sl_data_bmc *bmc_data) - sl_event.machine_serial = serial_number; - sl_event.machine_model = product_name; /* it may not have the serial # within the first 20 chars */ - sl_event.nodename = name.nodename; -- sl_event.refcode = "ipmi"; -- sl_event.description = "ipmi event"; -+ sl_event.refcode = strdup("ipmi"); -+ sl_event.description = strdup("ipmi event"); - sl_event.serviceable = 1; /* 1 or 0 */ - sl_event.predictive = 0; /* 1 or 0 */ - sl_event.disposition = SL_DISP_RECOVERABLE; /* one of SL_DISP_* */ -@@ -336,6 +339,8 @@ ipmi2servicelog(struct sl_data_bmc *bmc_data) - crm_debug("Sending to servicelog database"); - } - -+ free(sl_event.refcode); -+ free(sl_event.description); - free(serial_number); - free(product_name); - -@@ -352,7 +357,6 @@ sensor_threshold_event_handler(ipmi_sensor_t * sensor, - double value, void *cb_data, ipmi_event_t * event) - { - ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); -- int id, instance; - char name[IPMI_ENTITY_NAME_LEN]; - struct sl_data_bmc bmc_data; - uint32_t sel_id; -@@ -366,8 +370,6 @@ sensor_threshold_event_handler(ipmi_sensor_t * sensor, - uint8_t event_type; - int direction; - -- id = ipmi_entity_get_entity_id(ent); -- instance = ipmi_entity_get_entity_instance(ent); - ipmi_sensor_get_id(sensor, name, sizeof(name)); - - ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); -@@ -416,7 +418,6 @@ sensor_discrete_event_handler(ipmi_sensor_t * sensor, - int severity, int prev_severity, void *cb_data, ipmi_event_t * event) - { - ipmi_entity_t *ent = ipmi_sensor_get_entity(sensor); -- int id, instance; - char name[IPMI_ENTITY_NAME_LEN]; - struct sl_data_bmc bmc_data; - uint32_t sel_id; -@@ -430,8 +431,6 @@ 
sensor_discrete_event_handler(ipmi_sensor_t * sensor, - uint8_t event_type; - int direction; - -- id = ipmi_entity_get_entity_id(ent); -- instance = ipmi_entity_get_entity_instance(ent); - ipmi_sensor_get_id(sensor, name, sizeof(name)); - - ipmi_sensor_get_num(sensor, &sensor_lun, &sensor_number); -@@ -501,10 +500,7 @@ static void - entity_change(enum ipmi_update_e op, ipmi_domain_t * domain, ipmi_entity_t * entity, void *cb_data) - { - int rv; -- int id, instance; - -- id = ipmi_entity_get_entity_id(entity); -- instance = ipmi_entity_get_entity_instance(entity); - if (op == IPMI_ADDED) { - /* Register callback so that when the status of a - sensor changes, sensor_change is called */ -@@ -564,8 +560,9 @@ main(int argc, char *argv[]) - #endif - - crm_make_daemon("ipmiservicelogd", TRUE, "/var/run/ipmiservicelogd.pid0"); -- -- crm_log_init("ipmiservicelogd", LOG_INFO, FALSE, TRUE, argc, argv); -+ crm_log_cli_init("ipmiservicelogd"); -+ // Maybe this should log like a daemon instead? -+ // crm_log_init("ipmiservicelogd", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - - #ifdef COMPLEX - rv = ipmi_args_setup_con(args, os_hnd, NULL, &con); --- -1.8.3.1 - diff --git a/SOURCES/002-null-value.patch b/SOURCES/002-null-value.patch new file mode 100644 index 0000000..1377070 --- /dev/null +++ b/SOURCES/002-null-value.patch @@ -0,0 +1,46 @@ +From 0cfbb0797ba1788e131cf964ca53adcde8209e1f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 21 Mar 2019 09:20:32 -0500 +Subject: [PATCH] Fix: libcrmcommon: pcmk_nvpair_t should handle NULL values + +Detected by static analysis; if pcmk_prepend_nvpair() were given a NULL value, +pcmk__new_nvpair() would try to strdup() it. Now, name is asserted to be +non-NULL, and NULL values are set directly. 
+--- + lib/common/nvpair.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/lib/common/nvpair.c b/lib/common/nvpair.c +index 5d6853f..e9fec8a 100644 +--- a/lib/common/nvpair.c ++++ b/lib/common/nvpair.c +@@ -33,7 +33,7 @@ + * \internal + * \brief Allocate a new name/value pair + * +- * \param[in] name New name ++ * \param[in] name New name (required) + * \param[in] value New value + * + * \return Newly allocated name/value pair +@@ -43,11 +43,15 @@ + static pcmk_nvpair_t * + pcmk__new_nvpair(const char *name, const char *value) + { +- pcmk_nvpair_t *nvpair = calloc(1, sizeof(pcmk_nvpair_t)); ++ pcmk_nvpair_t *nvpair = NULL; + ++ CRM_ASSERT(name); ++ ++ nvpair = calloc(1, sizeof(pcmk_nvpair_t)); + CRM_ASSERT(nvpair); ++ + nvpair->name = strdup(name); +- nvpair->value = strdup(value); ++ nvpair->value = value? strdup(value) : NULL; + return nvpair; + } + +-- +1.8.3.1 + diff --git a/SOURCES/002-ppc64le.patch b/SOURCES/002-ppc64le.patch deleted file mode 100644 index bb28909..0000000 --- a/SOURCES/002-ppc64le.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 46201f029e4a5ac3ba0aaf05cb6df80341729566 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 30 Jul 2018 14:17:49 -0500 -Subject: [PATCH] Doc: tools: add --help/--version options to ipmiservicelogd - -allows ppc64le build when relevant dependencies are installed ---- - tools/ipmiservicelogd.c | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - -diff --git a/tools/ipmiservicelogd.c b/tools/ipmiservicelogd.c -index 4f52865..865eae0 100644 ---- a/tools/ipmiservicelogd.c -+++ b/tools/ipmiservicelogd.c -@@ -253,7 +253,7 @@ getProductName(void) - static void - con_usage(const char *name, const char *help, void *cb_data) - { -- printf("\n%s%s", name, help); -+ printf("%s\n", help); - } - - static void -@@ -261,7 +261,7 @@ usage(const char *progname) - { - printf("Usage:\n"); - printf(" %s \n", progname); -- printf(" Where is one of:"); -+ printf(" Where is one of:\n"); 
- ipmi_parse_args_iter_help(con_usage, NULL); - } - -@@ -550,6 +550,18 @@ main(int argc, char *argv[]) - /* Initialize the OpenIPMI library. */ - ipmi_init(os_hnd); - -+ // Check for pacemaker-standard help and version options -+ if (argc > 1) { -+ for (char **arg = &argv[1]; *arg != NULL; ++arg) { -+ if (!strcmp(*arg, "--help") || !strcmp(*arg, "-?")) { -+ usage(argv[0]); -+ return 0; -+ } else if (!strcmp(*arg, "--version") || !strcmp(*arg, "-$")) { -+ crm_help('$', 0); -+ } -+ } -+ } -+ - #ifdef COMPLEX - rv = ipmi_parse_args2(&curr_arg, argc, argv, &args); - if (rv) { --- -1.8.3.1 - diff --git a/SOURCES/003-fence-output.patch b/SOURCES/003-fence-output.patch new file mode 100644 index 0000000..f174e6f --- /dev/null +++ b/SOURCES/003-fence-output.patch @@ -0,0 +1,860 @@ +From 06ef8406cf224af6b94dc4672c9b6caa15133f89 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Thu, 14 Feb 2019 13:27:46 +0100 +Subject: [PATCH] use common service interface for fence-agents and RAs + +--- + include/crm/services.h | 5 +- + lib/Makefile.am | 3 +- + lib/fencing/Makefile.am | 1 + + lib/fencing/st_client.c | 459 +++++++++------------------------------- + lib/services/services_linux.c | 93 ++++++++ + lib/services/services_private.h | 2 + + 6 files changed, 203 insertions(+), 360 deletions(-) + +diff --git a/include/crm/services.h b/include/crm/services.h +index 0186e66..eddafc3 100644 +--- a/include/crm/services.h ++++ b/include/crm/services.h +@@ -170,7 +170,10 @@ typedef struct svc_action_s { + char *agent; + + int timeout; +- GHashTable *params; /* used by OCF agents and alert agents */ ++ GHashTable *params; /* used for setting up environment for ocf-ra & ++ alert agents ++ and to be sent via stdin for fence-agents ++ */ + + int rc; + int pid; +diff --git a/lib/Makefile.am b/lib/Makefile.am +index 5563819..d73bf2e 100644 +--- a/lib/Makefile.am ++++ b/lib/Makefile.am +@@ -39,11 +39,10 @@ clean-local: + rm -f *.pc + + ## Subdirectories... 
+-SUBDIRS = gnu common pengine transition cib fencing services lrmd cluster ++SUBDIRS = gnu common pengine transition cib services fencing lrmd cluster + DIST_SUBDIRS = $(SUBDIRS) ais + + if BUILD_CS_PLUGIN + SUBDIRS += ais + endif + +- +diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am +index c3f4ea2..e447627 100644 +--- a/lib/fencing/Makefile.am ++++ b/lib/fencing/Makefile.am +@@ -15,6 +15,7 @@ libstonithd_la_CFLAGS = $(CFLAGS_HARDENED_LIB) + libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) + + libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la ++libstonithd_la_LIBADD += $(top_builddir)/lib/services/libcrmservice.la + + libstonithd_la_SOURCES = st_client.c st_rhcs.c + if BUILD_LHA_SUPPORT +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 3898d45..1c56cf4 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -48,23 +49,18 @@ struct stonith_action_s { + char *agent; + char *action; + char *victim; +- char *args; ++ GHashTable *args; + int timeout; + int async; + void *userdata; + void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); + +- /*! internal async track data */ +- int fd_stdout; +- int fd_stderr; +- int last_timeout_signo; ++ svc_action_t *svc_action; + + /*! 
internal timing information */ + time_t initial_start_time; + int tries; + int remaining_timeout; +- guint timer_sigterm; +- guint timer_sigkill; + int max_retries; + + /* device output data */ +@@ -448,13 +444,11 @@ stonith_api_register_level(stonith_t * st, int options, const char *node, int le + } + + static void +-append_arg(const char *key, const char *value, char **args) ++append_arg(const char *key, const char *value, GHashTable **args) + { +- int len = 3; /* =, \n, \0 */ +- int last = 0; +- + CRM_CHECK(key != NULL, return); + CRM_CHECK(value != NULL, return); ++ CRM_CHECK(args != NULL, return); + + if (strstr(key, "pcmk_")) { + return; +@@ -464,15 +458,13 @@ append_arg(const char *key, const char *value, char **args) + return; + } + +- len += strlen(key); +- len += strlen(value); +- if (*args != NULL) { +- last = strlen(*args); ++ if (!*args) { ++ *args = crm_str_table_new(); + } + +- *args = realloc_safe(*args, last + len); ++ CRM_CHECK(*args != NULL, return); + crm_trace("Appending: %s=%s", key, value); +- sprintf((*args) + last, "%s=%s\n", key, value); ++ g_hash_table_replace(*args, strdup(key), strdup(value)); + } + + static void +@@ -489,7 +481,7 @@ append_config_arg(gpointer key, gpointer value, gpointer user_data) + } + + static void +-append_host_specific_args(const char *victim, const char *map, GHashTable * params, char **arg_list) ++append_host_specific_args(const char *victim, const char *map, GHashTable * params, GHashTable **args) + { + char *name = NULL; + int last = 0, lpc = 0, max = 0; +@@ -497,7 +489,7 @@ append_host_specific_args(const char *victim, const char *map, GHashTable * para + if (map == NULL) { + /* The best default there is for now... 
*/ + crm_debug("Using default arg map: port=uname"); +- append_arg("port", victim, arg_list); ++ append_arg("port", victim, args); + return; + } + +@@ -540,7 +532,7 @@ append_host_specific_args(const char *victim, const char *map, GHashTable * para + + if (value) { + crm_debug("Setting '%s'='%s' (%s) for %s", name, value, param, victim); +- append_arg(name, value, arg_list); ++ append_arg(name, value, args); + + } else { + crm_err("No node attribute '%s' for '%s'", name, victim); +@@ -560,12 +552,12 @@ append_host_specific_args(const char *victim, const char *map, GHashTable * para + free(name); + } + +-static char * ++static GHashTable * + make_args(const char *agent, const char *action, const char *victim, uint32_t victim_nodeid, GHashTable * device_args, + GHashTable * port_map) + { + char buffer[512]; +- char *arg_list = NULL; ++ GHashTable *arg_list = NULL; + const char *value = NULL; + + CRM_CHECK(action != NULL, return NULL); +@@ -653,66 +645,6 @@ make_args(const char *agent, const char *action, const char *victim, uint32_t vi + return arg_list; + } + +-static gboolean +-st_child_term(gpointer data) +-{ +- int rc = 0; +- stonith_action_t *track = data; +- +- crm_info("Child %d timed out, sending SIGTERM", track->pid); +- track->timer_sigterm = 0; +- track->last_timeout_signo = SIGTERM; +- rc = kill(-track->pid, SIGTERM); +- if (rc < 0) { +- crm_perror(LOG_ERR, "Couldn't send SIGTERM to %d", track->pid); +- } +- return FALSE; +-} +- +-static gboolean +-st_child_kill(gpointer data) +-{ +- int rc = 0; +- stonith_action_t *track = data; +- +- crm_info("Child %d timed out, sending SIGKILL", track->pid); +- track->timer_sigkill = 0; +- track->last_timeout_signo = SIGKILL; +- rc = kill(-track->pid, SIGKILL); +- if (rc < 0) { +- crm_perror(LOG_ERR, "Couldn't send SIGKILL to %d", track->pid); +- } +- return FALSE; +-} +- +-static void +-stonith_action_clear_tracking_data(stonith_action_t * action) +-{ +- if (action->timer_sigterm > 0) { +- 
g_source_remove(action->timer_sigterm); +- action->timer_sigterm = 0; +- } +- if (action->timer_sigkill > 0) { +- g_source_remove(action->timer_sigkill); +- action->timer_sigkill = 0; +- } +- if (action->fd_stdout) { +- close(action->fd_stdout); +- action->fd_stdout = 0; +- } +- if (action->fd_stderr) { +- close(action->fd_stderr); +- action->fd_stderr = 0; +- } +- free(action->output); +- action->output = NULL; +- free(action->error); +- action->error = NULL; +- action->rc = 0; +- action->pid = 0; +- action->last_timeout_signo = 0; +-} +- + /*! + * \internal + * \brief Free all memory used by a stonith action +@@ -723,11 +655,17 @@ void + stonith__destroy_action(stonith_action_t *action) + { + if (action) { +- stonith_action_clear_tracking_data(action); + free(action->agent); +- free(action->args); ++ if (action->args) { ++ g_hash_table_destroy(action->args); ++ } + free(action->action); + free(action->victim); ++ if (action->svc_action) { ++ services_action_free(action->svc_action); ++ } ++ free(action->output); ++ free(action->error); + free(action); + } + } +@@ -809,38 +747,6 @@ stonith_action_create(const char *agent, + return action; + } + +-#define READ_MAX 500 +-static char * +-read_output(int fd) +-{ +- char buffer[READ_MAX]; +- char *output = NULL; +- int len = 0; +- int more = 0; +- +- if (!fd) { +- return NULL; +- } +- +- do { +- errno = 0; +- memset(&buffer, 0, READ_MAX); +- more = read(fd, buffer, READ_MAX - 1); +- +- if (more > 0) { +- buffer[more] = 0; /* Make sure it's nul-terminated for logging +- * 'more' is always less than our buffer size +- */ +- output = realloc_safe(output, len + more + 1); +- snprintf(output + len, more + 1, "%s", buffer); +- len += more; +- } +- +- } while (more == (READ_MAX - 1) || (more < 0 && errno == EINTR)); +- +- return output; +-} +- + static gboolean + update_remaining_timeout(stonith_action_t * action) + { +@@ -860,58 +766,51 @@ update_remaining_timeout(stonith_action_t * action) + return action->remaining_timeout 
? TRUE : FALSE; + } + +-static void +-stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) +-{ +- stonith_action_t *action = mainloop_child_userdata(p); +- +- if (action->timer_sigterm > 0) { +- g_source_remove(action->timer_sigterm); +- action->timer_sigterm = 0; +- } +- if (action->timer_sigkill > 0) { +- g_source_remove(action->timer_sigkill); +- action->timer_sigkill = 0; +- } ++static int ++svc_action_to_errno(svc_action_t *svc_action) { ++ int rv = pcmk_ok; + +- action->output = read_output(action->fd_stdout); +- action->error = read_output(action->fd_stderr); ++ if (svc_action->rc > 0) { ++ /* Try to provide a useful error code based on the fence agent's ++ * error output. ++ */ ++ if (svc_action->rc == PCMK_OCF_TIMEOUT) { ++ rv = -ETIME; + +- if (action->last_timeout_signo) { +- action->rc = -ETIME; +- crm_notice("Child process %d performing action '%s' timed out with signal %d", +- pid, action->action, action->last_timeout_signo); ++ } else if (svc_action->stderr_data == NULL) { ++ rv = -ENODATA; + +- } else if (signo) { +- action->rc = -ECONNABORTED; +- crm_notice("Child process %d performing action '%s' timed out with signal %d", +- pid, action->action, signo); ++ } else if (strstr(svc_action->stderr_data, "imed out")) { ++ /* Some agents have their own internal timeouts */ ++ rv = -ETIME; + +- } else { +- crm_debug("Child process %d performing action '%s' exited with rc %d", +- pid, action->action, exitcode); +- if (exitcode > 0) { +- /* Try to provide a useful error code based on the fence agent's +- * error output. 
+- */ +- if (action->error == NULL) { +- exitcode = -ENODATA; +- +- } else if (strstr(action->error, "imed out")) { +- /* Some agents have their own internal timeouts */ +- exitcode = -ETIMEDOUT; +- +- } else if (strstr(action->error, "Unrecognised action")) { +- exitcode = -EOPNOTSUPP; ++ } else if (strstr(svc_action->stderr_data, "Unrecognised action")) { ++ rv = -EOPNOTSUPP; + +- } else { +- exitcode = -pcmk_err_generic; +- } ++ } else { ++ rv = -pcmk_err_generic; + } +- action->rc = exitcode; + } ++ return rv; ++} + +- log_action(action, pid); ++static void ++stonith_action_async_done(svc_action_t *svc_action) ++{ ++ stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; ++ ++ action->rc = svc_action_to_errno(svc_action); ++ action->output = svc_action->stdout_data; ++ svc_action->stdout_data = NULL; ++ action->error = svc_action->stderr_data; ++ svc_action->stderr_data = NULL; ++ ++ svc_action->params = NULL; ++ ++ crm_debug("Child process %d performing action '%s' exited with rc %d", ++ action->pid, action->action, svc_action->rc); ++ ++ log_action(action, action->pid); + + if (action->rc != pcmk_ok && update_remaining_timeout(action)) { + int rc = internal_stonith_action_execute(action); +@@ -921,28 +820,21 @@ stonith_action_async_done(mainloop_child_t * p, pid_t pid, int core, int signo, + } + + if (action->done_cb) { +- action->done_cb(pid, action->rc, action->output, action->userdata); ++ action->done_cb(action->pid, action->rc, action->output, action->userdata); + } + ++ action->svc_action = NULL; // don't remove our caller + stonith__destroy_action(action); + } + + static int + internal_stonith_action_execute(stonith_action_t * action) + { +- int pid, status = 0, len, rc = -EPROTO; +- int ret; +- int total = 0; +- int p_read_fd, p_write_fd; /* parent read/write file descriptors */ +- int c_read_fd, c_write_fd; /* child read/write file descriptors */ +- int c_stderr_fd, p_stderr_fd; /* parent/child side file descriptors for stderr */ +- int 
fd1[2]; +- int fd2[2]; +- int fd3[2]; ++ int rc = -EPROTO; + int is_retry = 0; +- +- /* clear any previous tracking data */ +- stonith_action_clear_tracking_data(action); ++ svc_action_t *svc_action = NULL; ++ static int stonith_sequence = 0; ++ char *buffer = NULL; + + if (!action->tries) { + action->initial_start_time = time(NULL); +@@ -955,207 +847,60 @@ internal_stonith_action_execute(stonith_action_t * action) + is_retry = 1; + } + +- c_read_fd = c_write_fd = p_read_fd = p_write_fd = c_stderr_fd = p_stderr_fd = -1; +- + if (action->args == NULL || action->agent == NULL) + goto fail; +- len = strlen(action->args); +- +- if (pipe(fd1)) +- goto fail; +- p_read_fd = fd1[0]; +- c_write_fd = fd1[1]; +- +- if (pipe(fd2)) +- goto fail; +- c_read_fd = fd2[0]; +- p_write_fd = fd2[1]; +- +- if (pipe(fd3)) +- goto fail; +- p_stderr_fd = fd3[0]; +- c_stderr_fd = fd3[1]; +- +- crm_debug("forking"); +- pid = fork(); +- if (pid < 0) { +- rc = -ECHILD; +- goto fail; +- } +- +- if (!pid) { +- /* child */ +- setpgid(0, 0); +- +- close(1); +- /* coverity[leaked_handle] False positive */ +- if (dup(c_write_fd) < 0) +- goto fail; +- close(2); +- /* coverity[leaked_handle] False positive */ +- if (dup(c_stderr_fd) < 0) +- goto fail; +- close(0); +- /* coverity[leaked_handle] False positive */ +- if (dup(c_read_fd) < 0) +- goto fail; +- +- /* keep c_stderr_fd open so parent can report all errors. */ +- /* keep c_write_fd open so hostlist can be sent to parent. 
*/ +- close(c_read_fd); +- close(p_read_fd); +- close(p_write_fd); +- close(p_stderr_fd); +- +- /* keep retries from executing out of control */ +- if (is_retry) { +- sleep(1); +- } +- execlp(action->agent, action->agent, NULL); +- exit(EXIT_FAILURE); +- } + +- /* parent */ +- action->pid = pid; +- ret = crm_set_nonblocking(p_read_fd); +- if (ret < 0) { +- crm_notice("Could not set output of %s to be non-blocking: %s " +- CRM_XS " rc=%d", +- action->agent, pcmk_strerror(rc), rc); ++ buffer = crm_strdup_printf(RH_STONITH_DIR "/%s", basename(action->agent)); ++ svc_action = services_action_create_generic(buffer, NULL); ++ free(buffer); ++ svc_action->timeout = 1000 * action->remaining_timeout; ++ svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); ++ svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), ++ action->action, action->tries); ++ svc_action->agent = strdup(action->agent); ++ svc_action->sequence = stonith_sequence++; ++ svc_action->params = action->args; ++ svc_action->cb_data = (void *) action; ++ ++ /* keep retries from executing out of control and free previous results */ ++ if (is_retry) { ++ free(action->output); ++ action->output = NULL; ++ free(action->error); ++ action->error = NULL; ++ sleep(1); + } +- ret = crm_set_nonblocking(p_stderr_fd); +- if (ret < 0) { +- crm_notice("Could not set error output of %s to be non-blocking: %s " +- CRM_XS " rc=%d", +- action->agent, pcmk_strerror(rc), rc); +- } +- +- errno = 0; +- do { +- crm_debug("sending args"); +- ret = write(p_write_fd, action->args + total, len - total); +- if (ret > 0) { +- total += ret; +- } +- +- } while (errno == EINTR && total < len); +- +- if (total != len) { +- crm_perror(LOG_ERR, "Sent %d not %d bytes", total, len); +- if (ret >= 0) { +- rc = -ECOMM; +- } +- goto fail; +- } +- +- close(p_write_fd); p_write_fd = -1; + +- /* async */ + if (action->async) { +- action->fd_stdout = p_read_fd; +- action->fd_stderr = p_stderr_fd; +- mainloop_child_add(pid, 0/* 
Move the timeout here? */, action->action, action, stonith_action_async_done); +- crm_trace("Op: %s on %s, pid: %d, timeout: %ds", action->action, action->agent, pid, +- action->remaining_timeout); +- action->last_timeout_signo = 0; +- if (action->remaining_timeout) { +- action->timer_sigterm = +- g_timeout_add(1000 * action->remaining_timeout, st_child_term, action); +- action->timer_sigkill = +- g_timeout_add(1000 * (action->remaining_timeout + 5), st_child_kill, action); ++ /* async */ ++ if(services_action_async(svc_action, &stonith_action_async_done) == FALSE) { ++ services_action_free(svc_action); ++ svc_action = NULL; + } else { +- crm_err("No timeout set for stonith operation %s with device %s", +- action->action, action->agent); ++ action->pid = svc_action->pid; ++ action->svc_action = svc_action; ++ rc = 0; + } + +- close(c_write_fd); +- close(c_read_fd); +- close(c_stderr_fd); +- return 0; +- + } else { + /* sync */ +- int timeout = action->remaining_timeout + 1; +- pid_t p = 0; +- +- while (action->remaining_timeout < 0 || timeout > 0) { +- p = waitpid(pid, &status, WNOHANG); +- if (p > 0) { +- break; +- } +- sleep(1); +- timeout--; +- } +- +- if (timeout == 0) { +- int killrc = kill(-pid, SIGKILL); +- +- if (killrc && errno != ESRCH) { +- crm_err("kill(%d, KILL) failed: %s (%d)", pid, pcmk_strerror(errno), errno); +- } +- /* +- * From sigprocmask(2): +- * It is not possible to block SIGKILL or SIGSTOP. Attempts to do so are silently ignored. 
+- * +- * This makes it safe to skip WNOHANG here +- */ +- p = waitpid(pid, &status, 0); +- } +- +- if (p <= 0) { +- crm_perror(LOG_ERR, "waitpid(%d)", pid); +- +- } else if (p != pid) { +- crm_err("Waited for %d, got %d", pid, p); +- } +- +- action->output = read_output(p_read_fd); +- action->error = read_output(p_stderr_fd); +- +- action->rc = -ECONNABORTED; +- +- log_action(action, pid); +- +- rc = action->rc; +- if (timeout == 0) { +- action->rc = -ETIME; +- } else if (WIFEXITED(status)) { +- crm_debug("result = %d", WEXITSTATUS(status)); +- action->rc = -WEXITSTATUS(status); ++ if (services_action_sync(svc_action)) { + rc = 0; +- +- } else if (WIFSIGNALED(status)) { +- crm_err("call %s for %s exited due to signal %d", action->action, action->agent, +- WTERMSIG(status)); +- ++ action->rc = svc_action_to_errno(svc_action); ++ action->output = svc_action->stdout_data; ++ svc_action->stdout_data = NULL; ++ action->error = svc_action->stderr_data; ++ svc_action->stderr_data = NULL; + } else { +- crm_err("call %s for %s returned unexpected status %#x", +- action->action, action->agent, status); ++ action->rc = -ECONNABORTED; ++ rc = action->rc; + } +- } + +- fail: +- +- if (p_read_fd >= 0) { +- close(p_read_fd); +- } +- if (p_write_fd >= 0) { +- close(p_write_fd); +- } +- if (p_stderr_fd >= 0) { +- close(p_stderr_fd); +- } +- +- if (c_read_fd >= 0) { +- close(c_read_fd); +- } +- if (c_write_fd >= 0) { +- close(c_write_fd); +- } +- if (c_stderr_fd >= 0) { +- close(c_stderr_fd); ++ svc_action->params = NULL; ++ services_action_free(svc_action); + } + ++ fail: + return rc; + } + +diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c +index a413484..d79c16d 100644 +--- a/lib/services/services_linux.c ++++ b/lib/services/services_linux.c +@@ -195,6 +195,39 @@ add_action_env_vars(const svc_action_t *op) + } + } + ++static void ++pipe_in_single_parameter(gpointer key, gpointer value, gpointer user_data) ++{ ++ svc_action_t *op = user_data; ++ char 
*buffer = crm_strdup_printf("%s=%s\n", (char *)key, (char *) value); ++ int ret, total = 0, len = strlen(buffer); ++ ++ do { ++ errno = 0; ++ ret = write(op->opaque->stdin_fd, buffer + total, len - total); ++ if (ret > 0) { ++ total += ret; ++ } ++ ++ } while ((errno == EINTR) && (total < len)); ++ free(buffer); ++} ++ ++/*! ++ * \internal ++ * \brief Pipe parameters in via stdin for action ++ * ++ * \param[in] op Action to use ++ */ ++static void ++pipe_in_action_stdin_parameters(const svc_action_t *op) ++{ ++ crm_debug("sending args"); ++ if (op->params) { ++ g_hash_table_foreach(op->params, pipe_in_single_parameter, (gpointer) op); ++ } ++} ++ + gboolean + recurring_action_timer(gpointer data) + { +@@ -284,6 +317,10 @@ operation_finished(mainloop_child_t * p, pid_t pid, int core, int signo, int exi + op->opaque->stdout_gsource = NULL; + } + ++ if (op->opaque->stdin_fd >= 0) { ++ close(op->opaque->stdin_fd); ++ } ++ + if (signo) { + if (mainloop_child_timeout(p)) { + crm_warn("%s - timed out after %dms", prefix, op->timeout); +@@ -605,6 +642,9 @@ action_synced_wait(svc_action_t * op, sigset_t *mask) + + close(op->opaque->stdout_fd); + close(op->opaque->stderr_fd); ++ if (op->opaque->stdin_fd >= 0) { ++ close(op->opaque->stdin_fd); ++ } + + #ifdef HAVE_SYS_SIGNALFD_H + close(sfd); +@@ -618,6 +658,7 @@ services_os_action_execute(svc_action_t * op) + { + int stdout_fd[2]; + int stderr_fd[2]; ++ int stdin_fd[2] = {-1, -1}; + int rc; + struct stat st; + sigset_t *pmask; +@@ -683,6 +724,25 @@ services_os_action_execute(svc_action_t * op) + return FALSE; + } + ++ if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH)) { ++ if (pipe(stdin_fd) < 0) { ++ rc = errno; ++ ++ close(stdout_fd[0]); ++ close(stdout_fd[1]); ++ close(stderr_fd[0]); ++ close(stderr_fd[1]); ++ ++ crm_err("pipe(stdin_fd) failed. 
'%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); ++ ++ services_handle_exec_error(op, rc); ++ if (!op->synchronous) { ++ return operation_finalize(op); ++ } ++ return FALSE; ++ } ++ } ++ + if (op->synchronous) { + #ifdef HAVE_SYS_SIGNALFD_H + sigemptyset(&mask); +@@ -730,6 +790,10 @@ services_os_action_execute(svc_action_t * op) + close(stdout_fd[1]); + close(stderr_fd[0]); + close(stderr_fd[1]); ++ if (stdin_fd[0] >= 0) { ++ close(stdin_fd[0]); ++ close(stdin_fd[1]); ++ } + + crm_err("Could not execute '%s': %s (%d)", op->opaque->exec, pcmk_strerror(rc), rc); + services_handle_exec_error(op, rc); +@@ -743,6 +807,9 @@ services_os_action_execute(svc_action_t * op) + case 0: /* Child */ + close(stdout_fd[0]); + close(stderr_fd[0]); ++ if (stdin_fd[1] >= 0) { ++ close(stdin_fd[1]); ++ } + if (STDOUT_FILENO != stdout_fd[1]) { + if (dup2(stdout_fd[1], STDOUT_FILENO) != STDOUT_FILENO) { + crm_err("dup2() failed (stdout)"); +@@ -755,6 +822,13 @@ services_os_action_execute(svc_action_t * op) + } + close(stderr_fd[1]); + } ++ if ((stdin_fd[0] >= 0) && ++ (STDIN_FILENO != stdin_fd[0])) { ++ if (dup2(stdin_fd[0], STDIN_FILENO) != STDIN_FILENO) { ++ crm_err("dup2() failed (stdin)"); ++ } ++ close(stdin_fd[0]); ++ } + + if (op->synchronous) { + sigchld_cleanup(); +@@ -767,6 +841,9 @@ services_os_action_execute(svc_action_t * op) + /* Only the parent reaches here */ + close(stdout_fd[1]); + close(stderr_fd[1]); ++ if (stdin_fd[0] >= 0) { ++ close(stdin_fd[0]); ++ } + + op->opaque->stdout_fd = stdout_fd[0]; + rc = crm_set_nonblocking(op->opaque->stdout_fd); +@@ -784,6 +861,22 @@ services_os_action_execute(svc_action_t * op) + pcmk_strerror(rc), rc); + } + ++ op->opaque->stdin_fd = stdin_fd[1]; ++ if (op->opaque->stdin_fd >= 0) { ++ // using buffer behind non-blocking-fd here - that could be improved ++ // as long as no other standard uses stdin_fd assume stonith ++ rc = crm_set_nonblocking(op->opaque->stdin_fd); ++ if (rc < 0) { ++ crm_warn("Could not set child input 
non-blocking: %s " ++ CRM_XS " fd=%d,rc=%d", ++ pcmk_strerror(rc), op->opaque->stdin_fd, rc); ++ } ++ pipe_in_action_stdin_parameters(op); ++ // as long as we are handling parameters directly in here just close ++ close(op->opaque->stdin_fd); ++ op->opaque->stdin_fd = -1; ++ } ++ + if (op->synchronous) { + action_synced_wait(op, pmask); + sigchld_cleanup(); +diff --git a/lib/services/services_private.h b/lib/services/services_private.h +index 0676c6f..9735da7 100644 +--- a/lib/services/services_private.h ++++ b/lib/services/services_private.h +@@ -42,6 +42,8 @@ struct svc_action_private_s { + + int stdout_fd; + mainloop_io_t *stdout_gsource; ++ ++ int stdin_fd; + #if SUPPORT_DBUS + DBusPendingCall* pending; + unsigned timerid; +-- +1.8.3.1 + diff --git a/SOURCES/003-static-analysis.patch b/SOURCES/003-static-analysis.patch deleted file mode 100644 index d0297d6..0000000 --- a/SOURCES/003-static-analysis.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 1307b6f238fb7f4cada95f6af02c1a4caae3eb63 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 3 Aug 2018 18:30:47 -0500 -Subject: [PATCH 1/2] Refactor: scheduler: remove unused variable setting - -makes static analysis happy ---- - lib/pengine/container.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/lib/pengine/container.c b/lib/pengine/container.c -index d82948a..1526f37 100644 ---- a/lib/pengine/container.c -+++ b/lib/pengine/container.c -@@ -780,7 +780,6 @@ container_fix_remote_addr(resource_t *rsc) - } - - for (int lpc = 0; lpc < DIMOF(attr_list); lpc++) { -- name = attr_list[lpc]; - value = crm_element_value(rsc->xml, attr_list[lpc]); - if (safe_str_eq(value, value_list[lpc]) == FALSE) { - return FALSE; --- -1.8.3.1 - - -From 1a95cbae653df8835906314d77e74091f55ab319 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 3 Aug 2018 18:32:08 -0500 -Subject: [PATCH 2/2] Refactor: libcrmcommon: remove dead code - -makes static analysis happy ---- - lib/common/iso8601.c | 6 +++--- - 1 file changed, 3 
insertions(+), 3 deletions(-) - -diff --git a/lib/common/iso8601.c b/lib/common/iso8601.c -index c95fa13..b661fce 100644 ---- a/lib/common/iso8601.c -+++ b/lib/common/iso8601.c -@@ -1384,7 +1384,7 @@ crm_time_format_hr(const char *format, crm_time_hr_t * hr_dt) - { - const char *mark_s; - int max = 128, scanned_pos = 0, printed_pos = 0, fmt_pos = 0, -- date_len = 0, nano_digits = 0, fmt_len; -+ date_len = 0, nano_digits = 0; - char nano_s[10], date_s[max+1], nanofmt_s[5] = "%", *tmp_fmt_s; - struct tm tm; - crm_time_t dt; -@@ -1397,11 +1397,11 @@ crm_time_format_hr(const char *format, crm_time_hr_t * hr_dt) - sprintf(nano_s, "%06d000", hr_dt->useconds); - - while ((format[scanned_pos]) != '\0') { -- fmt_len = 0; - mark_s = strchr(&format[scanned_pos], '%'); - if (mark_s) { -+ int fmt_len = 1; -+ - fmt_pos = mark_s - format; -- fmt_len = 1; - while ((format[fmt_pos+fmt_len] != '\0') && - (format[fmt_pos+fmt_len] >= '0') && - (format[fmt_pos+fmt_len] <= '9')) { --- -1.8.3.1 - diff --git a/SOURCES/004-cleanup.patch b/SOURCES/004-cleanup.patch deleted file mode 100644 index 9a5a3bc..0000000 --- a/SOURCES/004-cleanup.patch +++ /dev/null @@ -1,955 +0,0 @@ -From 039b778b07f256dd564171430c5427dfb9489a58 Mon Sep 17 00:00:00 2001 -From: "Gao,Yan" -Date: Fri, 8 Dec 2017 14:47:40 +0100 -Subject: [PATCH 1/8] Refactor: tools: crm_resource - Functionize cleaning up - resource failures - ---- - tools/crm_resource.c | 26 ++------------------------ - tools/crm_resource.h | 3 +++ - tools/crm_resource_runtime.c | 36 ++++++++++++++++++++++++++++++++++++ - 3 files changed, 41 insertions(+), 24 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 0557892..331adf6 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1103,31 +1103,9 @@ main(int argc, char **argv) - - } else if (rsc_cmd == 'C' && just_errors) { - crmd_replies_needed = 0; -- for (xmlNode *xml_op = __xml_first_child(data_set.failed); xml_op != NULL; -- xml_op = __xml_next(xml_op)) { 
-- -- const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); -- const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); -- const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); -- const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); -- -- if(resource_name == NULL) { -- continue; -- } else if(host_uname && safe_str_neq(host_uname, node)) { -- continue; -- } else if(rsc_id && safe_str_neq(rsc_id, resource_name)) { -- continue; -- } else if(operation && safe_str_neq(operation, task)) { -- continue; -- } else if(interval && safe_str_neq(interval, task_interval)) { -- continue; -- } - -- crm_debug("Erasing %s failure for %s (%s detected) on %s", -- task, rsc->id, resource_name, node); -- rc = cli_resource_delete(crmd_channel, node, rsc, task, -- task_interval, &data_set); -- } -+ rc = cli_resource_delete_failures(crmd_channel, host_uname, rsc, operation, -+ interval, &data_set); - - if(rsc && (rc == pcmk_ok) && (BE_QUIET == FALSE)) { - /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index 0b8dd2a..e28c9ef 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -76,6 +76,9 @@ int cli_resource_search(resource_t *rsc, const char *requested_name, - int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, - const char *interval, pe_working_set_t *data_set); -+int cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, -+ resource_t *rsc, const char *operation, -+ const char *interval, pe_working_set_t *data_set); - int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib); - int cli_resource_move(resource_t *rsc, const char *rsc_id, - const char *host_name, cib_t *cib, -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 5004935..9aa7b7e 100644 ---- a/tools/crm_resource_runtime.c -+++ 
b/tools/crm_resource_runtime.c -@@ -681,6 +681,42 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - return rc; - } - -+int -+cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, -+ resource_t *rsc, const char *operation, -+ const char *interval, pe_working_set_t *data_set) -+{ -+ int rc = pcmk_ok; -+ -+ for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; -+ xml_op = __xml_next(xml_op)) { -+ -+ const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); -+ const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); -+ const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); -+ const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); -+ -+ if(resource_name == NULL) { -+ continue; -+ } else if(host_uname && safe_str_neq(host_uname, node)) { -+ continue; -+ } else if(rsc->id && safe_str_neq(rsc->id, resource_name)) { -+ continue; -+ } else if(operation && safe_str_neq(operation, task)) { -+ continue; -+ } else if(interval && safe_str_neq(interval, task_interval)) { -+ continue; -+ } -+ -+ crm_debug("Erasing %s failure for %s (%s detected) on %s", -+ task, rsc->id, resource_name, node); -+ rc = cli_resource_delete(crmd_channel, node, rsc, task, -+ task_interval, data_set); -+ } -+ -+ return rc; -+} -+ - void - cli_resource_check(cib_t * cib_conn, resource_t *rsc) - { --- -1.8.3.1 - - -From 4ae40b495305b87f59e439de3298910c243c171d Mon Sep 17 00:00:00 2001 -From: "Gao,Yan" -Date: Fri, 8 Dec 2017 16:22:54 +0100 -Subject: [PATCH 2/8] Fix: tools: crm_resource --cleanup for non-primitive - resources - ---- - tools/crm_resource_runtime.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 9aa7b7e..98cd27f 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -688,6 +688,24 @@ cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char 
*host_uname, - { - int rc = pcmk_ok; - -+ if (rsc == NULL) { -+ return -ENXIO; -+ -+ } else if (rsc->children) { -+ GListPtr lpc = NULL; -+ -+ for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { -+ resource_t *child = (resource_t *) lpc->data; -+ -+ rc = cli_resource_delete_failures(crmd_channel, host_uname, child, operation, -+ interval, data_set); -+ if(rc != pcmk_ok) { -+ return rc; -+ } -+ } -+ return pcmk_ok; -+ } -+ - for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; - xml_op = __xml_next(xml_op)) { - --- -1.8.3.1 - - -From 6ce88cdbcbe15b7e81a4234eb92a93663243a7ff Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 11 Dec 2017 12:23:06 -0600 -Subject: [PATCH 3/8] Fix: tools: crm_resource --cleanup - -The new "failures only" mode of crm_resource --cleanup had multiple issues, -including not working without --resource specified, comparing a -user-provided interval string against a milliseconds interval, and -considering no interval specified as all intervals rather than 0 -but only when clearing LRM history entries. ---- - tools/crm_resource.c | 35 +++--- - tools/crm_resource.h | 9 +- - tools/crm_resource_runtime.c | 258 ++++++++++++++++++++++++++++++------------- - 3 files changed, 202 insertions(+), 100 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 331adf6..e3f8f86 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1101,14 +1101,20 @@ main(int argc, char **argv) - rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, - prop_name, cib_conn, &data_set); - -- } else if (rsc_cmd == 'C' && just_errors) { -+ } else if ((rsc_cmd == 'C') && rsc) { -+ if (do_force == FALSE) { -+ rsc = uber_parent(rsc); -+ } - crmd_replies_needed = 0; - -- rc = cli_resource_delete_failures(crmd_channel, host_uname, rsc, operation, -- interval, &data_set); -+ crm_debug("%s of %s (%s requested) on %s", -+ (just_errors? 
"Clearing failures" : "Re-checking the state"), -+ rsc->id, rsc_id, (host_uname? host_uname : "all hosts")); -+ rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation, -+ interval, just_errors, &data_set); - -- if(rsc && (rc == pcmk_ok) && (BE_QUIET == FALSE)) { -- /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ -+ if ((rc == pcmk_ok) && !BE_QUIET) { -+ // Show any reasons why resource might stay stopped - cli_resource_check(cib_conn, rsc); - } - -@@ -1116,22 +1122,9 @@ main(int argc, char **argv) - start_mainloop(); - } - -- } else if ((rsc_cmd == 'C') && rsc) { -- if(do_force == FALSE) { -- rsc = uber_parent(rsc); -- } -- -- crm_debug("Re-checking the state of %s (%s requested) on %s", -- rsc->id, rsc_id, host_uname); -- crmd_replies_needed = 0; -- rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation, -- interval, &data_set); -- -- if(rc == pcmk_ok && BE_QUIET == FALSE) { -- /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */ -- cli_resource_check(cib_conn, rsc); -- } -- -+ } else if (rsc_cmd == 'C' && just_errors) { -+ rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval, -+ &data_set); - if (rc == pcmk_ok) { - start_mainloop(); - } -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index e28c9ef..0ac51f2 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -75,10 +75,11 @@ int cli_resource_search(resource_t *rsc, const char *requested_name, - pe_working_set_t *data_set); - int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, -- const char *interval, pe_working_set_t *data_set); --int cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, -- resource_t *rsc, const char *operation, -- const char *interval, pe_working_set_t *data_set); -+ const char *interval, bool just_failures, -+ pe_working_set_t *data_set); -+int cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, -+ const 
char *operation, const char *interval, -+ pe_working_set_t *data_set); - int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib); - int cli_resource_move(resource_t *rsc, const char *rsc_id, - const char *host_name, cib_t *cib, -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 98cd27f..2cc2bec 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -558,15 +558,129 @@ rsc_fail_name(resource_t *rsc) - return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); - } - -+static int -+clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, -+ const char *rsc_id, pe_working_set_t *data_set) -+{ -+ int rc = pcmk_ok; -+ -+ /* Erase the resource's entire LRM history in the CIB, even if we're only -+ * clearing a single operation's fail count. If we erased only entries for a -+ * single operation, we might wind up with a wrong idea of the current -+ * resource state, and we might not re-probe the resource. -+ */ -+ rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id, -+ TRUE, data_set); -+ if (rc != pcmk_ok) { -+ return rc; -+ } -+ crmd_replies_needed++; -+ -+ crm_trace("Processing %d mainloop inputs", crmd_replies_needed); -+ while (g_main_context_iteration(NULL, FALSE)) { -+ crm_trace("Processed mainloop input, %d still remaining", -+ crmd_replies_needed); -+ } -+ -+ if (crmd_replies_needed < 0) { -+ crmd_replies_needed = 0; -+ } -+ return rc; -+} -+ -+static int -+clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, -+ const char *rsc_id, const char *operation, -+ const char *interval, pe_working_set_t *data_set) -+{ -+ int rc = pcmk_ok; -+ const char *failed_value = NULL; -+ const char *interval_ms_str = NULL; -+ GHashTable *rscs = NULL; -+ GHashTableIter iter; -+ -+ /* Create a hash table to use as a set of resources to clean. 
This lets us -+ * clean each resource only once (per node) regardless of how many failed -+ * operations it has. -+ */ -+ rscs = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); -+ -+ // Normalize interval to milliseconds for comparison to history entry -+ if (operation) { -+ interval_ms_str = crm_strdup_printf("%llu", crm_get_interval(interval)); -+ } -+ -+ for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; -+ xml_op = __xml_next(xml_op)) { -+ -+ // No resource specified means all resources match -+ failed_value = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); -+ if (rsc_id == NULL) { -+ rsc_id = failed_value; -+ } else if (safe_str_neq(rsc_id, failed_value)) { -+ continue; -+ } -+ -+ // Host name should always have been provided by this point -+ failed_value = crm_element_value(xml_op, XML_ATTR_UNAME); -+ if (safe_str_neq(node_name, failed_value)) { -+ continue; -+ } -+ -+ // No operation specified means all operations match -+ if (operation) { -+ failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK); -+ if (safe_str_neq(operation, failed_value)) { -+ continue; -+ } -+ -+ // Interval (if operation was specified) defaults to 0 (not all) -+ failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); -+ if (safe_str_neq(interval_ms_str, failed_value)) { -+ continue; -+ } -+ } -+ -+ g_hash_table_add(rscs, (gpointer) rsc_id); -+ } -+ -+ g_hash_table_iter_init(&iter, rscs); -+ while (g_hash_table_iter_next(&iter, (gpointer *) &rsc_id, NULL)) { -+ crm_debug("Erasing failures of %s on %s", rsc_id, node_name); -+ rc = clear_rsc_history(crmd_channel, node_name, rsc_id, data_set); -+ if (rc != pcmk_ok) { -+ return rc; -+ } -+ } -+ g_hash_table_destroy(rscs); -+ return rc; -+} -+ -+static int -+clear_rsc_fail_attrs(resource_t *rsc, const char *operation, -+ const char *interval, node_t *node) -+{ -+ int rc = pcmk_ok; -+ int attr_options = attrd_opt_none; -+ char *rsc_name = rsc_fail_name(rsc); -+ -+ if (is_remote_node(node)) { 
-+ attr_options |= attrd_opt_remote; -+ } -+ rc = attrd_clear_delegate(NULL, node->details->uname, rsc_name, operation, -+ interval, NULL, attr_options); -+ free(rsc_name); -+ return rc; -+} -+ - int - cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, -- const char *interval, pe_working_set_t *data_set) -+ const char *interval, bool just_failures, -+ pe_working_set_t *data_set) - { - int rc = pcmk_ok; - node_t *node = NULL; -- char *rsc_name = NULL; -- int attr_options = attrd_opt_none; - - if (rsc == NULL) { - return -ENXIO; -@@ -578,8 +692,8 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *child = (resource_t *) lpc->data; - - rc = cli_resource_delete(crmd_channel, host_uname, child, operation, -- interval, data_set); -- if(rc != pcmk_ok) { -+ interval, just_failures, data_set); -+ if (rc != pcmk_ok) { - return rc; - } - } -@@ -611,8 +725,13 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - node = (node_t *) lpc->data; - - if (node->details->online) { -- cli_resource_delete(crmd_channel, node->details->uname, rsc, -- operation, interval, data_set); -+ rc = cli_resource_delete(crmd_channel, node->details->uname, -+ rsc, operation, interval, -+ just_failures, data_set); -+ } -+ if (rc != pcmk_ok) { -+ g_list_free(nodes); -+ return rc; - } - } - -@@ -637,102 +756,91 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - if (crmd_channel == NULL) { - printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n", - rsc->id, host_uname); -- return rc; -- } -+ return pcmk_ok; -+ } - -- /* Erase the resource's entire LRM history in the CIB, even if we're only -- * clearing a single operation's fail count. If we erased only entries for a -- * single operation, we might wind up with a wrong idea of the current -- * resource state, and we might not re-probe the resource. 
-- */ -- rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, -- TRUE, data_set); -+ rc = clear_rsc_fail_attrs(rsc, operation, interval, node); - if (rc != pcmk_ok) { -- printf("Unable to clean up %s history on %s: %s\n", -- rsc->id, host_uname, pcmk_strerror(rc)); -+ printf("Unable to clean up %s failures on %s: %s\n", -+ rsc->id, host_uname, pcmk_strerror(rc)); - return rc; - } -- crmd_replies_needed++; - -- crm_trace("Processing %d mainloop inputs", crmd_replies_needed); -- while(g_main_context_iteration(NULL, FALSE)) { -- crm_trace("Processed mainloop input, %d still remaining", -- crmd_replies_needed); -- } -- -- if(crmd_replies_needed < 0) { -- crmd_replies_needed = 0; -- } -- -- rsc_name = rsc_fail_name(rsc); -- if (is_remote_node(node)) { -- attr_options |= attrd_opt_remote; -+ if (just_failures) { -+ rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation, -+ interval, data_set); -+ } else { -+ rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set); - } -- rc = attrd_clear_delegate(NULL, host_uname, rsc_name, operation, interval, -- NULL, attr_options); - if (rc != pcmk_ok) { -- printf("Cleaned %s history on %s, but unable to clear failures: %s\n", -+ printf("Cleaned %s failures on %s, but unable to clean history: %s\n", - rsc->id, host_uname, pcmk_strerror(rc)); - } else { - printf("Cleaned up %s on %s\n", rsc->id, host_uname); - } -- free(rsc_name); -- - return rc; - } - - int --cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname, -- resource_t *rsc, const char *operation, -- const char *interval, pe_working_set_t *data_set) -+cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, -+ const char *operation, const char *interval, -+ pe_working_set_t *data_set) - { -+ int attr_options = attrd_opt_none; - int rc = pcmk_ok; -+ const char *display_name = node_name? 
node_name : "all nodes"; - -- if (rsc == NULL) { -- return -ENXIO; -- -- } else if (rsc->children) { -- GListPtr lpc = NULL; -+ if (crmd_channel == NULL) { -+ printf("Dry run: skipping clean-up of %s due to CIB_file\n", -+ display_name); -+ return pcmk_ok; -+ } -+ crmd_replies_needed = 0; - -- for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { -- resource_t *child = (resource_t *) lpc->data; -+ if (node_name) { -+ node_t *node = pe_find_node(data_set->nodes, node_name); - -- rc = cli_resource_delete_failures(crmd_channel, host_uname, child, operation, -- interval, data_set); -- if(rc != pcmk_ok) { -- return rc; -- } -+ if (node == NULL) { -+ CMD_ERR("Unknown node: %s", node_name); -+ return -ENXIO; -+ } -+ if (is_remote_node(node)) { -+ attr_options |= attrd_opt_remote; - } -- return pcmk_ok; - } - -- for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; -- xml_op = __xml_next(xml_op)) { -- -- const char *node = crm_element_value(xml_op, XML_ATTR_UNAME); -- const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); -- const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); -- const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); -+ rc = attrd_clear_delegate(NULL, node_name, NULL, operation, interval, -+ NULL, attr_options); -+ if (rc != pcmk_ok) { -+ printf("Unable to clean up all failures on %s: %s\n", -+ display_name, pcmk_strerror(rc)); -+ return rc; -+ } - -- if(resource_name == NULL) { -- continue; -- } else if(host_uname && safe_str_neq(host_uname, node)) { -- continue; -- } else if(rsc->id && safe_str_neq(rsc->id, resource_name)) { -- continue; -- } else if(operation && safe_str_neq(operation, task)) { -- continue; -- } else if(interval && safe_str_neq(interval, task_interval)) { -- continue; -+ if (node_name) { -+ rc = clear_rsc_failures(crmd_channel, node_name, NULL, -+ operation, interval, data_set); -+ if (rc != pcmk_ok) { -+ printf("Cleaned all resource failures on %s, but 
unable to clean history: %s\n", -+ node_name, pcmk_strerror(rc)); -+ return rc; - } -+ } else { -+ for (GList *iter = data_set->nodes; iter; iter = iter->next) { -+ pe_node_t *node = (pe_node_t *) iter->data; - -- crm_debug("Erasing %s failure for %s (%s detected) on %s", -- task, rsc->id, resource_name, node); -- rc = cli_resource_delete(crmd_channel, node, rsc, task, -- task_interval, data_set); -+ rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, -+ operation, interval, data_set); -+ if (rc != pcmk_ok) { -+ printf("Cleaned all resource failures on all nodes, but unable to clean history on %s: %s\n", -+ node->details->uname, pcmk_strerror(rc)); -+ return rc; -+ } -+ } - } - -- return rc; -+ printf("Cleaned up all resources on %s\n", display_name); -+ return pcmk_ok; - } - - void --- -1.8.3.1 - - -From 0b6c3b3064401c8f0ebb48ccfd11f43dc2dc2b1b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 12 Dec 2017 10:02:22 -0600 -Subject: [PATCH 4/8] Fix: tools: crm_resource --cleanup with no resource - specified - -7a813755 failed to completely fix --cleanup without --resource ---- - tools/crm_resource_runtime.c | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 2cc2bec..ce86a49 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -595,6 +595,7 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, - { - int rc = pcmk_ok; - const char *failed_value = NULL; -+ const char *failed_id = NULL; - const char *interval_ms_str = NULL; - GHashTable *rscs = NULL; - GHashTableIter iter; -@@ -613,11 +614,14 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, - for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; - xml_op = __xml_next(xml_op)) { - -+ failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); -+ if (failed_id == NULL) { -+ // Malformed history entry, should 
never happen -+ continue; -+ } -+ - // No resource specified means all resources match -- failed_value = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); -- if (rsc_id == NULL) { -- rsc_id = failed_value; -- } else if (safe_str_neq(rsc_id, failed_value)) { -+ if (rsc_id && safe_str_neq(rsc_id, failed_id)) { - continue; - } - -@@ -641,13 +645,13 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, - } - } - -- g_hash_table_add(rscs, (gpointer) rsc_id); -+ g_hash_table_add(rscs, (gpointer) failed_id); - } - - g_hash_table_iter_init(&iter, rscs); -- while (g_hash_table_iter_next(&iter, (gpointer *) &rsc_id, NULL)) { -- crm_debug("Erasing failures of %s on %s", rsc_id, node_name); -- rc = clear_rsc_history(crmd_channel, node_name, rsc_id, data_set); -+ while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { -+ crm_debug("Erasing failures of %s on %s", failed_id, node_name); -+ rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set); - if (rc != pcmk_ok) { - return rc; - } --- -1.8.3.1 - - -From 9d5a1dae23a44db190782560d8dbdf50343b3692 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 24 Jan 2018 10:51:34 -0600 -Subject: [PATCH 5/8] Low: tools: crm_resource --refresh should ignore - --operation and --interval - -It already did when a resource was not specified. -Also update help text to clarify cleanup vs refresh. 
---- - tools/crm_resource.c | 23 +++++++++++------------ - 1 file changed, 11 insertions(+), 12 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index e3f8f86..d00c8f2 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -214,15 +214,17 @@ static struct crm_option long_options[] = { - "cleanup", no_argument, NULL, 'C', - #if 0 - // new behavior disabled until 2.0.0 -- "\t\tDelete failed operations from a resource's history allowing its current state to be rechecked.\n" -+ "\t\tIf resource has any past failures, clear its history and fail count.\n" - "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n" -+ "\t\t\t\t--operation and --interval apply to fail counts, but entire history is always cleared,\n" -+ "\t\t\t\tto allow current state to be rechecked.\n" - }, - { - "refresh", no_argument, NULL, 'R', - #endif - "\t\tDelete resource's history (including failures) so its current state is rechecked.\n" -- "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n" -- "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be cleaned" -+ "\t\t\t\tOptionally filtered by --resource and --node (otherwise all).\n" -+ "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be refreshed." 
- }, - { - "set-parameter", required_argument, NULL, 'p', -@@ -442,7 +444,6 @@ main(int argc, char **argv) - bool require_resource = TRUE; /* whether command requires that resource be specified */ - bool require_dataset = TRUE; /* whether command requires populated dataset instance */ - bool require_crmd = FALSE; /* whether command requires connection to CRMd */ -- bool just_errors = TRUE; /* whether cleanup command deletes all history or just errors */ - - int rc = pcmk_ok; - int is_ocf_rc = 0; -@@ -634,8 +635,7 @@ main(int argc, char **argv) - if (cib_file == NULL) { - require_crmd = TRUE; - } -- just_errors = FALSE; -- rsc_cmd = 'C'; -+ rsc_cmd = 'R'; - find_flags = pe_find_renamed|pe_find_anon; - break; - -@@ -645,7 +645,6 @@ main(int argc, char **argv) - if (cib_file == NULL) { - require_crmd = TRUE; - } -- just_errors = FALSE; // disable until 2.0.0 - rsc_cmd = 'C'; - find_flags = pe_find_renamed|pe_find_anon; - break; -@@ -1101,7 +1100,7 @@ main(int argc, char **argv) - rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, - prop_name, cib_conn, &data_set); - -- } else if ((rsc_cmd == 'C') && rsc) { -+ } else if ((rsc_cmd == 'R') && rsc) { - if (do_force == FALSE) { - rsc = uber_parent(rsc); - } -@@ -1110,8 +1109,8 @@ main(int argc, char **argv) - crm_debug("%s of %s (%s requested) on %s", - (just_errors? "Clearing failures" : "Re-checking the state"), - rsc->id, rsc_id, (host_uname? 
host_uname : "all hosts")); -- rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation, -- interval, just_errors, &data_set); -+ rc = cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0, -+ &data_set); - - if ((rc == pcmk_ok) && !BE_QUIET) { - // Show any reasons why resource might stay stopped -@@ -1122,14 +1121,14 @@ main(int argc, char **argv) - start_mainloop(); - } - -- } else if (rsc_cmd == 'C' && just_errors) { -+ } else if (rsc_cmd == 'C') { - rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval, - &data_set); - if (rc == pcmk_ok) { - start_mainloop(); - } - -- } else if (rsc_cmd == 'C') { -+ } else if (rsc_cmd == 'R') { - #if HAVE_ATOMIC_ATTRD - const char *router_node = host_uname; - xmlNode *msg_data = NULL; --- -1.8.3.1 - - -From 035bebd78c1936b0749ae64fe949deb5d77effe9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Feb 2018 12:43:48 -0600 -Subject: [PATCH 6/8] Fix: tools: auto-merge was insufficient - -The master and 2.0 branches had taken different approaches to crm_resource -clean-up refactoring in response to different issues. It was necessary to -combine the code more carefully. 
---- - tools/crm_resource.c | 13 ++----------- - tools/crm_resource_runtime.c | 16 +++++++++++----- - 2 files changed, 13 insertions(+), 16 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index d00c8f2..fc46cc0 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -628,6 +628,7 @@ main(int argc, char **argv) - timeout_ms = crm_get_msec(optarg); - break; - -+ case 'C': - case 'R': - case 'P': - crm_log_args(argc, argv); -@@ -635,17 +636,7 @@ main(int argc, char **argv) - if (cib_file == NULL) { - require_crmd = TRUE; - } -- rsc_cmd = 'R'; -- find_flags = pe_find_renamed|pe_find_anon; -- break; -- -- case 'C': -- crm_log_args(argc, argv); -- require_resource = FALSE; -- if (cib_file == NULL) { -- require_crmd = TRUE; -- } -- rsc_cmd = 'C'; -+ rsc_cmd = 'R'; // disable new behavior until 2.0 - find_flags = pe_find_renamed|pe_find_anon; - break; - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index ce86a49..e02cc44 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -621,8 +621,14 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, - } - - // No resource specified means all resources match -- if (rsc_id && safe_str_neq(rsc_id, failed_id)) { -- continue; -+ if (rsc_id) { -+ resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources, -+ failed_id, -+ pe_find_renamed|pe_find_anon); -+ -+ if (!fail_rsc || safe_str_neq(rsc_id, fail_rsc->id)) { -+ continue; -+ } - } - - // Host name should always have been provided by this point -@@ -790,8 +796,8 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, - const char *operation, const char *interval, - pe_working_set_t *data_set) - { -- int attr_options = attrd_opt_none; - int rc = pcmk_ok; -+ int attr_options = attrd_opt_none; - const char *display_name = node_name? 
node_name : "all nodes"; - - if (crmd_channel == NULL) { -@@ -836,8 +842,8 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, - rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, - operation, interval, data_set); - if (rc != pcmk_ok) { -- printf("Cleaned all resource failures on all nodes, but unable to clean history on %s: %s\n", -- node->details->uname, pcmk_strerror(rc)); -+ printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n", -+ pcmk_strerror(rc)); - return rc; - } - } --- -1.8.3.1 - - -From 5fa351ec714de6b67c456fb1a85a8ebdb658f604 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 7 Aug 2018 10:42:59 -0500 -Subject: [PATCH 7/8] Low: tools: update crm_resource for 1.1 vs 2.0 - differences - ---- - tools/crm_resource.c | 37 +++++++++++++++++++++++++++---------- - 1 file changed, 27 insertions(+), 10 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index fc46cc0..128d075 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -352,11 +352,13 @@ static struct crm_option long_options[] = { - }, - { - "operation", required_argument, NULL, 'n', -- "\tOperation to clear instead of all (with -C -r)" -+ "\tOperation to clear instead of all (with -C -r)", -+ pcmk_option_hidden // only used with 2.0 -C behavior - }, - { - "interval", required_argument, NULL, 'I', -- "\tInterval of operation to clear (default 0) (with -C -r -n)" -+ "\tInterval of operation to clear (default 0) (with -C -r -n)", -+ pcmk_option_hidden // only used with 2.0 -C behavior - }, - { - "set-name", required_argument, NULL, 's', -@@ -1091,17 +1093,16 @@ main(int argc, char **argv) - rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id, - prop_name, cib_conn, &data_set); - -- } else if ((rsc_cmd == 'R') && rsc) { -+ } else if ((rsc_cmd == 'C') && rsc) { - if (do_force == FALSE) { - rsc = uber_parent(rsc); - } - crmd_replies_needed = 0; - -- crm_debug("%s of %s (%s requested) on %s", 
-- (just_errors? "Clearing failures" : "Re-checking the state"), -- rsc->id, rsc_id, (host_uname? host_uname : "all hosts")); -- rc = cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0, -- &data_set); -+ crm_debug("Erasing failures of %s (%s requested) on %s", -+ rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); -+ rc = cli_resource_delete(crmd_channel, host_uname, rsc, -+ operation, interval, TRUE, &data_set); - - if ((rc == pcmk_ok) && !BE_QUIET) { - // Show any reasons why resource might stay stopped -@@ -1119,6 +1120,22 @@ main(int argc, char **argv) - start_mainloop(); - } - -+ } else if ((rsc_cmd == 'R') && rsc) { -+ if (do_force == FALSE) { -+ rsc = uber_parent(rsc); -+ } -+ crmd_replies_needed = 0; -+ -+ crm_debug("Re-checking the state of %s (%s requested) on %s", -+ rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); -+ rc = cli_resource_delete(crmd_channel, host_uname, rsc, -+ NULL, 0, FALSE, &data_set); -+ -+ if ((rc == pcmk_ok) && !BE_QUIET) { -+ // Show any reasons why resource might stay stopped -+ cli_resource_check(cib_conn, rsc); -+ } -+ - } else if (rsc_cmd == 'R') { - #if HAVE_ATOMIC_ATTRD - const char *router_node = host_uname; -@@ -1174,8 +1191,8 @@ main(int argc, char **argv) - crmd_replies_needed = 0; - for (rIter = data_set.resources; rIter; rIter = rIter->next) { - rsc = rIter->data; -- cli_resource_delete(crmd_channel, host_uname, rsc, NULL, NULL, -- &data_set); -+ cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0, -+ FALSE, &data_set); - } - - start_mainloop(); --- -1.8.3.1 - - -From 555bdce4ceaf9a406059150c9dee047151fb3d94 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 7 Aug 2018 14:11:50 -0500 -Subject: [PATCH 8/8] Low: tools: avoid function not available until glib - 2.32.0 - ---- - tools/crm_resource_runtime.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index e02cc44..41cc742 100644 ---- a/tools/crm_resource_runtime.c -+++ 
b/tools/crm_resource_runtime.c -@@ -651,7 +651,10 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, - } - } - -+ /* not available until glib 2.32 - g_hash_table_add(rscs, (gpointer) failed_id); -+ */ -+ g_hash_table_insert(rscs, (gpointer) failed_id, (gpointer) failed_id); - } - - g_hash_table_iter_init(&iter, rscs); --- -1.8.3.1 - diff --git a/SOURCES/004-group-ordering.patch b/SOURCES/004-group-ordering.patch new file mode 100644 index 0000000..7428744 --- /dev/null +++ b/SOURCES/004-group-ordering.patch @@ -0,0 +1,602 @@ +From d14fb0110208f270491c696bea0072300db2a947 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 12:37:13 -0500 +Subject: [PATCH 1/6] Refactor: scheduler: functionize handling of restart + ordering + +for readability +--- + pengine/native.c | 97 +++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 60 insertions(+), 37 deletions(-) + +diff --git a/pengine/native.c b/pengine/native.c +index 2f8c011..653a93a 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -1942,6 +1942,65 @@ native_action_flags(action_t * action, node_t * node) + return action->flags; + } + ++static inline bool ++is_primitive_action(pe_action_t *action) ++{ ++ return action && action->rsc && (action->rsc->variant == pe_native); ++} ++ ++/*! 
++ * \internal ++ * \brief Set action bits appropriately when pe_restart_order is used ++ * ++ * \param[in] first 'First' action in an ordering with pe_restart_order ++ * \param[in] then 'Then' action in an ordering with pe_restart_order ++ * \param[in] filter What ordering flags to care about ++ * ++ * \note pe_restart_order is set for "stop resource before starting it" and ++ * "stop later group member before stopping earlier group member" ++ */ ++static void ++handle_restart_ordering(pe_action_t *first, pe_action_t *then, ++ enum pe_action_flags filter) ++{ ++ const char *reason = NULL; ++ ++ CRM_ASSERT(is_primitive_action(first)); ++ CRM_ASSERT(is_primitive_action(then)); ++ ++ if ((filter & pe_action_runnable) ++ && (then->flags & pe_action_runnable) == 0 ++ && (then->rsc->flags & pe_rsc_managed)) { ++ reason = "shutdown"; ++ } ++ ++ if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { ++ reason = "recover"; ++ } ++ ++ if (reason && is_set(first->flags, pe_action_optional)) { ++ if (is_set(first->flags, pe_action_runnable) ++ || is_not_set(then->flags, pe_action_optional)) { ++ pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); ++ pe_action_implies(first, then, pe_action_optional); ++ } ++ } ++ ++ if (reason && is_not_set(first->flags, pe_action_optional) ++ && is_not_set(first->flags, pe_action_runnable)) { ++ pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); ++ pe_action_implies(then, first, pe_action_runnable); ++ } ++ ++ if (reason && ++ is_not_set(first->flags, pe_action_optional) && ++ is_set(first->flags, pe_action_migrate_runnable) && ++ is_not_set(then->flags, pe_action_migrate_runnable)) { ++ ++ pe_action_implies(first, then, pe_action_migrate_runnable); ++ } ++} ++ + enum pe_graph_flags + native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, + enum pe_action_flags filter, enum pe_ordering type) +@@ -2069,43 
+2128,7 @@ native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_ + } + + if (is_set(type, pe_order_restart)) { +- const char *reason = NULL; +- +- CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); +- CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); +- +- if ((filter & pe_action_runnable) +- && (then->flags & pe_action_runnable) == 0 +- && (then->rsc->flags & pe_rsc_managed)) { +- reason = "shutdown"; +- } +- +- if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { +- reason = "recover"; +- } +- +- if (reason && is_set(first->flags, pe_action_optional)) { +- if (is_set(first->flags, pe_action_runnable) +- || is_not_set(then->flags, pe_action_optional)) { +- pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); +- pe_action_implies(first, then, pe_action_optional); +- } +- } +- +- if (reason && is_not_set(first->flags, pe_action_optional) +- && is_not_set(first->flags, pe_action_runnable)) { +- pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); +- pe_action_implies(then, first, pe_action_runnable); +- } +- +- if (reason && +- is_not_set(first->flags, pe_action_optional) && +- is_set(first->flags, pe_action_migrate_runnable) && +- is_not_set(then->flags, pe_action_migrate_runnable)) { +- +- pe_action_implies(first, then, pe_action_migrate_runnable); +- } +- ++ handle_restart_ordering(first, then, filter); + } + + if (then_flags != then->flags) { +-- +1.8.3.1 + + +From dda6a0eb480c8a3079b5c15176e27ed62f98a6ac Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 17:46:44 -0500 +Subject: [PATCH 2/6] Refactor: scheduler: use is_set()/is_not_set() in restart + ordering + +instead of direct bit comparisons, for readability and consistency +--- + pengine/native.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/pengine/native.c b/pengine/native.c +index 653a93a..f43d3cf 100644 +--- 
a/pengine/native.c ++++ b/pengine/native.c +@@ -1968,13 +1968,14 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, + CRM_ASSERT(is_primitive_action(first)); + CRM_ASSERT(is_primitive_action(then)); + +- if ((filter & pe_action_runnable) +- && (then->flags & pe_action_runnable) == 0 +- && (then->rsc->flags & pe_rsc_managed)) { ++ if (is_set(filter, pe_action_runnable) ++ && is_not_set(then->flags, pe_action_runnable) ++ && is_set(then->rsc->flags, pe_rsc_managed)) { + reason = "shutdown"; + } + +- if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { ++ if (is_set(filter, pe_action_optional) ++ && is_not_set(then->flags, pe_action_optional)) { + reason = "recover"; + } + +-- +1.8.3.1 + + +From 69951375e4af9d1d1152980afddc94bb5881af5a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 17:49:10 -0500 +Subject: [PATCH 3/6] Log: scheduler: improve restart ordering trace logs + +--- + pengine/native.c | 28 ++++++++++++++++++---------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +diff --git a/pengine/native.c b/pengine/native.c +index f43d3cf..dfcc910 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -1968,33 +1968,41 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, + CRM_ASSERT(is_primitive_action(first)); + CRM_ASSERT(is_primitive_action(then)); + ++ // We need to update the action in two cases: ++ ++ // ... if 'then' is required ++ if (is_set(filter, pe_action_optional) ++ && is_not_set(then->flags, pe_action_optional)) { ++ reason = "restart"; ++ } ++ ++ // ... 
if 'then' is managed but unrunnable + if (is_set(filter, pe_action_runnable) + && is_not_set(then->flags, pe_action_runnable) + && is_set(then->rsc->flags, pe_rsc_managed)) { +- reason = "shutdown"; ++ reason = "stop"; + } + +- if (is_set(filter, pe_action_optional) +- && is_not_set(then->flags, pe_action_optional)) { +- reason = "recover"; ++ if (reason == NULL) { ++ return; + } + +- if (reason && is_set(first->flags, pe_action_optional)) { ++ pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", ++ first->uuid, then->uuid, reason); ++ ++ if (is_set(first->flags, pe_action_optional)) { + if (is_set(first->flags, pe_action_runnable) + || is_not_set(then->flags, pe_action_optional)) { +- pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); + pe_action_implies(first, then, pe_action_optional); + } + } + +- if (reason && is_not_set(first->flags, pe_action_optional) ++ if (is_not_set(first->flags, pe_action_optional) + && is_not_set(first->flags, pe_action_runnable)) { +- pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); + pe_action_implies(then, first, pe_action_runnable); + } + +- if (reason && +- is_not_set(first->flags, pe_action_optional) && ++ if (is_not_set(first->flags, pe_action_optional) && + is_set(first->flags, pe_action_migrate_runnable) && + is_not_set(then->flags, pe_action_migrate_runnable)) { + +-- +1.8.3.1 + + +From 32da90e58a89d7f9f3cd6d1e3f961c24b646d734 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 17:50:07 -0500 +Subject: [PATCH 4/6] Refactor: scheduler: simplify handling of restart + ordering + +Don't condition pe_action_implies() on the desired state not being already +present, because pe_action_implies() handles that. + +Don't condition clearing first's pe_action_migrate_runnable on first being +required, because if it's optional it doesn't matter, and if (now or in the +future) optional can be changed to required later, it will actually be +important. 
+--- + pengine/native.c | 27 +++++++++++++++------------ + 1 file changed, 15 insertions(+), 12 deletions(-) + +diff --git a/pengine/native.c b/pengine/native.c +index dfcc910..8912aa1 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -1980,6 +1980,7 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, + if (is_set(filter, pe_action_runnable) + && is_not_set(then->flags, pe_action_runnable) + && is_set(then->rsc->flags, pe_rsc_managed)) { ++ // If a resource should restart but can't start, we still want to stop + reason = "stop"; + } + +@@ -1990,24 +1991,26 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, + pe_rsc_trace(first->rsc, "Handling %s -> %s for %s", + first->uuid, then->uuid, reason); + +- if (is_set(first->flags, pe_action_optional)) { +- if (is_set(first->flags, pe_action_runnable) +- || is_not_set(then->flags, pe_action_optional)) { +- pe_action_implies(first, then, pe_action_optional); +- } ++ // Make 'first' required if it is runnable ++ if (is_set(first->flags, pe_action_runnable)) { ++ pe_action_implies(first, then, pe_action_optional); + } + +- if (is_not_set(first->flags, pe_action_optional) +- && is_not_set(first->flags, pe_action_runnable)) { +- pe_action_implies(then, first, pe_action_runnable); ++ // Make 'first' required if 'then' is required ++ if (is_not_set(then->flags, pe_action_optional)) { ++ pe_action_implies(first, then, pe_action_optional); + } + +- if (is_not_set(first->flags, pe_action_optional) && +- is_set(first->flags, pe_action_migrate_runnable) && +- is_not_set(then->flags, pe_action_migrate_runnable)) { +- ++ // Make 'first' unmigratable if 'then' is unmigratable ++ if (is_not_set(then->flags, pe_action_migrate_runnable)) { + pe_action_implies(first, then, pe_action_migrate_runnable); + } ++ ++ // Make 'then' unrunnable if 'first' is required but unrunnable ++ if (is_not_set(first->flags, pe_action_optional) ++ && is_not_set(first->flags, pe_action_runnable)) { ++ 
pe_action_implies(then, first, pe_action_runnable); ++ } + } + + enum pe_graph_flags +-- +1.8.3.1 + + +From 8cfe743d4373fad6b4e50ee64894a16f7f24afa1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 19:11:25 -0500 +Subject: [PATCH 5/6] Fix: scheduler: one group stop shouldn't make another + required + +1.1.7's 8d2f237d reused pe_order_restart ("stop resource before stopping it") +for "stop later group member before stopping earlier group member". + +pe_order_restart includes a check for an unrunnable 'then', because in a +restart, even if the start is unrunnable, we still want to perform the stop. +However this check does not make sense for group stop ordering, and as of +1.1.10, this caused a regression where a group member could be unnecessarily +stopped. + +Example scenario: if a resource is ordered after a group member, and the +resource failed with on-fail=block, that would make the group member's +(optional) stop blocked as well, and that blocked stop would unnecessarily make +stops of later group members required. + +This commit fixes the issue by only applying the check when the 'then' action +is a start. (RHBZ#1609453) +--- + pengine/native.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/pengine/native.c b/pengine/native.c +index 8912aa1..747cb10 100644 +--- a/pengine/native.c ++++ b/pengine/native.c +@@ -1976,11 +1976,13 @@ handle_restart_ordering(pe_action_t *first, pe_action_t *then, + reason = "restart"; + } + +- // ... if 'then' is managed but unrunnable ++ /* ... 
if 'then' is unrunnable start of managed resource (if a resource ++ * should restart but can't start, we still want to stop) ++ */ + if (is_set(filter, pe_action_runnable) + && is_not_set(then->flags, pe_action_runnable) +- && is_set(then->rsc->flags, pe_rsc_managed)) { +- // If a resource should restart but can't start, we still want to stop ++ && is_set(then->rsc->flags, pe_rsc_managed) ++ && safe_str_eq(then->task, RSC_START)) { + reason = "stop"; + } + +-- +1.8.3.1 + + +From b8e388eff56143632ef848d52eddad9560aad2cf Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 19:44:39 -0500 +Subject: [PATCH 6/6] Test: scheduler: one group stop shouldn't make another + required + +--- + pengine/regression.sh | 1 + + pengine/test10/group-stop-ordering.dot | 2 + + pengine/test10/group-stop-ordering.exp | 1 + + pengine/test10/group-stop-ordering.scores | 17 ++++ + pengine/test10/group-stop-ordering.summary | 25 ++++++ + pengine/test10/group-stop-ordering.xml | 132 +++++++++++++++++++++++++++++ + 6 files changed, 178 insertions(+) + create mode 100644 pengine/test10/group-stop-ordering.dot + create mode 100644 pengine/test10/group-stop-ordering.exp + create mode 100644 pengine/test10/group-stop-ordering.scores + create mode 100644 pengine/test10/group-stop-ordering.summary + create mode 100644 pengine/test10/group-stop-ordering.xml + +diff --git a/pengine/regression.sh b/pengine/regression.sh +index e25990d..504de46 100755 +--- a/pengine/regression.sh ++++ b/pengine/regression.sh +@@ -68,6 +68,7 @@ do_test group-fail "Ensure stop order is preserved for partially active groups" + do_test group-unmanaged "No need to restart r115 because r114 is unmanaged" + do_test group-unmanaged-stopped "Make sure r115 is stopped when r114 fails" + do_test group-dependents "Account for the location preferences of things colocated with a group" ++do_test group-stop-ordering "Ensure blocked group member stop does not force other member stops" + + echo "" + do_test rsc_dep1 "Must 
not " +diff --git a/pengine/test10/group-stop-ordering.dot b/pengine/test10/group-stop-ordering.dot +new file mode 100644 +index 0000000..4b30191 +--- /dev/null ++++ b/pengine/test10/group-stop-ordering.dot +@@ -0,0 +1,2 @@ ++digraph "g" { ++} +diff --git a/pengine/test10/group-stop-ordering.exp b/pengine/test10/group-stop-ordering.exp +new file mode 100644 +index 0000000..56e315f +--- /dev/null ++++ b/pengine/test10/group-stop-ordering.exp +@@ -0,0 +1 @@ ++ +diff --git a/pengine/test10/group-stop-ordering.scores b/pengine/test10/group-stop-ordering.scores +new file mode 100644 +index 0000000..5f144d2 +--- /dev/null ++++ b/pengine/test10/group-stop-ordering.scores +@@ -0,0 +1,17 @@ ++Allocation scores: ++group_color: grp allocation score on fastvm-rhel-7-5-73: 0 ++group_color: grp allocation score on fastvm-rhel-7-5-74: 0 ++group_color: inside_resource_2 allocation score on fastvm-rhel-7-5-73: 0 ++group_color: inside_resource_2 allocation score on fastvm-rhel-7-5-74: 0 ++group_color: inside_resource_3 allocation score on fastvm-rhel-7-5-73: 0 ++group_color: inside_resource_3 allocation score on fastvm-rhel-7-5-74: 0 ++native_color: fence-fastvm-rhel-7-5-73 allocation score on fastvm-rhel-7-5-73: -INFINITY ++native_color: fence-fastvm-rhel-7-5-73 allocation score on fastvm-rhel-7-5-74: 0 ++native_color: fence-fastvm-rhel-7-5-74 allocation score on fastvm-rhel-7-5-73: 0 ++native_color: fence-fastvm-rhel-7-5-74 allocation score on fastvm-rhel-7-5-74: -INFINITY ++native_color: inside_resource_2 allocation score on fastvm-rhel-7-5-73: 0 ++native_color: inside_resource_2 allocation score on fastvm-rhel-7-5-74: 0 ++native_color: inside_resource_3 allocation score on fastvm-rhel-7-5-73: -INFINITY ++native_color: inside_resource_3 allocation score on fastvm-rhel-7-5-74: 0 ++native_color: outside_resource allocation score on fastvm-rhel-7-5-73: INFINITY ++native_color: outside_resource allocation score on fastvm-rhel-7-5-74: 0 +diff --git 
a/pengine/test10/group-stop-ordering.summary b/pengine/test10/group-stop-ordering.summary +new file mode 100644 +index 0000000..0ec8eb6 +--- /dev/null ++++ b/pengine/test10/group-stop-ordering.summary +@@ -0,0 +1,25 @@ ++ ++Current cluster status: ++Online: [ fastvm-rhel-7-5-73 fastvm-rhel-7-5-74 ] ++ ++ fence-fastvm-rhel-7-5-73 (stonith:fence_xvm): Started fastvm-rhel-7-5-74 ++ fence-fastvm-rhel-7-5-74 (stonith:fence_xvm): Started fastvm-rhel-7-5-73 ++ outside_resource (ocf::pacemaker:Dummy): FAILED fastvm-rhel-7-5-73 (blocked) ++ Resource Group: grp ++ inside_resource_2 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 ++ inside_resource_3 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 ++ ++Transition Summary: ++ ++Executing cluster transition: ++ ++Revised cluster status: ++Online: [ fastvm-rhel-7-5-73 fastvm-rhel-7-5-74 ] ++ ++ fence-fastvm-rhel-7-5-73 (stonith:fence_xvm): Started fastvm-rhel-7-5-74 ++ fence-fastvm-rhel-7-5-74 (stonith:fence_xvm): Started fastvm-rhel-7-5-73 ++ outside_resource (ocf::pacemaker:Dummy): FAILED fastvm-rhel-7-5-73 (blocked) ++ Resource Group: grp ++ inside_resource_2 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 ++ inside_resource_3 (ocf::pacemaker:Dummy): Started fastvm-rhel-7-5-74 ++ +diff --git a/pengine/test10/group-stop-ordering.xml b/pengine/test10/group-stop-ordering.xml +new file mode 100644 +index 0000000..8439c1f +--- /dev/null ++++ b/pengine/test10/group-stop-ordering.xml +@@ -0,0 +1,132 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +1.8.3.1 + diff --git a/SOURCES/005-bug-url.patch b/SOURCES/005-bug-url.patch new file mode 100644 index 0000000..5f183b4 --- 
/dev/null +++ b/SOURCES/005-bug-url.patch @@ -0,0 +1,109 @@ +From 55c576d970ed5b3446204668e7439d626e6d1a13 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 19:48:23 -0500 +Subject: [PATCH 1/3] Build: configure: make bug report URL configurable + +to allow distributions to direct users to their own bug reporters +--- + configure.ac | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/configure.ac b/configure.ac +index 70e074e..6fd17fd 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -318,6 +318,12 @@ AC_ARG_WITH(brand, + [ test x"$withval" = x"no" || PUBLICAN_BRAND="$withval" ]) + AC_SUBST(PUBLICAN_BRAND) + ++BUG_URL="" ++AC_ARG_WITH(bug-url, ++ [ --with-bug-url=DIR Address where users should submit bug reports @<:@https://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker@:>@], ++ [ BUG_URL="$withval" ] ++) ++ + ASCIIDOC_CLI_TYPE="pcs" + AC_ARG_WITH(doc-cli, + [ --with-doc-cli=cli_type CLI type to use for generated documentation. [$ASCIIDOC_CLI_TYPE]], +@@ -442,6 +448,10 @@ if test x"${CONFIGDIR}" = x""; then + fi + AC_SUBST(CONFIGDIR) + ++if test x"${BUG_URL}" = x""; then ++ BUG_URL="https://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker" ++fi ++ + for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ + sharedstatedir localstatedir libdir includedir oldincludedir infodir \ + mandir INITDIR docdir CONFIGDIR +-- +1.8.3.1 + + +From 6c5938df90f83dcd686f11453ae701099a98836b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 29 Mar 2019 19:50:21 -0500 +Subject: [PATCH 2/3] Feature: tools: make crm_report bug report URL + configurable at build time + +should have been done with 52aaffd +--- + configure.ac | 1 + + tools/crm_report.in | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/configure.ac b/configure.ac +index 6fd17fd..ed51f67 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -451,6 +451,7 @@ AC_SUBST(CONFIGDIR) + if test x"${BUG_URL}" = x""; then + 
BUG_URL="https://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker" + fi ++AC_SUBST(BUG_URL) + + for j in prefix exec_prefix bindir sbindir libexecdir datadir sysconfdir \ + sharedstatedir localstatedir libdir includedir oldincludedir infodir \ +diff --git a/tools/crm_report.in b/tools/crm_report.in +index 8e0d4b6..63c137c 100755 +--- a/tools/crm_report.in ++++ b/tools/crm_report.in +@@ -230,7 +230,7 @@ EOF + log "Collected results are available in $fname" + log " " + log "Please create a bug entry at" +- log " http://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker" ++ log " @BUG_URL@" + log "Include a description of your problem and attach this tarball" + log " " + log "Thank you for taking time to create this report." +-- +1.8.3.1 + + +From d99c1fa3fd52be01e03ce27a964fb1b09d29c46c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 2 Apr 2019 10:35:16 -0500 +Subject: [PATCH 3/3] Test: tools: define missing constant + +accidentally omitted in abc33eff +--- + tools/regression.sh | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/regression.sh b/tools/regression.sh +index 0c38629..3680f13 100755 +--- a/tools/regression.sh ++++ b/tools/regression.sh +@@ -8,6 +8,8 @@ GREP_OPTIONS= + verbose=0 + tests="dates tools acls validity" + ++CRM_EX_OK=0 ++ + function test_assert() { + target=$1; shift + cib=$1; shift +-- +1.8.3.1 + diff --git a/SOURCES/005-corosync.patch b/SOURCES/005-corosync.patch deleted file mode 100644 index 19ca4e3..0000000 --- a/SOURCES/005-corosync.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 041026835bce4d4ae2390daccade0e74c4fa3c1c Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Thu, 26 Jul 2018 08:06:45 +0100 -Subject: [PATCH] Shutdown corosync after a fatal error - -If pacemaker shuts down due to being fenced by a non-power (eg fabric) -fence agent then it should also take down corosync so that full cluster -service on that node is lost, rather than just resource management. 
- -https://bugzilla.redhat.com/show_bug.cgi?id=1448221 - -Signed-off-by: Christine Caulfield ---- - mcp/pacemaker.c | 27 ++++++++++++++++++++++++++- - 1 file changed, 26 insertions(+), 1 deletion(-) - -diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c -index f57fc25..40a2de2 100644 ---- a/mcp/pacemaker.c -+++ b/mcp/pacemaker.c -@@ -21,6 +21,9 @@ - #include - #include - #include -+#ifdef SUPPORT_COROSYNC -+#include -+#endif - - #include - #include -@@ -142,6 +145,28 @@ pcmk_process_exit(pcmk_child_t * child) - } - } - -+static void pcmk_exit_with_cluster(int exitcode) -+{ -+#ifdef SUPPORT_COROSYNC -+ corosync_cfg_handle_t cfg_handle; -+ cs_error_t err; -+ -+ if (exitcode == DAEMON_RESPAWN_STOP) { -+ crm_info("Asking Corosync to shut down"); -+ err = corosync_cfg_initialize(&cfg_handle, NULL); -+ if (err != CS_OK) { -+ crm_warn("Unable to open handle to corosync to close it down. err=%d", err); -+ } -+ err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); -+ if (err != CS_OK) { -+ crm_warn("Corosync shutdown failed. err=%d", err); -+ } -+ corosync_cfg_finalize(cfg_handle); -+ } -+#endif -+ crm_exit(exitcode); -+} -+ - static void - pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) - { -@@ -423,7 +448,7 @@ pcmk_shutdown_worker(gpointer user_data) - - if (fatal_error) { - crm_notice("Attempting to inhibit respawning after fatal error"); -- crm_exit(DAEMON_RESPAWN_STOP); -+ pcmk_exit_with_cluster(DAEMON_RESPAWN_STOP); - } - - return TRUE; --- -1.8.3.1 - diff --git a/SOURCES/006-fail-count.patch b/SOURCES/006-fail-count.patch deleted file mode 100644 index d902549..0000000 --- a/SOURCES/006-fail-count.patch +++ /dev/null @@ -1,31 +0,0 @@ -From cdbc6bfc8995ecc86da3fe81a463d8fc4ce1f8fd Mon Sep 17 00:00:00 2001 -From: Hideo Yamauchi -Date: Mon, 30 Jul 2018 07:33:09 +0900 -Subject: [PATCH] Mid: controld: No update by already confirmed events. 
- ---- - crmd/te_events.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/crmd/te_events.c b/crmd/te_events.c -index 7b5ca2a..1f7a34c 100644 ---- a/crmd/te_events.c -+++ b/crmd/te_events.c -@@ -510,6 +510,14 @@ process_graph_event(xmlNode *event, const char *event_node) - abort_transition(INFINITY, tg_restart, "Unknown event", event); - - } else { -+ -+ /* Actions already confirmed skip matching. */ -+ /* ex. Ignoring xxx_last_0 or xxx_last_failure_0 generated by create_operation_update() in order to prevent duplicate fail-count from increasing. */ -+ if (action->confirmed == TRUE) { -+ crm_log_xml_debug(event, "No update by already confirmed events :"); -+ goto bail; -+ } -+ - ignore_failures = safe_str_eq( - crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore"); - match_graph_event(action, event, status, rc, target_rc, ignore_failures); --- -1.8.3.1 - diff --git a/SOURCES/006-fence-output-fix.patch b/SOURCES/006-fence-output-fix.patch new file mode 100644 index 0000000..d0dd09a --- /dev/null +++ b/SOURCES/006-fence-output-fix.patch @@ -0,0 +1,143 @@ +From 70577be56d841d2f58545877d36f3c3eaeaaae63 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 3 Apr 2019 16:11:20 +0200 +Subject: [PATCH] Fix: service-lib: avoid call-pattern leading to + use-after-free + +--- + include/crm/services.h | 8 +++++++- + lib/fencing/st_client.c | 18 +++++++++++++++--- + lib/services/services.c | 13 ++++++++++++- + lib/services/services_linux.c | 5 +++++ + lib/services/services_private.h | 1 + + 5 files changed, 40 insertions(+), 5 deletions(-) + +diff --git a/include/crm/services.h b/include/crm/services.h +index eddafc3..cbb2354 100644 +--- a/include/crm/services.h ++++ b/include/crm/services.h +@@ -307,11 +307,17 @@ typedef struct svc_action_s { + * + * \param[in] op services action data + * \param[in] action_callback callback for when the action completes ++ * \param[in] action_fork_callback callback for when action forked successfully + * + * 
\retval TRUE succesfully started execution + * \retval FALSE failed to start execution, no callback will be received + */ +- gboolean services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)); ++ gboolean services_action_async_fork_notify(svc_action_t * op, ++ void (*action_callback) (svc_action_t *), ++ void (*action_fork_callback) (svc_action_t *)); ++ ++ gboolean services_action_async(svc_action_t * op, ++ void (*action_callback) (svc_action_t *)); + + gboolean services_action_cancel(const char *name, const char *action, int interval); + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 1c56cf4..0c1eadc 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -827,6 +827,18 @@ stonith_action_async_done(svc_action_t *svc_action) + stonith__destroy_action(action); + } + ++static void ++stonith_action_async_forked(svc_action_t *svc_action) ++{ ++ stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; ++ ++ action->pid = svc_action->pid; ++ action->svc_action = svc_action; ++ ++ crm_trace("Child process %d performing action '%s' successfully forked", ++ action->pid, action->action); ++} ++ + static int + internal_stonith_action_execute(stonith_action_t * action) + { +@@ -873,12 +885,12 @@ internal_stonith_action_execute(stonith_action_t * action) + + if (action->async) { + /* async */ +- if(services_action_async(svc_action, &stonith_action_async_done) == FALSE) { ++ if(services_action_async_fork_notify(svc_action, ++ &stonith_action_async_done, ++ &stonith_action_async_forked) == FALSE) { + services_action_free(svc_action); + svc_action = NULL; + } else { +- action->pid = svc_action->pid; +- action->svc_action = svc_action; + rc = 0; + } + +diff --git a/lib/services/services.c b/lib/services/services.c +index 60402e7..ef2c5fc 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -843,12 +843,17 @@ services_untrack_op(svc_action_t *op) + } + + gboolean 
+-services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *)) ++services_action_async_fork_notify(svc_action_t * op, ++ void (*action_callback) (svc_action_t *), ++ void (*action_fork_callback) (svc_action_t *)) + { + op->synchronous = false; + if (action_callback) { + op->opaque->callback = action_callback; + } ++ if (action_fork_callback) { ++ op->opaque->fork_callback = action_fork_callback; ++ } + + if (op->interval > 0) { + init_recurring_actions(); +@@ -868,6 +873,12 @@ services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t * + return action_exec_helper(op); + } + ++gboolean ++services_action_async(svc_action_t * op, ++ void (*action_callback) (svc_action_t *)) ++{ ++ return services_action_async_fork_notify(op, action_callback, NULL); ++} + + static gboolean processing_blocked_ops = FALSE; + +diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c +index d79c16d..705901e 100644 +--- a/lib/services/services_linux.c ++++ b/lib/services/services_linux.c +@@ -877,6 +877,11 @@ services_os_action_execute(svc_action_t * op) + op->opaque->stdin_fd = -1; + } + ++ // after fds are setup properly and before we plug anything into mainloop ++ if (op->opaque->fork_callback) { ++ op->opaque->fork_callback(op); ++ } ++ + if (op->synchronous) { + action_synced_wait(op, pmask); + sigchld_cleanup(); +diff --git a/lib/services/services_private.h b/lib/services/services_private.h +index 9735da7..227e17f 100644 +--- a/lib/services/services_private.h ++++ b/lib/services/services_private.h +@@ -36,6 +36,7 @@ struct svc_action_private_s { + + guint repeat_timer; + void (*callback) (svc_action_t * op); ++ void (*fork_callback) (svc_action_t * op); + + int stderr_fd; + mainloop_io_t *stderr_gsource; +-- +1.8.3.1 + diff --git a/SOURCES/007-security.patch b/SOURCES/007-security.patch new file mode 100644 index 0000000..33fb725 --- /dev/null +++ b/SOURCES/007-security.patch @@ -0,0 +1,2659 @@ +From 
f91a961112ec9796181b42aa52f9c36dfa3c6a99 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 2 Apr 2019 10:13:21 +0200 +Subject: [PATCH 1/7] High: libservices: fix use-after-free wrt. alert handling + +This could possibly lead to unsolicited information disclosure by the +means of standard output of the immediately preceding agent/resource +execution leaking into the log stream under some circumstances. +It was hence assigned CVE-2019-3885. + +The provoked pathological state of pacemaker-execd daemon progresses +towards crashing it for hitting segmentation fault. +--- + lib/services/services.c | 40 +--------------------------------------- + lib/services/services_linux.c | 35 +++++++++++++++++++++++++++++++---- + 2 files changed, 32 insertions(+), 43 deletions(-) + +diff --git a/lib/services/services.c b/lib/services/services.c +index ef2c5fc..1d06c5d 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -450,35 +450,6 @@ services_action_user(svc_action_t *op, const char *user) + return crm_user_lookup(user, &(op->opaque->uid), &(op->opaque->gid)); + } + +-static void +-set_alert_env(gpointer key, gpointer value, gpointer user_data) +-{ +- int rc; +- +- if (value) { +- rc = setenv(key, value, 1); +- } else { +- rc = unsetenv(key); +- } +- +- if (rc < 0) { +- crm_perror(LOG_ERR, "setenv %s=%s", +- (char*)key, (value? (char*)value : "")); +- } else { +- crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); +- } +-} +- +-static void +-unset_alert_env(gpointer key, gpointer value, gpointer user_data) +-{ +- if (unsetenv(key) < 0) { +- crm_perror(LOG_ERR, "unset %s", (char*)key); +- } else { +- crm_trace("unset %s", (char*)key); +- } +-} +- + /*! 
+ * \brief Execute an alert agent action + * +@@ -493,18 +464,9 @@ unset_alert_env(gpointer key, gpointer value, gpointer user_data) + gboolean + services_alert_async(svc_action_t *action, void (*cb)(svc_action_t *op)) + { +- gboolean responsible; +- + action->synchronous = false; + action->opaque->callback = cb; +- if (action->params) { +- g_hash_table_foreach(action->params, set_alert_env, NULL); +- } +- responsible = services_os_action_execute(action); +- if (action->params) { +- g_hash_table_foreach(action->params, unset_alert_env, NULL); +- } +- return responsible; ++ return services_os_action_execute(action); + } + + #if SUPPORT_DBUS +diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c +index 705901e..2047b64 100644 +--- a/lib/services/services_linux.c ++++ b/lib/services/services_linux.c +@@ -159,6 +159,25 @@ set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) + set_ocf_env(buffer, value, user_data); + } + ++static void ++set_alert_env(gpointer key, gpointer value, gpointer user_data) ++{ ++ int rc; ++ ++ if (value != NULL) { ++ rc = setenv(key, value, 1); ++ } else { ++ rc = unsetenv(key); ++ } ++ ++ if (rc < 0) { ++ crm_perror(LOG_ERR, "setenv %s=%s", ++ (char*)key, (value? (char*)value : "")); ++ } else { ++ crm_trace("setenv %s=%s", (char*)key, (value? (char*)value : "")); ++ } ++} ++ + /*! 
+ * \internal + * \brief Add environment variables suitable for an action +@@ -168,12 +187,20 @@ set_ocf_env_with_prefix(gpointer key, gpointer value, gpointer user_data) + static void + add_action_env_vars(const svc_action_t *op) + { +- if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF) == FALSE) { +- return; ++ void (*env_setter)(gpointer, gpointer, gpointer) = NULL; ++ if (op->agent == NULL) { ++ env_setter = set_alert_env; /* we deal with alert handler */ ++ ++ } else if (safe_str_eq(op->standard, PCMK_RESOURCE_CLASS_OCF)) { ++ env_setter = set_ocf_env_with_prefix; + } + +- if (op->params) { +- g_hash_table_foreach(op->params, set_ocf_env_with_prefix, NULL); ++ if (env_setter != NULL && op->params != NULL) { ++ g_hash_table_foreach(op->params, env_setter, NULL); ++ } ++ ++ if (env_setter == NULL || env_setter == set_alert_env) { ++ return; + } + + set_ocf_env("OCF_RA_VERSION_MAJOR", "1", NULL); +-- +1.8.3.1 + + +From ab44422fa955c2dff1ac1822521e7ad335d4aab7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Mon, 15 Apr 2019 23:19:44 +0200 +Subject: [PATCH 2/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (0/4) + +[0/4: make crm_pid_active more precise as to when detections fail] + +It would be bad if the function claimed the process is not active +when the only obstacle in the detection process was that none of the +detection methods worked for a plain lack of permissions to apply +them. Also, do some other minor cleanup of the function and add its +documentation. As an additional measure, log spamming is kept at +minimum for repeated queries about the same PID. 
+--- + include/crm_internal.h | 21 +++++++++++ + lib/common/utils.c | 96 +++++++++++++++++++++++++++----------------------- + 2 files changed, 73 insertions(+), 44 deletions(-) + +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 5692929..0adeb7b 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -140,6 +140,27 @@ extern int node_score_yellow; + extern int node_score_infinity; + + /* Assorted convenience functions */ ++ ++/*! ++ * \internal ++ * \brief Detect if process per PID and optionally exe path (component) exists ++ * ++ * \param[in] pid PID of process assumed alive, disproving of which to try ++ * \param[in] daemon exe path (component) to possibly match with procfs entry ++ * ++ * \return -1 on invalid PID specification, -2 when the calling process has no ++ * (is refused an) ability to (dis)prove the predicate, ++ * 0 if the negation of the predicate is confirmed (check-through-kill ++ * indicates so, or the subsequent check-through-procfs-match on ++ * \p daemon when provided and procfs available at the standard path), ++ * 1 if it cannot be disproved (reliably [modulo race conditions] ++ * when \p daemon provided, procfs available at the standard path ++ * and the calling process has permissions to access the respective ++ * procfs location, less so otherwise, since mere check-through-kill ++ * is exercised without powers to exclude PID recycled in the interim). ++ * ++ * \note This function cannot be used to verify \e authenticity of the process. 
++ */ + int crm_pid_active(long pid, const char *daemon); + void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile); + +diff --git a/lib/common/utils.c b/lib/common/utils.c +index f3f60ed..2ac7901 100644 +--- a/lib/common/utils.c ++++ b/lib/common/utils.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + + #include +@@ -717,16 +708,21 @@ crm_abort(const char *file, const char *function, int line, + int + crm_pid_active(long pid, const char *daemon) + { ++ static int last_asked_pid = 0; /* log spam prevention */ ++#if SUPPORT_PROCFS + static int have_proc_pid = 0; ++#else ++ static int have_proc_pid = -1; ++#endif ++ int rc = 0; + +- if(have_proc_pid == 0) { ++ if (have_proc_pid == 0) { ++ /* evaluation of /proc/PID/exe applicability via self-introspection */ + char proc_path[PATH_MAX], exe_path[PATH_MAX]; +- +- /* check to make sure pid hasn't been reused by another process */ +- snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)getpid()); +- ++ snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", ++ (long unsigned int) getpid()); + have_proc_pid = 1; +- if(readlink(proc_path, exe_path, PATH_MAX - 1) < 0) { ++ if (readlink(proc_path, exe_path, sizeof(exe_path) - 1) < 0) { + have_proc_pid = -1; + } + } +@@ -734,40 +730,52 @@ crm_pid_active(long pid, const char *daemon) + if (pid <= 0) { + return -1; + +- } else if (kill(pid, 0) < 0 && errno == ESRCH) { +- return 0; ++ } else if ((rc = kill(pid, 0)) < 0 && errno == ESRCH) { ++ return 0; /* no such PID detected */ + +- } else if(daemon == NULL || have_proc_pid == -1) { +- return 1; ++ } else if (rc < 0 && have_proc_pid == -1) { ++ if (last_asked_pid != pid) { ++ crm_info("Cannot examine PID %ld: %s", pid, strerror(errno)); ++ last_asked_pid = pid; ++ } ++ return -2; /* errno != ESRCH */ ++ ++ } else if (rc == 0 && (daemon == NULL || have_proc_pid == -1)) { ++ return 1; /* kill as the only indicator, cannot double check */ + + } else { +- int rc = 0; ++ /* make sure PID hasn't been reused by another process ++ XXX: might still be just a zombie, which could confuse decisions */ ++ bool checked_through_kill = (rc == 0); + char proc_path[PATH_MAX], exe_path[PATH_MAX], myexe_path[PATH_MAX]; +- +- /* check to make sure pid hasn't been reused by another process */ +- snprintf(proc_path, 
sizeof(proc_path), "/proc/%lu/exe", pid); +- +- rc = readlink(proc_path, exe_path, PATH_MAX - 1); +- if (rc < 0 && errno == EACCES) { +- crm_perror(LOG_INFO, "Could not read from %s", proc_path); +- return 1; ++ snprintf(proc_path, sizeof(proc_path), "/proc/%ld/exe", pid); ++ ++ rc = readlink(proc_path, exe_path, sizeof(exe_path) - 1); ++ if ((rc < 0) && (errno == EACCES)) { ++ if (last_asked_pid != pid) { ++ crm_info("Could not read from %s: %s", proc_path, ++ strerror(errno)); ++ last_asked_pid = pid; ++ } ++ return checked_through_kill ? 1 : -2; + } else if (rc < 0) { +- crm_perror(LOG_ERR, "Could not read from %s", proc_path); +- return 0; ++ if (last_asked_pid != pid) { ++ crm_err("Could not read from %s: %s (%d)", proc_path, ++ strerror(errno), errno); ++ last_asked_pid = pid; ++ } ++ return 0; /* most likely errno == ENOENT */ + } +- ++ exe_path[rc] = '\0'; + +- exe_path[rc] = 0; +- +- if(daemon[0] != '/') { +- rc = snprintf(myexe_path, sizeof(proc_path), CRM_DAEMON_DIR"/%s", daemon); +- myexe_path[rc] = 0; ++ if (daemon[0] != '/') { ++ rc = snprintf(myexe_path, sizeof(myexe_path), CRM_DAEMON_DIR"/%s", ++ daemon); + } else { +- rc = snprintf(myexe_path, sizeof(proc_path), "%s", daemon); +- myexe_path[rc] = 0; ++ rc = snprintf(myexe_path, sizeof(myexe_path), "%s", daemon); + } +- +- if (strcmp(exe_path, myexe_path) == 0) { ++ ++ if (rc > 0 && rc < sizeof(myexe_path) && !strcmp(exe_path, myexe_path)) { + return 1; + } + } +-- +1.8.3.1 + + +From 6888aaf3ad365ef772f8189c9958f58b85ec62d4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Mon, 15 Apr 2019 23:20:42 +0200 +Subject: [PATCH 3/7] High: pacemakerd vs. 
IPC/procfs confused deputy + authenticity issue (1/4) + +[1/4: new helpers to allow IPC client side to authenticate the server] + +The title problem here could possibly lead to local privilege escalation +up to the root's level (and implicitly unguarded by some additional +protection layers like SELinux unless the defaults constrained further). + +Main problem is that the authenticity assumptions were built on two, +seemingly mutually supporting legs leading to two CVEs assigned: + +* procfs (mere process existence and right path to binary check) + used to verify (this part was assigned CVE-2018-16878), and + +* one-way only client-server authentication, putting the client + here at the mercy of the server not necessarily cross-verified + per the above point if at all (this part was assigned + CVE-2018-16877) + +whereas these two were in fact orthogonal, tearing security +assumptions about the "passive influencers" in the pacemaker's daemon +resilience-friendly constellation (orchestrated by the main of them, +pacemakerd) apart. Moreover, procfs-based approach is discouraged +for other reasons. 
+ +The layout of the basic fix is as follows: +* 1/4: new helpers to allow IPC client side to authenticate the server + (this commit, along with unifying subsequent solution for + both CVEs) +* 2/4: pacemakerd to trust pre-existing processes via new checks instead + (along with unifying solution for both CVEs) +* 3/4: other daemons to authenticate IPC servers of fellow processes + (along with addressing CVE-2018-16877 alone, for parts of + pacemaker not covered earlier) +* 4/4: CPG users to be careful about now-more-probable rival processes + (this is merely to mitigate corner case fallout from the new + approaches taken to face CVE-2018-16878 in particular; + courtesy of Yan Gao of SUSE for reporting this) + +With "basic", it is meant that it constitutes a self-contained best +effort solution with some compromises that can only be overcome with the +assistance of IPC library, libqb, as is also elaborated in messages of +remaining "fix" commits. Beside that, also conventional encapsulation +of server-by-client authentication would be useful, but lack thereof +is not an obstacle (more so should there be any security related +neglectations on the IPC client side and its connection initiating +arrangement within libqb that has a potential to strike as early as +when the authenticity of the server side is yet to be examined). + +One extra kludge that's introduced for FreeBSD lacking Unix socket to +remote peer PID mapping is masquerading such an unspecified PID with +value of 1, since that shall always be around as "init" task and, +deferring to proof by contradiction, cannot be pacemakerd-spawned +child either even if PID 1 was pacemakerd (and running such a child +alone is rather nonsensical). 
The code making decisions based on that +value must acknowledge this craze and refrain from killing/signalling +the underlying process on this platform (but shall in general follow +the same elsewhere, keep in mind systemd socket-based activation for +instance, which would end up in such a situation easily!). +--- + configure.ac | 43 +++++++++++ + include/crm/common/Makefile.am | 7 +- + include/crm/common/ipc.h | 55 +++++++++---- + include/crm/common/ipc_internal.h | 69 +++++++++++++++++ + lib/common/ipc.c | 158 ++++++++++++++++++++++++++++++++++---- + 5 files changed, 303 insertions(+), 29 deletions(-) + create mode 100644 include/crm/common/ipc_internal.h + +diff --git a/configure.ac b/configure.ac +index ed51f67..ce02777 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -465,6 +465,48 @@ do + fi + done + ++us_auth= ++AC_CHECK_HEADER([sys/socket.h], [ ++ AC_CHECK_DECL([SO_PEERCRED], [ ++ # Linux ++ AC_CHECK_TYPE([struct ucred], [ ++ us_auth=peercred_ucred; ++ AC_DEFINE([US_AUTH_PEERCRED_UCRED], [1], ++ [Define if Unix socket auth method is ++ getsockopt(s, SO_PEERCRED, &ucred, ...)]) ++ ], [ ++ # OpenBSD ++ AC_CHECK_TYPE([struct sockpeercred], [ ++ us_auth=localpeercred_sockepeercred; ++ AC_DEFINE([US_AUTH_PEERCRED_SOCKPEERCRED], [1], ++ [Define if Unix socket auth method is ++ getsockopt(s, SO_PEERCRED, &sockpeercred, ...)]) ++ ], [], [[#include ]]) ++ ], [[#define _GNU_SOURCE ++ #include ]]) ++ ], [], [[#include ]]) ++]) ++ ++if test -z "${us_auth}"; then ++ # FreeBSD ++ AC_CHECK_DECL([getpeereid], [ ++ us_auth=getpeereid; ++ AC_DEFINE([US_AUTH_GETPEEREID], [1], ++ [Define if Unix socket auth method is ++ getpeereid(s, &uid, &gid)]) ++ ], [ ++ # Solaris/OpenIndiana ++ AC_CHECK_DECL([getpeerucred], [ ++ us_auth=getpeerucred; ++ AC_DEFINE([US_AUTH_GETPEERUCRED], [1], ++ [Define if Unix socket auth method is ++ getpeercred(s, &ucred)]) ++ ], [ ++ AC_MSG_ERROR([No way to authenticate a Unix socket peer]) ++ ], [[#include ]]) ++ ]) ++fi ++ + dnl This OS-based 
decision-making is poor autotools practice; + dnl feature-based mechanisms are strongly preferred. + dnl +@@ -2179,3 +2221,4 @@ AC_MSG_RESULT([ LDFLAGS_HARDENED_EXE = ${LDFLAGS_HARDENED_EXE}]) + AC_MSG_RESULT([ LDFLAGS_HARDENED_LIB = ${LDFLAGS_HARDENED_LIB}]) + AC_MSG_RESULT([ Libraries = ${LIBS}]) + AC_MSG_RESULT([ Stack Libraries = ${CLUSTERLIBS}]) ++AC_MSG_RESULT([ Unix socket auth method = ${us_auth}]) +diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am +index b90ac79..aacb6ff 100644 +--- a/include/crm/common/Makefile.am ++++ b/include/crm/common/Makefile.am +@@ -1,5 +1,7 @@ + # +-# Copyright 2004-2019 Andrew Beekhof ++# Copyright 2004-2019 the Pacemaker project contributors ++# ++# The version control history for this file may have further details. + # + # This source code is licensed under the GNU General Public License version 2 + # or later (GPLv2+) WITHOUT ANY WARRANTY. +@@ -11,7 +13,8 @@ headerdir=$(pkgincludedir)/crm/common + + header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h \ + nvpair.h +-noinst_HEADERS = ipcs.h internal.h remote_internal.h xml_internal.h ++noinst_HEADERS = ipcs.h internal.h remote_internal.h xml_internal.h \ ++ ipc_internal.h + if BUILD_CIBSECRETS + noinst_HEADERS += cib_secrets.h + endif +diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h +index 8722252..df56bbe 100644 +--- a/include/crm/common/ipc.h ++++ b/include/crm/common/ipc.h +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2013 Andrew Beekhof ++ * Copyright 2013-2019 the Pacemaker project contributors + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. 
+ * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + #ifndef CRM_COMMON_IPC__H + # define CRM_COMMON_IPC__H +@@ -77,6 +68,44 @@ uint32_t crm_ipc_buffer_flags(crm_ipc_t * client); + const char *crm_ipc_name(crm_ipc_t * client); + unsigned int crm_ipc_default_buffer_size(void); + ++/*! ++ * \brief Check the authenticity of the IPC socket peer process ++ * ++ * If everything goes well, peer's authenticity is verified by the means ++ * of comparing against provided referential UID and GID (either satisfies), ++ * and the result of this check can be deduced from the return value. ++ * As an exception, detected UID of 0 ("root") satisfies arbitrary ++ * provided referential daemon's credentials. 
++ * ++ * \param[in] sock IPC related, connected Unix socket to check peer of ++ * \param[in] refuid referential UID to check against ++ * \param[in] refgid referential GID to check against ++ * \param[out] gotpid to optionally store obtained PID of the peer ++ * (not available on FreeBSD, special value of 1 ++ * used instead, and the caller is required to ++ * special case this value respectively) ++ * \param[out] gotuid to optionally store obtained UID of the peer ++ * \param[out] gotgid to optionally store obtained GID of the peer ++ * ++ * \return 0 if IPC related socket's peer is not authentic given the ++ * referential credentials (see above), 1 if it is, ++ * negative value on error (generally expressing -errno unless ++ * it was zero even on nonhappy path, -pcmk_err_generic is ++ * returned then; no message is directly emitted) ++ * ++ * \note While this function is tolerant on what constitutes authorized ++ * IPC daemon process (its effective user matches UID=0 or \p refuid, ++ * or at least its group matches \p refgid), either or both (in case ++ * of UID=0) mismatches on the expected credentials of such peer ++ * process \e shall be investigated at the caller when value of 1 ++ * gets returned there, since higher-than-expected privileges in ++ * respect to the expected/intended credentials possibly violate ++ * the least privilege principle and may pose an additional risk ++ * (i.e. such accidental inconsistency shall be eventually fixed). 
++ */ ++int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, ++ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); ++ + /* Utils */ + xmlNode *create_hello_message(const char *uuid, const char *client_name, + const char *major_version, const char *minor_version); +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +new file mode 100644 +index 0000000..41a6653 +--- /dev/null ++++ b/include/crm/common/ipc_internal.h +@@ -0,0 +1,69 @@ ++/* ++ * Copyright 2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#ifndef PCMK__IPC_INTERNAL_H ++#define PCMK__IPC_INTERNAL_H ++ ++#include ++ ++#include /* US_AUTH_GETPEEREID */ ++ ++ ++/* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that ++ require a delicate handling anyway (socket-based activation with systemd); ++ we can be reasonably sure that this PID is never possessed by the actual ++ child daemon, as it gets taken either by the proper init, or by pacemakerd ++ itself (i.e. this precludes anything else); note that value of zero ++ is meant to carry "unset" meaning, and better not to bet on/conditionalize ++ over signedness of pid_t */ ++#define PCMK__SPECIAL_PID 1 ++ ++#if defined(US_AUTH_GETPEEREID) ++/* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to ++ "IPC liveness check only") as its nominal representation, which could ++ cause confusion -- this is unambiguous as long as there's no ++ socket-based activation like with systemd (very improbable) */ ++#define PCMK__SPECIAL_PID_AS_0(p) (((p) == PCMK__SPECIAL_PID) ? 0 : (p)) ++#else ++#define PCMK__SPECIAL_PID_AS_0(p) (p) ++#endif ++ ++/*! 
++ * \internal ++ * \brief Check the authenticity and liveness of the process via IPC end-point ++ * ++ * When IPC daemon under given IPC end-point (name) detected, its authenticity ++ * is verified by the means of comparing against provided referential UID and ++ * GID, and the result of this check can be deduced from the return value. ++ * As an exception, referential UID of 0 (~ root) satisfies arbitrary ++ * detected daemon's credentials. ++ * ++ * \param[in] name IPC name to base the search on ++ * \param[in] refuid referential UID to check against ++ * \param[in] refgid referential GID to check against ++ * \param[out] gotpid to optionally store obtained PID of the found process ++ * upon returning 1 or -2 ++ * (not available on FreeBSD, special value of 1, ++ * see PCMK__SPECIAL_PID, used instead, and the caller ++ * is required to special case this value respectively) ++ * ++ * \return 0 if no trace of IPC peer's liveness detected, 1 if it was, ++ * -1 on error, and -2 when the IPC blocked with unauthorized ++ * process (log message emitted in both latter cases) ++ * ++ * \note This function emits a log message also in case there isn't a perfect ++ * match in respect to \p refuid and/or \p refgid, for a possible ++ * least privilege principle violation. ++ * ++ * \see crm_ipc_is_authentic_process ++ */ ++int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, ++ gid_t refgid, pid_t *gotpid); ++ ++#endif +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +index 3258bcb..5b47dd6 100644 +--- a/lib/common/ipc.c ++++ b/lib/common/ipc.c +@@ -1,23 +1,25 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. 
++ * The version control history for this file may have further details. + * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + + #include + ++#if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) ++# ifdef US_AUTH_PEERCRED_UCRED ++# ifndef _GNU_SOURCE ++# define _GNU_SOURCE ++# endif ++# endif ++# include ++#elif defined(US_AUTH_GETPEERUCRED) ++# include ++#endif ++ + #include + + #include +@@ -30,11 +32,13 @@ + #include + #include + +-#include ++#include /* indirectly: pcmk_err_generic */ + #include + #include + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + #define PCMK_IPC_VERSION 1 + + /* Evict clients whose event queue grows this large (by default) */ +@@ -1375,6 +1379,132 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in + return rc; + } + ++int ++crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, ++ pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { ++ int ret = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++#if defined(US_AUTH_PEERCRED_UCRED) ++ struct ucred ucred; ++ socklen_t ucred_len = sizeof(ucred); ++ ++ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, ++ &ucred, &ucred_len) ++ && ucred_len == sizeof(ucred)) { ++ found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; ++ ++#elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) ++ struct sockpeercred sockpeercred; ++ socklen_t 
sockpeercred_len = sizeof(sockpeercred); ++ ++ if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, ++ &sockpeercred, &sockpeercred_len) ++ && sockpeercred_len == sizeof(sockpeercred)) { ++ found_pid = sockpeercred.pid; ++ found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; ++ ++#elif defined(US_AUTH_GETPEEREID) ++ if (!getpeereid(sock, &found_uid, &found_gid)) { ++ found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ ++ ++#elif defined(US_AUTH_GETPEERUCRED) ++ ucred_t *ucred = NULL; ++ if (!getpeerucred(sock, &ucred)) { ++ errno = 0; ++ found_pid = ucred_getpid(ucred); ++ found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ++ ret = -errno; ++ ucred_free(ucred); ++ if (ret) { ++ return (ret < 0) ? ret : -pcmk_err_generic; ++ } ++ ++#else ++# error "No way to authenticate a Unix socket peer" ++ errno = 0; ++ if (0) { ++#endif ++ if (gotpid != NULL) { ++ *gotpid = found_pid; ++ } ++ if (gotuid != NULL) { ++ *gotuid = found_uid; ++ } ++ if (gotgid != NULL) { ++ *gotgid = found_gid; ++ } ++ ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid); ++ } else { ++ ret = (errno > 0) ?
-errno : -pcmk_err_generic; ++ } ++ ++ return ret; ++} ++ ++int ++pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, ++ gid_t refgid, pid_t *gotpid) { ++ static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ ++ int fd, ret = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++ qb_ipcc_connection_t *c; ++ ++ if ((c = qb_ipcc_connect(name, 0)) == NULL) { ++ crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); ++ ++ } else if ((ret = qb_ipcc_fd_get(c, &fd))) { ++ crm_err("Could not get fd from %s IPC: %s (%d)", name, ++ strerror(-ret), -ret); ++ ret = -1; ++ ++ } else if ((ret = crm_ipc_is_authentic_process(fd, refuid, refgid, ++ &found_pid, &found_uid, ++ &found_gid)) < 0) { ++ if (ret == -pcmk_err_generic) { ++ crm_err("Could not get peer credentials from %s IPC", name); ++ } else { ++ crm_err("Could not get peer credentials from %s IPC: %s (%d)", ++ name, strerror(-ret), -ret); ++ } ++ ret = -1; ++ ++ } else { ++ if (gotpid != NULL) { ++ *gotpid = found_pid; ++ } ++ ++ if (!ret) { ++ crm_err("Daemon (IPC %s) effectively blocked with unauthorized" ++ " process %lld (uid: %lld, gid: %lld)", ++ name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ ret = -2; ++ } else if ((found_uid != refuid || found_gid != refgid) ++ && strncmp(last_asked_name, name, sizeof(last_asked_name))) { ++ if (!found_uid && refuid) { ++ crm_warn("Daemon (IPC %s) runs as root, whereas the expected" ++ " credentials are %lld:%lld, hazard of violating" ++ " the least privilege principle", ++ name, (long long) refuid, (long long) refgid); ++ } else { ++ crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" ++ " expected credentials are %lld:%lld, which may" ++ " mean a different set of privileges than expected", ++ name, (long long) found_uid, (long long) found_gid, ++ (long long) refuid, (long long) refgid); ++ } ++ memccpy(last_asked_name, name, '\0', 
sizeof(last_asked_name)); ++ } ++ } ++ ++ if (ret) { /* here, !ret only when we could not initially connect */ ++ qb_ipcc_disconnect(c); ++ } ++ ++ return ret; ++} ++ ++ + /* Utils */ + + xmlNode * +-- +1.8.3.1 + + +From 904c53ea311fd6fae945a55202b0a7ccf3783465 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 16 Apr 2019 00:04:47 +0200 +Subject: [PATCH 4/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (2/4) + +[2/4: pacemakerd to trust pre-existing processes via new checks instead] + +In pacemakerd in the context of entrusting pre-existing processes, +we now resort to procfs-based solution only in boundary, fouled cases, +and primarily examine the credentials of the processes already +occupying known IPC end-points before adopting them. + +The commit applies the new helpers from 1/1 so as to close the both +related sensitive problems, CVE-2018-16877 and CVE-2018-16878, in +a unified manner, this time limited to the main daemon of pacemaker +(pacemakerd). + +To be noted that it is clearly not 100% for this purpose for still +allowing for TOCTTOU, but that's what commit (3/3) is meant to solve +for the most part, plus there may be optimizations solving this concern +as a side effect, but that requires an active assistance on the libqb +side (https://github.com/ClusterLabs/libqb/issues/325) since any +improvement on pacemaker side in isolation would be very +cumbersome if generally possible at all, but either way +means a new, soft compatibility encumberment. + +As a follow-up to what was put in preceding 1/3 commit, PID of 1 tracked +as child's identification on FreeBSD (or when socket-based activation is +used with systemd) is treated specially, incl. special precaution with +child's PID discovered as 1 elsewhere. 
+ +v2: courtesy of Yan Gao of SUSE for early discovery and report for + what's primarily solved with 4/4 commit, in extension, child + daemons in the initialization phase coinciding with IPC-feasibility + based process scan in pacemakerd in a way that those are missed + (although they are to come up fully just moments later only + to interfere with naturally spawned ones) are now considered so + that if any native children later fail for said clash, the + pre-existing counterpart may get adopted instead of ending up + with repeated spawn-bury loop ad nauseam without real progress + (note that PCMK_fail_fast=true could possibly help, but that's + rather a big hammer not suitable for all the use cases, not + the ones we try to deal with gracefully here) +--- + mcp/pacemaker.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 362 insertions(+), 69 deletions(-) + +diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c +index 2986be6..86df216 100644 +--- a/mcp/pacemaker.c ++++ b/mcp/pacemaker.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2010-2018 Andrew Beekhof ++ * Copyright 2010-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +@@ -10,17 +12,23 @@ + + #include + #include ++#include + #include + #include + #include + #include + #include + ++#include /* indirectly: CRM_EX_* */ ++#include /* cib_channel_ro */ + #include + #include + #include + #include + #include ++ ++#include /* PCMK__SPECIAL_PID*, ... 
*/ ++ + #ifdef SUPPORT_COROSYNC + #include + #endif +@@ -31,6 +39,7 @@ + gboolean pcmk_quorate = FALSE; + gboolean fatal_error = FALSE; + GMainLoop *mainloop = NULL; ++static bool global_keep_tracking = false; + + #define PCMK_PROCESS_CHECK_INTERVAL 5 + +@@ -48,6 +57,7 @@ typedef struct pcmk_child_s { + const char *name; + const char *uid; + const char *command; ++ const char *endpoint; /* IPC server name */ + + gboolean active_before_startup; + } pcmk_child_t; +@@ -59,17 +69,35 @@ typedef struct pcmk_child_s { + static pcmk_child_t pcmk_children[] = { + { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, + { 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL }, +- { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" }, +- { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, +- { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, +- { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, +- { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL }, +- { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, +- { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, +- { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, ++ { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd", ++ CRM_SYSTEM_LRMD ++ }, ++ { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib", ++ cib_channel_ro ++ }, ++ { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd", ++ CRM_SYSTEM_CRMD ++ }, ++ { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd", ++ T_ATTRD ++ }, ++ { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL, ++ NULL ++ }, ++ { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine", ++ CRM_SYSTEM_PENGINE ++ }, ++ { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, 
HB_DAEMON_DIR"/mgmtd", ++ NULL ++ }, ++ { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd", ++ "stonith-ng" ++ }, + }; + /* *INDENT-ON* */ + ++static gboolean check_active_before_startup_processes(gpointer user_data); ++static int pcmk_child_active(pcmk_child_t *child); + static gboolean start_child(pcmk_child_t * child); + void update_process_clients(crm_client_t *client); + void update_process_peers(void); +@@ -131,14 +159,31 @@ pcmk_process_exit(pcmk_child_t * child) + } + + if (shutdown_trigger) { ++ /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ + mainloop_set_trigger(shutdown_trigger); ++ /* intended to speed up propagating expected lay-off of the daemons? */ + update_node_processes(local_nodeid, NULL, get_process_list()); + +- } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) { ++ } else if (!child->respawn) { ++ /* nothing to do */ ++ ++ } else if (crm_is_true(getenv("PCMK_fail_fast"))) { + crm_err("Rebooting system because of %s", child->name); + pcmk_panic(__FUNCTION__); + +- } else if (child->respawn) { ++ } else if (pcmk_child_active(child) == 1) { ++ crm_warn("One-off suppressing strict respawning of a child process %s," ++ " appears alright per %s IPC end-point", ++ child->name, child->endpoint); ++ /* need to monitor how it evolves, and start new process if badly */ ++ child->active_before_startup = TRUE; ++ if (!global_keep_tracking) { ++ global_keep_tracking = true; ++ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, ++ check_active_before_startup_processes, NULL); ++ } ++ ++ } else { + crm_notice("Respawning failed child process: %s", child->name); + start_child(child); + } +@@ -215,8 +260,13 @@ stop_child(pcmk_child_t * child, int signal) + signal = SIGTERM; + } + +- if (child->command == NULL) { +- crm_debug("Nothing to do for child \"%s\"", child->name); ++ /* why to skip PID of 1? 
++ - FreeBSD ~ how untrackable process behind IPC is masqueraded as ++ - elsewhere: how "init" task is designated; in particular, in systemd ++ arrangement of socket-based activation, this is pretty real */ ++ if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { ++ crm_debug("Nothing to do for child \"%s\" (process %lld)", ++ child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); + return TRUE; + } + +@@ -241,6 +291,11 @@ stop_child(pcmk_child_t * child, int signal) + static char *opts_default[] = { NULL, NULL }; + static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; + ++/* TODO once libqb is taught to juggle with IPC end-points carried over as ++ bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) ++ it shall hand over these descriptors here if/once they are successfully ++ pre-opened in (presumably) pcmk_child_active, to avoid any remaining ++ room for races */ + static gboolean + start_child(pcmk_child_t * child) + { +@@ -371,7 +426,10 @@ escalate_shutdown(gpointer data) + + pcmk_child_t *child = data; + +- if (child->pid) { ++ if (child->pid == PCMK__SPECIAL_PID) { ++ pcmk_process_exit(child); ++ ++ } else if (child->pid) { + /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ + crm_err("Child %s not terminating in a timely manner, forcing", child->name); + stop_child(child, SIGSEGV); +@@ -379,6 +437,8 @@ escalate_shutdown(gpointer data) + return FALSE; + } + ++#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ ++ + static gboolean + pcmk_shutdown_worker(gpointer user_data) + { +@@ -407,11 +467,24 @@ pcmk_shutdown_worker(gpointer user_data) + time_t now = time(NULL); + + if (child->respawn) { ++ if (child->pid == PCMK__SPECIAL_PID) { ++ crm_warn("The process behind %s IPC cannot be" ++ " terminated, so either wait the graceful" ++ " period of %ld s for its native termination" ++ " if it vitally depends on some other daemons" ++ " going down in a controlled way already," ++ " or 
locate and kill the correct %s process" ++ " on your own; set PCMK_fail_fast=1 to avoid" ++ " this altogether next time around", ++ child->name, (long) SHUTDOWN_ESCALATION_PERIOD, ++ child->command); ++ } + next_log = now + 30; + child->respawn = FALSE; + stop_child(child, SIGTERM); + if (phase < pcmk_children[pcmk_child_crmd].start_seq) { +- g_timeout_add(180000 /* 3m */ , escalate_shutdown, child); ++ g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, ++ escalate_shutdown, child); + } + + } else if (now >= next_log) { +@@ -696,7 +769,106 @@ mcp_chown(const char *path, uid_t uid, gid_t gid) + } + } + +-#if SUPPORT_PROCFS ++/*! ++ * \internal ++ * \brief Check the liveness of the child based on IPC name and PID if tracked ++ * ++ * \param[inout] child Child tracked data ++ * ++ * \return 0 if no trace of child's liveness detected (while it is detectable ++ * to begin with, at least according to one of the two properties), ++ * 1 if everything is fine, 2 if it's up per PID, but not per IPC ++ * end-point (still starting?), -1 on error, and -2 when the child ++ * (its IPC) blocked with an unauthorized process (log message ++ * emitted in both latter cases) ++ * ++ * \note This function doesn't modify any of \p child members but \c pid, ++ * and is not actively toying with processes as such but invoking ++ * \c stop_child in one particular case (there's for some reason ++ * a different authentic holder of the IPC end-point). 
++ */ ++static int ++pcmk_child_active(pcmk_child_t *child) { ++ static uid_t cl_uid = 0; ++ static gid_t cl_gid = 0; ++ const uid_t root_uid = 0; ++ const gid_t root_gid = 0; ++ const uid_t *ref_uid; ++ const gid_t *ref_gid; ++ int ret = 0; ++ pid_t ipc_pid = 0; ++ const char *use_name; ++ ++ if (child->endpoint == NULL ++ && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { ++ crm_err("Cannot track child %s for missing both API end-point and PID", ++ child->name); ++ ret = -1; /* misuse of the function when child is not trackable */ ++ ++ } else if (child->endpoint != NULL) { ++ ++ ref_uid = (child->uid != NULL) ? &cl_uid : &root_uid; ++ ref_gid = (child->uid != NULL) ? &cl_gid : &root_gid; ++ ++ if (child->uid != NULL && !cl_uid && !cl_gid ++ && crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid) < 0) { ++ crm_err("Could not find user and group IDs for user %s", ++ CRM_DAEMON_USER); ++ ret = -1; ++ } else if ((ret = pcmk__ipc_is_authentic_process_active(child->endpoint, ++ *ref_uid, *ref_gid, ++ &ipc_pid)) < 0) { ++ /* game over */ ++ } else if (child->pid <= 0) { ++ /* hit new child to be initialized, or reset to zero ++ and investigate further for ret == 0 */ ++ child->pid = ipc_pid; ++ } else if (ipc_pid && child->pid != ipc_pid) { ++ /* ultimately strange for ret == 1; either way, investigate */ ++ ret = 0; ++ } ++ } ++ ++ if (!ret) { ++ use_name = (child->flag == crm_proc_stonith_ng) ++ ? "stonithd" /* compensate "simplification" 61fc951e9 */ ++ : child->name; ++ /* when no IPC based liveness detected (incl. 
if ever a child without ++ IPC is tracked), or detected for a different _authentic_ process; ++ safe on FreeBSD since the only change possible from a proper child's ++ PID into "special" PID of 1 behind more loosely related process */ ++ ret = crm_pid_active(child->pid, use_name); ++ if (ipc_pid && (ret != 1 ++ || ipc_pid == PCMK__SPECIAL_PID ++ || crm_pid_active(ipc_pid, use_name) == 1)) { ++ if (ret == 1) { ++ /* assume there's no forking-while-retaining-IPC-socket ++ involved in the "children's" lifecycle, hence that the ++ tracking got out of sync purely because of some external ++ (esoteric?) forces (user initiated process "refresh" by ++ force? or intentionally racing on start-up, even?), and ++ that switching over to this other detected, authentic ++ instance with an IPC already in possession is a better ++ trade-off than "neutralizing" it first so as to give ++ either the original or possibly a new to-be-spawned ++ daemon process a leeway for operation, which would ++ otherwise have to be carried out */ ++ /* not possessing IPC, after all (what about corosync CPG?) */ ++ stop_child(child, SIGKILL); ++ } else { ++ ret = 1; ++ } ++ child->pid = ipc_pid; ++ } else if (ret == 1) { ++ ret = 2; /* up per PID, but not per IPC (still starting?)
*/ ++ } else if (!child->pid && ret == -1) { ++ ret = 0; /* correct -1 on FreeBSD from above back to 0 */ ++ } ++ } ++ ++ return ret; ++} ++ + static gboolean + check_active_before_startup_processes(gpointer user_data) + { +@@ -713,15 +885,41 @@ check_active_before_startup_processes(gpointer user_data) + continue; + } else { + const char *name = pcmk_children[lpc].name; +- if (pcmk_children[lpc].flag == crm_proc_stonith_ng) { +- name = "stonithd"; +- } +- +- if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) { +- crm_notice("Process %s terminated (pid=%d)", +- name, pcmk_children[lpc].pid); +- pcmk_process_exit(&(pcmk_children[lpc])); +- continue; ++ int ret; ++ ++ switch ((ret = pcmk_child_active(&pcmk_children[lpc]))) { ++ case 1: ++ break; ++ case 0: ++ case 2: /* this very case: it was OK once already */ ++ if (pcmk_children[lpc].respawn == TRUE) { ++ /* presumably after crash, hence critical */ ++ crm_crit("Process %s terminated (pid=%lld)%s", \ ++ name, (long long) ++ PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid), ++ ret ? ", at least per IPC end-point that went AWOL" ++ : ""); ++ } else { ++ /* orderly shutdown */ ++ crm_notice("Process %s terminated (pid=%lld)%s", \ ++ name, (long long) ++ PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid), ++ ret ? ", at least per IPC end-point that went AWOL" ++ : ""); ++ } ++ pcmk_process_exit(&(pcmk_children[lpc])); ++ continue; ++ default: ++ crm_crit("Unexpected value from pcmk_child_active:" ++ " %d (pid=%lld)", ret, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[lpc].pid)); ++ /* fall through */ ++ case -1: ++ case -2: ++ /* message(s) already emitted */ ++ crm_exit(DAEMON_RESPAWN_STOP); ++ break; /* static analysis/noreturn */ + } + } + /* at least one of the processes found at startup +@@ -730,61 +928,147 @@ check_active_before_startup_processes(gpointer user_data) + } + } + ++ global_keep_tracking = keep_tracking; + return keep_tracking; + } +-#endif // SUPPORT_PROCFS + +-static void ++/*! 
++ * \internal ++ * \brief Initial one-off check of the pre-existing "child" processes ++ * ++ * With "child" process, we mean the subdaemon that defines an API end-point ++ * (all of them do as of the comment) -- the possible complement is skipped ++ * as it is deemed it has no such shared resources to cause conflicts about, ++ * hence it can presumably be started anew without hesitation. ++ * If that won't hold true in the future, the concept of a shared resource ++ * will have to be generalized beyond the API end-point. ++ * ++ * For boundary cases that the "child" is still starting (IPC end-point is yet ++ * to be witnessed), or more rarely (practically FreeBSD only), when there's ++ * a pre-existing "untrackable" authentic process, we give the situation some ++ * time to possibly unfold in the right direction, meaning that said socket ++ * will appear or the unattainable process will disappear per the observable ++ * IPC, respectively. ++ * ++ * \return 0 if no such "child" process found, positive number X when X ++ * "children" detected, -1 on an internal error, -2 when any ++ * would-be-used IPC is blocked with an unauthorized process ++ * ++ * \note Since this gets run at the very start, \c respawn_count fields ++ * for particular children get temporarily overloaded with "rounds ++ * of waiting" tracking, restored once we are about to finish with ++ * success (i.e. returning value >=0) and will remain unrestored ++ * otherwise. One way to suppress liveness detection logic for ++ * particular child is to set the said value to a negative number. 
++ */ ++#define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ ++static int + find_and_track_existing_processes(void) + { +-#if SUPPORT_PROCFS +- DIR *dp; +- struct dirent *entry; +- bool start_tracker = FALSE; +- char entry_name[64]; +- +- dp = opendir("/proc"); +- if (!dp) { +- /* no proc directory to search through */ +- crm_notice("Can not read /proc directory to track existing components"); +- return; +- } +- +- while ((entry = readdir(dp)) != NULL) { +- int pid; +- int max = SIZEOF(pcmk_children); +- int i; +- +- if (crm_procfs_process_info(entry, entry_name, &pid) < 0) { +- continue; +- } +- for (i = 0; i < max; i++) { +- const char *name = pcmk_children[i].name; +- +- if (pcmk_children[i].start_seq == 0) { ++ unsigned tracking = 0U; ++ bool wait_in_progress; ++ int cur; ++ size_t i, rounds; ++ ++ for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { ++ wait_in_progress = false; ++ for (i = 0; i < SIZEOF(pcmk_children); i++) { ++ if (!pcmk_children[i].endpoint ++ || pcmk_children[i].respawn_count < 0 ++ || !(cur = pcmk_child_active(&pcmk_children[i]))) { ++ /* as a speculation, don't give up in the context of ++ pcmk_child_active check if there are more rounds to ++ come for other reasons, but don't artificially wait just ++ because of this, since we would preferably start ASAP */ + continue; + } +- if (pcmk_children[i].flag == crm_proc_stonith_ng) { +- name = "stonithd"; +- } +- if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) { +- crm_notice("Tracking existing %s process (pid=%d)", name, pid); +- pcmk_children[i].pid = pid; +- pcmk_children[i].active_before_startup = TRUE; +- start_tracker = TRUE; +- break; ++ pcmk_children[i].respawn_count = rounds; ++ switch (cur) { ++ case 1: ++ if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { ++ if (crm_is_true(getenv("PCMK_fail_fast"))) { ++ crm_crit("Cannot reliably track pre-existing" ++ " authentic process behind %s IPC on this" ++ " platform and PCMK_fail_fast requested", 
++ pcmk_children[i].endpoint); ++ return -1; ++ } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { ++ crm_notice("Assuming pre-existing authentic, though" ++ " on this platform untrackable, process" ++ " behind %s IPC is stable (was in %d" ++ " previous samples) so rather than" ++ " bailing out (PCMK_fail_fast not" ++ " requested), we just switch to a less" ++ " optimal IPC liveness monitoring" ++ " (not very suitable for heavy load)", ++ pcmk_children[i].name, WAIT_TRIES - 1); ++ crm_warn("The process behind %s IPC cannot be" ++ " terminated, so the overall shutdown" ++ " will get delayed implicitly (%ld s)," ++ " which serves as a graceful period for" ++ " its native termination if it vitally" ++ " depends on some other daemons going" ++ " down in a controlled way already", ++ pcmk_children[i].name, ++ (long) SHUTDOWN_ESCALATION_PERIOD); ++ } else { ++ wait_in_progress = true; ++ crm_warn("Cannot reliably track pre-existing" ++ " authentic process behind %s IPC on this" ++ " platform, can still disappear in %d" ++ " attempt(s)", pcmk_children[i].endpoint, ++ WAIT_TRIES - pcmk_children[i].respawn_count); ++ continue; ++ } ++ } ++ crm_notice("Tracking existing %s process (pid=%lld)", ++ pcmk_children[i].name, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[i].pid)); ++ pcmk_children[i].respawn_count = -1; /* 0~keep watching */ ++ pcmk_children[i].active_before_startup = TRUE; ++ tracking++; ++ break; ++ case 2: ++ if (pcmk_children[i].respawn_count == WAIT_TRIES) { ++ crm_crit("%s IPC end-point for existing authentic" ++ " process %lld did not (re)appear", ++ pcmk_children[i].endpoint, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[i].pid)); ++ return -1; ++ } ++ wait_in_progress = true; ++ crm_warn("Cannot find %s IPC end-point for existing" ++ " authentic process %lld, can still (re)appear" ++ " in %d attempts (?)", ++ pcmk_children[i].endpoint, ++ (long long) PCMK__SPECIAL_PID_AS_0( ++ pcmk_children[i].pid), ++ WAIT_TRIES - 
pcmk_children[i].respawn_count); ++ continue; ++ case -1: ++ case -2: ++ return cur; /* messages already emitted */ ++ default: ++ crm_crit("Unexpected condition"CRM_XS"cur=%d", cur); ++ return -1; /* unexpected condition */ + } + } ++ if (!wait_in_progress) { ++ break; ++ } ++ (void) poll(NULL, 0, 250); /* a bit for changes to possibly happen */ ++ } ++ for (i = 0; i < SIZEOF(pcmk_children); i++) { ++ pcmk_children[i].respawn_count = 0; /* restore pristine state */ + } + +- if (start_tracker) { +- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, +- NULL); ++ if (tracking) { ++ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, ++ check_active_before_startup_processes, NULL); + } +- closedir(dp); +-#else +- crm_notice("No procfs support, so skipping check for existing components"); +-#endif // SUPPORT_PROCFS ++ return (tracking > INT_MAX) ? INT_MAX : tracking; + } + + static void +@@ -1106,7 +1390,16 @@ main(int argc, char **argv) + setenv("PCMK_watchdog", "false", 1); + } + +- find_and_track_existing_processes(); ++ switch (find_and_track_existing_processes()) { ++ case -1: ++ crm_crit("Internal fatality, see the log"); ++ crm_exit(DAEMON_RESPAWN_STOP); ++ case -2: ++ crm_crit("Blocked by foreign process, kill the offender"); ++ crm_exit(ENOLCK); ++ default: ++ break; ++ }; + + cluster.destroy = mcp_cpg_destroy; + cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; +-- +1.8.3.1 + + +From 07a82c5c8f9d60989ea88c5a3cc316ee290ea784 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 16 Apr 2019 00:04:57 +0200 +Subject: [PATCH 5/7] High: pacemakerd vs. 
IPC/procfs confused deputy + authenticity issue (3/4) + +[3/4: other daemons to authenticate IPC servers of fellow processes] + +Now that CVE-2018-16877 issue alone is still only partially covered +based on the preceding commits in the set, put the server-by-client +authentication (enabled and 1/3 and partially sported in 2/3) into +practice widely amongst the communicating pacemaker child daemons and +towards CPG API provided by 3rd party but principally using the same +underlying IPC mechanism facilitated by libqb, and consequently close +the remaining "big gap". + +As a small justification to introducing yet another "return +value" int variable, type-correctness is restored for those +that shall be cs_error_t to begin with. +--- + lib/cluster/corosync.c | 178 +++++++++++++++++++++++++++++++++++++++++-------- + lib/cluster/cpg.c | 94 +++++++++++++++++++------- + lib/common/ipc.c | 43 +++++++++++- + mcp/corosync.c | 76 ++++++++++++++++----- + 4 files changed, 320 insertions(+), 71 deletions(-) + +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 9719541..0acf9b2 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. 
+- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -40,6 +31,8 @@ + + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + quorum_handle_t pcmk_quorum_handle = 0; + + gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; +@@ -52,10 +45,15 @@ char * + corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) + { + int lpc = 0; +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + char *name = NULL; + cmap_handle_t local_handle = 0; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + /* nodeid == 0 == CMAN_NODEID_US */ + if (nodeid == 0) { +@@ -85,6 +83,27 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) + + if (cmap_handle == 0) { + cmap_handle = local_handle; ++ ++ rc = cmap_fd_get(cmap_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } + } + + while (name == NULL && cmap_handle != 0) { +@@ -126,6 +145,7 @@ corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) + lpc++; + } + ++bail: + if(local_handle) { + cmap_finalize(local_handle); + } +@@ -249,11 +269,15 @@ gboolean + cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + void (*destroy) (gpointer)) + { +- int rc = -1; ++ cs_error_t rc; + int fd = 0; + int quorate = 0; + uint32_t quorum_type = 0; + struct mainloop_fd_callbacks quorum_fd_callbacks; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch; + quorum_fd_callbacks.destroy = destroy; +@@ -262,7 +286,8 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + + rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); + if (rc != CS_OK) { +- crm_err("Could not connect to the Quorum API: %d", rc); ++ crm_err("Could not connect to the Quorum API: %s (%d)", ++ cs_strerror(rc), rc); + goto bail; + + } else if (quorum_type != QUORUM_SET) { +@@ -270,6 +295,29 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + goto bail; + } + ++ rc = quorum_fd_get(pcmk_quorum_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the Quorum API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* Quorum provider run as root (in given user namespace, anyway)?
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("Quorum provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ rc = CS_ERR_ACCESS; ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of Quorum provider: %s (%d)", ++ strerror(-rv), -rv); ++ rc = CS_ERR_ACCESS; ++ goto bail; ++ } ++ + rc = quorum_getquorate(pcmk_quorum_handle, &quorate); + if (rc != CS_OK) { + crm_err("Could not obtain the current Quorum API state: %d", rc); +@@ -290,12 +338,6 @@ cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), + goto bail; + } + +- rc = quorum_fd_get(pcmk_quorum_handle, &fd); +- if (rc != CS_OK) { +- crm_err("Could not obtain the Quorum API connection: %d", rc); +- goto bail; +- } +- + mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); + + corosync_initialize_nodelist(NULL, FALSE, NULL); +@@ -486,10 +528,15 @@ gboolean + corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent) + { + int lpc = 0; +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + gboolean any = FALSE; + cmap_handle_t cmap_handle; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + do { + rc = cmap_initialize(&cmap_handle); +@@ -507,6 +554,27 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml + return FALSE; + } + ++ rc = cmap_fd_get(cmap_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } ++ + crm_peer_init(); + crm_trace("Initializing corosync nodelist"); + for (lpc = 0; TRUE; lpc++) { +@@ -560,6 +628,7 @@ corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml + + free(name); + } ++bail: + cmap_finalize(cmap_handle); + return any; + } +@@ -569,36 +638,68 @@ corosync_cluster_name(void) + { + cmap_handle_t handle; + char *cluster_name = NULL; +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + rc = cmap_initialize(&handle); + if (rc != CS_OK) { +- crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc); ++ crm_info("Failed to initialize the cmap API: %s (%d)", ++ cs_strerror(rc), rc); + return NULL; + } + ++ rc = cmap_fd_get(handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } ++ + rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); + if (rc != CS_OK) { +- crm_info("Cannot get totem.cluster_name: %s (%d)", ais_error2text(rc), rc); ++ crm_info("Cannot get totem.cluster_name: %s (%d)", cs_strerror(rc), rc); + + } else { + crm_debug("cmap totem.cluster_name = '%s'", cluster_name); + } + ++bail: + cmap_finalize(handle); +- + return cluster_name; + } + + int + corosync_cmap_has_config(const char *prefix) + { +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + static int found = -1; + cmap_handle_t cmap_handle; + cmap_iter_handle_t iter_handle; + char key_name[CMAP_KEYNAME_MAXLEN + 1]; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + if(found != -1) { + return found; +@@ -621,6 +722,27 @@ corosync_cmap_has_config(const char *prefix) + return -1; + } + ++ rc = cmap_fd_get(cmap_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } ++ + rc = cmap_iter_init(cmap_handle, prefix, &iter_handle); + if (rc != CS_OK) { + crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)", +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 1e6cf79..a61d492 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Lesser General Public +- * License as published by the Free Software Foundation; either +- * version 2.1 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Lesser General Public License for more details. +- * +- * You should have received a copy of the GNU Lesser General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + + #include +@@ -38,6 +29,8 @@ + + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + cpg_handle_t pcmk_cpg_handle = 0; /* TODO: Remove, use cluster.cpg_handle */ + + static bool cpg_evicted = FALSE; +@@ -71,11 +64,16 @@ cluster_disconnect_cpg(crm_cluster_t *cluster) + + uint32_t get_local_nodeid(cpg_handle_t handle) + { +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + static uint32_t local_nodeid = 0; + cpg_handle_t local_handle = handle; + cpg_callbacks_t cb = { }; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + if(local_nodeid != 0) { + return local_nodeid; +@@ -92,6 +90,32 @@ uint32_t get_local_nodeid(cpg_handle_t handle) + if(handle == 0) { + crm_trace("Creating connection"); + cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); ++ if (rc != CS_OK) { ++ crm_err("Could not connect to the CPG API: %s (%d)", ++ cs_strerror(rc), rc); ++ return 0; ++ } ++ ++ rc = cpg_fd_get(local_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CPG API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CPG provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CPG provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CPG provider: %s (%d)", ++ strerror(-rv), -rv); ++ goto bail; ++ } + } + + if (rc == CS_OK) { +@@ -103,6 +127,8 @@ uint32_t get_local_nodeid(cpg_handle_t handle) + if (rc != CS_OK) { + crm_err("Could not get local node id from the CPG API: %s (%d)", ais_error2text(rc), rc); + } ++ ++bail: + if(handle == 0) { + crm_trace("Closing connection"); + cpg_finalize(local_handle); +@@ -435,12 +461,16 @@ pcmk_cpg_membership(cpg_handle_t handle, + gboolean + cluster_connect_cpg(crm_cluster_t *cluster) + { +- int rc = -1; +- int fd = 0; ++ cs_error_t rc; ++ int fd = -1; + int retries = 0; + uint32_t id = 0; + crm_node_t *peer = NULL; + cpg_handle_t handle = 0; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + struct mainloop_fd_callbacks cpg_fd_callbacks = { + .dispatch = pcmk_cpg_dispatch, +@@ -465,7 +495,31 @@ cluster_connect_cpg(crm_cluster_t *cluster) + + cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); + if (rc != CS_OK) { +- crm_err("Could not connect to the Cluster Process Group API: %d", rc); ++ crm_err("Could not connect to the CPG API: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ rc = cpg_fd_get(handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CPG API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CPG provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CPG provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ rc = CS_ERR_ACCESS; ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CPG provider: %s (%d)", ++ strerror(-rv), -rv); ++ rc = CS_ERR_ACCESS; + goto bail; + } + +@@ -484,12 +538,6 @@ cluster_connect_cpg(crm_cluster_t *cluster) + goto bail; + } + +- rc = cpg_fd_get(handle, &fd); +- if (rc != CS_OK) { +- crm_err("Could not obtain the CPG API connection: %d", rc); +- goto bail; +- } +- + pcmk_cpg_handle = handle; + cluster->cpg_handle = handle; + mainloop_add_fd("corosync-cpg", G_PRIORITY_MEDIUM, fd, cluster, &cpg_fd_callbacks); +diff --git a/lib/common/ipc.c b/lib/common/ipc.c +index 5b47dd6..3e547f3 100644 +--- a/lib/common/ipc.c ++++ b/lib/common/ipc.c +@@ -916,11 +916,18 @@ crm_ipc_new(const char *name, size_t max_size) + * + * \param[in] client Connection instance obtained from crm_ipc_new() + * +- * \return TRUE on success, FALSE otherwise (in which case errno will be set) ++ * \return TRUE on success, FALSE otherwise (in which case errno will be set; ++ * specifically, in case of discovering the remote side is not ++ * authentic, its value is set to ECONNABORTED). 
+ */ + bool + crm_ipc_connect(crm_ipc_t * client) + { ++ static uid_t cl_uid = 0; ++ static gid_t cl_gid = 0; ++ pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; ++ int rv; ++ + client->need_reply = FALSE; + client->ipc = qb_ipcc_connect(client->name, client->buf_size); + +@@ -931,7 +938,39 @@ crm_ipc_connect(crm_ipc_t * client) + + client->pfd.fd = crm_ipc_get_fd(client); + if (client->pfd.fd < 0) { +- crm_debug("Could not obtain file descriptor for %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); ++ rv = errno; ++ /* message already omitted */ ++ crm_ipc_close(client); ++ errno = rv; ++ return FALSE; ++ } ++ ++ if (!cl_uid && !cl_gid ++ && (rv = crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid)) < 0) { ++ errno = -rv; ++ /* message already omitted */ ++ crm_ipc_close(client); ++ errno = -rv; ++ return FALSE; ++ } ++ ++ if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid, ++ &found_pid, &found_uid, ++ &found_gid))) { ++ crm_err("Daemon (IPC %s) is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ crm_ipc_close(client); ++ errno = ECONNABORTED; ++ return FALSE; ++ ++ } else if (rv < 0) { ++ errno = -rv; ++ crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)", ++ client->name); ++ crm_ipc_close(client); ++ errno = -rv; + return FALSE; + } + +diff --git a/mcp/corosync.c b/mcp/corosync.c +index 7502da7..407a63f 100644 +--- a/mcp/corosync.c ++++ b/mcp/corosync.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2010 Andrew Beekhof ++ * Copyright 2010-2019 the Pacemaker project contributors + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. 
++ * The version control history for this file may have further details. + * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + #include + #include +@@ -33,8 +24,11 @@ + #endif + + #include ++#include /* for crm_ipc_is_authentic_process */ + #include + ++#include /* PCMK__SPECIAL_PID* */ ++ + #if SUPPORT_CMAN + # include + #endif +@@ -111,7 +105,10 @@ gboolean + cluster_connect_cfg(uint32_t * nodeid) + { + cs_error_t rc; +- int fd = 0, retries = 0; ++ int fd = -1, retries = 0, rv; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; + + static struct mainloop_fd_callbacks cfg_fd_callbacks = { + .dispatch = pcmk_cfg_dispatch, +@@ -121,13 +118,27 @@ cluster_connect_cfg(uint32_t * nodeid) + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { +- crm_err("corosync cfg init error %d", rc); ++ crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { +- crm_err("corosync cfg fd_get error %d", rc); ++ crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc); ++ goto bail; ++ } ++ ++ /* CFG provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CFG provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ goto bail; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CFG provider: %s (%d)", ++ strerror(-rv), -rv); + goto bail; + } + +@@ -264,7 +275,7 @@ get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, ch + gboolean + mcp_read_config(void) + { +- int rc = CS_OK; ++ cs_error_t rc = CS_OK; + int retries = 0; + + const char *const_value = NULL; +@@ -287,11 +298,16 @@ mcp_read_config(void) + } else { + break; + } +- + } while (retries < 5); ++ + #elif HAVE_CMAP + cmap_handle_t local_handle; + uint64_t config = 0; ++ int fd = -1; ++ uid_t found_uid = 0; ++ gid_t found_gid = 0; ++ pid_t found_pid = 0; ++ int rv; + + /* There can be only one (possibility if confdb isn't around) */ + do { +@@ -315,6 +331,30 @@ mcp_read_config(void) + return FALSE; + } + ++ rc = cmap_fd_get(local_handle, &fd); ++ if (rc != CS_OK) { ++ crm_err("Could not obtain the CMAP API connection: %s (%d)", ++ cs_strerror(rc), rc); ++ cmap_finalize(local_handle); ++ return FALSE; ++ } ++ ++ /* CMAP provider run as root (in given user namespace, anyway)? 
*/ ++ if (!(rv = crm_ipc_is_authentic_process(fd, (uid_t) 0,(gid_t) 0, &found_pid, ++ &found_uid, &found_gid))) { ++ crm_err("CMAP provider is not authentic:" ++ " process %lld (uid: %lld, gid: %lld)", ++ (long long) PCMK__SPECIAL_PID_AS_0(found_pid), ++ (long long) found_uid, (long long) found_gid); ++ cmap_finalize(local_handle); ++ return FALSE; ++ } else if (rv < 0) { ++ crm_err("Could not verify authenticity of CMAP provider: %s (%d)", ++ strerror(-rv), -rv); ++ cmap_finalize(local_handle); ++ return FALSE; ++ } ++ + stack = get_cluster_type(); + crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); + +-- +1.8.3.1 + + +From 4d6f6e01b309cda7b3f8fe791247566d247d8028 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Tue, 16 Apr 2019 00:08:28 +0200 +Subject: [PATCH 6/7] High: pacemakerd vs. IPC/procfs confused deputy + authenticity issue (4/4) + +[4/4: CPG users to be careful about now-more-probable rival processes] + +In essence, this comes down to pacemaker confusing at-node CPG members +with effectively the only plausible to co-exist at particular node, +which doesn't hold and asks for a wider reconciliation of this +reality-check. 
+ +However, in practical terms, since there are two factors lowering the +priority of doing so: + +1/ possibly the only non-self-inflicted scenario is either that + some of the cluster stack processes fail -- this is the problem + that shall rather be deferred to arranged node disarming/fencing + to stay on the safe side with 100% certainty, at the cost of + possibly long-lasting failover process at other nodes + (for other possibility, someone running some of these by accident + so they effectively become rival processes, it's like getting + hands cut when playing with a lawnmower in an unintended way) + +2/ for state tracking of the peer nodes, it may possibly cause troubles + in case the process observed as left wasn't the last for the + particular node, even if presumably just temporary, since the + situation may eventually resolve with imposed serialization of + the rival processes via API end-point singleton restriction (this + is also the most likely cause of why such non-final leave gets + observed in the first place), except in one case -- the legitimate + API end-point carrier won't possibly be acknowledged as returned + by its peers, at least not immediately, unless it tries to join + anew, which verges on undefined behaviour (at least per corosync + documentation) + +we make do just with a light code change so as to + +* limit 1/ some more with in-daemon self-check for pre-existing + end-point existence (this is to complement the checks already made in + the parent daemon prior to spawning new instances, only some moments + later; note that we don't have any lock file etc. mechanisms to + prevent parallel runs of the same daemons, and people could run these + on their own deliberation), and to + +* guard against the interferences from the rivals at the same node + per 2/ with ignoring their non-final leave messages altogether. + +Note that CPG at this point is already expected to be authenticity-safe.
+ +Regarding now-more-probable part, we actually traded the inherently racy +procfs scanning for something (exactly that singleton mentioned above) +rather firm (and unfakeable), but we admittedly lost track of +processes that are after CPG membership (that is, another form of +a shared state) prior to (or in non-deterministic order allowing for +the same) caring about publishing the end-point. + +Big thanks is owed to Yan Gao of SUSE, for early discovery and reporting +this discrepancy arising from the earlier commits in the set. +--- + attrd/main.c | 19 ++++++++++- + cib/main.c | 35 ++++++++++++--------- + crmd/main.c | 35 ++++++++++++--------- + fencing/main.c | 32 +++++++++++-------- + lib/cluster/cpg.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++----- + 5 files changed, 163 insertions(+), 52 deletions(-) + +diff --git a/attrd/main.c b/attrd/main.c +index 4cc15cc..e0a1e7c 100644 +--- a/attrd/main.c ++++ b/attrd/main.c +@@ -1,5 +1,7 @@ + /* +- * Copyright 2013-2019 Andrew Beekhof ++ * Copyright 2013-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details.
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public +@@ -336,6 +338,7 @@ main(int argc, char **argv) + int index = 0; + int argerr = 0; + qb_ipcs_service_t *ipcs = NULL; ++ crm_ipc_t *old_instance = NULL; + + attrd_init_mainloop(); + crm_log_preinit(NULL, argc, argv); +@@ -372,6 +375,20 @@ main(int argc, char **argv) + + crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_info("Starting up"); ++ ++ old_instance = crm_ipc_new(T_ATTRD, 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("attrd is already active, aborting startup"); ++ crm_exit(EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute); + + attrd_exit_status = attrd_cluster_connect(); +diff --git a/cib/main.c b/cib/main.c +index 5473d40..7c745da 100644 +--- a/cib/main.c ++++ b/cib/main.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. 
+- * +- * You should have received a copy of the GNU General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -135,13 +126,12 @@ main(int argc, char **argv) + int index = 0; + int argerr = 0; + struct passwd *pwentry = NULL; ++ crm_ipc_t *old_instance = NULL; + + crm_log_preinit(NULL, argc, argv); + crm_set_options(NULL, "[options]", + long_options, "Daemon for storing and replicating the cluster configuration"); + +- crm_peer_init(); +- + mainloop_add_signal(SIGTERM, cib_shutdown); + mainloop_add_signal(SIGPIPE, cib_enable_writes); + +@@ -216,6 +206,19 @@ main(int argc, char **argv) + + crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + ++ old_instance = crm_ipc_new(cib_channel_ro, 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("cib is already active, aborting startup"); ++ crm_exit(EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + if (cib_root == NULL) { + if ((g_file_test(CRM_CONFIG_DIR "/cib.xml", G_FILE_TEST_EXISTS) == FALSE) + && (g_file_test(CRM_LEGACY_CONFIG_DIR "/cib.xml", G_FILE_TEST_EXISTS) == TRUE)) { +@@ -238,6 +241,8 @@ main(int argc, char **argv) + return 100; + } + ++ crm_peer_init(); ++ + /* read local config file */ + rc = cib_init(); + +diff --git a/crmd/main.c b/crmd/main.c +index e8baa12..6eb7c03 100644 +--- a/crmd/main.c ++++ b/crmd/main.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public +- * License as published by the Free Software 
Foundation; either +- * version 2 of the License, or (at your option) any later version. +- * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * Copyright 2004-2019 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -61,6 +52,7 @@ main(int argc, char **argv) + int flag; + int index = 0; + int argerr = 0; ++ crm_ipc_t *old_instance = NULL; + + crmd_mainloop = g_main_new(FALSE); + crm_log_preinit(NULL, argc, argv); +@@ -104,6 +96,19 @@ main(int argc, char **argv) + crm_help('?', EX_USAGE); + } + ++ old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("crmd is already active, aborting startup"); ++ crm_exit(EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { + crm_err("Terminating due to bad permissions on " PE_STATE_DIR); + fprintf(stderr, +diff --git a/fencing/main.c b/fencing/main.c +index 16663f6..c46c9a5 100644 +--- a/fencing/main.c ++++ b/fencing/main.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2009 Andrew Beekhof ++ * Copyright 2009-2019 the Pacemaker project contributors + * +- * This program is free software; you can redistribute it 
and/or +- * modify it under the terms of the GNU General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -1289,6 +1280,7 @@ main(int argc, char **argv) + int option_index = 0; + crm_cluster_t cluster; + const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; ++ crm_ipc_t *old_instance = NULL; + + crm_log_preinit("stonith-ng", argc, argv); + crm_set_options(NULL, "mode [options]", long_options, +@@ -1459,6 +1451,20 @@ main(int argc, char **argv) + } + + crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); ++ ++ old_instance = crm_ipc_new("stonith-ng", 0); ++ if (crm_ipc_connect(old_instance)) { ++ /* IPC end-point already up */ ++ crm_ipc_close(old_instance); ++ crm_ipc_destroy(old_instance); ++ crm_err("stonithd is already active, aborting startup"); ++ crm_exit(EX_OK); ++ } else { ++ /* not up or not authentic, we'll proceed either way */ ++ crm_ipc_destroy(old_instance); ++ old_instance = NULL; ++ } ++ + mainloop_add_signal(SIGTERM, stonith_shutdown); + + crm_peer_init(); +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index a61d492..c5ecc67 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -385,6 +385,20 @@ 
pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + return NULL; + } + ++static int cmp_member_list_nodeid(const void *first, ++ const void *second) ++{ ++ const struct cpg_address *const a = *((const struct cpg_address **) first), ++ *const b = *((const struct cpg_address **) second); ++ if (a->nodeid < b->nodeid) { ++ return -1; ++ } else if (a->nodeid > b->nodeid) { ++ return 1; ++ } ++ /* don't bother with "reason" nor "pid" */ ++ return 0; ++} ++ + void + pcmk_cpg_membership(cpg_handle_t handle, + const struct cpg_name *groupName, +@@ -396,29 +410,91 @@ pcmk_cpg_membership(cpg_handle_t handle, + gboolean found = FALSE; + static int counter = 0; + uint32_t local_nodeid = get_local_nodeid(handle); ++ const struct cpg_address *key, **rival, **sorted; ++ ++ sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); ++ CRM_ASSERT(sorted != NULL); ++ ++ for (size_t iter = 0; iter < member_list_entries; iter++) { ++ sorted[iter] = member_list + iter; ++ } ++ /* so that the cross-matching multiply-subscribed nodes is then cheap */ ++ qsort(sorted, member_list_entries, sizeof(const struct cpg_address *), ++ cmp_member_list_nodeid); + + for (i = 0; i < left_list_entries; i++) { + crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); + +- crm_info("Node %u left group %s (peer=%s, counter=%d.%d)", ++ crm_info("Node %u left group %s (peer=%s:%llu, counter=%d.%d)", + left_list[i].nodeid, groupName->value, +- (peer? peer->uname : ""), counter, i); ++ (peer? peer->uname : ""), ++ (unsigned long long) left_list[i].pid, counter, i); ++ ++ /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation ++ and not playing by this rule may go wild in case of multiple ++ residual instances of the same pacemaker daemon at the same node ++ -- we must ensure that the possible local rival(s) won't make us ++ cry out and bail (e.g. 
when they quit themselves), since all the ++ surrounding logic denies this simple fact that the full membership ++ is discriminated also per the PID of the process beside mere node ++ ID (and implicitly, group ID); practically, this will be sound in ++ terms of not preventing progress, since all the CPG joiners are ++ also API end-point carriers, and that's what matters locally ++ (who's the winner); ++ remotely, we will just compare leave_list and member_list and if ++ the left process has it's node retained in member_list (under some ++ other PID, anyway) we will just ignore it as well ++ XXX: long-term fix is to establish in-out PID-aware tracking? */ + if (peer) { +- crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, OFFLINESTATUS); ++ key = &left_list[i]; ++ rival = bsearch(&key, sorted, member_list_entries, ++ sizeof(const struct cpg_address *), ++ cmp_member_list_nodeid); ++ if (rival == NULL) { ++ crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, ++ OFFLINESTATUS); ++ } else if (left_list[i].nodeid == local_nodeid) { ++ crm_info("Ignoring the above event %s.%d, comes from a local" ++ " rival process (presumably not us): %llu", ++ groupName->value, counter, ++ (unsigned long long) left_list[i].pid); ++ } else { ++ crm_info("Ignoring the above event %s.%d, comes from" ++ " a rival-rich node: %llu (e.g. 
%llu process" ++ " carries on)", ++ groupName->value, counter, ++ (unsigned long long) left_list[i].pid, ++ (unsigned long long) (*rival)->pid); ++ } + } + } ++ free(sorted); ++ sorted = NULL; + + for (i = 0; i < joined_list_entries; i++) { +- crm_info("Node %u joined group %s (counter=%d.%d)", +- joined_list[i].nodeid, groupName->value, counter, i); ++ crm_info("Node %u joined group %s (counter=%d.%d, pid=%llu," ++ " unchecked for rivals)", ++ joined_list[i].nodeid, groupName->value, counter, i, ++ (unsigned long long) joined_list[i].pid); + } + + for (i = 0; i < member_list_entries; i++) { + crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); + +- crm_info("Node %u still member of group %s (peer=%s, counter=%d.%d)", ++ crm_info("Node %u still member of group %s (peer=%s:%llu," ++ " counter=%d.%d, at least once)", + member_list[i].nodeid, groupName->value, +- (peer? peer->uname : ""), counter, i); ++ (peer? peer->uname : ""), (unsigned long long) member_list[i].pid, ++ counter, i); ++ ++ if (member_list[i].nodeid == local_nodeid ++ && member_list[i].pid != getpid()) { ++ /* see the note above */ ++ crm_info("Ignoring the above event %s.%d, comes from a local rival" ++ " process: %llu", groupName->value, counter, ++ (unsigned long long) member_list[i].pid); ++ continue; ++ } + + /* Anyone that is sending us CPG messages must also be a _CPG_ member. + * But it's _not_ safe to assume it's in the quorum membership.
+@@ -438,7 +514,9 @@ pcmk_cpg_membership(cpg_handle_t handle, + * + * Set the threshold to 1 minute + */ +- crm_err("Node %s[%u] appears to be online even though we think it is dead", peer->uname, peer->id); ++ crm_err("Node %s[%u] appears to be online even though we think" ++ " it is dead (unchecked for rivals)", ++ peer->uname, peer->id); + if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) { + peer->votes = 0; + } +-- +1.8.3.1 + + +From 9dc38d81cb6e1967c368faed78de1927cabf06b3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= +Date: Wed, 17 Apr 2019 15:17:50 +0200 +Subject: [PATCH 7/7] Med: controld: fix possible NULL pointer dereference + +This is now more likely triggerable once the problems related to +CVE-2018-16878 are avoided. +--- + crmd/control.c | 32 +++++++++++++------------------- + 1 file changed, 13 insertions(+), 19 deletions(-) + +diff --git a/crmd/control.c b/crmd/control.c +index e01066a..488ea88 100644 +--- a/crmd/control.c ++++ b/crmd/control.c +@@ -1,19 +1,10 @@ + /* +- * Copyright (C) 2004 Andrew Beekhof ++ * Copyright 2004-2019 the Pacemaker project contributors + * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. ++ * The version control history for this file may have further details. + * +- * This software is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. 
+- * +- * You should have received a copy of the GNU General Public +- * License along with this library; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + + #include +@@ -127,12 +118,15 @@ do_ha_control(long long action, + } + #endif + } +- controld_election_init(cluster->uname); +- fsa_our_uname = cluster->uname; +- fsa_our_uuid = cluster->uuid; +- if(cluster->uuid == NULL) { +- crm_err("Could not obtain local uuid"); +- registered = FALSE; ++ ++ if (registered == TRUE) { ++ controld_election_init(cluster->uname); ++ fsa_our_uname = cluster->uname; ++ fsa_our_uuid = cluster->uuid; ++ if(cluster->uuid == NULL) { ++ crm_err("Could not obtain local uuid"); ++ registered = FALSE; ++ } + } + + if (registered == FALSE) { +-- +1.8.3.1 + diff --git a/SOURCES/007-stderr.patch b/SOURCES/007-stderr.patch deleted file mode 100644 index e71570a..0000000 --- a/SOURCES/007-stderr.patch +++ /dev/null @@ -1,30 +0,0 @@ -From d351a328ec7d785b1ab3a39c34cb29f870164733 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 21 Sep 2018 16:07:37 -0500 -Subject: [PATCH] Low: tools: ensure crm_resource --force-* commands get stderr - messages - ---- - tools/crm_resource_runtime.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 1b6a3f3..1a360a0 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1757,6 +1757,12 @@ cli_resource_execute(resource_t *rsc, const char *requested_name, - setenv("OCF_TRACE_RA", "1", 1); - } - -+ /* A resource agent using the standard ocf-shellfuncs library will not print -+ * messages to stderr if it doesn't have a controlling terminal (e.g. if -+ * crm_resource is called via script or ssh). This forces it to do so. 
-+ */ -+ setenv("OCF_TRACE_FILE", "/dev/stderr", 0); -+ - if (override_hash) { - GHashTableIter iter; - char *name = NULL; --- -1.8.3.1 - diff --git a/SOURCES/008-bundle-ordering.patch b/SOURCES/008-bundle-ordering.patch deleted file mode 100644 index 916c590..0000000 --- a/SOURCES/008-bundle-ordering.patch +++ /dev/null @@ -1,2386 +0,0 @@ -From a07ff469d96312b37f9c0c5ac65e7c1f87394ce5 Mon Sep 17 00:00:00 2001 -From: Andrew Beekhof -Date: Fri, 12 Oct 2018 12:13:14 +1100 -Subject: [PATCH 1/9] Fix: schedulerd: Improve internal bundle ordering - -If the remote resource is scheduled to stop, we should at least wait to -know the state of the underlying container resource before executing it. - -Otherwise we may end up needlessly tearing it down just because someone -ran a cleanup on the container. ---- - pengine/container.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/pengine/container.c b/pengine/container.c -index 15d094d..4e77545 100644 ---- a/pengine/container.c -+++ b/pengine/container.c -@@ -278,6 +278,7 @@ container_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - order_start_start(rsc, tuple->docker, pe_order_runnable_left | pe_order_implies_first_printed); - - if(tuple->child) { -+ new_rsc_order(tuple->docker, RSC_STATUS, tuple->remote, RSC_STOP, pe_order_optional, data_set); - order_stop_stop(rsc, tuple->child, pe_order_implies_first_printed); - } - order_stop_stop(rsc, tuple->docker, pe_order_implies_first_printed); --- -1.8.3.1 - - -From 35dc265c4577b7fbe46c8e5919b7ef59028c1de5 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 29 Oct 2018 14:23:06 -0500 -Subject: [PATCH 2/9] Test: pengine: Improve internal bundle ordering - ---- - pengine/test10/bundle-nested-colocation.exp | 12 +++--- - pengine/test10/bundle-order-fencing.exp | 56 ++++++++++++------------- - pengine/test10/bundle-order-partial-start-2.exp | 12 +++--- - pengine/test10/bundle-order-partial-start.exp | 12 +++--- - 
pengine/test10/bundle-order-partial-stop.exp | 16 +++---- - pengine/test10/bundle-order-stop-clone.exp | 20 ++++----- - pengine/test10/bundle-order-stop-on-remote.exp | 44 +++++++++---------- - pengine/test10/bundle-order-stop.exp | 16 +++---- - 8 files changed, 94 insertions(+), 94 deletions(-) - -diff --git a/pengine/test10/bundle-nested-colocation.exp b/pengine/test10/bundle-nested-colocation.exp -index a50809c..29c2eda 100644 ---- a/pengine/test10/bundle-nested-colocation.exp -+++ b/pengine/test10/bundle-nested-colocation.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -65,7 +65,7 @@ - - - -- -+ - - - -@@ -132,7 +132,7 @@ - - - -- -+ - - - -@@ -208,13 +208,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp -index 599c299..2b8f5cf 100644 ---- a/pengine/test10/bundle-order-fencing.exp -+++ b/pengine/test10/bundle-order-fencing.exp -@@ -46,7 +46,7 @@ - - - -- -+ - - - -@@ -75,7 +75,7 @@ - - - -- -+ - - - -@@ -146,10 +146,10 @@ - - - -- -+ - - -- -+ - - - -@@ -521,7 +521,7 @@ - - - -- -+ - - - -@@ -534,7 +534,7 @@ - - - -- -+ - - - -@@ -607,7 +607,7 @@ - - - -- -+ - - - -@@ -620,7 +620,7 @@ - - - -- -+ - - - -@@ -633,7 +633,7 @@ - - - -- -+ - - - -@@ -646,7 +646,7 @@ - - - -- -+ - - - -@@ -659,7 +659,7 @@ - - - -- -+ - - - -@@ -753,7 +753,7 @@ - - - -- -+ - - - -@@ -766,7 +766,7 @@ - - - -- -+ - - - -@@ -779,7 +779,7 @@ - - - -- -+ - - - -@@ -792,7 +792,7 @@ - - - -- -+ - - - -@@ -805,7 +805,7 @@ - - - -- -+ - - - -@@ -843,10 +843,10 @@ - - - -- -+ - - -- -+ - - - -@@ -876,10 +876,10 @@ - - - -- -+ - - -- -+ - - - -@@ -932,13 +932,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -@@ -968,13 +968,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -@@ -1076,10 +1076,10 @@ - - - -- -+ - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-partial-start-2.exp b/pengine/test10/bundle-order-partial-start-2.exp -index bf9a0b0..168ec7a 100644 ---- a/pengine/test10/bundle-order-partial-start-2.exp 
-+++ b/pengine/test10/bundle-order-partial-start-2.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -61,7 +61,7 @@ - - - -- -+ - - - -@@ -315,7 +315,7 @@ - - - -- -+ - - - -@@ -328,7 +328,7 @@ - - - -- -+ - - - -@@ -379,7 +379,7 @@ - - - -- -+ - - - -@@ -409,7 +409,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-partial-start.exp b/pengine/test10/bundle-order-partial-start.exp -index 8e28f19..5e80822 100644 ---- a/pengine/test10/bundle-order-partial-start.exp -+++ b/pengine/test10/bundle-order-partial-start.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -61,7 +61,7 @@ - - - -- -+ - - - -@@ -296,7 +296,7 @@ - - - -- -+ - - - -@@ -309,7 +309,7 @@ - - - -- -+ - - - -@@ -360,7 +360,7 @@ - - - -- -+ - - - -@@ -390,7 +390,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-partial-stop.exp b/pengine/test10/bundle-order-partial-stop.exp -index 89d87aa..626fc44 100644 ---- a/pengine/test10/bundle-order-partial-stop.exp -+++ b/pengine/test10/bundle-order-partial-stop.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -66,7 +66,7 @@ - - - -- -+ - - - -@@ -278,7 +278,7 @@ - - - -- -+ - - - -@@ -291,7 +291,7 @@ - - - -- -+ - - - -@@ -304,7 +304,7 @@ - - - -- -+ - - - -@@ -370,7 +370,7 @@ - - - -- -+ - - - -@@ -400,7 +400,7 @@ - - - -- -+ - - - -@@ -480,7 +480,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-stop-clone.exp b/pengine/test10/bundle-order-stop-clone.exp -index 3e66f54..6ef5dac 100644 ---- a/pengine/test10/bundle-order-stop-clone.exp -+++ b/pengine/test10/bundle-order-stop-clone.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -27,7 +27,7 @@ - - - -- -+ - - - -@@ -40,7 +40,7 @@ - - - -- -+ - - - -@@ -53,7 +53,7 @@ - - - -- -+ - - - -@@ -66,7 +66,7 @@ - - - -- -+ - - - -@@ -88,10 +88,10 @@ - - - -- -+ - - -- -+ - - - -@@ -121,13 +121,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-stop-on-remote.exp b/pengine/test10/bundle-order-stop-on-remote.exp -index 96588dc..6559dfd 100644 ---- 
a/pengine/test10/bundle-order-stop-on-remote.exp -+++ b/pengine/test10/bundle-order-stop-on-remote.exp -@@ -588,7 +588,7 @@ - - - -- -+ - - - -@@ -601,7 +601,7 @@ - - - -- -+ - - - -@@ -614,7 +614,7 @@ - - - -- -+ - - - -@@ -627,7 +627,7 @@ - - - -- -+ - - - -@@ -675,7 +675,7 @@ - - - -- -+ - - - -@@ -688,7 +688,7 @@ - - - -- -+ - - - -@@ -701,7 +701,7 @@ - - - -- -+ - - - -@@ -780,7 +780,7 @@ - - - -- -+ - - - -@@ -793,7 +793,7 @@ - - - -- -+ - - - -@@ -806,7 +806,7 @@ - - - -- -+ - - - -@@ -819,7 +819,7 @@ - - - -- -+ - - - -@@ -841,13 +841,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -@@ -877,13 +877,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -@@ -940,13 +940,13 @@ - - - -- -+ - - -- -+ - - -- -+ - - - -@@ -976,10 +976,10 @@ - - - -- -+ - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-stop.exp b/pengine/test10/bundle-order-stop.exp -index 89d87aa..626fc44 100644 ---- a/pengine/test10/bundle-order-stop.exp -+++ b/pengine/test10/bundle-order-stop.exp -@@ -1,7 +1,7 @@ - - - -- -+ - - - -@@ -66,7 +66,7 @@ - - - -- -+ - - - -@@ -278,7 +278,7 @@ - - - -- -+ - - - -@@ -291,7 +291,7 @@ - - - -- -+ - - - -@@ -304,7 +304,7 @@ - - - -- -+ - - - -@@ -370,7 +370,7 @@ - - - -- -+ - - - -@@ -400,7 +400,7 @@ - - - -- -+ - - - -@@ -480,7 +480,7 @@ - - - -- -+ - - - --- -1.8.3.1 - - -From 16fda11606c3cbc432153a8677ab7c378f0cbd2e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 9 Oct 2018 11:57:43 -0500 -Subject: [PATCH 3/9] Log: scheduler: improve bundle address fixing messages - -Mostly, try to make clear when the bundle container's node is used vs. -the bundle connection's node. 
---- - lib/pengine/container.c | 5 +++-- - lib/pengine/utils.c | 3 ++- - pengine/container.c | 6 ++++-- - 3 files changed, 9 insertions(+), 5 deletions(-) - -diff --git a/lib/pengine/container.c b/lib/pengine/container.c -index 1526f37..483d219 100644 ---- a/lib/pengine/container.c -+++ b/lib/pengine/container.c -@@ -814,11 +814,12 @@ container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field) - } - - if(node == NULL) { -- crm_trace("Cannot fix address for %s", tuple->remote->id); -+ crm_trace("Cannot determine address for bundle connection %s", rsc->id); - return NULL; - } - -- crm_trace("Fixing addr for %s on %s", rsc->id, node->details->uname); -+ crm_trace("Setting address for bundle connection %s to bundle host %s", -+ rsc->id, node->details->uname); - if(xml != NULL && field != NULL) { - crm_xml_add(xml, field, node->details->uname); - } -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index a9ca86f..3f8dc30 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -1971,7 +1971,8 @@ rsc_action_digest(resource_t * rsc, const char *task, const char *key, - - // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside - if (container_fix_remote_addr_in(rsc, data->params_all, "addr")) { -- crm_trace("Fixed addr for %s on %s", rsc->id, node->details->uname); -+ crm_trace("Set address for bundle connection %s (on %s)", -+ rsc->id, node->details->uname); - } - - g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); -diff --git a/pengine/container.c b/pengine/container.c -index 4e77545..02ee425 100644 ---- a/pengine/container.c -+++ b/pengine/container.c -@@ -846,10 +846,12 @@ container_expand(resource_t * rsc, pe_working_set_t * data_set) - const char *calculated_addr = container_fix_remote_addr_in(tuple->remote, nvpair, "value"); - - if (calculated_addr) { -- crm_trace("Fixed addr for %s on %s", tuple->remote->id, calculated_addr); -+ crm_trace("Set address for bundle connection 
%s to bundle host %s", -+ tuple->remote->id, calculated_addr); - g_hash_table_replace(tuple->remote->parameters, strdup("addr"), strdup(calculated_addr)); - } else { -- crm_err("Could not fix addr for %s", tuple->remote->id); -+ crm_err("Could not determine address for bundle connection %s", -+ tuple->remote->id); - } - } - if(tuple->ip) { --- -1.8.3.1 - - -From 0907d0e3061f05d0fdd3a276e8a5c578813d5ea9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 9 Oct 2018 16:09:33 -0500 -Subject: [PATCH 4/9] Refactor: scheduler: remove redundant code - -unpack_rsc_op() doesn't need to map status PCMK_LRM_OP_ERROR to -PCMK_LRM_OP_DONE because it explicitly looks for both when it uses status - -check_operation_expiry() doesn't need to check that failure_timeout is positive -when expired is true, because expired can be true only if failure_timeout is -positive - -check_action_definition() doesn't need to check whether task is stop because -it cannot be called for stops - -check_actions_for() doesn't need to check whether a status operation is a probe -because it also checks for recurring operations in the same place ---- - lib/pengine/unpack.c | 32 +++++++++++++++----------------- - pengine/allocate.c | 14 ++++---------- - 2 files changed, 19 insertions(+), 27 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 52518d4..7bc78a3 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2990,21 +2990,24 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod - } - - if (expired) { -- if (failure_timeout > 0) { -- if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, -- xml_op, data_set)) { -+ if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op, -+ data_set)) { - -- if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, -- xml_op, data_set) == 0) { -- clear_reason = "it expired"; -- } else { -- expired = FALSE; -- } -+ // There is a fail count ignoring timeout - -- } else if 
(rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) { -- /* always clear last failure when reconnect interval is set */ -- clear_reason = "reconnect interval is set"; -+ if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective, -+ xml_op, data_set) == 0) { -+ // There is no fail count considering timeout -+ clear_reason = "it expired"; -+ -+ } else { -+ expired = FALSE; - } -+ -+ } else if (rsc->remote_reconnect_interval -+ && strstr(ID(xml_op), "last_failure")) { -+ // Always clear last failure when reconnect interval is set -+ clear_reason = "reconnect interval is set"; - } - - } else if (strstr(ID(xml_op), "last_failure") && -@@ -3240,11 +3243,6 @@ unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last - node->details->uname, rsc->id); - } - -- if (status == PCMK_LRM_OP_ERROR) { -- /* Older versions set this if rc != 0 but it's up to us to decide */ -- status = PCMK_LRM_OP_DONE; -- } -- - if(status != PCMK_LRM_OP_NOT_INSTALLED) { - expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); - } -diff --git a/pengine/allocate.c b/pengine/allocate.c -index dc8017a..5589a2f 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -262,9 +262,6 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op - const char *digest_secure = NULL; - - CRM_CHECK(active_node != NULL, return FALSE); -- if (safe_str_eq(task, RSC_STOP)) { -- return FALSE; -- } - - interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL); - interval = crm_parse_int(interval_s, "0"); -@@ -395,7 +392,6 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - xmlNode *rsc_op = NULL; - GListPtr op_list = NULL; - GListPtr sorted_op_list = NULL; -- gboolean is_probe = FALSE; - gboolean did_change = FALSE; - - CRM_CHECK(node != NULL, return); -@@ -449,22 +445,20 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - continue; - } - -- is_probe = FALSE; - 
did_change = FALSE; - task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); - - interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); - interval = crm_parse_int(interval_s, "0"); - -- if (interval == 0 && safe_str_eq(task, RSC_STATUS)) { -- is_probe = TRUE; -- } -- - if (interval > 0 && - (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { - CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); - -- } else if (is_probe || safe_str_eq(task, RSC_START) || safe_str_eq(task, RSC_PROMOTE) || interval > 0 -+ } else if ((interval > 0) -+ || safe_str_eq(task, RSC_STATUS) -+ || safe_str_eq(task, RSC_START) -+ || safe_str_eq(task, RSC_PROMOTE) - || safe_str_eq(task, RSC_MIGRATED)) { - did_change = check_action_definition(rsc, node, rsc_op, data_set); - } --- -1.8.3.1 - - -From 83b6e8d1453dc6656384bbde3bfb86c5970d54c2 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 9 Oct 2018 15:04:24 -0500 -Subject: [PATCH 5/9] Refactor: scheduler: functionize scheduling clearing of - fail count - -Reduces duplication, allows reuse, and improves consistency (for example, -some uses previously set XML_ATTR_TE_NOWAIT and some didn't). 
---- - include/crm/pengine/internal.h | 3 +++ - lib/pengine/failcounts.c | 30 +++++++++++++++++++++++++++- - lib/pengine/unpack.c | 10 ++-------- - pengine/allocate.c | 44 +++++++++++++----------------------------- - 4 files changed, 47 insertions(+), 40 deletions(-) - -diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h -index 4aca751..6745ae3 100644 ---- a/include/crm/pengine/internal.h -+++ b/include/crm/pengine/internal.h -@@ -114,6 +114,9 @@ int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, - uint32_t flags, xmlNode *xml_op, - pe_working_set_t *data_set); - -+pe_action_t *pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, -+ const char *reason, -+ pe_working_set_t *data_set); - - /* Functions for finding/counting a resource's active nodes */ - -diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c -index e217176..8a7d0e4 100644 ---- a/lib/pengine/failcounts.c -+++ b/lib/pengine/failcounts.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2008-2017 Andrew Beekhof -+ * Copyright 2008-2018 Andrew Beekhof - * - * This source code is licensed under the GNU Lesser General Public License - * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. -@@ -319,3 +319,31 @@ pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, - - return failcount; - } -+ -+/*! 
-+ * \brief Schedule a controller operation to clear a fail count -+ * -+ * \param[in] rsc Resource with failure -+ * \param[in] node Node failure occurred on -+ * \param[in] reason Readable description why needed (for logging) -+ * \param[in] data_set Working set for cluster -+ * -+ * \return Scheduled action -+ */ -+pe_action_t * -+pe__clear_failcount(pe_resource_t *rsc, pe_node_t *node, -+ const char *reason, pe_working_set_t *data_set) -+{ -+ char *key = NULL; -+ action_t *clear = NULL; -+ -+ CRM_CHECK(rsc && node && reason && data_set, return NULL); -+ -+ key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); -+ clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, -+ data_set); -+ add_hash_param(clear->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); -+ crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", -+ rsc->id, node->details->uname, reason, clear->uuid); -+ return clear; -+} -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 7bc78a3..8f9099c 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -3031,14 +3031,8 @@ static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNod - - if (clear_reason != NULL) { - node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); -- char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); -- action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, -- node, FALSE, TRUE, data_set); -- -- add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); -- -- crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s", -- rsc->id, node->details->uname, clear_reason, clear_op->uuid); -+ pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason, -+ data_set); - - if (is_set(data_set->flags, pe_flag_stonith_enabled) - && rsc->remote_reconnect_interval -diff --git a/pengine/allocate.c b/pengine/allocate.c -index 5589a2f..569a4a5 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -376,7 
+376,6 @@ check_action_definition(resource_t * rsc, node_t * active_node, xmlNode * xml_op - return did_change; - } - -- - static void - check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_working_set_t * data_set) - { -@@ -392,7 +391,6 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - xmlNode *rsc_op = NULL; - GListPtr op_list = NULL; - GListPtr sorted_op_list = NULL; -- gboolean did_change = FALSE; - - CRM_CHECK(node != NULL, return); - -@@ -445,7 +443,6 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - continue; - } - -- did_change = FALSE; - task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); - - interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); -@@ -453,6 +450,7 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - - if (interval > 0 && - (is_set(rsc->flags, pe_rsc_maintenance) || node->details->maintenance)) { -+ // Maintenance mode cancels recurring operations - CancelXmlOp(rsc, rsc_op, node, "maintenance mode", data_set); - - } else if ((interval > 0) -@@ -460,28 +458,18 @@ check_actions_for(xmlNode * rsc_entry, resource_t * rsc, node_t * node, pe_worki - || safe_str_eq(task, RSC_START) - || safe_str_eq(task, RSC_PROMOTE) - || safe_str_eq(task, RSC_MIGRATED)) { -- did_change = check_action_definition(rsc, node, rsc_op, data_set); -- } -- -- if (did_change && pe_get_failcount(node, rsc, NULL, pe_fc_effective, -- NULL, data_set)) { -- -- char *key = NULL; -- action_t *action_clear = NULL; -- -- key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); -- action_clear = -- custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); -- set_bit(action_clear->flags, pe_action_runnable); -- -- crm_notice("Clearing failure of %s on %s " -- "because action definition changed " CRM_XS " %s", -- rsc->id, node->details->uname, action_clear->uuid); -+ /* If a resource operation failed, and the operation's 
definition -+ * has changed, clear any fail count so they can be retried fresh. -+ */ -+ if (check_action_definition(rsc, node, rsc_op, data_set) -+ && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, -+ data_set)) { -+ pe__clear_failcount(rsc, node, "action definition changed", -+ data_set); -+ } - } - } -- - g_list_free(sorted_op_list); -- - } - - static GListPtr -@@ -1254,16 +1242,10 @@ cleanup_orphans(resource_t * rsc, pe_working_set_t * data_set) - && pe_get_failcount(node, rsc, NULL, pe_fc_effective, NULL, - data_set)) { - -- char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0); -- action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, -- node, FALSE, TRUE, data_set); -- -- add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); -+ pe_action_t *clear_op = NULL; - -- pe_rsc_info(rsc, -- "Clearing failure of %s on %s because it is orphaned " -- CRM_XS " %s", -- rsc->id, node->details->uname, clear_op->uuid); -+ clear_op = pe__clear_failcount(rsc, node, "it is orphaned", -+ data_set); - - /* We can't use order_action_then_stop() here because its - * pe_order_preserve breaks things --- -1.8.3.1 - - -From fade22882d52ac743b99adc4cbd98780b21e250b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 15 Oct 2018 10:26:46 -0500 -Subject: [PATCH 6/9] Fix: scheduler: avoid unnecessary recovery of cleaned - guest nodes - -Generalize Andrew Beekhof's fix for bundle nodes in 407f524 to all guest nodes. 
-Fixes RHBZ#1448467 ---- - pengine/container.c | 1 - - pengine/native.c | 39 +++++++++++++++++++++++++-------------- - 2 files changed, 25 insertions(+), 15 deletions(-) - -diff --git a/pengine/container.c b/pengine/container.c -index 02ee425..a35763b 100644 ---- a/pengine/container.c -+++ b/pengine/container.c -@@ -278,7 +278,6 @@ container_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - order_start_start(rsc, tuple->docker, pe_order_runnable_left | pe_order_implies_first_printed); - - if(tuple->child) { -- new_rsc_order(tuple->docker, RSC_STATUS, tuple->remote, RSC_STOP, pe_order_optional, data_set); - order_stop_stop(rsc, tuple->child, pe_order_implies_first_printed); - } - order_stop_stop(rsc, tuple->docker, pe_order_implies_first_printed); -diff --git a/pengine/native.c b/pengine/native.c -index 6447234..cd746b6 100644 ---- a/pengine/native.c -+++ b/pengine/native.c -@@ -1486,6 +1486,26 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - if (rsc->container) { - resource_t *remote_rsc = NULL; - -+ if (rsc->is_remote_node) { -+ // rsc is the implicit remote connection for a guest or bundle node -+ -+ /* Do not allow a guest resource to live on a Pacemaker Remote node, -+ * to avoid nesting remotes. However, allow bundles to run on remote -+ * nodes. -+ */ -+ if (is_not_set(rsc->flags, pe_rsc_allow_remote_remotes)) { -+ rsc_avoids_remote_nodes(rsc->container); -+ } -+ -+ /* If someone cleans up a guest or bundle node's container, we will -+ * likely schedule a (re-)probe of the container and recovery of the -+ * connection. Order the connection stop after the container probe, -+ * so that if we detect the container running, we will trigger a new -+ * transition and avoid the unnecessary recovery. 
-+ */ -+ new_rsc_order(rsc->container, RSC_STATUS, rsc, RSC_STOP, -+ pe_order_optional, data_set); -+ - /* A user can specify that a resource must start on a Pacemaker Remote - * node by explicitly configuring it with the container=NODENAME - * meta-attribute. This is of questionable merit, since location -@@ -1493,16 +1513,15 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - * we check whether a resource (that is not itself a remote connection) - * has container set to a remote node or guest node resource. - */ -- if (rsc->container->is_remote_node) { -+ } else if (rsc->container->is_remote_node) { - remote_rsc = rsc->container; -- } else if (rsc->is_remote_node == FALSE) { -+ } else { - remote_rsc = rsc_contains_remote_node(data_set, rsc->container); - } - - if (remote_rsc) { -- /* The container represents a Pacemaker Remote node, so force the -- * resource on the Pacemaker Remote node instead of colocating the -- * resource with the container resource. -+ /* Force the resource on the Pacemaker Remote node instead of -+ * colocating the resource with the container resource. - */ - GHashTableIter iter; - node_t *node = NULL; -@@ -1512,6 +1531,7 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - node->weight = -INFINITY; - } - } -+ - } else { - /* This resource is either a filler for a container that does NOT - * represent a Pacemaker Remote node, or a Pacemaker Remote -@@ -1545,15 +1565,6 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - * or remote connection resources.*/ - rsc_avoids_remote_nodes(rsc); - } -- -- /* If this is a guest node's implicit remote connection, do not allow the -- * guest resource to live on a Pacemaker Remote node, to avoid nesting -- * remotes. However, allow bundles to run on remote nodes. 
-- */ -- if (rsc->is_remote_node && rsc->container -- && is_not_set(rsc->flags, pe_rsc_allow_remote_remotes)) { -- rsc_avoids_remote_nodes(rsc->container); -- } - } - - void --- -1.8.3.1 - - -From af4f6a1cc13b92bba8109007a1fac809e0d80887 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 15 Oct 2018 12:21:14 -0500 -Subject: [PATCH 7/9] Fix: scheduler: order guest pseudo-fencing properly after - clean-up - -If the resource history of a guest node's container has been cleaned, we will -schedule a (pseudo-)fence of the guest node, and a stop of the guest node's -connection resource, but not a stop of the container (which appears already -stopped). In this case, order the pseudo-fence after the connection stop, so we -don't call remote_node_down() unless the container is really down (the -connection stop will be avoided if the container is really up). ---- - pengine/allocate.c | 26 ++++++++++++++++++++++---- - 1 file changed, 22 insertions(+), 4 deletions(-) - -diff --git a/pengine/allocate.c b/pengine/allocate.c -index 569a4a5..e867368 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -1446,12 +1446,30 @@ fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) - node->details->uname, stonith_op->id, - container->id, stop->id); - } else { -- crm_info("Implying guest node %s is down (action %d) ", -- node->details->uname, stonith_op->id); -+ /* If we're fencing the guest node but there's no stop for the guest -+ * resource, we must think the guest is already stopped. However, we may -+ * think so because its resource history was just cleaned. To avoid -+ * unnecessarily considering the guest node down if it's really up, -+ * order the pseudo-fencing after any stop of the connection resource, -+ * which will be ordered after any container (re-)probe. 
-+ */ -+ stop = find_first_action(node->details->remote_rsc->actions, NULL, -+ RSC_STOP, NULL); -+ -+ if (stop) { -+ order_actions(stop, stonith_op, pe_order_optional); -+ crm_info("Implying guest node %s is down (action %d) " -+ "after connection is stopped (action %d)", -+ node->details->uname, stonith_op->id, stop->id); -+ } else { -+ /* Not sure why we're fencing, but everything must already be -+ * cleanly stopped. -+ */ -+ crm_info("Implying guest node %s is down (action %d) ", -+ node->details->uname, stonith_op->id); -+ } - } - -- /* @TODO: Order pseudo-fence after any (optional) fence of guest's host */ -- - /* Order/imply other actions relative to pseudo-fence as with real fence */ - stonith_constraints(node, stonith_op, data_set); - if(done) { --- -1.8.3.1 - - -From 4c771013389f395e16165312993597064d06d149 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 29 Oct 2018 14:56:47 -0500 -Subject: [PATCH 8/9] Test: pengine: update regression tests for recent changes - -clear-failcount change: operations in generated graphs now consistently have -XML_ATTR_TE_NOWAIT - -guest node change: insignificant change in op numbering due to newly added -optional constraint ---- - pengine/test10/bug-5025-1.exp | 2 +- - pengine/test10/bug-5025-3.exp | 2 +- - pengine/test10/bug-5069-op-disabled.exp | 2 +- - pengine/test10/bug-cl-5247.exp | 12 ++++++------ - 4 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/pengine/test10/bug-5025-1.exp b/pengine/test10/bug-5025-1.exp -index 053ece4..2a82e79 100644 ---- a/pengine/test10/bug-5025-1.exp -+++ b/pengine/test10/bug-5025-1.exp -@@ -3,7 +3,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bug-5025-3.exp b/pengine/test10/bug-5025-3.exp -index eb2e2e6..9360ca7 100644 ---- a/pengine/test10/bug-5025-3.exp -+++ b/pengine/test10/bug-5025-3.exp -@@ -12,7 +12,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bug-5069-op-disabled.exp b/pengine/test10/bug-5069-op-disabled.exp -index 9653af1..fffb8c4 100644 ---- 
a/pengine/test10/bug-5069-op-disabled.exp -+++ b/pengine/test10/bug-5069-op-disabled.exp -@@ -3,7 +3,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bug-cl-5247.exp b/pengine/test10/bug-cl-5247.exp -index c21ed7d..5315360 100644 ---- a/pengine/test10/bug-cl-5247.exp -+++ b/pengine/test10/bug-cl-5247.exp -@@ -451,7 +451,7 @@ - - - -- -+ - - - -@@ -464,7 +464,7 @@ - - - -- -+ - - - -@@ -477,7 +477,7 @@ - - - -- -+ - - - -@@ -534,7 +534,7 @@ - - - -- -+ - - - -@@ -564,7 +564,7 @@ - - - -- -+ - - - -@@ -644,7 +644,7 @@ - - - -- -+ - - - --- -1.8.3.1 - - -From 6ab7a2cb1ed28548706db3bc8c006cea3b605681 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 15 Oct 2018 11:24:46 -0500 -Subject: [PATCH 9/9] Test: scheduler: order guest node connection recovery - after container probe - ---- - pengine/regression.sh | 1 + - pengine/test10/guest-node-cleanup.dot | 64 ++++++ - pengine/test10/guest-node-cleanup.exp | 319 ++++++++++++++++++++++++++++++ - pengine/test10/guest-node-cleanup.scores | 81 ++++++++ - pengine/test10/guest-node-cleanup.summary | 55 ++++++ - pengine/test10/guest-node-cleanup.xml | 304 ++++++++++++++++++++++++++++ - 6 files changed, 824 insertions(+) - create mode 100644 pengine/test10/guest-node-cleanup.dot - create mode 100644 pengine/test10/guest-node-cleanup.exp - create mode 100644 pengine/test10/guest-node-cleanup.scores - create mode 100644 pengine/test10/guest-node-cleanup.summary - create mode 100644 pengine/test10/guest-node-cleanup.xml - -diff --git a/pengine/regression.sh b/pengine/regression.sh -index deca1b6..ead5fd8 100755 ---- a/pengine/regression.sh -+++ b/pengine/regression.sh -@@ -854,6 +854,7 @@ do_test whitebox-migrate1 "Migrate both container and connection resource" - do_test whitebox-imply-stop-on-fence "imply stop action on container node rsc when host node is fenced" - do_test whitebox-nested-group "Verify guest remote-node works nested in a group" - do_test guest-node-host-dies "Verify guest node is recovered if host goes 
away" -+do_test guest-node-cleanup "Order guest node connection recovery after container probe" - - echo "" - do_test remote-startup-probes "Baremetal remote-node startup probes" -diff --git a/pengine/test10/guest-node-cleanup.dot b/pengine/test10/guest-node-cleanup.dot -new file mode 100644 -index 0000000..45fe4be ---- /dev/null -+++ b/pengine/test10/guest-node-cleanup.dot -@@ -0,0 +1,64 @@ -+digraph "g" { -+"all_stopped" -> "lxc1_start_0 rhel7-1" [ style = bold] -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"container1_monitor_0 rhel7-1" -> "container1_start_0 rhel7-1" [ style = bold] -+"container1_monitor_0 rhel7-1" -> "lxc1_stop_0 rhel7-1" [ style = bold] -+"container1_monitor_0 rhel7-1" [ style=bold color="green" fontcolor="black"] -+"container1_start_0 rhel7-1" -> "lxc-ms_promote_0 lxc1" [ style = bold] -+"container1_start_0 rhel7-1" -> "lxc-ms_start_0 lxc1" [ style = bold] -+"container1_start_0 rhel7-1" -> "lxc1_start_0 rhel7-1" [ style = bold] -+"container1_start_0 rhel7-1" [ style=bold color="green" fontcolor="black"] -+"lxc-ms-master_demote_0" -> "lxc-ms-master_demoted_0" [ style = bold] -+"lxc-ms-master_demote_0" -> "lxc-ms_demote_0 lxc1" [ style = bold] -+"lxc-ms-master_demote_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_demoted_0" -> "lxc-ms-master_promote_0" [ style = bold] -+"lxc-ms-master_demoted_0" -> "lxc-ms-master_start_0" [ style = bold] -+"lxc-ms-master_demoted_0" -> "lxc-ms-master_stop_0" [ style = bold] -+"lxc-ms-master_demoted_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_promote_0" -> "lxc-ms_promote_0 lxc1" [ style = bold] -+"lxc-ms-master_promote_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_running_0" -> "lxc-ms-master_promote_0" [ style = bold] -+"lxc-ms-master_running_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_start_0" -> "lxc-ms-master_running_0" [ style 
= bold] -+"lxc-ms-master_start_0" -> "lxc-ms_start_0 lxc1" [ style = bold] -+"lxc-ms-master_start_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_stop_0" -> "lxc-ms-master_stopped_0" [ style = bold] -+"lxc-ms-master_stop_0" -> "lxc-ms_stop_0 lxc1" [ style = bold] -+"lxc-ms-master_stop_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms-master_stopped_0" -> "lxc-ms-master_promote_0" [ style = bold] -+"lxc-ms-master_stopped_0" -> "lxc-ms-master_start_0" [ style = bold] -+"lxc-ms-master_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms_demote_0 lxc1" -> "lxc-ms-master_demoted_0" [ style = bold] -+"lxc-ms_demote_0 lxc1" -> "lxc-ms_promote_0 lxc1" [ style = bold] -+"lxc-ms_demote_0 lxc1" -> "lxc-ms_stop_0 lxc1" [ style = bold] -+"lxc-ms_demote_0 lxc1" [ style=bold color="green" fontcolor="orange"] -+"lxc-ms_promote_0 lxc1" -> "lxc-ms-master_promoted_0" [ style = bold] -+"lxc-ms_promote_0 lxc1" [ style=bold color="green" fontcolor="black"] -+"lxc-ms_start_0 lxc1" -> "lxc-ms-master_running_0" [ style = bold] -+"lxc-ms_start_0 lxc1" -> "lxc-ms_promote_0 lxc1" [ style = bold] -+"lxc-ms_start_0 lxc1" [ style=bold color="green" fontcolor="black"] -+"lxc-ms_stop_0 lxc1" -> "all_stopped" [ style = bold] -+"lxc-ms_stop_0 lxc1" -> "lxc-ms-master_stopped_0" [ style = bold] -+"lxc-ms_stop_0 lxc1" -> "lxc-ms_start_0 lxc1" [ style = bold] -+"lxc-ms_stop_0 lxc1" [ style=bold color="green" fontcolor="orange"] -+"lxc1_monitor_30000 rhel7-1" [ style=bold color="green" fontcolor="black"] -+"lxc1_start_0 rhel7-1" -> "lxc-ms_promote_0 lxc1" [ style = bold] -+"lxc1_start_0 rhel7-1" -> "lxc-ms_start_0 lxc1" [ style = bold] -+"lxc1_start_0 rhel7-1" -> "lxc1_monitor_30000 rhel7-1" [ style = bold] -+"lxc1_start_0 rhel7-1" [ style=bold color="green" fontcolor="black"] -+"lxc1_stop_0 rhel7-1" -> "all_stopped" [ style = bold] -+"lxc1_stop_0 rhel7-1" -> "lxc1_start_0 rhel7-1" [ style = bold] -+"lxc1_stop_0 rhel7-1" -> "stonith 'reboot' lxc1" [ style = 
bold] -+"lxc1_stop_0 rhel7-1" [ style=bold color="green" fontcolor="black"] -+"stonith 'reboot' lxc1" -> "lxc-ms-master_stop_0" [ style = bold] -+"stonith 'reboot' lxc1" -> "lxc-ms_demote_0 lxc1" [ style = bold] -+"stonith 'reboot' lxc1" -> "lxc-ms_stop_0 lxc1" [ style = bold] -+"stonith 'reboot' lxc1" -> "stonith_complete" [ style = bold] -+"stonith 'reboot' lxc1" [ style=bold color="green" fontcolor="orange"] -+"stonith_complete" -> "all_stopped" [ style = bold] -+"stonith_complete" -> "container1_start_0 rhel7-1" [ style = bold] -+"stonith_complete" -> "lxc-ms_promote_0 lxc1" [ style = bold] -+"stonith_complete" -> "lxc-ms_start_0 lxc1" [ style = bold] -+"stonith_complete" [ style=bold color="green" fontcolor="orange"] -+} -diff --git a/pengine/test10/guest-node-cleanup.exp b/pengine/test10/guest-node-cleanup.exp -new file mode 100644 -index 0000000..9503a03 ---- /dev/null -+++ b/pengine/test10/guest-node-cleanup.exp -@@ -0,0 +1,319 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/guest-node-cleanup.scores 
b/pengine/test10/guest-node-cleanup.scores -new file mode 100644 -index 0000000..9bc8250 ---- /dev/null -+++ b/pengine/test10/guest-node-cleanup.scores -@@ -0,0 +1,81 @@ -+Allocation scores: -+Using the original execution date of: 2018-10-15 16:02:04Z -+clone_color: lxc-ms-master allocation score on lxc1: INFINITY -+clone_color: lxc-ms-master allocation score on lxc2: INFINITY -+clone_color: lxc-ms-master allocation score on rhel7-1: 0 -+clone_color: lxc-ms-master allocation score on rhel7-2: 0 -+clone_color: lxc-ms-master allocation score on rhel7-3: 0 -+clone_color: lxc-ms-master allocation score on rhel7-4: 0 -+clone_color: lxc-ms-master allocation score on rhel7-5: 0 -+clone_color: lxc-ms:0 allocation score on lxc1: INFINITY -+clone_color: lxc-ms:0 allocation score on lxc2: INFINITY -+clone_color: lxc-ms:0 allocation score on rhel7-1: 0 -+clone_color: lxc-ms:0 allocation score on rhel7-2: 0 -+clone_color: lxc-ms:0 allocation score on rhel7-3: 0 -+clone_color: lxc-ms:0 allocation score on rhel7-4: 0 -+clone_color: lxc-ms:0 allocation score on rhel7-5: 0 -+clone_color: lxc-ms:1 allocation score on lxc1: INFINITY -+clone_color: lxc-ms:1 allocation score on lxc2: INFINITY -+clone_color: lxc-ms:1 allocation score on rhel7-1: 0 -+clone_color: lxc-ms:1 allocation score on rhel7-2: 0 -+clone_color: lxc-ms:1 allocation score on rhel7-3: 0 -+clone_color: lxc-ms:1 allocation score on rhel7-4: 0 -+clone_color: lxc-ms:1 allocation score on rhel7-5: 0 -+lxc-ms:0 promotion score on lxc2: INFINITY -+lxc-ms:1 promotion score on lxc1: INFINITY -+native_color: Fencing allocation score on lxc1: -INFINITY -+native_color: Fencing allocation score on lxc2: -INFINITY -+native_color: Fencing allocation score on rhel7-1: 0 -+native_color: Fencing allocation score on rhel7-2: 0 -+native_color: Fencing allocation score on rhel7-3: 0 -+native_color: Fencing allocation score on rhel7-4: 0 -+native_color: Fencing allocation score on rhel7-5: 0 -+native_color: FencingPass allocation score on 
lxc1: -INFINITY -+native_color: FencingPass allocation score on lxc2: -INFINITY -+native_color: FencingPass allocation score on rhel7-1: 0 -+native_color: FencingPass allocation score on rhel7-2: 0 -+native_color: FencingPass allocation score on rhel7-3: 0 -+native_color: FencingPass allocation score on rhel7-4: 0 -+native_color: FencingPass allocation score on rhel7-5: 0 -+native_color: container1 allocation score on lxc1: -INFINITY -+native_color: container1 allocation score on lxc2: -INFINITY -+native_color: container1 allocation score on rhel7-1: INFINITY -+native_color: container1 allocation score on rhel7-2: 0 -+native_color: container1 allocation score on rhel7-3: 0 -+native_color: container1 allocation score on rhel7-4: 0 -+native_color: container1 allocation score on rhel7-5: 0 -+native_color: container2 allocation score on lxc1: -INFINITY -+native_color: container2 allocation score on lxc2: -INFINITY -+native_color: container2 allocation score on rhel7-1: INFINITY -+native_color: container2 allocation score on rhel7-2: 0 -+native_color: container2 allocation score on rhel7-3: 0 -+native_color: container2 allocation score on rhel7-4: 0 -+native_color: container2 allocation score on rhel7-5: 0 -+native_color: lxc-ms:0 allocation score on lxc1: INFINITY -+native_color: lxc-ms:0 allocation score on lxc2: INFINITY -+native_color: lxc-ms:0 allocation score on rhel7-1: 0 -+native_color: lxc-ms:0 allocation score on rhel7-2: 0 -+native_color: lxc-ms:0 allocation score on rhel7-3: 0 -+native_color: lxc-ms:0 allocation score on rhel7-4: 0 -+native_color: lxc-ms:0 allocation score on rhel7-5: 0 -+native_color: lxc-ms:1 allocation score on lxc1: INFINITY -+native_color: lxc-ms:1 allocation score on lxc2: -INFINITY -+native_color: lxc-ms:1 allocation score on rhel7-1: 0 -+native_color: lxc-ms:1 allocation score on rhel7-2: 0 -+native_color: lxc-ms:1 allocation score on rhel7-3: 0 -+native_color: lxc-ms:1 allocation score on rhel7-4: 0 -+native_color: lxc-ms:1 
allocation score on rhel7-5: 0 -+native_color: lxc1 allocation score on lxc1: -INFINITY -+native_color: lxc1 allocation score on lxc2: -INFINITY -+native_color: lxc1 allocation score on rhel7-1: 0 -+native_color: lxc1 allocation score on rhel7-2: -INFINITY -+native_color: lxc1 allocation score on rhel7-3: -INFINITY -+native_color: lxc1 allocation score on rhel7-4: -INFINITY -+native_color: lxc1 allocation score on rhel7-5: -INFINITY -+native_color: lxc2 allocation score on lxc1: -INFINITY -+native_color: lxc2 allocation score on lxc2: -INFINITY -+native_color: lxc2 allocation score on rhel7-1: 0 -+native_color: lxc2 allocation score on rhel7-2: -INFINITY -+native_color: lxc2 allocation score on rhel7-3: -INFINITY -+native_color: lxc2 allocation score on rhel7-4: -INFINITY -+native_color: lxc2 allocation score on rhel7-5: -INFINITY -diff --git a/pengine/test10/guest-node-cleanup.summary b/pengine/test10/guest-node-cleanup.summary -new file mode 100644 -index 0000000..6378f48 ---- /dev/null -+++ b/pengine/test10/guest-node-cleanup.summary -@@ -0,0 +1,55 @@ -+Using the original execution date of: 2018-10-15 16:02:04Z -+ -+Current cluster status: -+Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] -+Containers: [ lxc2:container2 ] -+ -+ Fencing (stonith:fence_xvm): Started rhel7-2 -+ FencingPass (stonith:fence_dummy): Started rhel7-3 -+ container1 (ocf::heartbeat:VirtualDomain): FAILED -+ container2 (ocf::heartbeat:VirtualDomain): Started rhel7-1 -+ Master/Slave Set: lxc-ms-master [lxc-ms] -+ Slaves: [ lxc2 ] -+ Stopped: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] -+ -+Transition Summary: -+ * Fence (reboot) lxc1 (resource: container1) 'guest is unclean' -+ * Start container1 ( rhel7-1 ) -+ * Recover lxc-ms:1 ( Master lxc1 ) -+ * Restart lxc1 ( rhel7-1 ) due to required container1 start -+ -+Executing cluster transition: -+ * Resource action: container1 monitor on rhel7-1 -+ * Pseudo action: lxc-ms-master_demote_0 -+ * Resource action: lxc1 stop on rhel7-1 -+ * 
Pseudo action: stonith-lxc1-reboot on lxc1 -+ * Pseudo action: stonith_complete -+ * Resource action: container1 start on rhel7-1 -+ * Pseudo action: lxc-ms_demote_0 -+ * Pseudo action: lxc-ms-master_demoted_0 -+ * Pseudo action: lxc-ms-master_stop_0 -+ * Pseudo action: lxc-ms_stop_0 -+ * Pseudo action: lxc-ms-master_stopped_0 -+ * Pseudo action: lxc-ms-master_start_0 -+ * Pseudo action: all_stopped -+ * Resource action: lxc1 start on rhel7-1 -+ * Resource action: lxc1 monitor=30000 on rhel7-1 -+ * Resource action: lxc-ms start on lxc1 -+ * Pseudo action: lxc-ms-master_running_0 -+ * Pseudo action: lxc-ms-master_promote_0 -+ * Resource action: lxc-ms promote on lxc1 -+ * Pseudo action: lxc-ms-master_promoted_0 -+Using the original execution date of: 2018-10-15 16:02:04Z -+ -+Revised cluster status: -+Online: [ rhel7-1 rhel7-2 rhel7-3 rhel7-4 rhel7-5 ] -+Containers: [ lxc1:container1 lxc2:container2 ] -+ -+ Fencing (stonith:fence_xvm): Started rhel7-2 -+ FencingPass (stonith:fence_dummy): Started rhel7-3 -+ container1 (ocf::heartbeat:VirtualDomain): Started rhel7-1 -+ container2 (ocf::heartbeat:VirtualDomain): Started rhel7-1 -+ Master/Slave Set: lxc-ms-master [lxc-ms] -+ Masters: [ lxc1 ] -+ Slaves: [ lxc2 ] -+ -diff --git a/pengine/test10/guest-node-cleanup.xml b/pengine/test10/guest-node-cleanup.xml -new file mode 100644 -index 0000000..35835bc ---- /dev/null -+++ b/pengine/test10/guest-node-cleanup.xml -@@ -0,0 +1,304 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - diff --git a/SOURCES/008-security-log.patch b/SOURCES/008-security-log.patch new file mode 100644 index 0000000..0fde849 --- /dev/null +++ b/SOURCES/008-security-log.patch @@ -0,0 +1,297 @@ +From 83811e2115f5516a7faec2e653b1be3d58b35a79 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Apr 2019 09:46:51 -0500 +Subject: [PATCH 1/2] Log: libcrmcluster: improve CPG membership messages + +Show CPG event reason when provided by corosync, make messages more readable, +upgrade duplicate pid messages to warnings (and log only one message in those +cases). + +This also fixes a typo in 4d6f6e01 that led to using an index with the wrong +array, potentially leading to use of an uninitialized value or invalid memory +access. 
+--- + lib/cluster/cpg.c | 95 +++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 58 insertions(+), 37 deletions(-) + +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index c5ecc67..85476be 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -399,6 +399,32 @@ static int cmp_member_list_nodeid(const void *first, + return 0; + } + ++static const char * ++cpgreason2str(cpg_reason_t reason) ++{ ++ switch (reason) { ++ case CPG_REASON_JOIN: return " via cpg_join"; ++ case CPG_REASON_LEAVE: return " via cpg_leave"; ++ case CPG_REASON_NODEDOWN: return " via cluster exit"; ++ case CPG_REASON_NODEUP: return " via cluster join"; ++ case CPG_REASON_PROCDOWN: return " for unknown reason"; ++ default: break; ++ } ++ return ""; ++} ++ ++static inline const char * ++peer_name(crm_node_t *peer) ++{ ++ if (peer == NULL) { ++ return "unknown node"; ++ } else if (peer->uname == NULL) { ++ return "peer node"; ++ } else { ++ return peer->uname; ++ } ++} ++ + void + pcmk_cpg_membership(cpg_handle_t handle, + const struct cpg_name *groupName, +@@ -410,7 +436,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + gboolean found = FALSE; + static int counter = 0; + uint32_t local_nodeid = get_local_nodeid(handle); +- const struct cpg_address *key, **rival, **sorted; ++ const struct cpg_address *key, **sorted; + + sorted = malloc(member_list_entries * sizeof(const struct cpg_address *)); + CRM_ASSERT(sorted != NULL); +@@ -424,11 +450,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + + for (i = 0; i < left_list_entries; i++) { + crm_node_t *peer = crm_find_peer(left_list[i].nodeid, NULL); +- +- crm_info("Node %u left group %s (peer=%s:%llu, counter=%d.%d)", +- left_list[i].nodeid, groupName->value, +- (peer? 
peer->uname : ""), +- (unsigned long long) left_list[i].pid, counter, i); ++ const struct cpg_address **rival = NULL; + + /* in CPG world, NODE:PROCESS-IN-MEMBERSHIP-OF-G is an 1:N relation + and not playing by this rule may go wild in case of multiple +@@ -442,7 +464,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + also API end-point carriers, and that's what matters locally + (who's the winner); + remotely, we will just compare leave_list and member_list and if +- the left process has it's node retained in member_list (under some ++ the left process has its node retained in member_list (under some + other PID, anyway) we will just ignore it as well + XXX: long-term fix is to establish in-out PID-aware tracking? */ + if (peer) { +@@ -450,51 +472,51 @@ pcmk_cpg_membership(cpg_handle_t handle, + rival = bsearch(&key, sorted, member_list_entries, + sizeof(const struct cpg_address *), + cmp_member_list_nodeid); +- if (rival == NULL) { ++ } ++ ++ if (rival == NULL) { ++ crm_info("Group %s event %d: %s (node %u pid %u) left%s", ++ groupName->value, counter, peer_name(peer), ++ left_list[i].nodeid, left_list[i].pid, ++ cpgreason2str(left_list[i].reason)); ++ if (peer) { + crm_update_peer_proc(__FUNCTION__, peer, crm_proc_cpg, + OFFLINESTATUS); +- } else if (left_list[i].nodeid == local_nodeid) { +- crm_info("Ignoring the above event %s.%d, comes from a local" +- " rival process (presumably not us): %llu", +- groupName->value, counter, +- (unsigned long long) left_list[i].pid); +- } else { +- crm_info("Ignoring the above event %s.%d, comes from" +- " a rival-rich node: %llu (e.g. 
%llu process" +- " carries on)", +- groupName->value, counter, +- (unsigned long long) left_list[i].pid, +- (unsigned long long) (*rival)->pid); + } ++ } else if (left_list[i].nodeid == local_nodeid) { ++ crm_warn("Group %s event %d: duplicate local pid %u left%s", ++ groupName->value, counter, ++ left_list[i].pid, cpgreason2str(left_list[i].reason)); ++ } else { ++ crm_warn("Group %s event %d: " ++ "%s (node %u) duplicate pid %u left%s (%u remains)", ++ groupName->value, counter, peer_name(peer), ++ left_list[i].nodeid, left_list[i].pid, ++ cpgreason2str(left_list[i].reason), (*rival)->pid); + } + } + free(sorted); + sorted = NULL; + + for (i = 0; i < joined_list_entries; i++) { +- crm_info("Node %u joined group %s (counter=%d.%d, pid=%llu," +- " unchecked for rivals)", +- joined_list[i].nodeid, groupName->value, counter, i, +- (unsigned long long) left_list[i].pid); ++ crm_info("Group %s event %d: node %u pid %u joined%s", ++ groupName->value, counter, joined_list[i].nodeid, ++ joined_list[i].pid, cpgreason2str(joined_list[i].reason)); + } + + for (i = 0; i < member_list_entries; i++) { + crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); + +- crm_info("Node %u still member of group %s (peer=%s:%llu," +- " counter=%d.%d, at least once)", +- member_list[i].nodeid, groupName->value, +- (peer? 
peer->uname : ""), member_list[i].pid, +- counter, i); +- + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { + /* see the note above */ +- crm_info("Ignoring the above event %s.%d, comes from a local rival" +- " process: %llu", groupName->value, counter, +- (unsigned long long) member_list[i].pid); ++ crm_warn("Group %s event %d: detected duplicate local pid %u", ++ groupName->value, counter, member_list[i].pid); + continue; + } ++ crm_info("Group %s event %d: %s (node %u pid %u) is member", ++ groupName->value, counter, peer_name(peer), ++ member_list[i].nodeid, member_list[i].pid); + + /* Anyone that is sending us CPG messages must also be a _CPG_ member. + * But it's _not_ safe to assume it's in the quorum membership. +@@ -514,9 +536,8 @@ pcmk_cpg_membership(cpg_handle_t handle, + * + * Set the threshold to 1 minute + */ +- crm_err("Node %s[%u] appears to be online even though we think" +- " it is dead (unchecked for rivals)", +- peer->uname, peer->id); ++ crm_warn("Node %u is member of group %s but was believed offline", ++ member_list[i].nodeid, groupName->value); + if (crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, 0)) { + peer->votes = 0; + } +@@ -529,7 +550,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + } + + if (!found) { +- crm_err("We're not part of CPG group '%s' anymore!", groupName->value); ++ crm_err("Local node was evicted from group %s", groupName->value); + cpg_evicted = TRUE; + } + +-- +1.8.3.1 + + +From 87769895ebccc1033a876ef98a21577d6f4d1c0e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Apr 2019 22:18:27 -0500 +Subject: [PATCH 2/2] Fix: libcrmcluster,pacemakerd: restore compatibility with + corosync 1 + +Pacemaker 1.1 supports older versions of corosync that don't supply +cs_strerror() or CMAP. This simply drops usage of cs_strerror() (in favor of just +the raw error code, as before 07a82c5c) and properly conditionalizes CMAP +usage.
+--- + lib/cluster/cpg.c | 12 ++++-------- + mcp/corosync.c | 13 +++++++------ + 2 files changed, 11 insertions(+), 14 deletions(-) + +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 85476be..e4783e5 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -91,15 +91,13 @@ uint32_t get_local_nodeid(cpg_handle_t handle) + crm_trace("Creating connection"); + cs_repeat(retries, 5, rc = cpg_initialize(&local_handle, &cb)); + if (rc != CS_OK) { +- crm_err("Could not connect to the CPG API: %s (%d)", +- cs_strerror(rc), rc); ++ crm_err("Could not connect to the CPG API (rc=%d)", rc); + return 0; + } + + rc = cpg_fd_get(local_handle, &fd); + if (rc != CS_OK) { +- crm_err("Could not obtain the CPG API connection: %s (%d)", +- cs_strerror(rc), rc); ++ crm_err("Could not obtain the CPG API connection (rc=%d)", rc); + goto bail; + } + +@@ -594,15 +592,13 @@ cluster_connect_cpg(crm_cluster_t *cluster) + + cs_repeat(retries, 30, rc = cpg_initialize(&handle, &cpg_callbacks)); + if (rc != CS_OK) { +- crm_err("Could not connect to the CPG API: %s (%d)", +- cs_strerror(rc), rc); ++ crm_err("Could not connect to the CPG API (rc=%d)", rc); + goto bail; + } + + rc = cpg_fd_get(handle, &fd); + if (rc != CS_OK) { +- crm_err("Could not obtain the CPG API connection: %s (%d)", +- cs_strerror(rc), rc); ++ crm_err("Could not obtain the CPG API connection (rc=%d)", rc); + goto bail; + } + +diff --git a/mcp/corosync.c b/mcp/corosync.c +index 407a63f..40be727 100644 +--- a/mcp/corosync.c ++++ b/mcp/corosync.c +@@ -118,13 +118,13 @@ cluster_connect_cfg(uint32_t * nodeid) + cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); + + if (rc != CS_OK) { +- crm_err("corosync cfg init: %s (%d)", cs_strerror(rc), rc); ++ crm_err("corosync cfg init error %d", rc); + return FALSE; + } + + rc = corosync_cfg_fd_get(cfg_handle, &fd); + if (rc != CS_OK) { +- crm_err("corosync cfg fd_get: %s (%d)", cs_strerror(rc), rc); ++ crm_err("corosync cfg fd_get error %d", rc); 
+ goto bail; + } + +@@ -314,8 +314,8 @@ mcp_read_config(void) + rc = cmap_initialize(&local_handle); + if (rc != CS_OK) { + retries++; +- printf("cmap connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); +- crm_info("cmap connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); ++ printf("cmap connection setup failed: error %d. Retrying in %ds\n", rc, retries); ++ crm_info("cmap connection setup failed: error %d. Retrying in %ds", rc, retries); + sleep(retries); + + } else { +@@ -331,10 +331,10 @@ mcp_read_config(void) + return FALSE; + } + ++#if HAVE_CMAP + rc = cmap_fd_get(local_handle, &fd); + if (rc != CS_OK) { +- crm_err("Could not obtain the CMAP API connection: %s (%d)", +- cs_strerror(rc), rc); ++ crm_err("Could not obtain the CMAP API connection: error %d", rc); + cmap_finalize(local_handle); + return FALSE; + } +@@ -354,6 +354,7 @@ mcp_read_config(void) + cmap_finalize(local_handle); + return FALSE; + } ++#endif + + stack = get_cluster_type(); + crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); +-- +1.8.3.1 + diff --git a/SOURCES/009-sbd-guest.patch b/SOURCES/009-sbd-guest.patch deleted file mode 100644 index 0fd41b1..0000000 --- a/SOURCES/009-sbd-guest.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 6f7ce4e903eed136cc9038952f9d57d4423736dd Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Sun, 4 Nov 2018 23:15:58 +0100 -Subject: [PATCH 1/2] Refactor: remote_ra: have attribute strings in msg_xml.h - ---- - crmd/remote_lrmd_ra.c | 6 +++--- - include/crm/msg_xml.h | 3 +++ - 2 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c -index 3cdc7f0..a164fc1 100644 ---- a/crmd/remote_lrmd_ra.c -+++ b/crmd/remote_lrmd_ra.c -@@ -724,10 +724,10 @@ handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeo - int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? 
MAX_START_TIMEOUT_MS : timeout_ms; - - for (tmp = cmd->params; tmp; tmp = tmp->next) { -- if (safe_str_eq(tmp->key, "addr") || safe_str_eq(tmp->key, "server")) { -+ if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR) || -+ safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_SERVER)) { - server = tmp->value; -- } -- if (safe_str_eq(tmp->key, "port")) { -+ } else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) { - port = atoi(tmp->value); - } - } -diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h -index 1ac4302..55f42c4 100644 ---- a/include/crm/msg_xml.h -+++ b/include/crm/msg_xml.h -@@ -233,6 +233,9 @@ - # define XML_RSC_ATTR_REMOTE_NODE "remote-node" - # define XML_RSC_ATTR_CLEAR_OP "clear_failure_op" - # define XML_RSC_ATTR_CLEAR_INTERVAL "clear_failure_interval" -+# define XML_RSC_ATTR_REMOTE_RA_ADDR "addr" -+# define XML_RSC_ATTR_REMOTE_RA_SERVER "server" -+# define XML_RSC_ATTR_REMOTE_RA_PORT "port" - - # define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval" - --- -1.8.3.1 - - -From 4dae6746002b034868feda763a85de85e08834e7 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Sun, 4 Nov 2018 23:54:11 +0100 -Subject: [PATCH 2/2] Fix: pacemaker-remote: skip remote_config_check for - guest-nodes - -This is crucial when watchdog-fencing is enabled as the sbd-check -done by pacemaker-remote would fail on guest-containers & bundles -(eventually tearing down pacemaker-remote inside the container) -and even on system-virtualized-guests the sbd-check doesn't make -sense as these guests would be fenced by stop/start-cycling the -VM. 
---- - crmd/crmd_lrm.h | 1 + - crmd/lrm_state.c | 14 +++++++++----- - crmd/remote_lrmd_ra.c | 20 ++++++++++++++++++++ - 3 files changed, 30 insertions(+), 5 deletions(-) - -diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h -index 7a74f7a..d115056 100644 ---- a/crmd/crmd_lrm.h -+++ b/crmd/crmd_lrm.h -@@ -168,5 +168,6 @@ void remote_ra_fail(const char *node_name); - void remote_ra_process_pseudo(xmlNode *xml); - gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state); - void remote_ra_process_maintenance_nodes(xmlNode *xml); -+gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); - - gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending); -diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c -index 497afe1..40da762 100644 ---- a/crmd/lrm_state.c -+++ b/crmd/lrm_state.c -@@ -500,11 +500,15 @@ crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) - const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); - - proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); -- if (proxy != NULL) { -- /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ -- int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); -- fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd, -- "remote_config_check", remote_config_check, NULL); -+ if (!remote_ra_controlling_guest(lrm_state)) { -+ if (proxy != NULL) { -+ /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ -+ int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); -+ fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd, -+ "remote_config_check", remote_config_check, NULL); -+ } -+ } else { -+ crm_debug("Skipping remote_config_check for guest-nodes"); - } - - } else if (safe_str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ)) { -diff --git a/crmd/remote_lrmd_ra.c 
b/crmd/remote_lrmd_ra.c -index a164fc1..0414709 100644 ---- a/crmd/remote_lrmd_ra.c -+++ b/crmd/remote_lrmd_ra.c -@@ -85,6 +85,15 @@ typedef struct remote_ra_data_s { - * so we have it signalled back with the - * transition from pengine - */ -+ gboolean controlling_guest; /* Similar for if we are controlling a guest -+ * or a bare-metal remote. -+ * Fortunately there is a meta-attribute in -+ * the transition already and as the -+ * situation doesn't change over time we can -+ * use the resource start for noting down -+ * the information for later use when the -+ * attributes aren't at hand. -+ */ - } remote_ra_data_t; - - static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms); -@@ -721,6 +730,7 @@ handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeo - const char *server = NULL; - lrmd_key_value_t *tmp = NULL; - int port = 0; -+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data; - int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? 
MAX_START_TIMEOUT_MS : timeout_ms; - - for (tmp = cmd->params; tmp; tmp = tmp->next) { -@@ -729,6 +739,8 @@ handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeo - server = tmp->value; - } else if (safe_str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT)) { - port = atoi(tmp->value); -+ } else if (safe_str_eq(tmp->key, CRM_META"_"XML_RSC_ATTR_CONTAINER)) { -+ ra_data->controlling_guest = TRUE; - } - } - -@@ -1262,3 +1274,11 @@ remote_ra_is_in_maintenance(lrm_state_t * lrm_state) - - return ra_data->is_maintenance; - } -+ -+gboolean -+remote_ra_controlling_guest(lrm_state_t * lrm_state) -+{ -+ remote_ra_data_t *ra_data = lrm_state->remote_ra_data; -+ -+ return ra_data->controlling_guest; -+} --- -1.8.3.1 - diff --git a/SOURCES/009-use-after-free.patch b/SOURCES/009-use-after-free.patch new file mode 100644 index 0000000..d419a0f --- /dev/null +++ b/SOURCES/009-use-after-free.patch @@ -0,0 +1,158 @@ +From dd521e724c4c2d4f074ffcf30a9e7a844fcfb7d2 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Sat, 18 May 2019 06:17:36 +0200 +Subject: [PATCH] Fix: fence-lib: avoid use-after-free on early failure return + +Bailing out in case of non-existant fence-agent is such a case. 
+--- + fencing/commands.c | 30 +++++++++++++++++++++--------- + include/crm/fencing/internal.h | 5 +++-- + lib/fencing/st_client.c | 28 +++++++++++++++++++++------- + 3 files changed, 45 insertions(+), 18 deletions(-) + +diff --git a/fencing/commands.c b/fencing/commands.c +index af5324a..d47a5ea 100644 +--- a/fencing/commands.c ++++ b/fencing/commands.c +@@ -107,6 +107,7 @@ typedef struct async_command_s { + int last_timeout_signo; + + stonith_device_t *active_on; ++ stonith_device_t *activating_on; + } async_command_t; + + static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, +@@ -301,6 +302,19 @@ get_active_cmds(stonith_device_t * device) + return counter; + } + ++static void ++fork_cb(GPid pid, gpointer user_data) ++{ ++ async_command_t *cmd = (async_command_t *) user_data; ++ stonith_device_t * device = cmd->activating_on; ++ ++ crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds", ++ cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", ++ device->id, pid, cmd->timeout); ++ cmd->active_on = device; ++ cmd->activating_on = NULL; ++} ++ + static gboolean + stonith_device_execute(stonith_device_t * device) + { +@@ -387,19 +401,17 @@ stonith_device_execute(stonith_device_t * device) + cmd->victim_nodeid, + cmd->timeout, device->params, device->aliases); + +- /* for async exec, exec_rc is pid if positive and error code if negative/zero */ +- exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb); +- +- if (exec_rc > 0) { +- crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds", +- cmd->action, cmd->victim ? " for node " : "", cmd->victim ? 
cmd->victim : "", +- device->id, exec_rc, cmd->timeout); +- cmd->active_on = device; ++ /* for async exec, exec_rc is negative for early error exit ++ otherwise handling of success/errors is done via callbacks */ ++ cmd->activating_on = device; ++ exec_rc = stonith_action_execute_async(action, (void *)cmd, ++ cmd->done_cb, fork_cb); + +- } else { ++ if (exec_rc < 0) { + crm_warn("Operation %s%s%s on %s failed: %s (%d)", + cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", + device->id, pcmk_strerror(exec_rc), exec_rc); ++ cmd->activating_on = NULL; + cmd->done_cb(0, exec_rc, NULL, cmd); + } + return TRUE; +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 90673bb..24df230 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -24,11 +24,12 @@ void stonith__destroy_action(stonith_action_t *action); + void stonith__action_result(stonith_action_t *action, int *rc, char **output, + char **error_output); + +-GPid ++int + stonith_action_execute_async(stonith_action_t * action, + void *userdata, + void (*done) (GPid pid, int rc, const char *output, +- gpointer user_data)); ++ gpointer user_data), ++ void (*fork_cb) (GPid pid, gpointer user_data)); + + int stonith__execute(stonith_action_t *action); + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 0c1eadc..c38f356 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -54,6 +54,7 @@ struct stonith_action_s { + int async; + void *userdata; + void (*done_cb) (GPid pid, gint status, const char *output, gpointer user_data); ++ void (*fork_cb) (GPid pid, gpointer user_data); + + svc_action_t *svc_action; + +@@ -835,6 +836,10 @@ stonith_action_async_forked(svc_action_t *svc_action) + action->pid = svc_action->pid; + action->svc_action = svc_action; + ++ if (action->fork_cb) { ++ (action->fork_cb) (svc_action->pid, action->userdata); ++ } ++ + crm_trace("Child process %d performing action '%s' 
successfully forked", + action->pid, action->action); + } +@@ -916,25 +921,34 @@ internal_stonith_action_execute(stonith_action_t * action) + return rc; + } + +-GPid ++/*! ++ * \internal ++ * \brief Kick off execution of an async stonith action ++ * ++ * \param[in,out] action Action to be executed ++ * \param[in,out] userdata Datapointer to be passed to callbacks ++ * \param[in] done Callback to notify action has failed/succeeded ++ * \param[in] fork_callback Callback to notify successful fork of child ++ * ++ * \return pcmk_ok if ownership of action has been taken, -errno otherwise ++ */ ++int + stonith_action_execute_async(stonith_action_t * action, + void *userdata, + void (*done) (GPid pid, int rc, const char *output, +- gpointer user_data)) ++ gpointer user_data), ++ void (*fork_cb) (GPid pid, gpointer user_data)) + { +- int rc = 0; +- + if (!action) { + return -1; + } + + action->userdata = userdata; + action->done_cb = done; ++ action->fork_cb = fork_cb; + action->async = 1; + +- rc = internal_stonith_action_execute(action); +- +- return rc < 0 ? rc : action->pid; ++ return internal_stonith_action_execute(action); + } + + /*! +-- +1.8.3.1 + diff --git a/SOURCES/010-fork-callback.patch b/SOURCES/010-fork-callback.patch new file mode 100644 index 0000000..44db052 --- /dev/null +++ b/SOURCES/010-fork-callback.patch @@ -0,0 +1,33 @@ +From b3d9a6e313cb77ef0b22aa0a182f0cabbaa2a70e Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 22 May 2019 16:34:58 +0200 +Subject: [PATCH] Fix: fence-lib: regression introduced with fork callback + +If it is a retry device is already moved from activating_on to +active_on. 
+--- + fencing/commands.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/fencing/commands.c b/fencing/commands.c +index d47a5ea..264697e 100644 +--- a/fencing/commands.c ++++ b/fencing/commands.c +@@ -306,8 +306,13 @@ static void + fork_cb(GPid pid, gpointer user_data) + { + async_command_t *cmd = (async_command_t *) user_data; +- stonith_device_t * device = cmd->activating_on; ++ stonith_device_t * device = ++ /* in case of a retry we've done the move from ++ * activating_on to active_on already ++ */ ++ cmd->activating_on?cmd->activating_on:cmd->active_on; + ++ CRM_ASSERT(device); + crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds", + cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", + device->id, pid, cmd->timeout); +-- +1.8.3.1 + diff --git a/SOURCES/010-route-notify.patch b/SOURCES/010-route-notify.patch deleted file mode 100644 index c435db1..0000000 --- a/SOURCES/010-route-notify.patch +++ /dev/null @@ -1,1621 +0,0 @@ -From 2ba3fffcac2f11d12795a69eb019d961a0f39450 Mon Sep 17 00:00:00 2001 -From: Andrew Beekhof -Date: Thu, 1 Nov 2018 22:08:22 +1100 -Subject: [PATCH 1/2] Fix: rhbz#1644076 - Ensure the bundle stop/demote - notifications are directed to the correct host - ---- - pengine/graph.c | 15 ++++++++++----- - 1 file changed, 10 insertions(+), 5 deletions(-) - -diff --git a/pengine/graph.c b/pengine/graph.c -index 236b278..9edd1a1 100644 ---- a/pengine/graph.c -+++ b/pengine/graph.c -@@ -784,8 +784,9 @@ get_router_node(action_t *action) - node_t *ended_on = NULL; - node_t *router_node = NULL; - bool partial_migration = FALSE; -+ const char *task = action->task; - -- if (safe_str_eq(action->task, CRM_OP_FENCE) || is_remote_node(action->node) == FALSE) { -+ if (safe_str_eq(task, CRM_OP_FENCE) || is_remote_node(action->node) == FALSE) { - return NULL; - } - -@@ -831,11 +832,15 @@ get_router_node(action_t *action) - * moving to. 
- */ - -+ if (safe_str_eq(task, "notify")) { -+ task = g_hash_table_lookup(action->meta, "notify_operation"); -+ } -+ - /* 1. before connection rsc moves. */ -- if ((safe_str_eq(action->task, "stop") || -- safe_str_eq(action->task, "demote") || -- safe_str_eq(action->task, "migrate_from") || -- safe_str_eq(action->task, "migrate_to")) && !partial_migration) { -+ if ((safe_str_eq(task, "stop") || -+ safe_str_eq(task, "demote") || -+ safe_str_eq(task, "migrate_from") || -+ safe_str_eq(task, "migrate_to")) && !partial_migration) { - - router_node = began_on; - --- -1.8.3.1 - - -From 61f9622dfd42f7d4eb71cdc2eeae374e589d34e0 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 5 Nov 2018 11:41:12 -0600 -Subject: [PATCH 2/2] Test: scheduler: ensure remote notifications are directed - to correct host - -This test ensures that the pre-notify actions sent to rabbitmq-bundle-0 for the -stop of rabbitmq-bundle (which is moving from controller-0 to controller-1) are -routed through controller-0. 
---- - pengine/regression.sh | 1 + - pengine/test10/route-remote-notify.dot | 121 ++++++ - pengine/test10/route-remote-notify.exp | 612 +++++++++++++++++++++++++++++ - pengine/test10/route-remote-notify.scores | 182 +++++++++ - pengine/test10/route-remote-notify.summary | 102 +++++ - pengine/test10/route-remote-notify.xml | 487 +++++++++++++++++++++++ - 6 files changed, 1505 insertions(+) - create mode 100644 pengine/test10/route-remote-notify.dot - create mode 100644 pengine/test10/route-remote-notify.exp - create mode 100644 pengine/test10/route-remote-notify.scores - create mode 100644 pengine/test10/route-remote-notify.summary - create mode 100644 pengine/test10/route-remote-notify.xml - -diff --git a/pengine/regression.sh b/pengine/regression.sh -index ead5fd8..4ba4671 100755 ---- a/pengine/regression.sh -+++ b/pengine/regression.sh -@@ -483,6 +483,7 @@ do_test notify-2 "Notify simple, confirm" - do_test notify-3 "Notify move, confirm" - do_test novell-239079 "Notification priority" - #do_test notify-2 "Notify - 764" -+do_test route-remote-notify "Route remote notify actions through correct cluster node" - - echo "" - do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" -diff --git a/pengine/test10/route-remote-notify.dot b/pengine/test10/route-remote-notify.dot -new file mode 100644 -index 0000000..c473321 ---- /dev/null -+++ b/pengine/test10/route-remote-notify.dot -@@ -0,0 +1,121 @@ -+digraph "g" { -+"Cancel rabbitmq-bundle-1_monitor_30000 controller-1" [ style=bold color="green" fontcolor="black"] -+"Cancel rabbitmq-bundle-2_monitor_30000 controller-2" [ style=bold color="green" fontcolor="black"] -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"do_shutdown controller-0" [ style=bold color="green" fontcolor="black"] -+"haproxy-bundle-docker-0_stop_0 controller-0" -> "all_stopped" [ style = bold] -+"haproxy-bundle-docker-0_stop_0 controller-0" -> "do_shutdown controller-0" [ style = bold] -+"haproxy-bundle-docker-0_stop_0 
controller-0" -> "haproxy-bundle_stopped_0" [ style = bold] -+"haproxy-bundle-docker-0_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] -+"haproxy-bundle_stop_0" -> "haproxy-bundle-docker-0_stop_0 controller-0" [ style = bold] -+"haproxy-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] -+"haproxy-bundle_stopped_0" -> "ip-172.17.1.11_stop_0 controller-0" [ style = bold] -+"haproxy-bundle_stopped_0" -> "ip-192.168.24.12_stop_0 controller-0" [ style = bold] -+"haproxy-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"ip-172.17.1.11_monitor_10000 controller-1" [ style=bold color="green" fontcolor="black"] -+"ip-172.17.1.11_start_0 controller-1" -> "ip-172.17.1.11_monitor_10000 controller-1" [ style = bold] -+"ip-172.17.1.11_start_0 controller-1" [ style=bold color="green" fontcolor="black"] -+"ip-172.17.1.11_stop_0 controller-0" -> "all_stopped" [ style = bold] -+"ip-172.17.1.11_stop_0 controller-0" -> "do_shutdown controller-0" [ style = bold] -+"ip-172.17.1.11_stop_0 controller-0" -> "ip-172.17.1.11_start_0 controller-1" [ style = bold] -+"ip-172.17.1.11_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] -+"ip-192.168.24.12_monitor_10000 controller-2" [ style=bold color="green" fontcolor="black"] -+"ip-192.168.24.12_start_0 controller-2" -> "ip-192.168.24.12_monitor_10000 controller-2" [ style = bold] -+"ip-192.168.24.12_start_0 controller-2" [ style=bold color="green" fontcolor="black"] -+"ip-192.168.24.12_stop_0 controller-0" -> "all_stopped" [ style = bold] -+"ip-192.168.24.12_stop_0 controller-0" -> "do_shutdown controller-0" [ style = bold] -+"ip-192.168.24.12_stop_0 controller-0" -> "ip-192.168.24.12_start_0 controller-2" [ style = bold] -+"ip-192.168.24.12_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] -+"openstack-cinder-volume-docker-0_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] -+"openstack-cinder-volume-docker-0_start_0 controller-2" -> 
"openstack-cinder-volume-docker-0_monitor_60000 controller-2" [ style = bold] -+"openstack-cinder-volume-docker-0_start_0 controller-2" -> "openstack-cinder-volume_running_0" [ style = bold] -+"openstack-cinder-volume-docker-0_start_0 controller-2" [ style=bold color="green" fontcolor="black"] -+"openstack-cinder-volume-docker-0_stop_0 controller-0" -> "all_stopped" [ style = bold] -+"openstack-cinder-volume-docker-0_stop_0 controller-0" -> "do_shutdown controller-0" [ style = bold] -+"openstack-cinder-volume-docker-0_stop_0 controller-0" -> "openstack-cinder-volume-docker-0_start_0 controller-2" [ style = bold] -+"openstack-cinder-volume-docker-0_stop_0 controller-0" -> "openstack-cinder-volume_stopped_0" [ style = bold] -+"openstack-cinder-volume-docker-0_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] -+"openstack-cinder-volume_running_0" [ style=bold color="green" fontcolor="orange"] -+"openstack-cinder-volume_start_0" -> "openstack-cinder-volume-docker-0_start_0 controller-2" [ style = bold] -+"openstack-cinder-volume_start_0" [ style=bold color="green" fontcolor="orange"] -+"openstack-cinder-volume_stop_0" -> "openstack-cinder-volume-docker-0_stop_0 controller-0" [ style = bold] -+"openstack-cinder-volume_stop_0" [ style=bold color="green" fontcolor="orange"] -+"openstack-cinder-volume_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-0_monitor_60000 controller-1" [ style=dashed color="red" fontcolor="black"] -+"rabbitmq-bundle-0_start_0 controller-1" -> "rabbitmq-bundle-0_monitor_60000 controller-1" [ style = dashed] -+"rabbitmq-bundle-0_start_0 controller-1" -> "rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style = dashed] -+"rabbitmq-bundle-0_start_0 controller-1" -> "rabbitmq_start_0 rabbitmq-bundle-0" [ style = dashed] -+"rabbitmq-bundle-0_start_0 controller-1" [ style=dashed color="red" fontcolor="black"] -+"rabbitmq-bundle-0_stop_0 controller-0" -> "all_stopped" [ style = bold] -+"rabbitmq-bundle-0_stop_0 
controller-0" -> "do_shutdown controller-0" [ style = bold] -+"rabbitmq-bundle-0_stop_0 controller-0" -> "rabbitmq-bundle-0_start_0 controller-1" [ style = dashed] -+"rabbitmq-bundle-0_stop_0 controller-0" -> "rabbitmq-bundle-docker-0_stop_0 controller-0" [ style = bold] -+"rabbitmq-bundle-0_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] -+"rabbitmq-bundle-1_monitor_60000 controller-1" [ style=bold color="green" fontcolor="black"] -+"rabbitmq-bundle-2_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] -+"rabbitmq-bundle-clone_confirmed-post_notify_running_0" -> "rabbitmq-bundle_running_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "all_stopped" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "rabbitmq-bundle-clone_pre_notify_start_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" -> "rabbitmq-bundle_stopped_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_confirmed-pre_notify_start_0" -> "rabbitmq-bundle-clone_post_notify_running_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-pre_notify_start_0" -> "rabbitmq-bundle-clone_start_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" -> "rabbitmq-bundle-clone_post_notify_stopped_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" -> "rabbitmq-bundle-clone_stop_0" [ style = bold] -+"rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_post_notify_running_0" -> "rabbitmq-bundle-clone_confirmed-post_notify_running_0" [ style = bold] 
-+"rabbitmq-bundle-clone_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_post_notify_stopped_0" -> "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" [ style = bold] -+"rabbitmq-bundle-clone_post_notify_stopped_0" -> "rabbitmq_post_notify_stopped_0 rabbitmq-bundle-1" [ style = bold] -+"rabbitmq-bundle-clone_post_notify_stopped_0" -> "rabbitmq_post_notify_stopped_0 rabbitmq-bundle-2" [ style = bold] -+"rabbitmq-bundle-clone_post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_pre_notify_start_0" -> "rabbitmq-bundle-clone_confirmed-pre_notify_start_0" [ style = bold] -+"rabbitmq-bundle-clone_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_pre_notify_stop_0" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] -+"rabbitmq-bundle-clone_pre_notify_stop_0" -> "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-0" [ style = bold] -+"rabbitmq-bundle-clone_pre_notify_stop_0" -> "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-1" [ style = bold] -+"rabbitmq-bundle-clone_pre_notify_stop_0" -> "rabbitmq_pre_notify_stop_0 rabbitmq-bundle-2" [ style = bold] -+"rabbitmq-bundle-clone_pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_running_0" -> "rabbitmq-bundle-clone_post_notify_running_0" [ style = bold] -+"rabbitmq-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_start_0" -> "rabbitmq-bundle-clone_running_0" [ style = bold] -+"rabbitmq-bundle-clone_start_0" -> "rabbitmq_start_0 rabbitmq-bundle-0" [ style = dashed] -+"rabbitmq-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-clone_stop_0" -> "rabbitmq-bundle-clone_stopped_0" [ style = bold] -+"rabbitmq-bundle-clone_stop_0" -> "rabbitmq_stop_0 rabbitmq-bundle-0" [ style = bold] -+"rabbitmq-bundle-clone_stop_0" [ style=bold color="green" fontcolor="orange"] 
-+"rabbitmq-bundle-clone_stopped_0" -> "rabbitmq-bundle-clone_post_notify_stopped_0" [ style = bold] -+"rabbitmq-bundle-clone_stopped_0" -> "rabbitmq-bundle-clone_start_0" [ style = bold] -+"rabbitmq-bundle-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle-docker-0_stop_0 controller-0" -> "all_stopped" [ style = bold] -+"rabbitmq-bundle-docker-0_stop_0 controller-0" -> "do_shutdown controller-0" [ style = bold] -+"rabbitmq-bundle-docker-0_stop_0 controller-0" -> "rabbitmq-bundle_stopped_0" [ style = bold] -+"rabbitmq-bundle-docker-0_stop_0 controller-0" [ style=bold color="green" fontcolor="black"] -+"rabbitmq-bundle_running_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle_stop_0" -> "rabbitmq-bundle-clone_stop_0" [ style = bold] -+"rabbitmq-bundle_stop_0" -> "rabbitmq-bundle-docker-0_stop_0 controller-0" [ style = bold] -+"rabbitmq-bundle_stop_0" -> "rabbitmq_stop_0 rabbitmq-bundle-0" [ style = bold] -+"rabbitmq-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style=dashed color="red" fontcolor="black"] -+"rabbitmq_post_notify_stopped_0 rabbitmq-bundle-1" -> "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" [ style = bold] -+"rabbitmq_post_notify_stopped_0 rabbitmq-bundle-1" [ style=bold color="green" fontcolor="black"] -+"rabbitmq_post_notify_stopped_0 rabbitmq-bundle-2" -> "rabbitmq-bundle-clone_confirmed-post_notify_stopped_0" [ style = bold] -+"rabbitmq_post_notify_stopped_0 rabbitmq-bundle-2" [ style=bold color="green" fontcolor="black"] -+"rabbitmq_pre_notify_stop_0 rabbitmq-bundle-0" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] -+"rabbitmq_pre_notify_stop_0 rabbitmq-bundle-0" [ style=bold color="green" fontcolor="black"] -+"rabbitmq_pre_notify_stop_0 rabbitmq-bundle-1" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] 
-+"rabbitmq_pre_notify_stop_0 rabbitmq-bundle-1" [ style=bold color="green" fontcolor="black"] -+"rabbitmq_pre_notify_stop_0 rabbitmq-bundle-2" -> "rabbitmq-bundle-clone_confirmed-pre_notify_stop_0" [ style = bold] -+"rabbitmq_pre_notify_stop_0 rabbitmq-bundle-2" [ style=bold color="green" fontcolor="black"] -+"rabbitmq_start_0 rabbitmq-bundle-0" -> "rabbitmq-bundle-clone_running_0" [ style = dashed] -+"rabbitmq_start_0 rabbitmq-bundle-0" -> "rabbitmq_monitor_10000 rabbitmq-bundle-0" [ style = dashed] -+"rabbitmq_start_0 rabbitmq-bundle-0" [ style=dashed color="red" fontcolor="black"] -+"rabbitmq_stop_0 rabbitmq-bundle-0" -> "all_stopped" [ style = bold] -+"rabbitmq_stop_0 rabbitmq-bundle-0" -> "rabbitmq-bundle-0_stop_0 controller-0" [ style = bold] -+"rabbitmq_stop_0 rabbitmq-bundle-0" -> "rabbitmq-bundle-clone_stopped_0" [ style = bold] -+"rabbitmq_stop_0 rabbitmq-bundle-0" -> "rabbitmq_start_0 rabbitmq-bundle-0" [ style = dashed] -+"rabbitmq_stop_0 rabbitmq-bundle-0" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/pengine/test10/route-remote-notify.exp b/pengine/test10/route-remote-notify.exp -new file mode 100644 -index 0000000..339555e ---- /dev/null -+++ b/pengine/test10/route-remote-notify.exp -@@ -0,0 +1,612 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/route-remote-notify.scores b/pengine/test10/route-remote-notify.scores -new file mode 100644 -index 0000000..ae96db5 ---- /dev/null -+++ b/pengine/test10/route-remote-notify.scores -@@ -0,0 +1,182 @@ -+Allocation scores: -+Using the original execution date of: 2018-10-31 11:51:32Z -+clone_color: rabbitmq-bundle-clone allocation score on controller-0: -INFINITY -+clone_color: rabbitmq-bundle-clone allocation score on controller-1: -INFINITY -+clone_color: rabbitmq-bundle-clone allocation score on controller-2: -INFINITY -+clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-0: 0 -+clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-1: 0 -+clone_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-2: 0 -+clone_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY -+clone_color: rabbitmq:1 allocation score on 
rabbitmq-bundle-1: INFINITY -+clone_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-0: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-1: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: 
-INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-0: INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-1: 0 -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+container_color: haproxy-bundle-docker-0 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-0: 0 
-+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-0: 0 -+container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on 
controller-1: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-1: 0 -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: openstack-cinder-volume allocation score on controller-0: 0 -+container_color: openstack-cinder-volume allocation score on controller-1: 0 -+container_color: openstack-cinder-volume allocation score on controller-2: 0 -+container_color: openstack-cinder-volume-docker-0 allocation score on controller-0: INFINITY -+container_color: openstack-cinder-volume-docker-0 allocation score on controller-1: 0 -+container_color: openstack-cinder-volume-docker-0 allocation score on controller-2: 0 -+container_color: rabbitmq-bundle allocation score on controller-0: 0 -+container_color: rabbitmq-bundle allocation score on controller-1: 0 -+container_color: rabbitmq-bundle allocation score on controller-2: 0 -+container_color: rabbitmq-bundle-0 allocation score on controller-0: INFINITY -+container_color: rabbitmq-bundle-0 allocation score on controller-1: 0 -+container_color: rabbitmq-bundle-0 allocation score on controller-2: 0 -+container_color: rabbitmq-bundle-1 allocation score on controller-0: 0 
-+container_color: rabbitmq-bundle-1 allocation score on controller-1: INFINITY -+container_color: rabbitmq-bundle-1 allocation score on controller-2: 0 -+container_color: rabbitmq-bundle-2 allocation score on controller-0: 0 -+container_color: rabbitmq-bundle-2 allocation score on controller-1: 0 -+container_color: rabbitmq-bundle-2 allocation score on controller-2: INFINITY -+container_color: rabbitmq-bundle-clone allocation score on controller-0: 0 -+container_color: rabbitmq-bundle-clone allocation score on controller-1: 0 -+container_color: rabbitmq-bundle-clone allocation score on controller-2: 0 -+container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-0: -INFINITY -+container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-1: -INFINITY -+container_color: rabbitmq-bundle-clone allocation score on rabbitmq-bundle-2: -INFINITY -+container_color: rabbitmq-bundle-docker-0 allocation score on controller-0: INFINITY -+container_color: rabbitmq-bundle-docker-0 allocation score on controller-1: 0 -+container_color: rabbitmq-bundle-docker-0 allocation score on controller-2: 0 -+container_color: rabbitmq-bundle-docker-1 allocation score on controller-0: 0 -+container_color: rabbitmq-bundle-docker-1 allocation score on controller-1: INFINITY -+container_color: rabbitmq-bundle-docker-1 allocation score on controller-2: 0 -+container_color: rabbitmq-bundle-docker-2 allocation score on controller-0: 0 -+container_color: rabbitmq-bundle-docker-2 allocation score on controller-1: 0 -+container_color: rabbitmq-bundle-docker-2 allocation score on controller-2: INFINITY -+container_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY -+container_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY -+container_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY -+native_color: haproxy-bundle-docker-0 allocation score on controller-0: -INFINITY -+native_color: haproxy-bundle-docker-0 allocation score on 
controller-1: -INFINITY -+native_color: haproxy-bundle-docker-0 allocation score on controller-2: -INFINITY -+native_color: haproxy-bundle-docker-1 allocation score on controller-0: -INFINITY -+native_color: haproxy-bundle-docker-1 allocation score on controller-1: INFINITY -+native_color: haproxy-bundle-docker-1 allocation score on controller-2: 0 -+native_color: haproxy-bundle-docker-2 allocation score on controller-0: -INFINITY -+native_color: haproxy-bundle-docker-2 allocation score on controller-1: -INFINITY -+native_color: haproxy-bundle-docker-2 allocation score on controller-2: INFINITY -+native_color: ip-10.0.0.101 allocation score on controller-0: -INFINITY -+native_color: ip-10.0.0.101 allocation score on controller-1: INFINITY -+native_color: ip-10.0.0.101 allocation score on controller-2: 0 -+native_color: ip-172.17.1.11 allocation score on controller-0: -INFINITY -+native_color: ip-172.17.1.11 allocation score on controller-1: 0 -+native_color: ip-172.17.1.11 allocation score on controller-2: 0 -+native_color: ip-172.17.1.20 allocation score on controller-0: -INFINITY -+native_color: ip-172.17.1.20 allocation score on controller-1: 0 -+native_color: ip-172.17.1.20 allocation score on controller-2: INFINITY -+native_color: ip-172.17.3.16 allocation score on controller-0: -INFINITY -+native_color: ip-172.17.3.16 allocation score on controller-1: INFINITY -+native_color: ip-172.17.3.16 allocation score on controller-2: 0 -+native_color: ip-172.17.4.15 allocation score on controller-0: -INFINITY -+native_color: ip-172.17.4.15 allocation score on controller-1: 0 -+native_color: ip-172.17.4.15 allocation score on controller-2: INFINITY -+native_color: ip-192.168.24.12 allocation score on controller-0: -INFINITY -+native_color: ip-192.168.24.12 allocation score on controller-1: 0 -+native_color: ip-192.168.24.12 allocation score on controller-2: 0 -+native_color: openstack-cinder-volume-docker-0 allocation score on controller-0: -INFINITY -+native_color: 
openstack-cinder-volume-docker-0 allocation score on controller-1: 0 -+native_color: openstack-cinder-volume-docker-0 allocation score on controller-2: 0 -+native_color: rabbitmq-bundle-0 allocation score on controller-0: INFINITY -+native_color: rabbitmq-bundle-0 allocation score on controller-1: 0 -+native_color: rabbitmq-bundle-0 allocation score on controller-2: 0 -+native_color: rabbitmq-bundle-1 allocation score on controller-0: 0 -+native_color: rabbitmq-bundle-1 allocation score on controller-1: INFINITY -+native_color: rabbitmq-bundle-1 allocation score on controller-2: 0 -+native_color: rabbitmq-bundle-2 allocation score on controller-0: 0 -+native_color: rabbitmq-bundle-2 allocation score on controller-1: 0 -+native_color: rabbitmq-bundle-2 allocation score on controller-2: INFINITY -+native_color: rabbitmq-bundle-docker-0 allocation score on controller-0: -INFINITY -+native_color: rabbitmq-bundle-docker-0 allocation score on controller-1: -INFINITY -+native_color: rabbitmq-bundle-docker-0 allocation score on controller-2: -INFINITY -+native_color: rabbitmq-bundle-docker-1 allocation score on controller-0: -INFINITY -+native_color: rabbitmq-bundle-docker-1 allocation score on controller-1: INFINITY -+native_color: rabbitmq-bundle-docker-1 allocation score on controller-2: 0 -+native_color: rabbitmq-bundle-docker-2 allocation score on controller-0: -INFINITY -+native_color: rabbitmq-bundle-docker-2 allocation score on controller-1: -INFINITY -+native_color: rabbitmq-bundle-docker-2 allocation score on controller-2: INFINITY -+native_color: rabbitmq:0 allocation score on rabbitmq-bundle-0: INFINITY -+native_color: rabbitmq:1 allocation score on rabbitmq-bundle-1: INFINITY -+native_color: rabbitmq:2 allocation score on rabbitmq-bundle-2: INFINITY -diff --git a/pengine/test10/route-remote-notify.summary b/pengine/test10/route-remote-notify.summary -new file mode 100644 -index 0000000..79b676c ---- /dev/null -+++ b/pengine/test10/route-remote-notify.summary 
-@@ -0,0 +1,102 @@ -+Using the original execution date of: 2018-10-31 11:51:32Z -+ -+Current cluster status: -+Online: [ controller-0 controller-1 controller-2 ] -+Containers: [ rabbitmq-bundle-0:rabbitmq-bundle-docker-0 rabbitmq-bundle-1:rabbitmq-bundle-docker-1 rabbitmq-bundle-2:rabbitmq-bundle-docker-2 ] -+ -+ Docker container set: rabbitmq-bundle [192.168.24.1:8787/rhosp13/openstack-rabbitmq:pcmklatest] -+ rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0 -+ rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1 -+ rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started controller-2 -+ ip-192.168.24.12 (ocf::heartbeat:IPaddr2): Started controller-0 -+ ip-10.0.0.101 (ocf::heartbeat:IPaddr2): Started controller-1 -+ ip-172.17.1.20 (ocf::heartbeat:IPaddr2): Started controller-2 -+ ip-172.17.1.11 (ocf::heartbeat:IPaddr2): Started controller-0 -+ ip-172.17.3.16 (ocf::heartbeat:IPaddr2): Started controller-1 -+ ip-172.17.4.15 (ocf::heartbeat:IPaddr2): Started controller-2 -+ Docker container set: haproxy-bundle [192.168.24.1:8787/rhosp13/openstack-haproxy:pcmklatest] -+ haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0 -+ haproxy-bundle-docker-1 (ocf::heartbeat:docker): Started controller-1 -+ haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started controller-2 -+ Docker container: openstack-cinder-volume [192.168.24.1:8787/rhosp13/openstack-cinder-volume:pcmklatest] -+ openstack-cinder-volume-docker-0 (ocf::heartbeat:docker): Started controller-0 -+ -+Transition Summary: -+ * Shutdown controller-0 -+ * Stop rabbitmq-bundle-docker-0 ( controller-0 ) due to node availability -+ * Stop rabbitmq-bundle-0 ( controller-0 ) due to unrunnable rabbitmq-bundle-docker-0 start -+ * Stop rabbitmq:0 ( rabbitmq-bundle-0 ) due to unrunnable rabbitmq-bundle-docker-0 start -+ * Move ip-192.168.24.12 ( controller-0 -> controller-2 ) -+ * Move ip-172.17.1.11 ( controller-0 -> controller-1 ) -+ * Stop 
haproxy-bundle-docker-0 ( controller-0 ) due to node availability -+ * Move openstack-cinder-volume-docker-0 ( controller-0 -> controller-2 ) -+ -+Executing cluster transition: -+ * Pseudo action: rabbitmq-bundle-clone_pre_notify_stop_0 -+ * Resource action: rabbitmq-bundle-1 monitor=60000 on controller-1 -+ * Resource action: rabbitmq-bundle-1 cancel=30000 on controller-1 -+ * Resource action: rabbitmq-bundle-2 monitor=60000 on controller-2 -+ * Resource action: rabbitmq-bundle-2 cancel=30000 on controller-2 -+ * Pseudo action: openstack-cinder-volume_stop_0 -+ * Pseudo action: openstack-cinder-volume_start_0 -+ * Pseudo action: haproxy-bundle_stop_0 -+ * Pseudo action: rabbitmq-bundle_stop_0 -+ * Resource action: rabbitmq notify on rabbitmq-bundle-0 -+ * Resource action: rabbitmq notify on rabbitmq-bundle-1 -+ * Resource action: rabbitmq notify on rabbitmq-bundle-2 -+ * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_stop_0 -+ * Pseudo action: rabbitmq-bundle-clone_stop_0 -+ * Resource action: haproxy-bundle-docker-0 stop on controller-0 -+ * Resource action: openstack-cinder-volume-docker-0 stop on controller-0 -+ * Pseudo action: openstack-cinder-volume_stopped_0 -+ * Pseudo action: haproxy-bundle_stopped_0 -+ * Resource action: rabbitmq stop on rabbitmq-bundle-0 -+ * Pseudo action: rabbitmq-bundle-clone_stopped_0 -+ * Resource action: rabbitmq-bundle-0 stop on controller-0 -+ * Resource action: ip-192.168.24.12 stop on controller-0 -+ * Resource action: ip-172.17.1.11 stop on controller-0 -+ * Resource action: openstack-cinder-volume-docker-0 start on controller-2 -+ * Pseudo action: openstack-cinder-volume_running_0 -+ * Pseudo action: rabbitmq-bundle-clone_post_notify_stopped_0 -+ * Resource action: rabbitmq-bundle-docker-0 stop on controller-0 -+ * Resource action: ip-192.168.24.12 start on controller-2 -+ * Resource action: ip-172.17.1.11 start on controller-1 -+ * Resource action: openstack-cinder-volume-docker-0 monitor=60000 on controller-2 -+ 
* Cluster action: do_shutdown on controller-0 -+ * Resource action: rabbitmq notify on rabbitmq-bundle-1 -+ * Resource action: rabbitmq notify on rabbitmq-bundle-2 -+ * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_stopped_0 -+ * Pseudo action: rabbitmq-bundle-clone_pre_notify_start_0 -+ * Resource action: ip-192.168.24.12 monitor=10000 on controller-2 -+ * Resource action: ip-172.17.1.11 monitor=10000 on controller-1 -+ * Pseudo action: rabbitmq-bundle_stopped_0 -+ * Pseudo action: all_stopped -+ * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_start_0 -+ * Pseudo action: rabbitmq-bundle-clone_start_0 -+ * Pseudo action: rabbitmq-bundle-clone_running_0 -+ * Pseudo action: rabbitmq-bundle-clone_post_notify_running_0 -+ * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_running_0 -+ * Pseudo action: rabbitmq-bundle_running_0 -+Using the original execution date of: 2018-10-31 11:51:32Z -+ -+Revised cluster status: -+Online: [ controller-0 controller-1 controller-2 ] -+Containers: [ rabbitmq-bundle-1:rabbitmq-bundle-docker-1 rabbitmq-bundle-2:rabbitmq-bundle-docker-2 ] -+ -+ Docker container set: rabbitmq-bundle [192.168.24.1:8787/rhosp13/openstack-rabbitmq:pcmklatest] -+ rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Stopped -+ rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started controller-1 -+ rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started controller-2 -+ ip-192.168.24.12 (ocf::heartbeat:IPaddr2): Started controller-2 -+ ip-10.0.0.101 (ocf::heartbeat:IPaddr2): Started controller-1 -+ ip-172.17.1.20 (ocf::heartbeat:IPaddr2): Started controller-2 -+ ip-172.17.1.11 (ocf::heartbeat:IPaddr2): Started controller-1 -+ ip-172.17.3.16 (ocf::heartbeat:IPaddr2): Started controller-1 -+ ip-172.17.4.15 (ocf::heartbeat:IPaddr2): Started controller-2 -+ Docker container set: haproxy-bundle [192.168.24.1:8787/rhosp13/openstack-haproxy:pcmklatest] -+ haproxy-bundle-docker-0 (ocf::heartbeat:docker): Stopped -+ 
haproxy-bundle-docker-1 (ocf::heartbeat:docker): Started controller-1 -+ haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started controller-2 -+ Docker container: openstack-cinder-volume [192.168.24.1:8787/rhosp13/openstack-cinder-volume:pcmklatest] -+ openstack-cinder-volume-docker-0 (ocf::heartbeat:docker): Started controller-2 -+ -diff --git a/pengine/test10/route-remote-notify.xml b/pengine/test10/route-remote-notify.xml -new file mode 100644 -index 0000000..0beba46 ---- /dev/null -+++ b/pengine/test10/route-remote-notify.xml -@@ -0,0 +1,487 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ --- -1.8.3.1 - diff --git a/SOURCES/011-notifs.patch b/SOURCES/011-notifs.patch deleted file mode 100644 index d67c6c5..0000000 --- a/SOURCES/011-notifs.patch +++ /dev/null @@ -1,2352 +0,0 @@ -From 141de63f1ddf48dc3c27f8397e0058e21bdecf46 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 26 Nov 2018 15:45:56 -0600 -Subject: [PATCH 1/5] Fix: scheduler: don't send clone notifications to a - stopped remote node - -Since b3f9a5bbb, we discard faked executor results when resource information is -unavailable. This has exposed pre-existing issues where clone notifications were -mistakenly scheduled for Pacemaker Remote nodes. Previously, the cluster node -that had hosted the Pacemaker Remote connection would fake the result, and the -transition would proceed. Now, if the cluster node doesn't happen to have the -resource information cached, the result will not be sent, and thus the -transition will get an action timeout. This permanently blocks later actions in -the transition. - -This commit avoids such a situation where start and promote clone notifications -were scheduled for a clone instance on a Pacemaker Remote node whose remote -connection is stopping, and thus would be stopped by the time the notification -would be needed. - -This is slightly modified from a patch provided by Andrew Beekhof -. 
- -RHBZ#1652752 ---- - pengine/notif.c | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/pengine/notif.c b/pengine/notif.c -index 4913249..cdc382d 100644 ---- a/pengine/notif.c -+++ b/pengine/notif.c -@@ -631,6 +631,28 @@ expand_notification_data(resource_t *rsc, notify_data_t * n_data, pe_working_set - return required; - } - -+/* -+ * \internal -+ * \brief Find any remote connection start relevant to an action -+ * -+ * \param[in] action Action to chek -+ * -+ * \return If action is behind a remote connection, connection's start -+ */ -+static pe_action_t * -+find_remote_start(pe_action_t *action) -+{ -+ if (action && action->node) { -+ pe_resource_t *remote_rsc = action->node->details->remote_rsc; -+ -+ if (remote_rsc) { -+ return find_first_action(remote_rsc->actions, NULL, RSC_START, -+ NULL); -+ } -+ } -+ return NULL; -+} -+ - void - create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set) - { -@@ -738,6 +760,20 @@ create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t - rsc->id); - - } else if (task == start_rsc || task == action_promote) { -+ -+ if (start) { -+ pe_action_t *remote_start = find_remote_start(start); -+ -+ if (remote_start -+ && is_not_set(remote_start->flags, pe_action_runnable)) { -+ /* Start and promote actions for a clone instance behind -+ * a Pacemaker Remote connection happen after the -+ * connection starts. If the connection start is blocked, do -+ * not schedule notifications for these actions. 
-+ */ -+ return; -+ } -+ } - if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) { - pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set); - } --- -1.8.3.1 - - -From 2793dcd9cfd20989d64ce1c553d63c28d9c7cb59 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 28 Nov 2018 18:12:35 -0600 -Subject: [PATCH 2/5] Test: scheduler: new test for clone notifications behind - a stopping remote - -Don't send notifications to a remote node whose connection has been stopped. ---- - pengine/regression.sh | 1 + - pengine/test10/notify-behind-stopping-remote.dot | 84 +++++ - pengine/test10/notify-behind-stopping-remote.exp | 388 +++++++++++++++++++++ - .../test10/notify-behind-stopping-remote.scores | 65 ++++ - .../test10/notify-behind-stopping-remote.summary | 58 +++ - pengine/test10/notify-behind-stopping-remote.xml | 187 ++++++++++ - 6 files changed, 783 insertions(+) - create mode 100644 pengine/test10/notify-behind-stopping-remote.dot - create mode 100644 pengine/test10/notify-behind-stopping-remote.exp - create mode 100644 pengine/test10/notify-behind-stopping-remote.scores - create mode 100644 pengine/test10/notify-behind-stopping-remote.summary - create mode 100644 pengine/test10/notify-behind-stopping-remote.xml - -diff --git a/pengine/regression.sh b/pengine/regression.sh -index fedd1b7..f719df6 100755 ---- a/pengine/regression.sh -+++ b/pengine/regression.sh -@@ -484,6 +484,7 @@ do_test notify-3 "Notify move, confirm" - do_test novell-239079 "Notification priority" - #do_test notify-2 "Notify - 764" - do_test route-remote-notify "Route remote notify actions through correct cluster node" -+do_test notify-behind-stopping-remote "Don't schedule notifications behind stopped remote" - - echo "" - do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" -diff --git a/pengine/test10/notify-behind-stopping-remote.dot b/pengine/test10/notify-behind-stopping-remote.dot -new file mode 100644 -index 
0000000..cac4d5a ---- /dev/null -+++ b/pengine/test10/notify-behind-stopping-remote.dot -@@ -0,0 +1,84 @@ -+digraph "g" { -+"Cancel redis_monitor_45000 redis-bundle-0" -> "redis_promote_0 redis-bundle-0" [ style = bold] -+"Cancel redis_monitor_45000 redis-bundle-0" [ style=bold color="green" fontcolor="black"] -+"Cancel redis_monitor_60000 redis-bundle-0" -> "redis_promote_0 redis-bundle-0" [ style = bold] -+"Cancel redis_monitor_60000 redis-bundle-0" [ style=bold color="green" fontcolor="black"] -+"all_stopped" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-1_monitor_60000 ra2" [ style=dashed color="red" fontcolor="black"] -+"redis-bundle-1_start_0 ra2" -> "redis-bundle-1_monitor_60000 ra2" [ style = dashed] -+"redis-bundle-1_start_0 ra2" -> "redis_monitor_45000 redis-bundle-1" [ style = dashed] -+"redis-bundle-1_start_0 ra2" -> "redis_monitor_60000 redis-bundle-1" [ style = dashed] -+"redis-bundle-1_start_0 ra2" -> "redis_start_0 redis-bundle-1" [ style = dashed] -+"redis-bundle-1_start_0 ra2" [ style=dashed color="red" fontcolor="black"] -+"redis-bundle-1_stop_0 ra2" -> "all_stopped" [ style = bold] -+"redis-bundle-1_stop_0 ra2" -> "redis-bundle-1_start_0 ra2" [ style = dashed] -+"redis-bundle-1_stop_0 ra2" -> "redis-bundle-docker-1_stop_0 ra2" [ style = bold] -+"redis-bundle-1_stop_0 ra2" [ style=bold color="green" fontcolor="black"] -+"redis-bundle-docker-1_stop_0 ra2" -> "all_stopped" [ style = bold] -+"redis-bundle-docker-1_stop_0 ra2" -> "redis-bundle_stopped_0" [ style = bold] -+"redis-bundle-docker-1_stop_0 ra2" [ style=bold color="green" fontcolor="black"] -+"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis-bundle_promoted_0" [ style = bold] -+"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] -+"redis-bundle-master_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_confirmed-post_notify_running_0" -> 
"redis-bundle-master_pre_notify_promote_0" [ style = bold] -+"redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle_running_0" [ style = bold] -+"redis-bundle-master_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_confirmed-pre_notify_promote_0" -> "redis-bundle-master_post_notify_promoted_0" [ style = bold] -+"redis-bundle-master_confirmed-pre_notify_promote_0" -> "redis-bundle-master_promote_0" [ style = bold] -+"redis-bundle-master_confirmed-pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_confirmed-pre_notify_start_0" -> "redis-bundle-master_post_notify_running_0" [ style = bold] -+"redis-bundle-master_confirmed-pre_notify_start_0" -> "redis-bundle-master_start_0" [ style = bold] -+"redis-bundle-master_confirmed-pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_post_notify_promoted_0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] -+"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-0" [ style = bold] -+"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-2" [ style = bold] -+"redis-bundle-master_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_post_notify_running_0" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] -+"redis-bundle-master_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_pre_notify_promote_0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] -+"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-0" [ style = bold] -+"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-2" [ style = bold] -+"redis-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] 
-+"redis-bundle-master_pre_notify_start_0" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] -+"redis-bundle-master_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_promote_0" -> "redis_promote_0 redis-bundle-0" [ style = bold] -+"redis-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_promoted_0" -> "redis-bundle-master_post_notify_promoted_0" [ style = bold] -+"redis-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_running_0" -> "redis-bundle-master_post_notify_running_0" [ style = bold] -+"redis-bundle-master_running_0" -> "redis-bundle-master_promote_0" [ style = bold] -+"redis-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle-master_start_0" -> "redis-bundle-master_running_0" [ style = bold] -+"redis-bundle-master_start_0" -> "redis_start_0 redis-bundle-1" [ style = dashed] -+"redis-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle_promote_0" -> "redis-bundle-master_promote_0" [ style = bold] -+"redis-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle_running_0" -> "redis-bundle_promote_0" [ style = bold] -+"redis-bundle_running_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle_start_0" -> "redis-bundle-master_start_0" [ style = bold] -+"redis-bundle_start_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle_stop_0" -> "redis-bundle-docker-1_stop_0 ra2" [ style = bold] -+"redis-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] -+"redis-bundle_stopped_0" -> "redis-bundle_promote_0" [ style = bold] -+"redis-bundle_stopped_0" -> "redis-bundle_start_0" [ style = bold] -+"redis-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] -+"redis_monitor_20000 redis-bundle-0" [ style=bold 
color="green" fontcolor="black"] -+"redis_monitor_45000 redis-bundle-1" [ style=dashed color="red" fontcolor="black"] -+"redis_monitor_60000 redis-bundle-1" [ style=dashed color="red" fontcolor="black"] -+"redis_post_notify_promoted_0 redis-bundle-0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] -+"redis_post_notify_promoted_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] -+"redis_post_notify_promoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] -+"redis_post_notify_promoted_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] -+"redis_pre_notify_promote_0 redis-bundle-0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] -+"redis_pre_notify_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] -+"redis_pre_notify_promote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] -+"redis_pre_notify_promote_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] -+"redis_promote_0 redis-bundle-0" -> "redis-bundle-master_promoted_0" [ style = bold] -+"redis_promote_0 redis-bundle-0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] -+"redis_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] -+"redis_start_0 redis-bundle-1" -> "redis-bundle-master_running_0" [ style = dashed] -+"redis_start_0 redis-bundle-1" -> "redis_monitor_45000 redis-bundle-1" [ style = dashed] -+"redis_start_0 redis-bundle-1" -> "redis_monitor_60000 redis-bundle-1" [ style = dashed] -+"redis_start_0 redis-bundle-1" [ style=dashed color="red" fontcolor="black"] -+} -diff --git a/pengine/test10/notify-behind-stopping-remote.exp b/pengine/test10/notify-behind-stopping-remote.exp -new file mode 100644 -index 0000000..44e6356 ---- /dev/null -+++ b/pengine/test10/notify-behind-stopping-remote.exp -@@ -0,0 +1,388 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/pengine/test10/notify-behind-stopping-remote.scores b/pengine/test10/notify-behind-stopping-remote.scores -new file mode 100644 -index 0000000..e58b614 ---- /dev/null -+++ b/pengine/test10/notify-behind-stopping-remote.scores -@@ -0,0 +1,65 @@ -+Allocation scores: -+Using the original execution date of: 2018-11-22 20:36:07Z -+clone_color: redis-bundle-master allocation score on ra1: -INFINITY -+clone_color: redis-bundle-master allocation score on ra2: -INFINITY -+clone_color: redis-bundle-master allocation score on ra3: -INFINITY -+clone_color: redis-bundle-master allocation score on redis-bundle-0: 0 -+clone_color: redis-bundle-master allocation score on redis-bundle-1: 0 -+clone_color: redis-bundle-master allocation score on redis-bundle-2: 0 -+clone_color: redis:0 allocation score on redis-bundle-0: INFINITY -+clone_color: redis:1 allocation score on redis-bundle-1: INFINITY -+clone_color: 
redis:2 allocation score on redis-bundle-2: INFINITY -+container_color: redis-bundle allocation score on ra1: 0 -+container_color: redis-bundle allocation score on ra2: -INFINITY -+container_color: redis-bundle allocation score on ra3: 0 -+container_color: redis-bundle-0 allocation score on ra1: 0 -+container_color: redis-bundle-0 allocation score on ra2: 0 -+container_color: redis-bundle-0 allocation score on ra3: 0 -+container_color: redis-bundle-1 allocation score on ra1: 0 -+container_color: redis-bundle-1 allocation score on ra2: 0 -+container_color: redis-bundle-1 allocation score on ra3: 0 -+container_color: redis-bundle-2 allocation score on ra1: 0 -+container_color: redis-bundle-2 allocation score on ra2: 0 -+container_color: redis-bundle-2 allocation score on ra3: 0 -+container_color: redis-bundle-docker-0 allocation score on ra1: 0 -+container_color: redis-bundle-docker-0 allocation score on ra2: -INFINITY -+container_color: redis-bundle-docker-0 allocation score on ra3: 0 -+container_color: redis-bundle-docker-1 allocation score on ra1: 0 -+container_color: redis-bundle-docker-1 allocation score on ra2: -INFINITY -+container_color: redis-bundle-docker-1 allocation score on ra3: 0 -+container_color: redis-bundle-docker-2 allocation score on ra1: 0 -+container_color: redis-bundle-docker-2 allocation score on ra2: -INFINITY -+container_color: redis-bundle-docker-2 allocation score on ra3: 0 -+container_color: redis-bundle-master allocation score on ra1: 0 -+container_color: redis-bundle-master allocation score on ra2: 0 -+container_color: redis-bundle-master allocation score on ra3: 0 -+container_color: redis-bundle-master allocation score on redis-bundle-0: -INFINITY -+container_color: redis-bundle-master allocation score on redis-bundle-1: -INFINITY -+container_color: redis-bundle-master allocation score on redis-bundle-2: -INFINITY -+container_color: redis:0 allocation score on redis-bundle-0: 501 -+container_color: redis:1 allocation score on 
redis-bundle-1: 500 -+container_color: redis:2 allocation score on redis-bundle-2: 501 -+native_color: redis-bundle-0 allocation score on ra1: 10000 -+native_color: redis-bundle-0 allocation score on ra2: 0 -+native_color: redis-bundle-0 allocation score on ra3: 0 -+native_color: redis-bundle-1 allocation score on ra1: 0 -+native_color: redis-bundle-1 allocation score on ra2: 0 -+native_color: redis-bundle-1 allocation score on ra3: 0 -+native_color: redis-bundle-2 allocation score on ra1: 0 -+native_color: redis-bundle-2 allocation score on ra2: 0 -+native_color: redis-bundle-2 allocation score on ra3: 10000 -+native_color: redis-bundle-docker-0 allocation score on ra1: 0 -+native_color: redis-bundle-docker-0 allocation score on ra2: -INFINITY -+native_color: redis-bundle-docker-0 allocation score on ra3: 0 -+native_color: redis-bundle-docker-1 allocation score on ra1: -INFINITY -+native_color: redis-bundle-docker-1 allocation score on ra2: -INFINITY -+native_color: redis-bundle-docker-1 allocation score on ra3: -INFINITY -+native_color: redis-bundle-docker-2 allocation score on ra1: -INFINITY -+native_color: redis-bundle-docker-2 allocation score on ra2: -INFINITY -+native_color: redis-bundle-docker-2 allocation score on ra3: 0 -+native_color: redis:0 allocation score on redis-bundle-0: INFINITY -+native_color: redis:1 allocation score on redis-bundle-1: INFINITY -+native_color: redis:2 allocation score on redis-bundle-2: INFINITY -+redis:0 promotion score on redis-bundle-0: 1 -+redis:1 promotion score on redis-bundle-1: -1 -+redis:2 promotion score on redis-bundle-2: 1 -diff --git a/pengine/test10/notify-behind-stopping-remote.summary b/pengine/test10/notify-behind-stopping-remote.summary -new file mode 100644 -index 0000000..b9342b9 ---- /dev/null -+++ b/pengine/test10/notify-behind-stopping-remote.summary -@@ -0,0 +1,58 @@ -+Using the original execution date of: 2018-11-22 20:36:07Z -+ -+Current cluster status: -+Online: [ ra1 ra2 ra3 ] -+Containers: [ 
redis-bundle-0:redis-bundle-docker-0 redis-bundle-1:redis-bundle-docker-1 redis-bundle-2:redis-bundle-docker-2 ] -+ -+ Docker container set: redis-bundle [docker.io/tripleoqueens/centos-binary-redis:current-tripleo-rdo] -+ redis-bundle-0 (ocf::heartbeat:redis): Slave ra1 -+ redis-bundle-1 (ocf::heartbeat:redis): Stopped ra2 -+ redis-bundle-2 (ocf::heartbeat:redis): Slave ra3 -+ -+Transition Summary: -+ * Promote redis:0 ( Slave -> Master redis-bundle-0 ) -+ * Stop redis-bundle-docker-1 ( ra2 ) due to node availability -+ * Stop redis-bundle-1 ( ra2 ) due to unrunnable redis-bundle-docker-1 start -+ * Start redis:1 ( redis-bundle-1 ) due to unrunnable redis-bundle-docker-1 start (blocked) -+ -+Executing cluster transition: -+ * Resource action: redis cancel=45000 on redis-bundle-0 -+ * Resource action: redis cancel=60000 on redis-bundle-0 -+ * Pseudo action: redis-bundle-master_pre_notify_start_0 -+ * Resource action: redis-bundle-1 stop on ra2 -+ * Pseudo action: redis-bundle_stop_0 -+ * Pseudo action: redis-bundle-master_confirmed-pre_notify_start_0 -+ * Resource action: redis-bundle-docker-1 stop on ra2 -+ * Pseudo action: redis-bundle_stopped_0 -+ * Pseudo action: redis-bundle_start_0 -+ * Pseudo action: all_stopped -+ * Pseudo action: redis-bundle-master_start_0 -+ * Pseudo action: redis-bundle-master_running_0 -+ * Pseudo action: redis-bundle-master_post_notify_running_0 -+ * Pseudo action: redis-bundle-master_confirmed-post_notify_running_0 -+ * Pseudo action: redis-bundle_running_0 -+ * Pseudo action: redis-bundle-master_pre_notify_promote_0 -+ * Pseudo action: redis-bundle_promote_0 -+ * Resource action: redis notify on redis-bundle-0 -+ * Resource action: redis notify on redis-bundle-2 -+ * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 -+ * Pseudo action: redis-bundle-master_promote_0 -+ * Resource action: redis promote on redis-bundle-0 -+ * Pseudo action: redis-bundle-master_promoted_0 -+ * Pseudo action: 
redis-bundle-master_post_notify_promoted_0 -+ * Resource action: redis notify on redis-bundle-0 -+ * Resource action: redis notify on redis-bundle-2 -+ * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 -+ * Pseudo action: redis-bundle_promoted_0 -+ * Resource action: redis monitor=20000 on redis-bundle-0 -+Using the original execution date of: 2018-11-22 20:36:07Z -+ -+Revised cluster status: -+Online: [ ra1 ra2 ra3 ] -+Containers: [ redis-bundle-0:redis-bundle-docker-0 redis-bundle-2:redis-bundle-docker-2 ] -+ -+ Docker container set: redis-bundle [docker.io/tripleoqueens/centos-binary-redis:current-tripleo-rdo] -+ redis-bundle-0 (ocf::heartbeat:redis): Master ra1 -+ redis-bundle-1 (ocf::heartbeat:redis): Stopped -+ redis-bundle-2 (ocf::heartbeat:redis): Slave ra3 -+ -diff --git a/pengine/test10/notify-behind-stopping-remote.xml b/pengine/test10/notify-behind-stopping-remote.xml -new file mode 100644 -index 0000000..66351b8 ---- /dev/null -+++ b/pengine/test10/notify-behind-stopping-remote.xml -@@ -0,0 +1,187 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - - -From 90bed507285d23218617f0cd520d788ba246761e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 28 Nov 2018 18:30:50 -0600 -Subject: [PATCH 3/5] Test: scheduler: update bundle-order-fencing test for - notification change - -Don't schedule clone notifications on a Pacemaker Remote node that -has just been fenced. 
---- - pengine/test10/bundle-order-fencing.dot | 8 - - pengine/test10/bundle-order-fencing.exp | 246 ++++++++++++---------------- - pengine/test10/bundle-order-fencing.summary | 2 - - 3 files changed, 107 insertions(+), 149 deletions(-) - -diff --git a/pengine/test10/bundle-order-fencing.dot b/pengine/test10/bundle-order-fencing.dot -index e53a062..1e2721b 100644 ---- a/pengine/test10/bundle-order-fencing.dot -+++ b/pengine/test10/bundle-order-fencing.dot -@@ -210,8 +210,6 @@ digraph "g" { - "redis-bundle-master_confirmed-post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] - "redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis-bundle_promoted_0" [ style = bold] - "redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_20000 redis-bundle-1" [ style = bold] --"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] --"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] - "redis-bundle-master_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] - "redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle-master_pre_notify_promote_0" [ style = bold] - "redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle_running_0" [ style = bold] -@@ -247,7 +245,6 @@ digraph "g" { - "redis-bundle-master_post_notify_demoted_0" -> "redis_post_notify_demoted_0 redis-bundle-2" [ style = bold] - "redis-bundle-master_post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] - "redis-bundle-master_post_notify_promoted_0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] --"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-0" [ style = bold] - "redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-1" [ style = bold] - "redis-bundle-master_post_notify_promoted_0" -> 
"redis_post_notify_promoted_0 redis-bundle-2" [ style = bold] - "redis-bundle-master_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] -@@ -262,7 +259,6 @@ digraph "g" { - "redis-bundle-master_pre_notify_demote_0" -> "redis_pre_notify_demote_0 redis-bundle-2" [ style = bold] - "redis-bundle-master_pre_notify_demote_0" [ style=bold color="green" fontcolor="orange"] - "redis-bundle-master_pre_notify_promote_0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] --"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-0" [ style = bold] - "redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-1" [ style = bold] - "redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-2" [ style = bold] - "redis-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] -@@ -325,8 +321,6 @@ digraph "g" { - "redis_post_notify_demoted_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] - "redis_post_notify_demoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_demoted_0" [ style = bold] - "redis_post_notify_demoted_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] --"redis_post_notify_promoted_0 redis-bundle-0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] --"redis_post_notify_promoted_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] - "redis_post_notify_promoted_0 redis-bundle-1" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] - "redis_post_notify_promoted_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] - "redis_post_notify_promoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] -@@ -345,8 +339,6 @@ digraph "g" { - "redis_pre_notify_demote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] - "redis_pre_notify_demote_0 redis-bundle-2" -> 
"redis-bundle-master_confirmed-pre_notify_demote_0" [ style = bold] - "redis_pre_notify_demote_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] --"redis_pre_notify_promote_0 redis-bundle-0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] --"redis_pre_notify_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] - "redis_pre_notify_promote_0 redis-bundle-1" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] - "redis_pre_notify_promote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] - "redis_pre_notify_promote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] -diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp -index 2b8f5cf..84bffaa 100644 ---- a/pengine/test10/bundle-order-fencing.exp -+++ b/pengine/test10/bundle-order-fencing.exp -@@ -521,32 +521,6 @@ - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- - - - -@@ -563,7 +537,7 @@ - - - -- -+ - - - -@@ -575,7 +549,7 @@ - - - -- -+ - - - -@@ -593,7 +567,7 @@ - - - -- -+ - - - -@@ -605,9 +579,9 @@ - - - -- -+ - -- -+ - - - -@@ -618,9 +592,9 @@ - - - -- -+ - -- -+ - - - -@@ -631,9 +605,9 @@ - - - -- -+ - -- -+ - - - -@@ -644,9 +618,9 @@ - - - -- -+ - -- -+ - - - -@@ -657,7 +631,7 @@ - - - -- -+ - - - -@@ -670,7 +644,7 @@ - - - -- -+ - - - -@@ -686,7 +660,7 @@ - - - -- -+ - - - -@@ -711,7 +685,7 @@ - - - -- -+ - - - -@@ -733,7 +707,7 @@ - - - -- -+ - - - -@@ -742,7 +716,7 @@ - - - -- -+ - - - -@@ -751,9 +725,9 @@ - - - -- -+ - -- -+ - - - -@@ -764,9 +738,9 @@ - - - -- -+ - -- -+ - - - -@@ -777,9 +751,9 @@ - - - -- -+ - -- -+ - - - -@@ -790,9 +764,9 @@ - - - -- -+ - -- -+ - - - -@@ -803,7 +777,7 @@ - - - -- -+ - - - -@@ -816,7 +790,7 @@ - - - -- -+ - - - -@@ -832,7 +806,7 @@ - - - -- -+ - - - -@@ -843,14 +817,14 @@ - - - -- -+ - - -- -+ - - - -- -+ - - - -@@ -865,7 +839,7 @@ - - - -- -+ - - - -@@ -876,14 +850,14 @@ 
- - - -- -+ - - -- -+ - - - -- -+ - - - -@@ -891,7 +865,7 @@ - - - -- -+ - - - -@@ -906,7 +880,7 @@ - - - -- -+ - - - -@@ -921,7 +895,7 @@ - - - -- -+ - - - -@@ -932,17 +906,14 @@ - - - -- -+ - - -- -- -- -- -+ - - - -- -+ - - - -@@ -957,7 +928,7 @@ - - - -- -+ - - - -@@ -968,17 +939,14 @@ - - - -- -- -- -- -+ - - -- -+ - - - -- -+ - - - -@@ -996,7 +964,7 @@ - - - -- -+ - - - -@@ -1008,7 +976,7 @@ - - - -- -+ - - - -@@ -1032,7 +1000,7 @@ - - - -- -+ - - - -@@ -1050,7 +1018,7 @@ - - - -- -+ - - - -@@ -1065,7 +1033,7 @@ - - - -- -+ - - - -@@ -1083,7 +1051,7 @@ - - - -- -+ - - - -@@ -1095,7 +1063,7 @@ - - - -- -+ - - - -@@ -1110,7 +1078,7 @@ - - - -- -+ - - - -@@ -1131,7 +1099,7 @@ - - - -- -+ - - - -@@ -1143,7 +1111,7 @@ - - - -- -+ - - - -@@ -1158,7 +1126,7 @@ - - - -- -+ - - - -@@ -1170,7 +1138,7 @@ - - - -- -+ - - - -@@ -1185,7 +1153,7 @@ - - - -- -+ - - - -@@ -1197,7 +1165,7 @@ - - - -- -+ - - - -@@ -1218,7 +1186,7 @@ - - - -- -+ - - - -@@ -1236,7 +1204,7 @@ - - - -- -+ - - - -@@ -1244,7 +1212,7 @@ - - - -- -+ - - - -@@ -1253,7 +1221,7 @@ - - - -- -+ - - - -@@ -1262,7 +1230,7 @@ - - - -- -+ - - - -@@ -1271,7 +1239,7 @@ - - - -- -+ - - - -@@ -1280,7 +1248,7 @@ - - - -- -+ - - - -@@ -1293,7 +1261,7 @@ - - - -- -+ - - - -@@ -1309,7 +1277,7 @@ - - - -- -+ - - - -@@ -1324,7 +1292,7 @@ - - - -- -+ - - - -@@ -1337,7 +1305,7 @@ - - - -- -+ - - - -@@ -1353,7 +1321,7 @@ - - - -- -+ - - - -@@ -1368,7 +1336,7 @@ - - - -- -+ - - - -@@ -1381,7 +1349,7 @@ - - - -- -+ - - - -@@ -1397,7 +1365,7 @@ - - - -- -+ - - - -@@ -1412,7 +1380,7 @@ - - - -- -+ - - - -@@ -1427,7 +1395,7 @@ - - - -- -+ - - - -@@ -1440,7 +1408,7 @@ - - - -- -+ - - - -@@ -1453,7 +1421,7 @@ - - - -- -+ - - - -@@ -1461,7 +1429,7 @@ - - - -- -+ - - - -@@ -1474,7 +1442,7 @@ - - - -- -+ - - - -@@ -1487,7 +1455,7 @@ - - - -- -+ - - - -@@ -1495,7 +1463,7 @@ - - - -- -+ - - - -@@ -1510,7 +1478,7 @@ - - - -- -+ - - - -@@ -1525,7 +1493,7 @@ - - - -- -+ - - - -@@ -1540,7 +1508,7 @@ - - - -- -+ - - - -@@ -1561,7 +1529,7 @@ 
- - - -- -+ - - - -@@ -1573,7 +1541,7 @@ - - - -- -+ - - - -@@ -1581,7 +1549,7 @@ - - - -- -+ - - - -@@ -1596,7 +1564,7 @@ - - - -- -+ - - - -@@ -1604,7 +1572,7 @@ - - - -- -+ - - - -@@ -1616,7 +1584,7 @@ - - - -- -+ - - - -@@ -1634,7 +1602,7 @@ - - - -- -+ - - - -@@ -1649,7 +1617,7 @@ - - - -- -+ - - - -@@ -1661,7 +1629,7 @@ - - - -- -+ - - - -@@ -1673,7 +1641,7 @@ - - - -- -+ - - - -@@ -1688,7 +1656,7 @@ - - - -- -+ - - - -@@ -1703,7 +1671,7 @@ - - - -- -+ - - - -@@ -1711,7 +1679,7 @@ - - - -- -+ - - - -@@ -1726,7 +1694,7 @@ - - - -- -+ - - - -@@ -1738,7 +1706,7 @@ - - - -- -+ - - - -@@ -1750,7 +1718,7 @@ - - - -- -+ - - - -@@ -1765,7 +1733,7 @@ - - - -- -+ - - - -@@ -1780,7 +1748,7 @@ - - - -- -+ - - - -@@ -1788,7 +1756,7 @@ - - - -- -+ - - - -@@ -1800,7 +1768,7 @@ - - - -- -+ - - - -@@ -1808,7 +1776,7 @@ - - - -- -+ - - - -@@ -1877,7 +1845,7 @@ - - - -- -+ - - - -diff --git a/pengine/test10/bundle-order-fencing.summary b/pengine/test10/bundle-order-fencing.summary -index d398a12..a3dc3d4 100644 ---- a/pengine/test10/bundle-order-fencing.summary -+++ b/pengine/test10/bundle-order-fencing.summary -@@ -174,7 +174,6 @@ Executing cluster transition: - * Pseudo action: redis-bundle_running_0 - * Pseudo action: redis-bundle-master_pre_notify_promote_0 - * Pseudo action: redis-bundle_promote_0 -- * Resource action: redis notify on redis-bundle-0 - * Resource action: redis notify on redis-bundle-1 - * Resource action: redis notify on redis-bundle-2 - * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 -@@ -182,7 +181,6 @@ Executing cluster transition: - * Resource action: redis promote on redis-bundle-1 - * Pseudo action: redis-bundle-master_promoted_0 - * Pseudo action: redis-bundle-master_post_notify_promoted_0 -- * Resource action: redis notify on redis-bundle-0 - * Resource action: redis notify on redis-bundle-1 - * Resource action: redis notify on redis-bundle-2 - * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 --- -1.8.3.1 - - 
-From 48198ca839b62de1316d7ae6ab0994dedb37b523 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 27 Nov 2018 17:00:25 -0600 -Subject: [PATCH 4/5] Refactor: controller: make process_lrm_event() void - -All callers ignored the return value ---- - crmd/crmd_lrm.h | 3 ++- - crmd/lrm.c | 8 +++----- - 2 files changed, 5 insertions(+), 6 deletions(-) - -diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h -index d115056..3e1596d 100644 ---- a/crmd/crmd_lrm.h -+++ b/crmd/crmd_lrm.h -@@ -170,4 +170,5 @@ gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state); - void remote_ra_process_maintenance_nodes(xmlNode *xml); - gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); - --gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending); -+void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, -+ struct recurring_op_s *pending); -diff --git a/crmd/lrm.c b/crmd/lrm.c -index d18665c..5e5af9f 100644 ---- a/crmd/lrm.c -+++ b/crmd/lrm.c -@@ -2515,7 +2515,7 @@ unescape_newlines(const char *string) - return ret; - } - --gboolean -+void - process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) - { - char *op_id = NULL; -@@ -2526,8 +2526,8 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - gboolean removed = FALSE; - lrmd_rsc_info_t *rsc = NULL; - -- CRM_CHECK(op != NULL, return FALSE); -- CRM_CHECK(op->rsc_id != NULL, return FALSE); -+ CRM_CHECK(op != NULL, return); -+ CRM_CHECK(op->rsc_id != NULL, return); - - op_id = make_stop_id(op->rsc_id, op->call_id); - op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); -@@ -2679,6 +2679,4 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - lrmd_free_rsc_info(rsc); - free(op_key); - free(op_id); -- -- return TRUE; - } --- -1.8.3.1 - - -From 77dd44e214401d4dd953a8bafa2469b36d70948e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 27 Nov 
2018 17:02:36 -0600 -Subject: [PATCH 5/5] Low: controller: be more tolerant of malformed executor - events - -b3f9a5bb was overzealous in discarding faked executor results without any -resource information. Since that commit, synthesize_lrmd_failure() would check -for resource information, and send a CIB update if the synthesized operation -were recordable, but would otherwise (such as for notifications) discard the -result. - -This means the fix was incomplete, because non-recordable actions for a -resource behind a just-died remote connection would get lost. It also -exposed two pre-existing bugs regarding notifications mis-scheduled on -the wrong node. Any of these would block the transition from completing. - -Now, process_lrm_event() can handle missing lrm_state or resource information, -so it can be called by synthesize_lrmd_failure() without any checking. This -leads to all the normal handling for non-recordable operations, which doesn't -require resource information. We log an assertion if the resource information -is not found, so that we can still get some visibility into bugs. This won't -be of use in the case of mis-scheduled notifications, but it could help in -other situations.
---- - crmd/crmd_lrm.h | 2 +- - crmd/lrm.c | 148 ++++++++++++++++++++++++++++++-------------------- - crmd/lrm_state.c | 2 +- - crmd/remote_lrmd_ra.c | 2 +- - 4 files changed, 93 insertions(+), 61 deletions(-) - -diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h -index 3e1596d..0870817 100644 ---- a/crmd/crmd_lrm.h -+++ b/crmd/crmd_lrm.h -@@ -171,4 +171,4 @@ void remote_ra_process_maintenance_nodes(xmlNode *xml); - gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); - - void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, -- struct recurring_op_s *pending); -+ struct recurring_op_s *pending, xmlNode *action_xml); -diff --git a/crmd/lrm.c b/crmd/lrm.c -index 5e5af9f..0d64f59 100644 ---- a/crmd/lrm.c -+++ b/crmd/lrm.c -@@ -314,7 +314,7 @@ lrm_op_callback(lrmd_event_data_t * op) - lrm_state = lrm_state_find(nodename); - CRM_ASSERT(lrm_state != NULL); - -- process_lrm_event(lrm_state, op, NULL); -+ process_lrm_event(lrm_state, op, NULL, NULL); - } - - /* A_LRM_CONNECT */ -@@ -1434,7 +1434,6 @@ static void - synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) - { - lrmd_event_data_t *op = NULL; -- lrmd_rsc_info_t *rsc_info = NULL; - const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); - const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); - xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); -@@ -1464,35 +1463,8 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) - crm_info("Faking %s_%s_%d result (%d) on %s", - op->rsc_id, op->op_type, op->interval, op->rc, target_node); - -- /* Process the result as if it came from the LRM, if possible -- * (i.e. resource info can be obtained from the lrm_state). 
-- */ -- if (lrm_state) { -- rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); -- } -- if (rsc_info) { -- lrmd_free_rsc_info(rsc_info); -- process_lrm_event(lrm_state, op, NULL); -- -- } else if (controld_action_is_recordable(op->op_type)) { -- /* If we can't process the result normally, at least write it to the CIB -- * if possible, so the PE can act on it. -- */ -- const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); -- const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); -- const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE); -- -- if (standard && type) { -- rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); -- do_update_resource(target_node, rsc_info, op); -- lrmd_free_rsc_info(rsc_info); -- } else { -- // @TODO Should we direct ack? -- crm_info("Can't fake %s failure (%d) on %s without resource standard and type", -- crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, -- target_node); -- } -- } -+ // Process the result as if it came from the LRM -+ process_lrm_event(lrm_state, op, NULL, action); - lrmd_free_event(op); - } - -@@ -1555,7 +1527,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, - - if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { - crm_info("Failing resource %s...", rsc->id); -- process_lrm_event(lrm_state, op, NULL); -+ process_lrm_event(lrm_state, op, NULL, xml); - op->op_status = PCMK_LRM_OP_DONE; - op->rc = PCMK_OCF_OK; - lrmd_free_rsc_info(rsc); -@@ -2315,7 +2287,7 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat - crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", - operation, rsc->id, lrm_state->node_name, call_id); - fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); -- process_lrm_event(lrm_state, op, NULL); -+ process_lrm_event(lrm_state, op, NULL, NULL); - - } else { - /* record all operations so we can wait -@@ -2516,7 
+2488,8 @@ unescape_newlines(const char *string) - } - - void --process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) -+process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, -+ struct recurring_op_s *pending, xmlNode *action_xml) - { - char *op_id = NULL; - char *op_key = NULL; -@@ -2525,16 +2498,49 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - gboolean remove = FALSE; - gboolean removed = FALSE; - lrmd_rsc_info_t *rsc = NULL; -+ const char *node_name = NULL; - - CRM_CHECK(op != NULL, return); - CRM_CHECK(op->rsc_id != NULL, return); - - op_id = make_stop_id(op->rsc_id, op->call_id); - op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); -- rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); -+ -+ // Get resource info if available (from executor state or action XML) -+ if (lrm_state) { -+ rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); -+ } -+ if ((rsc == NULL) && action_xml) { -+ xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); -+ -+ const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); -+ const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); -+ const char *type = crm_element_value(xml, XML_ATTR_TYPE); -+ -+ if (standard && type) { -+ crm_info("%s agent information not cached, using %s%s%s:%s from action XML", -+ op->rsc_id, standard, -+ (provider? ":" : ""), (provider? 
provider : ""), type); -+ rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); -+ } else { -+ crm_err("Can't process %s result because %s agent information not cached or in XML", -+ op_key, op->rsc_id); -+ } -+ } -+ CRM_LOG_ASSERT(rsc != NULL); // If it's still NULL, there's a bug somewhere -+ -+ // Get node name if available (from executor state or action XML) -+ if (lrm_state) { -+ node_name = lrm_state->node_name; -+ } else if (action_xml) { -+ node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); -+ } -+ - if(pending == NULL) { - remove = TRUE; -- pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); -+ if (lrm_state) { -+ pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); -+ } - } - - if (op->op_status == PCMK_LRM_OP_ERROR) { -@@ -2554,7 +2560,14 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - - if (op->op_status != PCMK_LRM_OP_CANCELLED) { - if (controld_action_is_recordable(op->op_type)) { -- update_id = do_update_resource(lrm_state->node_name, rsc, op); -+ if (node_name && rsc) { -+ update_id = do_update_resource(node_name, rsc, op); -+ } else { -+ // @TODO Should we direct ack? -+ crm_err("Unable to record %s result in CIB: %s", -+ op_key, -+ (node_name? "No resource information" : "No node name")); -+ } - } else { - send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); - } -@@ -2575,7 +2588,9 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - - } else if (pending->remove) { - /* The tengine canceled this op, we have been waiting for the cancel to finish. 
*/ -- erase_lrm_history_by_op(lrm_state, op); -+ if (lrm_state) { -+ erase_lrm_history_by_op(lrm_state, op); -+ } - - } else if (op->rsc_deleted) { - /* The tengine initiated this op, but it was cancelled outside of the -@@ -2595,14 +2610,23 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - /* The caller will do this afterwards, but keep the logging consistent */ - removed = TRUE; - -- } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { -- removed = TRUE; -- crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", -- op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); -+ } else if (lrm_state && ((op->interval == 0) -+ || (op->op_status == PCMK_LRM_OP_CANCELLED))) { - -- } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { -- removed = TRUE; -- g_hash_table_remove(lrm_state->pending_ops, op_id); -+ gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id); -+ -+ if (op->interval != 0) { -+ removed = TRUE; -+ } else if (found) { -+ removed = TRUE; -+ crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", -+ op_key, op->call_id, op_id, -+ g_hash_table_size(lrm_state->pending_ops)); -+ } -+ } -+ -+ if (node_name == NULL) { -+ node_name = "unknown node"; // for logging - } - - switch (op->op_status) { -@@ -2610,7 +2634,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - crm_info("Result of %s operation for %s on %s: %s " - CRM_XS " call=%d key=%s confirmed=%s", - crm_action_str(op->op_type, op->interval), -- op->rsc_id, lrm_state->node_name, -+ op->rsc_id, node_name, - services_lrm_status_str(op->op_status), - op->call_id, op_key, (removed? 
"true" : "false")); - break; -@@ -2620,7 +2644,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - "Result of %s operation for %s on %s: %d (%s) " - CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", - crm_action_str(op->op_type, op->interval), -- op->rsc_id, lrm_state->node_name, -+ op->rsc_id, node_name, - op->rc, services_ocf_exitcode_str(op->rc), - op->call_id, op_key, (removed? "true" : "false"), - update_id); -@@ -2630,7 +2654,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - crm_err("Result of %s operation for %s on %s: %s " - CRM_XS " call=%d key=%s timeout=%dms", - crm_action_str(op->op_type, op->interval), -- op->rsc_id, lrm_state->node_name, -+ op->rsc_id, node_name, - services_lrm_status_str(op->op_status), - op->call_id, op_key, op->timeout); - break; -@@ -2639,14 +2663,16 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - crm_err("Result of %s operation for %s on %s: %s " - CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", - crm_action_str(op->op_type, op->interval), -- op->rsc_id, lrm_state->node_name, -+ op->rsc_id, node_name, - services_lrm_status_str(op->op_status), op->call_id, op_key, - (removed? 
"true" : "false"), op->op_status, update_id); - } - - if (op->output) { - char *prefix = -- crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); -+ crm_strdup_printf("%s-%s_%s_%d:%d", node_name, -+ op->rsc_id, op->op_type, op->interval, -+ op->call_id); - - if (op->rc) { - crm_log_output(LOG_NOTICE, prefix, op->output); -@@ -2656,25 +2682,31 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr - free(prefix); - } - -- if (safe_str_neq(op->op_type, RSC_METADATA)) { -- crmd_alert_resource_op(lrm_state->node_name, op); -- } else if (op->rc == PCMK_OCF_OK) { -- char *metadata = unescape_newlines(op->output); -+ if (lrm_state) { -+ if (safe_str_neq(op->op_type, RSC_METADATA)) { -+ crmd_alert_resource_op(lrm_state->node_name, op); -+ } else if (rsc && (op->rc == PCMK_OCF_OK)) { -+ char *metadata = unescape_newlines(op->output); - -- metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); -- free(metadata); -+ metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); -+ free(metadata); -+ } - } - - if (op->rsc_deleted) { - crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); -- delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); -+ if (lrm_state) { -+ delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); -+ } - } - - /* If a shutdown was escalated while operations were pending, - * then the FSA will be stalled right now... 
allow it to continue - */ - mainloop_set_trigger(fsa_source); -- update_history_cache(lrm_state, rsc, op); -+ if (lrm_state && rsc) { -+ update_history_cache(lrm_state, rsc, op); -+ } - - lrmd_free_rsc_info(rsc); - free(op_key); -diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c -index 40da762..d8a0039 100644 ---- a/crmd/lrm_state.c -+++ b/crmd/lrm_state.c -@@ -96,7 +96,7 @@ fail_pending_op(gpointer key, gpointer value, gpointer user_data) - event.remote_nodename = lrm_state->node_name; - event.params = op->params; - -- process_lrm_event(lrm_state, &event, op); -+ process_lrm_event(lrm_state, &event, op, NULL); - return TRUE; - } - -diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c -index 6fa05f6..2d04588 100644 ---- a/crmd/remote_lrmd_ra.c -+++ b/crmd/remote_lrmd_ra.c -@@ -519,7 +519,7 @@ synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char * - op.t_run = time(NULL); - op.t_rcchange = op.t_run; - op.call_id = generate_callid(); -- process_lrm_event(lrm_state, &op, NULL); -+ process_lrm_event(lrm_state, &op, NULL, NULL); - } - - void --- -1.8.3.1 - diff --git a/SOURCES/012-stonith-ordering.patch b/SOURCES/012-stonith-ordering.patch deleted file mode 100644 index 7c80cec..0000000 --- a/SOURCES/012-stonith-ordering.patch +++ /dev/null @@ -1,108 +0,0 @@ -From eb2854add713f22b083a54aa7caf04be5067b469 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Nov 2018 18:05:04 -0600 -Subject: [PATCH] Low: scheduler: order start after particular stonith op - -Previously, if a resource's status was unknown on a node about to be fenced, -we ordered the resource start after all_stopped. This approximated stonith_done -before that was available. However, it makes more sense to order the start -after the particular stonith op for the node in question. - -This improves recovery when multiple nodes are being fenced: resources can now -be recovered from one node when it is successfully fenced, even if the fencing -of another node fails. 
---- - pengine/native.c | 63 +++++++++++++++++++++++++++++++------------------------- - 1 file changed, 35 insertions(+), 28 deletions(-) - -diff --git a/pengine/native.c b/pengine/native.c -index c6c1d55..9ee5990 100644 ---- a/pengine/native.c -+++ b/pengine/native.c -@@ -2948,13 +2948,19 @@ native_create_probe(resource_t * rsc, node_t * node, action_t * complete, - return TRUE; - } - -+/*! -+ * \internal -+ * \brief Order a resource's start and promote actions relative to fencing -+ * -+ * \param[in] rsc Resource to be ordered -+ * \param[in] stonith_op Fence action -+ * \param[in] data_set Cluster information -+ */ - static void - native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) - { - node_t *target; - GListPtr gIter = NULL; -- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); -- action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); - - CRM_CHECK(stonith_op && stonith_op->node, return); - target = stonith_op->node; -@@ -2962,34 +2968,35 @@ native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set - for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { - action_t *action = (action_t *) gIter->data; - -- if(action->needs == rsc_req_nothing) { -- /* Anything other than start or promote requires nothing */ -- -- } else if (action->needs == rsc_req_stonith) { -- order_actions(stonith_done, action, pe_order_optional); -+ switch (action->needs) { -+ case rsc_req_nothing: -+ // Anything other than start or promote requires nothing -+ break; - -- } else if (safe_str_eq(action->task, RSC_START) -- && NULL != pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) -- && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { -- /* if known == NULL, then we don't know if -- * the resource is active on the node -- * we're about to shoot -- * -- * in this case, regardless of action->needs, -- * the only safe option is to wait until -- * the node is shot 
before doing anything -- * to with the resource -- * -- * it's analogous to waiting for all the probes -- * for rscX to complete before starting rscX -- * -- * the most likely explanation is that the -- * DC died and took its status with it -- */ -+ case rsc_req_stonith: -+ order_actions(stonith_op, action, pe_order_optional); -+ break; - -- pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, -- target->details->uname); -- order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); -+ case rsc_req_quorum: -+ if (safe_str_eq(action->task, RSC_START) -+ && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id) -+ && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { -+ -+ /* If we don't know the status of the resource on the node -+ * we're about to shoot, we have to assume it may be active -+ * there. Order the resource start after the fencing. This -+ * is analogous to waiting for all the probes for a resource -+ * to complete before starting it. -+ * -+ * The most likely explanation is that the DC died and took -+ * its status with it. 
-+ */ -+ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, -+ target->details->uname); -+ order_actions(stonith_op, action, -+ pe_order_optional | pe_order_runnable_left); -+ } -+ break; - } - } - } --- -1.8.3.1 - diff --git a/SOURCES/013-pseudo-removal.patch b/SOURCES/013-pseudo-removal.patch deleted file mode 100644 index 0812570..0000000 --- a/SOURCES/013-pseudo-removal.patch +++ /dev/null @@ -1,225 +0,0 @@ -From dcc8e65891537cfdffb1b18e1412b12868d20241 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 16 Nov 2018 21:02:13 -0600 -Subject: [PATCH 1/2] Low: scheduler: get rid of now-unused all_stopped - pseudo-action - ---- - lib/pengine/common.c | 2 -- - pengine/allocate.c | 3 --- - pengine/native.c | 8 -------- - pengine/notif.c | 7 ------- - pengine/utils.h | 1 - - 5 files changed, 21 deletions(-) - -diff --git a/lib/pengine/common.c b/lib/pengine/common.c -index c54bc44..d04e4ae 100644 ---- a/lib/pengine/common.c -+++ b/lib/pengine/common.c -@@ -280,8 +280,6 @@ text2task(const char *task) - return no_action; - } else if (safe_str_eq(task, "stonith_complete")) { - return no_action; -- } else if (safe_str_eq(task, "all_stopped")) { -- return no_action; - } - crm_trace("Unsupported action: %s", task); - #endif -diff --git a/pengine/allocate.c b/pengine/allocate.c -index adc07d8..81f3f51 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -1556,7 +1556,6 @@ stage6(pe_working_set_t * data_set) - action_t *stonith_op = NULL; - action_t *last_stonith = NULL; - gboolean integrity_lost = FALSE; -- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); - action_t *done = get_pseudo_op(STONITH_DONE, data_set); - gboolean need_stonith = TRUE; - GListPtr gIter; -@@ -1706,8 +1705,6 @@ stage6(pe_working_set_t * data_set) - order_actions(last_stonith, done, pe_order_implies_then); - } - -- order_actions(done, all_stopped, pe_order_implies_then); -- - g_list_free(stonith_ops); - return TRUE; - } -diff --git a/pengine/native.c 
b/pengine/native.c -index 9ee5990..bd0b7d0 100644 ---- a/pengine/native.c -+++ b/pengine/native.c -@@ -1430,14 +1430,6 @@ native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) - return; - } - -- { -- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); -- -- custom_action_order(rsc, stop_key(rsc), NULL, -- NULL, strdup(all_stopped->task), all_stopped, -- pe_order_implies_then | pe_order_runnable_left, data_set); -- } -- - if (g_hash_table_size(rsc->utilization) > 0 - && safe_str_neq(data_set->placement_strategy, "default")) { - GHashTableIter iter; -diff --git a/pengine/notif.c b/pengine/notif.c -index cdc382d..b333e5c 100644 ---- a/pengine/notif.c -+++ b/pengine/notif.c -@@ -411,13 +411,6 @@ create_notification_boundaries(resource_t * rsc, const char *action, action_t * - if (start && end) { - order_actions(n_data->pre_done, n_data->post, pe_order_optional); - } -- -- if (safe_str_eq(action, RSC_STOP)) { -- action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); -- -- order_actions(n_data->post_done, all_stopped, pe_order_optional); -- } -- - return n_data; - } - -diff --git a/pengine/utils.h b/pengine/utils.h -index 04ee36b..aee7708 100644 ---- a/pengine/utils.h -+++ b/pengine/utils.h -@@ -66,7 +66,6 @@ pe_action_t *create_pseudo_resource_op(resource_t * rsc, const char *task, bool - - # define STONITH_UP "stonith_up" - # define STONITH_DONE "stonith_complete" --# define ALL_STOPPED "all_stopped" - # define LOAD_STOPPED "load_stopped" - - #endif --- -1.8.3.1 - - -From 811e6291f18b11471d8b4a98b0079de8f6b00091 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 16 Nov 2018 21:08:31 -0600 -Subject: [PATCH 2/2] Low: scheduler: get rid of now-unused stonith_complete - pseudo-action - -also last reference to long-gone stonith_up pseudo-action ---- - lib/pengine/common.c | 4 ---- - pengine/allocate.c | 21 ++------------------- - pengine/utils.h | 2 -- - 3 files changed, 2 insertions(+), 25 deletions(-) - -diff --git 
a/lib/pengine/common.c b/lib/pengine/common.c -index d04e4ae..d03a6aa 100644 ---- a/lib/pengine/common.c -+++ b/lib/pengine/common.c -@@ -276,10 +276,6 @@ text2task(const char *task) - return no_action; - } else if (safe_str_eq(task, "fail")) { - return no_action; -- } else if (safe_str_eq(task, "stonith_up")) { -- return no_action; -- } else if (safe_str_eq(task, "stonith_complete")) { -- return no_action; - } - crm_trace("Unsupported action: %s", task); - #endif -diff --git a/pengine/allocate.c b/pengine/allocate.c -index 81f3f51..0ee8bb0 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -1459,11 +1459,10 @@ any_managed_resources(pe_working_set_t * data_set) - * \brief Create pseudo-op for guest node fence, and order relative to it - * - * \param[in] node Guest node to fence -- * \param[in] done STONITH_DONE operation - * \param[in] data_set Working set of CIB state - */ - static void --fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) -+fence_guest(pe_node_t *node, pe_working_set_t *data_set) - { - resource_t *container = node->details->remote_rsc->container; - pe_action_t *stop = NULL; -@@ -1540,9 +1539,6 @@ fence_guest(pe_node_t *node, pe_action_t *done, pe_working_set_t *data_set) - - /* Order/imply other actions relative to pseudo-fence as with real fence */ - stonith_constraints(node, stonith_op, data_set); -- if(done) { -- order_actions(stonith_op, done, pe_order_implies_then); -- } - } - - /* -@@ -1552,11 +1548,9 @@ gboolean - stage6(pe_working_set_t * data_set) - { - action_t *dc_down = NULL; -- action_t *dc_fence = NULL; - action_t *stonith_op = NULL; - action_t *last_stonith = NULL; - gboolean integrity_lost = FALSE; -- action_t *done = get_pseudo_op(STONITH_DONE, data_set); - gboolean need_stonith = TRUE; - GListPtr gIter; - GListPtr stonith_ops = NULL; -@@ -1587,7 +1581,7 @@ stage6(pe_working_set_t * data_set) - */ - if (is_container_remote_node(node)) { - if (node->details->remote_requires_reset && 
need_stonith) { -- fence_guest(node, done, data_set); -+ fence_guest(node, data_set); - } - continue; - } -@@ -1604,7 +1598,6 @@ stage6(pe_working_set_t * data_set) - - if (node->details->is_dc) { - dc_down = stonith_op; -- dc_fence = stonith_op; - - } else if (is_set(data_set->flags, pe_flag_concurrent_fencing) == FALSE) { - if (last_stonith) { -@@ -1613,7 +1606,6 @@ stage6(pe_working_set_t * data_set) - last_stonith = stonith_op; - - } else { -- order_actions(stonith_op, done, pe_order_implies_then); - stonith_ops = g_list_append(stonith_ops, stonith_op); - } - -@@ -1696,15 +1688,6 @@ stage6(pe_working_set_t * data_set) - } - } - } -- -- -- if (dc_fence) { -- order_actions(dc_down, done, pe_order_implies_then); -- -- } else if (last_stonith) { -- order_actions(last_stonith, done, pe_order_implies_then); -- } -- - g_list_free(stonith_ops); - return TRUE; - } -diff --git a/pengine/utils.h b/pengine/utils.h -index aee7708..0e81cb3 100644 ---- a/pengine/utils.h -+++ b/pengine/utils.h -@@ -64,8 +64,6 @@ extern void calculate_utilization(GHashTable * current_utilization, - extern void process_utilization(resource_t * rsc, node_t ** prefer, pe_working_set_t * data_set); - pe_action_t *create_pseudo_resource_op(resource_t * rsc, const char *task, bool optional, bool runnable, pe_working_set_t *data_set); - --# define STONITH_UP "stonith_up" --# define STONITH_DONE "stonith_complete" - # define LOAD_STOPPED "load_stopped" - - #endif --- -1.8.3.1 - diff --git a/SOURCES/014-cli-test.patch b/SOURCES/014-cli-test.patch deleted file mode 100644 index 960170a..0000000 --- a/SOURCES/014-cli-test.patch +++ /dev/null @@ -1,32 +0,0 @@ -From b47749de4916c6090d0e1593139553a84be97db1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 30 Nov 2018 16:39:53 -0600 -Subject: [PATCH] Test: cts-cli: update regression test for all_stopped removal - ---- - tools/regression.tools.exp | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/tools/regression.tools.exp 
b/tools/regression.tools.exp -index ce9f352..f5a2a42 100644 ---- a/tools/regression.tools.exp -+++ b/tools/regression.tools.exp -@@ -1805,7 +1805,6 @@ Executing cluster transition: - * Resource action: Fence stop on node1 - * Resource action: Fence monitor on node3 - * Resource action: Fence monitor on node2 -- * Pseudo action: all_stopped - * Resource action: Fence start on node2 - - Revised cluster status: -@@ -2074,7 +2073,6 @@ Transition Summary: - - Executing cluster transition: - * Resource action: dummy stop on node1 -- * Pseudo action: all_stopped - * Resource action: dummy start on node3 - - Revised cluster status: --- -1.8.3.1 - diff --git a/SOURCES/015-remote-ordering.patch b/SOURCES/015-remote-ordering.patch deleted file mode 100644 index f36418e..0000000 --- a/SOURCES/015-remote-ordering.patch +++ /dev/null @@ -1,51 +0,0 @@ -From e4dae772074c964e10da59e2678f329c9c8a3bf1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Nov 2018 17:51:30 -0600 -Subject: [PATCH] Fix: scheduler: order resource moves after connection starts - -This addresses a regression in behavior since 1.1.18 (via 3a34fed). By allowing -stops to proceed before probes finished, that change allowed the stop of a -resource moving to a coming-up remote node to happen before the remote node -connection's start. If the remote connection start fails, the resource will -have to be started again where it was, leading to unnecessary downtime. - -Now, order the resource's stop after the remote connection's start. 
- -RHBZ#1648507 ---- - pengine/allocate.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/pengine/allocate.c b/pengine/allocate.c -index 0ee8bb0..126ba90 100644 ---- a/pengine/allocate.c -+++ b/pengine/allocate.c -@@ -2224,6 +2224,25 @@ apply_remote_node_ordering(pe_working_set_t *data_set) - continue; - } - -+ /* Another special case: if a resource is moving to a Pacemaker Remote -+ * node, order the stop on the original node after any start of the -+ * remote connection. This ensures that if the connection fails to -+ * start, we leave the resource running on the original node. -+ */ -+ if (safe_str_eq(action->task, RSC_START)) { -+ for (GList *item = action->rsc->actions; item != NULL; -+ item = item->next) { -+ pe_action_t *rsc_action = item->data; -+ -+ if ((rsc_action->node->details != action->node->details) -+ && safe_str_eq(rsc_action->task, RSC_STOP)) { -+ custom_action_order(remote, start_key(remote), NULL, -+ action->rsc, NULL, rsc_action, -+ pe_order_optional, data_set); -+ } -+ } -+ } -+ - /* The action occurs across a remote connection, so create - * ordering constraints that guarantee the action occurs while the node - * is active (after start, before stop ... things like that). --- -1.8.3.1 - diff --git a/SOURCES/017-cleanup-pending-op.patch b/SOURCES/017-cleanup-pending-op.patch deleted file mode 100644 index 94cef2c..0000000 --- a/SOURCES/017-cleanup-pending-op.patch +++ /dev/null @@ -1,150 +0,0 @@ -From a48d73f23c6800ae51522c1a91d0f0e1eb967078 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 8 Jan 2019 15:31:14 -0600 -Subject: [PATCH] Fix: controller: directly acknowledge unrecordable operation - results - -Regression introduced in 2.0.1-rc1 by 0363985dd - -Before that commit, if an operation result arrived when there was no resource -information available, a warning would be logged and the operation would be -directly acknowledged. 
This could occur, for example, if resource history were -cleaned while an operation was pending on that resource. - -After that commit, in that situation, an assertion and error would be logged, -and no acknowledgement would be sent, leading to a transition timeout. - -Restore the direct ack. Also improve related log messages. ---- - crmd/lrm.c | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------- - 1 file changed, 55 insertions(+), 25 deletions(-) - -diff --git a/crmd/lrm.c b/crmd/lrm.c -index 0d64f59..51cb50b 100644 ---- a/crmd/lrm.c -+++ b/crmd/lrm.c -@@ -2497,6 +2497,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - int update_id = 0; - gboolean remove = FALSE; - gboolean removed = FALSE; -+ bool need_direct_ack = FALSE; - lrmd_rsc_info_t *rsc = NULL; - const char *node_name = NULL; - -@@ -2527,7 +2528,6 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - op_key, op->rsc_id); - } - } -- CRM_LOG_ASSERT(rsc != NULL); // If it's still NULL, there's a bug somewhere - - // Get node name if available (from executor state or action XML) - if (lrm_state) { -@@ -2559,51 +2559,81 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - } - - if (op->op_status != PCMK_LRM_OP_CANCELLED) { -+ /* We might not record the result, so directly acknowledge it to the -+ * originator instead, so it doesn't time out waiting for the result -+ * (especially important if part of a transition). -+ */ -+ need_direct_ack = TRUE; -+ - if (controld_action_is_recordable(op->op_type)) { - if (node_name && rsc) { -+ // We should record the result, and happily, we can - update_id = do_update_resource(node_name, rsc, op); -+ need_direct_ack = FALSE; -+ -+ } else if (op->rsc_deleted) { -+ /* We shouldn't record the result (likely the resource was -+ * refreshed, cleaned, or removed while this operation was -+ * in flight). 
-+ */ -+ crm_notice("Not recording %s result in CIB because " -+ "resource information was removed since it was initiated", -+ op_key); - } else { -- // @TODO Should we direct ack? -- crm_err("Unable to record %s result in CIB: %s", -- op_key, -+ /* This shouldn't be possible; the executor didn't consider the -+ * resource deleted, but we couldn't find resource or node -+ * information. -+ */ -+ crm_err("Unable to record %s result in CIB: %s", op_key, - (node_name? "No resource information" : "No node name")); - } -- } else { -- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); - } -+ - } else if (op->interval == 0) { -- /* This will occur when "crm resource cleanup" is called while actions are in-flight */ -- crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); -- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); -+ /* A non-recurring operation was cancelled. Most likely, the -+ * never-initiated action was removed from the executor's pending -+ * operations list upon resource removal. -+ */ -+ need_direct_ack = TRUE; - - } else if (pending == NULL) { -- /* We don't need to do anything for cancelled ops -- * that are not in our pending op list. There are no -- * transition actions waiting on these operations. */ -+ /* This recurring operation was cancelled, but was not pending. No -+ * transition actions are waiting on it, nothing needs to be done. -+ */ - - } else if (op->user_data == NULL) { -- /* At this point we have a pending entry, but no transition -- * key present in the user_data field. report this */ -- crm_err("Op %s (call=%d): No user data", op_key, op->call_id); -+ /* This recurring operation was cancelled and pending, but we don't -+ * have a transition key. This should never happen. -+ */ -+ crm_err("Recurring operation %s was cancelled without transition information", -+ op_key); - - } else if (pending->remove) { -- /* The tengine canceled this op, we have been waiting for the cancel to finish. 
*/ -+ /* This recurring operation was cancelled (by us) and pending, and we -+ * have been waiting for it to finish. -+ */ - if (lrm_state) { - erase_lrm_history_by_op(lrm_state, op); - } - - } else if (op->rsc_deleted) { -- /* The tengine initiated this op, but it was cancelled outside of the -- * tengine's control during a resource cleanup/re-probe request. The tengine -- * must be alerted that this operation completed, otherwise the tengine -- * will continue waiting for this update to occur until it is timed out. -- * We don't want this update going to the cib though, so use a direct ack. */ -- crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); -- send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); -+ /* This recurring operation was cancelled (but not by us, and the -+ * executor does not have resource information, likely due to resource -+ * cleanup, refresh, or removal) and pending. -+ */ -+ crm_debug("Recurring op %s was cancelled due to resource deletion", -+ op_key); -+ need_direct_ack = TRUE; - - } else { -- /* Before a stop is called, no need to direct ack */ -- crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); -+ /* This recurring operation was cancelled (but not by us, likely by the -+ * executor before stopping the resource) and pending. We don't need to -+ * do anything special. 
-+ */ -+ } -+ -+ if (need_direct_ack) { -+ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); - } - - if(remove == FALSE) { --- -1.8.3.1 - diff --git a/SOURCES/2.0-cleanup-behavior.patch b/SOURCES/2.0-cleanup-behavior.patch index c0e13c9..4a20eaa 100644 --- a/SOURCES/2.0-cleanup-behavior.patch +++ b/SOURCES/2.0-cleanup-behavior.patch @@ -53,7 +53,7 @@ index 128d075..bbdba25 100644 {"-spacer-", 1, NULL, '-', "\nExamples:", pcmk_option_paragraph}, @@ -630,15 +624,16 @@ main(int argc, char **argv) - timeout_ms = crm_get_msec(optarg); + require_resource = FALSE; break; + case 'P': @@ -84,7 +84,7 @@ index 128d075..bbdba25 100644 - crm_debug("Re-checking the state of %s (%s requested) on %s", - rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, -- NULL, 0, FALSE, &data_set); +- NULL, 0, FALSE, data_set); - - if ((rc == pcmk_ok) && !BE_QUIET) { - // Show any reasons why resource might stay stopped @@ -99,7 +99,7 @@ index 128d075..bbdba25 100644 + crm_debug("Re-checking the state of %s (%s requested) on %s", + rsc->id, rsc_id, (host_uname? 
host_uname: "all nodes")); + rc = cli_resource_delete(crmd_channel, host_uname, rsc, -+ NULL, 0, FALSE, &data_set); ++ NULL, 0, FALSE, data_set); + + if ((rc == pcmk_ok) && !BE_QUIET) { + // Show any reasons why resource might stay stopped diff --git a/SOURCES/rhbz-url.patch b/SOURCES/rhbz-url.patch deleted file mode 100644 index 1c09cd2..0000000 --- a/SOURCES/rhbz-url.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 9b74fb4d667cf187c1c80aeb39ff3b3c12846421 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 18 Apr 2017 14:17:38 -0500 -Subject: [PATCH] Low: tools: show Red Hat bugzilla URL when using crm_report - ---- - tools/crm_report.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/crm_report.in b/tools/crm_report.in -index 26050a7..4715155 100755 ---- a/tools/crm_report.in -+++ b/tools/crm_report.in -@@ -222,7 +222,7 @@ EOF - log "Collected results are available in $fname" - log " " - log "Please create a bug entry at" -- log " http://bugs.clusterlabs.org/enter_bug.cgi?product=Pacemaker" -+ log " https://bugzilla.redhat.com/" - log "Include a description of your problem and attach this tarball" - log " " - log "Thank you for taking time to create this report." 
--- -1.8.3.1 - diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index fa6d2f2..09549af 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -13,12 +13,12 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 1.1.19 -%global specversion 8 +%global pcmkversion 1.1.20 +%global specversion 5 ## Upstream commit (or git tag, such as "Pacemaker-" plus the ## {pcmkversion} macro for an official release) to use for this package -%global commit c3c624ea3d98a74a8a287671a156db126c99a7bb +%global commit 3c4c782f70ebdc9c6882859d16b7975193697640 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. %global commit_abbrev 7 @@ -160,7 +160,7 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist}.4 +Release: %{pcmk_release}%{?dist} %if %{defined _unitdir} License: GPLv2+ and LGPLv2+ %else @@ -176,35 +176,27 @@ Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{na Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz # upstream commits -Patch1: 001-rollup.patch -Patch2: 002-ppc64le.patch -Patch3: 003-static-analysis.patch -Patch4: 004-cleanup.patch -Patch5: 005-corosync.patch -Patch6: 006-fail-count.patch -Patch7: 007-stderr.patch -Patch8: 008-bundle-ordering.patch -Patch9: 009-sbd-guest.patch -Patch10: 010-route-notify.patch -Patch11: 011-notifs.patch -Patch12: 012-stonith-ordering.patch -Patch13: 013-pseudo-removal.patch -Patch14: 014-cli-test.patch -Patch15: 015-remote-ordering.patch -Patch16: 016-regression-tests.patch.gz -Patch17: 017-cleanup-pending-op.patch +Patch1: 001-constraint-fix.patch +Patch2: 002-null-value.patch +Patch3: 003-fence-output.patch +Patch4: 004-group-ordering.patch +Patch5: 
005-bug-url.patch +Patch6: 006-fence-output-fix.patch +Patch7: 007-security.patch +Patch8: 008-security-log.patch +Patch9: 009-use-after-free.patch +Patch10: 010-fork-callback.patch # patches that aren't from upstream Patch100: lrmd-protocol-version.patch -Patch101: rhbz-url.patch -Patch102: 2.0-record-pending-behavior.patch -Patch103: 2.0-cleanup-behavior.patch +Patch101: 2.0-record-pending-behavior.patch +Patch102: 2.0-cleanup-behavior.patch BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) AutoReqProv: on Requires: resource-agents -Requires: %{name}-libs = %{version}-%{release} -Requires: %{name}-cluster-libs = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} +Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} Obsoletes: rgmanager < 3.2.0 Provides: rgmanager >= 3.2.0 @@ -285,7 +277,7 @@ Available rpmbuild rebuild options: License: GPLv2+ and LGPLv2+ Summary: Command line tools for controlling Pacemaker clusters Group: System Environment/Daemons -Requires: %{name}-libs = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: perl-TimeDate %description cli @@ -301,6 +293,8 @@ License: GPLv2+ and LGPLv2+ Summary: Core Pacemaker libraries Group: System Environment/Daemons Requires(pre): shadow-utils +# RHEL: required for libpe_status API change from 7.6 to 7.7 +Conflicts: sbd < 1.4.0 %description -n %{name}-libs Pacemaker is an advanced, scalable High-Availability cluster resource @@ -313,7 +307,7 @@ nodes and those just running the CLI tools. 
License: GPLv2+ and LGPLv2+ Summary: Cluster Libraries used by Pacemaker Group: System Environment/Daemons -Requires: %{name}-libs = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} %description -n %{name}-cluster-libs Pacemaker is an advanced, scalable High-Availability cluster resource @@ -331,7 +325,7 @@ License: GPLv2+ and LGPLv2+ and BSD %endif Summary: Pacemaker remote daemon for non-cluster nodes Group: System Environment/Daemons -Requires: %{name}-libs = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} Requires: resource-agents Provides: pcmk-cluster-manager @@ -468,6 +462,7 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" %{!?with_hardening: --disable-hardening} \ --with-initdir=%{_initrddir} \ --localstatedir=%{_var} \ + --with-bug-url=https://bugzilla.redhat.com/ \ --with-nagios \ --with-nagios-metadata-dir=%{_datadir}/pacemaker/nagios/plugins-metadata/ \ --with-nagios-plugin-dir=%{_libdir}/nagios/plugins/ \ @@ -877,27 +872,75 @@ exit 0 %attr(0644,root,root) %{_datadir}/pacemaker/nagios/plugins-metadata/* %changelog -* Mon Jan 14 2019 Ken Gaillot - 1.1.19-8.4 -- Fix regression in resource clean-up/refresh when an operation is pending -- Resolves: rhbz#1665816 - -* Tue Jan 8 2019 Ken Gaillot - 1.1.19-8.3 -- Fix regression in crm_resource --refresh -- Order resource moves after remote connection starts -- Resolves: rhbz#1664242 -- Resolves: rhbz#1664243 - -* Thu Nov 29 2018 Ken Gaillot - 1.1.19-8.2 -- Don't schedule clone notifications behind stopped remote connection -- Resolves: rhbz#1654602 - -* Tue Nov 6 2018 Ken Gaillot - 1.1.19-8.1 -- Route remote clone notifications through correct cluster node -- Allow clean-up of managed guest nodes and bundle nodes -- Allow use of guest nodes and bundles in clusters with sbd fencing -- Resolves: rhbz#1646347 -- Resolves: rhbz#1646350 -- Resolves: rhbz#1646872 +* Fri May 24 2019 Ken Gaillot - 1.1.20-5 +- 
Correct memory issue in fence agent output fix +- Resolves: rhbz#1549366 + +* Fri Apr 19 2019 Ken Gaillot - 1.1.20-4 +- Update security patches +- Resolves: rhbz#1694556 +- Resolves: rhbz#1694559 +- Resolves: rhbz#1694907 + +* Thu Apr 4 2019 Ken Gaillot - 1.1.20-3 +- Support more than 64KB of fence agent output +- Avoid unnecessary recovery of group member +- Improve IPC clients' authentication of servers (CVE-2018-16877) +- Improve pacemakerd authentication of running subdaemons (CVE-2018-16878) +- Fix use-after-free with potential information disclosure (CVE-2019-3885) +- Resolves: rhbz#1549366 +- Resolves: rhbz#1609453 +- Resolves: rhbz#1694556 +- Resolves: rhbz#1694559 +- Resolves: rhbz#1694907 + +* Thu Mar 21 2019 Ken Gaillot - 1.1.20-2 +- Assume unprivileged ACL if unable to get user information from host +- Delay 2 seconds before re-attempting a failed node attribute write +- SNMP alert sample script now sends all OIDs with every alert +- Recover dependent resources correctly with asymmetric ordering +- Rebase on upstream 1.1.20 final version +- Resolves: rhbz#1596125 +- Resolves: rhbz#1597695 +- Resolves: rhbz#1608979 +- Resolves: rhbz#1628966 +- Resolves: rhbz#1644864 + +* Fri Feb 1 2019 Ken Gaillot - 1.1.20-1 +- pcs status now shows when a standby node still has active resources +- Allow clean-up of guest nodes and bundles without unmanaging first +- pcs status now shows pending and failed fence actions by default +- Improve pcs status display when disconnected from cluster +- Ensure node attributes are recorded if attrd writer is shutting down +- Synchronize fencing history across all nodes +- Add stonith_admin option to clear fencing history +- Don't schedule unneeded bundle actions when connection is on different node +- Allow use of sbd in clusters with guest nodes and bundles +- Schedule bundle clone notifications correctly when connection is moving +- Rebase on upstream 1.1.20-rc1 version +- Avoid unneeded resource restarts when remote connection 
fails to start +- Allow crm_resource --move to work when a previous move had a lifetime +- Wait for all replies when refreshing a resource +- Don't schedule clone notifications for a stopped bundle +- Allow option to crm_resource --clear to clear only expired constraints +- Fix result reporting when cleanup is done while an operation is in-flight +- Resolves: rhbz#1419548 +- Resolves: rhbz#1448467 +- Resolves: rhbz#1461964 +- Resolves: rhbz#1486869 +- Resolves: rhbz#1535221 +- Resolves: rhbz#1555938 +- Resolves: rhbz#1595422 +- Resolves: rhbz#1627948 +- Resolves: rhbz#1638593 +- Resolves: rhbz#1644076 +- Resolves: rhbz#1644864 +- Resolves: rhbz#1648507 +- Resolves: rhbz#1648620 +- Resolves: rhbz#1652053 +- Resolves: rhbz#1652752 +- Resolves: rhbz#1658650 +- Resolves: rhbz#1665343 * Mon Sep 24 2018 Ken Gaillot - 1.1.19-8 - Ensure crm_resource --force-* commands get stderr messages