From 141de63f1ddf48dc3c27f8397e0058e21bdecf46 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 26 Nov 2018 15:45:56 -0600 Subject: [PATCH 1/5] Fix: scheduler: don't send clone notifications to a stopped remote node Since b3f9a5bbb, we discard faked executor results when resource information is unavailable. This has exposed pre-existing issues where clone notifications were mistakenly scheduled for Pacemaker Remote nodes. Previously, the cluster node that had hosted the Pacemaker Remote connection would fake the result, and the transition would proceed. Now, if the cluster node doesn't happen to have the resource information cached, the result will not be sent, and thus the transition will get an action timeout. This permanently blocks later actions in the transition. This commit avoids such a situation where start and promote clone notifications were scheduled for a clone instance on a Pacemaker Remote node whose remote connection is stopping, and thus would be stopped by the time the notification would be needed. This is slightly modified from a patch provided by Andrew Beekhof.
RHBZ#1652752 --- pengine/notif.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pengine/notif.c b/pengine/notif.c index 4913249..cdc382d 100644 --- a/pengine/notif.c +++ b/pengine/notif.c @@ -631,6 +631,28 @@ expand_notification_data(resource_t *rsc, notify_data_t * n_data, pe_working_set return required; } +/* + * \internal + * \brief Find any remote connection start relevant to an action + * + * \param[in] action Action to check + * + * \return If action is behind a remote connection, connection's start + */ +static pe_action_t * +find_remote_start(pe_action_t *action) +{ + if (action && action->node) { + pe_resource_t *remote_rsc = action->node->details->remote_rsc; + + if (remote_rsc) { + return find_first_action(remote_rsc->actions, NULL, RSC_START, + NULL); + } + } + return NULL; +} + void create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t * data_set) { @@ -738,6 +760,20 @@ create_notifications(resource_t * rsc, notify_data_t * n_data, pe_working_set_t rsc->id); } else if (task == start_rsc || task == action_promote) { + + if (start) { + pe_action_t *remote_start = find_remote_start(start); + + if (remote_start + && is_not_set(remote_start->flags, pe_action_runnable)) { + /* Start and promote actions for a clone instance behind + * a Pacemaker Remote connection happen after the + * connection starts. If the connection start is blocked, do + * not schedule notifications for these actions. + */ + return; + } + } if (task != start_rsc || start == NULL || is_set(start->flags, pe_action_optional)) { pe_notify(rsc, rsc->allocated_to, n_data->pre, n_data->pre_done, n_data, data_set); } -- 1.8.3.1 From 2793dcd9cfd20989d64ce1c553d63c28d9c7cb59 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 28 Nov 2018 18:12:35 -0600 Subject: [PATCH 2/5] Test: scheduler: new test for clone notifications behind a stopping remote Don't send notifications to a remote node whose connection has been stopped. 
--- pengine/regression.sh | 1 + pengine/test10/notify-behind-stopping-remote.dot | 84 +++++ pengine/test10/notify-behind-stopping-remote.exp | 388 +++++++++++++++++++++ .../test10/notify-behind-stopping-remote.scores | 65 ++++ .../test10/notify-behind-stopping-remote.summary | 58 +++ pengine/test10/notify-behind-stopping-remote.xml | 187 ++++++++++ 6 files changed, 783 insertions(+) create mode 100644 pengine/test10/notify-behind-stopping-remote.dot create mode 100644 pengine/test10/notify-behind-stopping-remote.exp create mode 100644 pengine/test10/notify-behind-stopping-remote.scores create mode 100644 pengine/test10/notify-behind-stopping-remote.summary create mode 100644 pengine/test10/notify-behind-stopping-remote.xml diff --git a/pengine/regression.sh b/pengine/regression.sh index fedd1b7..f719df6 100755 --- a/pengine/regression.sh +++ b/pengine/regression.sh @@ -484,6 +484,7 @@ do_test notify-3 "Notify move, confirm" do_test novell-239079 "Notification priority" #do_test notify-2 "Notify - 764" do_test route-remote-notify "Route remote notify actions through correct cluster node" +do_test notify-behind-stopping-remote "Don't schedule notifications behind stopped remote" echo "" do_test 594 "OSDL #594 - Unrunnable actions scheduled in transition" diff --git a/pengine/test10/notify-behind-stopping-remote.dot b/pengine/test10/notify-behind-stopping-remote.dot new file mode 100644 index 0000000..cac4d5a --- /dev/null +++ b/pengine/test10/notify-behind-stopping-remote.dot @@ -0,0 +1,84 @@ +digraph "g" { +"Cancel redis_monitor_45000 redis-bundle-0" -> "redis_promote_0 redis-bundle-0" [ style = bold] +"Cancel redis_monitor_45000 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"Cancel redis_monitor_60000 redis-bundle-0" -> "redis_promote_0 redis-bundle-0" [ style = bold] +"Cancel redis_monitor_60000 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"all_stopped" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-1_monitor_60000 
ra2" [ style=dashed color="red" fontcolor="black"] +"redis-bundle-1_start_0 ra2" -> "redis-bundle-1_monitor_60000 ra2" [ style = dashed] +"redis-bundle-1_start_0 ra2" -> "redis_monitor_45000 redis-bundle-1" [ style = dashed] +"redis-bundle-1_start_0 ra2" -> "redis_monitor_60000 redis-bundle-1" [ style = dashed] +"redis-bundle-1_start_0 ra2" -> "redis_start_0 redis-bundle-1" [ style = dashed] +"redis-bundle-1_start_0 ra2" [ style=dashed color="red" fontcolor="black"] +"redis-bundle-1_stop_0 ra2" -> "all_stopped" [ style = bold] +"redis-bundle-1_stop_0 ra2" -> "redis-bundle-1_start_0 ra2" [ style = dashed] +"redis-bundle-1_stop_0 ra2" -> "redis-bundle-docker-1_stop_0 ra2" [ style = bold] +"redis-bundle-1_stop_0 ra2" [ style=bold color="green" fontcolor="black"] +"redis-bundle-docker-1_stop_0 ra2" -> "all_stopped" [ style = bold] +"redis-bundle-docker-1_stop_0 ra2" -> "redis-bundle_stopped_0" [ style = bold] +"redis-bundle-docker-1_stop_0 ra2" [ style=bold color="green" fontcolor="black"] +"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis-bundle_promoted_0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle-master_pre_notify_promote_0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle_running_0" [ style = bold] +"redis-bundle-master_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_confirmed-pre_notify_promote_0" -> "redis-bundle-master_post_notify_promoted_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_promote_0" -> "redis-bundle-master_promote_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] 
+"redis-bundle-master_confirmed-pre_notify_start_0" -> "redis-bundle-master_post_notify_running_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_start_0" -> "redis-bundle-master_start_0" [ style = bold] +"redis-bundle-master_confirmed-pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_post_notify_promoted_0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-2" [ style = bold] +"redis-bundle-master_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_post_notify_running_0" -> "redis-bundle-master_confirmed-post_notify_running_0" [ style = bold] +"redis-bundle-master_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_pre_notify_promote_0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-2" [ style = bold] +"redis-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_pre_notify_start_0" -> "redis-bundle-master_confirmed-pre_notify_start_0" [ style = bold] +"redis-bundle-master_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_promote_0" -> "redis_promote_0 redis-bundle-0" [ style = bold] +"redis-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_promoted_0" -> "redis-bundle-master_post_notify_promoted_0" [ style = bold] +"redis-bundle-master_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_running_0" -> 
"redis-bundle-master_post_notify_running_0" [ style = bold] +"redis-bundle-master_running_0" -> "redis-bundle-master_promote_0" [ style = bold] +"redis-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle-master_start_0" -> "redis-bundle-master_running_0" [ style = bold] +"redis-bundle-master_start_0" -> "redis_start_0 redis-bundle-1" [ style = dashed] +"redis-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_promote_0" -> "redis-bundle-master_promote_0" [ style = bold] +"redis-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_running_0" -> "redis-bundle_promote_0" [ style = bold] +"redis-bundle_running_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_start_0" -> "redis-bundle-master_start_0" [ style = bold] +"redis-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_stop_0" -> "redis-bundle-docker-1_stop_0 ra2" [ style = bold] +"redis-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] +"redis-bundle_stopped_0" -> "redis-bundle_promote_0" [ style = bold] +"redis-bundle_stopped_0" -> "redis-bundle_start_0" [ style = bold] +"redis-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] +"redis_monitor_20000 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_monitor_45000 redis-bundle-1" [ style=dashed color="red" fontcolor="black"] +"redis_monitor_60000 redis-bundle-1" [ style=dashed color="red" fontcolor="black"] +"redis_post_notify_promoted_0 redis-bundle-0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis_post_notify_promoted_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_post_notify_promoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +"redis_post_notify_promoted_0 redis-bundle-2" [ style=bold color="green" 
fontcolor="black"] +"redis_pre_notify_promote_0 redis-bundle-0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis_pre_notify_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_pre_notify_promote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] +"redis_pre_notify_promote_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] +"redis_promote_0 redis-bundle-0" -> "redis-bundle-master_promoted_0" [ style = bold] +"redis_promote_0 redis-bundle-0" -> "redis_monitor_20000 redis-bundle-0" [ style = bold] +"redis_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] +"redis_start_0 redis-bundle-1" -> "redis-bundle-master_running_0" [ style = dashed] +"redis_start_0 redis-bundle-1" -> "redis_monitor_45000 redis-bundle-1" [ style = dashed] +"redis_start_0 redis-bundle-1" -> "redis_monitor_60000 redis-bundle-1" [ style = dashed] +"redis_start_0 redis-bundle-1" [ style=dashed color="red" fontcolor="black"] +} diff --git a/pengine/test10/notify-behind-stopping-remote.exp b/pengine/test10/notify-behind-stopping-remote.exp new file mode 100644 index 0000000..44e6356 --- /dev/null +++ b/pengine/test10/notify-behind-stopping-remote.exp @@ -0,0 +1,388 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pengine/test10/notify-behind-stopping-remote.scores b/pengine/test10/notify-behind-stopping-remote.scores new file mode 100644 index 0000000..e58b614 --- /dev/null +++ b/pengine/test10/notify-behind-stopping-remote.scores @@ -0,0 +1,65 @@ +Allocation scores: +Using the original execution date of: 2018-11-22 20:36:07Z +clone_color: redis-bundle-master allocation score on ra1: -INFINITY +clone_color: redis-bundle-master allocation score on ra2: -INFINITY +clone_color: redis-bundle-master allocation score on ra3: -INFINITY +clone_color: redis-bundle-master allocation score on redis-bundle-0: 0 +clone_color: redis-bundle-master allocation score on redis-bundle-1: 0 +clone_color: redis-bundle-master allocation score on redis-bundle-2: 0 +clone_color: redis:0 allocation score on redis-bundle-0: INFINITY +clone_color: redis:1 allocation score on redis-bundle-1: INFINITY +clone_color: redis:2 allocation score on redis-bundle-2: INFINITY +container_color: redis-bundle allocation score on ra1: 0 +container_color: redis-bundle allocation score on ra2: -INFINITY +container_color: redis-bundle allocation score on ra3: 0 +container_color: redis-bundle-0 allocation score on ra1: 0 +container_color: redis-bundle-0 allocation score on ra2: 0 +container_color: redis-bundle-0 allocation score on ra3: 0 +container_color: redis-bundle-1 allocation score on ra1: 0 +container_color: redis-bundle-1 allocation score on ra2: 0 +container_color: redis-bundle-1 allocation score on ra3: 0 +container_color: redis-bundle-2 allocation score on ra1: 0 +container_color: redis-bundle-2 allocation score on ra2: 0 +container_color: redis-bundle-2 allocation score on ra3: 0 +container_color: redis-bundle-docker-0 allocation score on ra1: 0 +container_color: redis-bundle-docker-0 allocation score on ra2: -INFINITY +container_color: redis-bundle-docker-0 allocation score on ra3: 0 +container_color: redis-bundle-docker-1 allocation 
score on ra1: 0 +container_color: redis-bundle-docker-1 allocation score on ra2: -INFINITY +container_color: redis-bundle-docker-1 allocation score on ra3: 0 +container_color: redis-bundle-docker-2 allocation score on ra1: 0 +container_color: redis-bundle-docker-2 allocation score on ra2: -INFINITY +container_color: redis-bundle-docker-2 allocation score on ra3: 0 +container_color: redis-bundle-master allocation score on ra1: 0 +container_color: redis-bundle-master allocation score on ra2: 0 +container_color: redis-bundle-master allocation score on ra3: 0 +container_color: redis-bundle-master allocation score on redis-bundle-0: -INFINITY +container_color: redis-bundle-master allocation score on redis-bundle-1: -INFINITY +container_color: redis-bundle-master allocation score on redis-bundle-2: -INFINITY +container_color: redis:0 allocation score on redis-bundle-0: 501 +container_color: redis:1 allocation score on redis-bundle-1: 500 +container_color: redis:2 allocation score on redis-bundle-2: 501 +native_color: redis-bundle-0 allocation score on ra1: 10000 +native_color: redis-bundle-0 allocation score on ra2: 0 +native_color: redis-bundle-0 allocation score on ra3: 0 +native_color: redis-bundle-1 allocation score on ra1: 0 +native_color: redis-bundle-1 allocation score on ra2: 0 +native_color: redis-bundle-1 allocation score on ra3: 0 +native_color: redis-bundle-2 allocation score on ra1: 0 +native_color: redis-bundle-2 allocation score on ra2: 0 +native_color: redis-bundle-2 allocation score on ra3: 10000 +native_color: redis-bundle-docker-0 allocation score on ra1: 0 +native_color: redis-bundle-docker-0 allocation score on ra2: -INFINITY +native_color: redis-bundle-docker-0 allocation score on ra3: 0 +native_color: redis-bundle-docker-1 allocation score on ra1: -INFINITY +native_color: redis-bundle-docker-1 allocation score on ra2: -INFINITY +native_color: redis-bundle-docker-1 allocation score on ra3: -INFINITY +native_color: redis-bundle-docker-2 allocation 
score on ra1: -INFINITY +native_color: redis-bundle-docker-2 allocation score on ra2: -INFINITY +native_color: redis-bundle-docker-2 allocation score on ra3: 0 +native_color: redis:0 allocation score on redis-bundle-0: INFINITY +native_color: redis:1 allocation score on redis-bundle-1: INFINITY +native_color: redis:2 allocation score on redis-bundle-2: INFINITY +redis:0 promotion score on redis-bundle-0: 1 +redis:1 promotion score on redis-bundle-1: -1 +redis:2 promotion score on redis-bundle-2: 1 diff --git a/pengine/test10/notify-behind-stopping-remote.summary b/pengine/test10/notify-behind-stopping-remote.summary new file mode 100644 index 0000000..b9342b9 --- /dev/null +++ b/pengine/test10/notify-behind-stopping-remote.summary @@ -0,0 +1,58 @@ +Using the original execution date of: 2018-11-22 20:36:07Z + +Current cluster status: +Online: [ ra1 ra2 ra3 ] +Containers: [ redis-bundle-0:redis-bundle-docker-0 redis-bundle-1:redis-bundle-docker-1 redis-bundle-2:redis-bundle-docker-2 ] + + Docker container set: redis-bundle [docker.io/tripleoqueens/centos-binary-redis:current-tripleo-rdo] + redis-bundle-0 (ocf::heartbeat:redis): Slave ra1 + redis-bundle-1 (ocf::heartbeat:redis): Stopped ra2 + redis-bundle-2 (ocf::heartbeat:redis): Slave ra3 + +Transition Summary: + * Promote redis:0 ( Slave -> Master redis-bundle-0 ) + * Stop redis-bundle-docker-1 ( ra2 ) due to node availability + * Stop redis-bundle-1 ( ra2 ) due to unrunnable redis-bundle-docker-1 start + * Start redis:1 ( redis-bundle-1 ) due to unrunnable redis-bundle-docker-1 start (blocked) + +Executing cluster transition: + * Resource action: redis cancel=45000 on redis-bundle-0 + * Resource action: redis cancel=60000 on redis-bundle-0 + * Pseudo action: redis-bundle-master_pre_notify_start_0 + * Resource action: redis-bundle-1 stop on ra2 + * Pseudo action: redis-bundle_stop_0 + * Pseudo action: redis-bundle-master_confirmed-pre_notify_start_0 + * Resource action: redis-bundle-docker-1 stop on ra2 + * Pseudo 
action: redis-bundle_stopped_0 + * Pseudo action: redis-bundle_start_0 + * Pseudo action: all_stopped + * Pseudo action: redis-bundle-master_start_0 + * Pseudo action: redis-bundle-master_running_0 + * Pseudo action: redis-bundle-master_post_notify_running_0 + * Pseudo action: redis-bundle-master_confirmed-post_notify_running_0 + * Pseudo action: redis-bundle_running_0 + * Pseudo action: redis-bundle-master_pre_notify_promote_0 + * Pseudo action: redis-bundle_promote_0 + * Resource action: redis notify on redis-bundle-0 + * Resource action: redis notify on redis-bundle-2 + * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 + * Pseudo action: redis-bundle-master_promote_0 + * Resource action: redis promote on redis-bundle-0 + * Pseudo action: redis-bundle-master_promoted_0 + * Pseudo action: redis-bundle-master_post_notify_promoted_0 + * Resource action: redis notify on redis-bundle-0 + * Resource action: redis notify on redis-bundle-2 + * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 + * Pseudo action: redis-bundle_promoted_0 + * Resource action: redis monitor=20000 on redis-bundle-0 +Using the original execution date of: 2018-11-22 20:36:07Z + +Revised cluster status: +Online: [ ra1 ra2 ra3 ] +Containers: [ redis-bundle-0:redis-bundle-docker-0 redis-bundle-2:redis-bundle-docker-2 ] + + Docker container set: redis-bundle [docker.io/tripleoqueens/centos-binary-redis:current-tripleo-rdo] + redis-bundle-0 (ocf::heartbeat:redis): Master ra1 + redis-bundle-1 (ocf::heartbeat:redis): Stopped + redis-bundle-2 (ocf::heartbeat:redis): Slave ra3 + diff --git a/pengine/test10/notify-behind-stopping-remote.xml b/pengine/test10/notify-behind-stopping-remote.xml new file mode 100644 index 0000000..66351b8 --- /dev/null +++ b/pengine/test10/notify-behind-stopping-remote.xml @@ -0,0 +1,187 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 1.8.3.1 From 90bed507285d23218617f0cd520d788ba246761e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 28 Nov 2018 18:30:50 -0600 Subject: [PATCH 3/5] Test: scheduler: update bundle-order-fencing test for notification change Don't schedule clone notifications on a Pacemaker Remote node that has just been fenced. --- pengine/test10/bundle-order-fencing.dot | 8 - pengine/test10/bundle-order-fencing.exp | 246 ++++++++++++---------------- pengine/test10/bundle-order-fencing.summary | 2 - 3 files changed, 107 insertions(+), 149 deletions(-) diff --git a/pengine/test10/bundle-order-fencing.dot b/pengine/test10/bundle-order-fencing.dot index e53a062..1e2721b 100644 --- a/pengine/test10/bundle-order-fencing.dot +++ b/pengine/test10/bundle-order-fencing.dot @@ -210,8 +210,6 @@ digraph "g" { "redis-bundle-master_confirmed-post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis-bundle_promoted_0" [ style = bold] "redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_20000 redis-bundle-1" [ style = bold] -"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_45000 redis-bundle-0" [ style = dashed] -"redis-bundle-master_confirmed-post_notify_promoted_0" -> "redis_monitor_60000 redis-bundle-0" [ style = dashed] "redis-bundle-master_confirmed-post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle-master_pre_notify_promote_0" [ style = bold] "redis-bundle-master_confirmed-post_notify_running_0" -> "redis-bundle_running_0" [ style = bold] @@ -247,7 +245,6 @@ digraph "g" { "redis-bundle-master_post_notify_demoted_0" -> "redis_post_notify_demoted_0 
redis-bundle-2" [ style = bold] "redis-bundle-master_post_notify_demoted_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_post_notify_promoted_0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] -"redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-0" [ style = bold] "redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-1" [ style = bold] "redis-bundle-master_post_notify_promoted_0" -> "redis_post_notify_promoted_0 redis-bundle-2" [ style = bold] "redis-bundle-master_post_notify_promoted_0" [ style=bold color="green" fontcolor="orange"] @@ -262,7 +259,6 @@ digraph "g" { "redis-bundle-master_pre_notify_demote_0" -> "redis_pre_notify_demote_0 redis-bundle-2" [ style = bold] "redis-bundle-master_pre_notify_demote_0" [ style=bold color="green" fontcolor="orange"] "redis-bundle-master_pre_notify_promote_0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] -"redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-0" [ style = bold] "redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-1" [ style = bold] "redis-bundle-master_pre_notify_promote_0" -> "redis_pre_notify_promote_0 redis-bundle-2" [ style = bold] "redis-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] @@ -325,8 +321,6 @@ digraph "g" { "redis_post_notify_demoted_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_post_notify_demoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_demoted_0" [ style = bold] "redis_post_notify_demoted_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] -"redis_post_notify_promoted_0 redis-bundle-0" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] -"redis_post_notify_promoted_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] "redis_post_notify_promoted_0 
redis-bundle-1" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] "redis_post_notify_promoted_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_post_notify_promoted_0 redis-bundle-2" -> "redis-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] @@ -345,8 +339,6 @@ digraph "g" { "redis_pre_notify_demote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_pre_notify_demote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_demote_0" [ style = bold] "redis_pre_notify_demote_0 redis-bundle-2" [ style=bold color="green" fontcolor="black"] -"redis_pre_notify_promote_0 redis-bundle-0" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] -"redis_pre_notify_promote_0 redis-bundle-0" [ style=bold color="green" fontcolor="black"] "redis_pre_notify_promote_0 redis-bundle-1" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] "redis_pre_notify_promote_0 redis-bundle-1" [ style=bold color="green" fontcolor="black"] "redis_pre_notify_promote_0 redis-bundle-2" -> "redis-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] diff --git a/pengine/test10/bundle-order-fencing.exp b/pengine/test10/bundle-order-fencing.exp index 2b8f5cf..84bffaa 100644 --- a/pengine/test10/bundle-order-fencing.exp +++ b/pengine/test10/bundle-order-fencing.exp @@ -521,32 +521,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -563,7 +537,7 @@ - + @@ -575,7 +549,7 @@ - + @@ -593,7 +567,7 @@ - + @@ -605,9 +579,9 @@ - + - + @@ -618,9 +592,9 @@ - + - + @@ -631,9 +605,9 @@ - + - + @@ -644,9 +618,9 @@ - + - + @@ -657,7 +631,7 @@ - + @@ -670,7 +644,7 @@ - + @@ -686,7 +660,7 @@ - + @@ -711,7 +685,7 @@ - + @@ -733,7 +707,7 @@ - + @@ -742,7 +716,7 @@ - + @@ -751,9 +725,9 @@ - + - + @@ -764,9 +738,9 @@ - + - + @@ -777,9 +751,9 @@ - + - + @@ -790,9 +764,9 @@ - + - + @@ -803,7 +777,7 @@ - + @@ -816,7 +790,7 @@ - + @@ -832,7 +806,7 @@ - + @@ -843,14 +817,14 @@ - + - + - + @@ 
-865,7 +839,7 @@ - + @@ -876,14 +850,14 @@ - + - + - + @@ -891,7 +865,7 @@ - + @@ -906,7 +880,7 @@ - + @@ -921,7 +895,7 @@ - + @@ -932,17 +906,14 @@ - + - - - - + - + @@ -957,7 +928,7 @@ - + @@ -968,17 +939,14 @@ - - - - + - + - + @@ -996,7 +964,7 @@ - + @@ -1008,7 +976,7 @@ - + @@ -1032,7 +1000,7 @@ - + @@ -1050,7 +1018,7 @@ - + @@ -1065,7 +1033,7 @@ - + @@ -1083,7 +1051,7 @@ - + @@ -1095,7 +1063,7 @@ - + @@ -1110,7 +1078,7 @@ - + @@ -1131,7 +1099,7 @@ - + @@ -1143,7 +1111,7 @@ - + @@ -1158,7 +1126,7 @@ - + @@ -1170,7 +1138,7 @@ - + @@ -1185,7 +1153,7 @@ - + @@ -1197,7 +1165,7 @@ - + @@ -1218,7 +1186,7 @@ - + @@ -1236,7 +1204,7 @@ - + @@ -1244,7 +1212,7 @@ - + @@ -1253,7 +1221,7 @@ - + @@ -1262,7 +1230,7 @@ - + @@ -1271,7 +1239,7 @@ - + @@ -1280,7 +1248,7 @@ - + @@ -1293,7 +1261,7 @@ - + @@ -1309,7 +1277,7 @@ - + @@ -1324,7 +1292,7 @@ - + @@ -1337,7 +1305,7 @@ - + @@ -1353,7 +1321,7 @@ - + @@ -1368,7 +1336,7 @@ - + @@ -1381,7 +1349,7 @@ - + @@ -1397,7 +1365,7 @@ - + @@ -1412,7 +1380,7 @@ - + @@ -1427,7 +1395,7 @@ - + @@ -1440,7 +1408,7 @@ - + @@ -1453,7 +1421,7 @@ - + @@ -1461,7 +1429,7 @@ - + @@ -1474,7 +1442,7 @@ - + @@ -1487,7 +1455,7 @@ - + @@ -1495,7 +1463,7 @@ - + @@ -1510,7 +1478,7 @@ - + @@ -1525,7 +1493,7 @@ - + @@ -1540,7 +1508,7 @@ - + @@ -1561,7 +1529,7 @@ - + @@ -1573,7 +1541,7 @@ - + @@ -1581,7 +1549,7 @@ - + @@ -1596,7 +1564,7 @@ - + @@ -1604,7 +1572,7 @@ - + @@ -1616,7 +1584,7 @@ - + @@ -1634,7 +1602,7 @@ - + @@ -1649,7 +1617,7 @@ - + @@ -1661,7 +1629,7 @@ - + @@ -1673,7 +1641,7 @@ - + @@ -1688,7 +1656,7 @@ - + @@ -1703,7 +1671,7 @@ - + @@ -1711,7 +1679,7 @@ - + @@ -1726,7 +1694,7 @@ - + @@ -1738,7 +1706,7 @@ - + @@ -1750,7 +1718,7 @@ - + @@ -1765,7 +1733,7 @@ - + @@ -1780,7 +1748,7 @@ - + @@ -1788,7 +1756,7 @@ - + @@ -1800,7 +1768,7 @@ - + @@ -1808,7 +1776,7 @@ - + @@ -1877,7 +1845,7 @@ - + diff --git a/pengine/test10/bundle-order-fencing.summary b/pengine/test10/bundle-order-fencing.summary index d398a12..a3dc3d4 100644 --- 
a/pengine/test10/bundle-order-fencing.summary +++ b/pengine/test10/bundle-order-fencing.summary @@ -174,7 +174,6 @@ Executing cluster transition: * Pseudo action: redis-bundle_running_0 * Pseudo action: redis-bundle-master_pre_notify_promote_0 * Pseudo action: redis-bundle_promote_0 - * Resource action: redis notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-pre_notify_promote_0 @@ -182,7 +181,6 @@ Executing cluster transition: * Resource action: redis promote on redis-bundle-1 * Pseudo action: redis-bundle-master_promoted_0 * Pseudo action: redis-bundle-master_post_notify_promoted_0 - * Resource action: redis notify on redis-bundle-0 * Resource action: redis notify on redis-bundle-1 * Resource action: redis notify on redis-bundle-2 * Pseudo action: redis-bundle-master_confirmed-post_notify_promoted_0 -- 1.8.3.1 From 48198ca839b62de1316d7ae6ab0994dedb37b523 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 27 Nov 2018 17:00:25 -0600 Subject: [PATCH 4/5] Refactor: controller: make process_lrm_event() void All callers ignored the return value --- crmd/crmd_lrm.h | 3 ++- crmd/lrm.c | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h index d115056..3e1596d 100644 --- a/crmd/crmd_lrm.h +++ b/crmd/crmd_lrm.h @@ -170,4 +170,5 @@ gboolean remote_ra_is_in_maintenance(lrm_state_t * lrm_state); void remote_ra_process_maintenance_nodes(xmlNode *xml); gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); -gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending); +void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + struct recurring_op_s *pending); diff --git a/crmd/lrm.c b/crmd/lrm.c index d18665c..5e5af9f 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -2515,7 +2515,7 @@ unescape_newlines(const char *string) return ret; } 
-gboolean +void process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; @@ -2526,8 +2526,8 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; - CRM_CHECK(op != NULL, return FALSE); - CRM_CHECK(op->rsc_id != NULL, return FALSE); + CRM_CHECK(op != NULL, return); + CRM_CHECK(op->rsc_id != NULL, return); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); @@ -2679,6 +2679,4 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr lrmd_free_rsc_info(rsc); free(op_key); free(op_id); - - return TRUE; } -- 1.8.3.1 From 77dd44e214401d4dd953a8bafa2469b36d70948e Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 27 Nov 2018 17:02:36 -0600 Subject: [PATCH 5/5] Low: controller: be more tolerant of malformed executor events b3f9a5bb was overzealous in discarding faked executor results without any resource information. Since that commit, synthesize_lrmd_failure() would check for resource information, and send a CIB update if the synthesized operation were recordable, but would otherwise (such as for notifications) discard the result. This means the fix was incomplete, because non-recordable actions for a resource behind a just-died remote connection would get lost. It also exposed two pre-existing bugs regarding notifications mis-scheduled on the wrong node. Any of these would block the transition from completing. Now, process_lrm_event() can handle missing lrm_state or resource information, so it can be called by synthesize_lrmd_failure() without any checking. This leads to all the normal handling for non-recordable operations, which doesn't require resource information. We log an assertion if the resource information is not found, so that we can still get some visibility into bugs. 
This won't be of use in the case of mis-scheduled notifications, but it could help in other situations. --- crmd/crmd_lrm.h | 2 +- crmd/lrm.c | 148 ++++++++++++++++++++++++++++++-------------------- crmd/lrm_state.c | 2 +- crmd/remote_lrmd_ra.c | 2 +- 4 files changed, 93 insertions(+), 61 deletions(-) diff --git a/crmd/crmd_lrm.h b/crmd/crmd_lrm.h index 3e1596d..0870817 100644 --- a/crmd/crmd_lrm.h +++ b/crmd/crmd_lrm.h @@ -171,4 +171,4 @@ void remote_ra_process_maintenance_nodes(xmlNode *xml); gboolean remote_ra_controlling_guest(lrm_state_t * lrm_state); void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - struct recurring_op_s *pending); + struct recurring_op_s *pending, xmlNode *action_xml); diff --git a/crmd/lrm.c b/crmd/lrm.c index 5e5af9f..0d64f59 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -314,7 +314,7 @@ lrm_op_callback(lrmd_event_data_t * op) lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); - process_lrm_event(lrm_state, op, NULL); + process_lrm_event(lrm_state, op, NULL, NULL); } /* A_LRM_CONNECT */ @@ -1434,7 +1434,6 @@ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; - lrmd_rsc_info_t *rsc_info = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); @@ -1464,35 +1463,8 @@ synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) crm_info("Faking %s_%s_%d result (%d) on %s", op->rsc_id, op->op_type, op->interval, op->rc, target_node); - /* Process the result as if it came from the LRM, if possible - * (i.e. resource info can be obtained from the lrm_state). 
- */ - if (lrm_state) { - rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); - } - if (rsc_info) { - lrmd_free_rsc_info(rsc_info); - process_lrm_event(lrm_state, op, NULL); - - } else if (controld_action_is_recordable(op->op_type)) { - /* If we can't process the result normally, at least write it to the CIB - * if possible, so the PE can act on it. - */ - const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); - const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); - const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE); - - if (standard && type) { - rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); - do_update_resource(target_node, rsc_info, op); - lrmd_free_rsc_info(rsc_info); - } else { - // @TODO Should we direct ack? - crm_info("Can't fake %s failure (%d) on %s without resource standard and type", - crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, - target_node); - } - } + // Process the result as if it came from the LRM + process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } @@ -1555,7 +1527,7 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); - process_lrm_event(lrm_state, op, NULL); + process_lrm_event(lrm_state, op, NULL, xml); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); @@ -2315,7 +2287,7 @@ do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operat crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); - process_lrm_event(lrm_state, op, NULL); + process_lrm_event(lrm_state, op, NULL, NULL); } else { /* record all operations so we can wait @@ -2516,7 +2488,8 @@ unescape_newlines(const char *string) } void 
-process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) +process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + struct recurring_op_s *pending, xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; @@ -2525,16 +2498,49 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; + const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); - rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); + + // Get resource info if available (from executor state or action XML) + if (lrm_state) { + rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); + } + if ((rsc == NULL) && action_xml) { + xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); + + const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); + const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); + const char *type = crm_element_value(xml, XML_ATTR_TYPE); + + if (standard && type) { + crm_info("%s agent information not cached, using %s%s%s:%s from action XML", + op->rsc_id, standard, + (provider? ":" : ""), (provider? 
provider : ""), type); + rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); + } else { + crm_err("Can't process %s result because %s agent information not cached or in XML", + op_key, op->rsc_id); + } + } + CRM_LOG_ASSERT(rsc != NULL); // If it's still NULL, there's a bug somewhere + + // Get node name if available (from executor state or action XML) + if (lrm_state) { + node_name = lrm_state->node_name; + } else if (action_xml) { + node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); + } + if(pending == NULL) { remove = TRUE; - pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); + if (lrm_state) { + pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); + } } if (op->op_status == PCMK_LRM_OP_ERROR) { @@ -2554,7 +2560,14 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (controld_action_is_recordable(op->op_type)) { - update_id = do_update_resource(lrm_state->node_name, rsc, op); + if (node_name && rsc) { + update_id = do_update_resource(node_name, rsc, op); + } else { + // @TODO Should we direct ack? + crm_err("Unable to record %s result in CIB: %s", + op_key, + (node_name? "No resource information" : "No node name")); + } } else { send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } @@ -2575,7 +2588,9 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. 
*/ - erase_lrm_history_by_op(lrm_state, op); + if (lrm_state) { + erase_lrm_history_by_op(lrm_state, op); + } } else if (op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the @@ -2595,14 +2610,23 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; - } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { - removed = TRUE; - crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", - op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); + } else if (lrm_state && ((op->interval == 0) + || (op->op_status == PCMK_LRM_OP_CANCELLED))) { - } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { - removed = TRUE; - g_hash_table_remove(lrm_state->pending_ops, op_id); + gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id); + + if (op->interval != 0) { + removed = TRUE; + } else if (found) { + removed = TRUE; + crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", + op_key, op->call_id, op_id, + g_hash_table_size(lrm_state->pending_ops)); + } + } + + if (node_name == NULL) { + node_name = "unknown node"; // for logging } switch (op->op_status) { @@ -2610,7 +2634,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval), - op->rsc_id, lrm_state->node_name, + op->rsc_id, node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? 
"true" : "false")); break; @@ -2620,7 +2644,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval), - op->rsc_id, lrm_state->node_name, + op->rsc_id, node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); @@ -2630,7 +2654,7 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval), - op->rsc_id, lrm_state->node_name, + op->rsc_id, node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; @@ -2639,14 +2663,16 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval), - op->rsc_id, lrm_state->node_name, + op->rsc_id, node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? 
"true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = - crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); + crm_strdup_printf("%s-%s_%s_%d:%d", node_name, + op->rsc_id, op->op_type, op->interval, + op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); @@ -2656,25 +2682,31 @@ process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurr free(prefix); } - if (safe_str_neq(op->op_type, RSC_METADATA)) { - crmd_alert_resource_op(lrm_state->node_name, op); - } else if (op->rc == PCMK_OCF_OK) { - char *metadata = unescape_newlines(op->output); + if (lrm_state) { + if (safe_str_neq(op->op_type, RSC_METADATA)) { + crmd_alert_resource_op(lrm_state->node_name, op); + } else if (rsc && (op->rc == PCMK_OCF_OK)) { + char *metadata = unescape_newlines(op->output); - metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); - free(metadata); + metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); + free(metadata); + } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); - delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); + if (lrm_state) { + delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); + } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... 
allow it to continue */ mainloop_set_trigger(fsa_source); - update_history_cache(lrm_state, rsc, op); + if (lrm_state && rsc) { + update_history_cache(lrm_state, rsc, op); + } lrmd_free_rsc_info(rsc); free(op_key); diff --git a/crmd/lrm_state.c b/crmd/lrm_state.c index 40da762..d8a0039 100644 --- a/crmd/lrm_state.c +++ b/crmd/lrm_state.c @@ -96,7 +96,7 @@ fail_pending_op(gpointer key, gpointer value, gpointer user_data) event.remote_nodename = lrm_state->node_name; event.params = op->params; - process_lrm_event(lrm_state, &event, op); + process_lrm_event(lrm_state, &event, op, NULL); return TRUE; } diff --git a/crmd/remote_lrmd_ra.c b/crmd/remote_lrmd_ra.c index 6fa05f6..2d04588 100644 --- a/crmd/remote_lrmd_ra.c +++ b/crmd/remote_lrmd_ra.c @@ -519,7 +519,7 @@ synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char * op.t_run = time(NULL); op.t_rcchange = op.t_run; op.call_id = generate_callid(); - process_lrm_event(lrm_state, &op, NULL); + process_lrm_event(lrm_state, &op, NULL, NULL); } void -- 1.8.3.1