From 767b5552ab49850204692c2c990dfb41d37589f3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Mar 2022 18:11:52 -0500
Subject: [PATCH 1/9] Refactor: libpacemaker: drop unnecessary argument from
 "rsc-action" message

9875cab129 moved the setting of the "moving" variable from LogActions() to a
new "rsc-action" message, but continued to pass the variable unnecessarily

Also simplify how it's set
---
 lib/pacemaker/pcmk_output.c         | 10 ++++------
 lib/pacemaker/pcmk_sched_native.c   |  4 +---
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
index d864c8bd2..56963a93f 100644
--- a/lib/pacemaker/pcmk_output.c
+++ b/lib/pacemaker/pcmk_output.c
@@ -873,19 +873,18 @@ digests_xml(pcmk__output_t *out, va_list args)
         }                                                               \
     } while(0)
 
-PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *",
-                  "gboolean")
+PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *")
 static int
 rsc_action_default(pcmk__output_t *out, va_list args)
 {
     pe_resource_t *rsc = va_arg(args, pe_resource_t *);
     pe_node_t *current = va_arg(args, pe_node_t *);
     pe_node_t *next = va_arg(args, pe_node_t *);
-    gboolean moving = va_arg(args, gboolean);
 
     GList *possible_matches = NULL;
     char *key = NULL;
     int rc = pcmk_rc_no_output;
+    bool moving = false;
 
     pe_node_t *start_node = NULL;
     pe_action_t *start = NULL;
@@ -901,9 +900,8 @@ rsc_action_default(pcmk__output_t *out, va_list args)
         return rc;
     }
 
-    if (current != NULL && next != NULL && !pcmk__str_eq(current->details->id, next->details->id, pcmk__str_casei)) {
-        moving = TRUE;
-    }
+    moving = (current != NULL) && (next != NULL)
+             && (current->details != next->details);
 
     possible_matches = pe__resource_actions(rsc, next, RSC_START, FALSE);
     if (possible_matches) {
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index a3d646775..41631da3d 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -2037,8 +2037,6 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set)
     pe_node_t *next = NULL;
     pe_node_t *current = NULL;
 
-    gboolean moving = FALSE;
-
     if(rsc->variant == pe_container) {
         pcmk__bundle_log_actions(rsc, data_set);
         return;
@@ -2066,7 +2064,7 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set)
         return;
     }
 
-    out->message(out, "rsc-action", rsc, current, next, moving);
+    out->message(out, "rsc-action", rsc, current, next);
 }
 
 gboolean
-- 
2.27.0


From 870fb19715618c4ceab9ed4ae13a99658440b662 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 22 Mar 2022 15:22:23 -0500
Subject: [PATCH 2/9] Refactor: scheduler: functionize scheduling restart
 actions

native_create_actions() is already overlarge, and more needs to be added to it
---
 lib/pacemaker/pcmk_sched_native.c | 85 ++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 31 deletions(-)

diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 808e97540..b8a1c1e1a 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1185,6 +1185,58 @@ handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *cho
     }
 }
 
+/*!
+ * \internal
+ * \brief Schedule actions to bring resource down and back to current role
+ *
+ * \param[in] rsc           Resource to restart
+ * \param[in] current       Node that resource should be brought down on
+ * \param[in] chosen        Node that resource should be brought up on
+ * \param[in] need_stop     Whether the resource must be stopped
+ * \param[in] need_promote  Whether the resource must be promoted
+ *
+ * \note Each phase ends early if an action-creation function returns false
+ */
+static void
+schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
+                         pe_node_t *chosen, bool need_stop, bool need_promote)
+{
+    enum rsc_role_e role = rsc->role;
+    enum rsc_role_e next_role;
+
+    // Bring resource down to a stop on its current node
+    while (role != RSC_ROLE_STOPPED) {
+        next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
+        pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
+                     (need_stop? "required" : "optional"), rsc->id,
+                     role2text(role), role2text(next_role));
+        if (!rsc_action_matrix[role][next_role](rsc, current, !need_stop,
+                                                rsc->cluster)) {
+            break;
+        }
+        role = next_role;
+    }
+
+    // Bring resource up to its next role on its next node
+    while ((rsc->role <= rsc->next_role) && (role != rsc->role)
+           && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
+        bool required = need_stop;
+
+        next_role = rsc_state_matrix[role][rsc->role];
+        if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
+            required = true;
+        }
+        pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
+                     (required? "required" : "optional"), rsc->id,
+                     role2text(role), role2text(next_role));
+        if (!rsc_action_matrix[role][next_role](rsc, chosen, !required,
+                                                rsc->cluster)) {
+            break;
+        }
+        role = next_role;
+    }
+}
+
 void
 native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
 {
@@ -1332,39 +1384,10 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
     /* Create any additional actions required when bringing resource down and
      * back up to same level.
      */
-    role = rsc->role;
-    while (role != RSC_ROLE_STOPPED) {
-        next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
-        pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
-                     (need_stop? "required" : "optional"), rsc->id,
-                     role2text(role), role2text(next_role));
-        if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) {
-            break;
-        }
-        role = next_role;
-    }
-
-
-    while ((rsc->role <= rsc->next_role) && (role != rsc->role)
-           && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
-        bool required = need_stop;
-
-        next_role = rsc_state_matrix[role][rsc->role];
-        if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
-            required = true;
-        }
-        pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
-                     (required? "required" : "optional"), rsc->id,
-                     role2text(role), role2text(next_role));
-        if (rsc_action_matrix[role][next_role](rsc, chosen, !required,
-                                               data_set) == FALSE) {
-            break;
-        }
-        role = next_role;
-    }
-    role = rsc->role;
+    schedule_restart_actions(rsc, current, chosen, need_stop, need_promote);
 
     /* Required steps from this role to the next */
+    role = rsc->role;
     while (role != rsc->next_role) {
         next_role = rsc_state_matrix[role][rsc->next_role];
         pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)",
-- 
2.27.0


From 736d4d8f5e432acf12e577d137e9165904c71b3b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Mar 2022 17:42:26 -0500
Subject: [PATCH 3/9] Log: scheduler: improve trace messages when creating
 actions

---
 lib/pacemaker/pcmk_sched_native.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index b8a1c1e1a..8b651ebd2 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1997,7 +1997,6 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
     GList *gIter = NULL;
 
     CRM_ASSERT(rsc);
-    pe_rsc_trace(rsc, "%s", rsc->id);
 
     for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
         pe_node_t *current = (pe_node_t *) gIter->data;
@@ -2005,16 +2004,23 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
 
         if (rsc->partial_migration_target) {
             if (rsc->partial_migration_target->details == current->details) {
-                pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname,
-                             next->details->uname, rsc->id);
+                pe_rsc_trace(rsc,
+                             "Skipping stop of %s on %s "
+                             "because migration to %s in progress",
+                             rsc->id, current->details->uname,
+                             next->details->uname);
                 continue;
             } else {
-                pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id);
+                pe_rsc_trace(rsc,
+                             "Forcing stop of %s on %s "
+                             "because migration target changed",
+                             rsc->id, current->details->uname);
                 optional = FALSE;
             }
         }
 
-        pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname);
+        pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
+                     rsc->id, current->details->uname);
         stop = stop_action(rsc, current, optional);
 
         if(rsc->allocated_to == NULL) {
@@ -2048,7 +2054,11 @@ StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_se
     pe_action_t *start = NULL;
 
     CRM_ASSERT(rsc);
-    pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0);
+
+    pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (weight=%d)",
+                 (optional? "optional" : "required"), rsc->id,
+                 ((next == NULL)? "N/A" : next->details->uname),
+                 ((next == NULL)? 0 : next->weight));
     start = start_action(rsc, next, TRUE);
 
     pcmk__order_vs_unfence(rsc, next, start, pe_order_implies_then, data_set);
-- 
2.27.0


From 6f987234d5246ed50f4fe2db90e5edb6a23e877d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 1 Mar 2022 16:42:06 -0600
Subject: [PATCH 4/9] Log: scheduler: log a warning if invalid value is given
 for multiple-active

---
 lib/pengine/complex.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index e82af2aae..f2caef831 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -694,7 +694,12 @@ common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc,
         (*rsc)->recovery_type = recovery_block;
         pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block");
 
-    } else {
+    } else { // "stop_start"
+        if (!pcmk__str_eq(value, "stop_start",
+                          pcmk__str_casei|pcmk__str_null_matches)) {
+            pe_warn("%s is not a valid value for " XML_RSC_ATTR_MULTIPLE
+                    ", using default of \"stop_start\"", value);
+        }
         (*rsc)->recovery_type = recovery_stop_start;
         pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop/start");
     }
-- 
2.27.0


From 50456c3e229a6021ca0ba7346af41cd234abcc16 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 1 Mar 2022 16:49:31 -0600
Subject: [PATCH 5/9] API: libpe_status: add recovery_stop_unexpected to enum
 rsc_recovery_type

The behavior is not implemented as of this commit
---
 include/crm/pengine/common.h | 14 ++++++++++++--
 lib/pengine/complex.c        |  5 +++++
 lib/pengine/native.c         |  7 +++++--
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h
index efe89a171..9b9f38f3b 100644
--- a/include/crm/pengine/common.h
+++ b/include/crm/pengine/common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -78,7 +78,8 @@ enum action_tasks {
 enum rsc_recovery_type {
     recovery_stop_start,
     recovery_stop_only,
-    recovery_block
+    recovery_block,
+    recovery_stop_unexpected,
 };
 
 enum rsc_start_requirement {
@@ -143,6 +144,13 @@ const char *fail2text(enum action_fail_response fail);
 const char *pe_pref(GHashTable * options, const char *name);
 void calculate_active_ops(GList * sorted_op_list, int *start_index, int *stop_index);
 
+/*!
+ * \brief Get readable description of a recovery type
+ *
+ * \param[in] type  Recovery type
+ *
+ * \return Static string describing \p type
+ */
 static inline const char *
 recovery2text(enum rsc_recovery_type type)
 {
@@ -153,6 +161,8 @@ recovery2text(enum rsc_recovery_type type)
             return "attempting recovery";
         case recovery_block:
             return "waiting for an administrator";
+        case recovery_stop_unexpected:
+            return "stopping unexpected instances";
     }
     return "Unknown";
 }
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index f2caef831..fc9028e81 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -694,6 +694,11 @@ common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc,
         (*rsc)->recovery_type = recovery_block;
         pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block");
 
+    } else if (pcmk__str_eq(value, "stop_unexpected", pcmk__str_casei)) {
+        (*rsc)->recovery_type = recovery_stop_unexpected;
+        pe_rsc_trace((*rsc), "\tMultiple running resource recovery: "
+                             "stop unexpected instances");
+
     } else { // "stop_start"
         if (!pcmk__str_eq(value, "stop_start",
                           pcmk__str_casei|pcmk__str_null_matches)) {
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index e16e54bae..fa7dc8960 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -149,8 +149,6 @@ native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * dat
                     }
                 }
                 break;
-            case recovery_stop_start:
-                break;
             case recovery_block:
                 pe__clear_resource_flags(rsc, pe_rsc_managed);
                 pe__set_resource_flags(rsc, pe_rsc_block);
@@ -171,6 +169,11 @@ native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * dat
                     }
                 }
                 break;
+            default: // recovery_stop_start, recovery_stop_unexpected
+                /* The scheduler will do the right thing because the relevant
+                 * variables and flags are set when unpacking the history.
+                 */
+                break;
         }
         crm_debug("%s is active on multiple nodes including %s: %s",
                   rsc->id, node->details->uname,
-- 
2.27.0


From 5e994f0633b27e7a53701e0954466739c8f1acf7 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 30 Mar 2022 16:26:19 -0500
Subject: [PATCH 6/9] API: libpe_status: add pe_rsc_stop_unexpected flag

---
 include/crm/pengine/pe_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index e3ecaa823..7d5394bff 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -277,6 +277,7 @@ struct pe_node_s {
 
 #  define pe_rsc_starting                   0x00100000ULL
 #  define pe_rsc_stopping                   0x00200000ULL
+#  define pe_rsc_stop_unexpected            0x00400000ULL
 #  define pe_rsc_allow_migrate              0x00800000ULL
 
 #  define pe_rsc_failure_ignored            0x01000000ULL
-- 
2.27.0


From c1acf05be853d99c17761759b8c961f2ec4a55c2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 31 Mar 2022 09:56:34 -0500
Subject: [PATCH 7/9] API: libpe_status: add pe_rsc_restarting flag

This is used to indicate that any actions currently being scheduled are part of
the resource's restart actions (i.e. we are in schedule_restart_actions()).
---
 include/crm/pengine/pe_types.h    | 1 +
 lib/pacemaker/pcmk_sched_native.c | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index 7d5394bff..77d28e900 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -265,6 +265,7 @@ struct pe_node_s {
 #  define pe_rsc_provisional                0x00000100ULL
 #  define pe_rsc_allocating                 0x00000200ULL
 #  define pe_rsc_merging                    0x00000400ULL
+#  define pe_rsc_restarting                 0x00000800ULL
 
 #  define pe_rsc_stop                       0x00001000ULL
 #  define pe_rsc_reload                     0x00002000ULL
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 8b651ebd2..8002938b5 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1204,6 +1204,8 @@ schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
     enum rsc_role_e role = rsc->role;
     enum rsc_role_e next_role;
 
+    pe__set_resource_flags(rsc, pe_rsc_restarting);
+
     // Bring resource down to a stop on its current node
     while (role != RSC_ROLE_STOPPED) {
         next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
@@ -1235,6 +1237,8 @@ schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
         }
         role = next_role;
     }
+
+    pe__clear_resource_flags(rsc, pe_rsc_restarting);
 }
 
 void
-- 
2.27.0


From 871e2201d92520039df45062afc9120fd1fb0f30 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 1 Mar 2022 17:46:39 -0600
Subject: [PATCH 8/9] Refactor: scheduler: add expected node to primitive
 variant data

Nothing uses it yet
---
 include/crm/pengine/internal.h |  4 ++++
 lib/pengine/native.c           | 38 ++++++++++++++++++++++++++++++++++
 lib/pengine/variant.h          |  8 +++++--
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
index f949684b4..f69e6bcce 100644
--- a/include/crm/pengine/internal.h
+++ b/include/crm/pengine/internal.h
@@ -579,4 +579,8 @@ xmlNode *pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name);
 
 const char *pe__clone_child_id(pe_resource_t *rsc);
 
+void pe__update_expected_node(pe_resource_t *rsc, pe_node_t *node,
+                              int execution_status, int exit_status,
+                              int expected_exit_status);
+
 #endif
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index fa7dc8960..591d1c6f5 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1376,3 +1376,41 @@ pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_paren
 
     return TRUE;
 }
+
+/*!
+ * \internal
+ * \brief Set a resource's expected node if appropriate for a history result
+ *
+ * \param[in] rsc                   Resource to set expected node for
+ * \param[in] node                  Node to set as expected node
+ * \param[in] execution_status      History entry's execution status
+ * \param[in] exit_status           History entry's actual exit status
+ * \param[in] expected_exit_status  History entry's expected exit status
+ */
+void
+pe__update_expected_node(pe_resource_t *rsc, pe_node_t *node,
+                         int execution_status, int exit_status,
+                         int expected_exit_status)
+{
+    native_variant_data_t *native_data = NULL;
+
+    get_native_variant_data(native_data, rsc);
+
+    if ((rsc->recovery_type == recovery_stop_unexpected)
+        && (rsc->role > RSC_ROLE_STOPPED)
+        && (execution_status == PCMK_EXEC_DONE)
+        && (exit_status == expected_exit_status)) {
+        // Resource is active and was expected on this node
+        pe_rsc_trace(rsc, "Found expected node %s for %s",
+                     node->details->uname, rsc->id);
+        native_data->expected_node = node;
+        pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
+
+    } else if ((native_data->expected_node != NULL)
+               && (native_data->expected_node->details == node->details)) {
+        // Resource is not cleanly active here
+        pe_rsc_trace(rsc, "Clearing expected node for %s", rsc->id);
+        native_data->expected_node = NULL;
+        pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
+    }
+}
diff --git a/lib/pengine/variant.h b/lib/pengine/variant.h
index cabfbe81f..d8fefa9d6 100644
--- a/lib/pengine/variant.h
+++ b/lib/pengine/variant.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
  *
  * The version control history for this file may have further details.
  *
@@ -139,7 +139,11 @@ typedef struct group_variant_data_s {
 #  elif VARIANT_NATIVE
 
 typedef struct native_variant_data_s {
-    int dummy;
+    /* If the resource is multiply active, and has multiple-active set to
+     * stop_unexpected, this will be set to the node where the resource was
+     * found active by an operation with an expected result.
+     */
+    pe_node_t *expected_node;
 } native_variant_data_t;
 
 #    define get_native_variant_data(data, rsc)				\
-- 
2.27.0


From 0e4e17e972f1c3663389f18d8f8c527bd819b3c5 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 7 Apr 2022 10:20:00 -0500
Subject: [PATCH 9/9] Feature: scheduler: implement
 multiple-active=stop_unexpected

The default multiple-active policy of restarting the resource on all nodes
requires no special handling, because at least one of the locations will have
an unexpected rc, causing the resource to be marked as failed and restarted,
and StopRsc() creates stops on all nodes running the resource.

The new stop_unexpected behavior relies on most of the same handling, but
the action creation functions need to skip the node where the resource had the
expected result. For that, we set the new expected_node in the primitive's
variant data when unpacking a successful result, to be checked by those
functions.

Note that this still schedules a start for the resource, which is a pseudo-op
for the resource itself, but (properly) causes any dependent resources to be
restarted.

Fixes T23
---
 lib/pacemaker/pcmk_output.c       | 10 ++++
 lib/pacemaker/pcmk_sched_native.c | 94 ++++++++++++++++++++++++++++++-
 lib/pengine/unpack.c              |  1 +
 3 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
index 56963a93f..9a522a3e5 100644
--- a/lib/pacemaker/pcmk_output.c
+++ b/lib/pacemaker/pcmk_output.c
@@ -918,6 +918,16 @@ rsc_action_default(pcmk__output_t *out, va_list args)
     if (possible_matches) {
         stop = possible_matches->data;
         g_list_free(possible_matches);
+    } else if (pcmk_is_set(rsc->flags, pe_rsc_stop_unexpected)) {
+        /* The resource is multiply active with multiple-active set to
+         * stop_unexpected, and not stopping on its current node, but it should
+         * be stopping elsewhere.
+         */
+        possible_matches = pe__resource_actions(rsc, NULL, RSC_STOP, FALSE);
+        if (possible_matches != NULL) {
+            stop = possible_matches->data;
+            g_list_free(possible_matches);
+        }
     }
 
     possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, FALSE);
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 8002938b5..c0224849f 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1259,7 +1259,10 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
     enum rsc_role_e role = RSC_ROLE_UNKNOWN;
     enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
 
-    CRM_ASSERT(rsc);
+    native_variant_data_t *native_data = NULL;
+
+    get_native_variant_data(native_data, rsc);
+
     chosen = rsc->allocated_to;
     next_role = rsc->next_role;
     if (next_role == RSC_ROLE_UNKNOWN) {
@@ -1323,6 +1326,7 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
                        "(will stop on both nodes)",
                        rsc->id, rsc->partial_migration_source->details->uname,
                        rsc->partial_migration_target->details->uname);
+            multiply_active = false;
 
         } else {
             const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
@@ -1345,6 +1349,11 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
         allow_migrate = FALSE;
     }
 
+    if (!multiply_active) {
+        native_data->expected_node = NULL;
+        pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
+    }
+
     if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
         pe_rsc_trace(rsc, "Creating start action for %s to represent already pending start",
                      rsc->id);
@@ -1995,6 +2004,32 @@ native_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
     out->message(out, "rsc-action", rsc, current, next);
 }
 
+/*!
+ * \internal
+ * \brief Check whether a node is a multiply active resource's expected node
+ *
+ * \param[in] rsc   Resource to check
+ * \param[in] node  Node to check
+ *
+ * \return true if \p rsc is multiply active with multiple-active set to
+ *         stop_unexpected, and \p node is the node where it will remain active
+ * \note This assumes that the resource's next role cannot be changed to stopped
+ *       after this is called, which should be reasonable if status has already
+ *       been unpacked and resources have been assigned to nodes.
+ */
+static bool
+is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
+{
+    native_variant_data_t *native_data = NULL;
+
+    get_native_variant_data(native_data, rsc);
+    return pcmk_all_flags_set(rsc->flags,
+                              pe_rsc_stop_unexpected|pe_rsc_restarting)
+           && (rsc->next_role > RSC_ROLE_STOPPED)
+           && (native_data->expected_node != NULL) && (node != NULL)
+           && (native_data->expected_node->details == node->details);
+}
+
 gboolean
 StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
 {
@@ -2006,6 +2041,18 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
         pe_node_t *current = (pe_node_t *) gIter->data;
         pe_action_t *stop;
 
+        if (is_expected_node(rsc, current)) {
+            /* We are scheduling restart actions for a multiply active resource
+             * with multiple-active=stop_unexpected, and this is where it should
+             * not be stopped.
+             */
+            pe_rsc_trace(rsc,
+                         "Skipping stop of multiply active resource %s "
+                         "on expected node %s",
+                         rsc->id, current->details->uname);
+            continue;
+        }
+
         if (rsc->partial_migration_target) {
             if (rsc->partial_migration_target->details == current->details) {
                 pe_rsc_trace(rsc,
@@ -2029,6 +2076,17 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
 
         if(rsc->allocated_to == NULL) {
             pe_action_set_reason(stop, "node availability", TRUE);
+        } else if (pcmk_is_set(rsc->flags, pe_rsc_restarting)) {
+            native_variant_data_t *native_data = NULL;
+
+            get_native_variant_data(native_data, rsc);
+            if (native_data->expected_node != NULL) {
+                /* We are stopping a multiply active resource on a node that is
+                 * not its expected node, and we are still scheduling restart
+                 * actions, so the stop is for being multiply active.
+                 */
+                pe_action_set_reason(stop, "being multiply active", TRUE);
+            }
         }
 
         if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
@@ -2071,6 +2129,16 @@ StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_se
         pe__clear_action_flags(start, pe_action_optional);
     }
 
+    if (is_expected_node(rsc, next)) {
+        /* This could be a problem if the start becomes necessary for other
+         * reasons later.
+         */
+        pe_rsc_trace(rsc,
+                     "Start of multiply active resouce %s "
+                     "on expected node %s will be a pseudo-action",
+                     rsc->id, next->details->uname);
+        pe__set_action_flags(start, pe_action_pseudo);
+    }
 
     return TRUE;
 }
@@ -2084,6 +2152,7 @@ PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_
 
     CRM_ASSERT(rsc);
     CRM_CHECK(next != NULL, return FALSE);
+
     pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname);
 
     action_list = pe__resource_actions(rsc, next, RSC_START, TRUE);
@@ -2098,7 +2167,19 @@ PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_
     g_list_free(action_list);
 
     if (runnable) {
-        promote_action(rsc, next, optional);
+        pe_action_t *promote = promote_action(rsc, next, optional);
+
+        if (is_expected_node(rsc, next)) {
+            /* This could be a problem if the promote becomes necessary for
+             * other reasons later.
+             */
+            pe_rsc_trace(rsc,
+                         "Promotion of multiply active resouce %s "
+                         "on expected node %s will be a pseudo-action",
+                         rsc->id, next->details->uname);
+            pe__set_action_flags(promote, pe_action_pseudo);
+        }
+
         return TRUE;
     }
 
@@ -2122,6 +2203,15 @@ DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_s
     GList *gIter = NULL;
 
     CRM_ASSERT(rsc);
+
+    if (is_expected_node(rsc, next)) {
+        pe_rsc_trace(rsc,
+                     "Skipping demote of multiply active resource %s "
+                     "on expected node %s",
+                     rsc->id, next->details->uname);
+        return TRUE;
+    }
+
     pe_rsc_trace(rsc, "%s", rsc->id);
 
     /* CRM_CHECK(rsc->next_role == RSC_ROLE_UNPROMOTED, return FALSE); */
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index d218f523f..edaa9de48 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -3974,6 +3974,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
     }
 
 done:
+    pe__update_expected_node(rsc, node, status, rc, target_rc);
     pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s",
                  rsc->id, task, role2text(rsc->role),
                  role2text(rsc->next_role));
-- 
2.27.0