Blame SOURCES/0115-crm_resource-restart-fixes.patch

f3a210
From 020ff3810c12bbc6ef6ec212958871bb36b5859a Mon Sep 17 00:00:00 2001
f3a210
From: Ken Gaillot <kgaillot@redhat.com>
f3a210
Date: Sun, 12 Jun 2016 13:37:59 -0500
f3a210
Subject: [PATCH 1/5] Fix: tools: correctly count starting resources when doing
f3a210
 crm_resource --restart
f3a210
f3a210
---
f3a210
 tools/crm_resource_runtime.c | 5 +++--
f3a210
 1 file changed, 3 insertions(+), 2 deletions(-)
f3a210
f3a210
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
f3a210
index 82856ad..b246e34 100644
f3a210
--- a/tools/crm_resource_runtime.c
f3a210
+++ b/tools/crm_resource_runtime.c
f3a210
@@ -1178,7 +1178,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
             dump_list(list_delta, "Delta");
f3a210
         }
f3a210
 
f3a210
-        crm_trace("%d (was %d) resources remaining", before, g_list_length(list_delta));
f3a210
+        crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before);
f3a210
         if(before == g_list_length(list_delta)) {
f3a210
             /* aborted during stop phase, print the contents of list_delta */
f3a210
             fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta));
f3a210
@@ -1209,6 +1209,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
 
f3a210
     step_timeout_s = timeout / sleep_interval;
f3a210
     while(g_list_length(list_delta) > 0) {
f3a210
+        before = g_list_length(list_delta);
f3a210
         if(timeout_ms == 0) {
f3a210
             step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval;
f3a210
         }
f3a210
@@ -1241,7 +1242,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
             goto failure;
f3a210
         }
f3a210
 
f3a210
-    } while(g_list_length(list_delta) > 0);
f3a210
+    }
f3a210
 
f3a210
     free(rsc_id);
f3a210
     return pcmk_ok;
f3a210
-- 
f3a210
1.8.3.1
f3a210
f3a210
f3a210
From 06cc891dd16b1e1b8a004ed364a9f46c64127ffd Mon Sep 17 00:00:00 2001
f3a210
From: Ken Gaillot <kgaillot@redhat.com>
f3a210
Date: Sun, 12 Jun 2016 15:05:04 -0500
f3a210
Subject: [PATCH 2/5] Fix: tools: remember any existing target-role when doing
f3a210
 crm_resource --restart
f3a210
f3a210
---
f3a210
 tools/crm_resource_runtime.c | 26 +++++++++++++++++++++++++-
f3a210
 1 file changed, 25 insertions(+), 1 deletion(-)
f3a210
f3a210
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
f3a210
index b246e34..1567559 100644
f3a210
--- a/tools/crm_resource_runtime.c
f3a210
+++ b/tools/crm_resource_runtime.c
f3a210
@@ -1070,6 +1070,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
 
f3a210
     bool is_clone = FALSE;
f3a210
     char *rsc_id = NULL;
f3a210
+    char *orig_target_role = NULL;
f3a210
 
f3a210
     GList *list_delta = NULL;
f3a210
     GList *target_active = NULL;
f3a210
@@ -1088,7 +1089,9 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
         return -ENXIO;
f3a210
     }
f3a210
 
f3a210
+    /* We might set the target-role meta-attribute */
f3a210
     attr_set_type = XML_TAG_META_SETS;
f3a210
+
f3a210
     rsc_id = strdup(rsc->id);
f3a210
     if(rsc->variant > pe_group) {
f3a210
         is_clone = TRUE;
f3a210
@@ -1127,10 +1130,20 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
     dump_list(current_active, "Origin");
f3a210
 
f3a210
     if(is_clone && host) {
f3a210
+        /* Stop the clone instance by banning it from the host */
f3a210
         BE_QUIET = TRUE;
f3a210
         rc = cli_resource_ban(rsc_id, host, NULL, cib);
f3a210
 
f3a210
     } else {
f3a210
+        /* Stop the resource by setting target-role to Stopped.
f3a210
+         * Remember any existing target-role so we can restore it later
f3a210
+         * (though it only makes any difference if it's Slave).
f3a210
+         */
f3a210
+        char *lookup_id = clone_strip(rsc->id);
f3a210
+
f3a210
+        find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL,
f3a210
+                           NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role);
f3a210
+        free(lookup_id);
f3a210
         rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, &data_set);
f3a210
     }
f3a210
     if(rc != pcmk_ok) {
f3a210
@@ -1192,6 +1205,13 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
     if(is_clone && host) {
f3a210
         rc = cli_resource_clear(rsc_id, host, NULL, cib);
f3a210
 
f3a210
+    } else if (orig_target_role) {
f3a210
+        rc = cli_resource_update_attribute(rsc_id, NULL, NULL,
f3a210
+                                           XML_RSC_ATTR_TARGET_ROLE,
f3a210
+                                           orig_target_role, FALSE, cib,
f3a210
+                                           &data_set);
f3a210
+        free(orig_target_role);
f3a210
+        orig_target_role = NULL;
f3a210
     } else {
f3a210
         rc = cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set);
f3a210
     }
f3a210
@@ -1250,7 +1270,11 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
   failure:
f3a210
     if(is_clone && host) {
f3a210
         cli_resource_clear(rsc_id, host, NULL, cib);
f3a210
-
f3a210
+    } else if (orig_target_role) {
f3a210
+        cli_resource_update_attribute(rsc_id, NULL, NULL,
f3a210
+                                      XML_RSC_ATTR_TARGET_ROLE,
f3a210
+                                      orig_target_role, FALSE, cib, &data_set);
f3a210
+        free(orig_target_role);
f3a210
     } else {
f3a210
         cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set);
f3a210
     }
f3a210
-- 
f3a210
1.8.3.1
f3a210
f3a210
f3a210
From aaed9569272a5d4704aede32d9d1cf5d76085e6b Mon Sep 17 00:00:00 2001
f3a210
From: Ken Gaillot <kgaillot@redhat.com>
f3a210
Date: Sun, 12 Jun 2016 15:36:56 -0500
f3a210
Subject: [PATCH 3/5] Fix: tools: avoid memory leaks in crm_resource --restart
f3a210
f3a210
---
f3a210
 tools/crm_resource_runtime.c | 39 +++++++++++++++++++++++++++++++++++++--
f3a210
 1 file changed, 37 insertions(+), 2 deletions(-)
f3a210
f3a210
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
f3a210
index 1567559..6126e3c 100644
f3a210
--- a/tools/crm_resource_runtime.c
f3a210
+++ b/tools/crm_resource_runtime.c
f3a210
@@ -1148,6 +1148,12 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
     }
f3a210
     if(rc != pcmk_ok) {
f3a210
         fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc);
f3a210
+        if (current_active) {
f3a210
+            g_list_free_full(current_active, free);
f3a210
+        }
f3a210
+        if (restart_target_active) {
f3a210
+            g_list_free_full(restart_target_active, free);
f3a210
+        }
f3a210
         free(rsc_id);
f3a210
         return crm_exit(rc);
f3a210
     }
f3a210
@@ -1185,7 +1191,11 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
                 goto failure;
f3a210
             }
f3a210
 
f3a210
+            if (current_active) {
f3a210
+                g_list_free_full(current_active, free);
f3a210
+            }
f3a210
             current_active = get_active_resources(host, &data_set);
f3a210
+            g_list_free(list_delta);
f3a210
             list_delta = subtract_lists(current_active, target_active);
f3a210
             dump_list(current_active, "Current");
f3a210
             dump_list(list_delta, "Delta");
f3a210
@@ -1222,7 +1232,13 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
         return crm_exit(rc);
f3a210
     }
f3a210
 
f3a210
+    if (target_active) {
f3a210
+        g_list_free_full(target_active, free);
f3a210
+    }
f3a210
     target_active = restart_target_active;
f3a210
+    if (list_delta) {
f3a210
+        g_list_free(list_delta);
f3a210
+    }
f3a210
     list_delta = subtract_lists(target_active, current_active);
f3a210
     fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta));
f3a210
     display_list(list_delta, " * ");
f3a210
@@ -1248,7 +1264,11 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
                 goto failure;
f3a210
             }
f3a210
 
f3a210
+            if (current_active) {
f3a210
+                g_list_free_full(current_active, free);
f3a210
+            }
f3a210
             current_active = get_active_resources(host, &data_set);
f3a210
+            g_list_free(list_delta);
f3a210
             list_delta = subtract_lists(target_active, current_active);
f3a210
             dump_list(current_active, "Current");
f3a210
             dump_list(list_delta, "Delta");
f3a210
@@ -1264,8 +1284,8 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
 
f3a210
     }
f3a210
 
f3a210
-    free(rsc_id);
f3a210
-    return pcmk_ok;
f3a210
+    rc = pcmk_ok;
f3a210
+    goto done;
f3a210
 
f3a210
   failure:
f3a210
     if(is_clone && host) {
f3a210
@@ -1278,6 +1298,21 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
     } else {
f3a210
         cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set);
f3a210
     }
f3a210
+
f3a210
+done:
f3a210
+    if (list_delta) {
f3a210
+        g_list_free(list_delta);
f3a210
+    }
f3a210
+    if (current_active) {
f3a210
+        g_list_free_full(current_active, free);
f3a210
+    }
f3a210
+    if (target_active && (target_active != restart_target_active)) {
f3a210
+        g_list_free_full(target_active, free);
f3a210
+    }
f3a210
+    if (restart_target_active) {
f3a210
+        g_list_free_full(restart_target_active, free);
f3a210
+    }
f3a210
+    cleanup_alloc_calculations(&data_set);
f3a210
     free(rsc_id);
f3a210
     return rc;
f3a210
 }
f3a210
-- 
f3a210
1.8.3.1
f3a210
f3a210
f3a210
From 847723f7175a0f008eeebe2d3b333fea4570a228 Mon Sep 17 00:00:00 2001
f3a210
From: Ken Gaillot <kgaillot@redhat.com>
f3a210
Date: Sun, 12 Jun 2016 16:10:00 -0500
f3a210
Subject: [PATCH 4/5] Fix: tools: don't assume all resources restart on same
f3a210
 node with crm_resource --restart
f3a210
f3a210
---
f3a210
 tools/crm_resource_runtime.c | 14 +++++++++++---
f3a210
 1 file changed, 11 insertions(+), 3 deletions(-)
f3a210
f3a210
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
f3a210
index 6126e3c..753ba2d 100644
f3a210
--- a/tools/crm_resource_runtime.c
f3a210
+++ b/tools/crm_resource_runtime.c
f3a210
@@ -1044,6 +1044,9 @@ max_delay_in(pe_working_set_t * data_set, GList *resources)
f3a210
     return 5 + (max_delay / 1000);
f3a210
 }
f3a210
 
f3a210
+#define waiting_for_starts(d, r, h) ((g_list_length(d) > 0) || \
f3a210
+                                    (resource_is_running_on((r), (h)) == FALSE))
f3a210
+
f3a210
 /*!
f3a210
  * \internal
f3a210
  * \brief Restart a resource (on a particular host if requested).
f3a210
@@ -1244,14 +1247,15 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
     display_list(list_delta, " * ");
f3a210
 
f3a210
     step_timeout_s = timeout / sleep_interval;
f3a210
-    while(g_list_length(list_delta) > 0) {
f3a210
+    while (waiting_for_starts(list_delta, rsc, host)) {
f3a210
         before = g_list_length(list_delta);
f3a210
         if(timeout_ms == 0) {
f3a210
             step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval;
f3a210
         }
f3a210
 
f3a210
         /* We probably don't need the entire step timeout */
f3a210
-        for(lpc = 0; lpc < step_timeout_s && g_list_length(list_delta) > 0; lpc++) {
f3a210
+        for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) {
f3a210
+
f3a210
             sleep(sleep_interval);
f3a210
             if(timeout) {
f3a210
                 timeout -= sleep_interval;
f3a210
@@ -1267,7 +1271,11 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
             if (current_active) {
f3a210
                 g_list_free_full(current_active, free);
f3a210
             }
f3a210
-            current_active = get_active_resources(host, &data_set);
f3a210
+
f3a210
+            /* It's OK if dependent resources moved to a different node,
f3a210
+             * so we check active resources on all nodes.
f3a210
+             */
f3a210
+            current_active = get_active_resources(NULL, &data_set);
f3a210
             g_list_free(list_delta);
f3a210
             list_delta = subtract_lists(target_active, current_active);
f3a210
             dump_list(current_active, "Current");
f3a210
-- 
f3a210
1.8.3.1
f3a210
f3a210
f3a210
From f5afdc1badbe38781d049c86e8a2f51b17636072 Mon Sep 17 00:00:00 2001
f3a210
From: Ken Gaillot <kgaillot@redhat.com>
f3a210
Date: Mon, 13 Jun 2016 16:12:28 -0500
f3a210
Subject: [PATCH 5/5] Fix: tools: properly handle crm_resource --restart with a
f3a210
 resource in a group
f3a210
f3a210
---
f3a210
 tools/crm_resource_runtime.c | 34 +++++++++++++++++++++++++---------
f3a210
 1 file changed, 25 insertions(+), 9 deletions(-)
f3a210
f3a210
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
f3a210
index 753ba2d..b714a96 100644
f3a210
--- a/tools/crm_resource_runtime.c
f3a210
+++ b/tools/crm_resource_runtime.c
f3a210
@@ -817,19 +817,35 @@ static bool resource_is_running_on(resource_t *rsc, const char *host)
f3a210
     return found;
f3a210
 }
f3a210
 
f3a210
-static GList *get_active_resources(const char *host, pe_working_set_t *data_set) 
f3a210
+/*!
f3a210
+ * \internal
f3a210
+ * \brief Create a list of all resources active on host from a given list
f3a210
+ *
f3a210
+ * \param[in] host      Name of host to check whether resources are active
f3a210
+ * \param[in] rsc_list  List of resources to check
f3a210
+ *
f3a210
+ * \return New list of resources from list that are active on host
f3a210
+ */
f3a210
+static GList *
f3a210
+get_active_resources(const char *host, GList *rsc_list)
f3a210
 {
f3a210
     GList *rIter = NULL;
f3a210
     GList *active = NULL;
f3a210
 
f3a210
-    for (rIter = data_set->resources; rIter != NULL; rIter = rIter->next) {
f3a210
+    for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) {
f3a210
         resource_t *rsc = (resource_t *) rIter->data;
f3a210
 
f3a210
-        if(resource_is_running_on(rsc, host)) {
f3a210
+        /* Expand groups to their members, because if we're restarting a member
f3a210
+         * other than the first, we can't otherwise tell which resources are
f3a210
+         * stopping and starting.
f3a210
+         */
f3a210
+        if (rsc->variant == pe_group) {
f3a210
+            active = g_list_concat(active,
f3a210
+                                   get_active_resources(host, rsc->children));
f3a210
+        } else if (resource_is_running_on(rsc, host)) {
f3a210
             active = g_list_append(active, strdup(rsc->id));
f3a210
         }
f3a210
     }
f3a210
-
f3a210
     return active;
f3a210
 }
f3a210
 
f3a210
@@ -1127,8 +1143,8 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
         return rc;
f3a210
     }
f3a210
 
f3a210
-    restart_target_active = get_active_resources(host, &data_set);
f3a210
-    current_active = get_active_resources(host, &data_set);
f3a210
+    restart_target_active = get_active_resources(host, data_set.resources);
f3a210
+    current_active = get_active_resources(host, data_set.resources);
f3a210
 
f3a210
     dump_list(current_active, "Origin");
f3a210
 
f3a210
@@ -1167,7 +1183,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
         goto failure;
f3a210
     }
f3a210
 
f3a210
-    target_active = get_active_resources(host, &data_set);
f3a210
+    target_active = get_active_resources(host, data_set.resources);
f3a210
     dump_list(target_active, "Target");
f3a210
 
f3a210
     list_delta = subtract_lists(current_active, target_active);
f3a210
@@ -1197,7 +1213,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
             if (current_active) {
f3a210
                 g_list_free_full(current_active, free);
f3a210
             }
f3a210
-            current_active = get_active_resources(host, &data_set);
f3a210
+            current_active = get_active_resources(host, data_set.resources);
f3a210
             g_list_free(list_delta);
f3a210
             list_delta = subtract_lists(current_active, target_active);
f3a210
             dump_list(current_active, "Current");
f3a210
@@ -1275,7 +1291,7 @@ cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t *
f3a210
             /* It's OK if dependent resources moved to a different node,
f3a210
              * so we check active resources on all nodes.
f3a210
              */
f3a210
-            current_active = get_active_resources(NULL, &data_set);
f3a210
+            current_active = get_active_resources(NULL, data_set.resources);
f3a210
             g_list_free(list_delta);
f3a210
             list_delta = subtract_lists(target_active, current_active);
f3a210
             dump_list(current_active, "Current");
f3a210
-- 
f3a210
1.8.3.1
f3a210