Blob Blame History Raw
From 039b778b07f256dd564171430c5427dfb9489a58 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Fri, 8 Dec 2017 14:47:40 +0100
Subject: [PATCH 1/8] Refactor: tools: crm_resource - Functionize cleaning up
 resource failures

---
 tools/crm_resource.c         | 26 ++------------------------
 tools/crm_resource.h         |  3 +++
 tools/crm_resource_runtime.c | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index 0557892..331adf6 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -1103,31 +1103,9 @@ main(int argc, char **argv)
 
     } else if (rsc_cmd == 'C' && just_errors) {
         crmd_replies_needed = 0;
-        for (xmlNode *xml_op = __xml_first_child(data_set.failed); xml_op != NULL;
-             xml_op = __xml_next(xml_op)) {
-
-            const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
-            const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
-            const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
-            const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
-
-            if(resource_name == NULL) {
-                continue;
-            } else if(host_uname && safe_str_neq(host_uname, node)) {
-                continue;
-            } else if(rsc_id && safe_str_neq(rsc_id, resource_name)) {
-                continue;
-            } else if(operation && safe_str_neq(operation, task)) {
-                continue;
-            } else if(interval && safe_str_neq(interval, task_interval)) {
-                continue;
-            }
 
-            crm_debug("Erasing %s failure for %s (%s detected) on %s",
-                      task, rsc->id, resource_name, node);
-            rc = cli_resource_delete(crmd_channel, node, rsc, task,
-                                     task_interval, &data_set);
-        }
+        rc = cli_resource_delete_failures(crmd_channel, host_uname, rsc, operation,
+                                          interval, &data_set);
 
         if(rsc && (rc == pcmk_ok) && (BE_QUIET == FALSE)) {
             /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */
diff --git a/tools/crm_resource.h b/tools/crm_resource.h
index 0b8dd2a..e28c9ef 100644
--- a/tools/crm_resource.h
+++ b/tools/crm_resource.h
@@ -76,6 +76,9 @@ int cli_resource_search(resource_t *rsc, const char *requested_name,
 int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
                         resource_t *rsc, const char *operation,
                         const char *interval, pe_working_set_t *data_set);
+int cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname,
+                                 resource_t *rsc, const char *operation,
+                                 const char *interval, pe_working_set_t *data_set);
 int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib);
 int cli_resource_move(resource_t *rsc, const char *rsc_id,
                       const char *host_name, cib_t *cib,
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 5004935..9aa7b7e 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -681,6 +681,42 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
     return rc;
 }
 
+int
+cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname,
+                    resource_t *rsc, const char *operation,
+                    const char *interval, pe_working_set_t *data_set)
+{
+    int rc = pcmk_ok;
+
+    for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
+         xml_op = __xml_next(xml_op)) {
+
+        const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
+        const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
+        const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
+        const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
+
+        if(resource_name == NULL) {
+            continue;
+        } else if(host_uname && safe_str_neq(host_uname, node)) {
+            continue;
+        } else if(rsc->id && safe_str_neq(rsc->id, resource_name)) {
+            continue;
+        } else if(operation && safe_str_neq(operation, task)) {
+            continue;
+        } else if(interval && safe_str_neq(interval, task_interval)) {
+            continue;
+        }
+
+        crm_debug("Erasing %s failure for %s (%s detected) on %s",
+                  task, rsc->id, resource_name, node);
+        rc = cli_resource_delete(crmd_channel, node, rsc, task,
+                                 task_interval, data_set);
+    }
+
+    return rc;
+}
+
 void
 cli_resource_check(cib_t * cib_conn, resource_t *rsc)
 {
-- 
1.8.3.1


From 4ae40b495305b87f59e439de3298910c243c171d Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Fri, 8 Dec 2017 16:22:54 +0100
Subject: [PATCH 2/8] Fix: tools: crm_resource --cleanup for non-primitive
 resources

---
 tools/crm_resource_runtime.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 9aa7b7e..98cd27f 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -688,6 +688,24 @@ cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname,
 {
     int rc = pcmk_ok;
 
+    if (rsc == NULL) {
+        return -ENXIO;
+
+    } else if (rsc->children) {
+        GListPtr lpc = NULL;
+
+        for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) {
+            resource_t *child = (resource_t *) lpc->data;
+
+            rc = cli_resource_delete_failures(crmd_channel, host_uname, child, operation,
+                                              interval, data_set);
+            if(rc != pcmk_ok) {
+                return rc;
+            }
+        }
+        return pcmk_ok;
+    }
+
     for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
          xml_op = __xml_next(xml_op)) {
 
-- 
1.8.3.1


From 6ce88cdbcbe15b7e81a4234eb92a93663243a7ff Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 11 Dec 2017 12:23:06 -0600
Subject: [PATCH 3/8] Fix: tools: crm_resource --cleanup

The new "failures only" mode of crm_resource --cleanup had multiple issues,
including not working without --resource specified, comparing a
user-provided interval string against a milliseconds interval, and
treating an unspecified interval as all intervals rather than 0 (but only
when clearing LRM history entries).
---
 tools/crm_resource.c         |  35 +++---
 tools/crm_resource.h         |   9 +-
 tools/crm_resource_runtime.c | 258 ++++++++++++++++++++++++++++++-------------
 3 files changed, 202 insertions(+), 100 deletions(-)

diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index 331adf6..e3f8f86 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -1101,14 +1101,20 @@ main(int argc, char **argv)
         rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id,
                                            prop_name, cib_conn, &data_set);
 
-    } else if (rsc_cmd == 'C' && just_errors) {
+    } else if ((rsc_cmd == 'C') && rsc) {
+        if (do_force == FALSE) {
+            rsc = uber_parent(rsc);
+        }
         crmd_replies_needed = 0;
 
-        rc = cli_resource_delete_failures(crmd_channel, host_uname, rsc, operation,
-                                          interval, &data_set);
+        crm_debug("%s of %s (%s requested) on %s",
+                  (just_errors? "Clearing failures" : "Re-checking the state"),
+                  rsc->id, rsc_id, (host_uname? host_uname : "all hosts"));
+        rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation,
+                                 interval, just_errors, &data_set);
 
-        if(rsc && (rc == pcmk_ok) && (BE_QUIET == FALSE)) {
-            /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */
+        if ((rc == pcmk_ok) && !BE_QUIET) {
+            // Show any reasons why resource might stay stopped
             cli_resource_check(cib_conn, rsc);
         }
 
@@ -1116,22 +1122,9 @@ main(int argc, char **argv)
             start_mainloop();
         }
 
-    } else if ((rsc_cmd == 'C') && rsc) {
-        if(do_force == FALSE) {
-            rsc = uber_parent(rsc);
-        }
-
-        crm_debug("Re-checking the state of %s (%s requested) on %s",
-                  rsc->id, rsc_id, host_uname);
-        crmd_replies_needed = 0;
-        rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation,
-                                 interval, &data_set);
-
-        if(rc == pcmk_ok && BE_QUIET == FALSE) {
-            /* Now check XML_RSC_ATTR_TARGET_ROLE and XML_RSC_ATTR_MANAGED */
-            cli_resource_check(cib_conn, rsc);
-        }
-
+    } else if (rsc_cmd == 'C' && just_errors) {
+        rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval,
+                             &data_set);
         if (rc == pcmk_ok) {
             start_mainloop();
         }
diff --git a/tools/crm_resource.h b/tools/crm_resource.h
index e28c9ef..0ac51f2 100644
--- a/tools/crm_resource.h
+++ b/tools/crm_resource.h
@@ -75,10 +75,11 @@ int cli_resource_search(resource_t *rsc, const char *requested_name,
                         pe_working_set_t *data_set);
 int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
                         resource_t *rsc, const char *operation,
-                        const char *interval, pe_working_set_t *data_set);
-int cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname,
-                                 resource_t *rsc, const char *operation,
-                                 const char *interval, pe_working_set_t *data_set);
+                        const char *interval, bool just_failures,
+                        pe_working_set_t *data_set);
+int cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name,
+                    const char *operation, const char *interval,
+                    pe_working_set_t *data_set);
 int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib);
 int cli_resource_move(resource_t *rsc, const char *rsc_id,
                       const char *host_name, cib_t *cib,
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 98cd27f..2cc2bec 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -558,15 +558,129 @@ rsc_fail_name(resource_t *rsc)
     return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name);
 }
 
+static int
+clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname,
+                  const char *rsc_id, pe_working_set_t *data_set)
+{
+    int rc = pcmk_ok;
+
+    /* Erase the resource's entire LRM history in the CIB, even if we're only
+     * clearing a single operation's fail count. If we erased only entries for a
+     * single operation, we might wind up with a wrong idea of the current
+     * resource state, and we might not re-probe the resource.
+     */
+    rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id,
+                         TRUE, data_set);
+    if (rc != pcmk_ok) {
+        return rc;
+    }
+    crmd_replies_needed++;
+
+    crm_trace("Processing %d mainloop inputs", crmd_replies_needed);
+    while (g_main_context_iteration(NULL, FALSE)) {
+        crm_trace("Processed mainloop input, %d still remaining",
+                  crmd_replies_needed);
+    }
+
+    if (crmd_replies_needed < 0) {
+        crmd_replies_needed = 0;
+    }
+    return rc;
+}
+
+static int
+clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
+                   const char *rsc_id, const char *operation,
+                   const char *interval, pe_working_set_t *data_set)
+{
+    int rc = pcmk_ok;
+    const char *failed_value = NULL;
+    const char *interval_ms_str = NULL;
+    GHashTable *rscs = NULL;
+    GHashTableIter iter;
+
+    /* Create a hash table to use as a set of resources to clean. This lets us
+     * clean each resource only once (per node) regardless of how many failed
+     * operations it has.
+     */
+    rscs = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL);
+
+    // Normalize interval to milliseconds for comparison to history entry
+    if (operation) {
+        interval_ms_str = crm_strdup_printf("%llu", crm_get_interval(interval));
+    }
+
+    for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
+         xml_op = __xml_next(xml_op)) {
+
+        // No resource specified means all resources match
+        failed_value = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
+        if (rsc_id == NULL) {
+            rsc_id = failed_value;
+        } else if (safe_str_neq(rsc_id, failed_value)) {
+            continue;
+        }
+
+        // Host name should always have been provided by this point
+        failed_value = crm_element_value(xml_op, XML_ATTR_UNAME);
+        if (safe_str_neq(node_name, failed_value)) {
+            continue;
+        }
+
+        // No operation specified means all operations match
+        if (operation) {
+            failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
+            if (safe_str_neq(operation, failed_value)) {
+                continue;
+            }
+
+            // Interval (if operation was specified) defaults to 0 (not all)
+            failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
+            if (safe_str_neq(interval_ms_str, failed_value)) {
+                continue;
+            }
+        }
+
+        g_hash_table_add(rscs, (gpointer) rsc_id);
+    }
+
+    g_hash_table_iter_init(&iter, rscs);
+    while (g_hash_table_iter_next(&iter, (gpointer *) &rsc_id, NULL)) {
+        crm_debug("Erasing failures of %s on %s", rsc_id, node_name);
+        rc = clear_rsc_history(crmd_channel, node_name, rsc_id, data_set);
+        if (rc != pcmk_ok) {
+            return rc;
+        }
+    }
+    g_hash_table_destroy(rscs);
+    return rc;
+}
+
+static int
+clear_rsc_fail_attrs(resource_t *rsc, const char *operation,
+                     const char *interval, node_t *node)
+{
+    int rc = pcmk_ok;
+    int attr_options = attrd_opt_none;
+    char *rsc_name = rsc_fail_name(rsc);
+
+    if (is_remote_node(node)) {
+        attr_options |= attrd_opt_remote;
+    }
+    rc = attrd_clear_delegate(NULL, node->details->uname, rsc_name, operation,
+                              interval, NULL, attr_options);
+    free(rsc_name);
+    return rc;
+}
+
 int
 cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
                     resource_t *rsc, const char *operation,
-                    const char *interval, pe_working_set_t *data_set)
+                    const char *interval, bool just_failures,
+                    pe_working_set_t *data_set)
 {
     int rc = pcmk_ok;
     node_t *node = NULL;
-    char *rsc_name = NULL;
-    int attr_options = attrd_opt_none;
 
     if (rsc == NULL) {
         return -ENXIO;
@@ -578,8 +692,8 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
             resource_t *child = (resource_t *) lpc->data;
 
             rc = cli_resource_delete(crmd_channel, host_uname, child, operation,
-                                     interval, data_set);
-            if(rc != pcmk_ok) {
+                                     interval, just_failures, data_set);
+            if (rc != pcmk_ok) {
                 return rc;
             }
         }
@@ -611,8 +725,13 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
             node = (node_t *) lpc->data;
 
             if (node->details->online) {
-                cli_resource_delete(crmd_channel, node->details->uname, rsc,
-                                    operation, interval, data_set);
+                rc = cli_resource_delete(crmd_channel, node->details->uname,
+                                         rsc, operation, interval,
+                                         just_failures, data_set);
+            }
+            if (rc != pcmk_ok) {
+                g_list_free(nodes);
+                return rc;
             }
         }
 
@@ -637,102 +756,91 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname,
     if (crmd_channel == NULL) {
         printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n",
                rsc->id, host_uname);
-        return rc;
-     }
+        return pcmk_ok;
+    }
 
-    /* Erase the resource's entire LRM history in the CIB, even if we're only
-     * clearing a single operation's fail count. If we erased only entries for a
-     * single operation, we might wind up with a wrong idea of the current
-     * resource state, and we might not re-probe the resource.
-     */
-    rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id,
-                         TRUE, data_set);
+    rc = clear_rsc_fail_attrs(rsc, operation, interval, node);
     if (rc != pcmk_ok) {
-        printf("Unable to clean up %s history on %s: %s\n",
-               rsc->id, host_uname, pcmk_strerror(rc));
+        printf("Unable to clean up %s failures on %s: %s\n",
+                rsc->id, host_uname, pcmk_strerror(rc));
         return rc;
     }
-    crmd_replies_needed++;
 
-    crm_trace("Processing %d mainloop inputs", crmd_replies_needed);
-    while(g_main_context_iteration(NULL, FALSE)) {
-        crm_trace("Processed mainloop input, %d still remaining",
-                  crmd_replies_needed);
-    }
-
-    if(crmd_replies_needed < 0) {
-        crmd_replies_needed = 0;
-    }
-
-    rsc_name = rsc_fail_name(rsc);
-    if (is_remote_node(node)) {
-        attr_options |= attrd_opt_remote;
+    if (just_failures) {
+        rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation,
+                                interval, data_set);
+    } else {
+        rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set);
     }
-    rc = attrd_clear_delegate(NULL, host_uname, rsc_name, operation, interval,
-                              NULL, attr_options);
     if (rc != pcmk_ok) {
-        printf("Cleaned %s history on %s, but unable to clear failures: %s\n",
+        printf("Cleaned %s failures on %s, but unable to clean history: %s\n",
                rsc->id, host_uname, pcmk_strerror(rc));
     } else {
         printf("Cleaned up %s on %s\n", rsc->id, host_uname);
     }
-    free(rsc_name);
-
     return rc;
 }
 
 int
-cli_resource_delete_failures(crm_ipc_t *crmd_channel, const char *host_uname,
-                    resource_t *rsc, const char *operation,
-                    const char *interval, pe_working_set_t *data_set)
+cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name,
+                const char *operation, const char *interval,
+                pe_working_set_t *data_set)
 {
+    int attr_options = attrd_opt_none;
     int rc = pcmk_ok;
+    const char *display_name = node_name? node_name : "all nodes";
 
-    if (rsc == NULL) {
-        return -ENXIO;
-
-    } else if (rsc->children) {
-        GListPtr lpc = NULL;
+    if (crmd_channel == NULL) {
+        printf("Dry run: skipping clean-up of %s due to CIB_file\n",
+               display_name);
+        return pcmk_ok;
+    }
+    crmd_replies_needed = 0;
 
-        for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) {
-            resource_t *child = (resource_t *) lpc->data;
+    if (node_name) {
+        node_t *node = pe_find_node(data_set->nodes, node_name);
 
-            rc = cli_resource_delete_failures(crmd_channel, host_uname, child, operation,
-                                              interval, data_set);
-            if(rc != pcmk_ok) {
-                return rc;
-            }
+        if (node == NULL) {
+            CMD_ERR("Unknown node: %s", node_name);
+            return -ENXIO;
+        }
+        if (is_remote_node(node)) {
+            attr_options |= attrd_opt_remote;
         }
-        return pcmk_ok;
     }
 
-    for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
-         xml_op = __xml_next(xml_op)) {
-
-        const char *node = crm_element_value(xml_op, XML_ATTR_UNAME);
-        const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
-        const char *task_interval = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
-        const char *resource_name = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
+    rc = attrd_clear_delegate(NULL, node_name, NULL, operation, interval,
+                              NULL, attr_options);
+    if (rc != pcmk_ok) {
+        printf("Unable to clean up all failures on %s: %s\n",
+                display_name, pcmk_strerror(rc));
+        return rc;
+    }
 
-        if(resource_name == NULL) {
-            continue;
-        } else if(host_uname && safe_str_neq(host_uname, node)) {
-            continue;
-        } else if(rsc->id && safe_str_neq(rsc->id, resource_name)) {
-            continue;
-        } else if(operation && safe_str_neq(operation, task)) {
-            continue;
-        } else if(interval && safe_str_neq(interval, task_interval)) {
-            continue;
+    if (node_name) {
+        rc = clear_rsc_failures(crmd_channel, node_name, NULL,
+                                operation, interval, data_set);
+        if (rc != pcmk_ok) {
+            printf("Cleaned all resource failures on %s, but unable to clean history: %s\n",
+                   node_name, pcmk_strerror(rc));
+            return rc;
         }
+    } else {
+        for (GList *iter = data_set->nodes; iter; iter = iter->next) {
+            pe_node_t *node = (pe_node_t *) iter->data;
 
-        crm_debug("Erasing %s failure for %s (%s detected) on %s",
-                  task, rsc->id, resource_name, node);
-        rc = cli_resource_delete(crmd_channel, node, rsc, task,
-                                 task_interval, data_set);
+            rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL,
+                                    operation, interval, data_set);
+            if (rc != pcmk_ok) {
+                printf("Cleaned all resource failures on all nodes, but unable to clean history on %s: %s\n",
+                       node->details->uname, pcmk_strerror(rc));
+                return rc;
+            }
+        }
     }
 
-    return rc;
+    printf("Cleaned up all resources on %s\n", display_name);
+    return pcmk_ok;
 }
 
 void
-- 
1.8.3.1


From 0b6c3b3064401c8f0ebb48ccfd11f43dc2dc2b1b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 12 Dec 2017 10:02:22 -0600
Subject: [PATCH 4/8] Fix: tools: crm_resource --cleanup with no resource
 specified

7a813755 failed to completely fix --cleanup without --resource
---
 tools/crm_resource_runtime.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 2cc2bec..ce86a49 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -595,6 +595,7 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
 {
     int rc = pcmk_ok;
     const char *failed_value = NULL;
+    const char *failed_id = NULL;
     const char *interval_ms_str = NULL;
     GHashTable *rscs = NULL;
     GHashTableIter iter;
@@ -613,11 +614,14 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
     for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL;
          xml_op = __xml_next(xml_op)) {
 
+        failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
+        if (failed_id == NULL) {
+            // Malformed history entry, should never happen
+            continue;
+        }
+
         // No resource specified means all resources match
-        failed_value = crm_element_value(xml_op, XML_LRM_ATTR_RSCID);
-        if (rsc_id == NULL) {
-            rsc_id = failed_value;
-        } else if (safe_str_neq(rsc_id, failed_value)) {
+        if (rsc_id && safe_str_neq(rsc_id, failed_id)) {
             continue;
         }
 
@@ -641,13 +645,13 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
             }
         }
 
-        g_hash_table_add(rscs, (gpointer) rsc_id);
+        g_hash_table_add(rscs, (gpointer) failed_id);
     }
 
     g_hash_table_iter_init(&iter, rscs);
-    while (g_hash_table_iter_next(&iter, (gpointer *) &rsc_id, NULL)) {
-        crm_debug("Erasing failures of %s on %s", rsc_id, node_name);
-        rc = clear_rsc_history(crmd_channel, node_name, rsc_id, data_set);
+    while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) {
+        crm_debug("Erasing failures of %s on %s", failed_id, node_name);
+        rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set);
         if (rc != pcmk_ok) {
             return rc;
         }
-- 
1.8.3.1


From 9d5a1dae23a44db190782560d8dbdf50343b3692 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 24 Jan 2018 10:51:34 -0600
Subject: [PATCH 5/8] Low: tools: crm_resource --refresh should ignore
 --operation and --interval

It already did when a resource was not specified.
Also update help text to clarify cleanup vs refresh.
---
 tools/crm_resource.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index e3f8f86..d00c8f2 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -214,15 +214,17 @@ static struct crm_option long_options[] = {
         "cleanup", no_argument, NULL, 'C',
 #if 0
         // new behavior disabled until 2.0.0
-        "\t\tDelete failed operations from a resource's history allowing its current state to be rechecked.\n"
+        "\t\tIf resource has any past failures, clear its history and fail count.\n"
         "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n"
+        "\t\t\t\t--operation and --interval apply to fail counts, but entire history is always cleared,\n"
+        "\t\t\t\tto allow current state to be rechecked.\n"
     },
     {
         "refresh", no_argument, NULL, 'R',
 #endif
         "\t\tDelete resource's history (including failures) so its current state is rechecked.\n"
-        "\t\t\t\tOptionally filtered by --resource, --node, --operation, and --interval (otherwise all).\n"
-        "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be cleaned"
+        "\t\t\t\tOptionally filtered by --resource and --node (otherwise all).\n"
+        "\t\t\t\tUnless --force is specified, resource's group or clone (if any) will also be refreshed."
     },
     {
         "set-parameter", required_argument, NULL, 'p',
@@ -442,7 +444,6 @@ main(int argc, char **argv)
     bool require_resource = TRUE; /* whether command requires that resource be specified */
     bool require_dataset = TRUE;  /* whether command requires populated dataset instance */
     bool require_crmd = FALSE;    /* whether command requires connection to CRMd */
-    bool just_errors = TRUE;      /* whether cleanup command deletes all history or just errors */
 
     int rc = pcmk_ok;
     int is_ocf_rc = 0;
@@ -634,8 +635,7 @@ main(int argc, char **argv)
                 if (cib_file == NULL) {
                     require_crmd = TRUE;
                 }
-                just_errors = FALSE;
-                rsc_cmd = 'C';
+                rsc_cmd = 'R';
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
 
@@ -645,7 +645,6 @@ main(int argc, char **argv)
                 if (cib_file == NULL) {
                     require_crmd = TRUE;
                 }
-                just_errors = FALSE; // disable until 2.0.0
                 rsc_cmd = 'C';
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
@@ -1101,7 +1100,7 @@ main(int argc, char **argv)
         rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id,
                                            prop_name, cib_conn, &data_set);
 
-    } else if ((rsc_cmd == 'C') && rsc) {
+    } else if ((rsc_cmd == 'R') && rsc) {
         if (do_force == FALSE) {
             rsc = uber_parent(rsc);
         }
@@ -1110,8 +1109,8 @@ main(int argc, char **argv)
         crm_debug("%s of %s (%s requested) on %s",
                   (just_errors? "Clearing failures" : "Re-checking the state"),
                   rsc->id, rsc_id, (host_uname? host_uname : "all hosts"));
-        rc = cli_resource_delete(crmd_channel, host_uname, rsc, operation,
-                                 interval, just_errors, &data_set);
+        rc = cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0,
+                                 &data_set);
 
         if ((rc == pcmk_ok) && !BE_QUIET) {
             // Show any reasons why resource might stay stopped
@@ -1122,14 +1121,14 @@ main(int argc, char **argv)
             start_mainloop();
         }
 
-    } else if (rsc_cmd == 'C' && just_errors) {
+    } else if (rsc_cmd == 'C') {
         rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval,
                              &data_set);
         if (rc == pcmk_ok) {
             start_mainloop();
         }
 
-    } else if (rsc_cmd == 'C') {
+    } else if (rsc_cmd == 'R') {
 #if HAVE_ATOMIC_ATTRD
         const char *router_node = host_uname;
         xmlNode *msg_data = NULL;
-- 
1.8.3.1


From 035bebd78c1936b0749ae64fe949deb5d77effe9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 13 Feb 2018 12:43:48 -0600
Subject: [PATCH 6/8] Fix: tools: auto-merge was insufficient

The master and 2.0 branches had taken different approaches to crm_resource
clean-up refactoring in response to different issues. It was necessary to
combine the code more carefully.
---
 tools/crm_resource.c         | 13 ++-----------
 tools/crm_resource_runtime.c | 16 +++++++++++-----
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index d00c8f2..fc46cc0 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -628,6 +628,7 @@ main(int argc, char **argv)
                 timeout_ms = crm_get_msec(optarg);
                 break;
 
+            case 'C':
             case 'R':
             case 'P':
                 crm_log_args(argc, argv);
@@ -635,17 +636,7 @@ main(int argc, char **argv)
                 if (cib_file == NULL) {
                     require_crmd = TRUE;
                 }
-                rsc_cmd = 'R';
-                find_flags = pe_find_renamed|pe_find_anon;
-                break;
-
-            case 'C':
-                crm_log_args(argc, argv);
-                require_resource = FALSE;
-                if (cib_file == NULL) {
-                    require_crmd = TRUE;
-                }
-                rsc_cmd = 'C';
+                rsc_cmd = 'R'; // disable new behavior until 2.0
                 find_flags = pe_find_renamed|pe_find_anon;
                 break;
 
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index ce86a49..e02cc44 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -621,8 +621,14 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
         }
 
         // No resource specified means all resources match
-        if (rsc_id && safe_str_neq(rsc_id, failed_id)) {
-            continue;
+        if (rsc_id) {
+            resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources,
+                                                               failed_id,
+                                                               pe_find_renamed|pe_find_anon);
+
+            if (!fail_rsc || safe_str_neq(rsc_id, fail_rsc->id)) {
+                continue;
+            }
         }
 
         // Host name should always have been provided by this point
@@ -790,8 +796,8 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name,
                 const char *operation, const char *interval,
                 pe_working_set_t *data_set)
 {
-    int attr_options = attrd_opt_none;
     int rc = pcmk_ok;
+    int attr_options = attrd_opt_none;
     const char *display_name = node_name? node_name : "all nodes";
 
     if (crmd_channel == NULL) {
@@ -836,8 +842,8 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name,
             rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL,
                                     operation, interval, data_set);
             if (rc != pcmk_ok) {
-                printf("Cleaned all resource failures on all nodes, but unable to clean history on %s: %s\n",
-                       node->details->uname, pcmk_strerror(rc));
+                printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n",
+                       pcmk_strerror(rc));
                 return rc;
             }
         }
-- 
1.8.3.1


From 5fa351ec714de6b67c456fb1a85a8ebdb658f604 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 7 Aug 2018 10:42:59 -0500
Subject: [PATCH 7/8] Low: tools: update crm_resource for 1.1 vs 2.0
 differences

---
 tools/crm_resource.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index fc46cc0..128d075 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -352,11 +352,13 @@ static struct crm_option long_options[] = {
     },
     {
         "operation", required_argument, NULL, 'n',
-        "\tOperation to clear instead of all (with -C -r)"
+        "\tOperation to clear instead of all (with -C -r)",
+        pcmk_option_hidden // only used with 2.0 -C behavior
     },
     {
         "interval", required_argument, NULL, 'I',
-        "\tInterval of operation to clear (default 0) (with -C -r -n)"
+        "\tInterval of operation to clear (default 0) (with -C -r -n)",
+        pcmk_option_hidden // only used with 2.0 -C behavior
     },
     {
         "set-name", required_argument, NULL, 's',
@@ -1091,17 +1093,16 @@ main(int argc, char **argv)
         rc = cli_resource_delete_attribute(rsc, rsc_id, prop_set, prop_id,
                                            prop_name, cib_conn, &data_set);
 
-    } else if ((rsc_cmd == 'R') && rsc) {
+    } else if ((rsc_cmd == 'C') && rsc) {
         if (do_force == FALSE) {
             rsc = uber_parent(rsc);
         }
         crmd_replies_needed = 0;
 
-        crm_debug("%s of %s (%s requested) on %s",
-                  (just_errors? "Clearing failures" : "Re-checking the state"),
-                  rsc->id, rsc_id, (host_uname? host_uname : "all hosts"));
-        rc = cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0,
-                                 &data_set);
+        crm_debug("Erasing failures of %s (%s requested) on %s",
+                  rsc->id, rsc_id, (host_uname? host_uname: "all nodes"));
+        rc = cli_resource_delete(crmd_channel, host_uname, rsc,
+                                 operation, interval, TRUE, &data_set);
 
         if ((rc == pcmk_ok) && !BE_QUIET) {
             // Show any reasons why resource might stay stopped
@@ -1119,6 +1120,22 @@ main(int argc, char **argv)
             start_mainloop();
         }
 
+     } else if ((rsc_cmd == 'R') && rsc) {
+         if (do_force == FALSE) {
+             rsc = uber_parent(rsc);
+         }
+         crmd_replies_needed = 0;
+
+         crm_debug("Re-checking the state of %s (%s requested) on %s",
+                   rsc->id, rsc_id, (host_uname? host_uname: "all nodes"));
+         rc = cli_resource_delete(crmd_channel, host_uname, rsc,
+                                  NULL, 0, FALSE, &data_set);
+
+         if ((rc == pcmk_ok) && !BE_QUIET) {
+             // Show any reasons why resource might stay stopped
+             cli_resource_check(cib_conn, rsc);
+         }
+
     } else if (rsc_cmd == 'R') {
 #if HAVE_ATOMIC_ATTRD
         const char *router_node = host_uname;
@@ -1174,8 +1191,8 @@ main(int argc, char **argv)
         crmd_replies_needed = 0;
         for (rIter = data_set.resources; rIter; rIter = rIter->next) {
             rsc = rIter->data;
-            cli_resource_delete(crmd_channel, host_uname, rsc, NULL, NULL,
-                                &data_set);
+            cli_resource_delete(crmd_channel, host_uname, rsc, NULL, 0,
+                                FALSE, &data_set);
         }
 
         start_mainloop();
-- 
1.8.3.1


From 555bdce4ceaf9a406059150c9dee047151fb3d94 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 7 Aug 2018 14:11:50 -0500
Subject: [PATCH 8/8] Low: tools: avoid function not available until glib
 2.32.0

---
 tools/crm_resource_runtime.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index e02cc44..41cc742 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -651,7 +651,10 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name,
             }
         }
 
+        /* not available until glib 2.32
         g_hash_table_add(rscs, (gpointer) failed_id);
+        */
+        g_hash_table_insert(rscs, (gpointer) failed_id, (gpointer) failed_id);
     }
 
     g_hash_table_iter_init(&iter, rscs);
-- 
1.8.3.1