Blame SOURCES/bz1156311-Fix-waiting-for-resource-operations.patch

337c54
From 9ed24231c194985f16ba14633d4f215f48608ee2 Mon Sep 17 00:00:00 2001
337c54
From: Tomas Jelinek <tojeline@redhat.com>
337c54
Date: Thu, 4 Dec 2014 17:10:11 +0100
337c54
Subject: [PATCH] Fix waiting for resource operations
337c54
337c54
* fixed waiting for globally-unique clone resources
337c54
* added --wait support to 'pcs resource update' command
337c54
* do not exit with an error when a resource is not running, print a warning
337c54
  instead
337c54
---
337c54
 pcs/pcs.8       |   4 +-
337c54
 pcs/resource.py | 175 +++++++++++++++++++++++++++++++++++++++-----------------
337c54
 pcs/usage.py    |   8 ++-
337c54
 pcs/utils.py    |  46 ++++++++++++++-
337c54
 4 files changed, 174 insertions(+), 59 deletions(-)
337c54
337c54
diff --git a/pcs/pcs.8 b/pcs/pcs.8
337c54
index 2020f99..67f85d5 100644
337c54
--- a/pcs/pcs.8
337c54
+++ b/pcs/pcs.8
337c54
@@ -92,8 +92,8 @@ List available OCF resource agent providers
337c54
 agents [standard[:provider]]
337c54
 List available agents optionally filtered by standard and provider
337c54
 .TP
337c54
-update <resource id> [resource options] [op [<operation action> <operation options>]...] [meta <meta operations>...]
337c54
-Add/Change options to specified resource, clone or multi\-state resource.  If an operation (op) is specified it will update the first found operation with the same action on the specified resource, if no operation with that action exists then a new operation will be created (WARNING: all current options on the update op will be reset if not specified). If you want to create multiple monitor operations you should use the add_operation & remove_operation commands.
337c54
+update <resource id> [resource options] [op [<operation action> <operation options>]...] [meta <meta operations>...] [\fB\-\-wait\fR[=n]]
337c54
+Add/Change options to specified resource, clone or multi\-state resource.  If an operation (op) is specified it will update the first found operation with the same action on the specified resource, if no operation with that action exists then a new operation will be created (WARNING: all current options on the update op will be reset if not specified). If you want to create multiple monitor operations you should use the add_operation & remove_operation commands.  If \fB\-\-wait\fR is specified, pcs will wait up to 'n' seconds for the changes to take effect and then return 0 if the changes have been processed or 1 otherwise.  If 'n' is not specified, default resource timeout will be used.
337c54
 .TP
337c54
 op add <resource id> <operation action> [operation properties]
337c54
 Add operation for specified resource
337c54
diff --git a/pcs/resource.py b/pcs/resource.py
337c54
index 48d894d..75cd8bb 100644
337c54
--- a/pcs/resource.py
337c54
+++ b/pcs/resource.py
337c54
@@ -575,8 +575,10 @@ def resource_move(argv,clear=False,ban=False):
337c54
         if utils.usefile:
337c54
             utils.err("Cannot use '-f' together with '--wait'")
337c54
         if not utils.is_resource_started(resource_id, 0)[0]:
337c54
-            utils.err("Cannot use '--wait' on non-running resources")
337c54
-        wait = True
337c54
+            print "Warning: Cannot use '--wait' on non-running resources"
337c54
+        else:
337c54
+            wait = True
337c54
+    if wait:
337c54
         timeout = utils.pcs_options["--wait"]
337c54
         if timeout is None:
337c54
             timeout = (
337c54
@@ -737,6 +739,12 @@ def resource_update(res_id,args):
337c54
             else:
337c54
                 ra_values.append(arg)
337c54
 
337c54
+    wait = False
337c54
+    if "--wait" in utils.pcs_options:
337c54
+        if utils.usefile:
337c54
+            utils.err("Cannot use '-f' together with '--wait'")
337c54
+        wait = True
337c54
+
337c54
     resource = None
337c54
     for r in dom.getElementsByTagName("primitive"):
337c54
         if r.getAttribute("id") == res_id:
337c54
@@ -753,10 +761,8 @@ def resource_update(res_id,args):
337c54
         if clone:
337c54
             for a in c.childNodes:
337c54
                 if a.localName == "primitive" or a.localName == "group":
337c54
-                    return utils.replace_cib_configuration(
337c54
-                        resource_clone_create(
337c54
-                            dom, [a.getAttribute("id")] + args, True
337c54
-                          )
337c54
+                    return resource_update_clone_master(
337c54
+                        dom, clone, "clone", a.getAttribute("id"), args, wait
337c54
                     )
337c54
 
337c54
         master = None
337c54
@@ -766,12 +772,18 @@ def resource_update(res_id,args):
337c54
                 break
337c54
 
337c54
         if master:
337c54
-            return utils.replace_cib_configuration(
337c54
-                resource_master_create(dom, [res_id] + args, True)
337c54
+            return resource_update_clone_master(
337c54
+                dom, master, "master", res_id, args, wait
337c54
             )
337c54
 
337c54
         utils.err ("Unable to find resource: %s" % res_id)
337c54
 
337c54
+    if wait:
337c54
+        node_count = len(utils.getNodesFromPacemaker())
337c54
+        status_old = utils.get_resource_status_for_wait(
337c54
+            dom, resource, node_count
337c54
+        )
337c54
+
337c54
     instance_attributes = resource.getElementsByTagName("instance_attributes")
337c54
     if len(instance_attributes) == 0:
337c54
         instance_attributes = dom.createElement("instance_attributes")
337c54
@@ -919,8 +931,85 @@ def resource_update(res_id,args):
337c54
     if len(instance_attributes.getElementsByTagName("nvpair")) == 0:
337c54
         instance_attributes.parentNode.removeChild(instance_attributes)
337c54
 
337c54
+    if wait:
337c54
+        status_new = utils.get_resource_status_for_wait(
337c54
+            dom, resource, node_count
337c54
+        )
337c54
+        wait_for_start, wait_for_stop = utils.get_resource_wait_decision(
337c54
+            status_old, status_new
337c54
+        )
337c54
+        if wait_for_start or wait_for_stop:
337c54
+            timeout = utils.pcs_options["--wait"]
337c54
+            if timeout is None:
337c54
+                timeout = utils.get_resource_op_timeout(
337c54
+                    dom, res_id, "start" if wait_for_start else "stop"
337c54
+                )
337c54
+            elif not timeout.isdigit():
337c54
+                utils.err("You must specify the number of seconds to wait")
337c54
+        else:
337c54
+            timeout = 0
337c54
+
337c54
     utils.replace_cib_configuration(dom)
337c54
 
337c54
+    if wait:
337c54
+        if wait_for_start or wait_for_stop:
337c54
+            success, message = utils.is_resource_started(
337c54
+                res_id, int(timeout), wait_for_stop,
337c54
+                count=status_new["instances"]
337c54
+            )
337c54
+            if success:
337c54
+                print message
337c54
+            else:
337c54
+                utils.err("Unable to start '%s'\n%s" % (res_id, message))
337c54
+        else:
337c54
+            print utils.resource_running_on(res_id)["message"]
337c54
+
337c54
+def resource_update_clone_master(dom, clone, clone_type, res_id, args, wait):
337c54
+    if wait:
337c54
+        node_count = len(utils.getNodesFromPacemaker())
337c54
+        status_old = utils.get_resource_status_for_wait(dom, clone, node_count)
337c54
+
337c54
+    if clone_type == "clone":
337c54
+        dom = resource_clone_create(dom, [res_id] + args, True)
337c54
+    elif clone_type == "master":
337c54
+        dom = resource_master_create(dom, [res_id] + args, True)
337c54
+
337c54
+    if wait:
337c54
+        status_new = utils.get_resource_status_for_wait(dom, clone, node_count)
337c54
+        wait_for_start, wait_for_stop = utils.get_resource_wait_decision(
337c54
+            status_old, status_new
337c54
+        )
337c54
+        if wait_for_start or wait_for_stop:
337c54
+            timeout = utils.pcs_options["--wait"]
337c54
+            if timeout is None:
337c54
+                timeout = utils.get_resource_op_timeout(
337c54
+                    dom, res_id, "start" if wait_for_start else "stop"
337c54
+                )
337c54
+            elif not timeout.isdigit():
337c54
+                utils.err("You must specify the number of seconds to wait")
337c54
+        else:
337c54
+            timeout = 0
337c54
+
337c54
+    dom = utils.replace_cib_configuration(dom)
337c54
+
337c54
+    if wait:
337c54
+        if wait_for_start or wait_for_stop:
337c54
+            success, message = utils.is_resource_started(
337c54
+                clone.getAttribute("id"), int(timeout), wait_for_stop,
337c54
+                count=status_new["instances"]
337c54
+            )
337c54
+            if success:
337c54
+                print message
337c54
+            else:
337c54
+                utils.err(
337c54
+                    "Unable to start '%s'\n%s"
337c54
+                    % (clone.getAttribute("id"), message)
337c54
+                )
337c54
+        else:
337c54
+            print utils.resource_running_on(clone.getAttribute("id"))["message"]
337c54
+
337c54
+    return dom
337c54
+
337c54
 # Removes all OCF_CHECK_LEVEL nvpairs
337c54
 def remove_ocf_check_levels(dom):
337c54
     for np in dom.getElementsByTagName("nvpair")[:]:
337c54
@@ -1092,15 +1181,7 @@ def resource_meta(res_id, argv):
337c54
             utils.err("Cannot use '-f' together with '--wait'")
337c54
         wait = True
337c54
         node_count = len(utils.getNodesFromPacemaker())
337c54
-        clone_ms_parent = utils.dom_get_resource_clone_ms_parent(dom, res_id)
337c54
-        old_status_running = utils.is_resource_started(res_id, 0)[0]
337c54
-        old_role = utils.dom_get_meta_attr_value(
337c54
-            meta_attributes.parentNode, "target-role"
337c54
-        )
337c54
-        old_status_enabled = not old_role or old_role.lower() != "stopped"
337c54
-        old_status_instances = utils.count_expected_resource_instances(
337c54
-            clone_ms_parent if clone_ms_parent else elem, node_count
337c54
-        )
337c54
+        status_old = utils.get_resource_status_for_wait(dom, elem, node_count)
337c54
 
337c54
     update_meta_attributes(
337c54
         meta_attributes,
337c54
@@ -1109,29 +1190,10 @@ def resource_meta(res_id, argv):
337c54
     )
337c54
 
337c54
     if wait:
337c54
-        new_role = utils.dom_get_meta_attr_value(
337c54
-            meta_attributes.parentNode, "target-role"
337c54
+        status_new = utils.get_resource_status_for_wait(dom, elem, node_count)
337c54
+        wait_for_start, wait_for_stop = utils.get_resource_wait_decision(
337c54
+            status_old, status_new
337c54
         )
337c54
-        new_status_enabled = not new_role or new_role.lower() != "stopped"
337c54
-        new_status_instances = utils.count_expected_resource_instances(
337c54
-            clone_ms_parent if clone_ms_parent else elem, node_count
337c54
-        )
337c54
-        wait_for_start = False
337c54
-        wait_for_stop = False
337c54
-        if old_status_running and not new_status_enabled:
337c54
-            wait_for_stop = True
337c54
-        elif (
337c54
-            not old_status_running
337c54
-            and
337c54
-            (not old_status_enabled and new_status_enabled)
337c54
-        ):
337c54
-            wait_for_start = True
337c54
-        elif (
337c54
-            old_status_running
337c54
-            and
337c54
-            old_status_instances != new_status_instances
337c54
-        ):
337c54
-            wait_for_start = True
337c54
         if wait_for_start or wait_for_stop:
337c54
             timeout = utils.pcs_options["--wait"]
337c54
             if timeout is None:
337c54
@@ -1145,14 +1207,17 @@ def resource_meta(res_id, argv):
337c54
 
337c54
     utils.replace_cib_configuration(dom)
337c54
 
337c54
-    if wait and (wait_for_start or wait_for_stop):
337c54
-        success, message = utils.is_resource_started(
337c54
-            res_id, int(timeout), wait_for_stop, count=new_status_instances
337c54
-        )
337c54
-        if success:
337c54
-            print message
337c54
+    if wait:
337c54
+        if wait_for_start or wait_for_stop:
337c54
+            success, message = utils.is_resource_started(
337c54
+                res_id, int(timeout), wait_for_stop, count=status_new["instances"]
337c54
+            )
337c54
+            if success:
337c54
+                print message
337c54
+            else:
337c54
+                utils.err("Unable to start '%s'\n%s" % (res_id, message))
337c54
         else:
337c54
-            utils.err("Unable to start '%s'\n%s" % (res_id, message))
337c54
+            print utils.resource_running_on(res_id)["message"]
337c54
 
337c54
 def update_meta_attributes(meta_attributes, meta_attrs, id_prefix):
337c54
     dom = meta_attributes.ownerDocument
337c54
@@ -1377,8 +1442,10 @@ def resource_clone(argv):
337c54
         if utils.usefile:
337c54
             utils.err("Cannot use '-f' together with '--wait'")
337c54
         if not utils.is_resource_started(res, 0)[0]:
337c54
-            utils.err("Cannot use '--wait' on non-running resources")
337c54
-        wait = True
337c54
+            print "Warning: Cannot use '--wait' on non-running resources"
337c54
+        else:
337c54
+            wait = True
337c54
+    if wait:
337c54
         wait_op = "start"
337c54
         for arg in argv:
337c54
             if arg.lower() == "target-role=stopped":
337c54
@@ -1486,8 +1553,10 @@ def resource_clone_master_remove(argv):
337c54
         if utils.usefile:
337c54
             utils.err("Cannot use '-f' together with '--wait'")
337c54
         if not utils.is_resource_started(resource_id, 0)[0]:
337c54
-            utils.err("Cannot use '--wait' on non-running resources")
337c54
-        wait = True
337c54
+            print "Warning: Cannot use '--wait' on non-running resources"
337c54
+        else:
337c54
+            wait = True
337c54
+    if wait:
337c54
         timeout = utils.pcs_options["--wait"]
337c54
         if timeout is None:
337c54
             timeout = utils.get_resource_op_timeout(dom, resource_id, "stop")
337c54
@@ -1534,8 +1603,10 @@ def resource_master(argv):
337c54
         if utils.usefile:
337c54
             utils.err("Cannot use '-f' together with '--wait'")
337c54
         if not utils.is_resource_started(res_id, 0)[0]:
337c54
-            utils.err("Cannot use '--wait' on non-running resources")
337c54
-        wait = True
337c54
+            print "Warning: Cannot use '--wait' on non-running resources"
337c54
+        else:
337c54
+            wait = True
337c54
+    if wait:
337c54
         wait_op = "promote"
337c54
         for arg in argv:
337c54
             if arg.lower() == "target-role=stopped":
337c54
diff --git a/pcs/usage.py b/pcs/usage.py
337c54
index ed99148..a66b90e 100644
337c54
--- a/pcs/usage.py
337c54
+++ b/pcs/usage.py
337c54
@@ -333,14 +333,18 @@ Commands:
337c54
         List available agents optionally filtered by standard and provider
337c54
 
337c54
     update <resource id> [resource options] [op [<operation action>
337c54
-           <operation options>]...] [meta <meta operations>...]
337c54
+           <operation options>]...] [meta <meta operations>...] [--wait[=n]]
337c54
         Add/Change options to specified resource, clone or multi-state
337c54
         resource.  If an operation (op) is specified it will update the first
337c54
         found operation with the same action on the specified resource, if no
337c54
         operation with that action exists then a new operation will be created.
337c54
         (WARNING: all current options on the update op will be reset if not
337c54
         specified) If you want to create multiple monitor operations you should
337c54
-        use the add_operation & remove_operation commands.
337c54
+        use the add_operation & remove_operation commands.  If --wait is
337c54
+        specified, pcs will wait up to 'n' seconds for the changes to take
337c54
+        effect and then return 0 if the changes have been processed or 1
337c54
+        otherwise.  If 'n' is not specified, default resource timeout will
337c54
+        be used.
337c54
 
337c54
     op add <resource id> <operation action> [operation properties]
337c54
         Add operation for specified resource
337c54
diff --git a/pcs/utils.py b/pcs/utils.py
337c54
index 0e6c70c..76fe57f 100644
337c54
--- a/pcs/utils.py
337c54
+++ b/pcs/utils.py
337c54
@@ -1038,11 +1038,12 @@ def is_resource_started(
337c54
         for res in resources:
337c54
             # If resource is a clone it can have an id of '<resource name>:N'
337c54
             if res.getAttribute("id") == resource or res.getAttribute("id").startswith(resource+":"):
337c54
-                set_running_on = set(
337c54
+                list_running_on = (
337c54
                     running_on["nodes_started"] + running_on["nodes_master"]
337c54
                 )
337c54
                 if slave_as_started:
337c54
-                    set_running_on.update(running_on["nodes_slave"])
337c54
+                    list_running_on.extend(running_on["nodes_slave"])
337c54
+                set_running_on = set(list_running_on)
337c54
                 if stopped:
337c54
                     if (
337c54
                         res.getAttribute("role") != "Stopped"
337c54
@@ -1071,7 +1072,7 @@ def is_resource_started(
337c54
                         and
337c54
                         res.getAttribute("failed") != "true"
337c54
                         and
337c54
-                        (count is None or len(set_running_on) == count)
337c54
+                        (count is None or len(list_running_on) == count)
337c54
                         and
337c54
                         (
337c54
                             not banned_nodes
337c54
@@ -1180,6 +1181,45 @@ def wait_for_primitive_ops_to_process(op_list, timeout=None):
337c54
                 % (op[1], op[0], op[2], message)
337c54
             )
337c54
 
337c54
+def get_resource_status_for_wait(dom, resource_el, node_count):
337c54
+    res_id = resource_el.getAttribute("id")
337c54
+    clone_ms_parent = dom_get_resource_clone_ms_parent(dom, res_id)
337c54
+    meta_resource_el = clone_ms_parent if clone_ms_parent else resource_el
337c54
+    status_running = is_resource_started(res_id, 0)[0]
337c54
+    status_enabled = True
337c54
+    for meta in meta_resource_el.getElementsByTagName("meta_attributes"):
337c54
+        for nvpair in meta.getElementsByTagName("nvpair"):
337c54
+            if nvpair.getAttribute("name") == "target-role":
337c54
+                if nvpair.getAttribute("value").lower() == "stopped":
337c54
+                    status_enabled = False
337c54
+    status_instances = count_expected_resource_instances(
337c54
+        meta_resource_el, node_count
337c54
+    )
337c54
+    return {
337c54
+        "running": status_running,
337c54
+        "enabled": status_enabled,
337c54
+        "instances": status_instances,
337c54
+    }
337c54
+
337c54
+def get_resource_wait_decision(old_status, new_status):
337c54
+    wait_for_start = False
337c54
+    wait_for_stop = False
337c54
+    if old_status["running"] and not new_status["enabled"]:
337c54
+        wait_for_stop = True
337c54
+    elif (
337c54
+        not old_status["running"]
337c54
+        and
337c54
+        (not old_status["enabled"] and new_status["enabled"])
337c54
+    ):
337c54
+        wait_for_start = True
337c54
+    elif (
337c54
+        old_status["running"]
337c54
+        and
337c54
+        old_status["instances"] != new_status["instances"]
337c54
+    ):
337c54
+        wait_for_start = True
337c54
+    return wait_for_start, wait_for_stop
337c54
+
337c54
 def get_lrm_rsc_op(cib, resource, op_list=None, last_call_id=None):
337c54
     lrm_rsc_op_list = []
337c54
     for lrm_resource in cib.getElementsByTagName("lrm_resource"):
337c54
-- 
337c54
1.9.1
337c54