Blame SOURCES/bz1420565-pgsql-dont-use-crm_failcount.patch

15862b
diff -uNr a/heartbeat/pgsql b/heartbeat/pgsql
15862b
--- a/heartbeat/pgsql	2017-03-09 11:50:06.365145803 +0100
15862b
+++ b/heartbeat/pgsql	2017-03-09 12:19:41.566177608 +0100
15862b
@@ -966,8 +966,13 @@
15862b
             cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\
15862b
                           sort | head -1`
15862b
             if [ "$cmp_location" != "$my_master_baseline" ]; then
15862b
+                # We used to set the failcount to INF for the resource here in
15862b
+                # order to move the master to the other node. However, setting
15862b
+                # the failcount should be done only by the CRM and so this use
15862b
+                # got deprecated in pacemaker version 1.1.17. Now we ban the
15862b
+                # resource from the node instead.
15862b
                 ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
15862b
-                $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY
15862b
+                exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q
15862b
                 return $OCF_ERR_GENERIC
15862b
             fi
15862b
         fi
15862b
@@ -1526,6 +1531,36 @@
15862b
     wait $func_pid
15862b
 }
15862b
 
15862b
+# Retry a command until it returns 0 or the retry count is exhausted.
15862b
+# arg1       : count >= 0 (if arg1 is 0, the command is retried up to 86400 times, i.e. about 1 day)
15862b
+# arg2..argN : command and args
15862b
+exec_with_retry() {
15862b
+    local count="86400"
15862b
+    local output
15862b
+    local rc
15862b
+
15862b
+    if [ "$1" -ne 0 ]; then
15862b
+        count=$1
15862b
+    fi
15862b
+    shift
15862b
+
15862b
+    while [ $count -gt 0 ]; do
15862b
+        output=`$*`
15862b
+        rc=$?
15862b
+        if [ $rc -ne 0 ]; then
15862b
+            ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
15862b
+            count=`expr $count - 1`
15862b
+            sleep 1
15862b
+        else
15862b
+            printf "${output}"
15862b
+            return 0
15862b
+        fi
15862b
+    done
15862b
+
15862b
+    ocf_exit_reason "giving up executing \"$*\""
15862b
+    return $rc
15862b
+}
15862b
+
15862b
 is_node_online() {
15862b
     crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline"
15862b
 }
15862b
@@ -1734,7 +1769,7 @@
15862b
     CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
15862b
     CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
15862b
     CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
15862b
-    CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"
15862b
+    CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource"
15862b
 
15862b
     CAN_NOT_PROMOTE="-INFINITY"
15862b
     CAN_PROMOTE="100"