Blob Blame History Raw
diff -uNr a/heartbeat/pgsql b/heartbeat/pgsql
--- a/heartbeat/pgsql	2017-03-09 11:50:06.365145803 +0100
+++ b/heartbeat/pgsql	2017-03-09 12:19:41.566177608 +0100
@@ -966,8 +966,13 @@
             cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\
                           sort | head -1`
             if [ "$cmp_location" != "$my_master_baseline" ]; then
+                # We used to set the failcount to INFINITY for the resource here
+                # in order to move the master to the other node. However, only
+                # the CRM should set the failcount, so that usage was deprecated
+                # in pacemaker version 1.1.17. Instead, we now ban the resource
+                # from the node.
                 ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
-                $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY
+                exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q
                 return $OCF_ERR_GENERIC
             fi
         fi
@@ -1526,6 +1531,36 @@
     wait $func_pid
 }
 
+# Run a command, sleeping 1s and retrying until it exits with status 0.
+# arg1       : max attempts >= 0 (0 selects the default of 86400, roughly 1 day)
+# arg2..argN : command and args, executed word-for-word as passed by the caller
+exec_with_retry() {
+    local count="86400"
+    local output rc
+
+    if [ "$1" -ne 0 ]; then
+        count=$1
+    fi
+    shift
+
+    while [ "$count" -gt 0 ]; do
+        output=`"$@"`
+        rc=$?
+        if [ $rc -ne 0 ]; then
+            ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
+            count=`expr $count - 1`
+            sleep 1
+        else
+            # Fixed '%s' format: '%' or '\' in the output must not act as directives.
+            printf '%s' "${output}"
+            return 0
+        fi
+    done
+
+    ocf_exit_reason "giving up executing \"$*\""
+    return $rc
+}
+
 is_node_online() {
     crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline"
 }
@@ -1734,7 +1769,7 @@
     CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
     CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
     CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
-    CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"
+    CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource"
 
     CAN_NOT_PROMOTE="-INFINITY"
     CAN_PROMOTE="100"