|
|
15862b |
diff -uNr a/heartbeat/pgsql b/heartbeat/pgsql
|
|
|
15862b |
--- a/heartbeat/pgsql 2017-03-09 11:50:06.365145803 +0100
|
|
|
15862b |
+++ b/heartbeat/pgsql 2017-03-09 12:19:41.566177608 +0100
|
|
|
15862b |
@@ -966,8 +966,13 @@
|
|
|
15862b |
cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\
|
|
|
15862b |
sort | head -1`
|
|
|
15862b |
if [ "$cmp_location" != "$my_master_baseline" ]; then
|
|
|
15862b |
+ # We used to set the failcount to INFINITY for the resource here
+ # in order to move the master to the other node. However, the
+ # failcount should be set only by the CRM, so this usage was
+ # deprecated in Pacemaker 1.1.17. We now ban the resource from
+ # the node instead.
|
|
|
15862b |
ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline"
|
|
|
15862b |
- $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY
|
|
|
15862b |
+ exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q
|
|
|
15862b |
return $OCF_ERR_GENERIC
|
|
|
15862b |
fi
|
|
|
15862b |
fi
|
|
|
15862b |
@@ -1526,6 +1531,36 @@
|
|
|
15862b |
wait $func_pid
|
|
|
15862b |
}
|
|
|
15862b |
|
|
|
15862b |
+# Run a command, sleeping 1 second and retrying whenever it exits non-zero.
+# arg1       : retry count >= 0 (0 selects the default of 86400 tries, ~1 day)
+# arg2..argN : command and args, executed exactly as passed (no re-splitting)
+# On success, prints the captured stdout (trailing newlines stripped) and
+# returns 0; otherwise returns the last exit status (1 if it never ran).
+exec_with_retry() {
+    local count="86400"
+    local output
+    local rc=1
+
+    if [ "$1" -ne 0 ]; then
+        count=$1
+    fi
+    shift
+    while [ "$count" -gt 0 ]; do
+        output=`"$@"`    # "$@" keeps args containing spaces/globs intact
+        rc=$?
+        if [ $rc -eq 0 ]; then
+            # Output is data, not a printf format: '%' and '\' pass through.
+            printf '%s' "$output"
+            return 0
+        fi
+        ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"."
+        count=`expr $count - 1`
+        sleep 1
+    done
+    ocf_exit_reason "giving up executing \"$*\""
+    return $rc
+}
|
|
|
15862b |
+
|
|
|
15862b |
is_node_online() {
|
|
|
15862b |
crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline"
|
|
|
15862b |
}
|
|
|
15862b |
@@ -1734,7 +1769,7 @@
|
|
|
15862b |
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
|
|
|
15862b |
CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot"
|
|
|
15862b |
CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever"
|
|
|
15862b |
- CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount"
|
|
|
15862b |
+ CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource"
|
|
|
15862b |
|
|
|
15862b |
CAN_NOT_PROMOTE="-INFINITY"
|
|
|
15862b |
CAN_PROMOTE="100"
|