Blob Blame History Raw
From ec9fd4e526e572fe9bc0070186fa584b032eac22 Mon Sep 17 00:00:00 2001
From: AngelaBriel <abriel@suse.com>
Date: Fri, 5 Mar 2021 19:18:02 +0100
Subject: [PATCH] during the function 'check_for_primary' sometimes the command
 'hdbnsutil' does not work, but times out. As a fallback we use
 'getParameter.py' to get some parameter values from the global.ini file. In
 the past the use of the variable 'mode' was sufficient, but now we more often
 see the problem that this variable does not contain the current mode of the
 node. So we will switch to the variable 'actual_mode', which is updated more
 reliably by the SAP software and will (hopefully) provide us with the
 current mode of the node at times when 'hdbnsutil' refuses to answer. This
 change will help to avoid the irritating and confusing message 'secondary has
 unexpected sync status PRIM ==> RESCORE' on a primary node as seen in
 bsc#1181765

---
 heartbeat/SAPHana | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/heartbeat/SAPHana b/heartbeat/SAPHana
index 64e61e8..cd91ddf 100755
--- a/heartbeat/SAPHana
+++ b/heartbeat/SAPHana
@@ -1054,21 +1054,29 @@ function check_for_primary() {
     super_ocf_log info "FLOW $FUNCNAME ($*)"
     local rc=$HANA_STATE_DEFECT
     # TODO: PRIO 3: Check beginning from which SPS does SAP support HDBSettings.sh?
-    # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback
-    # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsutil does not answer properly -> lookup in config files?
-    #              This might also solve some problems when we could not figure-out the ilocal or remote site name
+    # DONE: Limit the runtime of hdbnsutil and use getParameter.py as fallback
+    # DONE: PRIO2: Maybe we need to use a fallback interface when hdbnsutil does not answer properly -> lookup in config files?
+    # TODO:        This might also solve some problems when we could not figure-out the local or remote site name (site_name,site_id from global.ini)
     local chkMethod=""
+    local ini_mode=""
     for chkMethod in  hU hU hU gP; do
        case "$chkMethod" in
            gP )
+                # fallback for 'hdbnsutil' failing 3 times.
                 local gpKeys=""
-                gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id})
+                gpKeys=$(echo --key=global.ini/system_replication/{actual_mode,mode})
                 node_full_status=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }')
-                node_status=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}')
+                # first try to get the value of 'actual_mode' from the global.ini
+                ini_mode=$(echo "$node_full_status" | awk -F= '$1=="actual_mode" {print $2}')
+                # if 'actual_mode' is not available, fallback to 'mode'
+                if [ -z "$ini_mode" ]; then
+                    ini_mode=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}')
+                fi
+                node_status="$ini_mode"
                 super_ocf_log info "ACT: Using getParameter.py as fallback - node_status=$node_status"
                 ;;
            hU | * )
-                # DONE: PRIO1: Begginning from SAP HANA rev 112.03 -sr_state is not longer supported
+                # DONE: PRIO1: Beginning from SAP HANA rev 112.03 -sr_state is not longer supported
                 node_full_status=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "$hdbState" 2>/dev/null )
                 node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
                 super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"