#!/bin/bash
#
# SAPHanaTopology
#
# Description:	Clone resource to analyze SAPHana-Topology
#
###################################################################################################################
#
# Thanks to Alexander Krauth for providing SAPInstance and SAPDatabase
#
# SAPHanaTopology: (short sht)
# Author:       Fabian Herschel, February 2014
# Support:      linux@sap.com
# License:      GNU General Public License (GPL)
# Copyright:    (c) 2014 SUSE Linux Products GmbH
#               (c) 2015-2016 SUSE Linux GmbH
#               (c) 2017-2019 SUSE LLC
#
# An example usage:
#      See usage() function below for more details...
#
# OCF instance parameters:
#   OCF_RESKEY_SID            (LNX, NDB, SLE)
#   OCF_RESKEY_InstanceNumber (00..99)
#	OCF_RESKEY_DIR_EXECUTABLE   (optional, well known directories will be searched by default)
#
#######################################################################
# DONE PRIO 1: AFTER(!) SAP HANA SPS12 is available we could use hdbnsutil --sr_stateConfiguration
# Version string of this resource agent; sht_meta_data embeds it in the <version> element.
SAPHanaTopologyVersion="0.180.0.0628.1824"
#
# Initialization:
# Start time of this invocation in seconds since the epoch.
# NOTE(review): presumably used for runtime measurement further down the file - not visible in this part.
timeB=$(date '+%s')

# Default OCF_FUNCTIONS_DIR only when it is unset, then source the OCF shell function library
# (provides ocf_log, ocf_is_true, ocf_is_probe and the OCF_* return codes used below).
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################
# Trace file for crm_attribute calls issued by get_hana_attribute/set_hana_attribute.
# Tracing is switched off here, so the trace goes to /dev/null.
log_attributes=false
if ocf_is_true "$log_attributes"; then
    log_attr_file="/var/log/fhATTRIBUTES"
else
    log_attr_file="/dev/null"
fi

# Return codes of check_for_primary (system replication role of the local instance).
HANA_STATE_PRIMARY=0
HANA_STATE_SECONDARY=1
HANA_STATE_STANDALONE=2
HANA_STATE_DEFECT=3
# Default timeout in seconds for HANA_CALL; sht_init overrides it from OCF_RESKEY_HANA_CALL_TIMEOUT.
HANA_CALL_TIMEOUT=120

# NOTE(review): debug_attributes and SH are not referenced in the visible part of the file.
debug_attributes=0
SH=/bin/sh

#
# function: super_ocf_log - wrapper function for ocf_log in order to filter "info" messages by message type
# params:   LOG_MESSAGE
# globals:  SUPER_LOG_PATH, SAPHanaFilter
function super_ocf_log() {
    # Forward a message to ocf_log, filtering "info" messages by message type.
    #
    # params:  $1 - level: dbg|debug|warn|err|error are always logged, "info" is
    #               subject to the filter, any other level is dropped
    #          $2 - message; its first word (e.g. "ACT:", "FLOW", "fhDEC:") is
    #               the message type (a trailing ':' and a leading "fh" are removed)
    # globals: SAPHanaFilter(r) - "all" (also used when unset/empty), "none", or a
    #               string such as "ra-act-dec-lpa"; an info message passes when
    #               its type occurs in that string (case-insensitive)
    # returns: status of ocf_log if the message was logged, otherwise 0
    local level="$1"
    local message="$2"
    local skip=1
    local mtype=""
    local shf="${SAPHanaFilter:-all}"
    # message levels: (dbg)|info|warn|err|error
    # message types:  ACT|RA|FLOW|DBG|LPA|DEC
    case "$level" in
        dbg | debug | warn | err | error )
            skip=0
            ;;
        info )
            case "$shf" in
                all )
                    skip=0
                    ;;
                none )
                    skip=1
                    ;;
                * )
                    mtype=${message%% *}
                    mtype=${mtype%:}
                    mtype=${mtype#fh}
                    # The type is matched literally (-F) and passed via -e, so
                    # regex characters or a leading '-' in the first word of a
                    # message can neither match by accident nor be taken as a
                    # grep option. A message without a type is never logged.
                    if [ -n "$mtype" ] && printf '%s\n' "$shf" | grep -iqF -e "$mtype"; then
                        skip=0
                    fi
                    ;;
            esac
            ;;
    esac
    if [ $skip -eq 0 ]; then
        ocf_log "$level" "$message"
    fi
}

#
# function: sht_usage - short usage info
# params:   -
# globals:  $0(r)
#
function sht_usage() {
    # Print the usage text of the agent to stdout.
    # The supported methods come from sht_methods (one per line); the unquoted
    # command substitution folds them onto one line before they are joined
    # with '|'. The global "methods" is (re)written as a side effect.
    # Always returns 0.
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    methods=$(echo $(sht_methods) | tr ' ' '|')
    cat <<USAGE
usage: $0 ($methods)

    $0 manages a SAP HANA Instance as an HA resource.

    The 'start'        operation starts the HANA instance or bring the "instance" to a WAITING (for primary) status
    The 'stop'         operation stops the HANA instance
    The 'status'       operation reports whether the HANA instance is running
    The 'monitor'      operation reports whether the HANA instance seems to be working in multi-state configuration it also needs to check the system replication status
    The 'notify'       operation always returns SUCCESS
    The 'validate-all' operation reports whether the parameters are valid
    The 'methods'      operation reports on the methods $0 supports

USAGE
    return 0
}

#
# function: sht_meta_data - print resource agent meta-data for cluster
# params:   -
# globals:  -
#
function sht_meta_data() {
    # Print the OCF resource-agent meta-data (XML) of SAPHanaTopology to stdout.
    # The here-document is unquoted: $SAPHanaTopologyVersion is expanded into
    # the <version> element, while the escaped \$SID and \$InstanceName /
    # \$InstanceNumber are emitted as literal text.
    # Declared parameters: SID, InstanceNumber (both required),
    # HANA_CALL_TIMEOUT and DIR_EXECUTABLE (both optional).
    # Always returns 0.
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local rc=0
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPHanaTopology">
    <version>$SAPHanaTopologyVersion</version>
    <shortdesc lang="en">Analyzes SAP HANA System Replication Topology.</shortdesc>
    <longdesc lang="en">This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to
        all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases.
        In addition it starts and monitors the local saphostagent.

1. Interface to monitor a HANA system: landscapeHostConfiguration.py
landscapeHostConfiguration.py has some detailed output about HANA system status
and node roles. For our monitor the overall status is relevant. This overall
status is reported by the return code of the script:
0: Internal Fatal
1: ERROR
2: WARNING
3: INFO (maybe a switch of the resource running)
4: OK
The SAPHanaTopology resource agent will interpret return codes 1 as NOT-RUNNING (or 1 failure) and return codes 2+3+4 as RUNNING.
SAPHanaTopology scans the output table of landscapeHostConfiguration.py to identify the roles of the cluster node. Roles means configured and current role of the nameserver as well as the indexserver.

2. Interface is hdbnsutil
   The interface hdbnsutil is used to check the "topology" of the system replication as well as the current configuration
   (primary/secondary) of a SAP HANA database instance. A second task of the interface is the possibility to run a
   system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register).

3. saphostctrl
   The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the
   SAP HANA instance. This is the hostname used during the HANA installation.
    </longdesc>
<parameters>
    <parameter name="SID" unique="0" required="1">
        <longdesc lang="en">The SAP System Identifier (SID)</longdesc>
        <shortdesc lang="en">The SAP System Identifier (SID)</shortdesc>
        <content type="string" default="" />
    </parameter>
    <parameter name="InstanceNumber" unique="0" required="1">
        <longdesc lang="en">The SAP Instance Number</longdesc>
        <shortdesc lang="en">The SAP Instance Number</shortdesc>
        <content type="string" default="" />
    </parameter>
    <parameter name="HANA_CALL_TIMEOUT" unique="0" required="0">
        <shortdesc lang="en">Define timeout how long a call to HANA to receive information can take.</shortdesc>
        <longdesc lang="en">Define timeout how long a call to HANA to receive information can take. This could be eg landscapeHostConfiguration.py.
          There are some specific calls to HANA which have their own timeout values. For example the takeover command does not timeout (inf).
          If the timeout is reached, the return code will be 124. If you increase the timeouts for HANA calls you should also adjust the operation timeouts
          of your cluster resources.
        </longdesc>
        <content type="string" default="120" />
    </parameter>
    <parameter name="DIR_EXECUTABLE" unique="0" required="0">
        <longdesc lang="en">Path to the SAP Hana Instance executable directory. If not set the RA tries /usr/sap/\$SID/\$InstanceName/exe.
        While InstanceName is the string of "HDB" and \$InstanceNumber for SAP Hana databases.
        </longdesc>
        <shortdesc lang="en">Path to the SAP Hana Instance executable directory.</shortdesc>
        <content type="string" default="" />
    </parameter>
</parameters>
<actions>
    <action name="start" timeout="600" />
    <action name="stop" timeout="300" />
    <action name="status" timeout="60" />
    <action name="monitor" depth="0" timeout="600" interval="10" />
    <action name="validate-all" timeout="5" />
    <action name="meta-data" timeout="5" />
    <action name="methods" timeout="5" />
</actions>
</resource-agent>
END
return $rc
}

#
# function: get_hana_attribute
# params:   NODE ATTR [STORE [DEFAULT]]   (STORE: reboot (default) | forever | props)
# globals:  log_attr_file(r)
#
function get_hana_attribute()
{
    # Query a cluster attribute with crm_attribute and print its value on stdout.
    #
    # params:  $1 - node name (ignored for store "props")
    #          $2 - attribute name
    #          $3 - store: reboot (default) | forever -> node attribute with that lifetime
    #                      props                      -> cluster property (crm_config)
    #          $4 - default value handed to crm_attribute via -d (optional)
    # globals: log_attr_file(r) - trace file for the issued commands and their
    #                             stderr; /dev/null is used when it is unset
    # returns: exit status of crm_attribute; OCF_ERR_ARGS (2 if that is unset)
    #          for an unknown store
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local rc=0
    local attr_node=$1
    local attr_name=$2
    local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter
    local attr_default=${4:-}
    local trace_file="${log_attr_file:-/dev/null}"
    local dstr
    dstr=$(date)
    case "$attr_store" in
        reboot | forever )
            echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> "$trace_file"
            crm_attribute -N "${attr_node}" -G -n "$attr_name" -l "$attr_store" -q -d "$attr_default" 2>>"$trace_file"; rc=$?
            ;;
        props )
            echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> "$trace_file"
            crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>"$trace_file"; rc=$?
            ;;
        * )
            # An unknown store used to fall through silently with rc=0 and no output.
            super_ocf_log warn "RA: $FUNCNAME: unknown attribute store '$attr_store' for attribute '$attr_name'"
            rc=${OCF_ERR_ARGS:-2}
            ;;
    esac
    super_ocf_log info "FLOW $FUNCNAME rc=$rc"
    return $rc
}

#
# function: set_hana_attribute - set the multi-state status of a node
# params:   NODE VALUE ATTR [STORE [DEFAULT]]   (STORE: reboot (default) | forever | props)
# globals:  log_attr_file(r)
#
function set_hana_attribute()
{
    # Set a cluster attribute, but only if its current value differs.
    #
    # params:  $1 - node name (ignored for store "props")
    #          $2 - new value
    #          $3 - attribute name
    #          $4 - store: reboot (default) | forever -> node attribute with that lifetime
    #                      props                      -> cluster property in set SAPHanaSR
    #          $5 - default used when reading the current value (optional)
    # globals: log_attr_file(r) - trace file for the issued commands and their
    #                             stderr; /dev/null is used when it is unset
    # returns: 0 if the value is already set or was written, otherwise the
    #          exit status of crm_attribute (1 for an unknown store)
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local attr_node=$1
    local attr_value=$2
    local attr_name=$3
    local attr_store=${4:-reboot} # DONE: PRIO5 get this (optional) from parameter
    local attr_default=${5:-}
    local rc=1
    local get_rc=0
    local attr_old=""
    local trace_file="${log_attr_file:-/dev/null}"
    local dstr
    dstr=$(date)
    attr_old=$(get_hana_attribute "$attr_node" "$attr_name" "$attr_store" "$attr_default"); get_rc=$?
    if [ "$attr_old" != "$attr_value" ]; then
        super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc "
        case "$attr_store" in
            reboot | forever )
                echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> "$trace_file"
                # the value is quoted so that values containing whitespace stay one argument
                crm_attribute -N "$attr_node" -v "$attr_value" -n "$attr_name" -l "$attr_store" 2>>"$trace_file"; rc=$?
                ;;
            props )
                echo "$dstr: SAPHanaTopology: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> "$trace_file"
                crm_attribute -v "$attr_value" -n "$attr_name" -t crm_config -s SAPHanaSR 2>>"$trace_file"; rc=$?
                ;;
            * )
                # nothing is written for an unknown store; rc keeps its failure value
                super_ocf_log warn "RA: $FUNCNAME: unknown attribute store '$attr_store' for attribute '$attr_name'"
                ;;
        esac
    else
        super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}"
        rc=0
    fi
    super_ocf_log info "FLOW $FUNCNAME rc=$rc"
    return $rc
}

#
# function: sht_methods - report supported cluster methods
# params:   -
# globals:  -
# methods: What methods/operations do we support?
#
function sht_methods() {
  # List the operations this agent supports on stdout, one per line,
  # each indented by four blanks. Always returns 0.
  super_ocf_log info "FLOW $FUNCNAME ($*)"
  printf '    %s\n' \
      start stop status monitor notify \
      validate-all methods meta-data usage admin-setup
  return 0
}

#
# function: is_clone - report, if resource is configured as a clone (also multi-state)
# params:   -
# globals:  OCF_*(r)
# descript: is_clone : find out if we are configured to run in a multi-state configuration
#   rc: 0: it is a clone
#       1: it is not a clone
#   Special EXIT of RA, if clone is misconfigured
#
function is_clone() {
    # Tell whether the resource is configured as a clone.
    # returns: 0 - clone (OCF_RESKEY_CRM_meta_clone_max is set and > 0)
    #          1 - not a clone
    # The agent exits with OCF_NOT_RUNNING when a clone is configured with
    # OCF_RESKEY_CRM_meta_clone_node_max different from 1.
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local clone_rc=1
    if [ -n "$OCF_RESKEY_CRM_meta_clone_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_max" -gt 0 ]; then
        # clone configuration found - it must allow exactly one instance per node
        if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then
            super_ocf_log err "ACT: Clone options misconfigured. (expect: clone_node_max=1)"
            exit $OCF_NOT_RUNNING
        fi
        clone_rc=0
    fi
    super_ocf_log info "FLOW $FUNCNAME rc=$clone_rc"
    return $clone_rc
}

#
# function: HANA_CALL
# params:   timeout-in-seconds cmd-line
# globals:  sid(r), SID(r), InstanceName(r)
#
function HANA_CALL()
{
    # Run a command inside the HANA environment (HDBSettings.sh) and print its
    # stdout.
    #
    # options: --timeout SECONDS   limit the run time with timeout(1); 0, inf or
    #                              an empty value mean "no limit"
    #          --on-timeout CMD    command run (output discarded) after the main
    #                              command hit the timeout
    #          --use-su            run via "su - <sid>adm" (currently always the case)
    #          --cmd CMD...        the command line; must be the last option, all
    #                              remaining words belong to it
    # globals: sid(r), SID(r), InstanceName(r), DIR_EXECUTABLE(r),
    #          LD_LIBRARY_PATH(r), MY_LD_LIBRARY_PATH(w, non-su branch only)
    # returns: exit status of the command; 124 if the timeout was reached
    #
    # TODO: PRIO 5: remove 'su - ${sidadm} later, when SAP HANA resolved issue with
    #       root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp)
    #       would hdbnsutil -sr_stateConfiguration also create such a shared memory file?
    # TODO: PRIO 5: Maybe make "su" optional by a parameter
    local timeOut=0
    local onTimeOut=""
    local rc=0
    local use_su=1 # Default to be changed later (see TODO above)
    local cmd=""
    local pre_script=""
    local output=""
    local hdb_settings="/usr/sap/$SID/$InstanceName/HDBSettings.sh"
    # command prefix as an array, so that no word splitting of a string is needed
    local -a pre_cmd=()
    while [ $# -gt 0 ]; do
        case "$1" in
            --timeout ) timeOut=$2; shift;;
            --use-su  ) use_su=1;;
            --on-timeout ) onTimeOut="$2"; shift;;
            --cmd ) shift; cmd="$*"; break;;
        esac
        shift
    done

    if [ $use_su -eq 1 ]; then
        pre_cmd=(su - "${sid}adm" -c)
        pre_script="true"
    else
        # as root user we need the library path to the SAP kernel to be able to call sapcontrol
        # Prepend DIR_EXECUTABLE unless it already is the first element of
        # LD_LIBRARY_PATH ("%%:*" keeps only the first path element); in that
        # case the existing path is used unchanged instead of an unset value.
        if [ "${LD_LIBRARY_PATH%%:*}" != "$DIR_EXECUTABLE" ]; then
            MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
        else
            MY_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
        fi
        pre_cmd=(bash -c)
        pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH"
    fi
    case "$timeOut" in
        "" | 0 | inf )
            # an empty timeout would otherwise make timeout(1) take the command name as duration
            output=$("${pre_cmd[@]}" "$pre_script; $hdb_settings $cmd"); rc=$?
            ;;
        * )
            output=$(timeout "$timeOut" "${pre_cmd[@]}" "$pre_script; $hdb_settings $cmd"); rc=$?
            #
            # on timeout ...
            #
            if [ $rc -eq 124 ]; then
                super_ocf_log warn "RA: HANA_CALL TIMEOUT after $timeOut seconds ($cmd)"
                if [ -n "$onTimeOut" ]; then
                    # the follow-up command is run for its effect only, its stdout is dropped
                    "${pre_cmd[@]}" "$pre_script; $hdb_settings $onTimeOut" >/dev/null
                fi
            fi
            ;;
    esac
    printf '%s\n' "$output"
    return $rc
}


#
# function: sht_init - initialize variables for the resource agent
# params:   -
# globals:  OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w),
# globals:  meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w)
# globals:  ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w)
# globals:  DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w)
# sht_init : Define global variables with default values, if optional parameters are not set
#
#

function sht_init() {
    # Initialize the global variables of the resource agent.
    #
    # - derives SID, sid, sidadm, InstanceNr and InstanceName from
    #   OCF_RESKEY_SID / OCF_RESKEY_InstanceNumber
    # - defines the ATTR_NAME_* arrays: (attribute name, store[, default])
    # - reads the log filter (SAPHanaFilter) from the cluster properties
    # - sets DIR_EXECUTABLE and DIR_PROFILE (OCF parameter or default path)
    # - collects the cluster node names in nodelist
    # - asks hdbnsutil (up to three times) for the system replication
    #   configuration and sets srmode, site, MAPPING and RC_hdbnsutil
    #
    # returns: OCF_SUCCESS (OCF_NOT_RUNNING only if DIR_EXECUTABLE ended up empty)
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local rc=$OCF_SUCCESS
    local hdbANSWER=""
    local siteNAME
    local attempt
    HOSTEXECNAME=saphostexec
    USRSAP=/usr/sap
    SAPSERVICE_PATH=${USRSAP}/sapservices
    SAPHOSTCTRL_PATH=${USRSAP}/hostctrl/exe
    HOSTEXEC_PATH=${SAPHOSTCTRL_PATH}/${HOSTEXECNAME}
    HOSTEXEC_PROFILE_PATH=${SAPHOSTCTRL_PATH}/host_profile
    SID=$OCF_RESKEY_SID
    InstanceNr=$OCF_RESKEY_InstanceNumber
    InstanceName="HDB${InstanceNr}"
    HANA_CALL_TIMEOUT="${OCF_RESKEY_HANA_CALL_TIMEOUT:-120}"
    export SAPSYSTEMNAME=$SID
    super_ocf_log debug "DBG: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)"
    # the character classes are quoted: unquoted they are glob patterns which
    # match one-letter file names (e.g. "l") in the current directory
    sid=$(echo "$SID" | tr '[:upper:]' '[:lower:]')
    sidadm="${sid}adm"
    ocf_env=$(env | grep 'OCF_RESKEY_CRM')
    super_ocf_log debug "DBG: OCF: $ocf_env"
    #
    # node attributes
    #
    ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot")  # SOK, SFAIL, UNKNOWN?
    ATTR_NAME_HANA_CLONE_STATE=("hana_${sid}_clone_state" "reboot") # UNKNOWN?, DEMOTED, PROMOTED
    #ATTR_NAME_HANA_REMOTEHOST=("hana_${sid}_remoteHost" "forever")
    ATTR_NAME_HANA_SITE=("hana_${sid}_site" "forever")
    ATTR_NAME_HANA_ROLES=("hana_${sid}_roles" "reboot")
    ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever")
    ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot")
    #
    # new "central" attributes
    #
    ATTR_NAME_HANA_PRIM=("hana_${sid}_glob_prim" "props")                        # SITE
    ATTR_NAME_HANA_SEC=("hana_${sid}_glob_sec" "props")                          # SITE
    ATTR_NAME_HANA_SEC_SYNC_STATUS=("hana_${sid}_glob_sec_sync_state" "props")   # SOK, SFAIL
    # NOTE(review): SRHOOK uses the same attribute name as SEC_SYNC_STATUS - confirm this is intended
    ATTR_NAME_HANA_SEC_SYNC_SRHOOK=("hana_${sid}_glob_sec_sync_state" "props")   # SOK, SFAIL
    # The former node level definition ("hana_${sid}_primary_at" "reboot") was
    # always overwritten by this one and has been dropped.
    ATTR_NAME_HANA_PRIMARY_AT=("hana_${sid}_glob_primary_at"   "props")          # Not used so far
    ATTR_NAME_HANA_SRMODE2=("hana_${sid}_glob_srmode" "props" "sync")            # sync syncmem async
    ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa")
    #
    # SITE based attributes
    #
    LPA_ATTR=("hana_${sid}_site_lpt" "props")
    ATTR_NAME_HANA_LSS=("hana_${sid}_site_lss" "props")
    ATTR_NAME_HANA_SRR=("hana_${sid}_site_srr" "props")

    # the node argument is ignored for cluster properties, "X" is a placeholder
    SAPHanaFilter=$(get_hana_attribute "X" "${ATTR_NAME_HANA_FILTER[@]}")

    # optional OCF parameters, fall back to the well known directories
    if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ]; then
        DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe"
    else
        DIR_EXECUTABLE="$OCF_RESKEY_DIR_EXECUTABLE"
    fi

    if [ -z "$DIR_EXECUTABLE" ]; then
        super_ocf_log err "DEC: Can not determine DIR_EXECUTABLE. Please set this parameter. -> OCF_NOT_RUNNING"
        rc=$OCF_NOT_RUNNING
    fi

    if [ -z "$OCF_RESKEY_DIR_PROFILE" ]; then
        DIR_PROFILE="/usr/sap/$SID/SYS/profile"
    else
        DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE"
    fi

    #PATH=${PATH}:${DIR_EXECUTABLE}
    #
    # the output format of "crm_node -l" depends on the cluster infrastructure
    #
    case "$(crm_attribute --type crm_config --name cluster-infrastructure -q)" in
       *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');;
       *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');;
       *cman*    ) nodelist=$(crm_node -l);;
    esac
    #
    # figure-out all needed values from system replication status with ONE call
    # we need: mode=primary|sync|syncmem|...; site name=<site>; mapping/<me>=<site>/<node> (multiple lines)
    #### SAP-CALL
    # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode
    for attempt in 1 2 3; do
        hdbANSWER=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "hdbnsutil -sr_stateConfiguration --sapcontrol=1" 2>/dev/null); RC_hdbnsutil=$?
        super_ocf_log debug "DBG: hdbANSWER=$hdbANSWER"
        # Exact key match: a pattern match on "mode" would also pick up other
        # keys containing that word (e.g. "operation mode=...") and turn
        # srmode into a multi-line value.
        srmode=$(echo "$hdbANSWER" | awk -F= '$1 == "mode" {print $2}')
        case "$srmode" in
            primary | syncmem | sync | async | none )
                # we can leave the loop as we already got a result
                break
                ;;
            * )
                # lets pause a bit to give hdbnsutil a chance to answer next time;
                # after the last attempt there is no next time, so do not wait
                if [ "$attempt" -lt 3 ]; then
                    sleep 20
                fi
                ;;
        esac
    done
    # TODO PRIO3: Implement a file lookup, if we did not get a result
    siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
    site=$siteNAME
    # mapping lines look like: mapping/<host>=<site>/<host>
    # print the hosts mapped for this host which belong to another site
    MAPPING=$(echo "$hdbANSWER" | awk -F'[=/]' -v site="$site" -v myhost="$HOSTNAME" '$1 == "mapping" && $2 == myhost && $3 != site { print $4 }')
    super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
    #
    # TODO PRIO2: as we might not need remoteHost (per node) any longer, could we completely drop the scanning of the output?
    #             which parts could be omitted?
    super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING"
    super_ocf_log info "FLOW $FUNCNAME rc=$rc"
    return $rc
}

#
# function: check_for_primary - check if local SAP HANA is configured as primary
# params:   -
# globals:  srmode(r), HANA_STATE_PRIMARY(r), HANA_STATE_SECONDARY(r), HANA_STATE_DEFECT(r), HANA_STATE_STANDALONE(r)
#
function check_for_primary() {
    # Map the system replication mode of the local instance to a HANA_STATE_* code.
    #
    # Starts with the mode found by sht_init (srmode). As long as the mode is
    # not a known one, hdbnsutil is asked again, up to three times, with a
    # pause of 20 seconds between consecutive re-queries. Every answer is
    # evaluated, including the last one.
    #
    # globals: srmode(r), HANA_CALL_TIMEOUT(r), HANA_STATE_*(r),
    #          node_status(w), node_full_status(w)
    # returns: HANA_STATE_PRIMARY    - mode is primary
    #          HANA_STATE_SECONDARY  - mode is sync, syncmem or async
    #          HANA_STATE_STANDALONE - mode is none
    #          HANA_STATE_DEFECT     - no known mode after all retries
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local max_retries=3
    local retry=0
    local dump=""
    node_status=$srmode
    super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
    while :; do
        case "$node_status" in
            primary )
                super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
                return $HANA_STATE_PRIMARY
                ;;
            syncmem | sync | async )
                super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
                return $HANA_STATE_SECONDARY
                ;;
            none )
                # have seen that mode on second side BEFORE we registered it as replica
                # TODO: PRIO1: Should we set SFAIL?
                super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
                return $HANA_STATE_STANDALONE
                ;;
        esac
        if [ "$retry" -ge "$max_retries" ]; then
            break
        fi
        retry=$((retry + 1))
        # TODO: PRIO3: Should we set SFAIL?
        # TODO: PRIO4: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
        # quoted, so the dump shows the value byte by byte as it was received
        dump=$(printf '%s\n' "$node_status" | hexdump -C)
        super_ocf_log err "ACT: check_for_primary:  we didn't expect node_status to be: DUMP: <$dump>"
        # the first re-query is done right away, later ones after a pause
        if [ "$retry" -gt 1 ]; then
            sleep 20
        fi
        #### SAP-CALL
        node_full_status=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "hdbnsutil -sr_stateConfiguration --sapcontrol=1" 2>/dev/null )
        node_status=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}')
        super_ocf_log info "DEC: check_for_primary: loop=$retry: node_status=$node_status"
    done
    super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT"
    return $HANA_STATE_DEFECT
}


#
# function: start_saphostagent
# params:   -
# globals:  HOSTEXEC_PATH(r), HOSTEXEC_PROFILE_PATH(r)
#
function start_saphostagent()
{
    # Start the SAP host agent with its profile, if the executable exists.
    # globals: HOSTEXEC_PATH(r), HOSTEXEC_PROFILE_PATH(r)
    # returns: always 0 - the exit status of saphostexec is not evaluated
    ### SAP-CALL
    if [ -x "${HOSTEXEC_PATH}" ]; then
        # quoted, so that paths containing blanks stay one word
        "${HOSTEXEC_PATH}" "pf=${HOSTEXEC_PROFILE_PATH}"
    fi
    return 0
}

#
# function: stop_saphostagent
# params:   -
# globals: HOSTEXEC_PATH(r)
#
function stop_saphostagent()
{
    # Stop the SAP host agent, if the executable exists.
    # globals: HOSTEXEC_PATH(r)
    # returns: exit status of "saphostexec -stop"; 0 if the executable is missing
    ### SAP-CALL
    if [ -x "${HOSTEXEC_PATH}" ]; then
        # quoted, so that a path containing blanks stays one word
        "${HOSTEXEC_PATH}" -stop
    fi
}

#
# function: check_saphostagent
# params:   -
# globals:
#
function check_saphostagent()
{
    # Look for a running saphostexec process by its full path.
    # pgrep prints the matching process ids on stdout.
    # returns: exit status of pgrep (0 if at least one process matched)
    # TODO: PRIO3: should the path been removed like "saphostexec" instead of "/usr/sap/hostctrl/exe/saphostexec"
    #       or should we use ${HOSTEXEC_PATH} instead?
    pgrep -f /usr/sap/hostctrl/exe/saphostexec
    return $?
}

#
#############################################################################
#
# function: sht_start - start a hana instance
# params:   -
# globals:  OCF_*
# sht_start : Start the SAP HANA instance
#
function sht_start() {
  # Start the topology resource: create the marker file
  # $HA_RSCTMP/SAPHana/SAPTopologyON (checked by sht_monitor) and make sure
  # the SAP host agent is running.
  # globals: HA_RSCTMP(r), RC_hdbnsutil(r), OCF_*(r)
  # returns: OCF_ERR_GENERIC - the marker file could not be created
  #          OCF_NOT_RUNNING - hdbnsutil returned 2 during sht_init
  #          OCF_SUCCESS     - otherwise
  super_ocf_log info "FLOW $FUNCNAME ($*)"

  local rc=$OCF_NOT_RUNNING
  # TODO: PRIO3: move the file to the clusters tmp directory?
  local marker_dir="$HA_RSCTMP/SAPHana"

  # Without the marker every monitor reports NOT_RUNNING, so a failure to
  # create it is reported as a failed start right away.
  if ! mkdir -p "$marker_dir" || ! touch "$marker_dir/SAPTopologyON"; then
     super_ocf_log err "ACT: $FUNCNAME: can not create $marker_dir/SAPTopologyON"
     rc=$OCF_ERR_GENERIC
     super_ocf_log info "FLOW $FUNCNAME rc=$rc"
     return $rc
  fi
  if ! check_saphostagent; then
     start_saphostagent
  fi

  case "$RC_hdbnsutil" in
      0 | 1 ) rc=$OCF_SUCCESS;;     # TODO: Prio 3: Need to check rc==1 - is that success or not_running?
      2 )     rc=$OCF_NOT_RUNNING;; # rc==2 have been seen for bad persistency layer
      * )     rc=$OCF_SUCCESS;;     # TODO: Prio 3: Need to check "other" return codes
  esac

  super_ocf_log info "FLOW $FUNCNAME rc=$rc"
  return $rc
}

#
# function: sht_stop - stop a hana instance
# params:   -
# globals:  OCF_*(r), SAPCONTROL(r), SID(r), InstanceName(r)
# sht_stop: Stop the SAP HANA Topology Resource
#
function sht_stop() {
  # Stop the topology resource by removing the marker file
  # $HA_RSCTMP/SAPHana/SAPTopologyON. HANA itself is not touched.
  # globals: HA_RSCTMP(r), OCF_SUCCESS(r)
  # returns: always OCF_SUCCESS
  super_ocf_log info "FLOW $FUNCNAME ($*)"
  local rc=0

  # -f: stopping an already stopped resource is not an error and must not
  # print one; the path is quoted to stay one word
  rm -f -- "$HA_RSCTMP/SAPHana/SAPTopologyON"
  rc=$OCF_SUCCESS

  super_ocf_log info "FLOW $FUNCNAME rc=$rc"
  return $rc
}

# function: master_walk - find master node(s)
# params:   -
# globals:  srmode(r), the_master(w)
function master_walk() {
   # Determine the master name server node of the local site.
   #
   # For every cluster node of the own site the roles attribute
   # (<nsConf>:<nsCurr>:<isConf>:<isCurr>) is inspected, e.g.
   #   master1:master:worker:master  -> configured master1, currently the active master
   #   master2:slave:worker:slave    -> configured master2, currently not active
   # The active master wins; without one the configured master candidate
   # with the lowest number (master1, master2, master3) is taken.
   #
   # globals: srmode(r), site(r), ATTR_NAME_HANA_SITE(r), ATTR_NAME_HANA_ROLES(r),
   #          the_master(w); node, nNsConf, nNsCurr, nIsConf, nIsCurr are
   #          written as globals, too
   # returns: 1 without touching the_master, if srmode is none of
   #          primary|sync|syncmem|async
   local active_master="" best_cold_master=""
   local master1="" master2="" master3=""
   local nSite="" nRole=""
   local slot_hit=0
   super_ocf_log debug "DBG: master walk"
   case "$srmode" in
      primary | sync | syncmem | async ) ;;
      * ) return 1 ;;
   esac
   super_ocf_log debug "DBG: master walk for $srmode"
   # walk over all known cluster nodes, nodes of other sites are skipped
   for node in $(crm_node  -l | awk '{print $2}'); do
       nSite=$(get_hana_attribute ${node} "${ATTR_NAME_HANA_SITE[@]}")
       [ "$site" = "$nSite" ] || continue
       nRole=$(get_hana_attribute ${node} "${ATTR_NAME_HANA_ROLES[@]}" | tr ':' ' ')
       read nNsConf nNsCurr nIsConf nIsCurr <<< $nRole
       super_ocf_log debug "DBG: site $site $nNsConf:$nNsCurr"
       # remember the node under its configured master slot ...
       slot_hit=1
       case "$nNsConf" in
           master1 ) master1=$node ;;
           master2 ) master2=$node ;;
           master3 ) master3=$node ;;
           * )       slot_hit=0 ;;
       esac
       # ... and as active master, if it currently holds that role
       if [ $slot_hit -eq 0 ]; then
           super_ocf_log debug "DBG: site $site NO match"
       elif [ "$nNsCurr" = "master" ]; then
           active_master=$node
           super_ocf_log debug "DBG: site $site match $nNsConf:master"
       else
           super_ocf_log debug "DBG: site $site match $nNsConf:STAR"
       fi
       super_ocf_log debug "DBG: $site: $nRole"
   done
   if [ -z "$active_master" ]; then
       # no active master: first configured candidate in the order master1, master2, master3
       best_cold_master=${master1:-${master2:-$master3}}
   fi
   super_ocf_log info "ACT ===> priorities for site $site master1=$master1 master2=$master2 master3=$master3 ==> active_master=$active_master best_cold_master=$best_cold_master"
   the_master=${active_master:-$best_cold_master}
   super_ocf_log info "ACT ===> master_walk: the_master=$the_master; priorities for site $site master1=$master1 master2=$master2 master3=$master3 ==> active_master=$active_master best_cold_master=$best_cold_master"
}

#
# function: sht_monitor - monitor a hana topology instance
# params:   --
# globals:  OCF_*(r), SAPCONTROL(r), InstanceNr(r)
# sht_monitor: Can the given SAP instance do anything useful?
#
function sht_monitor() {
  # Monitor the topology resource: refresh the master information
  # (master_walk) and report the resource as running, if the marker file
  # $HA_RSCTMP/SAPHana/SAPTopologyON exists.
  # globals: HA_RSCTMP(r), OCF_SUCCESS(r), OCF_NOT_RUNNING(r)
  # returns: OCF_SUCCESS or OCF_NOT_RUNNING
  super_ocf_log info "FLOW $FUNCNAME ($*)"
  local rc=$OCF_NOT_RUNNING

  master_walk
  # quoted: with an unquoted path containing blanks the test itself fails
  # and the resource is reported as not running although the marker exists
  if [ -f "$HA_RSCTMP/SAPHana/SAPTopologyON" ]; then
     rc=$OCF_SUCCESS
  fi

  super_ocf_log info "FLOW $FUNCNAME rc=$rc"
  return $rc
}


#
# function: sht_status - get status of a hana instance (os tools only)
# params:   -
# globals:  SID(r), InstanceName(r), OCF_*(r), sidadm(r)
# sht_status: Lightweight check of SAP instance only with OS tools
#
function sht_status() {
    # Status of the topology resource; a plain wrapper around sht_monitor.
    # returns: return code of sht_monitor
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    sht_monitor
    return $?
}


#
# function: sht_validate - validation of (some) variables/parameters
# params:   -
# globals:  OCF_*(r), SID(r), InstanceName(r), InstanceNr(r),
# sht_validate: Check the semantic of the input parameters
#
function sht_validate() {
    # Check the format of the instance parameters.
    #   SID:        one capital letter followed by two capital letters or digits
    #   InstanceNr: exactly two digits
    # globals: SID(r), InstanceNr(r), OCF_SUCCESS(r), OCF_ERR_ARGS(r)
    # returns: OCF_SUCCESS, or OCF_ERR_ARGS if at least one value is invalid
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local rc=$OCF_SUCCESS
    local sid_hits nr_hits

    # number of input lines matching the expected format (must be exactly one)
    sid_hits=$(grep -c '^[A-Z][A-Z0-9][A-Z0-9]$' <<< "$SID")
    if [ $sid_hits -ne 1 ]; then
        super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!"
        rc=$OCF_ERR_ARGS
    fi

    nr_hits=$(grep -c '^[0-9][0-9]$' <<< "$InstanceNr")
    if [ $nr_hits -ne 1 ]; then
        super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
        rc=$OCF_ERR_ARGS
    fi

    super_ocf_log info "FLOW $FUNCNAME rc=$rc"
    return $rc
}

#
# function: sht_start_clone - start a hana clone instance
# params:   -
# globals:  OCF_*(r),
# sht_start_clone
#
function sht_start_clone() {
    # Start action for the clone resource; a plain wrapper around sht_start.
    # returns: return code of sht_start
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    sht_start
    return $?
}

#
# function: sht_stop_clone - stop a hana clone instance
# params:   -
# globals:  NODENAME(r), HANA_STATE_*, ATTR_NAME_*
# sht_stop_clone
#
function sht_stop_clone() {
    # Stop action for the clone resource.
    # Marks the local node as shut down in its roles attribute - the
    # configured name server role (first field) is kept, the other three
    # fields are set to "shtdown" - and then removes the marker file via sht_stop.
    # globals: NODENAME(r), HANA_STATE_*(r), ATTR_NAME_HANA_ROLES(r),
    #          primary_status(w), hanaPrim(w)
    # returns: return code of sht_stop
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    local rc=0
    local nRole nNsConf nNsCurr nIsConf nIsCurr
    check_for_primary; primary_status=$?
    if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
        hanaPrim="P"
    elif [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
        hanaPrim="S"
    elif [ $primary_status -eq $HANA_STATE_STANDALONE ]; then
        hanaPrim="N"
    else
        hanaPrim="-"
    fi
    # Read the roles of the node which is updated below. The former code used
    # ${node}, which is not set in this function, so the configured role of
    # the local node was lost.
    nRole=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_ROLES[@]}" | tr ':' ' ')
    read -r nNsConf nNsCurr nIsConf nIsCurr <<< "$nRole"
    set_hana_attribute "${NODENAME}" "$nNsConf:shtdown:shtdown:shtdown" "${ATTR_NAME_HANA_ROLES[@]}"
    sht_stop; rc=$?
    return $rc
}

#
# function: sht_monitor_clone - monitor a hana clone instance
# params:   -
# globals:  OCF_*, SID, InstanceNr, InstanceName, MAPPING(r)
# sht_monitor_clone
#
function sht_monitor_clone() {
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    #
    # Probe: only sht_monitor is called.
    # Regular monitor: additionally determines the replication state (hanaPrim), the
    # virtual host name (vName) and the landscape roles and publishes them as attributes.
    # returns: rc of sht_monitor, or OCF_NOT_RUNNING if the primary check reports a defect
    #
    # NOTE: primary_status, hanaPrim, vName, hanaANSWER, hanalrc, hanarole and ATTR are
    #       intentionally not declared local here (same as before); site and srmode are
    #       read from variables set outside of this function.
    #
    local rc=$OCF_ERR_GENERIC
    local listInstances=""

    if ocf_is_probe; then
        super_ocf_log debug "DBG: PROBE ONLY"
        sht_monitor; rc=$?
    else
        super_ocf_log debug "DBG: REGULAR MONITOR"
        if ! check_saphostagent; then
            start_saphostagent
        fi
        #
        # First check, if we are PRIMARY or SECONDARY
        #
        super_ocf_log debug "DBG: HANA SID $SID"
        super_ocf_log debug "DBG: HANA InstanceName $InstanceName"
        super_ocf_log debug "DBG: HANA InstanceNr $InstanceNr"
        check_for_primary; primary_status=$?
        if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
            hanaPrim="P"
            super_ocf_log debug "DBG: HANA IS PRIMARY"
            sht_monitor; rc=$?
        elif [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
            hanaPrim="S"
            super_ocf_log debug "DBG: HANA IS SECONDARY"
            sht_monitor; rc=$?
        elif [ $primary_status -eq $HANA_STATE_STANDALONE ]; then
            hanaPrim="N"
            super_ocf_log debug "DBG: HANA IS STANDALONE"
            sht_monitor; rc=$?
        else
            hanaPrim="-"
            super_ocf_log warn "ACT: sht_monitor_clone: HANA_STATE_DEFECT"
            rc=$OCF_NOT_RUNNING
        fi
        # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API?
        # try to catch:  Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
        # We rely on the following format: SID is word#4, NR is word#6, vHost is word#8
        #### SAP-CALL
        if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then
            # call saphostctrl only once and reuse the output for parsing and for the debug log
            listInstances=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)
            # Both SID and instance number must match. The awk variables must not be named
            # NR (awk's built-in record counter) and the number needs '==' (compare), not '='.
            vName=$(printf '%s\n' "$listInstances" \
                | awk -v sid="$SID" -v nr="$InstanceNr" '$4 == sid && $6 == nr { print $8 }' 2>/dev/null )
            super_ocf_log debug "DBG: ListInstances: $listInstances"
        else
            super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)"
        fi
        if [ -n "$vName" ]; then
            # remember the detected virtual host name for later runs
            set_hana_attribute "${NODENAME}" "$vName" "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}"
        else
            # first fallback: take the value stored by an earlier run
            vName=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}")
        fi
        # last fallback, if neither the HANA call NOR the Attribute "knows" the vName - try the local hostname
        if [ -z "$vName" ]; then
            vName=${NODENAME}
        fi
        hanaANSWER=$(HANA_CALL --timeout $HANA_CALL_TIMEOUT --cmd "landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); hanalrc="$?"
        if [ "$hanalrc" -eq 124 ]; then
            # landscape timeout
            super_ocf_log warn "RA: landscapeHostConfiguration.py TIMEOUT after $HANA_CALL_TIMEOUT seconds"
        fi
        #
        # scan with and without host prefix to allow also single instances (scaleup) for smaller test environments
        #
        hanarole=$(echo "$hanaANSWER" | tr -d ' ' | \
                awk -F= -v vName="$vName" '
                $1 == "host/"vName"/nameServerConfigRole"  {nsCR=$2}
                $1 == "host/"vName"/nameServerActualRole"  {nsAR=$2}
                $1 == "host/"vName"/indexServerConfigRole" {isCR=$2}
                $1 == "host/"vName"/indexServerActualRole" {isAR=$2}
                $1 == "nameServerConfigRole"  {nsCR=$2}
                $1 == "nameServerActualRole"  {nsAR=$2}
                $1 == "indexServerConfigRole" {isCR=$2}
                $1 == "indexServerActualRole" {isAR=$2}
                END { printf "%s:%s:%s:%s\n", nsCR, nsAR, isCR, isAR;  } ' )
        set_hana_attribute "${NODENAME}" "$hanarole" "${ATTR_NAME_HANA_ROLES[@]}"

        # TODO PRIO2: COULD/SHOULD WE LIMIT THE SET OF THE LSS/SRR ATTRIBUTE TO ONLY THE_MASTER nodes?
        # ignore timeout (124) and "ignore" (5) as return code from the landscapeHostConfiguration call
        super_ocf_log info "DEC: site=$site; hanalrc=$hanalrc"
        if [ -n "$site" -a "$hanalrc" != "124" ]; then
            # the site specific attributes get the site name appended to the attribute name
            ATTR=(${ATTR_NAME_HANA_LSS[@]}); ATTR[0]="${ATTR[0]}_$site"
            set_hana_attribute "X" "$hanalrc" "${ATTR[@]}"
            super_ocf_log info "DEC: set_hana_attribute X $hanalrc ${ATTR[0]}"
            ATTR=(${ATTR_NAME_HANA_SRR[@]}); ATTR[0]="${ATTR[0]}_$site"
            set_hana_attribute "X" "$hanaPrim" "${ATTR[@]}"
            set_hana_attribute "${NODENAME}" "$site" "${ATTR_NAME_HANA_SITE[@]}"
            super_ocf_log info "DEC: set_hana_attribute ${NODENAME} $site ${ATTR_NAME_HANA_SITE[0]}"
        fi
        case "$hanaPrim" in
            P ) ;;
            S ) # only secondary may propagate its sync status
                set_hana_attribute "X" "$srmode" "${ATTR_NAME_HANA_SRMODE2[@]}"
                ;;
        esac
        #ATTR_NAME_HANA_STATUS  # TODO: PRIO5: For SCALE-OUT: Fill that attribute later
    fi # end ocf_is_NOT_probe
    super_ocf_log info "FLOW $FUNCNAME rc=$rc"
    return $rc
}

#
# function: sht_notify - notify action
# params:   -
# globals:  OCF_*(r), ACTION(r), CLACT(r), NODENAME(r)
# sht_notify: Currently only logs the end of the notify action; no master scoring is done here
#
function sht_notify() {
    super_ocf_log info "FLOW $FUNCNAME ($*)"
    # Notifications need no handling here: trace the end of the action and report success.
    super_ocf_log info "RA ==== end action $ACTION$CLACT (${n_type}/${n_op})===="
    return 0
}

#
# function: main - main function to operate
# params:   ACTION
# globals:  OCF_*(r), SID(w), sidadm(w), InstanceName(w), DIR_EXECUTABLE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r)
#

## GLOBALS
# instance identification starts out empty (presumably filled by sht_init below - verify there)
SID=""; sidadm=""
InstanceName=""; InstanceNr=""
DIR_EXECUTABLE=""
SAPHanaFilter="ra-act-dec-lpa"
NODENAME=$(crm_node -n)

# exactly one argument - the action - is accepted
if (( $# != 1 )); then
    sht_usage
    exit $OCF_ERR_ARGS
fi

# "status" is treated as an alias of "monitor"
case "$1" in
    status) ACTION=monitor ;;
    *)      ACTION=$1 ;;
esac

# These operations don't require OCF parameters to be set
case "$ACTION" in
    usage|methods)
        sht_$ACTION
        exit $OCF_SUCCESS
        ;;
    meta-data)
        sht_meta_data
        exit $OCF_SUCCESS
        ;;
    notify)
        sht_notify
        exit $OCF_SUCCESS
        ;;
    admin-setup)
        admin-setup
        exit $OCF_SUCCESS
        ;;
esac
sht_init

# all remaining actions need root privileges
if ! ocf_is_root; then
    super_ocf_log err "ACT: $0 must be run as root"
    exit $OCF_ERR_PERM
fi

# parameter check
if [ -z "$OCF_RESKEY_SID" ]; then
    super_ocf_log err "ACT: Please set parameter SID!"
    exit $OCF_ERR_ARGS
fi

if [ -z "$OCF_RESKEY_InstanceNumber" ]; then
    super_ocf_log err "ACT: Please set parameter InstanceNumber!"
    exit $OCF_ERR_ARGS
fi

# clone resources dispatch to the sht_<action>_clone functions; promote/demote are
# rejected outside of a clone environment
if is_clone; then
    CLACT=_clone
else
    case "$ACTION" in
        promote|demote)
            super_ocf_log err "ACT: $ACTION called in a non clone environment"
            exit $OCF_ERR_ARGS
            ;;
    esac
fi

super_ocf_log info "RA ==== begin action $ACTION$CLACT ($SAPHanaTopologyVersion) ===="
ra_rc=$OCF_ERR_UNIMPLEMENTED
case "$ACTION" in
    start|stop|monitor)
        # Standard controlling actions
        sht_$ACTION$CLACT
        ra_rc=$?
        ;;
    validate-all)
        sht_validate
        ra_rc=$?
        ;;
    *)
        # seems to be an unknown request
        sht_methods
        ra_rc=$OCF_ERR_UNIMPLEMENTED
        ;;
esac
# runtime of the action in seconds (timeB is taken at script start)
timeE=$(date '+%s')
timeR=$(( timeE - timeB ))
super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaTopologyVersion) (${timeR}s)===="
exit $ra_rc
