diff --git a/SOURCES/bz1423424-1-update-saphana-saphanatopology.patch b/SOURCES/bz1423424-1-update-saphana-saphanatopology.patch new file mode 100644 index 0000000..5cd8ffa --- /dev/null +++ b/SOURCES/bz1423424-1-update-saphana-saphanatopology.patch @@ -0,0 +1,1990 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2016-10-14 10:09:56.479051279 +0200 ++++ b/heartbeat/SAPHana 2016-10-14 10:29:23.990066292 +0200 +@@ -2,8 +2,8 @@ + # + # SAPHana + # +-# Description: Manages two single SAP HANA Instance in System Replication +-# Planned: do also manage scale-up scenarios ++# Description: Manages two SAP HANA Databases in System Replication ++# Planned: do also manage scale-out scenarios + # currently the SAPHana is dependent of the analysis of + # SAPHanaTopology + # For supported scenarios please read the README file provided +@@ -16,7 +16,7 @@ + # Support: linux@sap.com + # License: GNU General Public License (GPL) + # Copyright: (c) 2013,2014 SUSE Linux Products GmbH +-# Copyright: (c) 2015 SUSE Linux GmbH ++# (c) 2015-2016 SUSE Linux GmbH + # + # An example usage: + # See usage() function below for more details... +@@ -29,12 +29,13 @@ + # OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) + # OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no) + # OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt)) +-# OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes) ++# OCF_RESKEY_SAPHanaFilter (outdated, replaced by cluster property hana_${sid}_glob_filter) + # + # + ####################################################################### + # + # Initialization: ++SAPHanaVersion="0.152.17" + timeB=$(date '+%s') + + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +@@ -43,6 +44,12 @@ + # + ####################################################################### + # ++log_attributes=false ++if ocf_is_true "$log_attributes"; then ++ log_attr_file="/var/log/fhATTRIBUTES" ++else ++ log_attr_file="/dev/null" ++fi + + HANA_STATE_PRIMARY=0 + HANA_STATE_SECONDARY=1 +@@ -107,7 +114,7 @@ + cat <<-EOF + usage: $0 ($methods) + +- $0 manages a SAP HANA Instance as an HA resource. ++ $0 manages two SAP HANA databases (scale-up) in system replication. + + The 'start' operation starts the HANA instance or bring the "clone instance" to a WAITING status + The 'stop' operation stops the HANA instance +@@ -145,15 +152,14 @@ + + + +-0.151.1 ++$SAPHanaVersion + +-Manages two SAP HANA instances in system replication (SR). ++Manages two SAP HANA database systems in system replication (SR). + +-The SAPHanaSR resource agent manages two SAP Hana instances (databases) which are configured +-in system replication. This first version is limited to the scale-up scenario. Scale-Out is +-not supported in this version. ++The SAPHanaSR resource agent manages two SAP HANA database systems which are configured ++in system replication. SAPHana supports Scale-Up scenarios. + +-Managing the two SAP HANA instances means that the resource agent controls the start/stop of the ++Managing the two SAP HANA database systems means that the resource agent controls the start/stop of the + instances. In addition the resource agent is able to monitor the SAP HANA databases to check their + availability on landscape host configuration level. For this monitoring the resource agent relies on interfaces + provided by SAP. 
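The hunk above replaces hard-coded writes to /var/log/fhATTRIBUTE with a `log_attributes` switch, so the chatty crm_attribute tracing only hits disk when explicitly enabled. A minimal sketch of the pattern, assuming ocf-shellfuncs is already sourced (the variable names are exactly those used by the patch):

    log_attributes=false                      # flip to true only while debugging attribute traffic
    if ocf_is_true "$log_attributes"; then
        log_attr_file="/var/log/fhATTRIBUTES"
    else
        log_attr_file="/dev/null"             # normal operation: discard the trace
    fi
    echo "$(date): SAPHana: crm_attribute ..." >> "$log_attr_file"

Every trace write later in the script goes through the same `$log_attr_file` sink, so the toggle is a single variable.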
A third task of the resource agent is to also check the synchronisation status +@@ -205,9 +211,10 @@ + Should cluster/RA prefer to switchover to slave instance instead of restarting master locally? Default="yes" + no: Do prefer restart locally + yes: Do prefer takever to remote site ++ never: Do never run a sr_takeover (promote) at the secondary side. THIS VALUE IS CURRENTLY NOT SUPPORTED. + + Local or site recover preferred? +- ++ + + + Define, if a former primary should automatically be registered. +@@ -220,7 +227,7 @@ + Time difference needed between to primary time stamps, if a dual-primary situation occurs + Time difference needed between to primary time stamps, + if a dual-primary situation occurs. If the time difference is +- less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin ++ less than the time gap, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin + a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After + this registration to the new primary all data will be overwritten by the system replication. + +@@ -290,6 +297,45 @@ + local rc=0; tr -d '"'; return $rc + } + ++# function: version: cpmpare two HANA version strings ++function ver_lt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 && return 0 || return 1 ++} ++ ++function ver_le() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 ++} ++ ++function ver_gt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 && return 0 || return 1 ++} ++ ++function ver_ge() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 -o $? -eq 1 && return 0 || return 1 ++} ++# ++# function: version: cpmpare two HANA version strings ++# ++function version() { ++ if [ $# -eq 3 ]; then ++ case "$2" in ++ LE | le | "<=" ) ver_le $1 $3;; ++ LT | lt | "<" ) ver_lt $1 $3;; ++ GE | ge | ">=" ) ver_ge $1 $3;; ++ GT | gt | ">" ) ver_gt $1 $3;; ++ * ) return 1; ++ esac ++ elif [ $# -ge 5 ]; then ++ version $1 $2 $3 && shift 2 && version $* ++ else ++ return 1; ++ fi ++} ++ + # + # function: remoteHost2remoteNode - convert a SAP remoteHost to the cluster node name + # params: remoteHost +@@ -372,12 +418,16 @@ + dstr=$(date) + case "$attr_store" in + reboot | forever ) +- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE +- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file ++ fi ++ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? + ;; + props ) +- echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE +- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file ++ fi ++ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? 
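The ver_lt/ver_le/ver_gt/ver_ge helpers introduced above build on ocf_version_cmp, which returns 0 when the first version is lower, 1 when both are equal, and 2 when it is higher. A short usage sketch (the version values are invented):

    hdbver="1.00.112.03"
    if version "$hdbver" ">=" "1.00.111"; then
        hdbState="hdbnsutil -sr_stateConfiguration"   # newer query interface
    fi
    # the recursive branch of version() allows chained range checks:
    # true when 1.00.100 <= hdbver AND hdbver < 2.00.000
    version "1.00.100" "<=" "$hdbver" "<" "2.00.000" && echo "rev 100 or newer, still HANA 1.0"

The same helper set is duplicated later in SAPHanaTopology, so both agents can gate the hdbnsutil option names on the installed HANA revision.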
+ ;; + esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -405,12 +455,16 @@ + dstr=$(date) + case "$attr_store" in + reboot | forever ) +- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE +- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> $log_attr_file ++ fi ++ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>$log_attr_file; rc=$? + ;; + props ) +- echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE +- crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> $log_attr_file ++ fi ++ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>$log_attr_file; rc=$? + ;; + esac + else +@@ -460,6 +514,10 @@ + # DONE: PRIO2: Only adjust master if value is really different (try to check that) + oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot) + if [ "$oldscore" != "$score" ]; then ++ dstr=$(date) ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_master -v $score -l reboot " >> $log_attr_file ++ fi + super_ocf_log debug "DBG: SET crm master: $score (old: $oldscore)" + ${HA_SBIN_DIR}/crm_master -v $score -l reboot; rc=$? + else +@@ -471,9 +529,9 @@ + } + + # +-# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER) ++# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE) + # params: NODE_ROLES NODE_SYNC_STATUS +-# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], ++# globals: SCORING_TABLE[@], + # + scoring_crm_master() + { +@@ -482,7 +540,7 @@ + local sync="$2" + local skip=0 + local myScore="" +- for scan in "${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}"; do ++ for scan in "${SCORING_TABLE[@]}"; do + if [ $skip -eq 0 ]; then + read rolePatt syncPatt score <<< $scan + if grep "$rolePatt" <<< "$roles"; then +@@ -494,7 +552,7 @@ + fi + done + super_ocf_log debug "DBG: scoring_crm_master adjust score $myScore" +- # TODO: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 ++ # DONE: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 + if [ -n "$myScore" ]; then + set_crm_master $myScore + fi +@@ -514,28 +572,91 @@ + } + + # ++# function: HANA_CALL ++# params: timeout-in-seconds cmd-line ++# globals: sid(r), SID(r), InstanceName(r) ++# ++function HANA_CALL() ++{ ++ # ++ # TODO: PRIO 5: remove 'su - ${sidadm} later, when SAP HANA resoled issue with ++ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp) ++ # TODO: PRIO 5: Maybe make "su" optional by a parameter ++ local timeOut=0 ++ local onTimeOut="" ++ local rc=0 ++ local use_su=1 # Default to be changed later (see TODO above) ++ local pre_cmd="" ++ local cmd="" ++ local pre_script="" ++ local output="" ++ while [ $# -gt 0 ]; do ++ case "$1" in ++ --timeout ) timeOut=$2; shift;; ++ --use-su ) use_su=1;; ++ --on-timeout ) onTimeOut="$2"; shift;; ++ --cmd ) shift; cmd="$*"; break;; ++ esac ++ shift ++ done ++ ++ if [ $use_su 
-eq 1 ]; then ++ pre_cmd="su - ${sid}adm -c" ++ pre_script="true" ++ else ++ # as root user we need the library path to the SAP kernel to be able to call sapcontrol ++ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH ++ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] ++ then ++ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ++ fi ++ pre_cmd="bash -c" ++ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH" ++ fi ++ case $timeOut in ++ 0 | inf ) ++ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? ++ ;; ++ * ) ++ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? ++ # ++ # on timeout ... ++ # ++ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then ++ local second_output="" ++ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); ++ fi ++ ;; ++ esac ++ echo "$output" ++ return $rc; ++} ++ ++# + # function: saphana_init - initialize variables for the resource agent + # params: InstanceName +-# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), +-# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w) ++# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), ++# globals: sr_name(w), remoteHost(w), otherNodes(w), remSR_name(w) + # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w) + # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) + # globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w) ++# globals: NODENAME(w), vNAME(w), hdbver(w), + # saphana_init : Define global variables with default values, if optional parameters are not set + # + function saphana_init() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=$OCF_SUCCESS +- local vName + local clN + # local site + # two parameter models (for transition only) + # OLD: InstanceName + # NEW: SID InstanceNumber ++ NODENAME=$(crm_node -n) + SID=$OCF_RESKEY_SID + InstanceNr=$OCF_RESKEY_InstanceNumber + SIDInstanceName="${SID}_HDB${InstanceNr}" + InstanceName="HDB${InstanceNr}" ++ export SAPSYSTEMNAME=$SID + super_ocf_log debug "DBG: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)" + sid=$(echo "$SID" | tr [:upper:] [:lower:]) + sidadm="${sid}adm" +@@ -544,15 +665,23 @@ + # DONE: PRIO4: SAPVIRHOST might be different to NODENAME + # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? 
Answer: Yes + # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 +- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8 +- vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ +- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr) ++ # We rely on the following format: SID is word#4, SYSNR is work#6, vHost is word#8 ++ if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then ++ vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ ++ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null ) ++ super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)" ++ else ++ super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)" ++ fi + if [ -z "$vName" ]; then + # + # if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology + # + vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME"); + fi ++ if [ -z "$vName" ]; then # last fallback if we are not able to figure out the virtual host name ++ vName="$NODENAME" ++ fi + SAPVIRHOST=${vName} + PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER" + AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" +@@ -571,6 +700,12 @@ + ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") + ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") + ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") ++ ATTR_NAME_HANA_OPERATION_MODE=("hana_${sid}_op_mode" "forever") ++ # ++ # new "central" attributes ++ # ++ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa") ++ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]}) + # + # TODO: PRIO4: Table for non-preferred-site-takeover + # +@@ -591,9 +726,7 @@ + ) + SCORING_TABLE_PREFERRED_LOCAL_RESTART=( + "[0-9]*:P:[^:]*:master .* 150" +- "[0-9]*:P:[^:]*:slave .* 140" +- "[0-9]*:P:[^:]*:\? 
.* 0" +- "[0-9]*:P:[^:]*:- .* 0" ++ "[0-9]*:P:[^:]*:.* .* 140" + "[0-9]*:S:[^:]*:master SOK 100" + "[0-9]*:S:[^:]*:master SFAIL -INFINITY" + "[0-9]*:S:[^:]*:slave SOK 10" +@@ -602,6 +735,25 @@ + "[0-9]*:S:[^:]*:- .* 0" + ".* .* -1" + ) ++ SCORING_TABLE_PREFERRED_NEVER=( ++ "[234]*:P:[^:]*:master .* 150" ++ "[015-9]*:P:[^:]*:master .* 90" ++ "[0-9]*:P:[^:]*:.* .* -INFINITY" ++ "[0-9]*:S:[^:]*:.* .* -INFINITY" ++ ".* .* -INFINITY" ++ ) ++ if ocf_is_true $PreferSiteTakeover; then ++ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}") ++ else ++ case "$PreferSiteTakeover" in ++ never|NEVER|Never ) ++ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_NEVER[@]}") ++ ;; ++ * ) ++ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_LOCAL_RESTART[@]}") ++ ;; ++ esac ++ fi + # + DUPLICATE_PRIMARY_TIMEOUT="${OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT:-7200}" + super_ocf_log debug "DBG: DUPLICATE_PRIMARY_TIMEOUT=$DUPLICATE_PRIMARY_TIMEOUT" +@@ -615,7 +767,7 @@ + esac + # + # +- ++ # + remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]}); + if [ -z "$remoteHost" ]; then + if [ ${#otherNodes[@]} -eq 1 ]; then # we are a 2 node cluster, lets assume the other is the remote-host +@@ -640,7 +792,7 @@ + sr_mode="sync" + fi + if [ -n "$remoteNode" ]; then +- rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); ++ remSR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); + fi + super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode" + # optional OCF parameters, we try to guess which directories are correct +@@ -671,26 +823,21 @@ + # + SAPSTARTPROFILE="$(ls -1 $DIR_PROFILE/${OCF_RESKEY_INSTANCE_PROFILE:-${SID}_${InstanceName}_*})" + fi +- # as root user we need the library path to the SAP kernel to be able to call sapcontrol +- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH +- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] +- then +- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH +- export LD_LIBRARY_PATH +- fi + PATH=${PATH}:${DIR_EXECUTABLE}; export PATH ++ local ges_ver ++ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') ++ hdbver=${ges_ver%.*.*} ++ # ++ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state ++ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible ++ # ++ hdbState="hdbnsutil -sr_state" ++ hdbMap="hdbnsutil -sr_state" ++ if version "$hdbver" ">=" "1.00.111"; then ++ hdbState="hdbnsutil -sr_stateConfiguration" ++ hdbMap="hdbnsutil -sr_stateHostMapping" ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" +- ############################# +- # TODO: PRIO9: To be able to call landscapeHostConfig.py without su (so as root) +- # TODO: PRIO9: Research for environment script .htacces or something like that +- #export SAPSYSTEMNAME=ZLF +- #export DIR_INSTANCE=/usr/sap/ZLF/HDB02 +- #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DIR_INSTANCE/exe:$DIR_INSTANCE/exe/Python/lib +- #export PYTHONPATH=$DIR_INSTANCE/$HOST:$DIR_INSTANCE/exe/python_support:$DIR_INSTANCE/exe +- #export PYTHONHOME=$DIR_INSTANCE/exe/Python +- #export SAP_RETRIEVAL_PATH=$DIR_INSTANCE/$HOST +- #export DIR_EXECUTABLE=$DIR_INSTANCE/exe +- ############################# + return $OCF_SUCCESS + } + +@@ -765,7 +912,11 @@ + # or ownership - they will be recreated by sapstartsrv during next start + rm -f /tmp/.sapstream5${InstanceNr}13 + rm -f 
/tmp/.sapstream5${InstanceNr}14
+-    $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm
++    (
++        export PATH="$DIR_EXECUTABLE${PATH:+:}$PATH"
++        export LD_LIBRARY_PATH="$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH"
++        $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm
++    )
+     # now make sure the daemon has been started and is able to respond
+     local srvrc=1
+     while [ $srvrc -eq 1 -a $(pgrep -f "sapstartsrv.*$runninginst" | wc -l) -gt 0 ]
+@@ -809,31 +960,47 @@
+ function check_for_primary() {
+     super_ocf_log info "FLOW $FUNCNAME ($*)"
+     local rc=$HANA_STATE_DEFECT
+-    node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
+-    node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
+-    super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
+-    # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files?
++    # TODO: PRIO 3: Check beginning from which SPS does SAP support HDBSettings.sh?
++    # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback
++    # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsutil does not answer properly -> lookup in config files?
+     # This might also solve some problems when we could not figure-out the ilocal or remote site name
+-    for i in 1 2 3 4 5 6 7 8 9; do
++    local chkMethod=""
++    for chkMethod in hU hU hU gP; do
++        case "$chkMethod" in
++            gP )
++                local gpKeys=""
++                gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id})
++                node_full_status=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }')
++                node_status=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}')
++                super_ocf_log info "ACT: Using getParameter.py as fallback - node_status=$node_status"
++                ;;
++            hU | * )
++                # DONE: PRIO1: Beginning from SAP HANA rev 112.03 -sr_state is no longer supported
++                node_full_status=$(HANA_CALL --timeout 60 --cmd "$hdbState" 2>/dev/null )
++                node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
++                super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
++                ;;
++        esac
+         case "$node_status" in
+             primary )
+-                super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY"
+-                return $HANA_STATE_PRIMARY;;
++                rc=$HANA_STATE_PRIMARY
++                break;;
+             syncmem | sync | async )
+-                super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY"
+-                return $HANA_STATE_SECONDARY;;
++                rc=$HANA_STATE_SECONDARY
++                break;;
+             none ) # have seen that mode on second side BEFEORE we registered it as replica
+-                super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE"
+-                return $HANA_STATE_STANDALONE;;
++                rc=$HANA_STATE_STANDALONE
++                break;;
+             * )
+                 super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>"
+                 dump=$( echo $node_status | hexdump -C );
+                 super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>"
+-                node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
+-                node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
++                # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback
++                # SAP_CALL
+                 super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status"
+                 # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+         esac;
++        sleep 2
+     done
+     super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ 
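The rewritten check_for_primary above no longer returns from inside the loop; it retries hdbnsutil up to three times ('hU') and then falls back to reading global.ini via getParameter.py ('gP'). The generic shape of that retry-then-fallback pattern, with hypothetical query_hdbnsutil/query_global_ini helpers standing in for the real HANA_CALL invocations:

    node_status=""
    for chkMethod in hU hU hU gP; do
        case "$chkMethod" in
            gP )     node_status=$(query_global_ini) ;;  # slow but robust fallback
            hU | * ) node_status=$(query_hdbnsutil) ;;   # fast path, may hang or crash
        esac
        case "$node_status" in
            primary | syncmem | sync | async | none ) break ;;  # usable answer, stop retrying
        esac
        sleep 2    # short pause before the next attempt
    done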
return $rc +@@ -854,12 +1021,18 @@ + { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam="" +- if [ -n "$rem_SR_name" ]; then +- siteParam="--site=$rem_SR_name" ++ if [ -n "$remSR_name" ]; then ++ siteParam="--site=$remSR_name" + fi +- FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? +- super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" +- super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh systemReplicationStatus.py $siteParam ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of systemReplicationStatus.py ++ # SAP_CALL ++ # FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? ++ FULL_SR_STATUS=$(HANA_CALL --timeout 60 --cmd "systemReplicationStatus.py" 2>/dev/null); srRc=$? ++ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" ++ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" + # + # TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported) + # And is the check for return code capable for chains? +@@ -890,7 +1063,7 @@ + # ok we should be careful and set secondary to SFAIL + super_ocf_log info "FLOW $FUNCNAME SFAIL" + set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss" ++ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (5) - srRc=$srRc lss=$lss" + # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary + lpa_set_lpt 10 "$remoteNode" + rc=1 +@@ -898,7 +1071,7 @@ + else + super_ocf_log info "FLOW $FUNCNAME SFAIL" + set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc" ++ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (2) - srRc=$srRc" + # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary + lpa_set_lpt 10 "$remoteNode" + rc=1; +@@ -992,14 +1165,28 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 + # +- su - $sidadm -c "python $DIR_EXECUTABLE/python_support/landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh landscapeHostConfiguration.py ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # DONE: Limit the runtime of landscapeHostConfiguration.py ++ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? ++ if [ $rc -eq 124 ]; then ++ # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? ++ # landscape timeout ++ sleep 20 ++ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? 
++ if [ $rc -eq 124 ]; then ++ # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" ++ rc=0 ++ fi ++ fi + return $rc; + } + + # + # function: register_hana_secondary - register local hana as secondary to the other site + # params: - +-# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r) ++# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r), hdbver(r) + # register_hana_secondary + # + function register_hana_secondary() +@@ -1007,17 +1194,31 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=2; + local remoteInstance=""; ++ local newParameter=0 + remoteInstance=$InstanceNr ++ ++ ++ if version "$hdbver" ">=" "1.00.110"; then ++ newParameter=1 ++ fi ++ + if ocf_is_true ${AUTOMATED_REGISTER}; then +- # +- # +- # +- # +- # +- super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" +- # +- # +- su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_register ... ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of hdbnsutil -sr_register ???? ++ if [ $newParameter -eq 1 ]; then ++ local hanaOM="" ++ hanaOM=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_OPERATION_MODE[@]}) ++ if [ -n "$hanaOM" ]; then ++ hanaOM="--operationMode=$hanaOM" ++ fi ++ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name" ++ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name"; rc=$? ++ else ++ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" ++ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? ++ fi + # backup_global_and_nameserver + else + super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE" +@@ -1051,7 +1252,7 @@ + check_sapstartsrv + rc=$? + # +- # TODO: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? ++ # DONE: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? + # + if [ $rc -eq $OCF_SUCCESS ]; then + output=$($SAPCONTROL -nr $InstanceNr -function Start) +@@ -1169,7 +1370,7 @@ + 0 ) # LPA says start-up + lpa_advice="start" + # TODO: PRIO1: We need to do a special handling for remote being a 234-Secondary in SR Status SOK +- # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) ++ # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) + # then lpa_advice="wait" + remoteRole=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_ROLES[@]}) + remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) +@@ -1193,17 +1394,20 @@ + 1) # LPA says register! 
+ lpa_advice="register" + ;; +- 2) # LPA says wait for second LPT ++ 2) # LPA says wait for older LPA to expire ++ lpa_advice="wait" ++ ;; ++ 3) # LPA says to wait for remote LPA to be reported/announced + lpa_advice="wait" + ;; +- 3 | 4 ) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss ++ 4) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss + lpa_advice="fail" + ;; +- * ) # LPA failed with an unkonown status - FAIL resource ++ *) # LPA failed with an unkonown status - FAIL resource + lpa_advice="fail" + ;; + esac +- ++ + # DONE: PRIO2: Do we need to differ 0 and 1 here? While 0 is a fatal SAP error, 1 for down/error + if [ $lss -eq 0 ]; then + super_ocf_log err "ACT: get_hana_landscape_status reports FATAL" +@@ -1218,7 +1422,7 @@ + 2 | 3 | 4 ) # as landcape says we are up - just set the scores and return code + super_ocf_log info "LPA: landcape: UP, LPA: start ==> keep running" + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + rc=$OCF_SUCCESS + ;; + 1 ) # landcape says we are down, lets start and adjust scores and return code +@@ -1226,7 +1430,7 @@ + saphana_start + rc=$? + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + ;; + esac + scoring_crm_master "$my_role" "$my_sync" +@@ -1250,11 +1454,11 @@ + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then + super_ocf_log info "ACT: Register successful" + lpa_push_lpt 10 +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + set_crm_master 0 + saphana_start_secondary + rc=$? +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + else + super_ocf_log err "ACT: Register failed" + rc=$OCF_NOT_RUNNING +@@ -1279,11 +1483,19 @@ + rc=$OCF_ERR_GENERIC + ;; + 1 ) # we are down, so we should wait --> followup in next monitor +- super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" +- # TODO: PRIO3: Check, if WAITING is correct here +- set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} +- set_crm_master -9000 +- rc=$OCF_SUCCESS ++ # DONE: PRIO3: Check, if WAITING is correct here ++ if ocf_is_true "$AUTOMATED_REGISTER" ; then ++ super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" ++ super_ocf_log info "RA: landcape: DOWN, LPA: wait ==> keep waiting" ++ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_crm_master -9000 ++ rc=$OCF_SUCCESS ++ else ++ super_ocf_log warning "LPA: OLD primary needs manual registration (AUTOMATED_REGISTER='false')" ++ set_hana_attribute ${NODENAME} "WAITING4REG" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_crm_master -9000 ++ rc=$OCF_NOT_RUNNING ++ fi + ;; + esac + ;; +@@ -1309,22 +1521,24 @@ + local ch ch_role + # + # get actual list of cluster members +- # ++ # + if [ -n "$otherNodes" ]; then + for ch in ${otherNodes[@]}; do + if [ $rc -eq 1 ]; then + ch_role=$(get_hana_attribute ${ch} ${ATTR_NAME_HANA_ROLES[@]}) +-# TODO: PRIO3: check if [0-9], [234] or [34] is correct +-# TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? +-# grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 +-# grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 +-# Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master +- awk -F: 'BEGIN { rc=1 } +- $1 ~ "[34]" && $2 ="P" && $4="master" { rc=0 } +- END { exit rc }' <<< $ch_role ; rc=$? 
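The re-indented block below also fixes a real awk bug: the old pattern '$2 ="P" && $4="master"' assigns instead of compares, so it matched any role string whose first field contained a 3 or 4. With '==' the test only succeeds for a running primary master. A standalone illustration (the role string is invented):

    ch_role="4:P:master1:master:worker:master"
    awk -F: 'BEGIN { rc=1 }
             $1 ~ "[34]" && $2 == "P" && $4 == "master" { rc=0 }
             END { exit rc }' <<< "$ch_role" && echo "running primary master found"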
++ # TODO: PRIO3: check if [0-9], [234] or [34] is correct ++ # TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? ++ # grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 ++ # grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 ++ # Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master ++ super_ocf_log debug "DBG: check_for_primary_master (3) ch_role=$ch_role" ++ awk -F: 'BEGIN { rc=1 } ++ $1 ~ "[34]" && $2 == "P" && $4 == "master" { rc=0 } ++ END { exit rc }' <<< $ch_role ; rc=$? ++ super_ocf_log debug "DBG: check_for_primary_master (4) rc=$rc" + fi + done +- fi ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1378,7 +1592,7 @@ + ####### LPA - begin + # + lpa_push_lpt 10 +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + # + ####### LPA - end + # +@@ -1404,7 +1618,7 @@ + rc=$OCF_SUCCESS + fi + else +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + fi + else + super_ocf_log info "ACT: wait_for_primary_master ==> WAITING" +@@ -1454,7 +1668,7 @@ + then + if [ $STATE -eq $OCF_NOT_RUNNING ] + then +- [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" ++ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE status color is $COLOR !" + rc=$STATE + fi + count=1 +@@ -1511,13 +1725,17 @@ + local crm_rc=1 + local lpt=$1 + local clpt=-1 +- local node=${2:-${NODENAME}} ++ local node=$2 + set_hana_attribute ${node} "$lpt" ${LPA_ATTR[@]}; crm_rc=$? +- clpt=$(lpa_get_lpt $NODENAME) +- if [ "$lpt" != "$clpt" ]; then +- rc=2 ++ if [ -n "$node" ]; then ++ clpt=$(lpa_get_lpt $NODENAME) ++ if [ "$lpt" != "$clpt" ]; then ++ rc=2 ++ else ++ rc=0 ++ fi + else +- rc=0 ++ super_ocf_log info "DEC: lpa_set_lpt ignore to change value for empty node name" + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc +@@ -1608,7 +1826,7 @@ + else + rc=2 + fi +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1621,9 +1839,10 @@ + # + # Returncodes: + # 0: start +-# 1: register than start +-# 2: wait4gab +-# 3: wait4other ++# 1: register (then start) ++# 2: wait4gab (WAIT4LPA - Older LPA needs to expire) ++# 3: wait4other (WAIT4LPA - Remote LPA needs to be announced) ++# 4: lpa internal error + # + # Initializing (if NO local LPT-file): + # SECONDARY sets to 10 +@@ -1648,7 +1867,7 @@ + # + function lpa_check_lpt_status() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local rc=0 ++ local rc=4 + local LPTloc=-1 + local LPTrem=-1 + local LPTMark=1000 +@@ -1666,16 +1885,16 @@ + if [ -z "$LPTloc" -o "$LPTloc" -eq -1 -o "$lparc" -ne 0 ]; then + # last option - try to initialize as PRIMARY + lpa_push_lpt 20 +- lpa_set_lpt 20 ++ lpa_set_lpt 20 $NODENAME + LPTloc=20 # DEFAULT + fi + fi +- # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt ++ # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt + LPTrem=$(lpa_get_lpt $remoteNode); lparc=$? 
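A worked example of the arbitration that follows, assuming the default DUPLICATE_PRIMARY_TIMEOUT of 7200 seconds (the timestamps are invented):

    LPTloc=1500007300   # local last-primary timestamp
    LPTrem=1500000000   # remote side reported a timestamp 7300 s older
    # both values are >= 1000, i.e. real timestamps rather than the 10/20/30 markers,
    # so the full gap applies:
    delta=7200
    # LPTloc - LPTrem = 7300 > delta  ==> the local site wins, LPA answers "start" (rc=0);
    # the mirrored case (remote newer by more than delta) yields "register" (rc=1),
    # and any difference within +/-delta yields "wait" (rc=2)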
+ if [ $lparc -ne 0 ]; then + # LPT of the other node could not be evaluated - LPA says WAIT + super_ocf_log debug "DBG: LPA: LPTloc=$LPTloc, LPTrem undefined ==> WAIT" +- rc=2 ++ rc=3 + else + super_ocf_log debug "DBG: LPA: LPTloc ($LPTloc) LPTrem ($LPTrem) delta ($delta)" + if [ $LPTloc -lt $LPTMark -a $LPTrem -lt $LPTMark ]; then +@@ -1683,11 +1902,11 @@ + else + delta=$DUPLICATE_PRIMARY_TIMEOUT # at least one of the lpts is a real timestamp so include delta-gap + fi +- if (( delta < LPTloc - LPTrem )); then ++ if (( delta < LPTloc - LPTrem )); then + # We are the winner - LPA says STARTUP + super_ocf_log debug "DBG: LPA: LPTloc wins $LPTloc > $LPTrem + $delta ==> START" + rc=0 +- elif (( delta < LPTrem - LPTloc )); then ++ elif (( delta < LPTrem - LPTloc )); then + if ocf_is_true "$AUTOMATED_REGISTER" ; then + # The other one has won - LPA says REGISTER + super_ocf_log debug "DBG: LPA: LPTrem wins $LPTrem > $LPTloc + $delta ==> REGISTER" +@@ -1697,12 +1916,12 @@ + rc=2 + fi + +- else ++ else + super_ocf_log debug "DBG: LPA: Difference between LPTloc and LPTrem is less than delta ($delta) ==> WAIT" + # TODO: PRIO3: ADD STALEMATE-HANDLING HERE; currently admin should set one of the lpa to 20 + rc=2 +- fi +- fi ++ fi ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1716,6 +1935,7 @@ + { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 ++ # always true for scale-up + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1728,23 +1948,15 @@ + # + function saphana_start_clone() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING ++ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING + local sqlrc; +- local chkusr; +- # TODO: PRIO4: remove check_secstore_users later +- secUser=$(check_secstore_users SAPHANA${SID}SR SLEHALOC RHELHALOC) ; chkusr=$? +- if [ $chkusr -ne 0 ]; then +- super_ocf_log err "ACT: Secure store users are missing (see best practice manual how to setup the users)" +- rc=$OCF_ERR_CONFIGURED ++ set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ check_for_primary; primary_status=$? ++ if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then ++ saphana_start_primary; rc=$? + else +- set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} +- check_for_primary; primary_status=$? +- if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then +- saphana_start_primary; rc=$? +- else +- lpa_set_lpt 10 +- saphana_start_secondary; rc=$? +- fi ++ lpa_set_lpt 10 $NODENAME ++ saphana_start_secondary; rc=$? + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc +@@ -1761,9 +1973,10 @@ + local rc=0 + local primary_status="x" + set_hana_attribute ${NODENAME} "UNDEFINED" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ super_ocf_log debug "DBG: SET UNDEFINED" + check_for_primary; primary_status=$? + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + fi + saphana_stop; rc=$? + return $rc +@@ -1813,26 +2026,42 @@ + # seems admin already decided that for us? -> we are running - set DEMOTED + promoted=0; + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + fi + lpa_check_lpt_status; lparc=$? +- # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes +- if [ $lparc -lt 2 ]; then +- # lpa - no need to wait any longer - lets try a new start +- saphana_start_clone +- rc=$? 
+- super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc +- else +- lpa_init_lpt $HANA_STATE_PRIMARY +- # still waiting for second site to report lpa-lpt +- if ocf_is_true "$AUTOMATED_REGISTER" ; then +- super_ocf_log info "LPA: Still waiting for remote site to report LPA status" +- else +- super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" +- fi +- return $OCF_SUCCESS +- fi ++ # DONE: PRIO1: Need to differ lpa_check_lpt_status return codes ++ case "$lparc" in ++ 0 | 1 ) ++ # lpa - no need to wait any longer - lets try a new start ++ saphana_start_clone ++ rc=$? ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc ++ ;; ++ 2 ) ++ lpa_init_lpt $HANA_STATE_PRIMARY ++ # still waiting for second site to expire ++ if ocf_is_true "$AUTOMATED_REGISTER" ; then ++ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" ++ else ++ super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" ++ super_ocf_log info "LPA: You need to manually sr_register the older primary" ++ fi ++ return $OCF_SUCCESS ++ ;; ++ 3 ) ++ lpa_init_lpt $HANA_STATE_PRIMARY ++ # still waiting for second site to report lpa-lpt ++ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" ++ return $OCF_SUCCESS ++ ;; ++ 4 ) ++ # lpa internal error ++ # TODO PRIO3: Impplement special handling for this issue - should we fail the ressource? ++ super_ocf_log info "LPA: LPA reports an internal error" ++ return $OCF_SUCCESS ++ ;; ++ esac + promoted=0; + ;; + UNDEFINED ) +@@ -1848,7 +2077,7 @@ + ;; + esac + fi +- get_hana_landscape_status; lss=$? ++ get_hana_landscape_status; lss=$? + super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" + case "$lss" in + 0 ) # FATAL or ERROR +@@ -1876,19 +2105,20 @@ + # + # TODO PRIO1: REMOVE remoteNode dependency - get_sync_status + remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) ++ # TODO HANDLING OF "NEVER" + case "$remoteSync" in + SOK | PRIM ) + super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)" + set_crm_master 5 + if check_for_primary_master; then +- lpa_set_lpt 20 ++ lpa_set_lpt 20 $NODENAME + fi + ;; + SFAIL ) +- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" ++ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" + ;; + * ) +- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" ++ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" + ;; + esac + else +@@ -1916,7 +2146,7 @@ + rc=$OCF_SUCCESS + else + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + lpa_push_lpt $LPTloc + if [ "$promoted" -eq 1 ]; then + set_hana_attribute "$NODENAME" "PRIM" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +@@ -1931,12 +2161,14 @@ + fi + my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) + my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) +- case "$my_role" in ++ case "$my_role" in + [12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status + ;; + [34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query + if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then +- 
analyze_hana_sync_statusSRS
++                if [ "$promote_attr" = "PROMOTED" ]; then
++                    analyze_hana_sync_statusSRS
++                fi
+             else
+                 analyze_hana_sync_statusSQL
+             fi
+@@ -1949,8 +2181,8 @@
+             [234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster
+                 lpa_check_lpt_status; again_lpa_rc=$?
+                 if [ $again_lpa_rc -eq 2 ]; then
+-                    super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" 
+-                    lpa_set_lpt 10
++                    super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart"
++                    lpa_set_lpt 10 $NODENAME
+                     lpa_push_lpt 10
+                     rc=$OCF_NOT_RUNNING
+                 fi
+@@ -1993,7 +2225,7 @@
+     # OK, we are running as HANA SECONDARY
+     #
+     if ! lpa_get_lpt ${NODENAME}; then
+-        lpa_set_lpt 10
++        lpa_set_lpt 10 $NODENAME
+         lpa_push_lpt 10
+     fi
+     promote_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_CLONE_STATE[@]})
+@@ -2042,17 +2274,25 @@
+         0 ) # FATAL
+             # DONE: PRIO1: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error
+             # TODO: PRIO3: is OCF_ERR_GENERIC best option?
+-            lpa_set_lpt 10
++            lpa_set_lpt 10 $NODENAME
+             rc=$OCF_ERR_GENERIC
+             ;;
+         1 ) # ERROR
+-            lpa_set_lpt 10
++            lpa_set_lpt 10 $NODENAME
+             rc=$OCF_NOT_RUNNING
+             ;;
+         2 | 3 | 4 ) # WARN INFO OK
+             rc=$OCF_SUCCESS
+-            lpa_set_lpt 30
++            lpa_set_lpt 30 $NODENAME
+             sync_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]})
++            local hanaOM=""
++            local hanaOut1=""
++            # TODO: PRIO 3: check, if using getParameter.py is the best option to analyze the set operationMode
++            # DONE: PRIO 3: Should we default to logreplay for SAP HANA >= SPS11 ?
++            hanaOut1=$(HANA_CALL --timeout 10 --use-su --cmd "getParameter.py --key=global.ini/system_replication/operation_mode --sapcontrol=1")
++            hanaFilter1=$(echo "$hanaOut1" | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }')
++            hanaOM=$(echo "$hanaFilter1" | awk -F= '$1=="operation_mode" {print $2}')
++            set_hana_attribute ${NODENAME} "$hanaOM" ${ATTR_NAME_HANA_OPERATION_MODE[@]}
+             super_ocf_log debug "DBG: sync_attr=$sync_attr"
+             case "$sync_attr" in
+                 "SOK" ) # This is a possible node to promote, when primary is missing
+@@ -2112,7 +2352,7 @@
+     fi
+     #
+     # First check, if we are PRIMARY or SECONDARY
+-    # 
++    #
+     check_for_primary; primary_status=$?
+     if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
+         # FIX: bsc#919925 Leaving Node Maintenance stops HANA Resource Agent
+@@ -2145,7 +2385,7 @@
+ #
+ # function: saphana_promote_clone - promote a hana clone
+ # params: -
+-# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), 
++# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r),
+ # saphana_promote_clone:
+ # In a Master/Slave configuration get Master being the primary OR by running hana takeover
+ #
+@@ -2169,7 +2409,7 @@
+     else
+         if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
+             #
+-            # we are SECONDARY/SLAVE and need to takepover ...
++            # we are SECONDARY/SLAVE and need to takeover ... promote on the replica (secondary) side...
+             # promote on the replica side...
+             #
+             hana_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]})
+@@ -2178,9 +2418,14 @@
+                 super_ocf_log info "ACT: !!!!!!! Promote REPLICA $SID-$InstanceName to be primary. !!!!!!" 
+ LPTloc=$(date '+%s') + # lpa_set_lpt 20 $remoteNode +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + lpa_push_lpt $LPTloc +- su - $sidadm -c "hdbnsutil -sr_takeover" ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_takeover ... ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of hdbnsutil -sr_takeover ???? ++ # SAP_CALL ++ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_takeover" + # + # now gain check, if we are primary NOW + # +@@ -2248,7 +2493,6 @@ + SAPSTARTPROFILE="" + SAPHanaFilter="ra-act-dec-lpa" + +-NODENAME=$(crm_node -n) + + + if [ $# -ne 1 ] +@@ -2306,8 +2550,7 @@ + fi + + # What kind of method was invoked? +-THE_VERSION=$(saphana_meta_data | grep ' + + +- 0.151.1 ++ $SAPHanaVersion + Analyzes SAP HANA System Replication Topology. + This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to + all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases. +@@ -207,12 +215,12 @@ + dstr=$(date) + case "$attr_store" in + reboot | forever ) +- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE +- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file ++ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? + ;; + props ) +- echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE +- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file ++ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? + ;; + esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -282,6 +290,53 @@ + } + + # ++# function: dequote - filter: remove quotes (") from stdin ++# params: - ++# globals: - ++function dequote() ++{ ++ local rc=0; tr -d '"'; return $rc ++} ++ ++# function: version: cpmpare two HANA version strings ++function ver_lt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 && return 0 || return 1 ++} ++ ++function ver_le() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 ++} ++ ++function ver_gt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 && return 0 || return 1 ++} ++ ++function ver_ge() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 -o $? 
-eq 1 && return 0 || return 1
++}
++#
++# function: version: compare two HANA version strings
++#
++function version() {
++    if [ $# -eq 3 ]; then
++        case "$2" in
++            LE | le | "<=" ) ver_le $1 $3;;
++            LT | lt | "<" ) ver_lt $1 $3;;
++            GE | ge | ">=" ) ver_ge $1 $3;;
++            GT | gt | ">" ) ver_gt $1 $3;;
++            * ) return 1;
++        esac
++    elif [ $# -ge 5 ]; then
++        version $1 $2 $3 && shift 2 && version $*
++    else
++        return 1;
++    fi
++}
++#
+ # function: is_clone - report, if resource is configured as a clone (also master/slave)
+ # params: -
+ # globals: OCF_*(r)
+@@ -314,12 +369,74 @@
+ }
+ 
+ #
++# function: HANA_CALL
++# params: timeout-in-seconds cmd-line
++# globals: sid(r), SID(r), InstanceName(r)
++#
++function HANA_CALL()
++{
++    #
++    # TODO: PRIO 5: remove 'su - ${sidadm}' later, when SAP HANA resolved issue with
++    # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp)
++    # TODO: PRIO 5: Maybe make "su" optional by a parameter
++    local timeOut=0
++    local onTimeOut=""
++    local rc=0
++    local use_su=1 # Default to be changed later (see TODO above)
++    local pre_cmd=""
++    local cmd=""
++    local pre_script=""
++    local output=""
++    while [ $# -gt 0 ]; do
++        case "$1" in
++            --timeout ) timeOut=$2; shift;;
++            --use-su ) use_su=1;;
++            --on-timeout ) onTimeOut="$2"; shift;;
++            --cmd ) shift; cmd="$*"; break;;
++        esac
++        shift
++    done
++
++    if [ $use_su -eq 1 ]; then
++        pre_cmd="su - ${sid}adm -c"
++        pre_script="true"
++    else
++        # as root user we need the library path to the SAP kernel to be able to call sapcontrol
++        # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH
++        if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ]
++        then
++            MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
++        fi
++        pre_cmd="bash -c"
++        pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH"
++    fi
++    case $timeOut in
++        0 | inf )
++            output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$?
++            ;;
++        * )
++            output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$?
++            #
++            # on timeout ...
++ # ++ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then ++ local second_output="" ++ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); ++ fi ++ ;; ++ esac ++ echo "$output" ++ return $rc; ++} ++ ++# + # function: sht_init - initialize variables for the resource agent + # params: - + # globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), +-# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) ++# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w) + # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w) + # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w) ++# globals: NODENAME(w), hdbver(w) + # sht_init : Define global variables with default values, if optional parameters are not set + # + # +@@ -331,12 +448,14 @@ + local hdbANSWER="" + local siteID + local siteNAME ++ local chkMethod="" + HOSTEXECNAME=saphostexec + USRSAP=/usr/sap + SAPSERVICE_PATH=${USRSAP}/sapservices + SAPHOSTCTRL_PATH=${USRSAP}/hostctrl/exe + HOSTEXEC_PATH=${SAPHOSTCTRL_PATH}/${HOSTEXECNAME} + HOSTEXEC_PROFILE_PATH=${SAPHOSTCTRL_PATH}/host_profile ++ NODENAME=$(crm_node -n) + SID=$OCF_RESKEY_SID + InstanceNr=$OCF_RESKEY_InstanceNumber + myInstanceName="${SID}_HDB${InstanceNr}" +@@ -382,13 +501,6 @@ + DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" + fi + +- # as root user we need the library path to the SAP kernel to be able to call sapcontrol +- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH +- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] +- then +- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH +- export LD_LIBRARY_PATH +- fi + + PATH=${PATH}:${DIR_EXECUTABLE} + # +@@ -399,12 +511,45 @@ + *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; + *cman* ) nodelist=$(crm_node -l);; + esac ++ # ++ # get HANA version ++ # ++ local ges_ver ++ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') ++ hdbver=${ges_ver%.*.*} ++ # ++ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state ++ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible ++ # ++ hdbState="hdbnsutil -sr_state" ++ hdbMap="hdbnsutil -sr_state" ++ if version "$hdbver" ">=" "1.00.111"; then ++ hdbState="hdbnsutil -sr_stateConfiguration" ++ hdbMap="hdbnsutil -sr_stateHostMapping" ++ fi + #### SAP-CALL + # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode +- for i in 1 2 3 4 5 6 7 8 9; do +- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) +- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" +- srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') ++ for chkMethod in hU hU hU gP ; do ++ # DONE: Limit the runtime of hdbnsutil. 
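With --sapcontrol=1 the HANA support tools wrap their key=value payload between 'SAPCONTROL-OK: <begin>' and 'SAPCONTROL-OK: <end>' marker lines; the awk filter used in the gP branch below keeps only the payload. A trimmed-down, runnable sketch (the sample output is invented, the marker names are the real ones; the 'next' is added here for clarity only):

    printf '%s\n' 'SAPCONTROL-OK: <begin>' \
                  'global.ini/system_replication/mode=primary' \
                  'SAPCONTROL-OK: <end>' |
    awk -F/ 'BEGIN {out=0}
             /^SAPCONTROL-OK: <begin>/ { out=1; next }
             /^SAPCONTROL-OK: <end>/   { out=0 }
             /=/ { if (out==1) { print $3 } }'
    # prints: mode=primary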
++            # TODO: Use getParameter.py if we get no answer
++            # SAP_CALL
++            #super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER"
++            #srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
++            case "$chkMethod" in
++                gP ) # call getParameter (gP)
++                    local gpKeys=""
++                    gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id})
++                    hdbANSWER=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }')
++                    srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}')
++                    super_ocf_log info "ACT: hdbnsutil not answering - using global.ini as fallback - srmode=$srmode"
++                    ;;
++                hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod )
++                    # DONE: PRIO1: Beginning from SAP HANA rev 112.03 -sr_state is no longer supported
++                    hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbState --sapcontrol=1" 2>/dev/null)
++                    super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER"
++                    srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}')
++                    ;;
++            esac
+         case "$srmode" in
+             primary | syncmem | sync | async | none )
+                 # we can leave the loop as we already got a result
+@@ -417,27 +562,51 @@
+         esac
+     done
+     # TODO PRIO3: Implement a file lookup, if we did not get a result
+-    siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}')
+-    siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
++    siteID=$(echo "$hdbANSWER" | awk -F= '/site.id/ {print $2}') # allow 'site_id' AND 'site id'
++    siteNAME=$(echo "$hdbANSWER" | awk -F= '/site.name/ {print $2}')
+     site=$siteNAME
+     srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
+-    MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site)
+-    super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
+     #
+-    # filter all non-cluster mappings
++    # for rev >= 111 we use the new mapping query
+     #
+-    # DONE: PRIO2: Need mapping between HANA HOSTS not cluster NODES
+-    local hanaVHost
+-    hanaRemoteHost=$(for n1 in $nodelist; do
+-        hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
+-        for n2 in $MAPPING; do
+-            if [ "$hanaVHost" == "$n2" ]; then
+-                echo $hanaVHost;
+-            fi;
+-        done;
+-    done )
+-    super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
+-    super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
++    if version "$hdbver" ">=" "1.00.111"; then
++        hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbMap --sapcontrol=1" 2>/dev/null)
++    fi
++    MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site)
++    super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
++    if [ -n "$MAPPING" ]; then
++        # we have a mapping from HANA, lets use it
++        #
++        # filter all non-cluster mappings
++        #
++        local hanaVHost=""
++        local n1=""
++        hanaRemoteHost=""
++        for n1 in $nodelist; do
++            hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
++            for n2 in $MAPPING; do
++                if [ "$hanaVHost" == "$n2" ]; then
++                    hanaRemoteHost="$hanaVHost"
++                fi;
++            done;
++        done
++        super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
++        super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
++    else
++        # HANA DID NOT TELL US THE MAPPING, LET'S TRY TO USE THE SITE ATTRIBUTES
++        local n1=""
++        local hanaSite=""
++        for n1 in $nodelist; do
++            # TODO: PRIO9 - For 
++ # able to catch more than one remoteHost
++ # currently having more than 2 HANA chain/star members IN the cluster is not allowed, the third must be external
++ if [ "$NODENAME" != "$n1" ]; then
++ hanaSite=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_SITE[@]})
++ hanaRemoteHost="$n1"
++ fi
++ done
++ super_ocf_log info "DEC: site=$site, mode=$srmode, hanaRemoteHost=$hanaRemoteHost - found by remote site ($hanaSite)"
++ fi
+ super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS"
+ return $OCF_SUCCESS
+ }
+@@ -446,38 +615,29 @@
+ # function: check_for_primary - check if local SAP HANA is configured as primary
+ # params: -
+ # globals: HANA_STATE_PRIMARY(r), HANA_STATE_SECONDARY(r), HANA_STATE_DEFECT(r), HANA_STATE_STANDALONE(r)
++# srmode(r)
+ #
+ function check_for_primary() {
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
+- node_status=$srmode
+- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status"
+- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
+- for i in 1 2 3 4 5 6 7 8 9; do
+- case "$node_status" in
+- primary )
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
+- return $HANA_STATE_PRIMARY;;
+- syncmem | sync | async )
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
+- return $HANA_STATE_SECONDARY;;
+- none ) # have seen that mode on second side BEFEORE we registered it as replica
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
+- return $HANA_STATE_STANDALONE;;
+- * )
+- # TODO: PRIO1: Should we set SFAIL?
+- # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+- dump=$( echo $node_status | hexdump -C );
+- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>"
+- #### SAP-CALL
+- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
+- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
+- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status"
+- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+- esac;
+- done
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT"
+- return $HANA_STATE_DEFECT
++ super_ocf_log debug "DBG: check_for_primary: srmode=$srmode"
++ case "$srmode" in
++ primary )
++ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
++ rc=$HANA_STATE_PRIMARY;;
++ syncmem | sync | async )
++ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
++ rc=$HANA_STATE_SECONDARY;;
++ none ) # have seen that mode on second side BEFORE we registered it as replica
++ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
++ rc=$HANA_STATE_STANDALONE;;
++ * )
++ dump=$( echo $srmode | hexdump -C );
++ super_ocf_log err "ACT: check_for_primary: we didn't expect srmode to be: DUMP: <$dump>"
++ rc=$HANA_STATE_DEFECT
++ esac;
++ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
++ return $rc
+ }
+
+
+@@ -653,7 +813,7 @@
+ function sht_stop_clone() {
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
+- check_for_primary; primary_status=$? 
++ check_for_primary; primary_status=$?
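++ # (editor's sketch, not upstream code) the idiom below is how the
++ # agent consumes check_for_primary: the HANA_STATE_* constant comes
++ # back as the return code, is captured from $?, and is then mapped
++ # to a one-letter role marker for the roles attribute, e.g.:
++ # check_for_primary; primary_status=$?
++ # [ $primary_status -eq $HANA_STATE_PRIMARY ] && hanaPrim="P"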
+ if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
+ hanaPrim="P"
+ elif [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
+@@ -663,7 +823,7 @@
+ else
+ hanaPrim="-"
+ fi
+- set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]} 
++ set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]}
+ sht_stop; rc=$?
+ return $rc
+ }
+@@ -718,28 +878,49 @@
+ fi
+ # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API?
+ # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
+- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8
++ # We rely on the following format: SID is word#4, SYSNR is word#6, vHost is word#8
+ #### SAP-CALL
+ vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \
+- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr 2>/dev/null )
++ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null )
+ # super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
+ if [ -n "$vName" ]; then
+- set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]} 
++ set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]}
+ else
+ vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]})
+ fi
+ #site=$(get_site_name)
+ #### SAP-CALL
+- hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
+- hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName})
++ # SAP_CALL
++ #hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
++ #
++ # since rev 09x SAP has added the --sapcontrol option for the landscapeHostConfiguration interface
++ # we begin to use --sapcontrol with rev 100
++ # since rev 120 we need to use the --sapcontrol, because SAP changed the tool output
++ #
++ if version "$hdbver" ">=" "1.00.100"; then
++ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); hanalrc="$?"
++ # TODO: PRIO9: Do we need to check the lines: 'SAPCONTROL-OK: <begin>' and 'SAPCONTROL-OK: <end>'?
++ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | \
++ awk -F= '$1 == "nameServerConfigRole" {f1=$2}
++ $1 == "nameServerActualRole" {f2=$2}
++ $1 == "indexServerConfigRole" {f3=$2}
++ $1 == "indexServerActualRole" {f4=$2}
++ END { printf "%s:%s:%s:%s\n", f1, f2, f3,f4 }')
++ else
++ #
++ # old code for backward compatibility
++ #
++ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
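++ # (editor's note, hedged) the legacy tool emits a '|'-separated
++ # table rather than key=value pairs; the awk below selects the row
++ # whose host column ($2) matches $vName and joins columns 10-13,
++ # which correspond to the same four nameServer*/indexServer* role
++ # fields the --sapcontrol branch above extracts by name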
++ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName}) ++ fi + #if [ -z "$MAPPING" ]; then + # super_ocf_log info "ACT: Did not find remote Host at this moment" + #fi + # FH TODO PRIO3: TRY TO GET RID OF "ATTR_NAME_HANA_REMOTEHOST" + if [ -n "$hanaRemoteHost" ]; then +- set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} ++ set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} + fi +- set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} ++ set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} + if [ -n "$site" ]; then + set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]} + fi +@@ -748,8 +929,8 @@ + S ) # only secondary may propargate its sync status + case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in + *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; +- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; +- *cman* ) nodelist=$(crm_node -l);; ++ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; ++ *cman* ) nodelist=$(crm_node -l);; + esac + + for n in ${nodelist}; do +@@ -789,7 +970,6 @@ + InstanceNr="" + DIR_EXECUTABLE="" + SAPHanaFilter="ra-act-dec-lpa" +-NODENAME=$(crm_node -n) + + if [ $# -ne 1 ] + then +@@ -846,8 +1026,7 @@ + fi + fi + +-THE_VERSION=$(sht_meta_data | grep ' ++# License: GPL v2+ ++my $Version="0.18.2016.02.16.1"; + # ++################################################################## + use POSIX; + use strict; ++use Sys::Syslog; ++use Sys::Hostname; ++use File::Path; ++use Getopt::Long; ++use lib '/usr/share/SAPHanaSR/tests'; ++use SAPHanaSRTools; ++ ++################################### ++## this part is not for scale out and currently NOT zero-config ++ ++my $ClusterNodes=2; ++my $ClusterPrimaries=1; ++my $ClusterSecondaries=1; ++my %Name; ++my %Host; ++my $host = hostname(); + ++my $varlib='/var/lib/SAPHanaTD'; ++my $testfile='SAPHanaTD.status'; ++my $testcount=0; ++my $first_test=1; + my $sid=""; +-my $table_title = "Host \\ Attr"; +-my %Name; ++my @sids; ++my $ino=""; ++my $sortBy=""; ++my $table_titleH = "Host"; ++#my %Name; + my %Host; ++my %Site; ++my %Global; ++my %HName; ++my %SName; ++my %GName; ++my $help; ++my $version; ++my $cibFile=""; ++ ++sub init() ++{ ++ my $result = GetOptions ("sid=s" => \@sids, ++ "sort=s" => \$sortBy, ++ "cib=s" => \$cibFile, ++ "version" => \$version, ++ "help" => \$help, ++ ); ++ return 0; ++} ++ ++init(); ++ ++if ( $help ) { ++ printf "SAPHanaSR-showAttr {[--sid=]} [--sort=] [--cib=]\n"; ++ printf ""; ++ exit 0; ++} ++if ( $version ) { ++ printf "%s\n", $Version; ++ exit 0; ++} ++ ++if ( $cibFile ne "" ) { ++ printf "Using cib file %s\n", $cibFile; ++} + + sub max { # thanks to http://www.perlunity.de/perl/forum/thread_018329.shtml + my $a = shift; +@@ -21,113 +80,75 @@ + return $a > $b ? 
$a : $b;
+ }
+
+-sub print_attr_host()
+-{
+- my ($HKey, $AKey);
+- printf "%-22s", "Attribute \\ Host";
+- foreach $HKey (sort keys %Host) {
+- printf "%-16s ", $HKey;
+- }
+- printf "\n";
+-
+- printf "%s\n", "-" x 120 ;
+-
+- foreach $AKey (sort keys %Name) {
+- printf "%-22s", $AKey;
+- foreach $HKey (sort keys %Host) {
+- printf "%-16.16s ", $Host{$HKey} -> {$AKey};
+- }
+-
+- printf "\n";
+- }
+- return 0;
+-}
+-
+-sub print_host_attr()
+-{
+- my ($AKey, $HKey, $len, $line_len, $hclen);
+- $hclen=$Name{_hosts}->{_length};
+- $line_len=$hclen+1;
+- printf "%-$hclen.${hclen}s ", "$table_title";
+- foreach $AKey (sort keys %Name) {
+- if ($AKey ne "_hosts") {
+- $len = $Name{$AKey}->{_length};
+- $line_len=$line_len+$len+1;
+- printf "%-$len.${len}s ", $Name{$AKey}->{_title};
++sub read_cib($) {
++ my $sid = shift();
++ if ( $cibFile eq "" ) {
++ printf "Open live cib\n";
++ open CIB, "cibadmin -Ql |" or die "CIB could not be read from cluster";
++ } else {
++ open CIB, "<$cibFile" or die "CIB file $cibFile not found or not able to read it";
++ }
++ while (<CIB>) {
++ chomp;
++ my ($host, $name, $site, $value);
++ if ( $_ =~ /cib-last-written="([^"]*)"/ ) {
++ printf "CIB-time: %s\n", $1;
+ }
+- }
+- printf "\n";
+- printf "%s\n", "-" x $line_len ;
+- foreach $HKey (sort keys %Host) {
+- printf "%-$hclen.${hclen}s ", $HKey;
+- foreach $AKey (sort keys %Name) {
+- if ($AKey ne "_hosts") {
+- $len = $Name{$AKey}->{_length};
+- printf "%-$len.${len}s ", $Host{$HKey} -> {$AKey};
+- }
+- }
+- printf "\n";
+- }
+- return 0;
+-}
+-
+-open ListInstances, "/usr/sap/hostctrl/exe/saphostctrl -function ListInstances|";
+-while (<ListInstances>) {
+- # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
+- chomp;
+- if ( $_ =~ /:\s+([A-Z][A-Z0-9][A-Z0-9])\s+-/ ) {
+- $sid=tolower("$1");
+- }
+-}
+-close ListInstances;
+-
+-
+-open CIB, "cibadmin -Ql |";
+-while (<CIB>) {
+- chomp;
+- my ($host, $name, $value);
+- my $found=0;
+- if ( $_ =~ /nvpair.*name="(\w+_${sid}_\w+)"/ ) {
+- $name=$1;
+- # find attribute in forever and reboot store :)
+- if ( $_ =~ /id="(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) {
+- $host=$2;
+- }
+- if ( $_ =~ /value="([^"]+)"/ ) {
+- $value=$1;
+- $found=1;
+- }
+- }
+- if ( $found == 1 ) {
+- #
+- # handle the hosts name and table-title
+- #
+- $Host{$host}->{$name}=${value};
+- if ( defined ($Name{_hosts}->{_length})) {
+- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length($host ));
+- } else {
+- $Name{_hosts}->{_length} = length($host );
++ if ( $_ =~ /node_state id=".+" uname="([a-zA-Z0-9\-\_]+)" .*crmd="([a-zA-Z0-9\-\_]+)"/ ) {
++ insertAttribute($sid, \%Host, \%HName, $1, "node_status", $2);
+ }
+- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length( $table_title));
+- #
+- # now handle the attributes name and value
+- #
+- $Name{$name}->{$host}=${value};
+- if ( defined ($Name{$name}->{_length})) {
+- $Name{$name}->{_length} = max($Name{$name}->{_length}, length($value ));
+- } else {
+- $Name{$name}->{_length} = length($value );
++ if ( $_ =~ /nvpair.*name="([a-zA-Z0-9\_\-]+_${sid}_([a-zA-Z0-9\-\_]+))"/ ) {
++ $name=$1;
++ if ( $_ =~ /id=.(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) {
++ # found attribute in nodes forever and reboot store
++ $host=$2;
++ if ( $_ =~ /value="([^"]+)"/ ) {
++ $value=$1;
++ insertAttribute($sid, \%Host, \%HName, $host, $name, $value);
++ }
++ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_site_[a-zA-Z0-9\-]+_([a-zA-Z0-9\_\-]+)/) {
++ # found a site attribute
++ $site=$1;
++ if ( $name =~ 
/[a-zA-Z0-9\_\-]+_site_([a-zA-Z0-9\-]+)/ ) { ++ $name = $1; ++ } ++ if ( $_ =~ /value="([^"]+)"/ ) { ++ $value=$1; ++ insertAttribute($sid, \%Site, \%SName, $site, $name, $value); ++ } ++ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_glob_[a-zA-Z0-9\_\-]+/) { ++ # found a global attribute ++ $host="GLOBAL"; ++ if ( $name =~ /([a-zA-Z0-9\_\-]+)_glob_([a-zA-Z0-9\_\-]+)/ ) { ++ $name = $2; ++ } ++ if ( $_ =~ /value="([^"]+)"/ ) { ++ $value=$1; ++ insertAttribute($sid, \%Global, \%GName, "global", $name, $value); ++ } ++ } + } +- if ( $name =~ /hana_${sid}_(.*)/ ) { +- $Name{$name}->{_title} = $1; +- } else { +- $Name{$name}->{_title} = $name; +- } +- $Name{$name}->{_length} = max($Name{$name}->{_length}, length( $Name{$name}->{_title})); +- # printf "%-8s %-20s %-30s\n", $1, $2, $3; +- } ++ } ++ close CIB; + } +-close CIB; + +-#print_attr_host; +-print_host_attr; ++if ( 0 == @sids ) { ++ my $sid_ino_list; ++ ( $sid_ino_list ) = get_sid_and_InstNr(); ++ @sids = split(",", $sid_ino_list); ++ ++} ++ ++foreach $sid (@sids) { ++ ( $sid, $ino ) = split(":", $sid); ++ $sid=tolower("$sid"); ++ %Host=(); ++ %HName=(); ++ read_cib($sid); ++ get_hana_attributes($sid); ++ if ( keys(%Host) == 0 ) { ++ printf "No attributes found for SID=%s\n", $sid; ++ } else { ++ print_host_attr(\%Host, \%HName, "Hosts", $sortBy); ++ } ++} diff --git a/SOURCES/bz1423424-2-update-saphana-saphanatopology.patch b/SOURCES/bz1423424-2-update-saphana-saphanatopology.patch new file mode 100644 index 0000000..2b9637b --- /dev/null +++ b/SOURCES/bz1423424-2-update-saphana-saphanatopology.patch @@ -0,0 +1,14 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2016-11-17 09:35:47.460984046 +0100 ++++ b/heartbeat/SAPHana 2016-11-17 09:36:20.536591188 +0100 +@@ -133,8 +133,8 @@ + function backup_global_and_nameserver() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 +- cp /hana/shared/LNX/global/hdb/custom/config/global.ini /hana/shared/LNX/global/hdb/custom/config/global.ini.$(date +"%s") +- cp /hana/shared/LNX/global/hdb/custom/config/nameserver.ini /hana/shared/LNX/global/hdb/custom/config/nameserver.ini.$(date +"%s") ++ cp /hana/shared/$SID/global/hdb/custom/config/global.ini /hana/shared/$SID/global/hdb/custom/config/global.ini.$(date +"%s") ++ cp /hana/shared/$SID/global/hdb/custom/config/nameserver.ini /hana/shared/$SID/global/hdb/custom/config/nameserver.ini.$(date +"%s") + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } diff --git a/SOURCES/bz1437122-rabbitmq-cluster-pacemaker-remote.patch b/SOURCES/bz1437122-rabbitmq-cluster-pacemaker-remote.patch new file mode 100644 index 0000000..ad5f57b --- /dev/null +++ b/SOURCES/bz1437122-rabbitmq-cluster-pacemaker-remote.patch @@ -0,0 +1,92 @@ +From 51b03e5e892cd2446c84dc78e17b0ad3bdbe76d2 Mon Sep 17 00:00:00 2001 +From: Michele Baldessari +Date: Tue, 28 Mar 2017 16:21:52 +0200 +Subject: [PATCH] Allow the rabbitmq cluster to work on pacemaker remote nodes + +This was first observed via +https://bugzilla.redhat.com/show_bug.cgi?id=1435982. Due to the way +the resource agent looks for attrd entries, it will filter out any +node which does not have the @crmd=online attribute. This is the +case for pacemaker-remote nodes. To fix this we chose the more +conservative approach and only do an additional query when the first +one returned no entries. 
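+
+As a hedged illustration (an editor's sketch, not part of the change
+itself), the resulting lookup becomes a two-step fallback; both
+queries appear verbatim in the diff below:
+
+ join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" \
+ | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
+ if [ -z "$join_list" ]; then
+ # pacemaker-remote node_state entries carry no crmd="online",
+ # so query again without that selector and filter the result
+ join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" \
+ | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
+ fi
+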
Note that this issue exhibits itself +when 'pcs status' reports rabbitmq started on a bunch of nodes: +Clone Set: rabbitmq-clone [rabbitmq] + Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 + +But the cluster_status command returns a single node: +[root@overcloud-rabbit-1 ~]# rabbitmqctl cluster_status +Cluster status of node 'rabbit@overcloud-rabbit-1' ... +[{nodes,[{disc,['rabbit@overcloud-rabbit-1']}]}, + {running_nodes,['rabbit@overcloud-rabbit-1']}, + {cluster_name,<<"rabbit@overcloud-rabbit-1.localdomain">>}, + {partitions,[]}, + {alarms,[{'rabbit@overcloud-rabbit-1',[]}]}] + +Also add some text in the help explaining that currently a mixture of +pacemaker-remote and pacemaker nodes is not supported. + +We tested this change on a pacemaker-remote only setup successfully: +Clone Set: rabbitmq-clone [rabbitmq] + Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 + +[root@overcloud-rabbit-0 ~]# rabbitmqctl cluster_status +Cluster status of node 'rabbit@overcloud-rabbit-0' ... +[{nodes,[{disc,['rabbit@overcloud-rabbit-0','rabbit@overcloud-rabbit-1', + 'rabbit@overcloud-rabbit-2']}]}, + {running_nodes,['rabbit@overcloud-rabbit-2','rabbit@overcloud-rabbit-1', + 'rabbit@overcloud-rabbit-0']}, + {cluster_name,<<"rabbit@overcloud-rabbit-0.localdomain">>}, + {partitions,[]}, + {alarms,[{'rabbit@overcloud-rabbit-2',[]}, + {'rabbit@overcloud-rabbit-1',[]}, + {'rabbit@overcloud-rabbit-0',[]}]}] + +Signed-Off-By: Michele Baldessari +Signed-Off-By: Damien Ciabrini +--- + heartbeat/rabbitmq-cluster | 24 ++++++++++++++++++++++-- + 1 file changed, 22 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index 74378be..6a17590 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -56,7 +56,9 @@ meta_data() { + 1.0 + + +-Starts cloned rabbitmq cluster instance ++Starts cloned rabbitmq cluster instance. NB: note that this RA ++cannot be spawned across a mix of pacemaker and pacemaker-remote nodes. ++Only on pacemaker *or* pacemaker-remote nodes exclusively. + + rabbitmq clustered + +@@ -111,7 +113,25 @@ rmq_local_node() + + rmq_join_list() + { +- cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" ++ local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") ++ # If join_list is empty we want to check if there are any remote nodes ++ # where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector) ++ if [ -z "$join_list" ]; then ++ # Get all the nodes written in the ATTR_COOKIE no matter if ++ # they are online or not. This will be one line per node like ++ # rabbit@overcloud-rabbit-0 ++ # rabbit@overcloud-rabbit-1 ++ # ... ++ local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") ++ # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...' 
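++ # (editor's note) crm_mon -r --as-xml dumps the cluster status as
++ # XML; the xpath keeps the name="..." attributes of nodes that are
++ # online and not in standby, xargs strips the quotes, and awk turns
++ # each name=X into '-e X' so the grep below intersects the attrd
++ # list with the set of usable nodes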
++ local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}')
++ # export the intersection which gives us only the nodes that
++ # a) wrote their name in the cib attrd
++ # b) run on nodes where pacemaker_remote is enabled
++ join_list="$(echo $remote_join_list | grep $filter)"
++ fi
++
++ echo $join_list
+ }
+
+ rmq_write_nodename()
diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec
index 289a30c..9ffbeae 100644
--- a/SPECS/resource-agents.spec
+++ b/SPECS/resource-agents.spec
@@ -32,7 +32,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 3.9.5
-Release: 82%{?dist}.6
+Release: 82%{?dist}.9
License: GPLv2+, LGPLv2+ and ASL 2.0
URL: https://github.com/ClusterLabs/resource-agents
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
@@ -174,6 +174,9 @@
Patch129: bz1400103-redis-notify-clients-of-master-being-demoted.patch
Patch130: bz1400103-nova-compute-wait-nova-compute-unfence.patch
Patch131: bz1409513-portblock-wait.patch
Patch132: bz1402511-rabbitmq-cluster-reset-mnesia-before-join.patch
+Patch133: bz1423424-1-update-saphana-saphanatopology.patch
+Patch134: bz1423424-2-update-saphana-saphanatopology.patch
+Patch135: bz1437122-rabbitmq-cluster-pacemaker-remote.patch
Obsoletes: heartbeat-resources <= %{version}
Provides: heartbeat-resources = %{version}
@@ -417,6 +420,9 @@
%patch130 -p1
%patch131 -p1
%patch132 -p1
+%patch133 -p1
+%patch134 -p1
+%patch135 -p1
%build
if [ ! -f configure ]; then
@@ -671,6 +677,16 @@
ccs_update_schema > /dev/null 2>&1 ||:
%endif
%changelog
+* Tue Apr 4 2017 Oyvind Albrigtsen - 3.9.5-82.9
+- rabbitmq-cluster: allow to run on Pacemaker remote nodes
+
+ Resolves: rhbz#1437122
+
+* Fri Feb 17 2017 Oyvind Albrigtsen - 3.9.5-82.7
+- SAPHana/SAPHanaTopology: update to version 0.152.17
+
+ Resolves: rhbz#1423424
+
* Thu Feb 2 2017 Oyvind Albrigtsen - 3.9.5-82.6
- redis: notify clients of master being demoted
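
A quick local sanity check for the three new patches (an editor's
sketch with assumed paths, not part of the spec itself): apply them
with the same -p1 depth the %patch lines use, against an unpacked
resource-agents-3.9.5 tree.

    cd resource-agents-3.9.5
    for p in bz1423424-1-update-saphana-saphanatopology.patch \
             bz1423424-2-update-saphana-saphanatopology.patch \
             bz1437122-rabbitmq-cluster-pacemaker-remote.patch; do
        patch -p1 --dry-run < ../SOURCES/$p || break
    done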