diff --git a/SOURCES/bz1077888-CTDB-fix-logging.patch b/SOURCES/bz1077888-CTDB-fix-logging.patch
new file mode 100644
index 0000000..fbd1069
--- /dev/null
+++ b/SOURCES/bz1077888-CTDB-fix-logging.patch
@@ -0,0 +1,30 @@
+diff --git a/heartbeat/CTDB b/heartbeat/CTDB
+index b23ffae..3e36dd0 100755
+--- a/heartbeat/CTDB
++++ b/heartbeat/CTDB
+@@ -572,10 +572,22 @@ ctdb_start() {
+ 
+ 	# Use logfile by default, or syslog if asked for
+-	local log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
+-	if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+-		log_option="--syslog"
+-	elif [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
++	# --logging supported from v4.3.0 and --logfile / --syslog support
++	# has been removed from newer versions
++	version=$(ctdb version | awk '{print $NF}')
++	ocf_version_cmp "$version" "4.2.14"
++	if [ "$?" -eq "2" ]; then
++		log_option="--logging=file:$OCF_RESKEY_ctdb_logfile"
++		if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
++			log_option="--logging=syslog"
++		fi
++	else
++		log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
++		if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
++			log_option="--syslog"
++		fi
++	fi
++	if [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
+ 		# ensure the logfile's directory exists, otherwise ctdb will fail to start
+ 		mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile)
+ 	fi
diff --git a/SOURCES/bz1159328-LVM-check_writethrough.patch b/SOURCES/bz1159328-LVM-check_writethrough.patch
new file mode 100644
index 0000000..d059b83
--- /dev/null
+++ b/SOURCES/bz1159328-LVM-check_writethrough.patch
@@ -0,0 +1,60 @@
+From 8d25da64ab9dee8545a0c52f7db08213a03ea106 Mon Sep 17 00:00:00 2001
+From: Oyvind Albrigtsen <oalbrigt@redhat.com>
+Date: Tue, 28 Feb 2017 15:46:40 +0100
+Subject: [PATCH] LVM: add check_writethrough parameter
+
+---
+ heartbeat/LVM | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/heartbeat/LVM b/heartbeat/LVM
+index 90a900b..5b265f5 100755
+--- a/heartbeat/LVM
++++ b/heartbeat/LVM
+@@ -29,6 +29,8 @@
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+ 
++OCF_RESKEY_check_writethrough_default="false"
++
+ #######################################################################
+ 
+ 
+@@ -106,6 +108,14 @@ logical volumes.
+ <content type="string" default="" />
+ </parameter>
+ 
++<parameter name="check_writethrough" unique="0" required="0">
++<longdesc lang="en">
++If set to true, check if cache_mode is set to writethrough.
++</longdesc>
++<shortdesc lang="en">Check if cache_mode is set to writethrough</shortdesc>
++<content type="string" default="false" />
++</parameter>
++
+ </parameters>
+ 
+ <actions>
+@@ -583,6 +593,13 @@ LVM_validate_all() {
+ 		exit $OCF_ERR_GENERIC
+ 	fi
+ 
++	if ocf_is_true "$OCF_RESKEY_check_writethrough"; then
++		if ! lvs --noheadings -o cache_mode "$OCF_RESKEY_volgrpname" | grep -q "writethrough"; then
++			ocf_exit_reason "LVM cache is not in writethrough mode."
++			exit $OCF_ERR_CONFIGURED
++		fi
++	fi
++
+ 	##
+ 	# If exclusive activation is not enabled, then
+ 	# further checking of proper setup is not necessary
+@@ -690,6 +707,8 @@ if [ -n "$OCF_RESKEY_tag" ]; then
+ 	OUR_TAG=$OCF_RESKEY_tag
+ fi
+ 
++: ${OCF_RESKEY_check_writethrough=${OCF_RESKEY_check_writethrough_default}}
++
+ # What kind of method was invoked?
+ case "$1" in
+ 
diff --git a/SOURCES/bz1260713-1-sapdatabase-process-count-suser.patch b/SOURCES/bz1260713-1-sapdatabase-process-count-suser.patch
new file mode 100644
index 0000000..3a24e9d
--- /dev/null
+++ b/SOURCES/bz1260713-1-sapdatabase-process-count-suser.patch
@@ -0,0 +1,135 @@
+From fe55f9b909d81a0093dbfb1f00083706cf5d2cf1 Mon Sep 17 00:00:00 2001
+From: Alexander Krauth
+Date: Fri, 19 Feb 2016 18:00:58 +0100
+Subject: [PATCH] High: SAPDatabase: Add support for Oracle 12c
+
+To work with Oracle 12c the agent needs an option
+to pass the new Database Username to the resource.
+
+Example configuration:
+
+primitive oracle-database SAPDatabase \
+    params \
+        SID=HAO \
+        DBTYPE=ORA \
+        DBOUSER=oracle \
+        STRICT_MONITORING=1 \
+    op monitor interval=120 timeout=60
+---
+ heartbeat/SAPDatabase | 12 +++++++++++-
+ heartbeat/sapdb.sh    | 35 ++++++++++++++++++++++++---------
+ 2 files changed, 37 insertions(+), 10 deletions(-)
+
+diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase
+index de7959f..641bd40 100755
+--- a/heartbeat/SAPDatabase
++++ b/heartbeat/SAPDatabase
+@@ -18,6 +18,7 @@
+ #  OCF_RESKEY_DIR_EXECUTABLE   (optional, well known directories will be searched by default)
+ #  OCF_RESKEY_DBTYPE           (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB)
+ #  OCF_RESKEY_DBINSTANCE       (optional, Database instance name, if not equal to SID)
++#  OCF_RESKEY_DBOSUSER         (optional, the Linux user that owns the database processes on operating system level)
+ #  OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck)
+ #  OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false)
+ #  OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services)
+@@ -69,7 +70,7 @@ meta_data() {
+ <?xml version="1.0"?>
+ <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+ <resource-agent name="SAPDatabase">
+-<version>2.06</version>
++<version>2.14</version>
+ 
+ <shortdesc lang="en">Manages a SAP database instance as an HA resource.</shortdesc>
+ <longdesc lang="en">
+@@ -115,6 +116,11 @@ Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is use
+ <shortdesc lang="en">Database instance name, if not equal to SID</shortdesc>
+ <content type="string" default="" />
+ </parameter>
++<parameter name="DBOSUSER" unique="0" required="0">
++ <longdesc lang="en">The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm</longdesc>
++ <shortdesc lang="en">the Linux user that owns the database processes on operating system level</shortdesc>
++ <content type="string" default="" />
++</parameter>
+ <parameter name="NETSERVICENAME" unique="0" required="0">
+  <longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
+  <shortdesc lang="en">deprecated - do not use anymore</shortdesc>
+@@ -305,6 +311,10 @@ DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'`
+ if saphostctrl_installed; then
+ 	. ${OCF_FUNCTIONS_DIR}/sapdb.sh
+ else
++	if [ -n "${OCF_RESKEY_DBOSUSER}" ]; then
++		ocf_exit_reason "Usage of parameter OCF_RESKEY_DBOSUSER is not possible without having SAP Host-Agent installed"
++		exit $OCF_ERR_ARGS
++	fi
+ 	. ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh
+ fi
+ sapdatabase_init
+diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh
+index 7edb4b8..33d2033 100755
+--- a/heartbeat/sapdb.sh
++++ b/heartbeat/sapdb.sh
+@@ -210,7 +210,11 @@ sapdatabase_monitor() {
+ 		then
+ 			DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
+ 		fi
+-		output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST`
++		if [ -n "$OCF_RESKEY_DBOSUSER" ]
++		then
++			DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
++		fi
++		output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER`
+ 
+ 		# we have to parse the output, because the returncode doesn't tell anything about the instance status
+ 		for SERVICE in `echo "$output" | grep -i 'Component[ ]*Name *[:=] [A-Za-z][A-Za-z0-9_]* (' | sed 's/^.*Component[ ]*Name *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'`
+@@ -255,30 +259,43 @@ sapdatabase_monitor() {
+ # sapdatabase_status: Are there any database processes on this host ?
+ #
+ sapdatabase_status() {
++	sid=`echo $SID | tr '[:upper:]' '[:lower:]'`
++
++	SUSER=${OCF_RESKEY_DBOSUSER:-""}
++
+ 	case $DBTYPE in
+ 	ADA)	SEARCH="$SID/db/pgm/kernel"
+-		SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
++		[ -z "$SUSER" ] && SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
+ 		SNUM=2
+ 		;;
+-	ORA)	SEARCH="ora_[a-z][a-z][a-z][a-z]_"
+-		SUSER="ora`echo $SID | tr '[:upper:]' '[:lower:]'`"
+-		SNUM=4
++	ORA)	DBINST=${OCF_RESKEY_DBINSTANCE}
++		DBINST=${OCF_RESKEY_DBINSTANCE:-${SID}}
++		SEARCH="ora_[a-z][a-z][a-z][a-z]_$DBINST"
++
++		if [ -z "$SUSER" ]; then
++			id "oracle" > /dev/null 2> /dev/null && SUSER="oracle"
++			id "ora${sid}" > /dev/null 2> /dev/null && SUSER="${SUSER:+${SUSER},}ora${sid}"
++		fi
++
++		SNUM=4
+ 		;;
+ 	DB6)	SEARCH="db2[a-z][a-z][a-z]"
+-		SUSER="db2`echo $SID | tr '[:upper:]' '[:lower:]'`"
++		[ -z "$SUSER" ] && SUSER="db2${sid}"
+ 		SNUM=2
+ 		;;
+ 	SYB)	SEARCH="dataserver"
+-		SUSER="syb`echo $SID | tr '[:upper:]' '[:lower:]'`"
++		[ -z "$SUSER" ] && SUSER="syb${sid}"
+ 		SNUM=1
+ 		;;
+ 	HDB)	SEARCH="hdb[a-z]*server"
+-		SUSER="`echo $SID | tr '[:upper:]' '[:lower:]'`adm"
++		[ -z "$SUSER" ] && SUSER="${sid}adm"
+ 		SNUM=1
+ 		;;
+ 	esac
+ 
+-	cnt=`ps -u $SUSER -o args 2> /dev/null | grep -c $SEARCH`
++	[ -z "$SUSER" ] && return $OCF_ERR_INSTALLED
++
++	cnt=`ps -u $SUSER -o args 2> /dev/null | grep -v grep | grep -c $SEARCH`
+ 	[ $cnt -ge $SNUM ] && return $OCF_SUCCESS
+ 	return $OCF_NOT_RUNNING
+ }
diff --git a/SOURCES/bz1260713-2-sapdatabase-process-count-suser.patch b/SOURCES/bz1260713-2-sapdatabase-process-count-suser.patch
new file mode 100644
index 0000000..c349f0d
--- /dev/null
+++ b/SOURCES/bz1260713-2-sapdatabase-process-count-suser.patch
@@ -0,0 +1,24 @@
+From af5863ecd255d2d514113d39bbf03ab95b5ccca2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Kristoffer=20Gr=C3=B6nlund?= <kgronlund@suse.com>
+Date: Mon, 16 Nov 2015 17:14:43 +0100
+Subject: [PATCH] SAPDatabase: Add Oracle 12 to list of supported databases
+ (bsc#953991)
+
+This agent has been tested to work with Oracle database version 12.
+---
+ heartbeat/SAPDatabase | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase
+index 3b77206..de7959f 100755
+--- a/heartbeat/SAPDatabase
++++ b/heartbeat/SAPDatabase
+@@ -78,7 +78,7 @@ Resource script for SAP databases. It manages a SAP database of any type as an H
+ Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB.
+ The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure.
+ The resource agent supports the following databases:
+-- Oracle 10.2 and 11.2
++- Oracle 10.2, 11.2 and 12
+ - DB/2 UDB for Windows and Unix 9.x
+ - SAP-DB / MaxDB 7.x
+ - Sybase ASE 15.7
diff --git a/SOURCES/bz1305549-nova-compute-wait-nova-compute-unfence.patch b/SOURCES/bz1305549-nova-compute-wait-nova-compute-unfence.patch
new file mode 100644
index 0000000..0901754
--- /dev/null
+++ b/SOURCES/bz1305549-nova-compute-wait-nova-compute-unfence.patch
@@ -0,0 +1,259 @@
+diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait
+--- a/heartbeat/nova-compute-wait	2017-02-02 11:23:38.263510362 +0100
++++ b/heartbeat/nova-compute-wait	2017-02-02 11:28:27.181650906 +0100
+@@ -1,30 +1,15 @@
+ #!/bin/sh
++# Copyright 2015 Red Hat, Inc.
+ #
++# Description: Manages compute daemons
+ #
+-# nova-compute-wait agent manages compute daemons.
++# Authors: Andrew Beekhof <abeekhof@redhat.com>
+ #
+-# Copyright (c) 2015
+-#
+-# This program is free software; you can redistribute it and/or modify
+-# it under the terms of version 2 of the GNU General Public License as
+-# published by the Free Software Foundation.
+-#
+-# This program is distributed in the hope that it would be useful, but
+-# WITHOUT ANY WARRANTY; without even the implied warranty of
+-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-#
+-# Further, this software is distributed without any warranty that it is
+-# free of the rightful claim of any third person regarding infringement
+-# or the like. Any license provided herein, whether implied or
+-# otherwise, applies only to this software file. Patent licenses, if
+-# any, provided herein do not apply to combinations of this program with
+-# other software, or any other product whatsoever.
+-#
+-# You should have received a copy of the GNU General Public License
+-# along with this program; if not, write the Free Software Foundation,
+-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
++# Support: openstack@lists.openstack.org
++# License: Apache Software License (ASL) 2.0
+ #
+ 
++
+ #######################################################################
+ # Initialization:
+ 
+@@ -137,6 +122,8 @@
+ }
+ 
+ nova_start() {
++    build_unfence_overlay
++
+     state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+     if [ "x$state" = x ]; then
+         : never been fenced
+@@ -147,8 +134,8 @@
+         sleep ${OCF_RESKEY_evacuation_delay}
+ 
+     else
+-        ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
+         while [ "x$state" != "xno" ]; do
++            ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
+             state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+             sleep 5
+         done
+@@ -156,14 +143,22 @@
+         ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
+         sleep ${OCF_RESKEY_evacuation_delay}
+     fi
++
++    touch "$statefile"
++
+     return $OCF_SUCCESS
+ }
+ 
+ nova_stop() {
++    rm -f "$statefile"
+     return $OCF_SUCCESS
+ }
+ 
+ nova_monitor() {
++    if [ ! -f "$statefile" ]; then
++        return $OCF_NOT_RUNNING
++    fi
++
+     return $OCF_SUCCESS
+ }
+ 
+@@ -171,17 +166,113 @@
+     return $OCF_SUCCESS
+ }
+ 
++build_unfence_overlay() {
++    fence_options=""
++
++    if [ -z "${OCF_RESKEY_auth_url}" ]; then
++        candidates=$(/usr/sbin/stonith_admin -l ${NOVA_HOST})
++        for candidate in ${candidates}; do
++            pcs stonith show $d | grep -q fence_compute
++            if [ $? = 0 ]; then
++                ocf_log info "Unfencing nova based on: $candidate"
++                fence_auth=$(pcs stonith show $candidate | grep Attributes: | sed -e s/Attributes:// -e s/-/_/g -e 's/[^ ]\+=/OCF_RESKEY_\0/g' -e s/passwd/password/g)
++                eval "export $fence_auth"
++                break
++            fi
++        done
++    fi
++
++    # Copied from NovaEvacuate
++    if [ -z "${OCF_RESKEY_auth_url}" ]; then
++        ocf_exit_reason "auth_url not configured"
++        exit $OCF_ERR_CONFIGURED
++    fi
++
++    fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
++
++    if [ -z "${OCF_RESKEY_username}" ]; then
++        ocf_exit_reason "username not configured"
++        exit $OCF_ERR_CONFIGURED
++    fi
++
++    fence_options="${fence_options} -l ${OCF_RESKEY_username}"
++
++    if [ -z "${OCF_RESKEY_password}" ]; then
++        ocf_exit_reason "password not configured"
++        exit $OCF_ERR_CONFIGURED
++    fi
++
++    fence_options="${fence_options} -p ${OCF_RESKEY_password}"
++
++    if [ -z "${OCF_RESKEY_tenant_name}" ]; then
++        ocf_exit_reason "tenant_name not configured"
++        exit $OCF_ERR_CONFIGURED
++    fi
++
++    fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
++
++    if [ -n "${OCF_RESKEY_domain}" ]; then
++        fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
++    fi
++
++    if [ -n "${OCF_RESKEY_region_name}" ]; then
++        fence_options="${fence_options} \
++            --region-name ${OCF_RESKEY_region_name}"
++    fi
++
++    if [ -n "${OCF_RESKEY_insecure}" ]; then
++        if ocf_is_true "${OCF_RESKEY_insecure}"; then
++            fence_options="${fence_options} --insecure"
++        fi
++    fi
++
++    if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
++        if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
++            fence_options="${fence_options} --no-shared-storage"
++        fi
++    fi
++
++    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
++        case ${OCF_RESKEY_endpoint_type} in
++            adminURL|publicURL|internalURL)
++                ;;
++            *)
++                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \
++                    "not valid. Use adminURL or publicURL or internalURL"
++                exit $OCF_ERR_CONFIGURED
++                ;;
++        esac
++        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
++    fi
++
++    mkdir -p /run/systemd/system/openstack-nova-compute.service.d
++    cat<<EOF>/run/systemd/system/openstack-nova-compute.service.d/unfence-20.conf
++[Service]
++ExecStartPost=/sbin/fence_compute ${fence_options} -o on -n ${NOVA_HOST}
++EOF
++}
++
+ nova_validate() {
+     rc=$OCF_SUCCESS
+ 
+     check_binary crudini
+     check_binary nova-compute
++    check_binary fence_compute
+ 
+     if [ ! -f /etc/nova/nova.conf ]; then
+         ocf_exit_reason "/etc/nova/nova.conf not found"
+         exit $OCF_ERR_CONFIGURED
+     fi
+ 
++    # Is the state directory writable?
++    state_dir=$(dirname $statefile)
++    touch "$state_dir/$$"
++    if [ $? != 0 ]; then
++        ocf_exit_reason "Invalid state directory: $state_dir"
++        return $OCF_ERR_ARGS
++    fi
++    rm -f "$state_dir/$$"
++
+     NOVA_HOST=$(crudini --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
+     if [ $? = 1 ]; then
+         short_host=$(uname -n | awk -F. '{print $1}')
+@@ -198,6 +289,8 @@
+     return $rc
+ }
+ 
++statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
++
+ : ${OCF_RESKEY_evacuation_delay=120}
+ case $__OCF_ACTION in
+ meta-data)  meta_data
+@@ -221,3 +314,4 @@
+ rc=$?
+ ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+ exit $rc
++
+diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
+--- a/heartbeat/NovaEvacuate	2017-02-02 11:23:38.253510461 +0100
++++ b/heartbeat/NovaEvacuate	2017-02-02 11:28:49.262432371 +0100
+@@ -1,30 +1,16 @@
+ #!/bin/sh
+ #
++# Copyright 2015 Red Hat, Inc.
+ #
+-# NovaCompute agent manages compute daemons.
++# Description: Manages evacuation of nodes running nova-compute
+ #
+-# Copyright (c) 2015
++# Authors: Andrew Beekhof <abeekhof@redhat.com>
+ #
+-# This program is free software; you can redistribute it and/or modify
+-# it under the terms of version 2 of the GNU General Public License as
+-# published by the Free Software Foundation.
+-#
+-# This program is distributed in the hope that it would be useful, but
+-# WITHOUT ANY WARRANTY; without even the implied warranty of
+-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+-#
+-# Further, this software is distributed without any warranty that it is
+-# free of the rightful claim of any third person regarding infringement
+-# or the like. Any license provided herein, whether implied or
+-# otherwise, applies only to this software file. Patent licenses, if
+-# any, provided herein do not apply to combinations of this program with
+-# other software, or any other product whatsoever.
+-#
+-# You should have received a copy of the GNU General Public License
+-# along with this program; if not, write the Free Software Foundation,
+-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
++# Support: openstack@lists.openstack.org
++# License: Apache Software License (ASL) 2.0
+ #
+ 
++
+ #######################################################################
+ # Initialization:
+ 
+@@ -180,7 +166,7 @@
+      ocf_log notice "Initiating evacuation of $node"
+ 
+      fence_compute ${fence_options} -o status -n ${node}
+-     if [ $? != 0 ]; then
++     if [ $? = 1 ]; then
+          ocf_log info "Nova does not know about ${node}"
+          # Dont mark as no because perhaps nova is unavailable right now
+          continue
diff --git a/SOURCES/bz1305549-redis-notify-clients-of-master-being-demoted.patch b/SOURCES/bz1305549-redis-notify-clients-of-master-being-demoted.patch
new file mode 100644
index 0000000..f7ba67a
--- /dev/null
+++ b/SOURCES/bz1305549-redis-notify-clients-of-master-being-demoted.patch
@@ -0,0 +1,42 @@
+From f1c2249ef5e8524ddb986f0df879d5f18e935da3 Mon Sep 17 00:00:00 2001
+From: Oyvind Albrigtsen <oalbrigt@redhat.com>
+Date: Fri, 20 Jan 2017 09:17:15 +0100
+Subject: [PATCH] redis: use "CLIENT KILL type normal" to notify clients of
+ master being demoted
+
+---
+ heartbeat/redis | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/heartbeat/redis b/heartbeat/redis
+index 1ea0025..d08e57a 100755
+--- a/heartbeat/redis
++++ b/heartbeat/redis
+@@ -436,6 +436,11 @@ function demote() {
+ 	local master_host
+ 	local master_port
+ 
++	# client kill is only supported in Redis 2.8.12 or greater
++	version=$(redis_client -v | awk '{print $NF}')
++	ocf_version_cmp "$version" "2.8.11"
++	client_kill=$?
++
+ 	CHECK_SLAVE_STATE=1
+ 	monitor
+ 	status=$?
+@@ -478,9 +483,15 @@
+ 	while true; do
+ 		# Wait infinite if replication is syncing
+ 		# Then start/demote operation timeout determines timeout
++		if [ "$client_kill" -eq 2 ]; then
++			redis_client CLIENT PAUSE 2000
++		fi
+ 		monitor
+ 		status=$?
+ 		if (( status == OCF_SUCCESS )); then
++			if [ "$client_kill" -eq 2 ]; then
++				redis_client CLIENT KILL type normal
++			fi
+ 			return $OCF_SUCCESS
+ 		fi
+ 
diff --git a/SOURCES/bz1316130-systemd-drop-in-clvmd-LVM.patch b/SOURCES/bz1316130-systemd-drop-in-clvmd-LVM.patch
new file mode 100644
index 0000000..153de9c
--- /dev/null
+++ b/SOURCES/bz1316130-systemd-drop-in-clvmd-LVM.patch
@@ -0,0 +1,136 @@
+diff -uNr a/configure.ac b/configure.ac
+--- a/configure.ac	2017-05-03 10:00:54.396040173 +0200
++++ b/configure.ac	2017-05-03 10:07:28.969236697 +0200
+@@ -65,6 +65,21 @@
+ AM_CONDITIONAL(OCFT_FEDORA_CASES, test "x$OCFT_TEST_CASES" = "xfedora" )
+ AM_CONDITIONAL(OCFT_DEFAULT_CASES, test "x$OCFT_TEST_CASES" = "xdefault" )
+ 
++AC_ARG_WITH([systemdsystemunitdir],
++    [AS_HELP_STRING([--with-systemdsystemunitdir=DIR], [Directory for systemd service files])],,
++    [with_systemdsystemunitdir=auto])
++AS_IF([test "x$with_systemdsystemunitdir" = "xyes" -o "x$with_systemdsystemunitdir" = "xauto"], [
++    def_systemdsystemunitdir=$($PKGCONFIG --variable=systemdsystemunitdir systemd)
++
++    AS_IF([test "x$def_systemdsystemunitdir" = "x"],
++        [AS_IF([test "x$with_systemdsystemunitdir" = "xyes"],
++            [AC_MSG_ERROR([systemd support requested but pkg-config unable to query systemd package])])
++        with_systemdsystemunitdir=no],
++        [with_systemdsystemunitdir="$def_systemdsystemunitdir"])])
++AS_IF([test "x$with_systemdsystemunitdir" != "xno"],
++    [AC_SUBST([systemdsystemunitdir], [$with_systemdsystemunitdir])])
++AM_CONDITIONAL([HAVE_SYSTEMD], [test "x$with_systemdsystemunitdir" != "xno"])
++
+ dnl
+ dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz])
+ dnl
+@@ -857,6 +872,7 @@
+ heartbeat/ocf-directories \
+ heartbeat/ocf-shellfuncs \
+ heartbeat/shellfuncs \
++systemd/Makefile \
+ tools/Makefile \
+ tools/ocf-tester \
+ tools/ocft/Makefile \
+diff -uNr a/heartbeat/clvm b/heartbeat/clvm
+--- a/heartbeat/clvm	2017-05-03 10:00:54.560038569 +0200
++++ b/heartbeat/clvm	2017-05-03 10:01:13.309855171 +0200
+@@ -353,6 +353,18 @@
+ 		return $?
+ 	fi
+ 
++	# systemd drop-in to stop process before storage services during
++	# shutdown/reboot
++	if ps -p 1 | grep -q systemd ; then
++		systemdrundir="/run/systemd/system/resource-agents-deps.target.d"
++		mkdir "$systemdrundir"
++		cat > "$systemdrundir/99-clvmd.conf" <
 "$systemdrundir/99-LVM.conf" <
+Date: Tue, 30 May 2017 13:43:19 +0200
+Subject: [PATCH] [rabbitmq] Typo fix
+
+Unfortunately we introduced a regression with commit
+1f57e26816d8148e0c77ff7573457b8d2599bf8b. This patch addresses it and
+fixes #982.
+
+Thanks @seabres for the heads up.
+
+Signed-off-by: Peter Lemenkov <lemenkov@gmail.com>
+---
+ heartbeat/rabbitmq-cluster | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
+index b5060b155..30f032066 100755
+--- a/heartbeat/rabbitmq-cluster
++++ b/heartbeat/rabbitmq-cluster
+@@ -455,7 +455,7 @@ rmq_stop() {
+ 				{'EXIT', _} -> [];
+ 				Any -> Any
+ 			end,
+-			Result /= [] andalso file:write_file(Filename, io_lib:fwrite(\"~p.~n\", [RuntimeParams]))
++			Result /= [] andalso file:write_file(Filename, io_lib:fwrite(\"~p.~n\", [Result]))
+ 		end,
+ 
+ 		%% Backup users
diff --git a/SOURCES/bz1342376-3-rabbitmq-cluster-backup-and-restore-users-policies.patch b/SOURCES/bz1342376-3-rabbitmq-cluster-backup-and-restore-users-policies.patch
new file mode 100644
index 0000000..13b1128
--- /dev/null
+++ b/SOURCES/bz1342376-3-rabbitmq-cluster-backup-and-restore-users-policies.patch
@@ -0,0 +1,24 @@
+From a045342ebe8523d1408afb87b062bb7d71927c46 Mon Sep 17 00:00:00 2001
+From: Dave Holland
+Date: Thu, 8 Jun 2017 14:38:15 +0100
+Subject: [PATCH] rabbitmq-cluster: typo fix
+
+fix a small typo which causes errors in corosync.log e.g.
+Jun 08 09:00:14 [6504] overcloud-controller-1.localdomain lrmd: notice: operation_finished: rabbitmq_start_0:7504:stderr [ Error: syntax error before: ')' ]
+---
+ heartbeat/rabbitmq-cluster | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
+index 30f032066..1e78d9eca 100755
+--- a/heartbeat/rabbitmq-cluster
++++ b/heartbeat/rabbitmq-cluster
+@@ -398,7 +398,7 @@ rmq_start() {
+ 			case file:consult(Filename) of
+ 				{error, _} ->
+ 					ok;
+-				{ok, [Result]) ->
++				{ok, [Result]} ->
+ 					lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result),
+ 					file:delete(Filename)
+ 			end
diff --git a/SOURCES/bz1342376-rabbitmq-cluster-backup-and-restore-users-policies.patch b/SOURCES/bz1342376-rabbitmq-cluster-backup-and-restore-users-policies.patch
new file mode 100644
index 0000000..011e2e8
--- /dev/null
+++ b/SOURCES/bz1342376-rabbitmq-cluster-backup-and-restore-users-policies.patch
@@ -0,0 +1,167 @@
+From 1f57e26816d8148e0c77ff7573457b8d2599bf8b Mon Sep 17 00:00:00 2001
+From: Peter Lemenkov <lemenkov@gmail.com>
+Date: Wed, 5 Apr 2017 19:12:26 +0200
+Subject: [PATCH] Backup and restore policies
+
+- Backup and restore policies.
+- Simplify code
+
+Signed-off-by: Peter Lemenkov <lemenkov@gmail.com>
+---
+ heartbeat/rabbitmq-cluster | 123 ++++++++++++++++++++++++---------------------
+ 1 file changed, 66 insertions(+), 57 deletions(-)
+
+diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
+index 6a17590..b5060b1 100755
+--- a/heartbeat/rabbitmq-cluster
++++ b/heartbeat/rabbitmq-cluster
+@@ -388,83 +388,92 @@ rmq_start() {
+ 		return $OCF_ERR_GENERIC
+ 	fi
+ 
+-	# Restore users and users' permissions (if any)
++	# Restore users, user permissions, and policies (if any)
+ 	BaseDataDir=`dirname $RMQ_DATA_DIR`
+-	if [ -f $BaseDataDir/users.erl ] ; then
+-		rabbitmqctl eval "
+-			%% Run only if Mnesia is ready.
+-			lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
+-			begin
+-				[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
+-
+-				%% Read users first
+-				{ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"),
+-
+-				Upgrade = fun
+-					({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5};
+-					({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D}
+-				end,
++	rabbitmqctl eval "
++		%% Run only if Mnesia is ready.
++		lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
++		begin
++			Restore = fun(Table, PostprocessFun, Filename) ->
++				case file:consult(Filename) of
++					{error, _} ->
++						ok;
++					{ok, [Result]) ->
++						lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result),
++						file:delete(Filename)
++				end
++			end,
+ 
+-				Downgrade = fun
+-					({internal_user, A, B, C}) -> {internal_user, A, B, C};
+-					({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C};
+-					%% Incompatible scheme, so we will loose user's password ('B' value) during conversion.
+-					%% Unfortunately, this case will require manual intervention - user have to run:
+-					%% rabbitmqctl change_password
+-					({internal_user, A, B, C, _}) -> {internal_user, A, B, C}
+-				end,
++			%% Restore users
+ 
+-				case WildPattern of
+-					%% Version < 3.6.0
+-					{internal_user,'_','_','_'} ->
+-						lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Downgrade(X)) end, Users);
+-					%% Version >= 3.6.0
+-					{internal_user,'_','_','_','_'} ->
+-						lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Upgrade(X)) end, Users)
+-				end,
++			Upgrade = fun
++				({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5};
++				({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D}
++			end,
+ 
+-				ok = file:delete(\"$BaseDataDir/users.erl\")
+-			end.
+-		"
+-	fi
+-	if [ -f $BaseDataDir/users_perms.erl ] ; then
+-		rabbitmqctl eval "
+-			%% Run only if Mnesia is ready.
+-			lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
+-			begin
+-				{ok, [UsersPerms]} = file:consult(\"$BaseDataDir/users_perms.erl\"),
+-				lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user_permission, X) end, UsersPerms),
+-
+-				ok = file:delete(\"$BaseDataDir/users_perms.erl\")
+-			end.
+-		"
+-	fi
++			Downgrade = fun
++				({internal_user, A, B, C}) -> {internal_user, A, B, C};
++				({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C};
++				%% Incompatible scheme, so we will loose user's password ('B' value) during conversion.
++				%% Unfortunately, this case will require manual intervention - user have to run:
++				%% rabbitmqctl change_password
++				({internal_user, A, B, C, _}) -> {internal_user, A, B, C}
++			end,
++
++			%% Check db scheme first
++			[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
++			case WildPattern of
++				%% Version < 3.6.0
++				{internal_user,'_','_','_'} ->
++					Restore(rabbit_user, Downgrade, \"$BaseDataDir/users.erl\");
++				%% Version >= 3.6.0
++				{internal_user,'_','_','_','_'} ->
++					Restore(rabbit_user, Upgrade, \"$BaseDataDir/users.erl\")
++			end,
+ 
++			NoOp = fun(X) -> X end,
++
++			%% Restore user permissions
++			Restore(rabbit_user_permission, NoOp, \"$BaseDataDir/users_perms.erl\"),
++
++			%% Restore policies
++			Restore(rabbit_runtime_parameters, NoOp, \"$BaseDataDir/policies.erl\")
++		end.
++	"
+ 	return $OCF_SUCCESS
+ }
+ 
+ rmq_stop() {
+-	# Backup users and users' permissions
++	# Backup users, user permissions, and policies
+ 	BaseDataDir=`dirname $RMQ_DATA_DIR`
+ 	rabbitmqctl eval "
+ 		%% Run only if Mnesia is still available.
+ 		lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
+ 		begin
+-			[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
++			Backup = fun(Table, SelectPattern, Filter, Filename) ->
++				Result = case catch mnesia:dirty_select(Table, [{SelectPattern, [Filter], ['\\\$_']}]) of
++					{'EXIT', _} -> [];
++					Any -> Any
++				end,
++				Result /= [] andalso file:write_file(Filename, io_lib:fwrite(\"~p.~n\", [RuntimeParams]))
++			end,
+ 
+-			Users = case WildPattern of
++			%% Backup users
++			%% Check db scheme first
++			[WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
++			UsersSelectPattern = case WildPattern of
+ 				%% Version < 3.6.0
+-				{internal_user,'_','_','_'} ->
+-					mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]);
++				{internal_user,'_','_','_'} -> {internal_user, '\\\$1', '_', '_'};
+ 				%% Version >= 3.6.0
+-				{internal_user,'_','_','_','_'} ->
+-					mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ])
++				{internal_user,'_','_','_','_'} -> {internal_user, '\\\$1', '_', '_', '_'}
+ 			end,
++			Backup(rabbit_user, UsersSelectPattern, {'/=', '\\\$1', <<\"guest\">>}, \"$BaseDataDir/users.erl\"),
+ 
+-			Users /= [] andalso file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])),
++			%% Backup user permissions
++			Backup(rabbit_user_permission, {'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, {'/=', '\\\$3', <<\"guest\">>}, \"$BaseDataDir/users_perms.erl\"),
+ 
+-			UsersPerms = mnesia:dirty_select(rabbit_user_permission, [{{'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, [{'/=', '\\\$3', <<\"guest\">>}], ['\\\$_']}]),
+-			UsersPerms /= [] andalso file:write_file(\"$BaseDataDir/users_perms.erl\", io_lib:fwrite(\"~p.~n\", [UsersPerms]))
++			%% Backup policies
++			Backup(rabbit_runtime_parameters, {runtime_parameters, {'_', '\\\$1', '_'}, '_'}, {'==', '\\\$1', <<\"policy\">>}, \"$BaseDataDir/policies.erl\")
+ 		end.
+ 	"
+ 
diff --git a/SOURCES/bz1359252-clvm-remove-reload-action.patch b/SOURCES/bz1359252-clvm-remove-reload-action.patch
new file mode 100644
index 0000000..dc33e8d
--- /dev/null
+++ b/SOURCES/bz1359252-clvm-remove-reload-action.patch
@@ -0,0 +1,21 @@
+From 0f7685998bb233716ef39d127de6a686e7fb003b Mon Sep 17 00:00:00 2001
+From: Oyvind Albrigtsen <oalbrigt@redhat.com>
+Date: Mon, 6 Mar 2017 16:42:10 +0100
+Subject: [PATCH] clvm: remove reload action from metadata
+
+---
+ heartbeat/clvm | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/heartbeat/clvm b/heartbeat/clvm
+index a778333..8eae6c3 100755
+--- a/heartbeat/clvm
++++ b/heartbeat/clvm
+@@ -78,7 +78,6 @@ is set to.
+ <action name="start" timeout="90" />
+ <action name="stop" timeout="90" />
+ <action name="monitor" timeout="90" interval="30" depth="0" />
+-<action name="reload" timeout="90" />
+ <action name="meta-data" timeout="10" />
+ <action name="validate-all" timeout="20" />
+ </actions>
diff --git a/SOURCES/bz1360768-galera-prevent-promote-after-demote.patch b/SOURCES/bz1360768-galera-prevent-promote-after-demote.patch
new file mode 100644
index 0000000..8c2dc27
--- /dev/null
+++ b/SOURCES/bz1360768-galera-prevent-promote-after-demote.patch
@@ -0,0 +1,27 @@
+From d16f9dad57d9dae587b6a267fbd250c1ff3a2a20 Mon Sep 17 00:00:00 2001
+From: Damien Ciabrini <dciabrin@redhat.com>
+Date: Fri, 18 Nov 2016 09:43:29 +0100
+Subject: [PATCH] galera: prevent promote right after demote
+
+---
+ heartbeat/galera | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/heartbeat/galera b/heartbeat/galera
+index 543200d..66d5ad8 100755
+--- a/heartbeat/galera
++++ b/heartbeat/galera
+@@ -764,6 +764,13 @@ galera_demote()
+     clear_sync_needed
+     clear_no_grastate
+ 
++    # Clear master score here rather than letting pacemaker do so once
++    # demote finishes. This way a promote cannot take place right
++    # after this demote even if pacemaker is requested to do so. It
++    # will first have to run a start/monitor op, to reprobe the state
++    # of the other galera nodes and act accordingly.
++    clear_master_score
++
+     # record last commit for next promotion
+     detect_last_commit
+     rc=$?
diff --git a/SOURCES/bz1376588-iSCSITarget-properly-create-portals-for-lio-t.patch b/SOURCES/bz1376588-iSCSITarget-properly-create-portals-for-lio-t.patch
new file mode 100644
index 0000000..c63115b
--- /dev/null
+++ b/SOURCES/bz1376588-iSCSITarget-properly-create-portals-for-lio-t.patch
@@ -0,0 +1,38 @@
+From b10e2de76ccb143fdfd69988c8105ad4336d57d2 Mon Sep 17 00:00:00 2001
+From: Patrick Emer
+Date: Sat, 27 Jun 2015 15:29:12 +0200
+Subject: [PATCH] iSCSITarget: properly create portals for lio-t implementation
+
+Even when defining the "portals" parameter, targetcli creates a
+default portal at 0.0.0.0:3260 if the auto_add_default_portal option
+is set to true (default), causing the resource to fail with
+OCF_ERR_GENERIC.
+
+Suppress creation of the default portal by setting
+auto_add_default_portal=false if the "portals" parameter is set,
+and retain the original behavior if it is not.
+
+Fixes ClusterLabs/resource-agents#630.
+---
+ heartbeat/iSCSITarget | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/heartbeat/iSCSITarget b/heartbeat/iSCSITarget
+index 72ec64a..401e98c 100755
+--- a/heartbeat/iSCSITarget
++++ b/heartbeat/iSCSITarget
+@@ -326,10 +326,13 @@ iSCSITarget_start() {
+ 		# automatically creates the corresponding target if it
+ 		# doesn't already exist.
+ for portal in ${OCF_RESKEY_portals}; do +- ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC + if [ $portal != ${OCF_RESKEY_portals_default} ] ; then ++ ocf_run targetcli /iscsi set global auto_add_default_portal=false || exit $OCF_ERR_GENERIC ++ ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC + IFS=':' read -a sep_portal <<< "$portal" + ocf_run targetcli /iscsi/${OCF_RESKEY_iqn}/tpg1/portals create "${sep_portal[0]}" "${sep_portal[1]}" || exit $OCF_ERR_GENERIC ++ else ++ ocf_run targetcli /iscsi create ${OCF_RESKEY_iqn} || exit $OCF_ERR_GENERIC + fi + done + # in lio, we can set target parameters by manipulating diff --git a/SOURCES/bz1380405-send_arp-usage.patch b/SOURCES/bz1380405-send_arp-usage.patch new file mode 100644 index 0000000..b1acda3 --- /dev/null +++ b/SOURCES/bz1380405-send_arp-usage.patch @@ -0,0 +1,252 @@ +From 922ef94eefd55ca25df7ce7c98ac7c87134aa982 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 4 Apr 2017 12:12:35 +0200 +Subject: [PATCH] send_arp: update usage info + +--- + heartbeat/IPaddr2 | 2 +- + heartbeat/SendArp | 2 +- + tools/send_arp.libnet.c | 70 ++++++++++++++++--------------------------------- + tools/send_arp.linux.c | 60 ++++++++++++++++-------------------------- + 4 files changed, 48 insertions(+), 86 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index 27b7208..d07c622 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -708,7 +708,7 @@ run_send_arp() { + LOGLEVEL=info + fi + if [ $ARP_COUNT -ne 0 ] ; then +- ARGS="-i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" ++ ARGS="-i $OCF_RESKEY_arp_interval -c $ARP_COUNT -p $SENDARPPIDFILE -I $NIC -m $MY_MAC $OCF_RESKEY_ip" + ocf_log $LOGLEVEL "$SENDARP $ARGS" + if ocf_is_true $OCF_RESKEY_arp_bg; then + ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 +diff --git a/heartbeat/SendArp b/heartbeat/SendArp +index dbcc7dc..033865f 100755 +--- a/heartbeat/SendArp ++++ b/heartbeat/SendArp +@@ -156,7 +156,7 @@ sendarp_start() { + fi + + +- ARGS="-i $ARP_INTERVAL_MS -r $ARP_REPEAT -p $SENDARPPIDFILE $INTERFACE $BASEIP auto $BASEIP $ARP_NETMASK" ++ ARGS="-i $ARP_INTERVAL_MS -c $ARP_REPEAT -p $SENDARPPIDFILE -I $INTERFACE -m auto $BASEIP" + ocf_log debug "$SENDARP $ARGS" + + rc=$OCF_SUCCESS +diff --git a/tools/send_arp.libnet.c b/tools/send_arp.libnet.c +index 12fe7f1..bd96a49 100644 +--- a/tools/send_arp.libnet.c ++++ b/tools/send_arp.libnet.c +@@ -62,35 +62,14 @@ + #define PIDFILE_BASE PIDDIR "/send_arp-" + + static char print_usage[]={ +-"send_arp: sends out custom ARP packet.\n" +-" usage: send_arp [-i repeatinterval-ms] [-r repeatcount] [-p pidfile] \\\n" +-" device src_ip_addr src_hw_addr broadcast_ip_addr netmask\n" +-"\n" +-" where:\n" +-" repeatinterval-ms: timing, in milliseconds of sending arp packets\n" +-" For each ARP announcement requested, a pair of ARP packets is sent,\n" +-" an ARP request, and an ARP reply. 
This is becuse some systems\n" +-" ignore one or the other, and this combination gives the greatest\n" +-" chance of success.\n" +-"\n" +-" Each time an ARP is sent, if another ARP will be sent then\n" +-" the code sleeps for half of repeatinterval-ms.\n" +-"\n" +-" repeatcount: how many pairs of ARP packets to send.\n" +-" See above for why pairs are sent\n" +-"\n" +-" pidfile: pid file to use\n" +-"\n" +-" device: netowrk interace to use\n" +-"\n" +-" src_ip_addr: source ip address\n" +-"\n" +-" src_hw_addr: source hardware address.\n" +-" If \"auto\" then the address of device\n" +-"\n" +-" broadcast_ip_addr: ignored\n" +-"\n" +-" netmask: ignored\n" ++"Usage: send_arp [-i repeatinterval-ms] [-c count] [-p pidfile] [-I device] [-m mac] destination\n" ++" -i repeatinterval-ms : repeat interval in milliseconds (ignored in Linux version)\n" ++" -c count : how many packets to send\n" ++" -p pidfile : pid file (ignored in Linux version)\n" ++" -I device : which ethernet device to use\n" ++" -m mac : source MAC address (ignored in Linux version).\n" ++" If \"auto\" device address is used\n" ++" destination : ask for what ip address\n" + }; + + static const char * SENDARPNAME = "send_arp"; +@@ -158,41 +137,38 @@ main(int argc, char *argv[]) + cl_log_set_facility(LOG_USER); + cl_inherit_logging_environment(0); + +- while ((flag = getopt(argc, argv, "i:r:p:")) != EOF) { ++ while ((flag = getopt(argc, argv, "h?c:I:i:p:m:")) != EOF) { + switch(flag) { + + case 'i': msinterval= atol(optarg); + break; + +- case 'r': repeatcount= atoi(optarg); ++ case 'c': repeatcount= atoi(optarg); + break; + + case 'p': pidfilename= optarg; + break; + ++ case 'I': device= optarg; ++ break; ++ ++ case 'm': macaddr= optarg; ++ break; ++ ++ case 'h': ++ case '?': + default: fprintf(stderr, "%s\n\n", print_usage); + return 1; + break; + } + } +- if (argc-optind != 5) { +- fprintf(stderr, "%s\n\n", print_usage); +- return 1; +- } + +- /* +- * argv[optind+1] DEVICE dc0,eth0:0,hme0:0, +- * argv[optind+2] IP 192.168.195.186 +- * argv[optind+3] MAC ADDR 00a0cc34a878 +- * argv[optind+4] BROADCAST 192.168.195.186 +- * argv[optind+5] NETMASK ffffffffffff +- */ ++ argc -= optind; ++ argv += optind; ++ if (argc != 1) ++ usage(); + +- device = argv[optind]; +- ipaddr = argv[optind+1]; +- macaddr = argv[optind+2]; +- broadcast = argv[optind+3]; +- netmask = argv[optind+4]; ++ ipaddr = *argv; + + if (!pidfilename) { + if (snprintf(pidfilenamebuf, sizeof(pidfilenamebuf), "%s%s", +diff --git a/tools/send_arp.linux.c b/tools/send_arp.linux.c +index 477100a..348794c 100644 +--- a/tools/send_arp.linux.c ++++ b/tools/send_arp.linux.c +@@ -137,22 +137,25 @@ static socklen_t sll_len(size_t halen) + void usage(void) + { + fprintf(stderr, +- "Usage: arping [-fqbDUAV] [-c count] [-w timeout] [-I device] [-s source] destination\n" +- " -f : quit on first reply\n" +- " -q : be quiet\n" +- " -b : keep broadcasting, don't go unicast\n" +- " -D : duplicate address detection mode\n" +- " -U : Unsolicited ARP mode, update your neighbours\n" +- " -A : ARP answer mode, update your neighbours\n" +- " -V : print version and exit\n" ++ "Usage: send_arp [-fqbDUAV] [-c count] [-w timeout] [-I device] [-s source] destination\n" ++ " -f : quit on first reply (not available in libnet version)\n" ++ " -q : be quiet (not available in libnet version)\n" ++ " -b : keep broadcasting, don't go unicast (not available in libnet version)\n" ++ " -i : repeat interval in milliseconds (ignored)\n" ++ " -p : pid file (ignored)\n" ++ " -D : duplicate address detection mode 
(not available in libnet version)\n" ++ " -U : Unsolicited ARP mode, update your neighbours (not available in libnet version)\n" ++ " -A : ARP answer mode, update your neighbours (not available in libnet version)\n" ++ " -V : print version and exit (not available in libnet version)\n" + " -c count : how many packets to send\n" +- " -w timeout : how long to wait for a reply\n" ++ " -w timeout : how long to wait for a reply (not available in libnet version)\n" + " -I device : which ethernet device to use" + #ifdef DEFAULT_DEVICE_STR + " (" DEFAULT_DEVICE_STR ")" + #endif + "\n" +- " -s source : source ip address\n" ++ " -s source : source ip address (not available in libnet version)\n" ++ " -m mac : source MAC address (ignored).\n" + " destination : ask for what ip address\n" + ); + exit(2); +@@ -1044,7 +1047,7 @@ main(int argc, char **argv) + + disable_capability_raw(); + +- while ((ch = getopt(argc, argv, "h?bfDUAqc:w:s:I:Vr:i:p:")) != EOF) { ++ while ((ch = getopt(argc, argv, "h?bfDUAqc:w:s:I:Vi:m:p:")) != EOF) { + switch(ch) { + case 'b': + broadcast_only=1; +@@ -1063,9 +1066,6 @@ main(int argc, char **argv) + case 'q': + quiet++; + break; +- case 'r': /* send_arp.libnet compatibility option */ +- hb_mode = 1; +- /* fall-through */ + case 'c': + count = atoi(optarg); + break; +@@ -1086,9 +1086,10 @@ main(int argc, char **argv) + exit(0); + case 'p': + case 'i': +- hb_mode = 1; +- /* send_arp.libnet compatibility options, ignore */ +- break; ++ case 'm': ++ hb_mode = 1; ++ /* send_arp.libnet compatibility options, ignore */ ++ break; + case 'h': + case '?': + default: +@@ -1098,30 +1099,15 @@ main(int argc, char **argv) + + if(hb_mode) { + /* send_arp.libnet compatibility mode */ +- if (argc - optind != 5) { +- usage(); +- return 1; +- } +- /* +- * argv[optind+1] DEVICE dc0,eth0:0,hme0:0, +- * argv[optind+2] IP 192.168.195.186 +- * argv[optind+3] MAC ADDR 00a0cc34a878 +- * argv[optind+4] BROADCAST 192.168.195.186 +- * argv[optind+5] NETMASK ffffffffffff +- */ +- + unsolicited = 1; +- device.name = argv[optind]; +- target = argv[optind+1]; ++ } + +- } else { +- argc -= optind; +- argv += optind; +- if (argc != 1) ++ argc -= optind; ++ argv += optind; ++ if (argc != 1) + usage(); + +- target = *argv; +- } ++ target = *argv; + + if (device.name && !*device.name) + device.name = NULL; diff --git a/SOURCES/bz1384955-nfsserver-dont-stop-rpcbind.patch b/SOURCES/bz1384955-nfsserver-dont-stop-rpcbind.patch new file mode 100644 index 0000000..7d64cfb --- /dev/null +++ b/SOURCES/bz1384955-nfsserver-dont-stop-rpcbind.patch @@ -0,0 +1,13 @@ +diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver +--- a/heartbeat/nfsserver 2016-11-01 14:49:44.005585567 +0100 ++++ b/heartbeat/nfsserver 2016-11-01 14:50:17.280266118 +0100 +@@ -918,9 +918,6 @@ + + # systemd + if [ "$EXEC_MODE" -eq "2" ]; then +- nfs_exec stop rpcbind > /dev/null 2>&1 +- ocf_log info "Stop: rpcbind" +- + nfs_exec stop rpc-gssd > /dev/null 2>&1 + ocf_log info "Stop: rpc-gssd" + fi diff --git a/SOURCES/bz1387363-Filesystem-submount-check.patch b/SOURCES/bz1387363-Filesystem-submount-check.patch new file mode 100644 index 0000000..b7aef4b --- /dev/null +++ b/SOURCES/bz1387363-Filesystem-submount-check.patch @@ -0,0 +1,16 @@ +diff -uNr a/heartbeat/Filesystem b/heartbeat/Filesystem +--- a/heartbeat/Filesystem 2016-12-20 14:16:29.439700386 +0100 ++++ b/heartbeat/Filesystem 2016-12-20 14:18:04.954623779 +0100 +@@ -538,6 +538,12 @@ + Filesystem_notify + fi + ++ # Check if there are any mounts mounted under the mountpoint ++ if list_mounts | grep -q 
-E " $MOUNTPOINT/\w+" >/dev/null 2>&1; then ++ ocf_log err "There is one or more mounts mounted under $MOUNTPOINT." ++ return $OCF_ERR_CONFIGURED ++ fi ++ + # See if the device is already mounted. + if Filesystem_status >/dev/null 2>&1 ; then + ocf_log info "Filesystem $MOUNTPOINT is already mounted." diff --git a/SOURCES/bz1387491-nfsserver-keep-options.patch b/SOURCES/bz1387491-nfsserver-keep-options.patch new file mode 100644 index 0000000..2889084 --- /dev/null +++ b/SOURCES/bz1387491-nfsserver-keep-options.patch @@ -0,0 +1,54 @@ +diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver +--- a/heartbeat/nfsserver 2016-10-28 10:51:35.357385160 +0200 ++++ b/heartbeat/nfsserver 2016-10-28 13:22:16.844380108 +0200 +@@ -434,7 +434,12 @@ + # only write to the tmp /etc/sysconfig/nfs if sysconfig exists. + # otherwise this distro does not support setting these options. + if [ -d "/etc/sysconfig" ]; then +- echo "${key}=\"${value}\"" >> $file ++ # replace if the value exists, append otherwise ++ if grep "^\s*${key}=" $file ; then ++ sed -i "s/\s*${key}=.*$/${key}=\"${value}\"/" $file ++ else ++ echo "${key}=\"${value}\"" >> $file ++ fi + elif [ "$requires_sysconfig" = "true" ]; then + ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args." + fi +@@ -447,6 +452,11 @@ + local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX) + local statd_args + ++ if [ -f "$NFS_SYSCONFIG" ]; then ++ ## Take the $NFS_SYSCONFIG file as our skeleton ++ cp $NFS_SYSCONFIG $tmpconfig ++ fi ++ + # nfsd args + set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true" + +@@ -477,14 +487,20 @@ + + # override local nfs config. preserve previous local config though. + if [ -s $tmpconfig ]; then +- cat $NFS_SYSCONFIG | grep -e "$NFS_SYSCONFIG_AUTOGEN_TAG" ++ cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 + if [ $? -ne 0 ]; then + # backup local nfs config if it doesn't have our HA autogen tag in it. + mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP + fi +- echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG +- echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG +- cat $tmpconfig >> $NFS_SYSCONFIG ++ ++ cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 ++ if [ $? 
-ne 0 ]; then ++ echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG ++ echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG ++ cat $tmpconfig >> $NFS_SYSCONFIG ++ else ++ cat $tmpconfig > $NFS_SYSCONFIG ++ fi + fi + rm -f $tmpconfig + } diff --git a/SOURCES/bz1388854-delay-change-startdelay.patch b/SOURCES/bz1388854-delay-change-startdelay.patch new file mode 100644 index 0000000..d9582a9 --- /dev/null +++ b/SOURCES/bz1388854-delay-change-startdelay.patch @@ -0,0 +1,41 @@ +From 9134a62cec26106a0540309f60db9a420e7488b6 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 19 Oct 2016 12:02:06 +0200 +Subject: [PATCH] Delay: set default startdelay lower than start timeout to + avoid it timing out before starting with default values + +--- + heartbeat/Delay | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/Delay b/heartbeat/Delay +index 9cfa939..f9d303b 100755 +--- a/heartbeat/Delay ++++ b/heartbeat/Delay +@@ -14,7 +14,7 @@ + # OCF_RESKEY_mondelay + # + # +-# OCF_RESKEY_startdelay defaults to 30 (seconds) ++# OCF_RESKEY_startdelay defaults to 20 (seconds) + # OCF_RESKEY_stopdelay defaults to $OCF_RESKEY_startdelay + # OCF_RESKEY_mondelay defaults to $OCF_RESKEY_startdelay + # +@@ -55,7 +55,7 @@ This script is a test resource for introducing delay. + How long in seconds to delay on start operation. + + Start delay +- ++ + + + +@@ -195,7 +195,7 @@ if [ $# -ne 1 ]; then + exit $OCF_ERR_ARGS + fi + +-: ${OCF_RESKEY_startdelay=30} ++: ${OCF_RESKEY_startdelay=20} + : ${OCF_RESKEY_stopdelay=$OCF_RESKEY_startdelay} + : ${OCF_RESKEY_mondelay=$OCF_RESKEY_startdelay} + diff --git a/SOURCES/bz1389300-iSCSILogicalUnit-IPv6-support.patch b/SOURCES/bz1389300-iSCSILogicalUnit-IPv6-support.patch new file mode 100644 index 0000000..0f37142 --- /dev/null +++ b/SOURCES/bz1389300-iSCSILogicalUnit-IPv6-support.patch @@ -0,0 +1,25 @@ +From b22e59bc72c93df40846b0a528f2839466e185de Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 23 Feb 2017 16:20:29 +0100 +Subject: [PATCH] iSCSILogicalUnit: lio-t IPv6-support + +--- + heartbeat/iSCSILogicalUnit | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit +index 0a07c5f..99e7686 100755 +--- a/heartbeat/iSCSILogicalUnit ++++ b/heartbeat/iSCSILogicalUnit +@@ -379,6 +379,11 @@ iSCSILogicalUnit_start() { + fi + ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/luns create /backstores/block/${OCF_RESOURCE_INSTANCE} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC + ++ if $(ip a | grep -q inet6); then ++ ocf_run -q targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/portals delete 0.0.0.0 3260 ++ ocf_run -q targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/portals create ::0 ++ fi ++ + if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then + for initiator in ${OCF_RESKEY_allowed_initiators}; do + ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC diff --git a/SOURCES/bz1390974-redis-fix-selinux-permissions.patch b/SOURCES/bz1390974-redis-fix-selinux-permissions.patch new file mode 100644 index 0000000..b6c7a98 --- /dev/null +++ b/SOURCES/bz1390974-redis-fix-selinux-permissions.patch @@ -0,0 +1,29 @@ +From 70b13e3c27944292cfe658284878de5cb3a4918c Mon Sep 17 00:00:00 2001 +From: Gabriele Cerami +Date: Wed, 2 Nov 2016 00:44:37 +0100 +Subject: [PATCH] Redis: restore rundir security context + +When selinux rules packages are installed, rundir does not yet exist, +and 
security context for it cannot be applied. Calling restorecon after +dir creation ensures that the proper context is applied to the rundir. +If the context is not applied, selinux denies write permission, the unix +socket cannot be created, and redis does not start +--- + heartbeat/redis | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/heartbeat/redis b/heartbeat/redis +index f85b2f7..1fe8906 100755 +--- a/heartbeat/redis ++++ b/heartbeat/redis +@@ -324,6 +324,10 @@ function start() { + + [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR" + chown -R "$REDIS_USER" "$REDIS_RUNDIR" ++ if have_binary "restorecon"; then ++ restorecon -Rv "$REDIS_RUNDIR" ++ fi ++ + + # check for 0 byte database dump file. This is an unrecoverable start + # condition that we can avoid by deleting the 0 byte database file. diff --git a/SOURCES/bz1391470-galera-last-commit-fix-for-mariadb-10.1.18.patch b/SOURCES/bz1391470-galera-last-commit-fix-for-mariadb-10.1.18.patch new file mode 100644 index 0000000..1afe40d --- /dev/null +++ b/SOURCES/bz1391470-galera-last-commit-fix-for-mariadb-10.1.18.patch @@ -0,0 +1,83 @@ +From 4e62e214f560c68f037d7a4730076ddc6ee72301 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Thu, 17 Nov 2016 22:19:56 +0100 +Subject: [PATCH] galera: make last commit parsing compatible with mariadb + 10.1.18+ + +--- + heartbeat/galera | 39 +++++++++++++++++++++------------------ + 1 file changed, 21 insertions(+), 18 deletions(-) + +diff --git a/heartbeat/galera b/heartbeat/galera +index 543200d..44c52a5 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -662,13 +662,13 @@ detect_last_commit() + --socket=$OCF_RESKEY_socket \ + --datadir=$OCF_RESKEY_datadir \ + --user=$OCF_RESKEY_user" ++ local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p' + local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p' + + ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" + last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" + if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then + local tmp=$(mktemp) +- local tmperr=$(mktemp) + + # if we pass here because grastate.dat doesn't exist, + # try not to bootstrap from this node if possible +@@ -678,33 +678,36 @@ detect_last_commit() + + ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" + +- ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr ++ ${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null + +- last_commit="$(cat $tmp | sed -n $recovered_position_regex)" ++ last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)" + if [ -z "$last_commit" ]; then + # Galera uses InnoDB's 2pc transactions internally. If + # server was stopped in the middle of a replication, the + # recovery may find a "prepared" XA transaction in the + # redo log, and mysql won't recover automatically + +- cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null +- if [ $? -eq 0 ]; then +- # we can only rollback the transaction, but that's OK +- # since the DB will get resynchronized anyway +- ocf_log warn "local node <${NODENAME}> was not shutdown properly. 
Rollback stuck transaction with --tc-heuristic-recover" +- ${OCF_RESKEY_binary} $recover_args --wsrep-recover \ +- --tc-heuristic-recover=rollback > $tmp 2>/dev/null +- +- last_commit="$(cat $tmp | sed -n $recovered_position_regex)" +- if [ ! -z "$last_commit" ]; then +- ocf_log warn "State recovered. force SST at next restart for full resynchronization" +- rm -f ${OCF_RESKEY_datadir}/grastate.dat +- # try not to bootstrap from this node if possible +- set_no_grastate ++ local recovery_file="$(cat $tmp | sed -n $recovery_file_regex)" ++ if [ -e $recovery_file ]; then ++ cat $recovery_file | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null ++ if [ $? -eq 0 ]; then ++ # we can only rollback the transaction, but that's OK ++ # since the DB will get resynchronized anyway ++ ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover" ++ ${OCF_RESKEY_binary} $recover_args --wsrep-recover \ ++ --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null ++ ++ last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)" ++ if [ ! -z "$last_commit" ]; then ++ ocf_log warn "State recovered. force SST at next restart for full resynchronization" ++ rm -f ${OCF_RESKEY_datadir}/grastate.dat ++ # try not to bootstrap from this node if possible ++ set_no_grastate ++ fi + fi + fi + fi +- rm -f $tmp $tmperr ++ rm -f $tmp + fi + + if [ ! -z "$last_commit" ]; then diff --git a/SOURCES/bz1391495-nfsserver-keep-options.patch b/SOURCES/bz1391495-nfsserver-keep-options.patch deleted file mode 100644 index 2889084..0000000 --- a/SOURCES/bz1391495-nfsserver-keep-options.patch +++ /dev/null @@ -1,54 +0,0 @@ -diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver ---- a/heartbeat/nfsserver 2016-10-28 10:51:35.357385160 +0200 -+++ b/heartbeat/nfsserver 2016-10-28 13:22:16.844380108 +0200 -@@ -434,7 +434,12 @@ - # only write to the tmp /etc/sysconfig/nfs if sysconfig exists. - # otherwise this distro does not support setting these options. - if [ -d "/etc/sysconfig" ]; then -- echo "${key}=\"${value}\"" >> $file -+ # replace if the value exists, append otherwise -+ if grep "^\s*${key}=" $file ; then -+ sed -i "s/\s*${key}=.*$/${key}=\"${value}\"/" $file -+ else -+ echo "${key}=\"${value}\"" >> $file -+ fi - elif [ "$requires_sysconfig" = "true" ]; then - ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args." - fi -@@ -447,6 +452,11 @@ - local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX) - local statd_args - -+ if [ -f "$NFS_SYSCONFIG" ]; then -+ ## Take the $NFS_SYSCONFIG file as our skeleton -+ cp $NFS_SYSCONFIG $tmpconfig -+ fi -+ - # nfsd args - set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true" - -@@ -477,14 +487,20 @@ - - # override local nfs config. preserve previous local config though. - if [ -s $tmpconfig ]; then -- cat $NFS_SYSCONFIG | grep -e "$NFS_SYSCONFIG_AUTOGEN_TAG" -+ cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 - if [ $? -ne 0 ]; then - # backup local nfs config if it doesn't have our HA autogen tag in it. - mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP - fi -- echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG -- echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG -- cat $tmpconfig >> $NFS_SYSCONFIG -+ -+ cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 -+ if [ $? 
-ne 0 ]; then -+ echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG -+ echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG -+ cat $tmpconfig >> $NFS_SYSCONFIG -+ else -+ cat $tmpconfig > $NFS_SYSCONFIG -+ fi - fi - rm -f $tmpconfig - } diff --git a/SOURCES/bz1391580-portblock-return-success-on-stop-with-invalid-ip.patch b/SOURCES/bz1391580-portblock-return-success-on-stop-with-invalid-ip.patch new file mode 100644 index 0000000..c103537 --- /dev/null +++ b/SOURCES/bz1391580-portblock-return-success-on-stop-with-invalid-ip.patch @@ -0,0 +1,22 @@ +From 501316b1c6c072c8750c8bd951594a4e1ef408f4 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 25 Nov 2016 12:13:17 +0100 +Subject: [PATCH] IPaddr2: return success on stop with invalid IP + +--- + heartbeat/IPaddr2 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index b224ca5..aea7a19 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -351,7 +351,7 @@ ip_init() { + exit $OCF_ERR_INSTALLED + fi + +- if [ X"$OCF_RESKEY_ip" = "X" ]; then ++ if [ X"$OCF_RESKEY_ip" = "X" ] && [ "$__OCF_ACTION" != "stop" ]; then + ocf_exit_reason "IP address (the ip parameter) is mandatory" + exit $OCF_ERR_CONFIGURED + fi diff --git a/SOURCES/bz1392432-LVM-partial_activation-fix.patch b/SOURCES/bz1392432-LVM-partial_activation-fix.patch new file mode 100644 index 0000000..147f185 --- /dev/null +++ b/SOURCES/bz1392432-LVM-partial_activation-fix.patch @@ -0,0 +1,33 @@ +From 42d298191993fd8d851a881a067e09aca2d4a079 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 27 Jan 2017 10:25:18 +0100 +Subject: [PATCH] LVM: fix partial activation + +--- + heartbeat/LVM | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/LVM b/heartbeat/LVM +index 79c2791..90a900b 100755 +--- a/heartbeat/LVM ++++ b/heartbeat/LVM +@@ -548,7 +548,8 @@ LVM_validate_all() { + # "unknown device" then another node may have marked a device missing + # where we have access to all of them and can start without issue. + if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then +- if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'unknown device' > /dev/null 2>&1; then ++ case $(vgs -o attr --noheadings $OCF_RESKEY_volgrpname | tr -d ' ') in ++ ???p??*) + if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then + # We are missing devices and cannot activate partially + ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially" +@@ -558,7 +559,8 @@ LVM_validate_all() { + # Assume that caused the vgck failure and carry on + ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action." + fi +- fi ++ ;; ++ esac + # else the vg is partial but all devices are accounted for, so another + # node must have marked the device missing. Proceed. 
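# Illustrative aside (not part of the patch): the "???p??*" glob above keys on
# the 4th character of the LVM vg_attr string, which lvm2 sets to "p" when the
# volume group is partial, i.e. physical volumes are missing. A standalone
# check along the same lines (VG name passed as $1) might look like:
vg_is_partial() {
    case "$(vgs -o attr --noheadings "$1" 2>/dev/null | tr -d ' ')" in
        ???p*) return 0 ;;  # 4th attr bit is "p" => devices missing
        *)     return 1 ;;
    esac
}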
+ else diff --git a/SOURCES/bz1393189-1-IPaddr2-detect-duplicate-IP.patch b/SOURCES/bz1393189-1-IPaddr2-detect-duplicate-IP.patch new file mode 100644 index 0000000..30b717d --- /dev/null +++ b/SOURCES/bz1393189-1-IPaddr2-detect-duplicate-IP.patch @@ -0,0 +1,76 @@ +From 24edeaaacf0648b3df0032ef5d1deb3a9e11b388 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 11 Jan 2017 14:11:21 +0100 +Subject: [PATCH] IPaddr2 detect duplicate IP + +--- + heartbeat/IPaddr2 | 28 ++++++++++++++++++++++++---- + 1 file changed, 24 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index b416e31..2d5e1ea 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -79,6 +79,7 @@ OCF_RESKEY_arp_count_default=5 + OCF_RESKEY_arp_count_refresh_default=0 + OCF_RESKEY_arp_bg_default=true + OCF_RESKEY_arp_mac_default="ffffffffffff" ++OCF_RESKEY_run_arping_default=false + + : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} + : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} +@@ -90,6 +91,7 @@ OCF_RESKEY_arp_mac_default="ffffffffffff" + : ${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}} + : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} + : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} ++: ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}} + ####################################################################### + + SENDARP=$HA_BIN/send_arp +@@ -340,6 +342,14 @@ IP address goes away. + + + ++ ++ ++Whether or not to run arping for IPv4 collision detection check. ++ ++Run arping for IPv4 collision detection check ++ ++ ++ + + + +@@ -556,6 +566,15 @@ add_interface () { + iface="$4" + label="$5" + ++ if [ "$FAMILY" = "inet" ] && ocf_is_true $OCF_RESKEY_run_arping && ++ check_binary arping; then ++ arping -q -c 2 -w 3 -D -I $iface $ipaddr ++ if [ $? = 1 ]; then ++ ocf_log err "IPv4 address collision $ipaddr [DAD]" ++ return $OCF_ERR_CONFIGURED ++ fi ++ fi ++ + if [ "$FAMILY" = "inet6" ] && ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then + add_ipv6_addrlabel $ipaddr + fi +@@ -877,10 +896,11 @@ ip_start() { + fi + + add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL +- +- if [ $? -ne 0 ]; then +- ocf_exit_reason "$CMD failed." +- exit $OCF_ERR_GENERIC ++ rc=$? 
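# Illustrative aside (not part of the patch): capturing rc=$? immediately after
# the call lets ip_start exit with whatever OCF code add_interface chose (for
# example OCF_ERR_CONFIGURED on an address collision) instead of the blanket
# OCF_ERR_GENERIC used before, i.e. the shape being introduced here is:
#   add_interface "$ip" "$netmask" "$brd" "$nic" "$label"; rc=$?
#   [ $rc -ne $OCF_SUCCESS ] && exit $rc   # propagate the specific error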
++ ++ if [ $rc -ne $OCF_SUCCESS ]; then ++ ocf_exit_reason "Failed to add $OCF_RESKEY_ip" ++ exit $rc + fi + fi + diff --git a/SOURCES/bz1393189-2-IPaddr2-detect-duplicate-IP.patch b/SOURCES/bz1393189-2-IPaddr2-detect-duplicate-IP.patch new file mode 100644 index 0000000..f3acf29 --- /dev/null +++ b/SOURCES/bz1393189-2-IPaddr2-detect-duplicate-IP.patch @@ -0,0 +1,54 @@ +From 303a738d177a60d001209c6d334ef44f05e1b93b Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 28 Mar 2017 14:15:49 +0200 +Subject: [PATCH] IPaddr2: fix gratuitious ARP checks + +--- + heartbeat/IPaddr2 | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index 27b7208..aea2aa2 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -710,10 +710,14 @@ run_send_arp() { + if [ $ARP_COUNT -ne 0 ] ; then + ARGS="-i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" + ocf_log $LOGLEVEL "$SENDARP $ARGS" +- if ocf_is_true $OCF_RESKEY_arp_bg; then +- ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 +- else +- $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" ++ output=$($SENDARP $ARGS 2>&1) ++ rc=$? ++ if [ $rc -ne $OCF_SUCCESS ]; then ++ if ! ocf_is_true $OCF_RESKEY_arp_bg; then ++ ocf_log err "send_arp output: $output" ++ fi ++ ocf_exit_reason "Could not send gratuitous arps" ++ exit $OCF_ERR_GENERIC + fi + fi + } +@@ -771,10 +775,14 @@ run_send_ib_arp() { + if [ $ARP_COUNT -ne 0 ] ; then + ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" + ocf_log $LOGLEVEL "ipoibarping $ARGS" +- if ocf_is_true $OCF_RESKEY_arp_bg; then +- (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 +- else +- ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" ++ output=$(ipoibarping $ARGS 2>&1) ++ rc=$? ++ if [ $rc -ne $OCF_SUCCESS ]; then ++ if ! ocf_is_true $OCF_RESKEY_arp_bg; then ++ ocf_log err "ipoibarping output: $output" ++ fi ++ ocf_exit_reason "Could not send gratuitous arps" ++ exit $OCF_ERR_GENERIC + fi + fi + } +-- +2.9.3 + diff --git a/SOURCES/bz1394296-redis-fix-selinux-permissions.patch b/SOURCES/bz1394296-redis-fix-selinux-permissions.patch deleted file mode 100644 index b6c7a98..0000000 --- a/SOURCES/bz1394296-redis-fix-selinux-permissions.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 70b13e3c27944292cfe658284878de5cb3a4918c Mon Sep 17 00:00:00 2001 -From: Gabriele Cerami -Date: Wed, 2 Nov 2016 00:44:37 +0100 -Subject: [PATCH] Redis: restore rundir security context - -When selinux rules packages are installed, rundir does not yet exist, -and security context for it cannot be applied. Calling restorecon after -dir creation ensures that the proper context is applied to the rundir. -If the context is not applied, selinux denies write permission, the unix -socket cannot be created, and redis does not start ---- - heartbeat/redis | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/heartbeat/redis b/heartbeat/redis -index f85b2f7..1fe8906 100755 ---- a/heartbeat/redis -+++ b/heartbeat/redis -@@ -324,6 +324,10 @@ function start() { - - [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR" - chown -R "$REDIS_USER" "$REDIS_RUNDIR" -+ if have_binary "restorecon"; then -+ restorecon -Rv "$REDIS_RUNDIR" -+ fi -+ - - # check for 0 byte database dump file. This is an unrecoverable start - # condition that we can avoid by deleting the 0 byte database file. 
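The run_arping collision check added by the IPaddr2 patches above leans on the Duplicate Address Detection mode of iputils arping: with -D, arping exits 0 when no other host answers for the address and non-zero when a reply is seen. A minimal standalone probe in the same spirit (the interface and address below are made-up examples, not taken from the patch):

    iface=eth0
    ipaddr=192.0.2.10
    if arping -q -c 2 -w 3 -D -I "$iface" "$ipaddr"; then
        echo "no duplicate detected, safe to configure $ipaddr"
    else
        echo "IPv4 address collision for $ipaddr [DAD]" >&2
    fi
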
diff --git a/SOURCES/bz1395142-1-update-saphana-saphanatopology.patch b/SOURCES/bz1395142-1-update-saphana-saphanatopology.patch new file mode 100644 index 0000000..5cd8ffa --- /dev/null +++ b/SOURCES/bz1395142-1-update-saphana-saphanatopology.patch @@ -0,0 +1,1990 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2016-10-14 10:09:56.479051279 +0200 ++++ b/heartbeat/SAPHana 2016-10-14 10:29:23.990066292 +0200 +@@ -2,8 +2,8 @@ + # + # SAPHana + # +-# Description: Manages two single SAP HANA Instance in System Replication +-# Planned: do also manage scale-up scenarios ++# Description: Manages two SAP HANA Databases in System Replication ++# Planned: do also manage scale-out scenarios + # currently the SAPHana is dependent of the analysis of + # SAPHanaTopology + # For supported scenarios please read the README file provided +@@ -16,7 +16,7 @@ + # Support: linux@sap.com + # License: GNU General Public License (GPL) + # Copyright: (c) 2013,2014 SUSE Linux Products GmbH +-# Copyright: (c) 2015 SUSE Linux GmbH ++# (c) 2015-2016 SUSE Linux GmbH + # + # An example usage: + # See usage() function below for more details... +@@ -29,12 +29,13 @@ + # OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) + # OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no) + # OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt)) +-# OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes) ++# OCF_RESKEY_SAPHanaFilter (outdated, replaced by cluster property hana_${sid}_glob_filter) + # + # + ####################################################################### + # + # Initialization: ++SAPHanaVersion="0.152.17" + timeB=$(date '+%s') + + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +@@ -43,6 +44,12 @@ + # + ####################################################################### + # ++log_attributes=false ++if ocf_is_true "$log_attributes"; then ++ log_attr_file="/var/log/fhATTRIBUTES" ++else ++ log_attr_file="/dev/null" ++fi + + HANA_STATE_PRIMARY=0 + HANA_STATE_SECONDARY=1 +@@ -107,7 +114,7 @@ + cat <<-EOF + usage: $0 ($methods) + +- $0 manages a SAP HANA Instance as an HA resource. ++ $0 manages two SAP HANA databases (scale-up) in system replication. + + The 'start' operation starts the HANA instance or bring the "clone instance" to a WAITING status + The 'stop' operation stops the HANA instance +@@ -145,15 +152,14 @@ + + + +-0.151.1 ++$SAPHanaVersion + +-Manages two SAP HANA instances in system replication (SR). ++Manages two SAP HANA database systems in system replication (SR). + +-The SAPHanaSR resource agent manages two SAP Hana instances (databases) which are configured +-in system replication. This first version is limited to the scale-up scenario. Scale-Out is +-not supported in this version. ++The SAPHanaSR resource agent manages two SAP HANA database systems which are configured ++in system replication. SAPHana supports Scale-Up scenarios. + +-Managing the two SAP HANA instances means that the resource agent controls the start/stop of the ++Managing the two SAP HANA database systems means that the resource agent controls the start/stop of the + instances. In addition the resource agent is able to monitor the SAP HANA databases to check their + availability on landscape host configuration level. For this monitoring the resource agent relies on interfaces + provided by SAP. 
A third task of the resource agent is to also check the synchronisation status +@@ -205,9 +211,10 @@ + Should cluster/RA prefer to switchover to slave instance instead of restarting master locally? Default="yes" + no: Do prefer restart locally + yes: Do prefer takever to remote site ++ never: Do never run a sr_takeover (promote) at the secondary side. THIS VALUE IS CURRENTLY NOT SUPPORTED. + + Local or site recover preferred? +- ++ + + + Define, if a former primary should automatically be registered. +@@ -220,7 +227,7 @@ + Time difference needed between to primary time stamps, if a dual-primary situation occurs + Time difference needed between to primary time stamps, + if a dual-primary situation occurs. If the time difference is +- less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin ++ less than the time gap, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin + a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After + this registration to the new primary all data will be overwritten by the system replication. + +@@ -290,6 +297,45 @@ + local rc=0; tr -d '"'; return $rc + } + ++# function: version: cpmpare two HANA version strings ++function ver_lt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 && return 0 || return 1 ++} ++ ++function ver_le() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 ++} ++ ++function ver_gt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 && return 0 || return 1 ++} ++ ++function ver_ge() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 -o $? -eq 1 && return 0 || return 1 ++} ++# ++# function: version: cpmpare two HANA version strings ++# ++function version() { ++ if [ $# -eq 3 ]; then ++ case "$2" in ++ LE | le | "<=" ) ver_le $1 $3;; ++ LT | lt | "<" ) ver_lt $1 $3;; ++ GE | ge | ">=" ) ver_ge $1 $3;; ++ GT | gt | ">" ) ver_gt $1 $3;; ++ * ) return 1; ++ esac ++ elif [ $# -ge 5 ]; then ++ version $1 $2 $3 && shift 2 && version $* ++ else ++ return 1; ++ fi ++} ++ + # + # function: remoteHost2remoteNode - convert a SAP remoteHost to the cluster node name + # params: remoteHost +@@ -372,12 +418,16 @@ + dstr=$(date) + case "$attr_store" in + reboot | forever ) +- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE +- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file ++ fi ++ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? + ;; + props ) +- echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE +- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file ++ fi ++ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? 
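# Illustrative aside (not part of the patch): the ver_lt/ver_le/ver_gt/ver_ge
# helpers introduced above wrap ocf_version_cmp from ocf-shellfuncs, which
# returns 0 when the first version is lower, 1 when both are equal and 2 when
# the first is higher - hence ver_ge accepting both 1 and 2. Typical uses:
#   version "$hdbver" ">=" "1.00.111" && hdbState="hdbnsutil -sr_stateConfiguration"
#   version "1.00.100" "<=" "$v" "<" "2.00.000"   # chained form: 1.00.100 <= v < 2.00.000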
+ ;; + esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -405,12 +455,16 @@ + dstr=$(date) + case "$attr_store" in + reboot | forever ) +- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE +- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> $log_attr_file ++ fi ++ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>$log_attr_file; rc=$? + ;; + props ) +- echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE +- crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> $log_attr_file ++ fi ++ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>$log_attr_file; rc=$? + ;; + esac + else +@@ -460,6 +514,10 @@ + # DONE: PRIO2: Only adjust master if value is really different (try to check that) + oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot) + if [ "$oldscore" != "$score" ]; then ++ dstr=$(date) ++ if ocf_is_true "$log_attributes"; then ++ echo "$dstr: SAPHana: crm_master -v $score -l reboot " >> $log_attr_file ++ fi + super_ocf_log debug "DBG: SET crm master: $score (old: $oldscore)" + ${HA_SBIN_DIR}/crm_master -v $score -l reboot; rc=$? + else +@@ -471,9 +529,9 @@ + } + + # +-# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER) ++# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE) + # params: NODE_ROLES NODE_SYNC_STATUS +-# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], ++# globals: SCORING_TABLE[@], + # + scoring_crm_master() + { +@@ -482,7 +540,7 @@ + local sync="$2" + local skip=0 + local myScore="" +- for scan in "${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}"; do ++ for scan in "${SCORING_TABLE[@]}"; do + if [ $skip -eq 0 ]; then + read rolePatt syncPatt score <<< $scan + if grep "$rolePatt" <<< "$roles"; then +@@ -494,7 +552,7 @@ + fi + done + super_ocf_log debug "DBG: scoring_crm_master adjust score $myScore" +- # TODO: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 ++ # DONE: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 + if [ -n "$myScore" ]; then + set_crm_master $myScore + fi +@@ -514,28 +572,91 @@ + } + + # ++# function: HANA_CALL ++# params: timeout-in-seconds cmd-line ++# globals: sid(r), SID(r), InstanceName(r) ++# ++function HANA_CALL() ++{ ++ # ++ # TODO: PRIO 5: remove 'su - ${sidadm} later, when SAP HANA resoled issue with ++ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp) ++ # TODO: PRIO 5: Maybe make "su" optional by a parameter ++ local timeOut=0 ++ local onTimeOut="" ++ local rc=0 ++ local use_su=1 # Default to be changed later (see TODO above) ++ local pre_cmd="" ++ local cmd="" ++ local pre_script="" ++ local output="" ++ while [ $# -gt 0 ]; do ++ case "$1" in ++ --timeout ) timeOut=$2; shift;; ++ --use-su ) use_su=1;; ++ --on-timeout ) onTimeOut="$2"; shift;; ++ --cmd ) shift; cmd="$*"; break;; ++ esac ++ shift ++ done ++ ++ if [ $use_su 
-eq 1 ]; then ++ pre_cmd="su - ${sid}adm -c" ++ pre_script="true" ++ else ++ # as root user we need the library path to the SAP kernel to be able to call sapcontrol ++ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH ++ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] ++ then ++ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH ++ fi ++ pre_cmd="bash -c" ++ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH" ++ fi ++ case $timeOut in ++ 0 | inf ) ++ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? ++ ;; ++ * ) ++ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? ++ # ++ # on timeout ... ++ # ++ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then ++ local second_output="" ++ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); ++ fi ++ ;; ++ esac ++ echo "$output" ++ return $rc; ++} ++ ++# + # function: saphana_init - initialize variables for the resource agent + # params: InstanceName +-# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), +-# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w) ++# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), ++# globals: sr_name(w), remoteHost(w), otherNodes(w), remSR_name(w) + # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w) + # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) + # globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w) ++# globals: NODENAME(w), vNAME(w), hdbver(w), + # saphana_init : Define global variables with default values, if optional parameters are not set + # + function saphana_init() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=$OCF_SUCCESS +- local vName + local clN + # local site + # two parameter models (for transition only) + # OLD: InstanceName + # NEW: SID InstanceNumber ++ NODENAME=$(crm_node -n) + SID=$OCF_RESKEY_SID + InstanceNr=$OCF_RESKEY_InstanceNumber + SIDInstanceName="${SID}_HDB${InstanceNr}" + InstanceName="HDB${InstanceNr}" ++ export SAPSYSTEMNAME=$SID + super_ocf_log debug "DBG: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)" + sid=$(echo "$SID" | tr [:upper:] [:lower:]) + sidadm="${sid}adm" +@@ -544,15 +665,23 @@ + # DONE: PRIO4: SAPVIRHOST might be different to NODENAME + # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? 
Answer: Yes + # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 +- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8 +- vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ +- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr) ++ # We rely on the following format: SID is word#4, SYSNR is work#6, vHost is word#8 ++ if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then ++ vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ ++ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null ) ++ super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)" ++ else ++ super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)" ++ fi + if [ -z "$vName" ]; then + # + # if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology + # + vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME"); + fi ++ if [ -z "$vName" ]; then # last fallback if we are not able to figure out the virtual host name ++ vName="$NODENAME" ++ fi + SAPVIRHOST=${vName} + PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER" + AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" +@@ -571,6 +700,12 @@ + ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") + ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") + ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") ++ ATTR_NAME_HANA_OPERATION_MODE=("hana_${sid}_op_mode" "forever") ++ # ++ # new "central" attributes ++ # ++ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa") ++ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]}) + # + # TODO: PRIO4: Table for non-preferred-site-takeover + # +@@ -591,9 +726,7 @@ + ) + SCORING_TABLE_PREFERRED_LOCAL_RESTART=( + "[0-9]*:P:[^:]*:master .* 150" +- "[0-9]*:P:[^:]*:slave .* 140" +- "[0-9]*:P:[^:]*:\? 
.* 0" +- "[0-9]*:P:[^:]*:- .* 0" ++ "[0-9]*:P:[^:]*:.* .* 140" + "[0-9]*:S:[^:]*:master SOK 100" + "[0-9]*:S:[^:]*:master SFAIL -INFINITY" + "[0-9]*:S:[^:]*:slave SOK 10" +@@ -602,6 +735,25 @@ + "[0-9]*:S:[^:]*:- .* 0" + ".* .* -1" + ) ++ SCORING_TABLE_PREFERRED_NEVER=( ++ "[234]*:P:[^:]*:master .* 150" ++ "[015-9]*:P:[^:]*:master .* 90" ++ "[0-9]*:P:[^:]*:.* .* -INFINITY" ++ "[0-9]*:S:[^:]*:.* .* -INFINITY" ++ ".* .* -INFINITY" ++ ) ++ if ocf_is_true $PreferSiteTakeover; then ++ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}") ++ else ++ case "$PreferSiteTakeover" in ++ never|NEVER|Never ) ++ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_NEVER[@]}") ++ ;; ++ * ) ++ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_LOCAL_RESTART[@]}") ++ ;; ++ esac ++ fi + # + DUPLICATE_PRIMARY_TIMEOUT="${OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT:-7200}" + super_ocf_log debug "DBG: DUPLICATE_PRIMARY_TIMEOUT=$DUPLICATE_PRIMARY_TIMEOUT" +@@ -615,7 +767,7 @@ + esac + # + # +- ++ # + remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]}); + if [ -z "$remoteHost" ]; then + if [ ${#otherNodes[@]} -eq 1 ]; then # we are a 2 node cluster, lets assume the other is the remote-host +@@ -640,7 +792,7 @@ + sr_mode="sync" + fi + if [ -n "$remoteNode" ]; then +- rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); ++ remSR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); + fi + super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode" + # optional OCF parameters, we try to guess which directories are correct +@@ -671,26 +823,21 @@ + # + SAPSTARTPROFILE="$(ls -1 $DIR_PROFILE/${OCF_RESKEY_INSTANCE_PROFILE:-${SID}_${InstanceName}_*})" + fi +- # as root user we need the library path to the SAP kernel to be able to call sapcontrol +- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH +- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] +- then +- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH +- export LD_LIBRARY_PATH +- fi + PATH=${PATH}:${DIR_EXECUTABLE}; export PATH ++ local ges_ver ++ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') ++ hdbver=${ges_ver%.*.*} ++ # ++ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state ++ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible ++ # ++ hdbState="hdbnsutil -sr_state" ++ hdbMap="hdbnsutil -sr_state" ++ if version "$hdbver" ">=" "1.00.111"; then ++ hdbState="hdbnsutil -sr_stateConfiguration" ++ hdbMap="hdbnsutil -sr_stateHostMapping" ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" +- ############################# +- # TODO: PRIO9: To be able to call landscapeHostConfig.py without su (so as root) +- # TODO: PRIO9: Research for environment script .htacces or something like that +- #export SAPSYSTEMNAME=ZLF +- #export DIR_INSTANCE=/usr/sap/ZLF/HDB02 +- #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DIR_INSTANCE/exe:$DIR_INSTANCE/exe/Python/lib +- #export PYTHONPATH=$DIR_INSTANCE/$HOST:$DIR_INSTANCE/exe/python_support:$DIR_INSTANCE/exe +- #export PYTHONHOME=$DIR_INSTANCE/exe/Python +- #export SAP_RETRIEVAL_PATH=$DIR_INSTANCE/$HOST +- #export DIR_EXECUTABLE=$DIR_INSTANCE/exe +- ############################# + return $OCF_SUCCESS + } + +@@ -765,7 +912,11 @@ + # or ownership - they will be recreated by sapstartsrv during next start + rm -f /tmp/.sapstream5${InstanceNr}13 + rm -f 
/tmp/.sapstream5${InstanceNr}14 +- $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm ++ ( ++ export PATH="$DIR_EXECUTABLE${PATH:+:}$PATH" ++ export LD_LIBRARY_PATH="$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH" ++ $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm ++ ) + # now make sure the daemon has been started and is able to respond + local srvrc=1 + while [ $srvrc -eq 1 -a $(pgrep -f "sapstartsrv.*$runninginst" | wc -l) -gt 0 ] +@@ -809,31 +960,47 @@ + function check_for_primary() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=$HANA_STATE_DEFECT +- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) +- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') +- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" +- # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files? ++ # TODO: PRIO 3: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback ++ # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsutil does not answer properly -> lookup in config files? + # This might also solve some problems when we could not figure-out the ilocal or remote site name +- for i in 1 2 3 4 5 6 7 8 9; do ++ local chkMethod="" ++ for chkMethod in hU hU hU gP; do ++ case "$chkMethod" in ++ gP ) ++ local gpKeys="" ++ gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id}) ++ node_full_status=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') ++ node_status=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}') ++ super_ocf_log info "ACT: Using getParameter.py as fallback - node_status=$node_status" ++ ;; ++ hU | * ) ++ # DONE: PRIO1: Begginning from SAP HANA rev 112.03 -sr_state is not longer supported ++ node_full_status=$(HANA_CALL --timeout 60 --cmd "$hdbState" 2>/dev/null ) ++ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') ++ super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" ++ ;; ++ esac + case "$node_status" in + primary ) +- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY" +- return $HANA_STATE_PRIMARY;; ++ rc=$HANA_STATE_PRIMARY ++ break;; + syncmem | sync | async ) +- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY" +- return $HANA_STATE_SECONDARY;; ++ rc=$HANA_STATE_SECONDARY ++ break;; + none ) # have seen that mode on second side BEFEORE we registered it as replica +- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE" +- return $HANA_STATE_STANDALONE;; ++ rc=$HANA_STATE_STANDALONE ++ break;; + * ) + super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" + dump=$( echo $node_status | hexdump -C ); + super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" +- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) +- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') ++ # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback ++ # SAP_CALL + super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" + # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes + esac; ++ sleep 2 + done + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + 
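# Illustrative aside (not part of the patch): the chkMethod loop above encodes
# "try hdbnsutil up to three times, then fall back to getParameter.py" simply
# by listing the methods as "hU hU hU gP" and breaking out of the loop on the
# first conclusive answer. The same idiom in isolation (probe_fast and
# probe_slow are hypothetical helpers, used only for illustration):
#   for method in fast fast fast slow; do
#       status=$(probe_$method) && break
#       sleep 2
#   done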
return $rc +@@ -854,12 +1021,18 @@ + { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam="" +- if [ -n "$rem_SR_name" ]; then +- siteParam="--site=$rem_SR_name" ++ if [ -n "$remSR_name" ]; then ++ siteParam="--site=$remSR_name" + fi +- FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? +- super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" +- super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh systemReplicationStatus.py $siteParam ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of systemReplicationStatus.py ++ # SAP_CALL ++ # FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? ++ FULL_SR_STATUS=$(HANA_CALL --timeout 60 --cmd "systemReplicationStatus.py" 2>/dev/null); srRc=$? ++ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" ++ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" + # + # TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported) + # And is the check for return code capable for chains? +@@ -890,7 +1063,7 @@ + # ok we should be careful and set secondary to SFAIL + super_ocf_log info "FLOW $FUNCNAME SFAIL" + set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss" ++ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (5) - srRc=$srRc lss=$lss" + # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary + lpa_set_lpt 10 "$remoteNode" + rc=1 +@@ -898,7 +1071,7 @@ + else + super_ocf_log info "FLOW $FUNCNAME SFAIL" + set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc" ++ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (2) - srRc=$srRc" + # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary + lpa_set_lpt 10 "$remoteNode" + rc=1; +@@ -992,14 +1165,28 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 + # +- su - $sidadm -c "python $DIR_EXECUTABLE/python_support/landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh landscapeHostConfiguration.py ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # DONE: Limit the runtime of landscapeHostConfiguration.py ++ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? ++ if [ $rc -eq 124 ]; then ++ # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? ++ # landscape timeout ++ sleep 20 ++ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? 
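# Illustrative aside (not part of the patch): exit status 124 is what GNU
# timeout(1) returns when it had to kill the wrapped command, which is why the
# code above retries landscapeHostConfiguration.py once after a 20s pause and
# only then gives up. The bare pattern (some_probe is a placeholder command):
#   timeout 60 some_probe; rc=$?
#   if [ $rc -eq 124 ]; then
#       sleep 20
#       timeout 60 some_probe; rc=$?
#   fi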
++ if [ $rc -eq 124 ]; then ++ # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" ++ rc=0 ++ fi ++ fi + return $rc; + } + + # + # function: register_hana_secondary - register local hana as secondary to the other site + # params: - +-# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r) ++# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r), hdbver(r) + # register_hana_secondary + # + function register_hana_secondary() +@@ -1007,17 +1194,31 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=2; + local remoteInstance=""; ++ local newParameter=0 + remoteInstance=$InstanceNr ++ ++ ++ if version "$hdbver" ">=" "1.00.110"; then ++ newParameter=1 ++ fi ++ + if ocf_is_true ${AUTOMATED_REGISTER}; then +- # +- # +- # +- # +- # +- super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" +- # +- # +- su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_register ... ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of hdbnsutil -sr_register ???? ++ if [ $newParameter -eq 1 ]; then ++ local hanaOM="" ++ hanaOM=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_OPERATION_MODE[@]}) ++ if [ -n "$hanaOM" ]; then ++ hanaOM="--operationMode=$hanaOM" ++ fi ++ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name" ++ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name"; rc=$? ++ else ++ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" ++ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? ++ fi + # backup_global_and_nameserver + else + super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE" +@@ -1051,7 +1252,7 @@ + check_sapstartsrv + rc=$? + # +- # TODO: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? ++ # DONE: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? + # + if [ $rc -eq $OCF_SUCCESS ]; then + output=$($SAPCONTROL -nr $InstanceNr -function Start) +@@ -1169,7 +1370,7 @@ + 0 ) # LPA says start-up + lpa_advice="start" + # TODO: PRIO1: We need to do a special handling for remote being a 234-Secondary in SR Status SOK +- # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) ++ # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) + # then lpa_advice="wait" + remoteRole=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_ROLES[@]}) + remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) +@@ -1193,17 +1394,20 @@ + 1) # LPA says register! 
+ lpa_advice="register" + ;; +- 2) # LPA says wait for second LPT ++ 2) # LPA says wait for older LPA to expire ++ lpa_advice="wait" ++ ;; ++ 3) # LPA says to wait for remote LPA to be reported/announced + lpa_advice="wait" + ;; +- 3 | 4 ) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss ++ 4) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss + lpa_advice="fail" + ;; +- * ) # LPA failed with an unkonown status - FAIL resource ++ *) # LPA failed with an unkonown status - FAIL resource + lpa_advice="fail" + ;; + esac +- ++ + # DONE: PRIO2: Do we need to differ 0 and 1 here? While 0 is a fatal SAP error, 1 for down/error + if [ $lss -eq 0 ]; then + super_ocf_log err "ACT: get_hana_landscape_status reports FATAL" +@@ -1218,7 +1422,7 @@ + 2 | 3 | 4 ) # as landcape says we are up - just set the scores and return code + super_ocf_log info "LPA: landcape: UP, LPA: start ==> keep running" + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + rc=$OCF_SUCCESS + ;; + 1 ) # landcape says we are down, lets start and adjust scores and return code +@@ -1226,7 +1430,7 @@ + saphana_start + rc=$? + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + ;; + esac + scoring_crm_master "$my_role" "$my_sync" +@@ -1250,11 +1454,11 @@ + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then + super_ocf_log info "ACT: Register successful" + lpa_push_lpt 10 +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + set_crm_master 0 + saphana_start_secondary + rc=$? +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + else + super_ocf_log err "ACT: Register failed" + rc=$OCF_NOT_RUNNING +@@ -1279,11 +1483,19 @@ + rc=$OCF_ERR_GENERIC + ;; + 1 ) # we are down, so we should wait --> followup in next monitor +- super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" +- # TODO: PRIO3: Check, if WAITING is correct here +- set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} +- set_crm_master -9000 +- rc=$OCF_SUCCESS ++ # DONE: PRIO3: Check, if WAITING is correct here ++ if ocf_is_true "$AUTOMATED_REGISTER" ; then ++ super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" ++ super_ocf_log info "RA: landcape: DOWN, LPA: wait ==> keep waiting" ++ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_crm_master -9000 ++ rc=$OCF_SUCCESS ++ else ++ super_ocf_log warning "LPA: OLD primary needs manual registration (AUTOMATED_REGISTER='false')" ++ set_hana_attribute ${NODENAME} "WAITING4REG" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_crm_master -9000 ++ rc=$OCF_NOT_RUNNING ++ fi + ;; + esac + ;; +@@ -1309,22 +1521,24 @@ + local ch ch_role + # + # get actual list of cluster members +- # ++ # + if [ -n "$otherNodes" ]; then + for ch in ${otherNodes[@]}; do + if [ $rc -eq 1 ]; then + ch_role=$(get_hana_attribute ${ch} ${ATTR_NAME_HANA_ROLES[@]}) +-# TODO: PRIO3: check if [0-9], [234] or [34] is correct +-# TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? +-# grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 +-# grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 +-# Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master +- awk -F: 'BEGIN { rc=1 } +- $1 ~ "[34]" && $2 ="P" && $4="master" { rc=0 } +- END { exit rc }' <<< $ch_role ; rc=$? 
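# Illustrative aside (not part of the patch): the awk line removed above used
# "=" (assignment) where "==" (comparison) was meant; in awk an assignment
# always yields a true, non-empty value, so the P and master fields were never
# actually tested and only the "[34]" match constrained the result. Compare:
#   echo 'x:Q:y:master' | awk -F: '$2 = "P" { print "matched (assignment!)" }'
#   echo 'x:Q:y:master' | awk -F: '$2 == "P" { print "matched" }'   # prints nothing
# The replacement hunk uses "==" throughout, restoring the intended check.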
++ # TODO: PRIO3: check if [0-9], [234] or [34] is correct ++ # TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? ++ # grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 ++ # grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 ++ # Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master ++ super_ocf_log debug "DBG: check_for_primary_master (3) ch_role=$ch_role" ++ awk -F: 'BEGIN { rc=1 } ++ $1 ~ "[34]" && $2 == "P" && $4 == "master" { rc=0 } ++ END { exit rc }' <<< $ch_role ; rc=$? ++ super_ocf_log debug "DBG: check_for_primary_master (4) rc=$rc" + fi + done +- fi ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1378,7 +1592,7 @@ + ####### LPA - begin + # + lpa_push_lpt 10 +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + # + ####### LPA - end + # +@@ -1404,7 +1618,7 @@ + rc=$OCF_SUCCESS + fi + else +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + fi + else + super_ocf_log info "ACT: wait_for_primary_master ==> WAITING" +@@ -1454,7 +1668,7 @@ + then + if [ $STATE -eq $OCF_NOT_RUNNING ] + then +- [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" ++ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE status color is $COLOR !" + rc=$STATE + fi + count=1 +@@ -1511,13 +1725,17 @@ + local crm_rc=1 + local lpt=$1 + local clpt=-1 +- local node=${2:-${NODENAME}} ++ local node=$2 + set_hana_attribute ${node} "$lpt" ${LPA_ATTR[@]}; crm_rc=$? +- clpt=$(lpa_get_lpt $NODENAME) +- if [ "$lpt" != "$clpt" ]; then +- rc=2 ++ if [ -n "$node" ]; then ++ clpt=$(lpa_get_lpt $NODENAME) ++ if [ "$lpt" != "$clpt" ]; then ++ rc=2 ++ else ++ rc=0 ++ fi + else +- rc=0 ++ super_ocf_log info "DEC: lpa_set_lpt ignore to change value for empty node name" + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc +@@ -1608,7 +1826,7 @@ + else + rc=2 + fi +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1621,9 +1839,10 @@ + # + # Returncodes: + # 0: start +-# 1: register than start +-# 2: wait4gab +-# 3: wait4other ++# 1: register (then start) ++# 2: wait4gab (WAIT4LPA - Older LPA needs to expire) ++# 3: wait4other (WAIT4LPA - Remote LPA needs to be announced) ++# 4: lpa internal error + # + # Initializing (if NO local LPT-file): + # SECONDARY sets to 10 +@@ -1648,7 +1867,7 @@ + # + function lpa_check_lpt_status() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local rc=0 ++ local rc=4 + local LPTloc=-1 + local LPTrem=-1 + local LPTMark=1000 +@@ -1666,16 +1885,16 @@ + if [ -z "$LPTloc" -o "$LPTloc" -eq -1 -o "$lparc" -ne 0 ]; then + # last option - try to initialize as PRIMARY + lpa_push_lpt 20 +- lpa_set_lpt 20 ++ lpa_set_lpt 20 $NODENAME + LPTloc=20 # DEFAULT + fi + fi +- # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt ++ # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt + LPTrem=$(lpa_get_lpt $remoteNode); lparc=$? 
+ if [ $lparc -ne 0 ]; then + # LPT of the other node could not be evaluated - LPA says WAIT + super_ocf_log debug "DBG: LPA: LPTloc=$LPTloc, LPTrem undefined ==> WAIT" +- rc=2 ++ rc=3 + else + super_ocf_log debug "DBG: LPA: LPTloc ($LPTloc) LPTrem ($LPTrem) delta ($delta)" + if [ $LPTloc -lt $LPTMark -a $LPTrem -lt $LPTMark ]; then +@@ -1683,11 +1902,11 @@ + else + delta=$DUPLICATE_PRIMARY_TIMEOUT # at least one of the lpts is a real timestamp so include delta-gap + fi +- if (( delta < LPTloc - LPTrem )); then ++ if (( delta < LPTloc - LPTrem )); then + # We are the winner - LPA says STARTUP + super_ocf_log debug "DBG: LPA: LPTloc wins $LPTloc > $LPTrem + $delta ==> START" + rc=0 +- elif (( delta < LPTrem - LPTloc )); then ++ elif (( delta < LPTrem - LPTloc )); then + if ocf_is_true "$AUTOMATED_REGISTER" ; then + # The other one has won - LPA says REGISTER + super_ocf_log debug "DBG: LPA: LPTrem wins $LPTrem > $LPTloc + $delta ==> REGISTER" +@@ -1697,12 +1916,12 @@ + rc=2 + fi + +- else ++ else + super_ocf_log debug "DBG: LPA: Difference between LPTloc and LPTrem is less than delta ($delta) ==> WAIT" + # TODO: PRIO3: ADD STALEMATE-HANDLING HERE; currently admin should set one of the lpa to 20 + rc=2 +- fi +- fi ++ fi ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1716,6 +1935,7 @@ + { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 ++ # always true for scale-up + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -1728,23 +1948,15 @@ + # + function saphana_start_clone() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING ++ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING + local sqlrc; +- local chkusr; +- # TODO: PRIO4: remove check_secstore_users later +- secUser=$(check_secstore_users SAPHANA${SID}SR SLEHALOC RHELHALOC) ; chkusr=$? +- if [ $chkusr -ne 0 ]; then +- super_ocf_log err "ACT: Secure store users are missing (see best practice manual how to setup the users)" +- rc=$OCF_ERR_CONFIGURED ++ set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ check_for_primary; primary_status=$? ++ if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then ++ saphana_start_primary; rc=$? + else +- set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} +- check_for_primary; primary_status=$? +- if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then +- saphana_start_primary; rc=$? +- else +- lpa_set_lpt 10 +- saphana_start_secondary; rc=$? +- fi ++ lpa_set_lpt 10 $NODENAME ++ saphana_start_secondary; rc=$? + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc +@@ -1761,9 +1973,10 @@ + local rc=0 + local primary_status="x" + set_hana_attribute ${NODENAME} "UNDEFINED" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ super_ocf_log debug "DBG: SET UNDEFINED" + check_for_primary; primary_status=$? + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + fi + saphana_stop; rc=$? + return $rc +@@ -1813,26 +2026,42 @@ + # seems admin already decided that for us? -> we are running - set DEMOTED + promoted=0; + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + fi + lpa_check_lpt_status; lparc=$? +- # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes +- if [ $lparc -lt 2 ]; then +- # lpa - no need to wait any longer - lets try a new start +- saphana_start_clone +- rc=$? 
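# Illustrative aside (not part of the patch): lpa_check_lpt_status now reports
# five distinct outcomes - 0 start, 1 register, 2 wait for the older LPA entry
# to expire, 3 wait for the remote LPA entry to be announced, 4 internal error
# - so the old two-way "[ $lparc -lt 2 ]" test being removed here gives way to
# an explicit case dispatch, in the spirit of:
#   case "$lparc" in
#       0|1) echo "start (registering first if lparc=1)" ;;
#       2|3) echo "keep waiting" ;;
#       4)   echo "LPA internal error" ;;
#   esac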
+- super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc +- else +- lpa_init_lpt $HANA_STATE_PRIMARY +- # still waiting for second site to report lpa-lpt +- if ocf_is_true "$AUTOMATED_REGISTER" ; then +- super_ocf_log info "LPA: Still waiting for remote site to report LPA status" +- else +- super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" +- fi +- return $OCF_SUCCESS +- fi ++ # DONE: PRIO1: Need to differ lpa_check_lpt_status return codes ++ case "$lparc" in ++ 0 | 1 ) ++ # lpa - no need to wait any longer - lets try a new start ++ saphana_start_clone ++ rc=$? ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc ++ ;; ++ 2 ) ++ lpa_init_lpt $HANA_STATE_PRIMARY ++ # still waiting for second site to expire ++ if ocf_is_true "$AUTOMATED_REGISTER" ; then ++ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" ++ else ++ super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" ++ super_ocf_log info "LPA: You need to manually sr_register the older primary" ++ fi ++ return $OCF_SUCCESS ++ ;; ++ 3 ) ++ lpa_init_lpt $HANA_STATE_PRIMARY ++ # still waiting for second site to report lpa-lpt ++ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" ++ return $OCF_SUCCESS ++ ;; ++ 4 ) ++ # lpa internal error ++ # TODO PRIO3: Impplement special handling for this issue - should we fail the ressource? ++ super_ocf_log info "LPA: LPA reports an internal error" ++ return $OCF_SUCCESS ++ ;; ++ esac + promoted=0; + ;; + UNDEFINED ) +@@ -1848,7 +2077,7 @@ + ;; + esac + fi +- get_hana_landscape_status; lss=$? ++ get_hana_landscape_status; lss=$? + super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" + case "$lss" in + 0 ) # FATAL or ERROR +@@ -1876,19 +2105,20 @@ + # + # TODO PRIO1: REMOVE remoteNode dependency - get_sync_status + remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) ++ # TODO HANDLING OF "NEVER" + case "$remoteSync" in + SOK | PRIM ) + super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)" + set_crm_master 5 + if check_for_primary_master; then +- lpa_set_lpt 20 ++ lpa_set_lpt 20 $NODENAME + fi + ;; + SFAIL ) +- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" ++ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" + ;; + * ) +- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" ++ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" + ;; + esac + else +@@ -1916,7 +2146,7 @@ + rc=$OCF_SUCCESS + else + LPTloc=$(date '+%s') +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + lpa_push_lpt $LPTloc + if [ "$promoted" -eq 1 ]; then + set_hana_attribute "$NODENAME" "PRIM" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +@@ -1931,12 +2161,14 @@ + fi + my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) + my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) +- case "$my_role" in ++ case "$my_role" in + [12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status + ;; + [34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query + if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then +- 
analyze_hana_sync_statusSRS ++ if [ "$promote_attr" = "PROMOTED" ]; then ++ analyze_hana_sync_statusSRS ++ fi + else + analyze_hana_sync_statusSQL + fi +@@ -1949,8 +2181,8 @@ + [234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster + lpa_check_lpt_status; again_lpa_rc=$? + if [ $again_lpa_rc -eq 2 ]; then +- super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" +- lpa_set_lpt 10 ++ super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" ++ lpa_set_lpt 10 $NODENAME + lpa_push_lpt 10 + rc=$OCF_NOT_RUNNING + fi +@@ -1993,7 +2225,7 @@ + # OK, we are running as HANA SECONDARY + # + if ! lpa_get_lpt ${NODENAME}; then +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + lpa_push_lpt 10 + fi + promote_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_CLONE_STATE[@]}) +@@ -2042,17 +2274,25 @@ + 0 ) # FATAL + # DONE: PRIO1: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error + # TODO: PRIO3: is OCF_ERR_GENERIC best option? +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + rc=$OCF_ERR_GENERIC + ;; + 1 ) # ERROR +- lpa_set_lpt 10 ++ lpa_set_lpt 10 $NODENAME + rc=$OCF_NOT_RUNNING + ;; + 2 | 3 | 4 ) # WARN INFO OK + rc=$OCF_SUCCESS +- lpa_set_lpt 30 ++ lpa_set_lpt 30 $NODENAME + sync_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) ++ local hanaOM="" ++ local hanaOut1="" ++ # TODO: PRIO 3: check, if using getParameter.py is the best option to analyze the set operationMode ++ # DONE: PRIO 3: Should we default to logreplay for SAP HANA >= SPS11 ? ++ hanaOut1=$(HANA_CALL --timeout 10 --use-su --cmd "getParameter.py --key=global.ini/system_replication/operation_mode --sapcontrol=1") ++ hanaFilter1=$(echo "$hanaOut1" | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') ++ hanaOM=$(echo "$hanaFilter1" | awk -F= '$1=="operation_mode" {print $2}') ++ set_hana_attribute ${NODENAME} "$hanaOM" ${ATTR_NAME_HANA_OPERATION_MODE[@]} + super_ocf_log debug "DBG: sync_attr=$sync_attr" + case "$sync_attr" in + "SOK" ) # This is a possible node to promote, when primary is missing +@@ -2112,7 +2352,7 @@ + fi + # + # First check, if we are PRIMARY or SECONDARY +- # ++ # + check_for_primary; primary_status=$? + if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then + # FIX: bsc#919925 Leaving Node Maintenance stops HANA Resource Agent +@@ -2145,7 +2385,7 @@ + # + # function: saphana_promote_clone - promote a hana clone + # params: - +-# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), ++# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), + # saphana_promote_clone: + # In a Master/Slave configuration get Master being the primary OR by running hana takeover + # +@@ -2169,7 +2409,7 @@ + else + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then + # +- # we are SECONDARY/SLAVE and need to takepover ... ++ # we are SECONDARY/SLAVE and need to takeover ... promote on the replica (secondary) side... + # promote on the replica side... + # + hana_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) +@@ -2178,9 +2418,14 @@ + super_ocf_log info "ACT: !!!!!!! Promote REPLICA $SID-$InstanceName to be primary. !!!!!!" 
+ LPTloc=$(date '+%s') + # lpa_set_lpt 20 $remoteNode +- lpa_set_lpt $LPTloc ++ lpa_set_lpt $LPTloc $NODENAME + lpa_push_lpt $LPTloc +- su - $sidadm -c "hdbnsutil -sr_takeover" ++ # TODO: Get rid of the su by using a new interface: ++ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_takeover ... ++ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? ++ # TODO: Limit the runtime of hdbnsutil -sr_takeover ???? ++ # SAP_CALL ++ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_takeover" + # + # now gain check, if we are primary NOW + # +@@ -2248,7 +2493,6 @@ + SAPSTARTPROFILE="" + SAPHanaFilter="ra-act-dec-lpa" + +-NODENAME=$(crm_node -n) + + + if [ $# -ne 1 ] +@@ -2306,8 +2550,7 @@ + fi + + # What kind of method was invoked? +-THE_VERSION=$(saphana_meta_data | grep ' + + +- 0.151.1 ++ $SAPHanaVersion + Analyzes SAP HANA System Replication Topology. + This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to + all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases. +@@ -207,12 +215,12 @@ + dstr=$(date) + case "$attr_store" in + reboot | forever ) +- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE +- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file ++ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? + ;; + props ) +- echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE +- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file ++ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? + ;; + esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -282,6 +290,53 @@ + } + + # ++# function: dequote - filter: remove quotes (") from stdin ++# params: - ++# globals: - ++function dequote() ++{ ++ local rc=0; tr -d '"'; return $rc ++} ++ ++# function: version: cpmpare two HANA version strings ++function ver_lt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 && return 0 || return 1 ++} ++ ++function ver_le() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 ++} ++ ++function ver_gt() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 && return 0 || return 1 ++} ++ ++function ver_ge() { ++ ocf_version_cmp $1 $2 ++ test $? -eq 2 -o $? 
-eq 1 && return 0 || return 1
++}
++#
++# function: version: compare two HANA version strings
++#
++function version() {
++ if [ $# -eq 3 ]; then
++ case "$2" in
++ LE | le | "<=" ) ver_le $1 $3;;
++ LT | lt | "<" ) ver_lt $1 $3;;
++ GE | ge | ">=" ) ver_ge $1 $3;;
++ GT | gt | ">" ) ver_gt $1 $3;;
++ * ) return 1;
++ esac
++ elif [ $# -ge 5 ]; then
++ version $1 $2 $3 && shift 2 && version $*
++ else
++ return 1;
++ fi
++}
++#
+ # function: is_clone - report, if resource is configured as a clone (also master/slave)
+ # params: -
+ # globals: OCF_*(r)
+@@ -314,12 +369,74 @@
+ }
+
+ #
++# function: HANA_CALL
++# params: timeout-in-seconds cmd-line
++# globals: sid(r), SID(r), InstanceName(r)
++#
++function HANA_CALL()
++{
++ #
++ # TODO: PRIO 5: remove 'su - ${sidadm}' later, when SAP HANA has resolved the issue with
++ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp)
++ # TODO: PRIO 5: Maybe make "su" optional by a parameter
++ local timeOut=0
++ local onTimeOut=""
++ local rc=0
++ local use_su=1 # Default to be changed later (see TODO above)
++ local pre_cmd=""
++ local cmd=""
++ local pre_script=""
++ local output=""
++ while [ $# -gt 0 ]; do
++ case "$1" in
++ --timeout ) timeOut=$2; shift;;
++ --use-su ) use_su=1;;
++ --on-timeout ) onTimeOut="$2"; shift;;
++ --cmd ) shift; cmd="$*"; break;;
++ esac
++ shift
++ done
++
++ if [ $use_su -eq 1 ]; then
++ pre_cmd="su - ${sid}adm -c"
++ pre_script="true"
++ else
++ # as root user we need the library path to the SAP kernel to be able to call sapcontrol
++ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH
++ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ]
++ then
++ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH
++ fi
++ pre_cmd="bash -c"
++ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH"
++ fi
++ case $timeOut in
++ 0 | inf )
++ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$?
++ ;;
++ * )
++ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$?
++ #
++ # on timeout ... 
++ # ++ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then ++ local second_output="" ++ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); ++ fi ++ ;; ++ esac ++ echo "$output" ++ return $rc; ++} ++ ++# + # function: sht_init - initialize variables for the resource agent + # params: - + # globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), +-# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) ++# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w) + # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w) + # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w) ++# globals: NODENAME(w), hdbver(w) + # sht_init : Define global variables with default values, if optional parameters are not set + # + # +@@ -331,12 +448,14 @@ + local hdbANSWER="" + local siteID + local siteNAME ++ local chkMethod="" + HOSTEXECNAME=saphostexec + USRSAP=/usr/sap + SAPSERVICE_PATH=${USRSAP}/sapservices + SAPHOSTCTRL_PATH=${USRSAP}/hostctrl/exe + HOSTEXEC_PATH=${SAPHOSTCTRL_PATH}/${HOSTEXECNAME} + HOSTEXEC_PROFILE_PATH=${SAPHOSTCTRL_PATH}/host_profile ++ NODENAME=$(crm_node -n) + SID=$OCF_RESKEY_SID + InstanceNr=$OCF_RESKEY_InstanceNumber + myInstanceName="${SID}_HDB${InstanceNr}" +@@ -382,13 +501,6 @@ + DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" + fi + +- # as root user we need the library path to the SAP kernel to be able to call sapcontrol +- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH +- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] +- then +- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH +- export LD_LIBRARY_PATH +- fi + + PATH=${PATH}:${DIR_EXECUTABLE} + # +@@ -399,12 +511,45 @@ + *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; + *cman* ) nodelist=$(crm_node -l);; + esac ++ # ++ # get HANA version ++ # ++ local ges_ver ++ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') ++ hdbver=${ges_ver%.*.*} ++ # ++ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state ++ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible ++ # ++ hdbState="hdbnsutil -sr_state" ++ hdbMap="hdbnsutil -sr_state" ++ if version "$hdbver" ">=" "1.00.111"; then ++ hdbState="hdbnsutil -sr_stateConfiguration" ++ hdbMap="hdbnsutil -sr_stateHostMapping" ++ fi + #### SAP-CALL + # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode +- for i in 1 2 3 4 5 6 7 8 9; do +- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) +- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" +- srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') ++ for chkMethod in hU hU hU gP ; do ++ # DONE: Limit the runtime of hdbnsutil. 
++ # TODO: Use getParameter.py if we get no answer
++ # SAP_CALL
++ #super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER"
++ #srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
++ case "$chkMethod" in
++ gP ) # call getParameter (gP)
++ local gpKeys=""
++ gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id})
++ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }')
++ srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}')
++ super_ocf_log info "ACT: hdbnsutil not answering - using global.ini as fallback - srmode=$srmode"
++ ;;
++ hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod )
++ # DONE: PRIO1: Beginning with SAP HANA rev 112.03, -sr_state is no longer supported
++ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbState --sapcontrol=1" 2>/dev/null)
++ super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER"
++ srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}')
++ ;;
++ esac
+ case "$srmode" in
+ primary | syncmem | sync | async | none )
+ # we can leave the loop as we already got a result
+@@ -417,27 +562,51 @@
+ esac
+ done
+ # TODO PRIO3: Implement a file lookup, if we did not get a result
+- siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}')
+- siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
++ siteID=$(echo "$hdbANSWER" | awk -F= '/site.id/ {print $2}') # allow 'site_id' AND 'site id'
++ siteNAME=$(echo "$hdbANSWER" | awk -F= '/site.name/ {print $2}')
+ site=$siteNAME
+ srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
+- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site)
+- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
+ #
+- # filter all non-cluster mappings
++ # for rev >= 111 we use the new mapping query
+ #
+- # DONE: PRIO2: Need mapping between HANA HOSTS not cluster NODES
+- local hanaVHost
+- hanaRemoteHost=$(for n1 in $nodelist; do
+- hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
+- for n2 in $MAPPING; do
+- if [ "$hanaVHost" == "$n2" ]; then
+- echo $hanaVHost;
+- fi;
+- done;
+- done )
+- super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
+- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
++ if version "$hdbver" ">=" "1.00.111"; then
++ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbMap --sapcontrol=1" 2>/dev/null)
++ fi
++ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site)
++ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
++ if [ -n "$MAPPING" ]; then
++ # we have a mapping from HANA, let's use it
++ #
++ # filter all non-cluster mappings
++ #
++ local hanaVHost=""
++ local n1=""
++ hanaRemoteHost=""
++ for n1 in $nodelist; do
++ hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
++ for n2 in $MAPPING; do
++ if [ "$hanaVHost" == "$n2" ]; then
++ hanaRemoteHost="$hanaVHost"
++ fi;
++ done;
++ done
++ super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
++ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
++ else
++ # HANA DID NOT TELL US THE MAPPING, LET'S TRY TO USE THE SITE ATTRIBUTES
++ local n1=""
++ local hanaSite=""
++ for n1 in $nodelist; do
++ # TODO: PRIO9 - For 
multi tier with more than 2 chain/star members IN the cluster we might need to be
++ # able to catch more than one remoteHost
++ # currently having more than 2 HANA chain/star members IN the cluster is not allowed, the third member must be external
++ if [ "$NODENAME" != "$n1" ]; then
++ hanaSite=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_SITE[@]})
++ hanaRemoteHost="$n1"
++ fi
++ done
++ super_ocf_log info "DEC: site=$site, mode=$srmode, hanaRemoteHost=$hanaRemoteHost - found by remote site ($hanaSite)"
++ fi
+ super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS"
+ return $OCF_SUCCESS
+ }
+@@ -446,38 +615,29 @@
+ # function: check_for_primary - check if local SAP HANA is configured as primary
+ # params: -
+ # globals: HANA_STATE_PRIMARY(r), HANA_STATE_SECONDARY(r), HANA_STATE_DEFECT(r), HANA_STATE_STANDALONE(r)
++# srmode(r)
+ #
+ function check_for_primary() {
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
+- node_status=$srmode
+- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status"
+- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
+- for i in 1 2 3 4 5 6 7 8 9; do
+- case "$node_status" in
+- primary )
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
+- return $HANA_STATE_PRIMARY;;
+- syncmem | sync | async )
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
+- return $HANA_STATE_SECONDARY;;
+- none ) # have seen that mode on second side BEFEORE we registered it as replica
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
+- return $HANA_STATE_STANDALONE;;
+- * )
+- # TODO: PRIO1: Should we set SFAIL?
+- # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+- dump=$( echo $node_status | hexdump -C );
+- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>"
+- #### SAP-CALL
+- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
+- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
+- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status"
+- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+- esac;
+- done
+- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT"
+- return $HANA_STATE_DEFECT
++ super_ocf_log debug "DBG: check_for_primary: srmode=$srmode"
++ case "$srmode" in
++ primary )
++ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
++ rc=$HANA_STATE_PRIMARY;;
++ syncmem | sync | async )
++ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
++ rc=$HANA_STATE_SECONDARY;;
++ none ) # have seen that mode on the second side BEFORE we registered it as replica
++ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
++ rc=$HANA_STATE_STANDALONE;;
++ * )
++ dump=$( echo $srmode | hexdump -C );
++ super_ocf_log err "ACT: check_for_primary: we didn't expect srmode to be: DUMP: <$dump>"
++ rc=$HANA_STATE_DEFECT
++ esac;
++ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
++ return $rc
+ }
+
+
+@@ -653,7 +813,7 @@
+ function sht_stop_clone() {
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
+- check_for_primary; primary_status=$?
++ check_for_primary; primary_status=$? 
+ if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
+ hanaPrim="P"
+ elif [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
+@@ -663,7 +823,7 @@
+ else
+ hanaPrim="-"
+ fi
+- set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]}
++ set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]} 
+ sht_stop; rc=$?
+ return $rc
+ }
+@@ -718,28 +878,49 @@
+ fi
+ # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API?
+ # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
+- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8
++ # We rely on the following format: SID is word#4, SYSNR is word#6, vHost is word#8
+ #### SAP-CALL
+ vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \
+- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr 2>/dev/null )
++ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null )
+ # super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
+ if [ -n "$vName" ]; then
+- set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]}
++ set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]} 
+ else
+ vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]})
+ fi
+ #site=$(get_site_name)
+ #### SAP-CALL
+- hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
+- hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName})
++ # SAP_CALL
++ #hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
++ #
++ # since rev 09x SAP has added the --sapcontrol option for the landscapeHostConfiguration interface
++ # we begin to use --sapcontrol with rev 100
++ # since rev 120 we need to use the --sapcontrol, because SAP changed the tool output
++ #
++ if version "$hdbver" ">=" "1.00.100"; then
++ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); hanalrc="$?"
++ # TODO: PRIO9: Do we need to check the lines: 'SAPCONTROL-OK: ' and 'SAPCONTROL-OK: '?
++ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | \
++ awk -F= '$1 == "nameServerConfigRole" {f1=$2}
++ $1 == "nameServerActualRole" {f2=$2}
++ $1 == "indexServerConfigRole" {f3=$2}
++ $1 == "indexServerActualRole" {f4=$2}
++ END { printf "%s:%s:%s:%s\n", f1, f2, f3,f4 }')
++ else
++ #
++ # old code for backward compatibility
++ #
++ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?" 
++ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName}) ++ fi + #if [ -z "$MAPPING" ]; then + # super_ocf_log info "ACT: Did not find remote Host at this moment" + #fi + # FH TODO PRIO3: TRY TO GET RID OF "ATTR_NAME_HANA_REMOTEHOST" + if [ -n "$hanaRemoteHost" ]; then +- set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} ++ set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} + fi +- set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} ++ set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} + if [ -n "$site" ]; then + set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]} + fi +@@ -748,8 +929,8 @@ + S ) # only secondary may propargate its sync status + case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in + *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; +- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; +- *cman* ) nodelist=$(crm_node -l);; ++ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; ++ *cman* ) nodelist=$(crm_node -l);; + esac + + for n in ${nodelist}; do +@@ -789,7 +970,6 @@ + InstanceNr="" + DIR_EXECUTABLE="" + SAPHanaFilter="ra-act-dec-lpa" +-NODENAME=$(crm_node -n) + + if [ $# -ne 1 ] + then +@@ -846,8 +1026,7 @@ + fi + fi + +-THE_VERSION=$(sht_meta_data | grep ' ++# License: GPL v2+ ++my $Version="0.18.2016.02.16.1"; + # ++################################################################## + use POSIX; + use strict; ++use Sys::Syslog; ++use Sys::Hostname; ++use File::Path; ++use Getopt::Long; ++use lib '/usr/share/SAPHanaSR/tests'; ++use SAPHanaSRTools; ++ ++################################### ++## this part is not for scale out and currently NOT zero-config ++ ++my $ClusterNodes=2; ++my $ClusterPrimaries=1; ++my $ClusterSecondaries=1; ++my %Name; ++my %Host; ++my $host = hostname(); + ++my $varlib='/var/lib/SAPHanaTD'; ++my $testfile='SAPHanaTD.status'; ++my $testcount=0; ++my $first_test=1; + my $sid=""; +-my $table_title = "Host \\ Attr"; +-my %Name; ++my @sids; ++my $ino=""; ++my $sortBy=""; ++my $table_titleH = "Host"; ++#my %Name; + my %Host; ++my %Site; ++my %Global; ++my %HName; ++my %SName; ++my %GName; ++my $help; ++my $version; ++my $cibFile=""; ++ ++sub init() ++{ ++ my $result = GetOptions ("sid=s" => \@sids, ++ "sort=s" => \$sortBy, ++ "cib=s" => \$cibFile, ++ "version" => \$version, ++ "help" => \$help, ++ ); ++ return 0; ++} ++ ++init(); ++ ++if ( $help ) { ++ printf "SAPHanaSR-showAttr {[--sid=]} [--sort=] [--cib=]\n"; ++ printf ""; ++ exit 0; ++} ++if ( $version ) { ++ printf "%s\n", $Version; ++ exit 0; ++} ++ ++if ( $cibFile ne "" ) { ++ printf "Using cib file %s\n", $cibFile; ++} + + sub max { # thanks to http://www.perlunity.de/perl/forum/thread_018329.shtml + my $a = shift; +@@ -21,113 +80,75 @@ + return $a > $b ? 
$a : $b; + } + +-sub print_attr_host() +-{ +- my ($HKey, $AKey); +- printf "%-22s", "Attribute \\ Host"; +- foreach $HKey (sort keys %Host) { +- printf "%-16s ", $HKey; +- } +- printf "\n"; +- +- printf "%s\n", "-" x 120 ; +- +- foreach $AKey (sort keys %Name) { +- printf "%-22s", $AKey; +- foreach $HKey (sort keys %Host) { +- printf "%-16.16s ", $Host{$HKey} -> {$AKey}; +- } +- +- printf "\n"; +- } +- return 0; +-} +- +-sub print_host_attr() +-{ +- my ($AKey, $HKey, $len, $line_len, $hclen); +- $hclen=$Name{_hosts}->{_length}; +- $line_len=$hclen+1; +- printf "%-$hclen.${hclen}s ", "$table_title"; +- foreach $AKey (sort keys %Name) { +- if ($AKey ne "_hosts") { +- $len = $Name{$AKey}->{_length}; +- $line_len=$line_len+$len+1; +- printf "%-$len.${len}s ", $Name{$AKey}->{_title}; ++sub read_cib($) { ++ my $sid = shift(); ++ if ( $cibFile eq "" ) { ++ printf "Open live cib\n"; ++ open CIB, "cibadmin -Ql |" or die "CIB could not be read from cluster"; ++ } else { ++ open CIB, "<$cibFile" or die "CIB file $cibFile not found or not able to read it"; ++ } ++ while () { ++ chomp; ++ my ($host, $name, $site, $value); ++ if ( $_ =~ /cib-last-written="([^"]*)"/ ) { ++ printf "CIB-time: %s\n", $1; + } +- } +- printf "\n"; +- printf "%s\n", "-" x $line_len ; +- foreach $HKey (sort keys %Host) { +- printf "%-$hclen.${hclen}s ", $HKey; +- foreach $AKey (sort keys %Name) { +- if ($AKey ne "_hosts") { +- $len = $Name{$AKey}->{_length}; +- printf "%-$len.${len}s ", $Host{$HKey} -> {$AKey}; +- } +- } +- printf "\n"; +- } +- return 0; +-} +- +-open ListInstances, "/usr/sap/hostctrl/exe/saphostctrl -function ListInstances|"; +-while () { +- # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 +- chomp; +- if ( $_ =~ /:\s+([A-Z][A-Z0-9][A-Z0-9])\s+-/ ) { +- $sid=tolower("$1"); +- } +-} +-close ListInstances; +- +- +-open CIB, "cibadmin -Ql |"; +-while () { +- chomp; +- my ($host, $name, $value); +- my $found=0; +- if ( $_ =~ /nvpair.*name="(\w+_${sid}_\w+)"/ ) { +- $name=$1; +- # find attribute in forever and reboot store :) +- if ( $_ =~ /id="(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) { +- $host=$2; +- } +- if ( $_ =~ /value="([^"]+)"/ ) { +- $value=$1; +- $found=1; +- } +- } +- if ( $found == 1 ) { +- # +- # handle the hosts name and table-title +- # +- $Host{$host}->{$name}=${value}; +- if ( defined ($Name{_hosts}->{_length})) { +- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length($host )); +- } else { +- $Name{_hosts}->{_length} = length($host ); ++ if ( $_ =~ /node_state id=".+" uname="([a-zA-Z0-9\-\_]+)" .*crmd="([a-zA-Z0-9\-\_]+)"/ ) { ++ insertAttribute($sid, \%Host, \%HName, $1, "node_status", $2); + } +- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length( $table_title)); +- # +- # now handle the attributes name and value +- # +- $Name{$name}->{$host}=${value}; +- if ( defined ($Name{$name}->{_length})) { +- $Name{$name}->{_length} = max($Name{$name}->{_length}, length($value )); +- } else { +- $Name{$name}->{_length} = length($value ); ++ if ( $_ =~ /nvpair.*name="([a-zA-Z0-9\_\-]+_${sid}_([a-zA-Z0-9\-\_]+))"/ ) { ++ $name=$1; ++ if ( $_ =~ /id=.(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) { ++ # found attribute in nodes forever and reboot store ++ $host=$2; ++ if ( $_ =~ /value="([^"]+)"/ ) { ++ $value=$1; ++ insertAttribute($sid, \%Host, \%HName, $host, $name, $value); ++ } ++ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_site_[a-zA-Z0-9\-]+_([a-zA-Z0-9\_\-]+)/) { ++ # found a site attribute ++ $site=$1; ++ if ( $name =~ 
/[a-zA-Z0-9\_\-]+_site_([a-zA-Z0-9\-]+)/ ) { ++ $name = $1; ++ } ++ if ( $_ =~ /value="([^"]+)"/ ) { ++ $value=$1; ++ insertAttribute($sid, \%Site, \%SName, $site, $name, $value); ++ } ++ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_glob_[a-zA-Z0-9\_\-]+/) { ++ # found a global attribute ++ $host="GLOBAL"; ++ if ( $name =~ /([a-zA-Z0-9\_\-]+)_glob_([a-zA-Z0-9\_\-]+)/ ) { ++ $name = $2; ++ } ++ if ( $_ =~ /value="([^"]+)"/ ) { ++ $value=$1; ++ insertAttribute($sid, \%Global, \%GName, "global", $name, $value); ++ } ++ } + } +- if ( $name =~ /hana_${sid}_(.*)/ ) { +- $Name{$name}->{_title} = $1; +- } else { +- $Name{$name}->{_title} = $name; +- } +- $Name{$name}->{_length} = max($Name{$name}->{_length}, length( $Name{$name}->{_title})); +- # printf "%-8s %-20s %-30s\n", $1, $2, $3; +- } ++ } ++ close CIB; + } +-close CIB; + +-#print_attr_host; +-print_host_attr; ++if ( 0 == @sids ) { ++ my $sid_ino_list; ++ ( $sid_ino_list ) = get_sid_and_InstNr(); ++ @sids = split(",", $sid_ino_list); ++ ++} ++ ++foreach $sid (@sids) { ++ ( $sid, $ino ) = split(":", $sid); ++ $sid=tolower("$sid"); ++ %Host=(); ++ %HName=(); ++ read_cib($sid); ++ get_hana_attributes($sid); ++ if ( keys(%Host) == 0 ) { ++ printf "No attributes found for SID=%s\n", $sid; ++ } else { ++ print_host_attr(\%Host, \%HName, "Hosts", $sortBy); ++ } ++} diff --git a/SOURCES/bz1395142-2-update-saphana-saphanatopology.patch b/SOURCES/bz1395142-2-update-saphana-saphanatopology.patch new file mode 100644 index 0000000..2b9637b --- /dev/null +++ b/SOURCES/bz1395142-2-update-saphana-saphanatopology.patch @@ -0,0 +1,14 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2016-11-17 09:35:47.460984046 +0100 ++++ b/heartbeat/SAPHana 2016-11-17 09:36:20.536591188 +0100 +@@ -133,8 +133,8 @@ + function backup_global_and_nameserver() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 +- cp /hana/shared/LNX/global/hdb/custom/config/global.ini /hana/shared/LNX/global/hdb/custom/config/global.ini.$(date +"%s") +- cp /hana/shared/LNX/global/hdb/custom/config/nameserver.ini /hana/shared/LNX/global/hdb/custom/config/nameserver.ini.$(date +"%s") ++ cp /hana/shared/$SID/global/hdb/custom/config/global.ini /hana/shared/$SID/global/hdb/custom/config/global.ini.$(date +"%s") ++ cp /hana/shared/$SID/global/hdb/custom/config/nameserver.ini /hana/shared/$SID/global/hdb/custom/config/nameserver.ini.$(date +"%s") + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } diff --git a/SOURCES/bz1397393-rabbitmq-cluster-reset-mnesia-before-join.patch b/SOURCES/bz1397393-rabbitmq-cluster-reset-mnesia-before-join.patch new file mode 100644 index 0000000..3772674 --- /dev/null +++ b/SOURCES/bz1397393-rabbitmq-cluster-reset-mnesia-before-join.patch @@ -0,0 +1,169 @@ +diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +--- a/heartbeat/rabbitmq-cluster 2017-01-20 15:37:43.698833068 +0100 ++++ b/heartbeat/rabbitmq-cluster 2017-01-20 16:28:56.170739557 +0100 +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2014 David Vossel ++# Copyright (c) 2014 David Vossel + # All Rights Reserved. 
+ #
+ # This program is free software; you can redistribute it and/or modify
+@@ -52,7 +52,7 @@
+ cat <
+
+-
++
+ 1.0
+
+
+@@ -111,7 +111,7 @@
+
+ rmq_join_list()
+ {
+- cibadmin -Q 2>/dev/null | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
++ cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
+ }
+
+ rmq_write_nodename()
+@@ -203,7 +203,7 @@
+
+ rmq_set_policy()
+ {
+- $RMQ_CTL set_policy $@ > /dev/null 2>&1
++ $RMQ_CTL set_policy "$@" > /dev/null 2>&1
+ }
+
+ rmq_start_first()
+@@ -284,7 +284,6 @@
+ return $OCF_SUCCESS
+ }
+
+-
+ rmq_forget_cluster_node_remotely() {
+ local running_cluster_nodes="$1"
+ local node_to_forget="$2"
+@@ -354,26 +353,28 @@
+ return $rc
+ fi
+
+- # first try to join without wiping mnesia data
+- rmq_join_existing "$join_list"
+- if [ $? -ne 0 ]; then
+- ocf_log info "node failed to join, wiping data directory and trying again"
+- local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
++ # Try to join existing cluster
++ ocf_log info "wiping data directory before joining"
++ local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
+
+- # if the graceful join fails, use the hammer and reset all the data.
+- rmq_stop
+- rmq_wipe_data
+- rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node"
+- rmq_join_existing "$join_list"
+- rc=$?
++ rmq_stop
++ rmq_wipe_data
++ rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node"
++ rmq_join_existing "$join_list"
++ rc=$?
+
+- # Restore users (if any)
+- BaseDataDir=`dirname $RMQ_DATA_DIR`
+- if [ -f $BaseDataDir/users.erl ] ; then
+- rabbitmqctl eval "
+- %% Run only if Mnesia is ready, otherwise exit.
+- lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(),
++ if [ $rc -ne 0 ]; then
++ ocf_log info "node failed to join even after resetting local data. Check SELinux policy"
++ return $OCF_ERR_GENERIC
++ fi
+
++ # Restore users and users' permissions (if any)
++ BaseDataDir=`dirname $RMQ_DATA_DIR`
++ if [ -f $BaseDataDir/users.erl ] ; then
++ rabbitmqctl eval "
++ %% Run only if Mnesia is ready.
++ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
++ begin
+ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
+
+ %% Read users first
+@@ -400,39 +401,51 @@
+ %% Version >= 3.6.0
+ {internal_user,'_','_','_','_'} ->
+ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Upgrade(X)) end, Users)
+- end.
+- "
+- rm -f $BaseDataDir/users.erl
+- fi
+-
+- if [ $rc -ne 0 ]; then
+- ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
+- return $OCF_ERR_GENERIC
+- fi
++ end,
++
++ ok = file:delete(\"$BaseDataDir/users.erl\")
++ end.
++ " + fi + + return $OCF_SUCCESS + } + + rmq_stop() { +- # Backup users ++ # Backup users and users' permissions + BaseDataDir=`dirname $RMQ_DATA_DIR` + rabbitmqctl eval " +- %% Run only if Mnesia is still available, otherwise exit. +- lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), +- +- [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), +- +- Users = case WildPattern of +- %% Version < 3.6.0 +- {internal_user,'_','_','_'} -> +- mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); +- %% Version >= 3.6.0 +- {internal_user,'_','_','_','_'} -> +- mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) +- end, +- +- file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). ++ %% Run only if Mnesia is still available. ++ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso ++ begin ++ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), ++ ++ Users = case WildPattern of ++ %% Version < 3.6.0 ++ {internal_user,'_','_','_'} -> ++ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); ++ %% Version >= 3.6.0 ++ {internal_user,'_','_','_','_'} -> ++ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) ++ end, ++ ++ Users /= [] andalso file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])), ++ ++ UsersPerms = mnesia:dirty_select(rabbit_user_permission, [{{'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, [{'/=', '\\\$3', <<\"guest\">>}], ['\\\$_']}]), ++ UsersPerms /= [] andalso file:write_file(\"$BaseDataDir/users_perms.erl\", io_lib:fwrite(\"~p.~n\", [UsersPerms])) ++ end. + " + + rmq_monitor diff --git a/SOURCES/bz1400103-nova-compute-wait-nova-compute-unfence.patch b/SOURCES/bz1400103-nova-compute-wait-nova-compute-unfence.patch deleted file mode 100644 index 0901754..0000000 --- a/SOURCES/bz1400103-nova-compute-wait-nova-compute-unfence.patch +++ /dev/null @@ -1,259 +0,0 @@ -diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait ---- a/heartbeat/nova-compute-wait 2017-02-02 11:23:38.263510362 +0100 -+++ b/heartbeat/nova-compute-wait 2017-02-02 11:28:27.181650906 +0100 -@@ -1,30 +1,15 @@ - #!/bin/sh -+# Copyright 2015 Red Hat, Inc. - # -+# Description: Manages compute daemons - # --# nova-compute-wait agent manages compute daemons. -+# Authors: Andrew Beekhof - # --# Copyright (c) 2015 --# --# This program is free software; you can redistribute it and/or modify --# it under the terms of version 2 of the GNU General Public License as --# published by the Free Software Foundation. --# --# This program is distributed in the hope that it would be useful, but --# WITHOUT ANY WARRANTY; without even the implied warranty of --# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. --# --# Further, this software is distributed without any warranty that it is --# free of the rightful claim of any third person regarding infringement --# or the like. Any license provided herein, whether implied or --# otherwise, applies only to this software file. 
Patent licenses, if --# any, provided herein do not apply to combinations of this program with --# other software, or any other product whatsoever. --# --# You should have received a copy of the GNU General Public License --# along with this program; if not, write the Free Software Foundation, --# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. -+# Support: openstack@lists.openstack.org -+# License: Apache Software License (ASL) 2.0 - # - -+ - ####################################################################### - # Initialization: - -@@ -137,6 +122,8 @@ - } - - nova_start() { -+ build_unfence_overlay -+ - state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) - if [ "x$state" = x ]; then - : never been fenced -@@ -147,8 +134,8 @@ - sleep ${OCF_RESKEY_evacuation_delay} - - else -- ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}" - while [ "x$state" != "xno" ]; do -+ ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}" - state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) - sleep 5 - done -@@ -156,14 +143,22 @@ - ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete" - sleep ${OCF_RESKEY_evacuation_delay} - fi -+ -+ touch "$statefile" -+ - return $OCF_SUCCESS - } - - nova_stop() { -+ rm -f "$statefile" - return $OCF_SUCCESS - } - - nova_monitor() { -+ if [ ! -f "$statefile" ]; then -+ return $OCF_NOT_RUNNING -+ fi -+ - return $OCF_SUCCESS - } - -@@ -171,17 +166,113 @@ - return $OCF_SUCCESS - } - -+build_unfence_overlay() { -+ fence_options="" -+ -+ if [ -z "${OCF_RESKEY_auth_url}" ]; then -+ candidates=$(/usr/sbin/stonith_admin -l ${NOVA_HOST}) -+ for candidate in ${candidates}; do -+ pcs stonith show $d | grep -q fence_compute -+ if [ $? 
= 0 ]; then -+ ocf_log info "Unfencing nova based on: $candidate" -+ fence_auth=$(pcs stonith show $candidate | grep Attributes: | sed -e s/Attributes:// -e s/-/_/g -e 's/[^ ]\+=/OCF_RESKEY_\0/g' -e s/passwd/password/g) -+ eval "export $fence_auth" -+ break -+ fi -+ done -+ fi -+ -+ # Copied from NovaEvacuate -+ if [ -z "${OCF_RESKEY_auth_url}" ]; then -+ ocf_exit_reason "auth_url not configured" -+ exit $OCF_ERR_CONFIGURED -+ fi -+ -+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}" -+ -+ if [ -z "${OCF_RESKEY_username}" ]; then -+ ocf_exit_reason "username not configured" -+ exit $OCF_ERR_CONFIGURED -+ fi -+ -+ fence_options="${fence_options} -l ${OCF_RESKEY_username}" -+ -+ if [ -z "${OCF_RESKEY_password}" ]; then -+ ocf_exit_reason "password not configured" -+ exit $OCF_ERR_CONFIGURED -+ fi -+ -+ fence_options="${fence_options} -p ${OCF_RESKEY_password}" -+ -+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then -+ ocf_exit_reason "tenant_name not configured" -+ exit $OCF_ERR_CONFIGURED -+ fi -+ -+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}" -+ -+ if [ -n "${OCF_RESKEY_domain}" ]; then -+ fence_options="${fence_options} -d ${OCF_RESKEY_domain}" -+ fi -+ -+ if [ -n "${OCF_RESKEY_region_name}" ]; then -+ fence_options="${fence_options} \ -+ --region-name ${OCF_RESKEY_region_name}" -+ fi -+ -+ if [ -n "${OCF_RESKEY_insecure}" ]; then -+ if ocf_is_true "${OCF_RESKEY_insecure}"; then -+ fence_options="${fence_options} --insecure" -+ fi -+ fi -+ -+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then -+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then -+ fence_options="${fence_options} --no-shared-storage" -+ fi -+ fi -+ -+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then -+ case ${OCF_RESKEY_endpoint_type} in -+ adminURL|publicURL|internalURL) -+ ;; -+ *) -+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \ -+ "not valid. Use adminURL or publicURL or internalURL" -+ exit $OCF_ERR_CONFIGURED -+ ;; -+ esac -+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}" -+ fi -+ -+ mkdir -p /run/systemd/system/openstack-nova-compute.service.d -+ cat</run/systemd/system/openstack-nova-compute.service.d/unfence-20.conf -+[Service] -+ExecStartPost=/sbin/fence_compute ${fence_options} -o on -n ${NOVA_HOST} -+EOF -+} -+ - nova_validate() { - rc=$OCF_SUCCESS - - check_binary crudini - check_binary nova-compute -+ check_binary fence_compute - - if [ ! -f /etc/nova/nova.conf ]; then - ocf_exit_reason "/etc/nova/nova.conf not found" - exit $OCF_ERR_CONFIGURED - fi - -+ # Is the state directory writable? -+ state_dir=$(dirname $statefile) -+ touch "$state_dir/$$" -+ if [ $? != 0 ]; then -+ ocf_exit_reason "Invalid state directory: $state_dir" -+ return $OCF_ERR_ARGS -+ fi -+ rm -f "$state_dir/$$" -+ - NOVA_HOST=$(crudini --get /etc/nova/nova.conf DEFAULT host 2>/dev/null) - if [ $? = 1 ]; then - short_host=$(uname -n | awk -F. '{print $1}') -@@ -198,6 +289,8 @@ - return $rc - } - -+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active" -+ - : ${OCF_RESKEY_evacuation_delay=120} - case $__OCF_ACTION in - meta-data) meta_data -@@ -221,3 +314,4 @@ - rc=$? - ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" - exit $rc -+ -diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate ---- a/heartbeat/NovaEvacuate 2017-02-02 11:23:38.253510461 +0100 -+++ b/heartbeat/NovaEvacuate 2017-02-02 11:28:49.262432371 +0100 -@@ -1,30 +1,16 @@ - #!/bin/sh - # -+# Copyright 2015 Red Hat, Inc. - # --# NovaCompute agent manages compute daemons. 
-+# Description: Manages evacuation of nodes running nova-compute - # --# Copyright (c) 2015 -+# Authors: Andrew Beekhof - # --# This program is free software; you can redistribute it and/or modify --# it under the terms of version 2 of the GNU General Public License as --# published by the Free Software Foundation. --# --# This program is distributed in the hope that it would be useful, but --# WITHOUT ANY WARRANTY; without even the implied warranty of --# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. --# --# Further, this software is distributed without any warranty that it is --# free of the rightful claim of any third person regarding infringement --# or the like. Any license provided herein, whether implied or --# otherwise, applies only to this software file. Patent licenses, if --# any, provided herein do not apply to combinations of this program with --# other software, or any other product whatsoever. --# --# You should have received a copy of the GNU General Public License --# along with this program; if not, write the Free Software Foundation, --# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. -+# Support: openstack@lists.openstack.org -+# License: Apache Software License (ASL) 2.0 - # - -+ - ####################################################################### - # Initialization: - -@@ -180,7 +166,7 @@ - ocf_log notice "Initiating evacuation of $node" - - fence_compute ${fence_options} -o status -n ${node} -- if [ $? != 0 ]; then -+ if [ $? = 1 ]; then - ocf_log info "Nova does not know about ${node}" - # Dont mark as no because perhaps nova is unavailable right now - continue diff --git a/SOURCES/bz1400103-redis-notify-clients-of-master-being-demoted.patch b/SOURCES/bz1400103-redis-notify-clients-of-master-being-demoted.patch deleted file mode 100644 index f7ba67a..0000000 --- a/SOURCES/bz1400103-redis-notify-clients-of-master-being-demoted.patch +++ /dev/null @@ -1,42 +0,0 @@ -From f1c2249ef5e8524ddb986f0df879d5f18e935da3 Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Fri, 20 Jan 2017 09:17:15 +0100 -Subject: [PATCH] redis: use "CLIENT KILL type normal" to notify clients of - master being demoted - ---- - heartbeat/redis | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/heartbeat/redis b/heartbeat/redis -index 1ea0025..d08e57a 100755 ---- a/heartbeat/redis -+++ b/heartbeat/redis -@@ -436,6 +436,11 @@ function demote() { - local master_host - local master_port - -+ # client kill is only supported in Redis 2.8.12 or greater -+ version=$(redis_client -v | awk '{print $NF}') -+ ocf_version_cmp "$version" "2.8.11" -+ client_kill=$? -+ - CHECK_SLAVE_STATE=1 - monitor - status=$? -@@ -478,9 +483,15 @@ function demote() { - while true; do - # Wait infinite if replication is syncing - # Then start/demote operation timeout determines timeout -+ if [ "$client_kill" -eq 2 ]; then -+ redis_client CLIENT PAUSE 2000 -+ fi - monitor - status=$? 
- if (( status == OCF_SUCCESS )); then -+ if [ "$client_kill" -eq 2 ]; then -+ redis_client CLIENT KILL type normal -+ fi - return $OCF_SUCCESS - fi - diff --git a/SOURCES/bz1400172-IPsrcaddr-fix-duplicate-routes.patch b/SOURCES/bz1400172-IPsrcaddr-fix-duplicate-routes.patch new file mode 100644 index 0000000..02d7446 --- /dev/null +++ b/SOURCES/bz1400172-IPsrcaddr-fix-duplicate-routes.patch @@ -0,0 +1,22 @@ +From f35491fd18693d2816ad6f83c32e133b26193aa2 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 21 Dec 2016 15:34:50 +0100 +Subject: [PATCH] IPsrcaddr: match exact route to avoid failing + +--- + heartbeat/IPsrcaddr | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr +index 08fd8a6..0efe154 100755 +--- a/heartbeat/IPsrcaddr ++++ b/heartbeat/IPsrcaddr +@@ -469,7 +469,7 @@ rc=$? + } + + INTERFACE=`echo $findif_out | awk '{print $1}'` +-NETWORK=`ip route list dev $INTERFACE scope link match $ipaddress|grep -o '^[^ ]*'` ++NETWORK=`ip route list dev $INTERFACE scope link proto kernel match $ipaddress|grep -o '^[^ ]*'` + + case $1 in + start) srca_start $ipaddress diff --git a/SOURCES/bz1402370-portblock-wait.patch b/SOURCES/bz1402370-portblock-wait.patch new file mode 100644 index 0000000..b57e5cf --- /dev/null +++ b/SOURCES/bz1402370-portblock-wait.patch @@ -0,0 +1,114 @@ +From 14b45df580668220cf97744df93cb9ee5484a14e Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 8 Dec 2016 11:18:10 +0100 +Subject: [PATCH 1/2] portblock: Use -w (wait) to avoid "insufficient + privileges" error + +--- + heartbeat/portblock | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/heartbeat/portblock b/heartbeat/portblock +index c480954..e7de217 100755 +--- a/heartbeat/portblock ++++ b/heartbeat/portblock +@@ -242,7 +242,7 @@ active_grep_pat() + chain_isactive() + { + PAT=`active_grep_pat "$1" "$2" "$3"` +- $IPTABLES -n -L INPUT | grep "$PAT" >/dev/null ++ $IPTABLES -w -n -L INPUT | grep "$PAT" >/dev/null + } + + save_tcp_connections() +@@ -370,13 +370,13 @@ IptablesBLOCK() + : OK -- chain already active + else + if $try_reset ; then +- $IPTABLES -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset ++ $IPTABLES -w -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset + tickle_local + fi +- $IPTABLES -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP ++ $IPTABLES -w -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP + rc=$? 
+ if $try_reset ; then +- $IPTABLES -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset ++ $IPTABLES -w -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset + fi + fi + +@@ -389,7 +389,7 @@ IptablesUNBLOCK() + if + chain_isactive "$1" "$2" "$3" + then +- $IPTABLES -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP ++ $IPTABLES -w -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP + else + : Chain Not active + fi + +From 57d31bc04a0421cf2746830d5e987e52f9f9acd3 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 9 Dec 2016 13:57:49 +0100 +Subject: [PATCH 2/2] portblock: version check for -w + +--- + heartbeat/portblock | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/heartbeat/portblock b/heartbeat/portblock +index e7de217..92f7071 100755 +--- a/heartbeat/portblock ++++ b/heartbeat/portblock +@@ -242,7 +242,7 @@ active_grep_pat() + chain_isactive() + { + PAT=`active_grep_pat "$1" "$2" "$3"` +- $IPTABLES -w -n -L INPUT | grep "$PAT" >/dev/null ++ $IPTABLES $wait -n -L INPUT | grep "$PAT" >/dev/null + } + + save_tcp_connections() +@@ -370,13 +370,13 @@ IptablesBLOCK() + : OK -- chain already active + else + if $try_reset ; then +- $IPTABLES -w -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset ++ $IPTABLES $wait -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset + tickle_local + fi +- $IPTABLES -w -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP ++ $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP + rc=$? + if $try_reset ; then +- $IPTABLES -w -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset ++ $IPTABLES $wait -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset + fi + fi + +@@ -389,7 +389,7 @@ IptablesUNBLOCK() + if + chain_isactive "$1" "$2" "$3" + then +- $IPTABLES -w -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP ++ $IPTABLES $wait -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP + else + : Chain Not active + fi +@@ -526,6 +526,15 @@ if [ -z "$OCF_RESKEY_action" ]; then + exit $OCF_ERR_CONFIGURED + fi + ++# iptables v1.4.20+ is required to use -w (wait) ++version=$(iptables -V | awk -F ' v' '{print $NF}') ++ocf_version_cmp "$version" "1.4.19.1" ++if [ "$?" -eq "2" ]; then ++ wait="-w" ++else ++ wait="" ++fi ++ + protocol=$OCF_RESKEY_protocol + portno=$OCF_RESKEY_portno + action=$OCF_RESKEY_action diff --git a/SOURCES/bz1402511-rabbitmq-cluster-reset-mnesia-before-join.patch b/SOURCES/bz1402511-rabbitmq-cluster-reset-mnesia-before-join.patch deleted file mode 100644 index 3772674..0000000 --- a/SOURCES/bz1402511-rabbitmq-cluster-reset-mnesia-before-join.patch +++ /dev/null @@ -1,169 +0,0 @@ -diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster ---- a/heartbeat/rabbitmq-cluster 2017-01-20 15:37:43.698833068 +0100 -+++ b/heartbeat/rabbitmq-cluster 2017-01-20 16:28:56.170739557 +0100 -@@ -1,6 +1,6 @@ - #!/bin/sh - # --# Copyright (c) 2014 David Vossel -+# Copyright (c) 2014 David Vossel - # All Rights Reserved. 
- # - # This program is free software; you can redistribute it and/or modify -@@ -52,7 +52,7 @@ - cat < - -- -+ - 1.0 - - -@@ -111,7 +111,7 @@ - - rmq_join_list() - { -- cibadmin -Q 2>/dev/null | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" -+ cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" - } - - rmq_write_nodename() -@@ -203,7 +203,7 @@ - - rmq_set_policy() - { -- $RMQ_CTL set_policy $@ > /dev/null 2>&1 -+ $RMQ_CTL set_policy "$@" > /dev/null 2>&1 - } - - rmq_start_first() -@@ -284,7 +284,6 @@ - return $OCF_SUCCESS - } - -- - rmq_forget_cluster_node_remotely() { - local running_cluster_nodes="$1" - local node_to_forget="$2" -@@ -354,26 +353,28 @@ - return $rc - fi - -- # first try to join without wiping mnesia data -- rmq_join_existing "$join_list" -- if [ $? -ne 0 ]; then -- ocf_log info "node failed to join, wiping data directory and trying again" -- local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" -+ # Try to join existing cluster -+ ocf_log info "wiping data directory before joining" -+ local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" - -- # if the graceful join fails, use the hammer and reset all the data. -- rmq_stop -- rmq_wipe_data -- rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node" -- rmq_join_existing "$join_list" -- rc=$? -+ rmq_stop -+ rmq_wipe_data -+ rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node" -+ rmq_join_existing "$join_list" -+ rc=$? - -- # Restore users (if any) -- BaseDataDir=`dirname $RMQ_DATA_DIR` -- if [ -f $BaseDataDir/users.erl ] ; then -- rabbitmqctl eval " -- %% Run only if Mnesia is ready, otherwise exit. -- lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), -+ if [ $rc -ne 0 ]; then -+ ocf_log info "node failed to join even after reseting local data. Check SELINUX policy" -+ return $OCF_ERR_GENERIC -+ fi - -+ # Restore users and users' permissions (if any) -+ BaseDataDir=`dirname $RMQ_DATA_DIR` -+ if [ -f $BaseDataDir/users.erl ] ; then -+ rabbitmqctl eval " -+ %% Run only if Mnesia is ready. -+ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso -+ begin - [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), - - %% Read users first -@@ -400,39 +401,51 @@ - %% Version >= 3.6.0 - {internal_user,'_','_','_','_'} -> - lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Upgrade(X)) end, Users) -- end. -- " -- rm -f $BaseDataDir/users.erl -- fi -- -- if [ $rc -ne 0 ]; then -- ocf_log info "node failed to join even after reseting local data. Check SELINUX policy" -- return $OCF_ERR_GENERIC -- fi -+ end, -+ -+ ok = file:delete(\"$BaseDataDir/users.erl\") -+ end. -+ " -+ fi -+ if [ -f $BaseDataDir/users_perms.erl ] ; then -+ rabbitmqctl eval " -+ %% Run only if Mnesia is ready. -+ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso -+ begin -+ {ok, [UsersPerms]} = file:consult(\"$BaseDataDir/users_perms.erl\"), -+ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user_permission, X) end, UsersPerms), -+ -+ ok = file:delete(\"$BaseDataDir/users_perms.erl\") -+ end. 
-+ " - fi - - return $OCF_SUCCESS - } - - rmq_stop() { -- # Backup users -+ # Backup users and users' permissions - BaseDataDir=`dirname $RMQ_DATA_DIR` - rabbitmqctl eval " -- %% Run only if Mnesia is still available, otherwise exit. -- lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), -- -- [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), -- -- Users = case WildPattern of -- %% Version < 3.6.0 -- {internal_user,'_','_','_'} -> -- mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); -- %% Version >= 3.6.0 -- {internal_user,'_','_','_','_'} -> -- mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) -- end, -- -- file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). -+ %% Run only if Mnesia is still available. -+ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso -+ begin -+ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), -+ -+ Users = case WildPattern of -+ %% Version < 3.6.0 -+ {internal_user,'_','_','_'} -> -+ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); -+ %% Version >= 3.6.0 -+ {internal_user,'_','_','_','_'} -> -+ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) -+ end, -+ -+ Users /= [] andalso file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])), -+ -+ UsersPerms = mnesia:dirty_select(rabbit_user_permission, [{{'\\\$1', {'\\\$2', '\\\$3','\\\$4'}, '\\\$5'}, [{'/=', '\\\$3', <<\"guest\">>}], ['\\\$_']}]), -+ UsersPerms /= [] andalso file:write_file(\"$BaseDataDir/users_perms.erl\", io_lib:fwrite(\"~p.~n\", [UsersPerms])) -+ end. - " - - rmq_monitor diff --git a/SOURCES/bz1406152-exportfs-ipv6-fix.patch b/SOURCES/bz1406152-exportfs-ipv6-fix.patch new file mode 100644 index 0000000..ac5eb18 --- /dev/null +++ b/SOURCES/bz1406152-exportfs-ipv6-fix.patch @@ -0,0 +1,14 @@ +diff -uNr a/heartbeat/exportfs b/heartbeat/exportfs +--- a/heartbeat/exportfs 2016-12-22 14:29:11.347973419 +0100 ++++ b/heartbeat/exportfs 2016-12-22 14:30:10.273326342 +0100 +@@ -204,6 +204,10 @@ + is_exported() { + local dir=$1 + local spec=$2 ++ ++ # IPv6 addressed are encased in brackets that need to be removed ++ spec=$(echo $spec | sed 's/\[//;s/\]//') ++ + exportfs | + sed -e '$! 
N; s/\n[[:space:]]\+/ /; t; s/[[:space:]]\+\([^[:space:]]\+\)\(\n\|$\)/ \1\2/g; P;D;' | + grep -q -x -F "$dir $spec" diff --git a/SOURCES/bz1408656-ethmonitor-monitor-interface-without-ip.patch b/SOURCES/bz1408656-ethmonitor-monitor-interface-without-ip.patch new file mode 100644 index 0000000..d5ba5b8 --- /dev/null +++ b/SOURCES/bz1408656-ethmonitor-monitor-interface-without-ip.patch @@ -0,0 +1,25 @@ +From 8ec7bd4cfcbdbff10c1c5717eae91d8c41037cda Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 24 Mar 2017 12:56:23 +0100 +Subject: [PATCH] ethmonitor: fix to be able to monitor interface without IP + +--- + heartbeat/ethmonitor | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor +index 7f5579f..81a7c0b 100755 +--- a/heartbeat/ethmonitor ++++ b/heartbeat/ethmonitor +@@ -206,8 +206,9 @@ is_interface() { + # + # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces + # +- local iface=`$IP2UTIL -o -f inet addr show | grep " $1 " \ +- | cut -d ' ' -f2 | sort -u | grep -v '^ipsec[0-9][0-9]*$'` ++ local iface=`$IP2UTIL -o -f link addr show | grep " $1:" \ ++ | cut -d ' ' -f2 | sort -u | grep -v '^ipsec[0-9][0-9]*$' \ ++ | sed -e 's/:$//'` + [ "$iface" != "" ] + } + diff --git a/SOURCES/bz1409513-portblock-wait.patch b/SOURCES/bz1409513-portblock-wait.patch deleted file mode 100644 index b57e5cf..0000000 --- a/SOURCES/bz1409513-portblock-wait.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 14b45df580668220cf97744df93cb9ee5484a14e Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Thu, 8 Dec 2016 11:18:10 +0100 -Subject: [PATCH 1/2] portblock: Use -w (wait) to avoid "insufficient - privileges" error - ---- - heartbeat/portblock | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/heartbeat/portblock b/heartbeat/portblock -index c480954..e7de217 100755 ---- a/heartbeat/portblock -+++ b/heartbeat/portblock -@@ -242,7 +242,7 @@ active_grep_pat() - chain_isactive() - { - PAT=`active_grep_pat "$1" "$2" "$3"` -- $IPTABLES -n -L INPUT | grep "$PAT" >/dev/null -+ $IPTABLES -w -n -L INPUT | grep "$PAT" >/dev/null - } - - save_tcp_connections() -@@ -370,13 +370,13 @@ IptablesBLOCK() - : OK -- chain already active - else - if $try_reset ; then -- $IPTABLES -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset -+ $IPTABLES -w -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset - tickle_local - fi -- $IPTABLES -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP -+ $IPTABLES -w -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP - rc=$? 
- if $try_reset ; then -- $IPTABLES -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset -+ $IPTABLES -w -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset - fi - fi - -@@ -389,7 +389,7 @@ IptablesUNBLOCK() - if - chain_isactive "$1" "$2" "$3" - then -- $IPTABLES -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP -+ $IPTABLES -w -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP - else - : Chain Not active - fi - -From 57d31bc04a0421cf2746830d5e987e52f9f9acd3 Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Fri, 9 Dec 2016 13:57:49 +0100 -Subject: [PATCH 2/2] portblock: version check for -w - ---- - heartbeat/portblock | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/heartbeat/portblock b/heartbeat/portblock -index e7de217..92f7071 100755 ---- a/heartbeat/portblock -+++ b/heartbeat/portblock -@@ -242,7 +242,7 @@ active_grep_pat() - chain_isactive() - { - PAT=`active_grep_pat "$1" "$2" "$3"` -- $IPTABLES -w -n -L INPUT | grep "$PAT" >/dev/null -+ $IPTABLES $wait -n -L INPUT | grep "$PAT" >/dev/null - } - - save_tcp_connections() -@@ -370,13 +370,13 @@ IptablesBLOCK() - : OK -- chain already active - else - if $try_reset ; then -- $IPTABLES -w -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset -+ $IPTABLES $wait -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset - tickle_local - fi -- $IPTABLES -w -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP -+ $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP - rc=$? - if $try_reset ; then -- $IPTABLES -w -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset -+ $IPTABLES $wait -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset - fi - fi - -@@ -389,7 +389,7 @@ IptablesUNBLOCK() - if - chain_isactive "$1" "$2" "$3" - then -- $IPTABLES -w -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP -+ $IPTABLES $wait -D INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP - else - : Chain Not active - fi -@@ -526,6 +526,15 @@ if [ -z "$OCF_RESKEY_action" ]; then - exit $OCF_ERR_CONFIGURED - fi - -+# iptables v1.4.20+ is required to use -w (wait) -+version=$(iptables -V | awk -F ' v' '{print $NF}') -+ocf_version_cmp "$version" "1.4.19.1" -+if [ "$?" 
-eq "2" ]; then -+ wait="-w" -+else -+ wait="" -+fi -+ - protocol=$OCF_RESKEY_protocol - portno=$OCF_RESKEY_portno - action=$OCF_RESKEY_action diff --git a/SOURCES/bz1411225-oraasm.patch b/SOURCES/bz1411225-oraasm.patch new file mode 100644 index 0000000..6e07cb9 --- /dev/null +++ b/SOURCES/bz1411225-oraasm.patch @@ -0,0 +1,221 @@ +From 70030ab28f81609292cfbb3c7b34f3f033b09c57 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 23 Mar 2017 10:50:45 +0100 +Subject: [PATCH] oraasm: new resource agent for Oracle ASM Disk Groups + +--- + doc/man/Makefile.am | 1 + + heartbeat/Makefile.am | 1 + + heartbeat/oraasm | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 181 insertions(+) + create mode 100755 heartbeat/oraasm + +diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am +index 43a3f70..f5f64c8 100644 +--- a/doc/man/Makefile.am ++++ b/doc/man/Makefile.am +@@ -122,6 +122,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_nfsnotify.7 \ + ocf_heartbeat_nfsserver.7 \ + ocf_heartbeat_nginx.7 \ ++ ocf_heartbeat_oraasm.7 \ + ocf_heartbeat_oracle.7 \ + ocf_heartbeat_oralsnr.7 \ + ocf_heartbeat_pgsql.7 \ +diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am +index 91d4090..229db71 100644 +--- a/heartbeat/Makefile.am ++++ b/heartbeat/Makefile.am +@@ -119,6 +119,7 @@ ocf_SCRIPTS = AoEtarget \ + named \ + nfsnotify \ + nfsserver \ ++ oraasm \ + oracle \ + oralsnr \ + pingd \ +diff --git a/heartbeat/oraasm b/heartbeat/oraasm +new file mode 100755 +index 0000000..22b88ea +--- /dev/null ++++ b/heartbeat/oraasm +@@ -0,0 +1,179 @@ ++#!/bin/sh ++# ++# License: GNU General Public License (GPL) ++# (c) 2017 O. Albrigtsen ++# and Linux-HA contributors ++# ++# ----------------------------------------------------------------------------- ++# O C F R E S O U R C E S C R I P T S P E C I F I C A T I O N ++# ----------------------------------------------------------------------------- ++# ++# NAME ++# oraasm : OCF resource agent script for Oracle ASM ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++# Defaults ++OCF_RESKEY_user_default="grid" ++ ++: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} ++ ++ ++oraasm_usage() { ++ cat < ++ ++ ++0.75 ++ ++OCF Resource script for Oracle ASM. It uses the ohasd init-script to manage a Oracle ASM Disk Group as a HA resource. ++Oracle ASM resource agent ++ ++ ++ ++ ++ Oracle Grid user ++ Oracle Grid user ++ ++ ++ ++ ++ ++The name of the Oracle Disk Group. ++If not specified, then the Disk Group along with its home should be listed in /etc/oratab. ++ ++ Oracle Disk Group ++ ++ ++ ++ ++The Oracle Grid home directory ++home ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++} ++ ++oraasm_methods() { ++ cat <<-! ++ start ++ stop ++ status ++ monitor ++ validate-all ++ methods ++ meta-data ++ usage ++ ! ++} ++ ++oraasm_getconfig() { ++ [ x = "x$OCF_RESKEY_home" ] && ++ OCF_RESKEY_home=`awk -F: "/^+$OCF_RESKEY_diskgroup:/"'{print $2}' /etc/oratab` ++ PATH="$OCF_RESKEY_home/bin:$PATH" ++ ++ ORA_ENVF=`mktemp` ++ cat << EOF > $ORA_ENVF ++PATH="$OCF_RESKEY_home/bin:$PATH" ++EOF ++ chmod 644 $ORA_ENVF ++ trap "rm -f $ORA_ENVF" EXIT ++} ++ ++oraasm_start() { ++ # if resource is already running, no need to continue code after this. ++ if oraasm_monitor; then ++ ocf_log info "Oracle ASM is already running" ++ return $OCF_SUCCESS ++ fi ++ ++ ocf_run -q /etc/init.d/ohasd start ++ ++ while ! 
oraasm_monitor; do ++ sleep 1 ++ done ++ ++ return $OCF_SUCCESS ++} ++ ++oraasm_stop() { ++ oraasm_monitor ++ if [ $? -ne $OCF_SUCCESS ]; then ++ # Currently not running. Nothing to do. ++ ocf_log info "Oracle ASM is already stopped" ++ ++ return $OCF_SUCCESS ++ fi ++ ++ ocf_run -q /etc/init.d/ohasd stop ++ ++ # Wait for process to stop ++ while oraasm_monitor; do ++ sleep 1 ++ done ++ ++ return $OCF_SUCCESS ++} ++ ++oraasm_monitor() { ++ su - $OCF_RESKEY_user -c ". $ORA_ENVF; crsctl check has | grep -q \"CRS-4638\"" ++ case "$?" in ++ 0) ++ rc=$OCF_SUCCESS ++ ;; ++ 1) ++ rc=$OCF_NOT_RUNNING ++ ocf_log info "Oracle ASM is not running" ++ ;; ++ *) ++ rc=$OCF_ERR_GENERIC ++ ;; ++ esac ++ return $rc ++} ++ ++oraasm_status() { ++ rc=$(oraasm_monitor) ++ return $rc ++} ++ ++oraasm_validate_all() { ++ if [ x = "x$OCF_RESKEY_home" ]; then ++ ocf_exit_reason "home not set" ++ return $OCF_ERR_CONFIGURED ++ fi ++} ++ ++ ++OCF_REQUIRED_PARAMS="user diskgroup" ++OCF_REQUIRED_BINARIES="/etc/init.d/ohasd crsctl" ++ocf_rarun $* ++ ++# vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0 diff --git a/SOURCES/bz1420565-pgsql-dont-use-crm_failcount.patch b/SOURCES/bz1420565-pgsql-dont-use-crm_failcount.patch new file mode 100644 index 0000000..e423934 --- /dev/null +++ b/SOURCES/bz1420565-pgsql-dont-use-crm_failcount.patch @@ -0,0 +1,64 @@ +diff -uNr a/heartbeat/pgsql b/heartbeat/pgsql +--- a/heartbeat/pgsql 2017-03-09 11:50:06.365145803 +0100 ++++ b/heartbeat/pgsql 2017-03-09 12:19:41.566177608 +0100 +@@ -966,8 +966,13 @@ + cmp_location=`printf "$master_baseline\n$my_master_baseline\n" |\ + sort | head -1` + if [ "$cmp_location" != "$my_master_baseline" ]; then ++ # We used to set the failcount to INF for the resource here in ++ # order to move the master to the other node. However, setting ++ # the failcount should be done only by the CRM and so this use ++ # got deprecated in pacemaker version 1.1.17. Now we do the ++ # "ban resource from the node". + ocf_exit_reason "My data is newer than new master's one. New master's location : $master_baseline" +- $CRM_FAILCOUNT -r $OCF_RESOURCE_INSTANCE -U $NODENAME -v INFINITY ++ exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q + return $OCF_ERR_GENERIC + fi + fi +@@ -1526,6 +1531,36 @@ + wait $func_pid + } + ++# retry command when command doesn't return 0 ++# arg1 : count >= 0 (if arg1 is 0, it retries command in infinitum(1day)) ++# arg2..argN : command and args ++exec_with_retry() { ++ local count="86400" ++ local output ++ local rc ++ ++ if [ "$1" -ne 0 ]; then ++ count=$1 ++ fi ++ shift ++ ++ while [ $count -gt 0 ]; do ++ output=`$*` ++ rc=$? ++ if [ $rc -ne 0 ]; then ++ ocf_log warn "Retrying(remain $count). \"$*\" failed. rc=$rc. stdout=\"$output\"." 
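# (Illustrative aside on exec_with_retry, not part of the hunk: with arg1=0
#  the default count of 86400 bounds this loop to roughly one day at one
#  attempt per second, since each failure decrements count and sleeps 1s
#  before the next try; a non-zero arg1 simply caps the number of attempts.)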
++ count=`expr $count - 1` ++ sleep 1 ++ else ++ printf "${output}" ++ return 0 ++ fi ++ done ++ ++ ocf_exit_reason "giving up executing \"$*\"" ++ return $rc ++} ++ + is_node_online() { + crm_mon -1 -n | tr '[A-Z]' '[a-z]' | grep -e "^node $1 " -e "^node $1:" | grep -q -v "offline" + } +@@ -1734,7 +1769,7 @@ + CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot" + CRM_ATTR_REBOOT="${HA_SBIN_DIR}/crm_attribute -l reboot" + CRM_ATTR_FOREVER="${HA_SBIN_DIR}/crm_attribute -l forever" +- CRM_FAILCOUNT="${HA_SBIN_DIR}/crm_failcount" ++ CRM_RESOURCE="${HA_SBIN_DIR}/crm_resource" + + CAN_NOT_PROMOTE="-INFINITY" + CAN_PROMOTE="100" diff --git a/SOURCES/bz1423424-1-update-saphana-saphanatopology.patch b/SOURCES/bz1423424-1-update-saphana-saphanatopology.patch deleted file mode 100644 index 5cd8ffa..0000000 --- a/SOURCES/bz1423424-1-update-saphana-saphanatopology.patch +++ /dev/null @@ -1,1990 +0,0 @@ -diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana ---- a/heartbeat/SAPHana 2016-10-14 10:09:56.479051279 +0200 -+++ b/heartbeat/SAPHana 2016-10-14 10:29:23.990066292 +0200 -@@ -2,8 +2,8 @@ - # - # SAPHana - # --# Description: Manages two single SAP HANA Instance in System Replication --# Planned: do also manage scale-up scenarios -+# Description: Manages two SAP HANA Databases in System Replication -+# Planned: do also manage scale-out scenarios - # currently the SAPHana is dependent of the analysis of - # SAPHanaTopology - # For supported scenarios please read the README file provided -@@ -16,7 +16,7 @@ - # Support: linux@sap.com - # License: GNU General Public License (GPL) - # Copyright: (c) 2013,2014 SUSE Linux Products GmbH --# Copyright: (c) 2015 SUSE Linux GmbH -+# (c) 2015-2016 SUSE Linux GmbH - # - # An example usage: - # See usage() function below for more details... -@@ -29,12 +29,13 @@ - # OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) - # OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no) - # OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt)) --# OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes) -+# OCF_RESKEY_SAPHanaFilter (outdated, replaced by cluster property hana_${sid}_glob_filter) - # - # - ####################################################################### - # - # Initialization: -+SAPHanaVersion="0.152.17" - timeB=$(date '+%s') - - : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} -@@ -43,6 +44,12 @@ - # - ####################################################################### - # -+log_attributes=false -+if ocf_is_true "$log_attributes"; then -+ log_attr_file="/var/log/fhATTRIBUTES" -+else -+ log_attr_file="/dev/null" -+fi - - HANA_STATE_PRIMARY=0 - HANA_STATE_SECONDARY=1 -@@ -107,7 +114,7 @@ - cat <<-EOF - usage: $0 ($methods) - -- $0 manages a SAP HANA Instance as an HA resource. -+ $0 manages two SAP HANA databases (scale-up) in system replication. - - The 'start' operation starts the HANA instance or bring the "clone instance" to a WAITING status - The 'stop' operation stops the HANA instance -@@ -145,15 +152,14 @@ - - - --0.151.1 -+$SAPHanaVersion - --Manages two SAP HANA instances in system replication (SR). -+Manages two SAP HANA database systems in system replication (SR). - --The SAPHanaSR resource agent manages two SAP Hana instances (databases) which are configured --in system replication. This first version is limited to the scale-up scenario. Scale-Out is --not supported in this version. 
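For reference, the exec_with_retry helper added by the pgsql patch above is invoked as sketched here; the first line is the ban call that replaces crm_failcount in that hunk, the second a hypothetical bounded variant:

exec_with_retry 0 $CRM_RESOURCE -B -r $OCF_RESOURCE_INSTANCE -N $NODENAME -Q   # retry up to ~1 day
exec_with_retry 5 $CRM_RESOURCE --cleanup -r $OCF_RESOURCE_INSTANCE            # hypothetical: give up after 5 attempts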
-+The SAPHanaSR resource agent manages two SAP HANA database systems which are configured -+in system replication. SAPHana supports Scale-Up scenarios. - --Managing the two SAP HANA instances means that the resource agent controls the start/stop of the -+Managing the two SAP HANA database systems means that the resource agent controls the start/stop of the - instances. In addition the resource agent is able to monitor the SAP HANA databases to check their - availability on landscape host configuration level. For this monitoring the resource agent relies on interfaces - provided by SAP. A third task of the resource agent is to also check the synchronisation status -@@ -205,9 +211,10 @@ - Should cluster/RA prefer to switchover to slave instance instead of restarting master locally? Default="yes" - no: Do prefer restart locally - yes: Do prefer takever to remote site -+ never: Do never run a sr_takeover (promote) at the secondary side. THIS VALUE IS CURRENTLY NOT SUPPORTED. - - Local or site recover preferred? -- -+ - - - Define, if a former primary should automatically be registered. -@@ -220,7 +227,7 @@ - Time difference needed between to primary time stamps, if a dual-primary situation occurs - Time difference needed between to primary time stamps, - if a dual-primary situation occurs. If the time difference is -- less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin -+ less than the time gap, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin - a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After - this registration to the new primary all data will be overwritten by the system replication. - -@@ -290,6 +297,45 @@ - local rc=0; tr -d '"'; return $rc - } - -+# function: version: cpmpare two HANA version strings -+function ver_lt() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 0 && return 0 || return 1 -+} -+ -+function ver_le() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 -+} -+ -+function ver_gt() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 2 && return 0 || return 1 -+} -+ -+function ver_ge() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 2 -o $? -eq 1 && return 0 || return 1 -+} -+# -+# function: version: cpmpare two HANA version strings -+# -+function version() { -+ if [ $# -eq 3 ]; then -+ case "$2" in -+ LE | le | "<=" ) ver_le $1 $3;; -+ LT | lt | "<" ) ver_lt $1 $3;; -+ GE | ge | ">=" ) ver_ge $1 $3;; -+ GT | gt | ">" ) ver_gt $1 $3;; -+ * ) return 1; -+ esac -+ elif [ $# -ge 5 ]; then -+ version $1 $2 $3 && shift 2 && version $* -+ else -+ return 1; -+ fi -+} -+ - # - # function: remoteHost2remoteNode - convert a SAP remoteHost to the cluster node name - # params: remoteHost -@@ -372,12 +418,16 @@ - dstr=$(date) - case "$attr_store" in - reboot | forever ) -- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE -- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ if ocf_is_true "$log_attributes"; then -+ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file -+ fi -+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? 
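# (Illustrative note on the ver_* helpers above: ocf_version_cmp returns 0
#  when the first version is lower than the second, 1 when equal and 2 when
#  higher, e.g. ocf_version_cmp "1.4.21" "1.4.19.1" yields rc=2. ver_ge
#  therefore treats rc 1 or 2 as "true", and the portblock hunk earlier in
#  this series uses the same rc=2 test to decide whether the installed
#  iptables is new enough for -w.)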
- ;; - props ) -- echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE -- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ if ocf_is_true "$log_attributes"; then -+ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file -+ fi -+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? - ;; - esac - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -@@ -405,12 +455,16 @@ - dstr=$(date) - case "$attr_store" in - reboot | forever ) -- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE -- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? -+ if ocf_is_true "$log_attributes"; then -+ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> $log_attr_file -+ fi -+ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>$log_attr_file; rc=$? - ;; - props ) -- echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE -- crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? -+ if ocf_is_true "$log_attributes"; then -+ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> $log_attr_file -+ fi -+ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>$log_attr_file; rc=$? - ;; - esac - else -@@ -460,6 +514,10 @@ - # DONE: PRIO2: Only adjust master if value is really different (try to check that) - oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot) - if [ "$oldscore" != "$score" ]; then -+ dstr=$(date) -+ if ocf_is_true "$log_attributes"; then -+ echo "$dstr: SAPHana: crm_master -v $score -l reboot " >> $log_attr_file -+ fi - super_ocf_log debug "DBG: SET crm master: $score (old: $oldscore)" - ${HA_SBIN_DIR}/crm_master -v $score -l reboot; rc=$? 
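# (Illustrative note: set_crm_master reads the old promotion score first and
#  only calls crm_master when the value actually changed, so monitor cycles
#  do not produce redundant CIB updates.)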
- else -@@ -471,9 +529,9 @@ - } - - # --# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER) -+# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE) - # params: NODE_ROLES NODE_SYNC_STATUS --# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], -+# globals: SCORING_TABLE[@], - # - scoring_crm_master() - { -@@ -482,7 +540,7 @@ - local sync="$2" - local skip=0 - local myScore="" -- for scan in "${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}"; do -+ for scan in "${SCORING_TABLE[@]}"; do - if [ $skip -eq 0 ]; then - read rolePatt syncPatt score <<< $scan - if grep "$rolePatt" <<< "$roles"; then -@@ -494,7 +552,7 @@ - fi - done - super_ocf_log debug "DBG: scoring_crm_master adjust score $myScore" -- # TODO: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 -+ # DONE: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 - if [ -n "$myScore" ]; then - set_crm_master $myScore - fi -@@ -514,28 +572,91 @@ - } - - # -+# function: HANA_CALL -+# params: timeout-in-seconds cmd-line -+# globals: sid(r), SID(r), InstanceName(r) -+# -+function HANA_CALL() -+{ -+ # -+ # TODO: PRIO 5: remove 'su - ${sidadm} later, when SAP HANA resoled issue with -+ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp) -+ # TODO: PRIO 5: Maybe make "su" optional by a parameter -+ local timeOut=0 -+ local onTimeOut="" -+ local rc=0 -+ local use_su=1 # Default to be changed later (see TODO above) -+ local pre_cmd="" -+ local cmd="" -+ local pre_script="" -+ local output="" -+ while [ $# -gt 0 ]; do -+ case "$1" in -+ --timeout ) timeOut=$2; shift;; -+ --use-su ) use_su=1;; -+ --on-timeout ) onTimeOut="$2"; shift;; -+ --cmd ) shift; cmd="$*"; break;; -+ esac -+ shift -+ done -+ -+ if [ $use_su -eq 1 ]; then -+ pre_cmd="su - ${sid}adm -c" -+ pre_script="true" -+ else -+ # as root user we need the library path to the SAP kernel to be able to call sapcontrol -+ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH -+ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] -+ then -+ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH -+ fi -+ pre_cmd="bash -c" -+ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH" -+ fi -+ case $timeOut in -+ 0 | inf ) -+ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? -+ ;; -+ * ) -+ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? -+ # -+ # on timeout ... 
-+ # -+ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then -+ local second_output="" -+ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); -+ fi -+ ;; -+ esac -+ echo "$output" -+ return $rc; -+} -+ -+# - # function: saphana_init - initialize variables for the resource agent - # params: InstanceName --# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), --# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w) -+# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), -+# globals: sr_name(w), remoteHost(w), otherNodes(w), remSR_name(w) - # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w) - # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) - # globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w) -+# globals: NODENAME(w), vNAME(w), hdbver(w), - # saphana_init : Define global variables with default values, if optional parameters are not set - # - function saphana_init() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=$OCF_SUCCESS -- local vName - local clN - # local site - # two parameter models (for transition only) - # OLD: InstanceName - # NEW: SID InstanceNumber -+ NODENAME=$(crm_node -n) - SID=$OCF_RESKEY_SID - InstanceNr=$OCF_RESKEY_InstanceNumber - SIDInstanceName="${SID}_HDB${InstanceNr}" - InstanceName="HDB${InstanceNr}" -+ export SAPSYSTEMNAME=$SID - super_ocf_log debug "DBG: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)" - sid=$(echo "$SID" | tr [:upper:] [:lower:]) - sidadm="${sid}adm" -@@ -544,15 +665,23 @@ - # DONE: PRIO4: SAPVIRHOST might be different to NODENAME - # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? 
Answer: Yes - # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 -- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8 -- vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ -- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr) -+ # We rely on the following format: SID is word#4, SYSNR is work#6, vHost is word#8 -+ if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then -+ vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ -+ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null ) -+ super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)" -+ else -+ super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)" -+ fi - if [ -z "$vName" ]; then - # - # if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology - # - vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME"); - fi -+ if [ -z "$vName" ]; then # last fallback if we are not able to figure out the virtual host name -+ vName="$NODENAME" -+ fi - SAPVIRHOST=${vName} - PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER" - AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" -@@ -571,6 +700,12 @@ - ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") - ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") - ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") -+ ATTR_NAME_HANA_OPERATION_MODE=("hana_${sid}_op_mode" "forever") -+ # -+ # new "central" attributes -+ # -+ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa") -+ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]}) - # - # TODO: PRIO4: Table for non-preferred-site-takeover - # -@@ -591,9 +726,7 @@ - ) - SCORING_TABLE_PREFERRED_LOCAL_RESTART=( - "[0-9]*:P:[^:]*:master .* 150" -- "[0-9]*:P:[^:]*:slave .* 140" -- "[0-9]*:P:[^:]*:\? 
.* 0" -- "[0-9]*:P:[^:]*:- .* 0" -+ "[0-9]*:P:[^:]*:.* .* 140" - "[0-9]*:S:[^:]*:master SOK 100" - "[0-9]*:S:[^:]*:master SFAIL -INFINITY" - "[0-9]*:S:[^:]*:slave SOK 10" -@@ -602,6 +735,25 @@ - "[0-9]*:S:[^:]*:- .* 0" - ".* .* -1" - ) -+ SCORING_TABLE_PREFERRED_NEVER=( -+ "[234]*:P:[^:]*:master .* 150" -+ "[015-9]*:P:[^:]*:master .* 90" -+ "[0-9]*:P:[^:]*:.* .* -INFINITY" -+ "[0-9]*:S:[^:]*:.* .* -INFINITY" -+ ".* .* -INFINITY" -+ ) -+ if ocf_is_true $PreferSiteTakeover; then -+ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}") -+ else -+ case "$PreferSiteTakeover" in -+ never|NEVER|Never ) -+ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_NEVER[@]}") -+ ;; -+ * ) -+ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_LOCAL_RESTART[@]}") -+ ;; -+ esac -+ fi - # - DUPLICATE_PRIMARY_TIMEOUT="${OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT:-7200}" - super_ocf_log debug "DBG: DUPLICATE_PRIMARY_TIMEOUT=$DUPLICATE_PRIMARY_TIMEOUT" -@@ -615,7 +767,7 @@ - esac - # - # -- -+ # - remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]}); - if [ -z "$remoteHost" ]; then - if [ ${#otherNodes[@]} -eq 1 ]; then # we are a 2 node cluster, lets assume the other is the remote-host -@@ -640,7 +792,7 @@ - sr_mode="sync" - fi - if [ -n "$remoteNode" ]; then -- rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); -+ remSR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); - fi - super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode" - # optional OCF parameters, we try to guess which directories are correct -@@ -671,26 +823,21 @@ - # - SAPSTARTPROFILE="$(ls -1 $DIR_PROFILE/${OCF_RESKEY_INSTANCE_PROFILE:-${SID}_${InstanceName}_*})" - fi -- # as root user we need the library path to the SAP kernel to be able to call sapcontrol -- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH -- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] -- then -- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH -- export LD_LIBRARY_PATH -- fi - PATH=${PATH}:${DIR_EXECUTABLE}; export PATH -+ local ges_ver -+ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') -+ hdbver=${ges_ver%.*.*} -+ # -+ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state -+ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible -+ # -+ hdbState="hdbnsutil -sr_state" -+ hdbMap="hdbnsutil -sr_state" -+ if version "$hdbver" ">=" "1.00.111"; then -+ hdbState="hdbnsutil -sr_stateConfiguration" -+ hdbMap="hdbnsutil -sr_stateHostMapping" -+ fi - super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" -- ############################# -- # TODO: PRIO9: To be able to call landscapeHostConfig.py without su (so as root) -- # TODO: PRIO9: Research for environment script .htacces or something like that -- #export SAPSYSTEMNAME=ZLF -- #export DIR_INSTANCE=/usr/sap/ZLF/HDB02 -- #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DIR_INSTANCE/exe:$DIR_INSTANCE/exe/Python/lib -- #export PYTHONPATH=$DIR_INSTANCE/$HOST:$DIR_INSTANCE/exe/python_support:$DIR_INSTANCE/exe -- #export PYTHONHOME=$DIR_INSTANCE/exe/Python -- #export SAP_RETRIEVAL_PATH=$DIR_INSTANCE/$HOST -- #export DIR_EXECUTABLE=$DIR_INSTANCE/exe -- ############################# - return $OCF_SUCCESS - } - -@@ -765,7 +912,11 @@ - # or ownership - they will be recreated by sapstartsrv during next start - rm -f /tmp/.sapstream5${InstanceNr}13 - rm -f 
/tmp/.sapstream5${InstanceNr}14 -- $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm -+ ( -+ export PATH="$DIR_EXECUTABLE${PATH:+:}$PATH" -+ export LD_LIBRARY_PATH="$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH" -+ $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm -+ ) - # now make sure the daemon has been started and is able to respond - local srvrc=1 - while [ $srvrc -eq 1 -a $(pgrep -f "sapstartsrv.*$runninginst" | wc -l) -gt 0 ] -@@ -809,31 +960,47 @@ - function check_for_primary() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=$HANA_STATE_DEFECT -- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) -- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" -- # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files? -+ # TODO: PRIO 3: Check beginning from which SPS does SAP support HDBSettings.sh? -+ # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback -+ # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsutil does not answer properly -> lookup in config files? - # This might also solve some problems when we could not figure-out the ilocal or remote site name -- for i in 1 2 3 4 5 6 7 8 9; do -+ local chkMethod="" -+ for chkMethod in hU hU hU gP; do -+ case "$chkMethod" in -+ gP ) -+ local gpKeys="" -+ gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id}) -+ node_full_status=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') -+ node_status=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}') -+ super_ocf_log info "ACT: Using getParameter.py as fallback - node_status=$node_status" -+ ;; -+ hU | * ) -+ # DONE: PRIO1: Begginning from SAP HANA rev 112.03 -sr_state is not longer supported -+ node_full_status=$(HANA_CALL --timeout 60 --cmd "$hdbState" 2>/dev/null ) -+ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -+ super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" -+ ;; -+ esac - case "$node_status" in - primary ) -- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY" -- return $HANA_STATE_PRIMARY;; -+ rc=$HANA_STATE_PRIMARY -+ break;; - syncmem | sync | async ) -- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY" -- return $HANA_STATE_SECONDARY;; -+ rc=$HANA_STATE_SECONDARY -+ break;; - none ) # have seen that mode on second side BEFEORE we registered it as replica -- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE" -- return $HANA_STATE_STANDALONE;; -+ rc=$HANA_STATE_STANDALONE -+ break;; - * ) - super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" - dump=$( echo $node_status | hexdump -C ); - super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" -- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) -- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -+ # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback -+ # SAP_CALL - super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" - # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes - esac; -+ sleep 2 - done - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - 
return $rc -@@ -854,12 +1021,18 @@ - { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam="" -- if [ -n "$rem_SR_name" ]; then -- siteParam="--site=$rem_SR_name" -+ if [ -n "$remSR_name" ]; then -+ siteParam="--site=$remSR_name" - fi -- FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? -- super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" -- super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" -+ # TODO: Get rid of the su by using a new interface: -+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh systemReplicationStatus.py $siteParam -+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? -+ # TODO: Limit the runtime of systemReplicationStatus.py -+ # SAP_CALL -+ # FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? -+ FULL_SR_STATUS=$(HANA_CALL --timeout 60 --cmd "systemReplicationStatus.py" 2>/dev/null); srRc=$? -+ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" -+ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" - # - # TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported) - # And is the check for return code capable for chains? -@@ -890,7 +1063,7 @@ - # ok we should be careful and set secondary to SFAIL - super_ocf_log info "FLOW $FUNCNAME SFAIL" - set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss" -+ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (5) - srRc=$srRc lss=$lss" - # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary - lpa_set_lpt 10 "$remoteNode" - rc=1 -@@ -898,7 +1071,7 @@ - else - super_ocf_log info "FLOW $FUNCNAME SFAIL" - set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc" -+ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (2) - srRc=$srRc" - # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary - lpa_set_lpt 10 "$remoteNode" - rc=1; -@@ -992,14 +1165,28 @@ - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 - # -- su - $sidadm -c "python $DIR_EXECUTABLE/python_support/landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? -+ # TODO: Get rid of the su by using a new interface: -+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh landscapeHostConfiguration.py -+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? -+ # DONE: Limit the runtime of landscapeHostConfiguration.py -+ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? -+ if [ $rc -eq 124 ]; then -+ # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? -+ # landscape timeout -+ sleep 20 -+ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? 
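# (Illustrative note: the rc of 124 tested next is GNU timeout(1)'s "command
#  timed out" exit status, as distinct from landscapeHostConfiguration.py's
#  own 0-4 return codes, which the callers interpret as fatal/down/warn/
#  info/ok; a second timeout after the retry is mapped to rc=0, i.e. FATAL.)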
-+ if [ $rc -eq 124 ]; then -+ # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" -+ rc=0 -+ fi -+ fi - return $rc; - } - - # - # function: register_hana_secondary - register local hana as secondary to the other site - # params: - --# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r) -+# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r), hdbver(r) - # register_hana_secondary - # - function register_hana_secondary() -@@ -1007,17 +1194,31 @@ - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=2; - local remoteInstance=""; -+ local newParameter=0 - remoteInstance=$InstanceNr -+ -+ -+ if version "$hdbver" ">=" "1.00.110"; then -+ newParameter=1 -+ fi -+ - if ocf_is_true ${AUTOMATED_REGISTER}; then -- # -- # -- # -- # -- # -- super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" -- # -- # -- su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? -+ # TODO: Get rid of the su by using a new interface: -+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_register ... -+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? -+ # TODO: Limit the runtime of hdbnsutil -sr_register ???? -+ if [ $newParameter -eq 1 ]; then -+ local hanaOM="" -+ hanaOM=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_OPERATION_MODE[@]}) -+ if [ -n "$hanaOM" ]; then -+ hanaOM="--operationMode=$hanaOM" -+ fi -+ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name" -+ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name"; rc=$? -+ else -+ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" -+ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? -+ fi - # backup_global_and_nameserver - else - super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE" -@@ -1051,7 +1252,7 @@ - check_sapstartsrv - rc=$? - # -- # TODO: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? -+ # DONE: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? - # - if [ $rc -eq $OCF_SUCCESS ]; then - output=$($SAPCONTROL -nr $InstanceNr -function Start) -@@ -1169,7 +1370,7 @@ - 0 ) # LPA says start-up - lpa_advice="start" - # TODO: PRIO1: We need to do a special handling for remote being a 234-Secondary in SR Status SOK -- # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) -+ # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) - # then lpa_advice="wait" - remoteRole=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_ROLES[@]}) - remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) -@@ -1193,17 +1394,20 @@ - 1) # LPA says register! 
- lpa_advice="register" - ;; -- 2) # LPA says wait for second LPT -+ 2) # LPA says wait for older LPA to expire -+ lpa_advice="wait" -+ ;; -+ 3) # LPA says to wait for remote LPA to be reported/announced - lpa_advice="wait" - ;; -- 3 | 4 ) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss -+ 4) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss - lpa_advice="fail" - ;; -- * ) # LPA failed with an unkonown status - FAIL resource -+ *) # LPA failed with an unkonown status - FAIL resource - lpa_advice="fail" - ;; - esac -- -+ - # DONE: PRIO2: Do we need to differ 0 and 1 here? While 0 is a fatal SAP error, 1 for down/error - if [ $lss -eq 0 ]; then - super_ocf_log err "ACT: get_hana_landscape_status reports FATAL" -@@ -1218,7 +1422,7 @@ - 2 | 3 | 4 ) # as landcape says we are up - just set the scores and return code - super_ocf_log info "LPA: landcape: UP, LPA: start ==> keep running" - LPTloc=$(date '+%s') -- lpa_set_lpt $LPTloc -+ lpa_set_lpt $LPTloc $NODENAME - rc=$OCF_SUCCESS - ;; - 1 ) # landcape says we are down, lets start and adjust scores and return code -@@ -1226,7 +1430,7 @@ - saphana_start - rc=$? - LPTloc=$(date '+%s') -- lpa_set_lpt $LPTloc -+ lpa_set_lpt $LPTloc $NODENAME - ;; - esac - scoring_crm_master "$my_role" "$my_sync" -@@ -1250,11 +1454,11 @@ - if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then - super_ocf_log info "ACT: Register successful" - lpa_push_lpt 10 -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - set_crm_master 0 - saphana_start_secondary - rc=$? -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - else - super_ocf_log err "ACT: Register failed" - rc=$OCF_NOT_RUNNING -@@ -1279,11 +1483,19 @@ - rc=$OCF_ERR_GENERIC - ;; - 1 ) # we are down, so we should wait --> followup in next monitor -- super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" -- # TODO: PRIO3: Check, if WAITING is correct here -- set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} -- set_crm_master -9000 -- rc=$OCF_SUCCESS -+ # DONE: PRIO3: Check, if WAITING is correct here -+ if ocf_is_true "$AUTOMATED_REGISTER" ; then -+ super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" -+ super_ocf_log info "RA: landcape: DOWN, LPA: wait ==> keep waiting" -+ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ set_crm_master -9000 -+ rc=$OCF_SUCCESS -+ else -+ super_ocf_log warning "LPA: OLD primary needs manual registration (AUTOMATED_REGISTER='false')" -+ set_hana_attribute ${NODENAME} "WAITING4REG" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ set_crm_master -9000 -+ rc=$OCF_NOT_RUNNING -+ fi - ;; - esac - ;; -@@ -1309,22 +1521,24 @@ - local ch ch_role - # - # get actual list of cluster members -- # -+ # - if [ -n "$otherNodes" ]; then - for ch in ${otherNodes[@]}; do - if [ $rc -eq 1 ]; then - ch_role=$(get_hana_attribute ${ch} ${ATTR_NAME_HANA_ROLES[@]}) --# TODO: PRIO3: check if [0-9], [234] or [34] is correct --# TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? --# grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 --# grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 --# Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master -- awk -F: 'BEGIN { rc=1 } -- $1 ~ "[34]" && $2 ="P" && $4="master" { rc=0 } -- END { exit rc }' <<< $ch_role ; rc=$? 
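To make the role filter concrete: the replacement lines below also change awk's "=" (assignment, always truthy) to "==" (comparison), so the P/master checks actually take effect. A quick standalone test with a hypothetical but typically shaped hana_<sid>_roles value:

ch_role="4:P:hana01:master:worker:master"   # hypothetical sample attribute
awk -F: 'BEGIN { rc=1 }
         $1 ~ "[34]" && $2 == "P" && $4 == "master" { rc=0 }
         END { exit rc }' <<< "$ch_role" && echo "running primary master"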
-+ # TODO: PRIO3: check if [0-9], [234] or [34] is correct -+ # TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? -+ # grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 -+ # grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 -+ # Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master -+ super_ocf_log debug "DBG: check_for_primary_master (3) ch_role=$ch_role" -+ awk -F: 'BEGIN { rc=1 } -+ $1 ~ "[34]" && $2 == "P" && $4 == "master" { rc=0 } -+ END { exit rc }' <<< $ch_role ; rc=$? -+ super_ocf_log debug "DBG: check_for_primary_master (4) rc=$rc" - fi - done -- fi -+ fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -1378,7 +1592,7 @@ - ####### LPA - begin - # - lpa_push_lpt 10 -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - # - ####### LPA - end - # -@@ -1404,7 +1618,7 @@ - rc=$OCF_SUCCESS - fi - else -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - fi - else - super_ocf_log info "ACT: wait_for_primary_master ==> WAITING" -@@ -1454,7 +1668,7 @@ - then - if [ $STATE -eq $OCF_NOT_RUNNING ] - then -- [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" -+ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE status color is $COLOR !" - rc=$STATE - fi - count=1 -@@ -1511,13 +1725,17 @@ - local crm_rc=1 - local lpt=$1 - local clpt=-1 -- local node=${2:-${NODENAME}} -+ local node=$2 - set_hana_attribute ${node} "$lpt" ${LPA_ATTR[@]}; crm_rc=$? -- clpt=$(lpa_get_lpt $NODENAME) -- if [ "$lpt" != "$clpt" ]; then -- rc=2 -+ if [ -n "$node" ]; then -+ clpt=$(lpa_get_lpt $NODENAME) -+ if [ "$lpt" != "$clpt" ]; then -+ rc=2 -+ else -+ rc=0 -+ fi - else -- rc=0 -+ super_ocf_log info "DEC: lpa_set_lpt ignore to change value for empty node name" - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc -@@ -1608,7 +1826,7 @@ - else - rc=2 - fi -- lpa_set_lpt $LPTloc -+ lpa_set_lpt $LPTloc $NODENAME - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -1621,9 +1839,10 @@ - # - # Returncodes: - # 0: start --# 1: register than start --# 2: wait4gab --# 3: wait4other -+# 1: register (then start) -+# 2: wait4gab (WAIT4LPA - Older LPA needs to expire) -+# 3: wait4other (WAIT4LPA - Remote LPA needs to be announced) -+# 4: lpa internal error - # - # Initializing (if NO local LPT-file): - # SECONDARY sets to 10 -@@ -1648,7 +1867,7 @@ - # - function lpa_check_lpt_status() { - super_ocf_log info "FLOW $FUNCNAME ($*)" -- local rc=0 -+ local rc=4 - local LPTloc=-1 - local LPTrem=-1 - local LPTMark=1000 -@@ -1666,16 +1885,16 @@ - if [ -z "$LPTloc" -o "$LPTloc" -eq -1 -o "$lparc" -ne 0 ]; then - # last option - try to initialize as PRIMARY - lpa_push_lpt 20 -- lpa_set_lpt 20 -+ lpa_set_lpt 20 $NODENAME - LPTloc=20 # DEFAULT - fi - fi -- # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt -+ # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt - LPTrem=$(lpa_get_lpt $remoteNode); lparc=$? 
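# (Illustrative summary of the arbitration performed below, assuming each LPT
#  is either a small marker code (10/20/30) or a real epoch timestamp:
#    LPTrem unreadable        -> rc=3, wait for the remote side to report
#    LPTloc - LPTrem > delta  -> rc=0, local primary is newer: start
#    LPTrem - LPTloc > delta  -> rc=1, register here if AUTOMATED_REGISTER
#                                is true, otherwise rc=2 and wait
#    difference within delta  -> rc=2, stalemate: keep waiting
#  where delta is 0 when both sides hold markers and DUPLICATE_PRIMARY_TIMEOUT
#  once a real timestamp is involved.)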
- if [ $lparc -ne 0 ]; then - # LPT of the other node could not be evaluated - LPA says WAIT - super_ocf_log debug "DBG: LPA: LPTloc=$LPTloc, LPTrem undefined ==> WAIT" -- rc=2 -+ rc=3 - else - super_ocf_log debug "DBG: LPA: LPTloc ($LPTloc) LPTrem ($LPTrem) delta ($delta)" - if [ $LPTloc -lt $LPTMark -a $LPTrem -lt $LPTMark ]; then -@@ -1683,11 +1902,11 @@ - else - delta=$DUPLICATE_PRIMARY_TIMEOUT # at least one of the lpts is a real timestamp so include delta-gap - fi -- if (( delta < LPTloc - LPTrem )); then -+ if (( delta < LPTloc - LPTrem )); then - # We are the winner - LPA says STARTUP - super_ocf_log debug "DBG: LPA: LPTloc wins $LPTloc > $LPTrem + $delta ==> START" - rc=0 -- elif (( delta < LPTrem - LPTloc )); then -+ elif (( delta < LPTrem - LPTloc )); then - if ocf_is_true "$AUTOMATED_REGISTER" ; then - # The other one has won - LPA says REGISTER - super_ocf_log debug "DBG: LPA: LPTrem wins $LPTrem > $LPTloc + $delta ==> REGISTER" -@@ -1697,12 +1916,12 @@ - rc=2 - fi - -- else -+ else - super_ocf_log debug "DBG: LPA: Difference between LPTloc and LPTrem is less than delta ($delta) ==> WAIT" - # TODO: PRIO3: ADD STALEMATE-HANDLING HERE; currently admin should set one of the lpa to 20 - rc=2 -- fi -- fi -+ fi -+ fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -1716,6 +1935,7 @@ - { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 -+ # always true for scale-up - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -1728,23 +1948,15 @@ - # - function saphana_start_clone() { - super_ocf_log info "FLOW $FUNCNAME ($*)" -- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING -+ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING - local sqlrc; -- local chkusr; -- # TODO: PRIO4: remove check_secstore_users later -- secUser=$(check_secstore_users SAPHANA${SID}SR SLEHALOC RHELHALOC) ; chkusr=$? -- if [ $chkusr -ne 0 ]; then -- super_ocf_log err "ACT: Secure store users are missing (see best practice manual how to setup the users)" -- rc=$OCF_ERR_CONFIGURED -+ set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ check_for_primary; primary_status=$? -+ if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then -+ saphana_start_primary; rc=$? - else -- set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} -- check_for_primary; primary_status=$? -- if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then -- saphana_start_primary; rc=$? -- else -- lpa_set_lpt 10 -- saphana_start_secondary; rc=$? -- fi -+ lpa_set_lpt 10 $NODENAME -+ saphana_start_secondary; rc=$? - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc -@@ -1761,9 +1973,10 @@ - local rc=0 - local primary_status="x" - set_hana_attribute ${NODENAME} "UNDEFINED" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ super_ocf_log debug "DBG: SET UNDEFINED" - check_for_primary; primary_status=$? - if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - fi - saphana_stop; rc=$? - return $rc -@@ -1813,26 +2026,42 @@ - # seems admin already decided that for us? -> we are running - set DEMOTED - promoted=0; - LPTloc=$(date '+%s') -- lpa_set_lpt $LPTloc -+ lpa_set_lpt $LPTloc $NODENAME - fi - lpa_check_lpt_status; lparc=$? -- # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes -- if [ $lparc -lt 2 ]; then -- # lpa - no need to wait any longer - lets try a new start -- saphana_start_clone -- rc=$? 
-- super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -- else -- lpa_init_lpt $HANA_STATE_PRIMARY -- # still waiting for second site to report lpa-lpt -- if ocf_is_true "$AUTOMATED_REGISTER" ; then -- super_ocf_log info "LPA: Still waiting for remote site to report LPA status" -- else -- super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" -- fi -- return $OCF_SUCCESS -- fi -+ # DONE: PRIO1: Need to differ lpa_check_lpt_status return codes -+ case "$lparc" in -+ 0 | 1 ) -+ # lpa - no need to wait any longer - lets try a new start -+ saphana_start_clone -+ rc=$? -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ return $rc -+ ;; -+ 2 ) -+ lpa_init_lpt $HANA_STATE_PRIMARY -+ # still waiting for second site to expire -+ if ocf_is_true "$AUTOMATED_REGISTER" ; then -+ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" -+ else -+ super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" -+ super_ocf_log info "LPA: You need to manually sr_register the older primary" -+ fi -+ return $OCF_SUCCESS -+ ;; -+ 3 ) -+ lpa_init_lpt $HANA_STATE_PRIMARY -+ # still waiting for second site to report lpa-lpt -+ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" -+ return $OCF_SUCCESS -+ ;; -+ 4 ) -+ # lpa internal error -+ # TODO PRIO3: Impplement special handling for this issue - should we fail the ressource? -+ super_ocf_log info "LPA: LPA reports an internal error" -+ return $OCF_SUCCESS -+ ;; -+ esac - promoted=0; - ;; - UNDEFINED ) -@@ -1848,7 +2077,7 @@ - ;; - esac - fi -- get_hana_landscape_status; lss=$? -+ get_hana_landscape_status; lss=$? - super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" - case "$lss" in - 0 ) # FATAL or ERROR -@@ -1876,19 +2105,20 @@ - # - # TODO PRIO1: REMOVE remoteNode dependency - get_sync_status - remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) -+ # TODO HANDLING OF "NEVER" - case "$remoteSync" in - SOK | PRIM ) - super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)" - set_crm_master 5 - if check_for_primary_master; then -- lpa_set_lpt 20 -+ lpa_set_lpt 20 $NODENAME - fi - ;; - SFAIL ) -- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" -+ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" - ;; - * ) -- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" -+ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" - ;; - esac - else -@@ -1916,7 +2146,7 @@ - rc=$OCF_SUCCESS - else - LPTloc=$(date '+%s') -- lpa_set_lpt $LPTloc -+ lpa_set_lpt $LPTloc $NODENAME - lpa_push_lpt $LPTloc - if [ "$promoted" -eq 1 ]; then - set_hana_attribute "$NODENAME" "PRIM" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -@@ -1931,12 +2161,14 @@ - fi - my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) - my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) -- case "$my_role" in -+ case "$my_role" in - [12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status - ;; - [34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query - if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then -- 
analyze_hana_sync_statusSRS -+ if [ "$promote_attr" = "PROMOTED" ]; then -+ analyze_hana_sync_statusSRS -+ fi - else - analyze_hana_sync_statusSQL - fi -@@ -1949,8 +2181,8 @@ - [234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster - lpa_check_lpt_status; again_lpa_rc=$? - if [ $again_lpa_rc -eq 2 ]; then -- super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" -- lpa_set_lpt 10 -+ super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" -+ lpa_set_lpt 10 $NODENAME - lpa_push_lpt 10 - rc=$OCF_NOT_RUNNING - fi -@@ -1993,7 +2225,7 @@ - # OK, we are running as HANA SECONDARY - # - if ! lpa_get_lpt ${NODENAME}; then -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - lpa_push_lpt 10 - fi - promote_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_CLONE_STATE[@]}) -@@ -2042,17 +2274,25 @@ - 0 ) # FATAL - # DONE: PRIO1: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error - # TODO: PRIO3: is OCF_ERR_GENERIC best option? -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - rc=$OCF_ERR_GENERIC - ;; - 1 ) # ERROR -- lpa_set_lpt 10 -+ lpa_set_lpt 10 $NODENAME - rc=$OCF_NOT_RUNNING - ;; - 2 | 3 | 4 ) # WARN INFO OK - rc=$OCF_SUCCESS -- lpa_set_lpt 30 -+ lpa_set_lpt 30 $NODENAME - sync_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) -+ local hanaOM="" -+ local hanaOut1="" -+ # TODO: PRIO 3: check, if using getParameter.py is the best option to analyze the set operationMode -+ # DONE: PRIO 3: Should we default to logreplay for SAP HANA >= SPS11 ? -+ hanaOut1=$(HANA_CALL --timeout 10 --use-su --cmd "getParameter.py --key=global.ini/system_replication/operation_mode --sapcontrol=1") -+ hanaFilter1=$(echo "$hanaOut1" | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') -+ hanaOM=$(echo "$hanaFilter1" | awk -F= '$1=="operation_mode" {print $2}') -+ set_hana_attribute ${NODENAME} "$hanaOM" ${ATTR_NAME_HANA_OPERATION_MODE[@]} - super_ocf_log debug "DBG: sync_attr=$sync_attr" - case "$sync_attr" in - "SOK" ) # This is a possible node to promote, when primary is missing -@@ -2112,7 +2352,7 @@ - fi - # - # First check, if we are PRIMARY or SECONDARY -- # -+ # - check_for_primary; primary_status=$? - if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then - # FIX: bsc#919925 Leaving Node Maintenance stops HANA Resource Agent -@@ -2145,7 +2385,7 @@ - # - # function: saphana_promote_clone - promote a hana clone - # params: - --# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), -+# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), - # saphana_promote_clone: - # In a Master/Slave configuration get Master being the primary OR by running hana takeover - # -@@ -2169,7 +2409,7 @@ - else - if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then - # -- # we are SECONDARY/SLAVE and need to takepover ... -+ # we are SECONDARY/SLAVE and need to takeover ... promote on the replica (secondary) side... - # promote on the replica side... - # - hana_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) -@@ -2178,9 +2418,14 @@ - super_ocf_log info "ACT: !!!!!!! Promote REPLICA $SID-$InstanceName to be primary. !!!!!!" 
- LPTloc=$(date '+%s') - # lpa_set_lpt 20 $remoteNode -- lpa_set_lpt $LPTloc -+ lpa_set_lpt $LPTloc $NODENAME - lpa_push_lpt $LPTloc -- su - $sidadm -c "hdbnsutil -sr_takeover" -+ # TODO: Get rid of the su by using a new interface: -+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_takeover ... -+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? -+ # TODO: Limit the runtime of hdbnsutil -sr_takeover ???? -+ # SAP_CALL -+ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_takeover" - # - # now gain check, if we are primary NOW - # -@@ -2248,7 +2493,6 @@ - SAPSTARTPROFILE="" - SAPHanaFilter="ra-act-dec-lpa" - --NODENAME=$(crm_node -n) - - - if [ $# -ne 1 ] -@@ -2306,8 +2550,7 @@ - fi - - # What kind of method was invoked? --THE_VERSION=$(saphana_meta_data | grep ' - - -- 0.151.1 -+ $SAPHanaVersion - Analyzes SAP HANA System Replication Topology. - This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to - all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases. -@@ -207,12 +215,12 @@ - dstr=$(date) - case "$attr_store" in - reboot | forever ) -- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE -- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file -+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? - ;; - props ) -- echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE -- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file -+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? - ;; - esac - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -@@ -282,6 +290,53 @@ - } - - # -+# function: dequote - filter: remove quotes (") from stdin -+# params: - -+# globals: - -+function dequote() -+{ -+ local rc=0; tr -d '"'; return $rc -+} -+ -+# function: version: cpmpare two HANA version strings -+function ver_lt() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 0 && return 0 || return 1 -+} -+ -+function ver_le() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 -+} -+ -+function ver_gt() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 2 && return 0 || return 1 -+} -+ -+function ver_ge() { -+ ocf_version_cmp $1 $2 -+ test $? -eq 2 -o $? 
-eq 1 && return 0 || return 1 -+} -+# -+# function: version: cpmpare two HANA version strings -+# -+function version() { -+ if [ $# -eq 3 ]; then -+ case "$2" in -+ LE | le | "<=" ) ver_le $1 $3;; -+ LT | lt | "<" ) ver_lt $1 $3;; -+ GE | ge | ">=" ) ver_ge $1 $3;; -+ GT | gt | ">" ) ver_gt $1 $3;; -+ * ) return 1; -+ esac -+ elif [ $# -ge 5 ]; then -+ version $1 $2 $3 && shift 2 && version $* -+ else -+ return 1; -+ fi -+} -+# - # function: is_clone - report, if resource is configured as a clone (also master/slave) - # params: - - # globals: OCF_*(r) -@@ -314,12 +369,74 @@ - } - - # -+# function: HANA_CALL -+# params: timeout-in-seconds cmd-line -+# globals: sid(r), SID(r), InstanceName(r) -+# -+function HANA_CALL() -+{ -+ # -+ # TODO: PRIO 5: remove 'su - ${sidadm} later, when SAP HANA resoled issue with -+ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp) -+ # TODO: PRIO 5: Maybe make "su" optional by a parameter -+ local timeOut=0 -+ local onTimeOut="" -+ local rc=0 -+ local use_su=1 # Default to be changed later (see TODO above) -+ local pre_cmd="" -+ local cmd="" -+ local pre_script="" -+ local output="" -+ while [ $# -gt 0 ]; do -+ case "$1" in -+ --timeout ) timeOut=$2; shift;; -+ --use-su ) use_su=1;; -+ --on-timeout ) onTimeOut="$2"; shift;; -+ --cmd ) shift; cmd="$*"; break;; -+ esac -+ shift -+ done -+ -+ if [ $use_su -eq 1 ]; then -+ pre_cmd="su - ${sid}adm -c" -+ pre_script="true" -+ else -+ # as root user we need the library path to the SAP kernel to be able to call sapcontrol -+ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH -+ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] -+ then -+ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH -+ fi -+ pre_cmd="bash -c" -+ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH" -+ fi -+ case $timeout in -+ 0 | inf ) -+ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? -+ ;; -+ * ) -+ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? -+ # -+ # on timeout ... 
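# (Illustrative note: this SAPHanaTopology copy of HANA_CALL switches on
#  $timeout while its option parser sets $timeOut, so the "0 | inf" fast
#  path above is never taken and every call runs under timeout(1); since a
#  GNU timeout duration of 0 disables the limit, behaviour is largely
#  preserved by accident. The SAPHana copy earlier in this series switches
#  on $timeOut as intended.)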
-+ # -+ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then -+ local second_output="" -+ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); -+ fi -+ ;; -+ esac -+ echo "$output" -+ return $rc; -+} -+ -+# - # function: sht_init - initialize variables for the resource agent - # params: - - # globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), --# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) -+# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w) - # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w) - # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w) -+# globals: NODENAME(w), hdbver(w) - # sht_init : Define global variables with default values, if optional parameters are not set - # - # -@@ -331,12 +448,14 @@ - local hdbANSWER="" - local siteID - local siteNAME -+ local chkMethod="" - HOSTEXECNAME=saphostexec - USRSAP=/usr/sap - SAPSERVICE_PATH=${USRSAP}/sapservices - SAPHOSTCTRL_PATH=${USRSAP}/hostctrl/exe - HOSTEXEC_PATH=${SAPHOSTCTRL_PATH}/${HOSTEXECNAME} - HOSTEXEC_PROFILE_PATH=${SAPHOSTCTRL_PATH}/host_profile -+ NODENAME=$(crm_node -n) - SID=$OCF_RESKEY_SID - InstanceNr=$OCF_RESKEY_InstanceNumber - myInstanceName="${SID}_HDB${InstanceNr}" -@@ -382,13 +501,6 @@ - DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" - fi - -- # as root user we need the library path to the SAP kernel to be able to call sapcontrol -- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH -- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] -- then -- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH -- export LD_LIBRARY_PATH -- fi - - PATH=${PATH}:${DIR_EXECUTABLE} - # -@@ -399,12 +511,45 @@ - *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; - *cman* ) nodelist=$(crm_node -l);; - esac -+ # -+ # get HANA version -+ # -+ local ges_ver -+ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') -+ hdbver=${ges_ver%.*.*} -+ # -+ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state -+ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible -+ # -+ hdbState="hdbnsutil -sr_state" -+ hdbMap="hdbnsutil -sr_state" -+ if version "$hdbver" ">=" "1.00.111"; then -+ hdbState="hdbnsutil -sr_stateConfiguration" -+ hdbMap="hdbnsutil -sr_stateHostMapping" -+ fi - #### SAP-CALL - # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode -- for i in 1 2 3 4 5 6 7 8 9; do -- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) -- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" -- srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') -+ for chkMethod in hU hU hU gP ; do -+ # DONE: Limit the runtime of hdbnsutil. 
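# (Illustrative note: the chkMethod loop opened just above gives hdbnsutil
#  three attempts (hU) before falling back to getParameter.py (gP), which
#  reads mode/site_name/site_id from global.ini - matching the observation
#  that hdbnsutil was unstable; the case arms below implement both methods,
#  and the loop exits once srmode parses to primary/syncmem/sync/async/none.)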
-+ # TODO: Use getParameter.py if we get no answer
-+ # SAP_CALL
-+ #super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER"
-+ #srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
-+ case "$chkMethod" in
-+ gP ) # call getParameter (gP)
-+ local gpKeys=""
-+ gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id})
-+ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }')
-+ srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}')
-+ super_ocf_log info "ACT: hdbnsutil not answering - using global.ini as fallback - srmode=$srmode"
-+ ;;
-+ hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod )
-+ # DONE: PRIO1: Beginning from SAP HANA rev 112.03 -sr_state is no longer supported
-+ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbState --sapcontrol=1" 2>/dev/null)
-+ super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER"
-+ srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}')
-+ ;;
-+ esac
- case "$srmode" in
- primary | syncmem | sync | async | none )
- # we can leave the loop as we already got a result
-@@ -417,27 +562,51 @@
- esac
- done
- # TODO PRIO3: Implement a file lookup, if we did not get a result
-- siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}')
-- siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
-+ siteID=$(echo "$hdbANSWER" | awk -F= '/site.id/ {print $2}') # allow 'site_id' AND 'site id'
-+ siteNAME=$(echo "$hdbANSWER" | awk -F= '/site.name/ {print $2}')
- site=$siteNAME
- srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
-- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site)
-- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
- #
-- # filter all non-cluster mappings
-+ # for rev >= 111 we use the new mapping query
- #
-- # DONE: PRIO2: Need mapping between HANA HOSTS not cluster NODES
-- local hanaVHost
-- hanaRemoteHost=$(for n1 in $nodelist; do
-- hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
-- for n2 in $MAPPING; do
-- if [ "$hanaVHost" == "$n2" ]; then
-- echo $hanaVHost;
-- fi;
-- done;
-- done )
-- super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
-- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
-+ if version "$hdbver" ">=" "1.00.111"; then
-+ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbMap --sapcontrol=1" 2>/dev/null)
-+ fi
-+ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site)
-+ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
-+ if [ -n "$MAPPING" ]; then
-+ # we have a mapping from HANA, let's use it
-+ #
-+ # filter all non-cluster mappings
-+ #
-+ local hanaVHost=""
-+ local n1=""
-+ hanaRemoteHost=""
-+ for n1 in $nodelist; do
-+ hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
-+ for n2 in $MAPPING; do
-+ if [ "$hanaVHost" == "$n2" ]; then
-+ hanaRemoteHost="$hanaVHost"
-+ fi;
-+ done;
-+ done
-+ super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
-+ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
-+ else
-+ # HANA DID NOT TELL THE MAPPING, LET'S TRY TO USE THE SITE ATTRIBUTES
-+ local n1=""
-+ local hanaSite=""
-+ for n1 in $nodelist; do
-+ # TODO: PRIO9 - For multi tier with more than 2 chain/star members IN the cluster we might need to be
-+ # able to catch more than one remoteHost
-+ # currently having more than 2 HANA in a chain/star members IN the cluster is not allowed, the third must be external
-+ if [ "$NODENAME" != "$n1" ]; then
-+ hanaSite=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_SITE[@]})
-+ hanaRemoteHost="$n1"
-+ fi
-+ done
-+ super_ocf_log info "DEC: site=$site, mode=$srmode, hanaRemoteHost=$hanaRemoteHost - found by remote site ($hanaSite)"
-+ fi
- super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS"
- return $OCF_SUCCESS
- }
-@@ -446,38 +615,29 @@
- # function: check_for_primary - check if local SAP HANA is configured as primary
- # params: -
- # globals: HANA_STATE_PRIMARY(r), HANA_STATE_SECONDARY(r), HANA_STATE_DEFECT(r), HANA_STATE_STANDALONE(r)
-+# srmode(r)
- #
- function check_for_primary() {
- super_ocf_log info "FLOW $FUNCNAME ($*)"
- local rc=0
-- node_status=$srmode
-- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status"
-- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
-- for i in 1 2 3 4 5 6 7 8 9; do
-- case "$node_status" in
-- primary )
-- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
-- return $HANA_STATE_PRIMARY;;
-- syncmem | sync | async )
-- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
-- return $HANA_STATE_SECONDARY;;
-- none ) # have seen that mode on second side BEFORE we registered it as replica
-- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
-- return $HANA_STATE_STANDALONE;;
-- * )
-- # TODO: PRIO1: Should we set SFAIL?
-- # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
-- dump=$( echo $node_status | hexdump -C );
-- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>"
-- #### SAP-CALL
-- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
-- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
-- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status"
-- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
-- esac;
-- done
-- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT"
-- return $HANA_STATE_DEFECT
-+ super_ocf_log debug "DBG: check_for_primary: srmode=$srmode"
-+ case "$srmode" in
-+ primary )
-+ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
-+ rc=$HANA_STATE_PRIMARY;;
-+ syncmem | sync | async )
-+ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
-+ rc=$HANA_STATE_SECONDARY;;
-+ none ) # have seen that mode on second side BEFORE we registered it as replica
-+ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
-+ rc=$HANA_STATE_STANDALONE;;
-+ * )
-+ dump=$( echo $srmode | hexdump -C );
-+ super_ocf_log err "ACT: check_for_primary: we didn't expect srmode to be: DUMP: <$dump>"
-+ rc=$HANA_STATE_DEFECT
-+ esac;
-+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
-+ return $rc
- }
-
-
-@@ -653,7 +813,7 @@
- function sht_stop_clone() {
- super_ocf_log info "FLOW $FUNCNAME ($*)"
- local rc=0
-- check_for_primary; primary_status=$?
-+ check_for_primary; primary_status=$?
- if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
- hanaPrim="P"
- elif [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
-@@ -663,7 +823,7 @@
- else
- hanaPrim="-"
- fi
-- set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]}
-+ set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]}
- sht_stop; rc=$?
- return $rc
- }
-@@ -718,28 +878,49 @@
- fi
- # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API?
- # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
-- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8
-+ # We rely on the following format: SID is word#4, SYSNR is word#6, vHost is word#8
- #### SAP-CALL
- vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \
-- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr 2>/dev/null )
-+ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null )
- # super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
- if [ -n "$vName" ]; then
-- set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]}
-+ set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]}
- else
- vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]})
- fi
- #site=$(get_site_name)
- #### SAP-CALL
-- hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
-- hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName})
-+ # SAP_CALL
-+ #hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
-+ #
-+ # since rev 09x SAP has added the --sapcontrol option for the landscapeHostConfiguration interface
-+ # we begin to use --sapcontrol with rev 100
-+ # since rev 120 we need to use the --sapcontrol, because SAP changed the tool output
-+ #
-+ if version "$hdbver" ">=" "1.00.100"; then
-+ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); hanalrc="$?"
-+ # TODO: PRIO9: Do we need to check the lines: 'SAPCONTROL-OK: <begin>' and 'SAPCONTROL-OK: <end>'?
-+ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | \
-+ awk -F= '$1 == "nameServerConfigRole" {f1=$2}
-+ $1 == "nameServerActualRole" {f2=$2}
-+ $1 == "indexServerConfigRole" {f3=$2}
-+ $1 == "indexServerActualRole" {f4=$2}
-+ END { printf "%s:%s:%s:%s\n", f1, f2, f3,f4 }')
-+ else
-+ #
-+ # old code for backward compatibility
-+ #
-+ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
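(Editor's sketch, illustrative only: in this legacy branch, landscapeHostConfiguration.py prints a '|'-separated table with one row per host; the awk that follows selects the row whose second field equals the local virtual hostname and packs fields 10-13 - the nameserver/indexserver config and actual roles - into one value. The sample value is invented, not taken from a real system:)

    # after the legacy parsing, hanarole might look like:
    #   master1:master:worker:master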
-+ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName}) -+ fi - #if [ -z "$MAPPING" ]; then - # super_ocf_log info "ACT: Did not find remote Host at this moment" - #fi - # FH TODO PRIO3: TRY TO GET RID OF "ATTR_NAME_HANA_REMOTEHOST" - if [ -n "$hanaRemoteHost" ]; then -- set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} -+ set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} - fi -- set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} -+ set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} - if [ -n "$site" ]; then - set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]} - fi -@@ -748,8 +929,8 @@ - S ) # only secondary may propargate its sync status - case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in - *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; -- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; -- *cman* ) nodelist=$(crm_node -l);; -+ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; -+ *cman* ) nodelist=$(crm_node -l);; - esac - - for n in ${nodelist}; do -@@ -789,7 +970,6 @@ - InstanceNr="" - DIR_EXECUTABLE="" - SAPHanaFilter="ra-act-dec-lpa" --NODENAME=$(crm_node -n) - - if [ $# -ne 1 ] - then -@@ -846,8 +1026,7 @@ - fi - fi - --THE_VERSION=$(sht_meta_data | grep ' -+# License: GPL v2+ -+my $Version="0.18.2016.02.16.1"; - # -+################################################################## - use POSIX; - use strict; -+use Sys::Syslog; -+use Sys::Hostname; -+use File::Path; -+use Getopt::Long; -+use lib '/usr/share/SAPHanaSR/tests'; -+use SAPHanaSRTools; -+ -+################################### -+## this part is not for scale out and currently NOT zero-config -+ -+my $ClusterNodes=2; -+my $ClusterPrimaries=1; -+my $ClusterSecondaries=1; -+my %Name; -+my %Host; -+my $host = hostname(); - -+my $varlib='/var/lib/SAPHanaTD'; -+my $testfile='SAPHanaTD.status'; -+my $testcount=0; -+my $first_test=1; - my $sid=""; --my $table_title = "Host \\ Attr"; --my %Name; -+my @sids; -+my $ino=""; -+my $sortBy=""; -+my $table_titleH = "Host"; -+#my %Name; - my %Host; -+my %Site; -+my %Global; -+my %HName; -+my %SName; -+my %GName; -+my $help; -+my $version; -+my $cibFile=""; -+ -+sub init() -+{ -+ my $result = GetOptions ("sid=s" => \@sids, -+ "sort=s" => \$sortBy, -+ "cib=s" => \$cibFile, -+ "version" => \$version, -+ "help" => \$help, -+ ); -+ return 0; -+} -+ -+init(); -+ -+if ( $help ) { -+ printf "SAPHanaSR-showAttr {[--sid=]} [--sort=] [--cib=]\n"; -+ printf ""; -+ exit 0; -+} -+if ( $version ) { -+ printf "%s\n", $Version; -+ exit 0; -+} -+ -+if ( $cibFile ne "" ) { -+ printf "Using cib file %s\n", $cibFile; -+} - - sub max { # thanks to http://www.perlunity.de/perl/forum/thread_018329.shtml - my $a = shift; -@@ -21,113 +80,75 @@ - return $a > $b ? 
$a : $b; - } - --sub print_attr_host() --{ -- my ($HKey, $AKey); -- printf "%-22s", "Attribute \\ Host"; -- foreach $HKey (sort keys %Host) { -- printf "%-16s ", $HKey; -- } -- printf "\n"; -- -- printf "%s\n", "-" x 120 ; -- -- foreach $AKey (sort keys %Name) { -- printf "%-22s", $AKey; -- foreach $HKey (sort keys %Host) { -- printf "%-16.16s ", $Host{$HKey} -> {$AKey}; -- } -- -- printf "\n"; -- } -- return 0; --} -- --sub print_host_attr() --{ -- my ($AKey, $HKey, $len, $line_len, $hclen); -- $hclen=$Name{_hosts}->{_length}; -- $line_len=$hclen+1; -- printf "%-$hclen.${hclen}s ", "$table_title"; -- foreach $AKey (sort keys %Name) { -- if ($AKey ne "_hosts") { -- $len = $Name{$AKey}->{_length}; -- $line_len=$line_len+$len+1; -- printf "%-$len.${len}s ", $Name{$AKey}->{_title}; -+sub read_cib($) { -+ my $sid = shift(); -+ if ( $cibFile eq "" ) { -+ printf "Open live cib\n"; -+ open CIB, "cibadmin -Ql |" or die "CIB could not be read from cluster"; -+ } else { -+ open CIB, "<$cibFile" or die "CIB file $cibFile not found or not able to read it"; -+ } -+ while () { -+ chomp; -+ my ($host, $name, $site, $value); -+ if ( $_ =~ /cib-last-written="([^"]*)"/ ) { -+ printf "CIB-time: %s\n", $1; - } -- } -- printf "\n"; -- printf "%s\n", "-" x $line_len ; -- foreach $HKey (sort keys %Host) { -- printf "%-$hclen.${hclen}s ", $HKey; -- foreach $AKey (sort keys %Name) { -- if ($AKey ne "_hosts") { -- $len = $Name{$AKey}->{_length}; -- printf "%-$len.${len}s ", $Host{$HKey} -> {$AKey}; -- } -- } -- printf "\n"; -- } -- return 0; --} -- --open ListInstances, "/usr/sap/hostctrl/exe/saphostctrl -function ListInstances|"; --while () { -- # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 -- chomp; -- if ( $_ =~ /:\s+([A-Z][A-Z0-9][A-Z0-9])\s+-/ ) { -- $sid=tolower("$1"); -- } --} --close ListInstances; -- -- --open CIB, "cibadmin -Ql |"; --while () { -- chomp; -- my ($host, $name, $value); -- my $found=0; -- if ( $_ =~ /nvpair.*name="(\w+_${sid}_\w+)"/ ) { -- $name=$1; -- # find attribute in forever and reboot store :) -- if ( $_ =~ /id="(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) { -- $host=$2; -- } -- if ( $_ =~ /value="([^"]+)"/ ) { -- $value=$1; -- $found=1; -- } -- } -- if ( $found == 1 ) { -- # -- # handle the hosts name and table-title -- # -- $Host{$host}->{$name}=${value}; -- if ( defined ($Name{_hosts}->{_length})) { -- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length($host )); -- } else { -- $Name{_hosts}->{_length} = length($host ); -+ if ( $_ =~ /node_state id=".+" uname="([a-zA-Z0-9\-\_]+)" .*crmd="([a-zA-Z0-9\-\_]+)"/ ) { -+ insertAttribute($sid, \%Host, \%HName, $1, "node_status", $2); - } -- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length( $table_title)); -- # -- # now handle the attributes name and value -- # -- $Name{$name}->{$host}=${value}; -- if ( defined ($Name{$name}->{_length})) { -- $Name{$name}->{_length} = max($Name{$name}->{_length}, length($value )); -- } else { -- $Name{$name}->{_length} = length($value ); -+ if ( $_ =~ /nvpair.*name="([a-zA-Z0-9\_\-]+_${sid}_([a-zA-Z0-9\-\_]+))"/ ) { -+ $name=$1; -+ if ( $_ =~ /id=.(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) { -+ # found attribute in nodes forever and reboot store -+ $host=$2; -+ if ( $_ =~ /value="([^"]+)"/ ) { -+ $value=$1; -+ insertAttribute($sid, \%Host, \%HName, $host, $name, $value); -+ } -+ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_site_[a-zA-Z0-9\-]+_([a-zA-Z0-9\_\-]+)/) { -+ # found a site attribute -+ $site=$1; -+ if ( $name =~ 
/[a-zA-Z0-9\_\-]+_site_([a-zA-Z0-9\-]+)/ ) { -+ $name = $1; -+ } -+ if ( $_ =~ /value="([^"]+)"/ ) { -+ $value=$1; -+ insertAttribute($sid, \%Site, \%SName, $site, $name, $value); -+ } -+ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_glob_[a-zA-Z0-9\_\-]+/) { -+ # found a global attribute -+ $host="GLOBAL"; -+ if ( $name =~ /([a-zA-Z0-9\_\-]+)_glob_([a-zA-Z0-9\_\-]+)/ ) { -+ $name = $2; -+ } -+ if ( $_ =~ /value="([^"]+)"/ ) { -+ $value=$1; -+ insertAttribute($sid, \%Global, \%GName, "global", $name, $value); -+ } -+ } - } -- if ( $name =~ /hana_${sid}_(.*)/ ) { -- $Name{$name}->{_title} = $1; -- } else { -- $Name{$name}->{_title} = $name; -- } -- $Name{$name}->{_length} = max($Name{$name}->{_length}, length( $Name{$name}->{_title})); -- # printf "%-8s %-20s %-30s\n", $1, $2, $3; -- } -+ } -+ close CIB; - } --close CIB; - --#print_attr_host; --print_host_attr; -+if ( 0 == @sids ) { -+ my $sid_ino_list; -+ ( $sid_ino_list ) = get_sid_and_InstNr(); -+ @sids = split(",", $sid_ino_list); -+ -+} -+ -+foreach $sid (@sids) { -+ ( $sid, $ino ) = split(":", $sid); -+ $sid=tolower("$sid"); -+ %Host=(); -+ %HName=(); -+ read_cib($sid); -+ get_hana_attributes($sid); -+ if ( keys(%Host) == 0 ) { -+ printf "No attributes found for SID=%s\n", $sid; -+ } else { -+ print_host_attr(\%Host, \%HName, "Hosts", $sortBy); -+ } -+} diff --git a/SOURCES/bz1423424-2-update-saphana-saphanatopology.patch b/SOURCES/bz1423424-2-update-saphana-saphanatopology.patch deleted file mode 100644 index 2b9637b..0000000 --- a/SOURCES/bz1423424-2-update-saphana-saphanatopology.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana ---- a/heartbeat/SAPHana 2016-11-17 09:35:47.460984046 +0100 -+++ b/heartbeat/SAPHana 2016-11-17 09:36:20.536591188 +0100 -@@ -133,8 +133,8 @@ - function backup_global_and_nameserver() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 -- cp /hana/shared/LNX/global/hdb/custom/config/global.ini /hana/shared/LNX/global/hdb/custom/config/global.ini.$(date +"%s") -- cp /hana/shared/LNX/global/hdb/custom/config/nameserver.ini /hana/shared/LNX/global/hdb/custom/config/nameserver.ini.$(date +"%s") -+ cp /hana/shared/$SID/global/hdb/custom/config/global.ini /hana/shared/$SID/global/hdb/custom/config/global.ini.$(date +"%s") -+ cp /hana/shared/$SID/global/hdb/custom/config/nameserver.ini /hana/shared/$SID/global/hdb/custom/config/nameserver.ini.$(date +"%s") - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } diff --git a/SOURCES/bz1427574-DB2-fix-HADR-DB2-V98-or-later.patch b/SOURCES/bz1427574-DB2-fix-HADR-DB2-V98-or-later.patch new file mode 100644 index 0000000..ed1a442 --- /dev/null +++ b/SOURCES/bz1427574-DB2-fix-HADR-DB2-V98-or-later.patch @@ -0,0 +1,68 @@ +From b5d3f7347ff423868d3735df377c649c3e81a12a Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 6 Apr 2017 11:36:44 +0200 +Subject: [PATCH] DB2: fix HADR support for DB2 V98+ + +--- + heartbeat/db2 | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/heartbeat/db2 b/heartbeat/db2 +index c522699..63de315 100755 +--- a/heartbeat/db2 ++++ b/heartbeat/db2 +@@ -650,7 +650,9 @@ db2_hadr_status() { + fi + + echo "$output" | +- awk '/^HADR is not active/ {print "Standard/Standalone"; exit; } ++ awk '/^\s+HADR_(ROLE|STATE) =/ {printf $3"/"} ++ /^\s+HADR_CONNECT_STATUS =/ {print $3; exit; } ++ /^HADR is not active/ {print "Standard/Standalone"; exit; } + /^Role *State */ {getline; printf "%s/%s\n", $1, $2; exit; }' + } + +@@ -680,7 +682,7 @@ db2_monitor() { + + # 
set master preference accordingly + case "$hadr" in +- Primary/*|Standard/*) ++ PRIMARY/*|Primary/*|Standard/*) + # perform a basic health check + CMD="if db2 connect to $db; + then +@@ -712,11 +714,11 @@ db2_monitor() { + ocf_is_ms && master_score -v 10000 -l reboot + ;; + +- Standby/*Peer) ++ STANDBY/PEER/*|Standby/*Peer) + master_score -v 8000 -l reboot + ;; + +- Standby/*) ++ STANDBY/*|Standby/*) + ocf_log warn "DB2 database $instance($db2node)/$db in status $hadr can never be promoted" + master_score -D -l reboot + ;; +@@ -755,17 +757,17 @@ db2_promote() { + return $OCF_SUCCESS + ;; + +- Primary/Peer) ++ PRIMARY/PEER/*|PRIMARY/REMOTE_CATCHUP/*|Primary/Peer) + # nothing to do, only update pacemaker's view + echo MASTER > $STATE_FILE + return $OCF_SUCCESS + ;; + +- Standby/Peer) ++ STANDBY/PEER/CONNECTED|Standby/Peer) + # must take over + ;; + +- Standby/DisconnectedPeer) ++ STANDBY/PEER/DISCONNECTED|Standby/DisconnectedPeer) + # must take over forced + force="by force peer window only" + ;; diff --git a/SOURCES/bz1427611-ocf_log-use-same-log-format-as-pacemaker.patch b/SOURCES/bz1427611-ocf_log-use-same-log-format-as-pacemaker.patch new file mode 100644 index 0000000..6ba9171 --- /dev/null +++ b/SOURCES/bz1427611-ocf_log-use-same-log-format-as-pacemaker.patch @@ -0,0 +1,39 @@ +From 75816393878bf063a8c3404b5c747868024e1097 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 1 Mar 2017 15:34:26 +0100 +Subject: [PATCH] ocf_log: use same log format as pacemaker + +--- + heartbeat/ocf-directories.in | 2 +- + heartbeat/ocf-shellfuncs.in | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/ocf-directories.in b/heartbeat/ocf-directories.in +index 8d70776..d8df035 100644 +--- a/heartbeat/ocf-directories.in ++++ b/heartbeat/ocf-directories.in +@@ -13,7 +13,7 @@ exec_prefix=@exec_prefix@ + : ${HA_FIFO:=@localstatedir@/lib/heartbeat/fifo} + : ${HA_BIN:=@libexecdir@/heartbeat} + : ${HA_SBIN_DIR:=@sbindir@} +-: ${HA_DATEFMT:="%Y/%m/%d_%T "} ++: ${HA_DATEFMT:="%b %d %T "} + : ${HA_DEBUGLOG:=/dev/null} + : ${HA_RESOURCEDIR:=$HA_DIR/resource.d} + : ${HA_DOCDIR:=@datadir@/doc/heartbeat} +diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in +index 2a3b875..87b2adf 100644 +--- a/heartbeat/ocf-shellfuncs.in ++++ b/heartbeat/ocf-shellfuncs.in +@@ -231,7 +231,7 @@ __ha_log() { + [ -n "$HA_LOGFILE" ] + then + : appending to $HA_LOGFILE +- echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_LOGFILE ++ echo `hadate`" $HA_LOGTAG: ${*}" >> $HA_LOGFILE + fi + if + [ -z "$HA_LOGFACILITY" -a -z "$HA_LOGFILE" ] && ! 
[ "$ignore_stderr" = "true" ] +-- +2.9.3 + diff --git a/SOURCES/bz1430304-NodeUtilization.patch b/SOURCES/bz1430304-NodeUtilization.patch new file mode 100644 index 0000000..3f90a69 --- /dev/null +++ b/SOURCES/bz1430304-NodeUtilization.patch @@ -0,0 +1,252 @@ +diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am +--- a/doc/man/Makefile.am 2017-03-15 14:11:58.136058131 +0100 ++++ b/doc/man/Makefile.am 2017-03-15 14:31:58.181539045 +0100 +@@ -73,6 +73,7 @@ + ocf_heartbeat_MailTo.7 \ + ocf_heartbeat_ManageRAID.7 \ + ocf_heartbeat_ManageVE.7 \ ++ ocf_heartbeat_NodeUtilization.7 \ + ocf_heartbeat_nova-compute-wait.7 \ + ocf_heartbeat_NovaEvacuate.7 \ + ocf_heartbeat_Pure-FTPd.7 \ +diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am +--- a/heartbeat/Makefile.am 2017-03-15 14:11:58.136058131 +0100 ++++ b/heartbeat/Makefile.am 2017-03-15 14:32:45.554873187 +0100 +@@ -95,6 +95,7 @@ + MailTo \ + ManageRAID \ + ManageVE \ ++ NodeUtilization \ + mysql \ + mysql-proxy \ + nagios \ +diff -uNr a/heartbeat/NodeUtilization b/heartbeat/NodeUtilization +--- a/heartbeat/NodeUtilization 1970-01-01 01:00:00.000000000 +0100 ++++ b/heartbeat/NodeUtilization 2017-03-15 14:29:18.141788491 +0100 +@@ -0,0 +1,226 @@ ++#!/bin/sh ++# ++# ++# NodeUtilization OCF Resource Agent ++# ++# Copyright (c) 2011 SUSE LINUX, John Shi ++# Copyright (c) 2016 SUSE LINUX, Kristoffer Gronlund ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++####################################################################### ++# Initialization: ++ ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++####################################################################### ++ ++NodeUtilization_meta_data() { ++ cat < ++ ++ ++1.0 ++ ++ ++The Node Utilization agent detects system parameters like available CPU, host ++memory and hypervisor memory availability, and adds them into the CIB for each ++node using crm_attribute. Run the agent as a clone resource to have it populate ++these parameters on each node. ++Note: Setting hv_memory only works with Xen at the moment, using the xl or xm ++command line tools. ++ ++Node Utilization ++ ++ ++ ++ ++If set, parameters will be updated if there are differences between the HA ++parameters and the system values when running the monitor action. ++If not set, the parameters will be set once when the resource instance starts. ++ ++Dynamically update parameters in monitor ++ ++ ++ ++ ++Enable setting node CPU utilization limit. ++Set node CPU utilization limit. 
++ ++ ++ ++ ++Subtract this value when setting the CPU utilization parameter. ++CPU reservation. ++ ++ ++ ++ ++Enable setting available host memory. ++Set available host memory. ++ ++ ++ ++ ++Subtract this value when setting host memory utilization, in MB. ++Host memory reservation, in MB. ++ ++ ++ ++ ++Enable setting available hypervisor memory. ++Set available hypervisor memory. ++ ++ ++ ++ ++Subtract this value when setting hypervisor memory utilization, in MB. ++Hypervisor memory reservation, in MB. ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++} ++ ++Host_Total_Memory() { ++ local xentool ++ ++ xentool=$(which xl 2> /dev/null || which xm 2> /dev/null) ++ ++ if [ -x $xentool ]; then ++ $xentool info | awk '/total_memory/{printf("%d\n",$3);exit(0)}' ++ else ++ ocf_log warn "Can only set hv_memory for Xen hypervisor" ++ echo "0" ++ fi ++} ++ ++ ++set_utilization() { ++ host_name="$(ocf_local_nodename)" ++ ++ if ocf_is_true "$OCF_RESKEY_utilization_cpu"; then ++ sys_cpu=$(( $(grep -c processor /proc/cpuinfo) - $OCF_RESKEY_utilization_cpu_reservation )) ++ uti_cpu=$(crm_attribute -Q -t nodes -U "$host_name" -z -n cpu 2>/dev/null) ++ ++ if [ "$sys_cpu" != "$uti_cpu" ]; then ++ if ! crm_attribute -t nodes -U "$host_name" -z -n cpu -v $sys_cpu; then ++ ocf_log err "Failed to set the cpu utilization attribute for $host_name using crm_attribute." ++ return 1 ++ fi ++ fi ++ fi ++ ++ if ocf_is_true "$OCF_RESKEY_utilization_host_memory"; then ++ sys_mem=$(( $(awk '/MemTotal/{printf("%d\n",$2/1024);exit(0)}' /proc/meminfo) - $OCF_RESKEY_utilization_host_memory_reservation )) ++ uti_mem=$(crm_attribute -Q -t nodes -U "$host_name" -z -n host_memory 2>/dev/null) ++ ++ if [ "$sys_mem" != "$uti_mem" ]; then ++ if ! crm_attribute -t nodes -U "$host_name" -z -n host_memory -v $sys_mem; then ++ ocf_log err "Failed to set the host_memory utilization attribute for $host_name using crm_attribute." ++ return 1 ++ fi ++ fi ++ fi ++ ++ if ocf_is_true "$OCF_RESKEY_utilization_hv_memory"; then ++ hv_mem=$(( $(Host_Total_Memory) - OCF_RESKEY_utilization_hv_memory_reservation )) ++ uti_mem=$(crm_attribute -Q -t nodes -U "$host_name" -z -n hv_memory 2>/dev/null) ++ ++ [ $hv_mem -lt 0 ] && hv_mem=0 ++ ++ if [ "$hv_mem" != "$uti_mem" ]; then ++ if ! crm_attribute -t nodes -U "$host_name" -z -n hv_memory -v $hv_mem; then ++ ocf_log err "Failed to set the hv_memory utilization attribute for $host_name using crm_attribute." ++ return 1 ++ fi ++ fi ++ fi ++} ++ ++NodeUtilization_usage() { ++ cat < +Date: Fri, 28 Oct 2016 13:37:45 +0200 +Subject: [PATCH 1/2] Medium: IPaddr2: add option to enable sending refresh arp + packets in monitor + +This commit introduces a new parameter: +OCF_RESKEY_arp_count_refresh_default (default: 0) + +This parameter allows to specify whether to send gratuitous ARP packets during +the monitoring of the resource, and how many. +This is to alleviate issues with potentially stuck switch ARP caches, +which can arise in case of split brain situations. Example: +- a two node cluster is interconnected directly as well as through a switch. 
+ Node A is master, node B is slave +- communication between master and slave is severed (split brain) +- node B initializes the virtual IPs and sends gratuitous ARP packets +- the switch updated its ARP table to point to B +- node B dies (without node A noticing) +- node A never notices anything wrong +- the switch fails to notice that the virtual IP belongs to A +--- + heartbeat/IPaddr2 | 52 +++++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 37 insertions(+), 15 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index b224ca5..c49d638 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -76,6 +76,7 @@ OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" + OCF_RESKEY_unique_clone_address_default=false + OCF_RESKEY_arp_interval_default=200 + OCF_RESKEY_arp_count_default=5 ++OCF_RESKEY_arp_count_refresh_default=0 + OCF_RESKEY_arp_bg_default=true + OCF_RESKEY_arp_mac_default="ffffffffffff" + +@@ -86,6 +87,7 @@ OCF_RESKEY_arp_mac_default="ffffffffffff" + : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} + : ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} + : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} ++: ${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}} + : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} + : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} + ####################################################################### +@@ -274,12 +276,22 @@ Specify the interval between unsolicited ARP packets in milliseconds. + + + +-Number of unsolicited ARP packets to send. ++Number of unsolicited ARP packets to send at resource initialization. + +-ARP packet count ++ARP packet count sent during initialization + + + ++ ++ ++Number of unsolicited ARP packets to send during resource monitoring. Doing ++so helps mitigate issues of stuck ARP caches resulting from split-brain ++situations. ++ ++ARP packet count sent during monitoring ++ ++ ++ + + + Whether or not to send the ARP packets in the background. 
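(Editor's sketch of configuring the new arp_count_refresh parameter, assuming the pcs CLI is available; resource name, address and values are invented:)

    pcs resource create vip ocf:heartbeat:IPaddr2 ip=192.168.0.10 \
        arp_count_refresh=3 op monitor interval=10s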
+@@ -660,20 +672,25 @@ is_infiniband() { + # Run send_arp to note peers about new mac address + # + run_send_arp() { +- ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip auto not_used not_used" + if [ "x$IP_CIP" = "xyes" ] ; then + if [ x = "x$IF_MAC" ] ; then + MY_MAC=auto + else + MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` + fi +- ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" +- fi +- ocf_log info "$SENDARP $ARGS" +- if ocf_is_true $OCF_RESKEY_arp_bg; then +- ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 + else +- $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" ++ MY_MAC=auto ++ fi ++ [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ ++ || ARP_COUNT=$OCF_RESKEY_arp_count ++ if [ $ARP_COUNT -ne 0 ] ; then ++ ARGS="-i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" ++ ocf_log info "$SENDARP $ARGS" ++ if ocf_is_true $OCF_RESKEY_arp_bg; then ++ ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 ++ else ++ $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" ++ fi + fi + } + +@@ -720,12 +737,16 @@ run_send_ua() { + # Run ipoibarping to note peers about new Infiniband address + # + run_send_ib_arp() { +- ARGS="-q -c $OCF_RESKEY_arp_count -U -I $NIC $OCF_RESKEY_ip" +- ocf_log info "ipoibarping $ARGS" +- if ocf_is_true $OCF_RESKEY_arp_bg; then +- (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 +- else +- ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" ++ [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ ++ || ARP_COUNT=$OCF_RESKEY_arp_count ++ if [ $ARP_COUNT -ne 0 ] ; then ++ ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" ++ ocf_log info "ipoibarping $ARGS" ++ if ocf_is_true $OCF_RESKEY_arp_bg; then ++ (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 ++ else ++ ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" ++ fi + fi + } + +@@ -946,6 +967,7 @@ ip_monitor() { + local ip_status=`ip_served` + case $ip_status in + ok) ++ $ARP_SEND_FUN refresh + return $OCF_SUCCESS + ;; + partial|no|partial2) + +From aa1db299f0b684fb814e6c31e96890868fa90e04 Mon Sep 17 00:00:00 2001 +From: Andrea Ieri +Date: Fri, 4 Nov 2016 15:37:15 +0100 +Subject: [PATCH 2/2] Low: IPaddr2: Log refresh arp packets at debug level + instead of info + +--- + heartbeat/IPaddr2 | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index c49d638..c9acf59 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -681,11 +681,16 @@ run_send_arp() { + else + MY_MAC=auto + fi +- [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ +- || ARP_COUNT=$OCF_RESKEY_arp_count ++ if [ "x$1" = "xrefresh" ] ; then ++ ARP_COUNT=$OCF_RESKEY_arp_count_refresh ++ LOGLEVEL=debug ++ else ++ ARP_COUNT=$OCF_RESKEY_arp_count ++ LOGLEVEL=info ++ fi + if [ $ARP_COUNT -ne 0 ] ; then + ARGS="-i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" +- ocf_log info "$SENDARP $ARGS" ++ ocf_log $LOGLEVEL "$SENDARP $ARGS" + if ocf_is_true $OCF_RESKEY_arp_bg; then + ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 + else +@@ -737,11 +742,16 @@ run_send_ua() { + # Run ipoibarping to note peers about new Infiniband address + # + 
run_send_ib_arp() { +- [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ +- || ARP_COUNT=$OCF_RESKEY_arp_count ++ if [ "x$1" = "xrefresh" ] ; then ++ ARP_COUNT=$OCF_RESKEY_arp_count_refresh ++ LOGLEVEL=debug ++ else ++ ARP_COUNT=$OCF_RESKEY_arp_count ++ LOGLEVEL=info ++ fi + if [ $ARP_COUNT -ne 0 ] ; then + ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" +- ocf_log info "ipoibarping $ARGS" ++ ocf_log $LOGLEVEL "ipoibarping $ARGS" + if ocf_is_true $OCF_RESKEY_arp_bg; then + (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 + else diff --git a/SOURCES/bz1435171-named-add-support-for-rndc-options.patch b/SOURCES/bz1435171-named-add-support-for-rndc-options.patch new file mode 100644 index 0000000..dd6894c --- /dev/null +++ b/SOURCES/bz1435171-named-add-support-for-rndc-options.patch @@ -0,0 +1,62 @@ +From b78c5e48568f97415de03f68d0c8b747229c4281 Mon Sep 17 00:00:00 2001 +From: Bas Couwenberg +Date: Wed, 22 Mar 2017 15:46:50 +0100 +Subject: [PATCH] Add support for rndc options in named resource agent. + +--- + heartbeat/named | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/named b/heartbeat/named +index 2118e0c..4856cdc 100755 +--- a/heartbeat/named ++++ b/heartbeat/named +@@ -25,6 +25,7 @@ OCF_RESKEY_named_pidfile_default="/var/run/named/named.pid" + OCF_RESKEY_named_rootdir_default="" + OCF_RESKEY_named_options_default="" + OCF_RESKEY_named_keytab_file_default="" ++OCF_RESKEY_rndc_options_default="" + OCF_RESKEY_monitor_request_default="localhost" + OCF_RESKEY_monitor_response_default="127.0.0.1" + OCF_RESKEY_monitor_ip_default="127.0.0.1" +@@ -38,6 +39,7 @@ OCF_RESKEY_monitor_ip_default="127.0.0.1" + : ${OCF_RESKEY_named_rootdir=${OCF_RESKEY_named_rootdir_default}} + : ${OCF_RESKEY_named_options=${OCF_RESKEY_named_options_default}} + : ${OCF_RESKEY_named_keytab_file=${OCF_RESKEY_named_keytab_file_default}} ++: ${OCF_RESKEY_rndc_options=${OCF_RESKEY_rndc_options_default}} + : ${OCF_RESKEY_monitor_request=${OCF_RESKEY_monitor_request_default}} + : ${OCF_RESKEY_monitor_response=${OCF_RESKEY_monitor_response_default}} + : ${OCF_RESKEY_monitor_ip=${OCF_RESKEY_monitor_ip_default}} +@@ -144,6 +146,14 @@ named service keytab file (for GSS-TSIG). + + + ++ ++ ++Options for rndc process if any. ++ ++rndc_options ++ ++ ++ + + + Request that shall be sent to named for monitoring. Usually an A record in DNS. +@@ -326,7 +336,7 @@ named_monitor() { + # + + named_reload() { +- $OCF_RESKEY_rndc reload >/dev/null || return $OCF_ERR_GENERIC ++ $OCF_RESKEY_rndc $OCF_RESKEY_rndc_options reload >/dev/null || return $OCF_ERR_GENERIC + + return $OCF_SUCCESS + } +@@ -396,7 +406,7 @@ named_stop () { + + named_status || return $OCF_SUCCESS + +- $OCF_RESKEY_rndc stop >/dev/null ++ $OCF_RESKEY_rndc $OCF_RESKEY_rndc_options stop >/dev/null + if [ $? -ne 0 ]; then + ocf_log info "rndc stop failed. Killing named." + kill `cat ${OCF_RESKEY_named_pidfile}` diff --git a/SOURCES/bz1435982-rabbitmq-cluster-pacemaker-remote.patch b/SOURCES/bz1435982-rabbitmq-cluster-pacemaker-remote.patch new file mode 100644 index 0000000..ad5f57b --- /dev/null +++ b/SOURCES/bz1435982-rabbitmq-cluster-pacemaker-remote.patch @@ -0,0 +1,92 @@ +From 51b03e5e892cd2446c84dc78e17b0ad3bdbe76d2 Mon Sep 17 00:00:00 2001 +From: Michele Baldessari +Date: Tue, 28 Mar 2017 16:21:52 +0200 +Subject: [PATCH] Allow the rabbitmq cluster to work on pacemaker remote nodes + +This was first observed via +https://bugzilla.redhat.com/show_bug.cgi?id=1435982. 
Due to the way +the resource agent looks for attrd entries, it will filter out any +node which does not have the @crmd=online attribute. This is the +case for pacemaker-remote nodes. To fix this we chose the more +conservative approach and only do an additional query when the first +one returned no entries. Note that this issue exhibits itself +when 'pcs status' reports rabbitmq started on a bunch of nodes: +Clone Set: rabbitmq-clone [rabbitmq] + Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 + +But the cluster_status command returns a single node: +[root@overcloud-rabbit-1 ~]# rabbitmqctl cluster_status +Cluster status of node 'rabbit@overcloud-rabbit-1' ... +[{nodes,[{disc,['rabbit@overcloud-rabbit-1']}]}, + {running_nodes,['rabbit@overcloud-rabbit-1']}, + {cluster_name,<<"rabbit@overcloud-rabbit-1.localdomain">>}, + {partitions,[]}, + {alarms,[{'rabbit@overcloud-rabbit-1',[]}]}] + +Also add some text in the help explaining that currently a mixture of +pacemaker-remote and pacemaker nodes is not supported. + +We tested this change on a pacemaker-remote only setup successfully: +Clone Set: rabbitmq-clone [rabbitmq] + Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 + +[root@overcloud-rabbit-0 ~]# rabbitmqctl cluster_status +Cluster status of node 'rabbit@overcloud-rabbit-0' ... +[{nodes,[{disc,['rabbit@overcloud-rabbit-0','rabbit@overcloud-rabbit-1', + 'rabbit@overcloud-rabbit-2']}]}, + {running_nodes,['rabbit@overcloud-rabbit-2','rabbit@overcloud-rabbit-1', + 'rabbit@overcloud-rabbit-0']}, + {cluster_name,<<"rabbit@overcloud-rabbit-0.localdomain">>}, + {partitions,[]}, + {alarms,[{'rabbit@overcloud-rabbit-2',[]}, + {'rabbit@overcloud-rabbit-1',[]}, + {'rabbit@overcloud-rabbit-0',[]}]}] + +Signed-Off-By: Michele Baldessari +Signed-Off-By: Damien Ciabrini +--- + heartbeat/rabbitmq-cluster | 24 ++++++++++++++++++++++-- + 1 file changed, 22 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index 74378be..6a17590 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -56,7 +56,9 @@ meta_data() { + 1.0 + + +-Starts cloned rabbitmq cluster instance ++Starts cloned rabbitmq cluster instance. NB: note that this RA ++cannot be spawned across a mix of pacemaker and pacemaker-remote nodes. ++Only on pacemaker *or* pacemaker-remote nodes exclusively. + + rabbitmq clustered + +@@ -111,7 +113,25 @@ rmq_local_node() + + rmq_join_list() + { +- cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" ++ local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") ++ # If join_list is empty we want to check if there are any remote nodes ++ # where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector) ++ if [ -z "$join_list" ]; then ++ # Get all the nodes written in the ATTR_COOKIE no matter if ++ # they are online or not. This will be one line per node like ++ # rabbit@overcloud-rabbit-0 ++ # rabbit@overcloud-rabbit-1 ++ # ... ++ local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") ++ # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...' 
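(Editor's sketch of the intersection being prepared here, with invented node names:)

    # remote_join_list: rabbit@overcloud-rabbit-0 rabbit@overcloud-rabbit-1 rabbit@overcloud-rabbit-2
    # filter (online, non-standby nodes): -e overcloud-rabbit-0 -e overcloud-rabbit-1
    # join_list after 'grep $filter': rabbit@overcloud-rabbit-0 rabbit@overcloud-rabbit-1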
++ local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}')
++ # export the intersection which gives us only the nodes that
++ # a) wrote their name in the cib attrd
++ # b) run on nodes where pacemaker_remote is enabled
++ join_list="$(echo $remote_join_list | grep $filter)"
++ fi
++
++ echo $join_list
+ }
+
+ rmq_write_nodename()
diff --git a/SOURCES/bz1437122-rabbitmq-cluster-pacemaker-remote.patch
deleted file mode 100644
index ad5f57b..0000000
--- a/SOURCES/bz1437122-rabbitmq-cluster-pacemaker-remote.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From 51b03e5e892cd2446c84dc78e17b0ad3bdbe76d2 Mon Sep 17 00:00:00 2001
-From: Michele Baldessari
-Date: Tue, 28 Mar 2017 16:21:52 +0200
-Subject: [PATCH] Allow the rabbitmq cluster to work on pacemaker remote nodes
-
-This was first observed via
-https://bugzilla.redhat.com/show_bug.cgi?id=1435982.
- - rabbitmq clustered - -@@ -111,7 +113,25 @@ rmq_local_node() - - rmq_join_list() - { -- cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" -+ local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") -+ # If join_list is empty we want to check if there are any remote nodes -+ # where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector) -+ if [ -z "$join_list" ]; then -+ # Get all the nodes written in the ATTR_COOKIE no matter if -+ # they are online or not. This will be one line per node like -+ # rabbit@overcloud-rabbit-0 -+ # rabbit@overcloud-rabbit-1 -+ # ... -+ local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") -+ # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...' -+ local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}') -+ # export the intersection which gives us only the nodes that -+ # a) wrote their namein the cib attrd -+ # b) run on nodes where pacemaker_remote is enabled -+ join_list="$(echo $remote_join_list | grep $filter)" -+ fi -+ -+ echo $join_list - } - - rmq_write_nodename() diff --git a/SOURCES/bz1445861-IPaddr2-IPv6-add-preferred_lft-parameter.patch b/SOURCES/bz1445861-IPaddr2-IPv6-add-preferred_lft-parameter.patch new file mode 100644 index 0000000..d8279d9 --- /dev/null +++ b/SOURCES/bz1445861-IPaddr2-IPv6-add-preferred_lft-parameter.patch @@ -0,0 +1,81 @@ +From 2918ab999cbcbe6bc04061dd070e5b0dd8465346 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Wed, 26 Apr 2017 17:51:52 +0200 +Subject: [PATCH] IPaddr2: add option for specifying IPv6's preferred_lft + +This change allows setting the preferred_lft option when creating an +IPv6 address. This can be used to ensure that the created IP address +will not be used as a source address for routing. +--- + heartbeat/IPaddr2 | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index 27b7208..2d2ba2c 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -56,6 +56,7 @@ + # OCF_RESKEY_arp_count + # OCF_RESKEY_arp_bg + # OCF_RESKEY_arp_mac ++# OCF_RESKEY_preferred_lft + # + # OCF_RESKEY_CRM_meta_clone + # OCF_RESKEY_CRM_meta_clone_max +@@ -80,6 +81,7 @@ OCF_RESKEY_arp_count_refresh_default=0 + OCF_RESKEY_arp_bg_default=true + OCF_RESKEY_arp_mac_default="ffffffffffff" + OCF_RESKEY_run_arping_default=false ++OCF_RESKEY_preferred_lft_default="forever" + + : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} + : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} +@@ -92,6 +94,7 @@ OCF_RESKEY_run_arping_default=false + : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} + : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} + : ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}} ++: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}} + ####################################################################### + + SENDARP=$HA_BIN/send_arp +@@ -350,6 +353,17 @@ Whether or not to run arping for IPv4 collision detection check. 
+ + + ++ ++ ++For IPv6, set the preferred lifetime of the IP address. ++This can be used to ensure that the created IP address will not ++be used as a source address for routing. ++Expects a value as specified in section 5.5.4 of RFC 4862. ++ ++IPv6 preferred lifetime ++ ++ ++ + + + +@@ -590,6 +604,10 @@ add_interface () { + cmd="$cmd label $label" + msg="${msg} (with label $label)" + fi ++ if [ "$FAMILY" = "inet6" ] ;then ++ cmd="$cmd preferred_lft $OCF_RESKEY_preferred_lft" ++ msg="${msg} (with preferred_lft $OCF_RESKEY_preferred_lft)" ++ fi + + ocf_log info "$msg" + ocf_run $cmd || return $OCF_ERR_GENERIC +@@ -1076,6 +1094,11 @@ ip_validate() { + exit $OCF_ERR_CONFIGURED + fi + ++ if [ -z "$OCF_RESKEY_preferred_lft" ]; then ++ ocf_exit_reason "Empty value is invalid for OCF_RESKEY_preferred_lft" ++ exit $OCF_ERR_CONFIGURED ++ fi ++ + if [ -n "$IP_CIP" ]; then + + local valid=1 diff --git a/SOURCES/bz1445889-IPaddr2-IPv6-add-preferred_lft-parameter.patch b/SOURCES/bz1445889-IPaddr2-IPv6-add-preferred_lft-parameter.patch deleted file mode 100644 index a59a371..0000000 --- a/SOURCES/bz1445889-IPaddr2-IPv6-add-preferred_lft-parameter.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 2918ab999cbcbe6bc04061dd070e5b0dd8465346 Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Wed, 26 Apr 2017 17:51:52 +0200 -Subject: [PATCH] IPaddr2: add option for specifying IPv6's preferred_lft - -This change allows setting the preferred_lft option when creating an -IPv6 address. This can be used to ensure that the created IP address -will not be used as a source address for routing. ---- - heartbeat/IPaddr2 | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 -index 27b7208..2d2ba2c 100755 ---- a/heartbeat/IPaddr2 -+++ b/heartbeat/IPaddr2 -@@ -56,6 +56,7 @@ - # OCF_RESKEY_arp_count - # OCF_RESKEY_arp_bg - # OCF_RESKEY_arp_mac -+# OCF_RESKEY_preferred_lft - # - # OCF_RESKEY_CRM_meta_clone - # OCF_RESKEY_CRM_meta_clone_max -@@ -80,6 +81,7 @@ OCF_RESKEY_arp_count_refresh_default=0 - OCF_RESKEY_arp_count_default=5 - OCF_RESKEY_arp_bg_default=true - OCF_RESKEY_arp_mac_default="ffffffffffff" -+OCF_RESKEY_preferred_lft_default="forever" - - : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} - : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} -@@ -92,6 +94,7 @@ - : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default} - : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} - : ${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} -+: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}} - ####################################################################### - - SENDARP=$HA_BIN/send_arp -@@ -350,6 +353,17 @@ Whether or not to run arping for IPv4 collision detection check. - - - -+ -+ -+For IPv6, set the preferred lifetime of the IP address. -+This can be used to ensure that the created IP address will not -+be used as a source address for routing. -+Expects a value as specified in section 5.5.4 of RFC 4862. 
-+ -+IPv6 preferred lifetime -+ -+ -+ - - - -@@ -590,6 +604,10 @@ add_interface () { - cmd="$cmd label $label" - msg="${msg} (with label $label)" - fi -+ if [ "$FAMILY" = "inet6" ] ;then -+ cmd="$cmd preferred_lft $OCF_RESKEY_preferred_lft" -+ msg="${msg} (with preferred_lft $OCF_RESKEY_preferred_lft)" -+ fi - - ocf_log info "$msg" - ocf_run $cmd || return $OCF_ERR_GENERIC -@@ -1076,6 +1094,11 @@ ip_validate() { - exit $OCF_ERR_CONFIGURED - fi - -+ if [ -z "$OCF_RESKEY_preferred_lft" ]; then -+ ocf_exit_reason "Empty value is invalid for OCF_RESKEY_preferred_lft" -+ exit $OCF_ERR_CONFIGURED -+ fi -+ - if [ -n "$IP_CIP" ]; then - - local valid=1 diff --git a/SOURCES/bz1449681-1-saphana-saphanatopology-update-0.152.21.patch b/SOURCES/bz1449681-1-saphana-saphanatopology-update-0.152.21.patch new file mode 100644 index 0000000..596d3a6 --- /dev/null +++ b/SOURCES/bz1449681-1-saphana-saphanatopology-update-0.152.21.patch @@ -0,0 +1,216 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2017-05-11 12:12:17.207213156 +0200 ++++ b/heartbeat/SAPHana 2017-05-11 12:19:44.846798058 +0200 +@@ -16,7 +16,7 @@ + # Support: linux@sap.com + # License: GNU General Public License (GPL) + # Copyright: (c) 2013,2014 SUSE Linux Products GmbH +-# (c) 2015-2016 SUSE Linux GmbH ++# (c) 2015-2017 SUSE Linux GmbH + # + # An example usage: + # See usage() function below for more details... +@@ -35,7 +35,7 @@ + ####################################################################### + # + # Initialization: +-SAPHanaVersion="0.152.17" ++SAPHanaVersion="0.152.21" + timeB=$(date '+%s') + + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +@@ -133,8 +133,8 @@ + function backup_global_and_nameserver() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 +- cp /hana/shared/$SID/global/hdb/custom/config/global.ini /hana/shared/$SID/global/hdb/custom/config/global.ini.$(date +"%s") +- cp /hana/shared/$SID/global/hdb/custom/config/nameserver.ini /hana/shared/$SID/global/hdb/custom/config/nameserver.ini.$(date +"%s") ++ cp /hana/shared/${SID}/global/hdb/custom/config/global.ini /hana/shared/${SID}/global/hdb/custom/config/global.ini.$(date +"%s") ++ cp /hana/shared/${SID}/global/hdb/custom/config/nameserver.ini /hana/shared/${SID}/global/hdb/custom/config/nameserver.ini.$(date +"%s") + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -665,7 +665,7 @@ + # DONE: PRIO4: SAPVIRHOST might be different to NODENAME + # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? Answer: Yes + # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 +- # We rely on the following format: SID is word#4, SYSNR is work#6, vHost is word#8 ++ # We rely on the following format: SID is word#4, SYSNR is word#6, vHost is word#8 + if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then + vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ + | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null ) +@@ -713,27 +713,29 @@ + "[234]*:P:[^:]*:master .* 150" + "[015-9]*:P:[^:]*:master .* 90" + "[0-9]*:P:[^:]*:slave .* 60" +- "[0-9]*:P:[^:]*:\? .* 0" +- "[0-9]*:P:[^:]*:- .* 0" ++ "[234]*:P:[^:]*:[?:-] .* 0" ++ "[015-9]*:P:[^:]*:[?:-] .* -1" + "[234]*:S:[^:]*:master SOK 100" ++ "[234]*:S:[^:]*:master PRIM 100" + "[015-9]*:S:[^:]*:master SOK 80" + "[0-9]*:S:[^:]*:master SFAIL -INFINITY" + "[0-9]*:S:[^:]*:slave SOK 10" + "[0-9]*:S:[^:]*:slave SFAIL -INFINITY" +- "[0-9]*:S:[^:]*:\? 
.* 0" +- "[0-9]*:S:[^:]*:- .* 0" +- ".* .* -1" ++ "[234]*:S:[^:]*:[?:-] .* 0" ++ "[015-9]*:S:[^:]*:[?:-] .* -1" ++ ".* .* -1" + ) + SCORING_TABLE_PREFERRED_LOCAL_RESTART=( +- "[0-9]*:P:[^:]*:master .* 150" +- "[0-9]*:P:[^:]*:.* .* 140" ++ "[0-9]*:P:[^:]*:master .* 150" ++ "[0-9]*:P:[^:]*:.* .* 140" + "[0-9]*:S:[^:]*:master SOK 100" ++ "[0-9]*:S:[^:]*:master PRIM 100" + "[0-9]*:S:[^:]*:master SFAIL -INFINITY" + "[0-9]*:S:[^:]*:slave SOK 10" + "[0-9]*:S:[^:]*:slave SFAIL -INFINITY" +- "[0-9]*:S:[^:]*:\? .* 0" +- "[0-9]*:S:[^:]*:- .* 0" +- ".* .* -1" ++ "[015-9]*:S:[^:]*:[?:-] .* -1" ++ "[234]*:S:[^:]*:[?:-] .* -1" ++ ".* .* -1" + ) + SCORING_TABLE_PREFERRED_NEVER=( + "[234]*:P:[^:]*:master .* 150" +@@ -1030,7 +1032,7 @@ + # TODO: Limit the runtime of systemReplicationStatus.py + # SAP_CALL + # FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? +- FULL_SR_STATUS=$(HANA_CALL --timeout 60 --cmd "systemReplicationStatus.py" 2>/dev/null); srRc=$? ++ FULL_SR_STATUS=$(HANA_CALL --timeout 60 --cmd "systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? + super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" + super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" + # +@@ -2445,8 +2447,9 @@ + else + # + # neither MASTER nor SLAVE - This clone instance seams to be broken!! +- # +- rc=$OCF_ERR_GENERIC ++ # bsc#1027098 - do not stop SAP HANA if "only" HANA state is not correct ++ # Let next monitor find, if that HANA instance is available or not ++ rc=$OCF_SUCCESS; + fi + fi + rc=$? +diff -uNr a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology +--- a/heartbeat/SAPHanaTopology 2017-05-11 12:12:17.205213176 +0200 ++++ b/heartbeat/SAPHanaTopology 2017-05-11 12:12:40.642982012 +0200 +@@ -14,7 +14,7 @@ + # Support: linux@sap.com + # License: GNU General Public License (GPL) + # Copyright: (c) 2014 SUSE Linux Products GmbH +-# (c) 2015-2016 SUSE Linux GmbH ++# (c) 2015-2017 SUSE Linux GmbH + # + # An example usage: + # See usage() function below for more details... +@@ -28,7 +28,7 @@ + ####################################################################### + # + # Initialization: +-SAPHanaVersion="0.152.17" ++SAPHanaVersion="0.152.21" + timeB=$(date '+%s') + + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +@@ -474,6 +474,7 @@ + ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") + ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") + ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") ++ ATTR_NAME_HANA_VERSION=("hana_${sid}_version" "reboot") + # + # new "central" attributes + # +@@ -531,7 +532,7 @@ + # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode + for chkMethod in hU hU hU gP ; do + # DONE: Limit the runtime of hdbnsutil. 
+- # TODO: Use getParameter.py if we get no answer ++ # DONE: Use getParameter.py if we get no answer + # SAP_CALL + #super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER" + #srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') +@@ -602,7 +603,18 @@ + # currently having more than 2 HANA in a chain/star members IN the cluster is not allowed, the third must be external + if [ "$NODENAME" != "$n1" ]; then + hanaSite=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_SITE[@]}) +- hanaRemoteHost="$n1" ++ # ++ # only, if a hanaSite is found use that node - this allows majority makers ++ # ++ if [ -n "$hanaSite" ]; then ++ hanaRemoteHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]}) ++ # ++ # only if vhost is NOT set use the nodename instead ++ # ++ if [ -z "$hanaRemoteHost" ]; then ++ hanaRemoteHost="$n1" ++ fi ++ fi + fi + done + super_ocf_log info "DEC: site=$site, mode=$srmode, hanaRemoteHost=$hanaRemoteHost - found by remote site ($hanaSite)" +@@ -700,7 +712,7 @@ + # TODO: PRIO3: move the string "$HA_RSCTMP/SAPHana/SAPTopologyON" to a variable + # TODO: PRIO3: move the file to the clusters tmp directory? + mkdir -p $HA_RSCTMP/SAPHana +- touch $HA_RSCTMP/SAPHana/SAPTopologyON ++ touch $HA_RSCTMP/SAPHana/SAPTopologyON.${SID} + if ! check_saphostagent; then + start_saphostagent + fi +@@ -722,7 +734,7 @@ + local output="" + local rc=0 + +- rm $HA_RSCTMP/SAPHana/SAPTopologyON ++ rm $HA_RSCTMP/SAPHana/SAPTopologyON.${SID} + rc=$OCF_SUCCESS + + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -740,7 +752,7 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 + +- if [ -f $HA_RSCTMP/SAPHana/SAPTopologyON ]; then ++ if [ -f $HA_RSCTMP/SAPHana/SAPTopologyON.${SID} ]; then + rc=$OCF_SUCCESS + else + rc=$OCF_NOT_RUNNING +@@ -845,6 +857,11 @@ + if ocf_is_probe; then + super_ocf_log debug "DBG2: PROBE ONLY" + sht_monitor; rc=$? ++ local hana_version=$(HANA_CALL --timeout 10 --cmd "HDB version" \ ++ | awk -F':' '$1==" version" {print $2}; ' | tr -d '[:space:]') ++ if [[ -n $hana_version ]]; then ++ set_hana_attribute "${NODENAME}" "$hana_version" ${ATTR_NAME_HANA_VERSION[@]} ++ fi + else + super_ocf_log debug "DBG2: REGULAR MONITOR" + if ! check_saphostagent; then +@@ -871,9 +888,13 @@ + super_ocf_log debug "DBG2: HANA IS STANDALONE" + sht_monitor; rc=$? + else +- hanaPrim="-" +- super_ocf_log warn "ACT: sht_monitor_clone: HANA_STATE_DEFECT" +- rc=$OCF_ERR_CONFIGURED ++ # bsc#1027098 Do not mark HANA instance as failed, if "only" the HANA state could not be detected ++ hanaPrim=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]} | awk -F: '{ print $2}') ++ if [ "$hanaPrim" = "" ]; then ++ hanaPrim="-" ++ fi ++ super_ocf_log warn "ACT: sht_monitor_clone: HANA_STATE_DEFECT (primary/secondary state could not be detected at this point of time)" ++ sht_monitor; rc=$? + fi + fi + # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? 
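
Note on the SAPHanaTopology changes above: renaming the state file from
SAPTopologyON to SAPTopologyON.${SID} gives every SID its own marker file,
which appears intended to let topology clones for different SIDs coexist on
one node without the stop of one SID making the other look stopped. A minimal
sketch of the resulting per-SID marker handling (HA_RSCTMP is the agent's
tmp-dir variable; the SIDs HA1/HA2 are hypothetical examples, not taken from
the patch):

    # after sht_start for SID HA1, the monitor only checks its own marker:
    test -f "$HA_RSCTMP/SAPHana/SAPTopologyON.HA1" && echo "topology for HA1 running"
    # a stop for a second SID HA2 removes only that SID's marker:
    rm "$HA_RSCTMP/SAPHana/SAPTopologyON.HA2"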
diff --git a/SOURCES/bz1449681-2-saphana-saphanatopology-update-0.152.21.patch b/SOURCES/bz1449681-2-saphana-saphanatopology-update-0.152.21.patch new file mode 100644 index 0000000..75cde03 --- /dev/null +++ b/SOURCES/bz1449681-2-saphana-saphanatopology-update-0.152.21.patch @@ -0,0 +1,48 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2017-06-02 11:44:30.345894798 +0200 ++++ b/heartbeat/SAPHana 2017-06-02 11:45:15.622450739 +0200 +@@ -545,6 +545,9 @@ + read rolePatt syncPatt score <<< $scan + if grep "$rolePatt" <<< "$roles"; then + if grep "$syncPatt" <<< "$sync"; then ++ super_ocf_log info "SCORE: scoring_crm_master: roles($roles) are matching pattern ($rolePatt)" ++ super_ocf_log info "SCORE: scoring_crm_master: sync($sync) is matching syncPattern ($syncPatt)" ++ super_ocf_log info "SCORE: scoring_crm_master: set score $score" + skip=1 + myScore=$score + fi +@@ -1435,6 +1438,10 @@ + lpa_set_lpt $LPTloc $NODENAME + ;; + esac ++ if [ -z "$my_role" ]; then ++ my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) ++ fi ++ super_ocf_log info "SCORE: saphana_start_primary: scoring_crm_master($my_role,$my_sync)" + scoring_crm_master "$my_role" "$my_sync" + ;; + register ) # process a REGISTER +@@ -2129,6 +2136,7 @@ + #super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here" + my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) + my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) ++ super_ocf_log info "SCORE: saphana_monitor_primary: scoring_crm_master($my_role,$my_sync)" + scoring_crm_master "$my_role" "$my_sync" + rc=$OCF_FAILED_MASTER + fi +@@ -2191,6 +2199,7 @@ + ;; + esac + fi ++ super_ocf_log info "SCORE: saphana_monitor_primary: scoring_crm_master($my_role,$my_sync)" + scoring_crm_master "$my_role" "$my_sync" + fi + ;; +@@ -2301,6 +2310,7 @@ + super_ocf_log info "DEC: secondary with sync status SOK ==> possible takeover node" + my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) + my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) ++ super_ocf_log info "SCORE: saphana_monitor_secondary: scoring_crm_master($my_role,$my_sync)" + scoring_crm_master "$my_role" "$my_sync" + ;; + "SFAIL" ) # This is currently NOT a possible node to promote diff --git a/SOURCES/bz1451097-1-galera-fix-bootstrap-when-cluster-has-no-data.patch b/SOURCES/bz1451097-1-galera-fix-bootstrap-when-cluster-has-no-data.patch new file mode 100644 index 0000000..98e98bf --- /dev/null +++ b/SOURCES/bz1451097-1-galera-fix-bootstrap-when-cluster-has-no-data.patch @@ -0,0 +1,140 @@ +From aa486941a7d980ded7a30e404a9d91620b19c47a Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Mon, 19 Dec 2016 14:13:21 +0100 +Subject: [PATCH] galera: allow names in wsrep_cluster_address to differ from + pacemaker nodes' names + +Add a new option cluster_host_map to the galera resource agent in case +names to be used in wsrep_cluster_address need to differ from names +used for the pacemaker nodes. (e.g. when galera names map to IP +from a specific network interface) +--- + heartbeat/galera | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 65 insertions(+), 6 deletions(-) + +diff --git a/heartbeat/galera b/heartbeat/galera +index fe2aa8a..45693ac 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -208,13 +208,30 @@ The galera cluster address. 
This takes the form of:
+ gcomm://node,node,node
+ 
+ Only nodes present in this node list will be allowed to start a galera instance.
+-It is expected that the galera node names listed in this address match valid
+-pacemaker node names.
++The galera node names listed in this address are expected to match valid
++pacemaker node names. If the two sets of names need to differ, you must
++provide a mapping in the cluster_host_map option.
+ 
+ Galera cluster address
+ 
+ 
+ 
++ 
++ 
++A mapping of pacemaker node names to galera node names.
++ 
++To be used when the pacemaker and galera node names need to differ
++(e.g. when galera names map to IPs from a specific network interface).
++This takes the form of:
++pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera
++ 
++where the galera resource started on node pcmk1 would be named
++node.1.galera in the wsrep_cluster_address
++ 
++Pacemaker to Galera name mapping
++ 
++ 
++ 
+ 
+ 
+ Cluster check user.
+@@ -454,6 +471,27 @@ greater_than_equal_long()
+ echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true"
+ }
+ 
++galera_to_pcmk_name()
++{
++ local galera=$1
++ if [ -z "$OCF_RESKEY_cluster_host_map" ]; then
++ echo $galera
++ else
++ echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$2=="'"$galera"'" {print $1;exit}'
++ fi
++}
++
++pcmk_to_galera_name()
++{
++ local pcmk=$1
++ if [ -z "$OCF_RESKEY_cluster_host_map" ]; then
++ echo $pcmk
++ else
++ echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$pcmk"'" {print $2;exit}'
++ fi
++}
++
++
+ detect_first_master()
+ {
+ local best_commit=0
+@@ -465,6 +503,14 @@ detect_first_master()
+ 
+ # avoid selecting a recovered node as bootstrap if possible
+ for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
++ local pcmk_node=$(galera_to_pcmk_name $node)
++ if [ -z "$pcmk_node" ]; then
++ ocf_log error "Could not determine pacemaker node from galera name <${node}>."
++ return
++ else
++ node=$pcmk_node
++ fi
++
+ if is_no_grastate $node; then
+ nodes_recovered="$nodes_recovered $node"
+ else
+@@ -783,10 +829,17 @@ galera_demote()
+ galera_start()
+ {
+ local rc
++ local galera_node
++
++ galera_node=$(pcmk_to_galera_name $NODENAME)
++ if [ -z "$galera_node" ]; then
++ ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>."
++ return $OCF_ERR_CONFIGURED
++ fi
+ 
+- echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
++ echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node
+ if [ $? -ne 0 ]; then
+- ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance"
++ ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance"
+ return $OCF_ERR_CONFIGURED
+ fi
+ 
+@@ -818,6 +871,7 @@ galera_start()
+ galera_monitor()
+ {
+ local rc
++ local galera_node
+ local status_loglevel="err"
+ 
+ # Set loglevel to info during probe
+@@ -857,10 +911,15 @@ galera_monitor()
+ fi
+ 
+ # if we make it here, mysql is running. Check cluster status now.
++ galera_node=$(pcmk_to_galera_name $NODENAME)
++ if [ -z "$galera_node" ]; then
++ ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." 
++ return $OCF_ERR_CONFIGURED
++ fi
+ 
+- echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
++ echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node
+ if [ $? -ne 0 ]; then
+- ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
++ ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
+ return $OCF_ERR_GENERIC
+ fi
+ 
diff --git a/SOURCES/bz1451097-2-galera-fix-bootstrap-when-cluster-has-no-data.patch b/SOURCES/bz1451097-2-galera-fix-bootstrap-when-cluster-has-no-data.patch
new file mode 100644
index 0000000..b72c06d
--- /dev/null
+++ b/SOURCES/bz1451097-2-galera-fix-bootstrap-when-cluster-has-no-data.patch
@@ -0,0 +1,50 @@
+From a05eb8673bd1d5d3d41f2ed39df2650b19681d08 Mon Sep 17 00:00:00 2001
+From: Damien Ciabrini 
+Date: Fri, 3 Mar 2017 15:31:30 +0100
+Subject: [PATCH] galera: fix the first bootstrap when cluster has no data
+
+The resource agent selects the first node to go into Master state
+based on the biggest commit version found on each node. In case no
+data were written yet into the galera cluster, the current node is
+selected as a "fallback" node to bootstrap the cluster.
+
+The way the "fallback" node is selected is wrong because every node
+takes a different decision, and this ultimately leads to 3
+single-node galera clusters being started. To fix that, let the
+"fallback" node be the last one in the wsrep_cluster_address, so that
+the selection algorithm yields coherent results across nodes.
+---
+ heartbeat/galera | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/heartbeat/galera b/heartbeat/galera
+index decbaa2..475a8ba 100755
+--- a/heartbeat/galera
++++ b/heartbeat/galera
+@@ -451,14 +451,24 @@ pcmk_to_galera_name()
+ detect_first_master()
+ {
+ local best_commit=0
+- local best_node="$NODENAME"
+ local last_commit=0
+ local missing_nodes=0
+ local nodes=""
+ local nodes_recovered=""
++ local all_nodes
++ local best_node_gcomm
++ local best_node
++
++ all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
++ best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/')
++ best_node=$(galera_to_pcmk_name $best_node_gcomm)
++ if [ -z "$best_node" ]; then
++ ocf_log error "Could not determine initial best node from galera name <${best_node_gcomm}>."
++ return
++ fi
+ 
+ # avoid selecting a recovered node as bootstrap if possible
+- for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
++ for node in $all_nodes; do
+ local pcmk_node=$(galera_to_pcmk_name $node)
+ if [ -z "$pcmk_node" ]; then
+ ocf_log error "Could not determine pacemaker node from galera name <${node}>." 
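
Note on the bootstrap fix above: with no commit history to compare, every
node now derives the same "fallback" bootstrap candidate by taking the last
entry of wsrep_cluster_address and mapping it back to a pacemaker name. A
small shell sketch of that selection, reusing the node names from the
cluster_host_map example in the first patch:

    addr="gcomm://node.1.galera,node.2.galera,node.3.galera"
    all_nodes=$(echo "$addr" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')
    # the greedy .* match keeps only the last space-separated entry,
    # so every node agrees on the same candidate:
    echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/'    # prints: node.3.galera

With cluster_host_map="pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera",
galera_to_pcmk_name() would then translate node.3.galera back to pcmk3 before
the master score is set.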
diff --git a/SOURCES/bz1451097-3-galera-fix-bootstrap-when-cluster-has-no-data.patch b/SOURCES/bz1451097-3-galera-fix-bootstrap-when-cluster-has-no-data.patch new file mode 100644 index 0000000..2a9f52b --- /dev/null +++ b/SOURCES/bz1451097-3-galera-fix-bootstrap-when-cluster-has-no-data.patch @@ -0,0 +1,52 @@ +From a6b40d102e24134a3e5e99a63bd3636aebc2145a Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Thu, 13 Apr 2017 08:51:39 +0200 +Subject: [PATCH] galera: fix master target during promotion with + cluster_host_map + +When option cluster_host_map is in use, it is assumed that galera node +names map to pacemaker node names _because_ those galera names are not +part of the pacemaker cluster in the first place. + +This is not always the case (e.g. when using pacemaker bundles), so +fix accordingly. +--- + heartbeat/galera | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/galera b/heartbeat/galera +index 475a8ba..32c4222 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -415,6 +415,13 @@ promote_everyone() + { + + for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do ++ local pcmk_node=$(galera_to_pcmk_name $node) ++ if [ -z "$pcmk_node" ]; then ++ ocf_log err "Could not determine pacemaker node from galera name <${node}>." ++ return ++ else ++ node=$pcmk_node ++ fi + + set_master_score $node + done +@@ -463,7 +470,7 @@ detect_first_master() + best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') + best_node=$(galera_to_pcmk_name $best_node_gcomm) + if [ -z "$best_node" ]; then +- ocf_log error "Could not determine initial best node from galera name <${best_node_gcomm}>." ++ ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>." + return + fi + +@@ -471,7 +478,7 @@ detect_first_master() + for node in $all_nodes; do + local pcmk_node=$(galera_to_pcmk_name $node) + if [ -z "$pcmk_node" ]; then +- ocf_log error "Could not determine pacemaker node from galera name <${node}>." ++ ocf_log err "Could not determine pacemaker node from galera name <${node}>." + return + else + node=$pcmk_node diff --git a/SOURCES/bz1451414-1-galera-fix-bootstrap-when-cluster-has-no-data.patch b/SOURCES/bz1451414-1-galera-fix-bootstrap-when-cluster-has-no-data.patch deleted file mode 100644 index 98e98bf..0000000 --- a/SOURCES/bz1451414-1-galera-fix-bootstrap-when-cluster-has-no-data.patch +++ /dev/null @@ -1,140 +0,0 @@ -From aa486941a7d980ded7a30e404a9d91620b19c47a Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Mon, 19 Dec 2016 14:13:21 +0100 -Subject: [PATCH] galera: allow names in wsrep_cluster_address to differ from - pacemaker nodes' names - -Add a new option cluster_host_map to the galera resource agent in case -names to be used in wsrep_cluster_address need to differ from names -used for the pacemaker nodes. (e.g. when galera names map to IP -from a specific network interface) ---- - heartbeat/galera | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 65 insertions(+), 6 deletions(-) - -diff --git a/heartbeat/galera b/heartbeat/galera -index fe2aa8a..45693ac 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -208,13 +208,30 @@ The galera cluster address. This takes the form of: - gcomm://node,node,node - - Only nodes present in this node list will be allowed to start a galera instance. --It is expected that the galera node names listed in this address match valid --pacemaker node names. 
-+The galera node names listed in this address are expected to match valid -+pacemaker node names. If both names need to differ, you must provide a -+mapping in option cluster_host_map. - - Galera cluster address - - - -+ -+ -+A mapping of pacemaker node names to galera node names. -+ -+To be used when both pacemaker and galera names need to differ, -+(e.g. when galera names map to IP from a specific network interface) -+This takes the form of: -+pcmk1:node.1.galera;pcmk2:node.2.galera;pcmk3:node.3.galera -+ -+where the galera resource started on node pcmk1 would be named -+node.1.galera in the wsrep_cluster_address -+ -+Pacemaker to Galera name mapping -+ -+ -+ - - - Cluster check user. -@@ -454,6 +471,27 @@ greater_than_equal_long() - echo | awk -v n1="$1" -v n2="$2" '{if (n1>=n2) printf ("true"); else printf ("false");}' | grep -q "true" - } - -+galera_to_pcmk_name() -+{ -+ local galera=$1 -+ if [ -z "$OCF_RESKEY_cluster_host_map" ]; then -+ echo $galera -+ else -+ echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$2=="'"$galera"'" {print $1;exit}' -+ fi -+} -+ -+pcmk_to_galera_name() -+{ -+ local pcmk=$1 -+ if [ -z "$OCF_RESKEY_cluster_host_map" ]; then -+ echo $pcmk -+ else -+ echo "$OCF_RESKEY_cluster_host_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '$1=="'"$pcmk"'" {print $2;exit}' -+ fi -+} -+ -+ - detect_first_master() - { - local best_commit=0 -@@ -465,6 +503,14 @@ detect_first_master() - - # avoid selecting a recovered node as bootstrap if possible - for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do -+ local pcmk_node=$(galera_to_pcmk_name $node) -+ if [ -z "$pcmk_node" ]; then -+ ocf_log error "Could not determine pacemaker node from galera name <${node}>." -+ return -+ else -+ node=$pcmk_node -+ fi -+ - if is_no_grastate $node; then - nodes_recovered="$nodes_recovered $node" - else -@@ -783,10 +829,17 @@ galera_demote() - galera_start() - { - local rc -+ local galera_node -+ -+ galera_node=$(pcmk_to_galera_name $NODENAME) -+ if [ -z "$galera_node" ]; then -+ ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." -+ return $OCF_ERR_CONFIGURED -+ fi - -- echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME -+ echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node - if [ $? -ne 0 ]; then -- ocf_exit_reason "local node <${NODENAME}> must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>to start this galera instance" -+ ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) must be a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}> to start this galera instance" - return $OCF_ERR_CONFIGURED - fi - -@@ -818,6 +871,7 @@ galera_start() - galera_monitor() - { - local rc -+ local galera_node - local status_loglevel="err" - - # Set loglevel to info during probe -@@ -857,10 +911,15 @@ galera_monitor() - fi - - # if we make it here, mysql is running. Check cluster status now. -+ galera_node=$(pcmk_to_galera_name $NODENAME) -+ if [ -z "$galera_node" ]; then -+ ocf_exit_reason "Could not determine galera name from pacemaker node <${NODENAME}>." -+ return $OCF_ERR_CONFIGURED -+ fi - -- echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME -+ echo $OCF_RESKEY_wsrep_cluster_address | grep -q -F $galera_node - if [ $? 
-ne 0 ]; then -- ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" -+ ocf_exit_reason "local node <${NODENAME}> (galera node <${galera_node}>) is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" - return $OCF_ERR_GENERIC - fi - diff --git a/SOURCES/bz1451414-2-galera-fix-bootstrap-when-cluster-has-no-data.patch b/SOURCES/bz1451414-2-galera-fix-bootstrap-when-cluster-has-no-data.patch deleted file mode 100644 index b72c06d..0000000 --- a/SOURCES/bz1451414-2-galera-fix-bootstrap-when-cluster-has-no-data.patch +++ /dev/null @@ -1,50 +0,0 @@ -From a05eb8673bd1d5d3d41f2ed39df2650b19681d08 Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Fri, 3 Mar 2017 15:31:30 +0100 -Subject: [PATCH] galera: fix the first bootstrap when cluster has no data - -The resource agent selects the first node to go into Master state -based on the biggest commit version found on each node. If case no -data were written yet into the galera cluster, the current node is -selected as a "fallback" node to bootstrap the cluster. - -The way the "fallback" node is selected is wrong because every node -takes a different decision, and this ultimately yields to 3 -single-node galera clusters being started. To fix that, let the -"fallback" node be the last one in the wsrep_cluster_address, so that -the selection algorithm yields coherent results across nodes. ---- - heartbeat/galera | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/heartbeat/galera b/heartbeat/galera -index decbaa2..475a8ba 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -451,14 +451,24 @@ pcmk_to_galera_name() - detect_first_master() - { - local best_commit=0 -- local best_node="$NODENAME" - local last_commit=0 - local missing_nodes=0 - local nodes="" - local nodes_recovered="" -+ local all_nodes -+ local best_node_gcomm -+ local best_node -+ -+ all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ') -+ best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') -+ best_node=$(galera_to_pcmk_name $best_node_gcomm) -+ if [ -z "$best_node" ]; then -+ ocf_log error "Could not determine initial best node from galera name <${best_node_gcomm}>." -+ return -+ fi - - # avoid selecting a recovered node as bootstrap if possible -- for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do -+ for node in $all_nodes; do - local pcmk_node=$(galera_to_pcmk_name $node) - if [ -z "$pcmk_node" ]; then - ocf_log error "Could not determine pacemaker node from galera name <${node}>." diff --git a/SOURCES/bz1451414-3-galera-fix-bootstrap-when-cluster-has-no-data.patch b/SOURCES/bz1451414-3-galera-fix-bootstrap-when-cluster-has-no-data.patch deleted file mode 100644 index 2a9f52b..0000000 --- a/SOURCES/bz1451414-3-galera-fix-bootstrap-when-cluster-has-no-data.patch +++ /dev/null @@ -1,52 +0,0 @@ -From a6b40d102e24134a3e5e99a63bd3636aebc2145a Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Thu, 13 Apr 2017 08:51:39 +0200 -Subject: [PATCH] galera: fix master target during promotion with - cluster_host_map - -When option cluster_host_map is in use, it is assumed that galera node -names map to pacemaker node names _because_ those galera names are not -part of the pacemaker cluster in the first place. - -This is not always the case (e.g. when using pacemaker bundles), so -fix accordingly. 
---- - heartbeat/galera | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/heartbeat/galera b/heartbeat/galera -index 475a8ba..32c4222 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -415,6 +415,13 @@ promote_everyone() - { - - for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do -+ local pcmk_node=$(galera_to_pcmk_name $node) -+ if [ -z "$pcmk_node" ]; then -+ ocf_log err "Could not determine pacemaker node from galera name <${node}>." -+ return -+ else -+ node=$pcmk_node -+ fi - - set_master_score $node - done -@@ -463,7 +470,7 @@ detect_first_master() - best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') - best_node=$(galera_to_pcmk_name $best_node_gcomm) - if [ -z "$best_node" ]; then -- ocf_log error "Could not determine initial best node from galera name <${best_node_gcomm}>." -+ ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>." - return - fi - -@@ -471,7 +478,7 @@ detect_first_master() - for node in $all_nodes; do - local pcmk_node=$(galera_to_pcmk_name $node) - if [ -z "$pcmk_node" ]; then -- ocf_log error "Could not determine pacemaker node from galera name <${node}>." -+ ocf_log err "Could not determine pacemaker node from galera name <${node}>." - return - else - node=$pcmk_node diff --git a/SOURCES/bz1451933-LVM-update-metadata-on-start-relocate.patch b/SOURCES/bz1451933-LVM-update-metadata-on-start-relocate.patch new file mode 100644 index 0000000..2514a34 --- /dev/null +++ b/SOURCES/bz1451933-LVM-update-metadata-on-start-relocate.patch @@ -0,0 +1,23 @@ +From 850ee793c5c575898528ab4bd6815431e963d22d Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Mon, 22 May 2017 15:01:16 +0200 +Subject: [PATCH] LVM: use vgscan --cache to update metadata during + start/relocate + +--- + heartbeat/LVM | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/LVM b/heartbeat/LVM +index 5b265f58f..583b9a2bd 100755 +--- a/heartbeat/LVM ++++ b/heartbeat/LVM +@@ -431,7 +431,7 @@ LVM_start() { + if [ "$LVM_MAJOR" -eq "1" ]; then + ocf_run vgscan $vg + else +- ocf_run vgscan ++ ocf_run vgscan --cache + fi + + case $(get_vg_mode) in diff --git a/SOURCES/bz1451933-LVM-warn-when-cache-mode-not-writethrough.patch b/SOURCES/bz1451933-LVM-warn-when-cache-mode-not-writethrough.patch new file mode 100644 index 0000000..29be909 --- /dev/null +++ b/SOURCES/bz1451933-LVM-warn-when-cache-mode-not-writethrough.patch @@ -0,0 +1,59 @@ +From 30ac299da6a01a2f5f42fac6b3d35275ddc001e7 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Mon, 29 May 2017 14:38:48 +0200 +Subject: [PATCH] LVM: warn when cache mode is not writethrough + +--- + heartbeat/LVM | 17 ++--------------- + 1 file changed, 2 insertions(+), 15 deletions(-) + +diff --git a/heartbeat/LVM b/heartbeat/LVM +index 583b9a2bd..7ebedac6f 100755 +--- a/heartbeat/LVM ++++ b/heartbeat/LVM +@@ -29,8 +29,6 @@ + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +-OCF_RESKEY_check_writethrough_default="false" +- + ####################################################################### + + +@@ -108,14 +106,6 @@ logical volumes. + + + +- +- +-If set to true, check if cache_mode is set to writethrough. 
+- 
+-Check if cache_mode is set to writethrough
+- 
+- 
+- 
+ 
+ 
+ 
+@@ -593,10 +583,9 @@ LVM_validate_all() {
+ exit $OCF_ERR_GENERIC
+ fi
+ 
+- if ocf_is_true "$OCF_RESKEY_check_writethrough"; then
++ if lvs --noheadings -o segtype | grep -q "cache"; then
+ if ! lvs --noheadings -o cache_mode "$OCF_RESKEY_volgrpname" | grep -q "writethrough"; then
+- ocf_exit_reason "LVM cache is not in writethrough mode."
+- exit $OCF_ERR_CONFIGURED
++ ocf_log warn "LVM CACHE IS NOT IN WRITETHROUGH MODE. THIS IS NOT A SUPPORTED CONFIGURATION."
+ fi
+ fi
+ 
+@@ -707,8 +696,6 @@ if [ -n "$OCF_RESKEY_tag" ]; then
+ OUR_TAG=$OCF_RESKEY_tag
+ fi
+ 
+-: ${OCF_RESKEY_check_writethrough=${OCF_RESKEY_check_writethrough_default}}
+-
+ # What kind of method was invoked?
+ case "$1" in
+ 
diff --git a/SOURCES/bz1452049-docker-create-directories.patch b/SOURCES/bz1452049-docker-create-directories.patch
new file mode 100644
index 0000000..176af73
--- /dev/null
+++ b/SOURCES/bz1452049-docker-create-directories.patch
@@ -0,0 +1,49 @@
+From 7792db2967793e43a9272bcea3df10238c8cb806 Mon Sep 17 00:00:00 2001
+From: Andrew Beekhof 
+Date: Tue, 2 May 2017 12:11:34 +1000
+Subject: [PATCH] docker: Allow callers to specify a set of directories that
+ should be created if they don't exist
+
+---
+ heartbeat/docker | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/heartbeat/docker b/heartbeat/docker
+index b251924..bb10f36 100755
+--- a/heartbeat/docker
++++ b/heartbeat/docker
+@@ -106,6 +106,15 @@ it has initialized.
+ 
+ 
+ 
++ 
++ 
++A comma-separated list of directories that the container is expecting to use.
++The agent will ensure they exist by running 'mkdir -p'
++ 
++Required mount points
++ 
++ 
++ 
+ 
+ 
+ Specifiy the full path of a command to launch within the container to check
+@@ -263,8 +272,18 @@ docker_monitor()
+ monitor_cmd_exec
+ }
+ 
++docker_create_mounts() {
++ oldIFS="$IFS"
++ IFS=","
++ for directory in $OCF_RESKEY_mount_points; do
++ mkdir -p "$directory"
++ done
++ IFS="$oldIFS"
++}
++
+ docker_start()
+ {
++ docker_create_mounts
+ local run_opts="-d --name=${CONTAINER}"
+ # check to see if the container has already started
+ docker_simple_status
diff --git a/SOURCES/bz1454699-LVM-status-check-for-missing-VG.patch b/SOURCES/bz1454699-LVM-status-check-for-missing-VG.patch
new file mode 100644
index 0000000..06eb832
--- /dev/null
+++ b/SOURCES/bz1454699-LVM-status-check-for-missing-VG.patch
@@ -0,0 +1,32 @@
+From e587a7dbc17c24de14098a1b56b6de48ded9d8ba Mon Sep 17 00:00:00 2001
+From: Oyvind Albrigtsen 
+Date: Wed, 24 May 2017 13:03:47 +0200
+Subject: [PATCH] LVM: status check for missing VG
+
+---
+ heartbeat/LVM | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/heartbeat/LVM b/heartbeat/LVM
+index 5b265f58f..0e5b14d72 100755
+--- a/heartbeat/LVM
++++ b/heartbeat/LVM
+@@ -320,6 +320,18 @@ LVM_status() {
+ fi
+ fi
+ fi
++
++ # Check if VG is still available (e.g. for multipath where the device
++ # doesn't disappear)
++ if [ "$LVM_MAJOR" -eq "1" ]; then
++ output=$(vgscan $vg 2>&1)
++ else
++ output=$(vgscan --cache 2>&1)
++ fi
++ if ! echo "$output" | grep -q "Found.*\"$1\""; then
++ ocf_exit_reason "LVM Volume $1 is not available"
++ return $OCF_ERR_GENERIC
++ fi
+ 
+ if [ -d /dev/$1 ]; then
+ test "`cd /dev/$1 && ls`" != ""
diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec
index 039cba3..db6b69c 100644
--- a/SPECS/resource-agents.spec
+++ b/SPECS/resource-agents.spec
@@ -19,6 +19,22 @@
 # use the correct group for each. 
# +## Whether this platform defaults to using systemd as an init system +## (needs to be evaluated prior to BuildRequires being enumerated and +## installed as it's intended to conditionally select some of these, and +## for that there are only few indicators with varying reliability: +## - presence of systemd-defined macros (when building in a full-fledged +## environment, which is not the case with ordinary mock-based builds) +## - systemd-aware rpm as manifested with the presence of particular +## macro (rpm itself will trivially always be present when building) +## - existence of /usr/lib/os-release file, which is something heavily +## propagated by systemd project +## - when not good enough, there's always a possibility to check +## particular distro-specific macros (incl. version comparison) +%define systemd_native (%{?_unitdir:1}%{?!_unitdir:0}%{nil \ + } || %{?__transaction_systemd_inhibit:1}%{?!__transaction_systemd_inhibit:0}%{nil \ + } || %(test -f /usr/lib/os-release; test $? -ne 0; echo $?)) + %global upstream_prefix ClusterLabs-resource-agents %global upstream_version 5434e96 @@ -32,7 +48,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 3.9.5 -Release: 82%{?dist}.11 +Release: 105%{?dist} License: GPLv2+, LGPLv2+ and ASL 2.0 URL: https://github.com/ClusterLabs/resource-agents %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} @@ -168,19 +184,57 @@ Patch123: bz1328386-2-oracle-monprofile-container-databases.patch Patch124: bz1328386-3-oracle-monprofile-container-databases.patch Patch125: bz1303037-2-portblock.patch Patch126: bz1249430-2-tomcat-fix-selinux-enforced.patch -Patch127: bz1391495-nfsserver-keep-options.patch -Patch128: bz1394296-redis-fix-selinux-permissions.patch -Patch129: bz1400103-redis-notify-clients-of-master-being-demoted.patch -Patch130: bz1400103-nova-compute-wait-nova-compute-unfence.patch -Patch131: bz1409513-portblock-wait.patch -Patch132: bz1402511-rabbitmq-cluster-reset-mnesia-before-join.patch -Patch133: bz1423424-1-update-saphana-saphanatopology.patch -Patch134: bz1423424-2-update-saphana-saphanatopology.patch -Patch135: bz1437122-rabbitmq-cluster-pacemaker-remote.patch -Patch136: bz1445889-IPaddr2-IPv6-add-preferred_lft-parameter.patch -Patch137: bz1451414-1-galera-fix-bootstrap-when-cluster-has-no-data.patch -Patch138: bz1451414-2-galera-fix-bootstrap-when-cluster-has-no-data.patch -Patch139: bz1451414-3-galera-fix-bootstrap-when-cluster-has-no-data.patch +Patch127: bz1387491-nfsserver-keep-options.patch +Patch128: bz1390974-redis-fix-selinux-permissions.patch +Patch129: bz1305549-redis-notify-clients-of-master-being-demoted.patch +Patch130: bz1305549-nova-compute-wait-nova-compute-unfence.patch +Patch131: bz1360768-galera-prevent-promote-after-demote.patch +Patch132: bz1376588-iSCSITarget-properly-create-portals-for-lio-t.patch +Patch133: bz1384955-nfsserver-dont-stop-rpcbind.patch +Patch134: bz1387363-Filesystem-submount-check.patch +Patch135: bz1388854-delay-change-startdelay.patch +Patch136: bz1391470-galera-last-commit-fix-for-mariadb-10.1.18.patch +Patch137: bz1391580-portblock-return-success-on-stop-with-invalid-ip.patch +Patch138: bz1402370-portblock-wait.patch +Patch139: bz1406152-exportfs-ipv6-fix.patch +Patch140: bz1395142-1-update-saphana-saphanatopology.patch +Patch141: bz1395142-2-update-saphana-saphanatopology.patch +Patch142: bz1260713-1-sapdatabase-process-count-suser.patch +Patch143: bz1260713-2-sapdatabase-process-count-suser.patch +Patch144: 
bz1397393-rabbitmq-cluster-reset-mnesia-before-join.patch +Patch145: bz1392432-LVM-partial_activation-fix.patch +Patch146: bz1159328-LVM-check_writethrough.patch +Patch147: bz1359252-clvm-remove-reload-action.patch +Patch148: bz1389300-iSCSILogicalUnit-IPv6-support.patch +Patch149: bz1400172-IPsrcaddr-fix-duplicate-routes.patch +Patch150: bz1420565-pgsql-dont-use-crm_failcount.patch +Patch151: bz1427611-ocf_log-use-same-log-format-as-pacemaker.patch +Patch152: bz1430304-NodeUtilization.patch +Patch153: bz1430385-iSCSILogicalUnit-iSCSITarget-concurrent-safe.patch +Patch154: bz1434351-IPaddr2-send-arp-monitor-action.patch +Patch155: bz1435171-named-add-support-for-rndc-options.patch +Patch156: bz1077888-CTDB-fix-logging.patch +Patch157: bz1393189-1-IPaddr2-detect-duplicate-IP.patch +Patch158: bz1393189-2-IPaddr2-detect-duplicate-IP.patch +Patch159: bz1408656-ethmonitor-monitor-interface-without-ip.patch +Patch160: bz1411225-oraasm.patch +Patch161: bz1435982-rabbitmq-cluster-pacemaker-remote.patch +Patch162: bz1380405-send_arp-usage.patch +Patch163: bz1427574-DB2-fix-HADR-DB2-V98-or-later.patch +Patch164: bz1342376-rabbitmq-cluster-backup-and-restore-users-policies.patch +Patch165: bz1445861-IPaddr2-IPv6-add-preferred_lft-parameter.patch +Patch166: bz1316130-systemd-drop-in-clvmd-LVM.patch +Patch167: bz1449681-1-saphana-saphanatopology-update-0.152.21.patch +Patch168: bz1451097-1-galera-fix-bootstrap-when-cluster-has-no-data.patch +Patch169: bz1451097-2-galera-fix-bootstrap-when-cluster-has-no-data.patch +Patch170: bz1451097-3-galera-fix-bootstrap-when-cluster-has-no-data.patch +Patch171: bz1452049-docker-create-directories.patch +Patch172: bz1454699-LVM-status-check-for-missing-VG.patch +Patch173: bz1451933-LVM-update-metadata-on-start-relocate.patch +Patch174: bz1451933-LVM-warn-when-cache-mode-not-writethrough.patch +Patch175: bz1449681-2-saphana-saphanatopology-update-0.152.21.patch +Patch176: bz1342376-2-rabbitmq-cluster-backup-and-restore-users-policies.patch +Patch177: bz1342376-3-rabbitmq-cluster-backup-and-restore-users-policies.patch Obsoletes: heartbeat-resources <= %{version} Provides: heartbeat-resources = %{version} @@ -194,6 +248,10 @@ BuildRequires: perl python-devel BuildRequires: libxslt glib2-devel BuildRequires: which +%if %{systemd_native} +BuildRequires: pkgconfig(systemd) +%endif + %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} #BuildRequires: cluster-glue-libs-devel BuildRequires: docbook-style-xsl docbook-dtds @@ -250,7 +308,7 @@ A set of scripts to interface with several services to operate in a High Availability environment for both Pacemaker and rgmanager service managers. -%ifarch x86_64 +%ifarch x86_64 ppc64le %package sap License: GPLv2+ Summary: SAP cluster resource agents and connector script @@ -268,7 +326,7 @@ Pacemaker to allow SAP instances to be managed in a cluster environment. 
%endif -%ifarch x86_64 +%ifarch x86_64 ppc64le %package sap-hana License: GPLv2+ Summary: SAP HANA cluster resource agents @@ -422,15 +480,53 @@ exit 1 %patch128 -p1 %patch129 -p1 %patch130 -p1 -%patch131 -p1 +%patch131 -p1 -F1 %patch132 -p1 %patch133 -p1 %patch134 -p1 %patch135 -p1 -%patch136 -p1 -F1 +%patch136 -p1 %patch137 -p1 %patch138 -p1 %patch139 -p1 +%patch140 -p1 +%patch141 -p1 +%patch142 -p1 +%patch143 -p1 +%patch144 -p1 +%patch145 -p1 +%patch146 -p1 +%patch147 -p1 +%patch148 -p1 +%patch149 -p1 +%patch150 -p1 +%patch151 -p1 +%patch152 -p1 +%patch153 -p1 +%patch154 -p1 +%patch155 -p1 +%patch156 -p1 +%patch157 -p1 +%patch158 -p1 +%patch159 -p1 +%patch160 -p1 +%patch161 -p1 +%patch162 -p1 -F2 +%patch163 -p1 +%patch164 -p1 +%patch165 -p1 +%patch166 -p1 +%patch167 -p1 +%patch168 -p1 +%patch169 -p1 +%patch170 -p1 +%patch171 -p1 +%patch172 -p1 +%patch173 -p1 +%patch174 -p1 +%patch175 -p1 +%patch176 -p1 +%patch177 -p1 %build if [ ! -f configure ]; then @@ -447,7 +543,9 @@ chmod 755 heartbeat/rabbitmq-cluster chmod 755 heartbeat/redis chmod 755 heartbeat/iface-vlan chmod 755 heartbeat/nova-compute-wait +chmod 755 heartbeat/oraasm chmod 755 heartbeat/NovaEvacuate +chmod 755 heartbeat/NodeUtilization chmod 755 heartbeat/SAPHana chmod 755 heartbeat/SAPHanaTopology @@ -475,6 +573,9 @@ chmod 755 heartbeat/clvm %configure \ %{conf_opt_fatal} \ +%if %{defined _unitdir} + --with-systemdsystemunitdir=%{_unitdir} \ +%endif --with-pkg-name=%{name} \ --with-ras-set=%{rasset} \ --with-ocft-cases=fedora @@ -533,6 +634,10 @@ rm -rf %{buildroot} /usr/lib/ocf/resource.d/redhat %endif +%if %{defined _unitdir} +%{_unitdir}/resource-agents-deps.target +%endif + %dir %{_datadir}/%{name} %dir %{_datadir}/%{name}/ocft %{_datadir}/%{name}/ocft/configs @@ -666,7 +771,7 @@ rm -rf %{buildroot} ccs_update_schema > /dev/null 2>&1 ||: %endif -%ifarch x86_64 +%ifarch x86_64 ppc64le %files sap %defattr(-,root,root) %{_sbindir}/sap_redhat_cluster_connector @@ -677,7 +782,7 @@ ccs_update_schema > /dev/null 2>&1 ||: %exclude /usr/lib/ocf/resource.d/heartbeat/SAPHana* %endif -%ifarch x86_64 +%ifarch x86_64 ppc64le %files sap-hana %defattr(-,root,root) /usr/lib/ocf/resource.d/heartbeat/SAPHana* @@ -685,51 +790,149 @@ ccs_update_schema > /dev/null 2>&1 ||: %endif %changelog -* Tue May 16 2017 Oyvind Albrigtsen - 3.9.5-82.11 +* Fri Jun 23 2017 Oyvind Albrigtsen - 3.9.5-105 +- rabbitmq-cluster: fix to keep expiration policy + + Resolves: rhbz#1342376 + +* Fri Jun 2 2017 Oyvind Albrigtsen - 3.9.5-104 +- SAPHana/SAPHanaTopology: update to version 0.152.21 + + Resolves: rhbz#1449681 + +* Wed May 31 2017 Oyvind Albrigtsen - 3.9.5-102 +- LVM: update metadata on start/relocate +- LVM: warn when cache mode is not writethrough + + Resolves: rhbz#1451933 + +* Tue May 30 2017 Oyvind Albrigtsen - 3.9.5-101 +- LVM: status check for missing VG + + Resolves: rhbz#1454699 + +* Mon May 22 2017 Oyvind Albrigtsen - 3.9.5-100 +- docker: add "mount_points" parameter to be able to create directories + + Resolves: rhbz#1452049 + +* Tue May 16 2017 Oyvind Albrigtsen - 3.9.5-99 - galera: fix bootstrap when cluster has no data - Resolves: rhbz#1451414 + Resolves: rhbz#1451097 -* Wed Apr 26 2017 Oyvind Albrigtsen - 3.9.5-82.10 +* Wed May 3 2017 Oyvind Albrigtsen - 3.9.5-97 +- systemd: add drop-in for clvmd and LVM to avoid fencing on shutdown + + Resolves: rhbz#1316130 + +* Wed Apr 26 2017 Oyvind Albrigtsen - 3.9.5-96 - IPaddr2: add "preferred_lft" parameter for IPv6 - Resolves: rhbz#1445889 + Resolves: rhbz#1445861 + +* Fri Apr 7 2017 Oyvind Albrigtsen 
- 3.9.5-95
+- DB2: fix HADR for DB2 V98 or later
+
+ Resolves: rhbz#1427574
-* Tue Apr 4 2017 Oyvind Albrigtsen - 3.9.5-82.9
+* Tue Apr 4 2017 Oyvind Albrigtsen - 3.9.5-94
+- send_arp: update usage info
+
+ Resolves: rhbz#1380405
+
+* Tue Apr 4 2017 Oyvind Albrigtsen - 3.9.5-93
 - rabbitmq-cluster: allow to run on Pacemaker remote nodes
+- oraasm: new resource agent for Oracle ASM
- Resolves: rhbz#1437122
+ Resolves: rhbz#1435982
+ Resolves: rhbz#1411225
-* Fri Feb 17 2017 Oyvind Albrigtsen - 3.9.5-82.7
-- SAPHana/SAPHanaTopology: update to version 0.152.17
+* Tue Mar 28 2017 Oyvind Albrigtsen - 3.9.5-90
+- ethmonitor: fix to monitor interface without IP
+
+ Resolves: rhbz#1408656
- Resolves: rhbz#1423424
+* Tue Mar 28 2017 Oyvind Albrigtsen - 3.9.5-89
+- NodeUtilization: new resource agent
+- iSCSILogicalUnit, iSCSITarget: make concurrent-safe
+- IPaddr2: send gratuitous ARP packets during monitor action
+- named: add support for rndc options
+- CTDB: fix logging
+- IPaddr2: add option to detect duplicate IP
-* Thu Feb 2 2017 Oyvind Albrigtsen - 3.9.5-82.6
+ Resolves: rhbz#1430304
+ Resolves: rhbz#1430385
+ Resolves: rhbz#1434351
+ Resolves: rhbz#1435171
+ Resolves: rhbz#1077888
+ Resolves: rhbz#1393189
+
+* Thu Mar 9 2017 Oyvind Albrigtsen - 3.9.5-88
+- clvm: remove reload action
+- iSCSILogicalUnit: add IPv6-support
+- IPsrcaddr: fix issue with duplicate routes
+- pgsql: don't use crm_failcount
+- ocf_log: use same log format as Pacemaker
+
+ Resolves: rhbz#1359252
+ Resolves: rhbz#1389300
+ Resolves: rhbz#1400172
+ Resolves: rhbz#1420565
+ Resolves: rhbz#1427611
+
+* Thu Feb 2 2017 Oyvind Albrigtsen - 3.9.5-87
+- LVM: fix for "partial vg activates when partial_activation=false"
 - redis: notify clients of master being demoted
+- SAP/SAP HANA: ppc64le build
- Resolves: rhbz#1400103
+ Resolves: rhbz#1392432
+ Resolves: rhbz#1305549
+ Resolves: rhbz#1371088
-* Fri Jan 20 2017 Oyvind Albrigtsen - 3.9.5-82.5
+* Fri Jan 27 2017 Oyvind Albrigtsen - 3.9.5-86
+- SAPDatabase: fix process count for SUSER
 - rabbitmq-cluster: reset Mnesia before join
- Resolves: rhbz#1402511
+ Resolves: rhbz#1260713
+ Resolves: rhbz#1397393
-* Tue Jan 3 2017 Oyvind Albrigtsen - 3.9.5-82.4
-- portblock: use iptables wait
+* Fri Jan 13 2017 Oyvind Albrigtsen - 3.9.5-85
+- exportfs: fix for IPv6 addresses
+- SAPHana/SAPHanaTopology: update to version 0.152.17
 - Add netstat dependency
- Resolves: rhbz#1409513
+ Resolves: rhbz#1406152
+ Resolves: rhbz#1395142
+ Resolves: rhbz#1402370
+
+* Tue Dec 20 2016 Oyvind Albrigtsen - 3.9.5-84
+- galera: prevent promote after demote
+- iSCSITarget: properly create portals for lio-t
+- nfsserver: don't stop rpcbind
+- Filesystem: submount check
+- Delay: change startdelay
+- galera: last commit fix for MariaDB 10.1.18+
+- portblock: return success on stop with invalid IP
+- portblock: use iptables wait
-* Mon Nov 14 2016 Oyvind Albrigtsen - 3.9.5-82.2
+ Resolves: rhbz#1360768
+ Resolves: rhbz#1376588
+ Resolves: rhbz#1384955
+ Resolves: rhbz#1387363
+ Resolves: rhbz#1388854
+ Resolves: rhbz#1391470
+ Resolves: rhbz#1391580
+ Resolves: rhbz#1395596
+
+* Tue Nov 29 2016 Oyvind Albrigtsen - 3.9.5-83
+- nfsserver: keep options in /etc/sysconfig/nfs
 - redis: fix SELinux permissions
+- redis: notify clients of master being demoted
- Resolves: rhbz#1394296
-
-* Fri Nov 4 2016 Oyvind Albrigtsen - 3.9.5-82.1
-- nfsserver: fix to preserve options in /etc/sysconfig/nfs
-
- Resolves: rhbz#1391495
+ Resolves: rhbz#1387491
+ Resolves: rhbz#1390974
+ Resolves: rhbz#1305549
 
* Tue Sep 20 2016 
Oyvind Albrigtsen - 3.9.5-82 - portblock: create tickle_dir if it doesn't exist
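
Note on the %systemd_native macro introduced in the spec above: the three
indicators it ORs together can be inspected by hand. A rough interactive
equivalent (a hypothetical shell session; the real evaluation happens inside
rpm's macro engine at build time):

    # 1. systemd-defined macros present?
    rpm --eval '%{?_unitdir:1}%{?!_unitdir:0}'
    # 2. rpm itself systemd-aware?
    rpm --eval '%{?__transaction_systemd_inhibit:1}%{?!__transaction_systemd_inhibit:0}'
    # 3. os-release file shipped (heavily propagated by systemd)?
    test -f /usr/lib/os-release && echo 1 || echo 0

If any of the three prints 1, the spec adds pkgconfig(systemd) to the
BuildRequires; the resource-agents-deps.target unit is packaged separately,
under the narrower %{defined _unitdir} condition.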