diff --git a/SOURCES/bz1126073-1-nfsserver-fix-systemd-status-detection.patch b/SOURCES/bz1126073-1-nfsserver-fix-systemd-status-detection.patch new file mode 100644 index 0000000..7e2d3b9 --- /dev/null +++ b/SOURCES/bz1126073-1-nfsserver-fix-systemd-status-detection.patch @@ -0,0 +1,474 @@ +diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver +--- a/heartbeat/nfsserver 2016-02-05 09:04:19.350003826 +0100 ++++ b/heartbeat/nfsserver 2016-02-05 09:04:58.463395839 +0100 +@@ -208,9 +208,9 @@ + + + +- +- +- ++ ++ ++ + + + +@@ -327,11 +327,12 @@ + nfs_exec() + { + local cmd=$1 ++ local svc=$2 + set_exec_mode + + case $EXEC_MODE in + 1) ${OCF_RESKEY_nfs_init_script} $cmd;; +- 2) systemctl $cmd nfs-server.service ;; ++ 2) systemctl $cmd ${svc}.service ;; + esac + } + +@@ -353,21 +354,117 @@ + + nfsserver_monitor () + { ++ # Skip trying to start processes once before failing ++ # when run from nfsserver_start () ++ if [ "$1" == "fromstart" ]; then ++ ocf_log info "fromstart" ++ fromstart=1 ++ else ++ tries=1 ++ fi ++ ++ # systemd ++ if [ "$EXEC_MODE" -eq "2" ]; then ++ ocf_log info "Status: rpcbind" ++ rpcinfo &> /dev/null ++ rc=$? ++ if [ "$rc" -ne "0" ]; then ++ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then ++ nfsserver_start frommonitor ++ rc=$? ++ let tries=$tries-1 ++ fi ++ if [ "$rc" -ne "0" ]; then ++ ocf_exit_reason "rpcbind is not running" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ ++ ocf_log info "Status: nfs-mountd" ++ rpcinfo -t localhost 100005 &> /dev/null ++ rc=$? ++ if [ "$rc" -ne "0" ]; then ++ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then ++ nfsserver_start frommonitor ++ rc=$? ++ let tries=$tries-1 ++ fi ++ if [ "$rc" -ne "0" ]; then ++ ocf_exit_reason "nfs-mountd is not running" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ ++ ocf_log info "Status: nfs-idmapd" ++ fn=`mktemp` ++ nfs_exec status nfs-idmapd > $fn 2>&1 ++ rc=$? ++ ocf_log debug "$(cat $fn)" ++ rm -f $fn ++ if [ "$rc" -ne "0" ]; then ++ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then ++ nfsserver_start frommonitor ++ rc=$? ++ ocf_log info "Tried to start services: rc: $rc" ++ let tries=$tries-1 ++ fi ++ if [ "$rc" -ne "0" ]; then ++ ocf_exit_reason "nfs-idmapd is not running" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ ++ ocf_log info "Status: rpc-statd" ++ rpcinfo -t localhost 100024 &> /dev/null ++ rc=$? ++ if [ "$rc" -ne "0" ]; then ++ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then ++ nfsserver_start frommonitor ++ rc=$? ++ let tries=$tries-1 ++ fi ++ if [ "$rc" -ne "0" ]; then ++ ocf_exit_reason "rpc-statd is not running" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ fi ++ + fn=`mktemp` +- nfs_exec status > $fn 2>&1 ++ nfs_exec status nfs-server > $fn 2>&1 + rc=$? + ocf_log debug "$(cat $fn)" + rm -f $fn + +- #Adapte LSB status code to OCF return code ++ tfn="/proc/fs/nfsd/threads" ++ if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then ++ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then ++ nfsserver_start frommonitor ++ rc=$? ++ let tries=$tries-1 ++ fi ++ if [ "$rc" -ne "0" ]; then ++ ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ ++ #Adapt LSB status code to OCF return code + if [ $rc -eq 0 ]; then + # don't report success if nfs servers are up + # without locking daemons. + v3locking_exec "status" + rc=$? + if [ $rc -ne 0 ]; then +- ocf_exit_reason "NFS server is up, but the locking daemons are down" +- rc=$OCF_ERR_GENERIC ++ if [ ! "$fromstart" ] && [ $tries -gt "0" ]; then ++ nfsserver_start frommonitor ++ rc=$? 
++ let tries=$tries-1 ++ fi ++ if [ "$rc" -ne "0" ]; then ++ ocf_exit_reason "NFS server is up, but the locking daemons are down" ++ rc=$OCF_ERR_GENERIC ++ fi + fi + return $rc + elif [ $rc -eq 3 ]; then +@@ -391,12 +488,7 @@ + # only write to the tmp /etc/sysconfig/nfs if sysconfig exists. + # otherwise this distro does not support setting these options. + if [ -d "/etc/sysconfig" ]; then +- # replace if the value exists, append otherwise +- if grep "^\s*${key}=" $file ; then +- sed -i "s/\s*${key}=.*$/${key}=\"${value}\"/" $file +- else +- echo "${key}=\"${value}\"" >> $file +- fi ++ echo "${key}=\"${value}\"" >> $file + elif [ "$requires_sysconfig" = "true" ]; then + ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args." + fi +@@ -409,11 +501,6 @@ + local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX) + local statd_args + +- if [ -f "$NFS_SYSCONFIG" ]; then +- ## Take the $NFS_SYSCONFIG file as our skeleton +- cp $NFS_SYSCONFIG $tmpconfig +- fi +- + # nfsd args + set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true" + +@@ -444,20 +531,14 @@ + + # override local nfs config. preserve previous local config though. + if [ -s $tmpconfig ]; then +- cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 ++ cat $NFS_SYSCONFIG | grep -e "$NFS_SYSCONFIG_AUTOGEN_TAG" + if [ $? -ne 0 ]; then + # backup local nfs config if it doesn't have our HA autogen tag in it. + mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP + fi +- +- cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 +- if [ $? -ne 0 ]; then +- echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG +- echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG +- cat $tmpconfig >> $NFS_SYSCONFIG +- else +- cat $tmpconfig > $NFS_SYSCONFIG +- fi ++ echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG ++ echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG ++ cat $tmpconfig >> $NFS_SYSCONFIG + fi + rm -f $tmpconfig + } +@@ -476,14 +557,13 @@ + [ -d "$fp/$STATD_DIR/sm" ] || mkdir -p "$fp/$STATD_DIR/sm" + [ -d "$fp/$STATD_DIR/sm.ha" ] || mkdir -p "$fp/$STATD_DIR/sm.ha" + [ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak" +- [ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] && +- chown -R rpcuser.rpcuser "$fp/$STATD_DIR" ++ [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown -R rpcuser.rpcuser "$fp/$STATD_DIR" + + [ -f "$fp/etab" ] || touch "$fp/etab" + [ -f "$fp/xtab" ] || touch "$fp/xtab" + [ -f "$fp/rmtab" ] || touch "$fp/rmtab" + +- dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 >/dev/null 2>&1 ++ dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 &> /dev/null + [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state" + [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp" + } +@@ -563,15 +643,15 @@ + + terminate() + { +- local pids +- local i=0 ++ declare pids ++ declare i=0 + + while : ; do + pids=$(binary_status $1) + [ -z "$pids" ] && return 0 + kill $pids + sleep 1 +- i=$((i + 1)) ++ ((i++)) + [ $i -gt 3 ] && return 1 + done + } +@@ -579,22 +659,22 @@ + + killkill() + { +- local pids +- local i=0 ++ declare pids ++ declare i=0 + + while : ; do + pids=$(binary_status $1) + [ -z "$pids" ] && return 0 + kill -9 $pids + sleep 1 +- i=$((i + 1)) ++ ((i++)) + [ $i -gt 3 ] && return 1 + done + } + + stop_process() + { +- local process=$1 ++ declare process=$1 + + ocf_log info "Stopping $process" + if 
terminate $process; then +@@ -665,9 +745,14 @@ + + nfsserver_start () + { ++ # Skip monitor check when run from nfsserver_monitor () ++ if [ "$1" == "frommonitor" ]; then ++ frommonitor=1 ++ fi ++ + local rc; + +- if nfsserver_monitor; then ++ if [ ! "$frommonitor" ] && nfsserver_monitor fromstart; then + ocf_log debug "NFS server is already started" + return $OCF_SUCCESS + fi +@@ -693,11 +778,32 @@ + modprobe nfsd + fi + ++ # systemd ++ if [ "$EXEC_MODE" -eq "2" ]; then ++ nfs_exec start rpcbind ++ local i=10 ++ while [ "$i" -gt 0 ]; do ++ ocf_log info "Start: rpcbind i: $i" ++ rpcinfo &> /dev/null ++ rc=$? ++ if [ "$rc" -eq "0" ]; then ++ break; ++ fi ++ sleep 1 ++ let i=$i-1 ++ done ++ if [ "$i" -eq 0 ]; then ++ ocf_exit_reason "Failed to start rpcbind" ++ return $OCF_ERR_GENERIC ++ fi ++ fi ++ + # check to see if we need to start rpc.statd + v3locking_exec "status" + if [ $? -ne $OCF_SUCCESS ]; then + v3locking_exec "start" + rc=$? ++ ocf_log info "Start: v3locking: $rc" + if [ $rc -ne 0 ]; then + ocf_exit_reason "Failed to start NFS server locking daemons" + return $rc +@@ -706,8 +812,65 @@ + ocf_log info "rpc.statd already up" + fi + ++ # systemd ++ if [ "$EXEC_MODE" -eq "2" ]; then ++ nfs_exec start nfs-mountd ++ local i=10 ++ while [ "$i" -gt 0 ]; do ++ ocf_log info "Start: nfs-mountd i: $i" ++ rpcinfo -t localhost 100005 &> /dev/null ++ rc=$? ++ if [ "$rc" -eq "0" ]; then ++ break; ++ fi ++ sleep 1 ++ let i=$i-1 ++ done ++ if [ "$i" -eq 0 ]; then ++ ocf_exit_reason "Failed to start nfs-mountd" ++ return $OCF_ERR_GENERIC ++ fi ++ ++ nfs_exec start nfs-idmapd ++ local i=10 ++ while [ "$i" -gt 0 ]; do ++ ocf_log info "Start: nfs-idmapd i: $i" ++ fn=`mktemp` ++ nfs_exec status nfs-idmapd > $fn 2>&1 ++ rc=$? ++ ocf_log debug "$(cat $fn)" ++ rm -f $fn ++ if [ "$rc" -eq "0" ]; then ++ break; ++ fi ++ sleep 1 ++ let i=$i-1 ++ done ++ if [ "$i" -eq 0 ]; then ++ ocf_exit_reason "Failed to start nfs-idmapd" ++ return $OCF_ERR_GENERIC ++ fi ++ ++ nfs_exec start rpc-statd ++ local i=10 ++ while [ "$i" -gt 0 ]; do ++ ocf_log info "Start: rpc-statd i: $i" ++ rpcinfo -t localhost 100024 &> /dev/null ++ rc=$? ++ if [ "$rc" -eq "0" ]; then ++ break; ++ fi ++ sleep 1 ++ let i=$i-1 ++ done ++ if [ "$i" -eq 0 ]; then ++ ocf_exit_reason "Failed to start rpc-statd" ++ return $OCF_ERR_GENERIC ++ fi ++ fi ++ + fn=`mktemp` +- nfs_exec start > $fn 2>&1 ++ nfs_exec start nfs-server > $fn 2>&1 + rc=$? + ocf_log debug "$(cat $fn)" + rm -f $fn +@@ -717,6 +880,12 @@ + return $rc + fi + ++ tfn="/proc/fs/nfsd/threads" ++ if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then ++ ocf_exit_reason "Failed to start NFS server: /proc/fs/nfsd/threads" ++ return $OCF_ERR_GENERIC ++ fi ++ + notify_locks + + ocf_log info "NFS server started" +@@ -733,24 +902,71 @@ + cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 + + fn=`mktemp` +- nfs_exec stop > $fn 2>&1 ++ nfs_exec stop nfs-server > $fn 2>&1 + rc=$? + ocf_log debug "$(cat $fn)" + rm -f $fn + ++ if [ $rc -ne 0 ]; then ++ ocf_exit_reason "Failed to stop NFS server" ++ return $rc ++ fi ++ ++ # systemd ++ if [ "$EXEC_MODE" -eq "2" ]; then ++ ocf_log info "Stop: threads" ++ tfn="/proc/fs/nfsd/threads" ++ if [ -f "$tfn" ] && [ "$(cat $tfn)" -gt "0" ]; then ++ ocf_exit_reason "NFS server failed to stop: /proc/fs/nfsd/threads" ++ return $OCF_ERR_GENERIC ++ fi ++ ++ nfs_exec stop rpc-statd &> /dev/null ++ ocf_log info "Stop: rpc-statd" ++ rpcinfo -t localhost 100024 &> /dev/null ++ rc=$? 
++ if [ "$rc" -eq "0" ]; then ++ ocf_exit_reason "Failed to stop rpc-statd" ++ return $OCF_ERR_GENERIC ++ fi ++ ++ nfs_exec stop nfs-idmapd &> /dev/null ++ ocf_log info "Stop: nfs-idmapd" ++ fn=`mktemp` ++ nfs_exec status nfs-idmapd > $fn 2>&1 ++ rc=$? ++ ocf_log debug "$(cat $fn)" ++ rm -f $fn ++ if [ "$rc" -eq "0" ]; then ++ ocf_exit_reason "Failed to stop nfs-idmapd" ++ return $OCF_ERR_GENERIC ++ fi ++ ++ nfs_exec stop nfs-mountd &> /dev/null ++ ocf_log info "Stop: nfs-mountd" ++ rpcinfo -t localhost 100005 &> /dev/null ++ rc=$? ++ if [ "$rc" -eq "0" ]; then ++ ocf_exit_reason "Failed to stop nfs-mountd" ++ return $OCF_ERR_GENERIC ++ fi ++ fi ++ + v3locking_exec "stop" + if [ $? -ne 0 ]; then + ocf_exit_reason "Failed to stop NFS locking daemons" + rc=$OCF_ERR_GENERIC + fi + +- if [ $rc -eq 0 ]; then +- unbind_tree +- ocf_log info "NFS server stopped" +- else +- ocf_exit_reason "Failed to stop NFS server" ++ # systemd ++ if [ "$EXEC_MODE" -eq "2" ]; then ++ nfs_exec stop rpcbind &> /dev/null ++ ocf_log info "Stop: rpcbind" + fi +- return $rc ++ ++ unbind_tree ++ ocf_log info "NFS server stopped" ++ return 0 + } + + nfsserver_validate () diff --git a/SOURCES/bz1126073-2-nfsserver-fix-systemd-status-detection.patch b/SOURCES/bz1126073-2-nfsserver-fix-systemd-status-detection.patch new file mode 100644 index 0000000..74ec413 --- /dev/null +++ b/SOURCES/bz1126073-2-nfsserver-fix-systemd-status-detection.patch @@ -0,0 +1,337 @@ +diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver +--- a/heartbeat/nfsserver 2016-07-21 12:40:55.417326145 +0200 ++++ b/heartbeat/nfsserver 2016-07-21 12:04:49.000000000 +0200 +@@ -352,45 +352,22 @@ + + nfsserver_monitor () + { +- # Skip trying to start processes once before failing +- # when run from nfsserver_start () +- if [ "$1" == "fromstart" ]; then +- ocf_log info "fromstart" +- fromstart=1 +- else +- tries=1 +- fi +- + # systemd + if [ "$EXEC_MODE" -eq "2" ]; then + ocf_log info "Status: rpcbind" +- rpcinfo &> /dev/null ++ rpcinfo > /dev/null 2>&1 + rc=$? + if [ "$rc" -ne "0" ]; then +- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then +- nfsserver_start frommonitor +- rc=$? +- let tries=$tries-1 +- fi +- if [ "$rc" -ne "0" ]; then +- ocf_exit_reason "rpcbind is not running" +- return $OCF_NOT_RUNNING +- fi ++ ocf_exit_reason "rpcbind is not running" ++ return $OCF_NOT_RUNNING + fi + + ocf_log info "Status: nfs-mountd" +- rpcinfo -t localhost 100005 &> /dev/null ++ rpcinfo -t localhost 100005 > /dev/null 2>&1 + rc=$? + if [ "$rc" -ne "0" ]; then +- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then +- nfsserver_start frommonitor +- rc=$? +- let tries=$tries-1 +- fi +- if [ "$rc" -ne "0" ]; then +- ocf_exit_reason "nfs-mountd is not running" +- return $OCF_NOT_RUNNING +- fi ++ ocf_exit_reason "nfs-mountd is not running" ++ return $OCF_NOT_RUNNING + fi + + ocf_log info "Status: nfs-idmapd" +@@ -400,31 +377,16 @@ + ocf_log debug "$(cat $fn)" + rm -f $fn + if [ "$rc" -ne "0" ]; then +- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then +- nfsserver_start frommonitor +- rc=$? +- ocf_log info "Tried to start services: rc: $rc" +- let tries=$tries-1 +- fi +- if [ "$rc" -ne "0" ]; then +- ocf_exit_reason "nfs-idmapd is not running" +- return $OCF_NOT_RUNNING +- fi ++ ocf_exit_reason "nfs-idmapd is not running" ++ return $OCF_NOT_RUNNING + fi + + ocf_log info "Status: rpc-statd" +- rpcinfo -t localhost 100024 &> /dev/null ++ rpcinfo -t localhost 100024 > /dev/null 2>&1 + rc=$? + if [ "$rc" -ne "0" ]; then +- if [ ! 
"$fromstart" ] && [ "$tries" -gt "0" ]; then +- nfsserver_start frommonitor +- rc=$? +- let tries=$tries-1 +- fi +- if [ "$rc" -ne "0" ]; then +- ocf_exit_reason "rpc-statd is not running" +- return $OCF_NOT_RUNNING +- fi ++ ocf_exit_reason "rpc-statd is not running" ++ return $OCF_NOT_RUNNING + fi + fi + +@@ -436,15 +398,8 @@ + + tfn="/proc/fs/nfsd/threads" + if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then +- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then +- nfsserver_start frommonitor +- rc=$? +- let tries=$tries-1 +- fi +- if [ "$rc" -ne "0" ]; then +- ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads" +- return $OCF_NOT_RUNNING +- fi ++ ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads" ++ return $OCF_NOT_RUNNING + fi + + #Adapt LSB status code to OCF return code +@@ -454,15 +409,8 @@ + v3locking_exec "status" + rc=$? + if [ $rc -ne 0 ]; then +- if [ ! "$fromstart" ] && [ $tries -gt "0" ]; then +- nfsserver_start frommonitor +- rc=$? +- let tries=$tries-1 +- fi +- if [ "$rc" -ne "0" ]; then +- ocf_exit_reason "NFS server is up, but the locking daemons are down" +- rc=$OCF_ERR_GENERIC +- fi ++ ocf_exit_reason "NFS server is up, but the locking daemons are down" ++ rc=$OCF_ERR_GENERIC + fi + return $rc + elif [ $rc -eq 3 ]; then +@@ -561,7 +509,7 @@ + [ -f "$fp/xtab" ] || touch "$fp/xtab" + [ -f "$fp/rmtab" ] || touch "$fp/rmtab" + +- dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 &> /dev/null ++ dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 > /dev/null 2>&1 + [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state" + [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp" + } +@@ -656,15 +604,15 @@ + + terminate() + { +- declare pids +- declare i=0 ++ local pids ++ local i=0 + + while : ; do + pids=$(binary_status $1) + [ -z "$pids" ] && return 0 + kill $pids + sleep 1 +- ((i++)) ++ i=$((i + 1)) + [ $i -gt 3 ] && return 1 + done + } +@@ -672,22 +620,22 @@ + + killkill() + { +- declare pids +- declare i=0 ++ local pids ++ local i=0 + + while : ; do + pids=$(binary_status $1) + [ -z "$pids" ] && return 0 + kill -9 $pids + sleep 1 +- ((i++)) ++ i=$((i + 1)) + [ $i -gt 3 ] && return 1 + done + } + + stop_process() + { +- declare process=$1 ++ local process=$1 + + ocf_log info "Stopping $process" + if terminate $process; then +@@ -758,14 +706,9 @@ + + nfsserver_start () + { +- # Skip monitor check when run from nfsserver_monitor () +- if [ "$1" == "frommonitor" ]; then +- frommonitor=1 +- fi +- + local rc; + +- if [ ! "$frommonitor" ] && nfsserver_monitor fromstart; then ++ if nfsserver_monitor; then + ocf_log debug "NFS server is already started" + return $OCF_SUCCESS + fi +@@ -796,21 +739,17 @@ + # systemd + if [ "$EXEC_MODE" -eq "2" ]; then + nfs_exec start rpcbind +- local i=10 +- while [ "$i" -gt 0 ]; do ++ local i=1 ++ while : ; do + ocf_log info "Start: rpcbind i: $i" +- rpcinfo &> /dev/null ++ rpcinfo > /dev/null 2>&1 + rc=$? + if [ "$rc" -eq "0" ]; then + break; + fi + sleep 1 +- let i=$i-1 ++ i=$((i + 1)) + done +- if [ "$i" -eq 0 ]; then +- ocf_exit_reason "Failed to start rpcbind" +- return $OCF_ERR_GENERIC +- fi + fi + + # check to see if we need to start rpc.statd +@@ -830,25 +769,21 @@ + # systemd + if [ "$EXEC_MODE" -eq "2" ]; then + nfs_exec start nfs-mountd +- local i=10 +- while [ "$i" -gt 0 ]; do ++ local i=1 ++ while : ; do + ocf_log info "Start: nfs-mountd i: $i" +- rpcinfo -t localhost 100005 &> /dev/null ++ rpcinfo -t localhost 100005 > /dev/null 2>&1 + rc=$? 
+ if [ "$rc" -eq "0" ]; then + break; + fi + sleep 1 +- let i=$i-1 ++ i=$((i + 1)) + done +- if [ "$i" -eq 0 ]; then +- ocf_exit_reason "Failed to start nfs-mountd" +- return $OCF_ERR_GENERIC +- fi + + nfs_exec start nfs-idmapd +- local i=10 +- while [ "$i" -gt 0 ]; do ++ local i=1 ++ while : ; do + ocf_log info "Start: nfs-idmapd i: $i" + fn=`mktemp` + nfs_exec status nfs-idmapd > $fn 2>&1 +@@ -859,29 +794,21 @@ + break; + fi + sleep 1 +- let i=$i-1 ++ i=$((i + 1)) + done +- if [ "$i" -eq 0 ]; then +- ocf_exit_reason "Failed to start nfs-idmapd" +- return $OCF_ERR_GENERIC +- fi + + nfs_exec start rpc-statd +- local i=10 +- while [ "$i" -gt 0 ]; do ++ local i=1 ++ while : ; do + ocf_log info "Start: rpc-statd i: $i" +- rpcinfo -t localhost 100024 &> /dev/null ++ rpcinfo -t localhost 100024 > /dev/null 2>&1 + rc=$? + if [ "$rc" -eq "0" ]; then + break; + fi + sleep 1 +- let i=$i-1 ++ i=$((i + 1)) + done +- if [ "$i" -eq 0 ]; then +- ocf_exit_reason "Failed to start rpc-statd" +- return $OCF_ERR_GENERIC +- fi + fi + + fn=`mktemp` +@@ -936,16 +863,16 @@ + return $OCF_ERR_GENERIC + fi + +- nfs_exec stop rpc-statd &> /dev/null ++ nfs_exec stop rpc-statd > /dev/null 2>&1 + ocf_log info "Stop: rpc-statd" +- rpcinfo -t localhost 100024 &> /dev/null ++ rpcinfo -t localhost 100024 > /dev/null 2>&1 + rc=$? + if [ "$rc" -eq "0" ]; then + ocf_exit_reason "Failed to stop rpc-statd" + return $OCF_ERR_GENERIC + fi + +- nfs_exec stop nfs-idmapd &> /dev/null ++ nfs_exec stop nfs-idmapd > /dev/null 2>&1 + ocf_log info "Stop: nfs-idmapd" + fn=`mktemp` + nfs_exec status nfs-idmapd > $fn 2>&1 +@@ -957,9 +884,9 @@ + return $OCF_ERR_GENERIC + fi + +- nfs_exec stop nfs-mountd &> /dev/null ++ nfs_exec stop nfs-mountd > /dev/null 2>&1 + ocf_log info "Stop: nfs-mountd" +- rpcinfo -t localhost 100005 &> /dev/null ++ rpcinfo -t localhost 100005 > /dev/null 2>&1 + rc=$? + if [ "$rc" -eq "0" ]; then + ocf_exit_reason "Failed to stop nfs-mountd" +@@ -975,8 +902,11 @@ + + # systemd + if [ "$EXEC_MODE" -eq "2" ]; then +- nfs_exec stop rpcbind &> /dev/null ++ nfs_exec stop rpcbind > /dev/null 2>&1 + ocf_log info "Stop: rpcbind" ++ ++ nfs_exec stop rpc-gssd > /dev/null 2>&1 ++ ocf_log info "Stop: rpc-gssd" + fi + + unbind_tree diff --git a/SOURCES/bz1212632-nagios.patch b/SOURCES/bz1212632-nagios.patch new file mode 100644 index 0000000..82a1eec --- /dev/null +++ b/SOURCES/bz1212632-nagios.patch @@ -0,0 +1,272 @@ +diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am +--- a/doc/man/Makefile.am 2016-06-06 10:32:26.889194520 +0200 ++++ b/doc/man/Makefile.am 2016-06-06 10:33:28.850643243 +0200 +@@ -118,6 +118,7 @@ + ocf_heartbeat_lxc.7 \ + ocf_heartbeat_mysql.7 \ + ocf_heartbeat_mysql-proxy.7 \ ++ ocf_heartbeat_nagios.7 \ + ocf_heartbeat_named.7 \ + ocf_heartbeat_nfsnotify.7 \ + ocf_heartbeat_nfsserver.7 \ +diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am +--- a/heartbeat/Makefile.am 2016-06-06 10:32:26.889194520 +0200 ++++ b/heartbeat/Makefile.am 2016-06-06 10:33:02.418878409 +0200 +@@ -97,6 +97,7 @@ + ManageVE \ + mysql \ + mysql-proxy \ ++ nagios \ + named \ + nfsnotify \ + nfsserver \ +diff -uNr a/heartbeat/nagios b/heartbeat/nagios +--- a/heartbeat/nagios 1970-01-01 01:00:00.000000000 +0100 ++++ b/heartbeat/nagios 2016-06-06 10:33:02.418878409 +0200 +@@ -0,0 +1,246 @@ ++#!/bin/sh ++# ++# License: GNU General Public License (GPL) ++# (c) 2015 T.J. Yang, O. 
Albrigtsen ++# and Linux-HA contributors ++# ++# ----------------------------------------------------------------------------- ++# O C F R E S O U R C E S C R I P T S P E C I F I C A T I O N ++# ----------------------------------------------------------------------------- ++# ++# NAME ++# nagios : OCF resource agent script for Nagios Server ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++# Defaults ++OCF_RESKEY_user_default="nagios" ++OCF_RESKEY_group_default="nagios" ++OCF_RESKEY_binary_default="/usr/sbin/nagios" ++OCF_RESKEY_config_default="/etc/nagios/nagios.cfg" ++OCF_RESKEY_log_default="/var/log/nagios/nagios.log" ++OCF_RESKEY_retention_default="/var/log/nagios/retention.dat" ++OCF_RESKEY_command_default="/var/log/nagios/rw/nagios.cmd" ++OCF_RESKEY_pid_default="/var/run/nagios.pid" ++ ++: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} ++: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} ++: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} ++: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} ++: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} ++: ${OCF_RESKEY_retention=${OCF_RESKEY_retention_default}} ++: ${OCF_RESKEY_command=${OCF_RESKEY_command_default}} ++: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} ++ ++ ++nagios_usage() { ++ cat < ++ ++ ++0.75 ++ ++OCF Resource script for Nagios 3.x or 4.x. It manages a Nagios instance as a HA resource. ++Nagios resource agent ++ ++ ++ ++ ++ User running Nagios daemon (for file permissions) ++ Nagios user ++ ++ ++ ++ ++ Group running Nagios daemon (for file permissions) ++ Nagios group ++ ++ ++ ++ ++ Location of the Nagios binary ++ Nagios binary ++ ++ ++ ++ ++ Configuration file ++ Nagios config ++ ++ ++ ++ ++ Location of the Nagios log ++ Nagios log ++ ++ ++ ++ ++ Location of the Nagios retention file ++ Nagios retention file ++ ++ ++ ++ ++ Location of the Nagios external command file ++ Nagios command file ++ ++ ++ ++ ++ Location of the Nagios pid/lock ++ Nagios pid file ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++} ++ ++ ++nagios_start() { ++ nagios_validate_all ++ rc=$? ++ if [ $rc -ne 0 ]; then ++ return $rc ++ fi ++ ++ ++ # if resource is already running,no need to continue code after this. ++ if nagios_monitor; then ++ ocf_log info "Nagios is already running" ++ return $OCF_SUCCESS ++ fi ++ ++ # Remove ${OCF_RESKEY_pid} if it exists ++ rm -f ${OCF_RESKEY_pid} ++ ++ ocf_run -q touch ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid} ++ chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid} ++ rm -f ${OCF_RESKEY_command} ++ [ -x /sbin/restorecon ] && /sbin/restorecon ${OCF_RESKEY_pid} ++ ocf_run -q ${OCF_RESKEY_binary} -d ${OCF_RESKEY_config} ++ ++ while ! nagios_monitor; do ++ sleep 1 ++ done ++ ++ if [ $? -eq "0" ]; then ++ ocf_log info "Nagios started" ++ return ${OCF_SUCCESS} ++ fi ++ ++ return $OCF_SUCCESS ++} ++ ++nagios_stop() { ++ nagios_monitor ++ if [ "$?" -ne "$OCF_SUCCESS" ]; then ++ # Currently not running. Nothing to do. ++ ocf_log info "Resource is already stopped" ++ rm -f ${OCF_RESKEY_pid} ++ ++ return $OCF_SUCCESS ++ fi ++ ++ kill `cat ${OCF_RESKEY_pid}` ++ ++ # Wait for process to stop ++ while nagios_monitor; do ++ sleep 1 ++ done ++ ++ return $OCF_SUCCESS ++} ++ ++nagios_monitor(){ ++ ocf_pidfile_status ${OCF_RESKEY_pid} > /dev/null 2>&1 ++ case "$?" 
in ++ 0) ++ rc=$OCF_SUCCESS ++ ;; ++ 1|2) ++ rc=$OCF_NOT_RUNNING ++ ;; ++ *) ++ rc=$OCF_ERR_GENERIC ++ ;; ++ esac ++ return $rc ++} ++ ++nagios_validate_all(){ ++ check_binary ${OCF_RESKEY_binary} ++ ++ if [ ! -f ${OCF_RESKEY_config} ]; then ++ ocf_exit_reason "Configuration file ${OCF_RESKEY_config} not found" ++ return ${OCF_ERR_INSTALLED} ++ fi ++ ++ ${OCF_RESKEY_binary} -v ${OCF_RESKEY_config} > /dev/null 2>&1; ++ if [ $? -ne "0" ]; then ++ ocf_exit_reason "Configuration check failed" ++ return ${OCF_ERR_INSTALLED} ++ fi ++} ++ ++ ++# **************************** MAIN SCRIPT ************************************ ++ ++# Make sure meta-data and usage always succeed ++case $__OCF_ACTION in ++meta-data) nagios_meta_data ++ exit $OCF_SUCCESS ++ ;; ++usage|help) nagios_usage ++ exit $OCF_SUCCESS ++ ;; ++esac ++ ++# This OCF agent script need to be run as root user. ++if ! ocf_is_root; then ++ echo "$0 agent script need to be run as root user." ++ ocf_log debug "$0 agent script need to be run as root user." ++ exit $OCF_ERR_GENERIC ++fi ++ ++# Translate each action into the appropriate function call ++case $__OCF_ACTION in ++start) nagios_start;; ++stop) nagios_stop;; ++status|monitor) nagios_monitor;; ++validate-all) nagios_validate_all;; ++*) nagios_usage ++ exit $OCF_ERR_UNIMPLEMENTED ++ ;; ++esac ++rc=$? ++ ++exit $rc ++ ++# End of this script diff --git a/SOURCES/bz1242181-virtualdomain-migrate_options.patch b/SOURCES/bz1242181-virtualdomain-migrate_options.patch new file mode 100644 index 0000000..a13b6b4 --- /dev/null +++ b/SOURCES/bz1242181-virtualdomain-migrate_options.patch @@ -0,0 +1,133 @@ +diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain +--- a/heartbeat/VirtualDomain 2016-02-29 10:54:21.870787072 +0100 ++++ b/heartbeat/VirtualDomain 2016-02-29 14:02:23.260696550 +0100 +@@ -106,11 +106,28 @@ + + Note: Be sure this composed host name is locally resolveable and the + associated IP is reachable through the favored network. ++ ++See also the migrate_options parameter below. + + Migration network host name suffix + + + ++ ++ ++Extra virsh options for the guest live migration. You can also specify ++here --migrateuri if the calculated migrate URI is unsuitable for your ++environment. If --migrateuri is set then migration_network_suffix ++and migrateport are effectively ignored. Use "%n" as the placeholder ++for the target node name. ++ ++Please refer to the libvirt documentation for details on guest ++migration. ++ ++live migrate options ++ ++ ++ + + + To additionally monitor services within the virtual domain, add this +@@ -485,14 +502,45 @@ + force_stop + } + ++mk_migrateuri() { ++ local target_node ++ local migrate_target ++ local hypervisor ++ ++ target_node="$OCF_RESKEY_CRM_meta_migrate_target" ++ ++ # A typical migration URI via a special migration network looks ++ # like "tcp://bar-mig:49152". The port would be randomly chosen ++ # by libvirt from the range 49152-49215 if omitted, at least since ++ # version 0.7.4 ... ++ if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then ++ hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" ++ # Hostname might be a FQDN ++ migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") ++ case $hypervisor in ++ qemu) ++ # For quiet ancient libvirt versions a migration port is needed ++ # and the URI must not contain the "//". Newer versions can handle ++ # the "bad" URI. 
++ echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}" ++ ;; ++ xen) ++ echo "xenmigr://${migrate_target}" ++ ;; ++ *) ++ ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." ++ ;; ++ esac ++ fi ++} ++ + VirtualDomain_Migrate_To() { ++ local rc + local target_node + local remoteuri + local transport_suffix + local migrateuri +- local migrateport +- local migrate_target +- local hypervisor ++ local migrate_opts + + target_node="$OCF_RESKEY_CRM_meta_migrate_target" + +@@ -503,38 +551,26 @@ + if [ -n "${OCF_RESKEY_migration_transport}" ]; then + transport_suffix="+${OCF_RESKEY_migration_transport}" + fi +- # A typical migration URI via a special migration network looks +- # like "tcp://bar-mig:49152". The port would be randomly chosen +- # by libvirt from the range 49152-49215 if omitted, at least since +- # version 0.7.4 ... +- if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then +- hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}" +- # Hostname might be a FQDN +- migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},") +- case $hypervisor in +- qemu) +- # For quiet ancient libvirt versions a migration port is needed +- # and the URI must not contain the "//". Newer versions can handle +- # the "bad" URI. +- migrateuri="tcp:${migrate_target}:${OCF_RESKEY_migrateport}" +- ;; +- xen) +- migrateuri="xenmigr://${migrate_target}" +- ;; +- *) +- ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}." +- ;; +- esac ++ ++ # User defined migrateuri or do we make one? ++ migrate_opts="$OCF_RESKEY_migrate_options" ++ if echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then ++ migrateuri=`echo "$migrate_opts" | ++ sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"` ++ migrate_opts=`echo "$migrate_opts" | ++ sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\3/"` ++ else ++ migrateuri=`mk_migrateuri` + fi + # Scared of that sed expression? So am I. :-) + remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") + + # OK, we know where to connect to. Now do the actual migration. +- ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})." +- virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri} ++ ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)." ++ virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri + rc=$? + if [ $rc -ne 0 ]; then +- ocf_exit_reason "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc" ++ ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc" + return $OCF_ERR_GENERIC + else + ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded." 
diff --git a/SOURCES/bz1242558-virtualdomain-may-remove-config-file.patch b/SOURCES/bz1242558-virtualdomain-may-remove-config-file.patch new file mode 100644 index 0000000..3a9871b --- /dev/null +++ b/SOURCES/bz1242558-virtualdomain-may-remove-config-file.patch @@ -0,0 +1,40 @@ +diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain +--- a/heartbeat/VirtualDomain 2015-11-20 11:52:58.314263831 +0100 ++++ b/heartbeat/VirtualDomain 2015-11-20 11:53:55.247196256 +0100 +@@ -340,13 +340,32 @@ + return $rc + } + ++# virsh undefine removes configuration files if they are in ++# directories which are managed by libvirt. such directories ++# include also subdirectories of /etc (for instance ++# /etc/libvirt/*) which may be surprising. VirtualDomain didn't ++# include the undefine call before, hence this wasn't an issue ++# before. ++# ++# There seems to be no way to find out which directories are ++# managed by libvirt. ++# + verify_undefined() { +- for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null`; do +- if [ "$dom" = "$DOMAIN_NAME" ]; then ++ local tmpf ++ if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME" ++ then ++ tmpf=$(mktemp -t vmcfgsave.XXXXXX) ++ if [ ! -r "$tmpf" ]; then ++ ocf_log warn "unable to create temp file, disk full?" ++ # we must undefine the domain + virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 +- return ++ else ++ cp -p $OCF_RESKEY_config $tmpf ++ virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 ++ [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config ++ rm -f $tmpf + fi +- done ++ fi + } + + VirtualDomain_Start() { diff --git a/SOURCES/bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch b/SOURCES/bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch new file mode 100644 index 0000000..585b8b3 --- /dev/null +++ b/SOURCES/bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch @@ -0,0 +1,92 @@ +diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +--- a/heartbeat/rabbitmq-cluster 2016-02-22 11:09:48.989128414 +0100 ++++ b/heartbeat/rabbitmq-cluster 2016-02-22 11:10:12.011835745 +0100 +@@ -39,7 +39,14 @@ + RMQ_LOG_DIR="/var/log/rabbitmq" + NODENAME=$(ocf_local_nodename) + ++# this attr represents the current active local rmq node name. ++# when rmq stops or the node is fenced, this attr disappears + RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}" ++# this attr represents the last known active local rmq node name ++# when rmp stops or the node is fenced, the attr stays forever so ++# we can continue to map an offline pcmk node to it's rmq node name ++# equivalent. ++RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}" + + meta_data() { + cat < /dev/null 2>&1 & + } +@@ -154,7 +161,7 @@ rotate_catalina_out() + { + # Check catalina_%F.log is writable or not. + CURRENT_ROTATELOG_SUFFIX=`date +"%F"` +- su - -s /bin/sh $RESOURCE_TOMCAT_USER \ ++ $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \ + -c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1 + if [ $? -ne 0 ]; then + ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable." 
+@@ -205,7 +212,7 @@ attemptTomcatCommand() + if [ "$RESOURCE_TOMCAT_USER" = root ]; then + "$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1 + else +- tomcatCommand $@ | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 ++ tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 + fi + + if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then diff --git a/SOURCES/bz1249430-2-tomcat-fix-selinux-enforced.patch b/SOURCES/bz1249430-2-tomcat-fix-selinux-enforced.patch new file mode 100644 index 0000000..3d4750b --- /dev/null +++ b/SOURCES/bz1249430-2-tomcat-fix-selinux-enforced.patch @@ -0,0 +1,112 @@ +From a1860a5bbe5c63c6a34d9160a8aacffc61a89dcf Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 16 Sep 2016 14:25:28 +0200 +Subject: [PATCH] tomcat: use systemd where available due to newer versions not + generating PID-file + +--- + heartbeat/tomcat | 44 ++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 40 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/tomcat b/heartbeat/tomcat +index 07a7ce4..813d280 100755 +--- a/heartbeat/tomcat ++++ b/heartbeat/tomcat +@@ -56,6 +56,10 @@ else + SU=su + fi + ++if which systemctl > /dev/null 2>&1; then ++ SYSTEMD=1 ++fi ++ + ############################################################################ + # Usage + usage() +@@ -90,6 +94,10 @@ isrunning_tomcat() + # + isalive_tomcat() + { ++ if ocf_is_true $SYSTEMD; then ++ systemctl is-active tomcat@${TOMCAT_NAME} > /dev/null 2>&1 ++ return $? ++ fi + # As the server stops, the PID file disappears. To avoid race conditions, + # we will have remembered the PID of a running instance on script entry. + local pid=$rememberedPID +@@ -184,9 +192,31 @@ rotate_catalina_out() + } + + ############################################################################ ++# Create systemd configuration ++create_systemd_config() ++{ ++cat<<-EOF > /etc/sysconfig/tomcat@${TOMCAT_NAME} ++JAVA_HOME=${JAVA_HOME} ++JAVA_OPTS="${JAVA_OPTS}" ++CATALINA_HOME=${CATALINA_HOME} ++CATALINA_BASE=${CATALINA_BASE} ++CATALINA_OUT=${CATALINA_OUT} ++CATALINA_OPTS="${CATALINA_OPTS}" ++CATALINA_TMPDIR="${CATALINA_TMPDIR}" ++JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}" ++LOGGING_CONFIG="${LOGGING_CONFIG}" ++LOGGING_MANAGER="${LOGGING_MANAGER}" ++TOMCAT_CFG=${TOMCAT_CFG} ++EOF ++} ++ ++############################################################################ + # Tomcat Command + tomcatCommand() + { ++ if ocf_is_true $SYSTEMD; then ++ systemctl $@ tomcat@${TOMCAT_NAME} ++ else + cat<<-END_TOMCAT_COMMAND + export JAVA_HOME=${JAVA_HOME} + export JAVA_OPTS="${JAVA_OPTS}" +@@ -202,6 +232,7 @@ cat<<-END_TOMCAT_COMMAND + export TOMCAT_CFG=${TOMCAT_CFG} + $TOMCAT_START_SCRIPT $@ + END_TOMCAT_COMMAND ++ fi + } + attemptTomcatCommand() + { +@@ -209,7 +240,9 @@ attemptTomcatCommand() + export TOMCAT_CFG=$(mktemp ${HA_RSCTMP}/tomcat-tmp-XXXXX.cfg) + fi + +- if [ "$RESOURCE_TOMCAT_USER" = root ]; then ++ if ocf_is_true $SYSTEMD; then ++ tomcatCommand $@ ++ elif [ "$RESOURCE_TOMCAT_USER" = root ]; then + "$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1 + else + tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1 +@@ -224,6 +257,9 @@ attemptTomcatCommand() + # Start Tomcat + start_tomcat() + { ++ if ocf_is_true $SYSTEMD; then ++ create_systemd_config ++ fi + cd "$CATALINA_HOME/bin" + + validate_all_tomcat || exit $? +@@ -334,11 +370,11 @@ Resource script for Tomcat. It manages a Tomcat instance as a cluster resource. 
+ + + +- + The name of the resource, added as a Java parameter in JAVA_OPTS: +--Dname= to Tomcat process on start. Used to ensure ++-Dname=<tomcat_name> to Tomcat process on start. Used to ensure + process is still running and must be unique. +-]]> ++ + The name of the resource + + diff --git a/SOURCES/bz1250728-send_arp-fix-buffer-overflow-on-infiniband.patch b/SOURCES/bz1250728-send_arp-fix-buffer-overflow-on-infiniband.patch new file mode 100644 index 0000000..a7b9bd5 --- /dev/null +++ b/SOURCES/bz1250728-send_arp-fix-buffer-overflow-on-infiniband.patch @@ -0,0 +1,1188 @@ +diff --git a/tools/send_arp.linux.c b/tools/send_arp.linux.c +index e1c1960..477100a 100644 +--- a/tools/send_arp.linux.c ++++ b/tools/send_arp.linux.c +@@ -7,6 +7,23 @@ + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, ++ * YOSHIFUJI Hideaki ++ */ ++ ++/* Andrew Beekhof, Lars Ellenberg: ++ * Based on arping from iputils, ++ * adapted to the command line conventions established by the libnet based ++ * send_arp tool as used by the IPaddr and IPaddr2 resource agents. ++ * The libnet based send_arp, and its command line argument convention, ++ * was first added to the heartbeat project by Matt Soffen. ++ * ++ * Latest "resync" with iputils as of: ++ * git://git.linux-ipv6.org/gitroot/iputils.git ++ * 511f8356e22615479c3cc16bca64d72d204f6df3 ++ * Fri Jul 24 10:48:47 2015 ++ * To get various bugfixes and support for infiniband and other link layer ++ * addresses which do not fit into plain "sockaddr_ll", and broadcast addresses ++ * that may be different from memset(,0xff,). + */ + + #include +@@ -16,12 +33,17 @@ + #include + #include + #include ++#include + #include +-#include ++#include + #include + #include + #include + #include ++#ifdef CAPABILITIES ++#include ++#include ++#endif + + #include + #include +@@ -32,40 +54,85 @@ + #include + #include + +-static void usage(void) __attribute__((noreturn)); ++#ifdef USE_SYSFS ++#include ++struct sysfs_devattr_values; ++#endif + +-static int quit_on_reply; +-static char *device; +-static int ifindex; +-static char *source; +-static struct in_addr src, dst; +-static char *target; +-static int dad = 0, unsolicited = 0, advert = 0; +-static int quiet = 0; +-static int count = -1; +-static int timeout = 0; +-static int unicasting = 0; +-static int s = 0; +-static int broadcast_only = 0; ++#ifndef WITHOUT_IFADDRS ++#include ++#endif + +-static struct sockaddr_ll me; +-static struct sockaddr_ll he; ++#ifdef USE_IDN ++#include ++#include ++#endif + +-static struct timeval start, last; ++static char SNAPSHOT[] = "s20121221"; + +-static int sent, brd_sent; +-static int received, brd_recv, req_recv; ++static void usage(void) __attribute__((noreturn)); ++ ++#ifndef DEFAULT_DEVICE ++#define DEFAULT_DEVICE "eth0" ++#endif ++#ifdef DEFAULT_DEVICE ++# define DEFAULT_DEVICE_STR DEFAULT_DEVICE ++#else ++# define DEFAULT_DEVICE NULL ++#endif ++ ++struct device { ++ const char *name; ++ int ifindex; ++#ifndef WITHOUT_IFADDRS ++ struct ifaddrs *ifa; ++#endif ++#ifdef USE_SYSFS ++ struct sysfs_devattr_values *sysfs; ++#endif ++}; ++ ++int quit_on_reply=0; ++struct device device = { ++ .name = DEFAULT_DEVICE, ++}; ++char *source; ++struct in_addr src, dst; ++char *target; ++int dad, unsolicited, advert; ++int quiet; ++int count=-1; ++int timeout; ++int unicasting; ++int s; ++int broadcast_only; ++ ++struct sockaddr_storage me; ++struct sockaddr_storage he; ++ ++struct timeval start, last; ++ ++int sent, brd_sent; ++int received, brd_recv, req_recv; ++ 
++#ifndef CAPABILITIES ++static uid_t euid; ++#endif + + #define MS_TDIFF(tv1,tv2) ( ((tv1).tv_sec-(tv2).tv_sec)*1000 + \ + ((tv1).tv_usec-(tv2).tv_usec)/1000 ) + +-static void print_hex(unsigned char *p, int len); +-static int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM); +-static void set_signal(int signo, void (*handler)(void)); +-static int send_pack(int s, struct in_addr src, struct in_addr dst, +- struct sockaddr_ll *ME, struct sockaddr_ll *HE); +-static void finish(void); +-static void catcher(void); ++#define OFFSET_OF(name,ele) ((size_t)&((name *)0)->ele) ++ ++static socklen_t sll_len(size_t halen) ++{ ++ socklen_t len = OFFSET_OF(struct sockaddr_ll, sll_addr) + halen; ++ if (len < sizeof(struct sockaddr_ll)) ++ len = sizeof(struct sockaddr_ll); ++ return len; ++} ++ ++#define SLL_LEN(hln) sll_len(hln) + + void usage(void) + { +@@ -80,14 +147,18 @@ void usage(void) + " -V : print version and exit\n" + " -c count : how many packets to send\n" + " -w timeout : how long to wait for a reply\n" +- " -I device : which ethernet device to use (eth0)\n" ++ " -I device : which ethernet device to use" ++#ifdef DEFAULT_DEVICE_STR ++ " (" DEFAULT_DEVICE_STR ")" ++#endif ++ "\n" + " -s source : source ip address\n" + " destination : ask for what ip address\n" + ); + exit(2); + } + +-void set_signal(int signo, void (*handler)(void)) ++static void set_signal(int signo, void (*handler)(void)) + { + struct sigaction sa; + +@@ -97,7 +168,126 @@ void set_signal(int signo, void (*handler)(void)) + sigaction(signo, &sa, NULL); + } + +-int send_pack(int s, struct in_addr src, struct in_addr dst, ++#ifdef CAPABILITIES ++static const cap_value_t caps[] = { CAP_NET_RAW, }; ++static cap_flag_value_t cap_raw = CAP_CLEAR; ++#endif ++ ++static void limit_capabilities(void) ++{ ++#ifdef CAPABILITIES ++ cap_t cap_p; ++ ++ cap_p = cap_get_proc(); ++ if (!cap_p) { ++ perror("arping: cap_get_proc"); ++ exit(-1); ++ } ++ ++ cap_get_flag(cap_p, CAP_NET_RAW, CAP_PERMITTED, &cap_raw); ++ ++ if (cap_raw != CAP_CLEAR) { ++ if (cap_clear(cap_p) < 0) { ++ perror("arping: cap_clear"); ++ exit(-1); ++ } ++ ++ cap_set_flag(cap_p, CAP_PERMITTED, 1, caps, CAP_SET); ++ ++ if (cap_set_proc(cap_p) < 0) { ++ perror("arping: cap_set_proc"); ++ if (errno != EPERM) ++ exit(-1); ++ } ++ } ++ ++ if (prctl(PR_SET_KEEPCAPS, 1) < 0) { ++ perror("arping: prctl"); ++ exit(-1); ++ } ++ ++ if (setuid(getuid()) < 0) { ++ perror("arping: setuid"); ++ exit(-1); ++ } ++ ++ if (prctl(PR_SET_KEEPCAPS, 0) < 0) { ++ perror("arping: prctl"); ++ exit(-1); ++ } ++ ++ cap_free(cap_p); ++#else ++ euid = geteuid(); ++#endif ++} ++ ++static int modify_capability_raw(int on) ++{ ++#ifdef CAPABILITIES ++ cap_t cap_p; ++ ++ if (cap_raw != CAP_SET) ++ return on ? -1 : 0; ++ ++ cap_p = cap_get_proc(); ++ if (!cap_p) { ++ perror("arping: cap_get_proc"); ++ return -1; ++ } ++ ++ cap_set_flag(cap_p, CAP_EFFECTIVE, 1, caps, on ? CAP_SET : CAP_CLEAR); ++ ++ if (cap_set_proc(cap_p) < 0) { ++ perror("arping: cap_set_proc"); ++ return -1; ++ } ++ ++ cap_free(cap_p); ++#else ++ if (setuid(on ? 
euid : getuid())) { ++ perror("arping: setuid"); ++ return -1; ++ } ++#endif ++ return 0; ++} ++ ++static int enable_capability_raw(void) ++{ ++ return modify_capability_raw(1); ++} ++ ++static int disable_capability_raw(void) ++{ ++ return modify_capability_raw(0); ++} ++ ++static void drop_capabilities(void) ++{ ++#ifdef CAPABILITIES ++ cap_t cap_p = cap_init(); ++ ++ if (!cap_p) { ++ perror("arping: cap_init"); ++ exit(-1); ++ } ++ ++ if (cap_set_proc(cap_p) < 0) { ++ perror("arping: cap_set_proc"); ++ exit(-1); ++ } ++ ++ cap_free(cap_p); ++#else ++ if (setuid(getuid()) < 0) { ++ perror("arping: setuid"); ++ exit(-1); ++ } ++#endif ++} ++ ++static int send_pack(int s, struct in_addr src, struct in_addr dst, + struct sockaddr_ll *ME, struct sockaddr_ll *HE) + { + int err; +@@ -130,7 +320,7 @@ int send_pack(int s, struct in_addr src, struct in_addr dst, + p+=4; + + gettimeofday(&now, NULL); +- err = sendto(s, buf, p-buf, 0, (struct sockaddr*)HE, sizeof(*HE)); ++ err = sendto(s, buf, p-buf, 0, (struct sockaddr*)HE, SLL_LEN(ah->ar_hln)); + if (err == p-buf) { + last = now; + sent++; +@@ -140,7 +330,7 @@ int send_pack(int s, struct in_addr src, struct in_addr dst, + return err; + } + +-void finish(void) ++static void finish(void) + { + if (!quiet) { + printf("Sent %d probes (%d broadcast(s))\n", sent, brd_sent); +@@ -158,40 +348,43 @@ void finish(void) + printf("\n"); + fflush(stdout); + } +- +- if (dad) { +- fflush(stdout); +- exit(!!received); +- } +- ++ fflush(stdout); ++ if (dad) ++ exit(!!received); + if (unsolicited) + exit(0); +- +- fflush(stdout); + exit(!received); + } + +-void catcher(void) ++static void catcher(void) + { +- struct timeval tv; ++ struct timeval tv, tv_s, tv_o; + + gettimeofday(&tv, NULL); + + if (start.tv_sec==0) + start = tv; + +- if (count-- == 0 || (timeout && MS_TDIFF(tv,start) > timeout*1000 + 500)) ++ timersub(&tv, &start, &tv_s); ++ tv_o.tv_sec = timeout; ++ tv_o.tv_usec = 500 * 1000; ++ ++ if (count-- == 0 || (timeout && timercmp(&tv_s, &tv_o, >))) + finish(); + +- if (last.tv_sec==0 || MS_TDIFF(tv,last) > 500) { +- send_pack(s, src, dst, &me, &he); ++ timersub(&tv, &last, &tv_s); ++ tv_o.tv_sec = 0; ++ ++ if (last.tv_sec==0 || timercmp(&tv_s, &tv_o, >)) { ++ send_pack(s, src, dst, ++ (struct sockaddr_ll *)&me, (struct sockaddr_ll *)&he); + if (count == 0 && unsolicited) + finish(); + } + alarm(1); + } + +-void print_hex(unsigned char *p, int len) ++static void print_hex(unsigned char *p, int len) + { + int i; + for (i=0; iar_pln != 4) + return 0; +- if (ah->ar_hln != me.sll_halen) ++ if (ah->ar_hln != ((struct sockaddr_ll *)&me)->sll_halen) + return 0; + if (len < sizeof(*ah) + 2*(4 + ah->ar_hln)) + return 0; +@@ -242,7 +435,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM) + return 0; + if (src.s_addr != dst_ip.s_addr) + return 0; +- if (memcmp(p+ah->ar_hln+4, &me.sll_addr, ah->ar_hln)) ++ if (memcmp(p+ah->ar_hln+4, ((struct sockaddr_ll *)&me)->sll_addr, ah->ar_hln)) + return 0; + } else { + /* DAD packet was: +@@ -260,7 +453,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM) + */ + if (src_ip.s_addr != dst.s_addr) + return 0; +- if (memcmp(p, &me.sll_addr, me.sll_halen) == 0) ++ if (memcmp(p, ((struct sockaddr_ll *)&me)->sll_addr, ((struct sockaddr_ll *)&me)->sll_halen) == 0) + return 0; + if (src.s_addr && src.s_addr != dst_ip.s_addr) + return 0; +@@ -276,7 +469,7 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM) + printf("for %s ", inet_ntoa(dst_ip)); + s_printed = 1; + } +- if 
(memcmp(p+ah->ar_hln+4, me.sll_addr, ah->ar_hln)) { ++ if (memcmp(p+ah->ar_hln+4, ((struct sockaddr_ll *)&me)->sll_addr, ah->ar_hln)) { + if (!s_printed) + printf("for "); + printf("["); +@@ -299,16 +492,78 @@ int recv_pack(unsigned char *buf, int len, struct sockaddr_ll *FROM) + brd_recv++; + if (ah->ar_op == htons(ARPOP_REQUEST)) + req_recv++; +- if (quit_on_reply) ++ if (quit_on_reply || (count == 0 && received == sent)) + finish(); + if(!broadcast_only) { +- memcpy(he.sll_addr, p, me.sll_halen); ++ memcpy(((struct sockaddr_ll *)&he)->sll_addr, p, ((struct sockaddr_ll *)&me)->sll_halen); + unicasting=1; + } + return 1; + } + +-#include ++#ifdef USE_SYSFS ++union sysfs_devattr_value { ++ unsigned long ulong; ++ void *ptr; ++}; ++ ++enum { ++ SYSFS_DEVATTR_IFINDEX, ++ SYSFS_DEVATTR_FLAGS, ++ SYSFS_DEVATTR_ADDR_LEN, ++#if 0 ++ SYSFS_DEVATTR_TYPE, ++ SYSFS_DEVATTR_ADDRESS, ++#endif ++ SYSFS_DEVATTR_BROADCAST, ++ SYSFS_DEVATTR_NUM ++}; ++ ++struct sysfs_devattr_values ++{ ++ char *ifname; ++ union sysfs_devattr_value value[SYSFS_DEVATTR_NUM]; ++}; ++ ++static int sysfs_devattr_ulong_dec(char *ptr, struct sysfs_devattr_values *v, unsigned idx); ++static int sysfs_devattr_ulong_hex(char *ptr, struct sysfs_devattr_values *v, unsigned idx); ++static int sysfs_devattr_macaddr(char *ptr, struct sysfs_devattr_values *v, unsigned idx); ++ ++struct sysfs_devattrs { ++ const char *name; ++ int (*handler)(char *ptr, struct sysfs_devattr_values *v, unsigned int idx); ++ int free; ++} sysfs_devattrs[SYSFS_DEVATTR_NUM] = { ++ [SYSFS_DEVATTR_IFINDEX] = { ++ .name = "ifindex", ++ .handler = sysfs_devattr_ulong_dec, ++ }, ++ [SYSFS_DEVATTR_ADDR_LEN] = { ++ .name = "addr_len", ++ .handler = sysfs_devattr_ulong_dec, ++ }, ++ [SYSFS_DEVATTR_FLAGS] = { ++ .name = "flags", ++ .handler = sysfs_devattr_ulong_hex, ++ }, ++#if 0 ++ [SYSFS_DEVATTR_TYPE] = { ++ .name = "type", ++ .handler = sysfs_devattr_ulong_dec, ++ }, ++ [SYSFS_DEVATTR_ADDRESS] = { ++ .name = "address", ++ .handler = sysfs_devattr_macaddr, ++ .free = 1, ++ }, ++#endif ++ [SYSFS_DEVATTR_BROADCAST] = { ++ .name = "broadcast", ++ .handler = sysfs_devattr_macaddr, ++ .free = 1, ++ }, ++}; ++#endif + + static void byebye(int nsig) + { +@@ -317,26 +572,477 @@ static void byebye(int nsig) + exit(nsig); + } + ++/* ++ * find_device() ++ * ++ * This function checks 1) if the device (if given) is okay for ARP, ++ * or 2) find fist appropriate device on the system. ++ * ++ * Return value: ++ * >0 : Succeeded, and appropriate device not found. ++ * device.ifindex remains 0. ++ * 0 : Succeeded, and approptiate device found. ++ * device.ifindex is set. ++ * <0 : Failed. Support not found, or other ++ * : system error. Try other method. ++ * ++ * If an appropriate device found, it is recorded inside the ++ * "device" variable for later reference. ++ * ++ * We have several implementations for this. ++ * by_ifaddrs(): requires getifaddr() in glibc, and rtnetlink in ++ * kernel. default and recommended for recent systems. ++ * by_sysfs(): requires libsysfs , and sysfs in kernel. ++ * by_ioctl(): unable to list devices without ipv4 address; this ++ * means, you need to supply the device name for ++ * DAD purpose. 
++ */ ++/* Common check for ifa->ifa_flags */ ++static int check_ifflags(unsigned int ifflags, int fatal) ++{ ++ if (!(ifflags & IFF_UP)) { ++ if (fatal) { ++ if (!quiet) ++ printf("Interface \"%s\" is down\n", device.name); ++ exit(2); ++ } ++ return -1; ++ } ++ if (ifflags & (IFF_NOARP | IFF_LOOPBACK)) { ++ if (fatal) { ++ if (!quiet) ++ printf("Interface \"%s\" is not ARPable\n", device.name); ++ exit(dad ? 0 : 2); ++ } ++ return -1; ++ } ++ return 0; ++} ++ ++static int find_device_by_ifaddrs(void) ++{ ++#ifndef WITHOUT_IFADDRS ++ int rc; ++ struct ifaddrs *ifa0, *ifa; ++ int count = 0; ++ ++ rc = getifaddrs(&ifa0); ++ if (rc) { ++ perror("getifaddrs"); ++ return -1; ++ } ++ ++ for (ifa = ifa0; ifa; ifa = ifa->ifa_next) { ++ if (!ifa->ifa_addr) ++ continue; ++ if (ifa->ifa_addr->sa_family != AF_PACKET) ++ continue; ++ if (device.name && ifa->ifa_name && strcmp(ifa->ifa_name, device.name)) ++ continue; ++ ++ if (check_ifflags(ifa->ifa_flags, device.name != NULL) < 0) ++ continue; ++ ++ if (!((struct sockaddr_ll *)ifa->ifa_addr)->sll_halen) ++ continue; ++ if (!ifa->ifa_broadaddr) ++ continue; ++ ++ device.ifa = ifa; ++ ++ if (count++) ++ break; ++ } ++ ++ if (count == 1 && device.ifa) { ++ device.ifindex = if_nametoindex(device.ifa->ifa_name); ++ if (!device.ifindex) { ++ perror("arping: if_nametoindex"); ++ freeifaddrs(ifa0); ++ return -1; ++ } ++ device.name = device.ifa->ifa_name; ++ return 0; ++ } ++ return 1; ++#else ++ return -1; ++#endif ++} ++ ++#ifdef USE_SYSFS ++static void sysfs_devattr_values_init(struct sysfs_devattr_values *v, int do_free) ++{ ++ int i; ++ if (do_free) { ++ free(v->ifname); ++ for (i = 0; i < SYSFS_DEVATTR_NUM; i++) { ++ if (sysfs_devattrs[i].free) ++ free(v->value[i].ptr); ++ } ++ } ++ memset(v, 0, sizeof(*v)); ++} ++ ++static int sysfs_devattr_ulong(char *ptr, struct sysfs_devattr_values *v, unsigned int idx, ++ unsigned int base) ++{ ++ unsigned long *p; ++ char *ep; ++ ++ if (!ptr || !v) ++ return -1; ++ ++ p = &v->value[idx].ulong; ++ errno = 0; ++ *p = strtoul(ptr, &ep, base); ++ if ((*ptr && isspace(*ptr & 0xff)) || errno || (*ep != '\0' && *ep != '\n')) ++ goto out; ++ ++ return 0; ++out: ++ return -1; ++} ++ ++static int sysfs_devattr_ulong_dec(char *ptr, struct sysfs_devattr_values *v, unsigned int idx) ++{ ++ int rc = sysfs_devattr_ulong(ptr, v, idx, 10); ++ return rc; ++} ++ ++static int sysfs_devattr_ulong_hex(char *ptr, struct sysfs_devattr_values *v, unsigned int idx) ++{ ++ int rc = sysfs_devattr_ulong(ptr, v, idx, 16); ++ return rc; ++} ++ ++static int sysfs_devattr_macaddr(char *ptr, struct sysfs_devattr_values *v, unsigned int idx) ++{ ++ unsigned char *m; ++ int i; ++ unsigned int addrlen; ++ ++ if (!ptr || !v) ++ return -1; ++ ++ addrlen = v->value[SYSFS_DEVATTR_ADDR_LEN].ulong; ++ m = malloc(addrlen); ++ ++ for (i = 0; i < addrlen; i++) { ++ if (i && *(ptr + i * 3 - 1) != ':') ++ goto out; ++ if (sscanf(ptr + i * 3, "%02hhx", &m[i]) != 1) ++ goto out; ++ } ++ ++ v->value[idx].ptr = m; ++ return 0; ++out: ++ free(m); ++ return -1; ++} ++#endif ++ ++static int find_device_by_sysfs(void) ++{ ++ int rc = -1; ++#ifdef USE_SYSFS ++ struct sysfs_class *cls_net; ++ struct dlist *dev_list; ++ struct sysfs_class_device *dev; ++ struct sysfs_attribute *dev_attr; ++ struct sysfs_devattr_values sysfs_devattr_values; ++ int count = 0; ++ ++ if (!device.sysfs) { ++ device.sysfs = malloc(sizeof(*device.sysfs)); ++ sysfs_devattr_values_init(device.sysfs, 0); ++ } ++ ++ cls_net = sysfs_open_class("net"); ++ if (!cls_net) { ++ 
perror("sysfs_open_class"); ++ return -1; ++ } ++ ++ dev_list = sysfs_get_class_devices(cls_net); ++ if (!dev_list) { ++ perror("sysfs_get_class_devices"); ++ goto out; ++ } ++ ++ sysfs_devattr_values_init(&sysfs_devattr_values, 0); ++ ++ dlist_for_each_data(dev_list, dev, struct sysfs_class_device) { ++ int i; ++ int rc = -1; ++ ++ if (device.name && strcmp(dev->name, device.name)) ++ goto do_next; ++ ++ sysfs_devattr_values_init(&sysfs_devattr_values, 1); ++ ++ for (i = 0; i < SYSFS_DEVATTR_NUM; i++) { ++ ++ dev_attr = sysfs_get_classdev_attr(dev, sysfs_devattrs[i].name); ++ if (!dev_attr) { ++ perror("sysfs_get_classdev_attr"); ++ rc = -1; ++ break; ++ } ++ if (sysfs_read_attribute(dev_attr)) { ++ perror("sysfs_read_attribute"); ++ rc = -1; ++ break; ++ } ++ rc = sysfs_devattrs[i].handler(dev_attr->value, &sysfs_devattr_values, i); ++ ++ if (rc < 0) ++ break; ++ } ++ ++ if (rc < 0) ++ goto do_next; ++ ++ if (check_ifflags(sysfs_devattr_values.value[SYSFS_DEVATTR_FLAGS].ulong, ++ device.name != NULL) < 0) ++ goto do_next; ++ ++ if (!sysfs_devattr_values.value[SYSFS_DEVATTR_ADDR_LEN].ulong) ++ goto do_next; ++ ++ if (device.sysfs->value[SYSFS_DEVATTR_IFINDEX].ulong) { ++ if (device.sysfs->value[SYSFS_DEVATTR_FLAGS].ulong & IFF_RUNNING) ++ goto do_next; ++ } ++ ++ sysfs_devattr_values.ifname = strdup(dev->name); ++ if (!sysfs_devattr_values.ifname) { ++ perror("malloc"); ++ goto out; ++ } ++ ++ sysfs_devattr_values_init(device.sysfs, 1); ++ memcpy(device.sysfs, &sysfs_devattr_values, sizeof(*device.sysfs)); ++ sysfs_devattr_values_init(&sysfs_devattr_values, 0); ++ ++ if (count++) ++ break; ++ ++ continue; ++do_next: ++ sysfs_devattr_values_init(&sysfs_devattr_values, 1); ++ } ++ ++ if (count == 1) { ++ device.ifindex = device.sysfs->value[SYSFS_DEVATTR_IFINDEX].ulong; ++ device.name = device.sysfs->ifname; ++ } ++ rc = !device.ifindex; ++out: ++ sysfs_close_class(cls_net); ++#endif ++ return rc; ++} ++ ++static int check_device_by_ioctl(int s, struct ifreq *ifr) ++{ ++ if (ioctl(s, SIOCGIFFLAGS, ifr) < 0) { ++ perror("ioctl(SIOCGIFINDEX"); ++ return -1; ++ } ++ ++ if (check_ifflags(ifr->ifr_flags, device.name != NULL) < 0) ++ return 1; ++ ++ if (ioctl(s, SIOCGIFINDEX, ifr) < 0) { ++ perror("ioctl(SIOCGIFINDEX"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int find_device_by_ioctl(void) ++{ ++ int s; ++ struct ifreq *ifr0, *ifr, *ifr_end; ++ size_t ifrsize = sizeof(*ifr); ++ struct ifconf ifc; ++ static struct ifreq ifrbuf; ++ int count = 0; ++ ++ s = socket(AF_INET, SOCK_DGRAM, 0); ++ if (s < 0) { ++ perror("socket"); ++ return -1; ++ } ++ ++ memset(&ifrbuf, 0, sizeof(ifrbuf)); ++ ++ if (device.name) { ++ strncpy(ifrbuf.ifr_name, device.name, sizeof(ifrbuf.ifr_name) - 1); ++ if (check_device_by_ioctl(s, &ifrbuf)) ++ goto out; ++ count++; ++ } else { ++ do { ++ int rc; ++ ifr0 = malloc(ifrsize); ++ if (!ifr0) { ++ perror("malloc"); ++ goto out; ++ } ++ ++ ifc.ifc_buf = (char *)ifr0; ++ ifc.ifc_len = ifrsize; ++ ++ rc = ioctl(s, SIOCGIFCONF, &ifc); ++ if (rc < 0) { ++ perror("ioctl(SIOCFIFCONF"); ++ goto out; ++ } ++ ++ if (ifc.ifc_len + sizeof(*ifr0) + sizeof(struct sockaddr_storage) - sizeof(struct sockaddr) <= ifrsize) ++ break; ++ ifrsize *= 2; ++ free(ifr0); ++ ifr0 = NULL; ++ } while(ifrsize < INT_MAX / 2); ++ ++ if (!ifr0) { ++ fprintf(stderr, "arping: too many interfaces!?\n"); ++ goto out; ++ } ++ ++ ifr_end = (struct ifreq *)(((char *)ifr0) + ifc.ifc_len - sizeof(*ifr0)); ++ for (ifr = ifr0; ifr <= ifr_end; ifr++) { ++ if (check_device_by_ioctl(s, &ifrbuf)) ++ continue; ++ 
memcpy(&ifrbuf.ifr_name, ifr->ifr_name, sizeof(ifrbuf.ifr_name)); ++ if (count++) ++ break; ++ } ++ } ++ ++ close(s); ++ ++ if (count == 1) { ++ device.ifindex = ifrbuf.ifr_ifindex; ++ device.name = ifrbuf.ifr_name; ++ } ++ return !device.ifindex; ++out: ++ close(s); ++ return -1; ++} ++ ++static int find_device(void) ++{ ++ int rc; ++ rc = find_device_by_ifaddrs(); ++ if (rc >= 0) ++ goto out; ++ rc = find_device_by_sysfs(); ++ if (rc >= 0) ++ goto out; ++ rc = find_device_by_ioctl(); ++out: ++ return rc; ++} ++ ++/* ++ * set_device_broadcast() ++ * ++ * This fills the device "broadcast address" ++ * based on information found by find_device() funcion. ++ */ ++static int set_device_broadcast_ifaddrs_one(struct device *device, unsigned char *ba, size_t balen, int fatal) ++{ ++#ifndef WITHOUT_IFADDRS ++ struct ifaddrs *ifa; ++ struct sockaddr_ll *sll; ++ ++ if (!device) ++ return -1; ++ ++ ifa = device->ifa; ++ if (!ifa) ++ return -1; ++ ++ sll = (struct sockaddr_ll *)ifa->ifa_broadaddr; ++ ++ if (sll->sll_halen != balen) { ++ if (fatal) { ++ if (!quiet) ++ printf("Address length does not match...\n"); ++ exit(2); ++ } ++ return -1; ++ } ++ memcpy(ba, sll->sll_addr, sll->sll_halen); ++ return 0; ++#else ++ return -1; ++#endif ++} ++static int set_device_broadcast_sysfs(struct device *device, unsigned char *ba, size_t balen) ++{ ++#ifdef USE_SYSFS ++ struct sysfs_devattr_values *v; ++ if (!device) ++ return -1; ++ v = device->sysfs; ++ if (!v) ++ return -1; ++ if (v->value[SYSFS_DEVATTR_ADDR_LEN].ulong != balen) ++ return -1; ++ memcpy(ba, v->value[SYSFS_DEVATTR_BROADCAST].ptr, balen); ++ return 0; ++#else ++ return -1; ++#endif ++} ++ ++static int set_device_broadcast_fallback(struct device *device, unsigned char *ba, size_t balen) ++{ ++ if (!quiet) ++ fprintf(stderr, "WARNING: using default broadcast address.\n"); ++ memset(ba, -1, balen); ++ return 0; ++} ++ ++static void set_device_broadcast(struct device *dev, unsigned char *ba, size_t balen) ++{ ++ if (!set_device_broadcast_ifaddrs_one(dev, ba, balen, 0)) ++ return; ++ if (!set_device_broadcast_sysfs(dev, ba, balen)) ++ return; ++ set_device_broadcast_fallback(dev, ba, balen); ++} ++ + int + main(int argc, char **argv) + { + int socket_errno; + int ch; +- uid_t uid = getuid(); + int hb_mode = 0; + + signal(SIGTERM, byebye); + signal(SIGPIPE, byebye); +- +- device = strdup("eth0"); +- ++ ++ limit_capabilities(); ++ ++#ifdef USE_IDN ++ setlocale(LC_ALL, ""); ++#endif ++ ++ enable_capability_raw(); ++ + s = socket(PF_PACKET, SOCK_DGRAM, 0); + socket_errno = errno; + +- if (setuid(uid)) { +- perror("arping: setuid"); +- exit(-1); +- } ++ disable_capability_raw(); + + while ((ch = getopt(argc, argv, "h?bfDUAqc:w:s:I:Vr:i:p:")) != EOF) { + switch(ch) { +@@ -367,7 +1073,7 @@ main(int argc, char **argv) + timeout = atoi(optarg); + break; + case 'I': +- device = optarg; ++ device.name = optarg; + break; + case 'f': + quit_on_reply=1; +@@ -376,7 +1082,7 @@ main(int argc, char **argv) + source = optarg; + break; + case 'V': +- printf("send_arp utility\n"); ++ printf("send_arp utility, based on arping from iputils-%s\n", SNAPSHOT); + exit(0); + case 'p': + case 'i': +@@ -405,7 +1111,7 @@ main(int argc, char **argv) + */ + + unsolicited = 1; +- device = argv[optind]; ++ device.name = argv[optind]; + target = argv[optind+1]; + + } else { +@@ -417,10 +1123,8 @@ main(int argc, char **argv) + target = *argv; + } + +- if (device == NULL) { +- fprintf(stderr, "arping: device (option -I) is required\n"); +- usage(); +- } ++ if (device.name && 
!*device.name) ++ device.name = NULL; + + if (s < 0) { + errno = socket_errno; +@@ -428,39 +1132,42 @@ main(int argc, char **argv) + exit(2); + } + +- if (1) { +- struct ifreq ifr; +- memset(&ifr, 0, sizeof(ifr)); +- strncpy(ifr.ifr_name, device, IFNAMSIZ-1); +- if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) { +- fprintf(stderr, "arping: unknown iface %s\n", device); +- exit(2); +- } +- ifindex = ifr.ifr_ifindex; ++ if (find_device() < 0) ++ exit(2); + +- if (ioctl(s, SIOCGIFFLAGS, (char*)&ifr)) { +- perror("ioctl(SIOCGIFFLAGS)"); ++ if (!device.ifindex) { ++ if (device.name) { ++ fprintf(stderr, "arping: Device %s not available.\n", device.name); + exit(2); + } +- if (!(ifr.ifr_flags&IFF_UP)) { +- if (!quiet) +- printf("Interface \"%s\" is down\n", device); +- exit(2); +- } +- if (ifr.ifr_flags&(IFF_NOARP|IFF_LOOPBACK)) { +- if (!quiet) +- printf("Interface \"%s\" is not ARPable\n", device); +- exit(dad?0:2); +- } ++ fprintf(stderr, "arping: device (option -I) is required.\n"); ++ usage(); + } + + if (inet_aton(target, &dst) != 1) { + struct hostent *hp; +- hp = gethostbyname2(target, AF_INET); ++ char *idn = target; ++#ifdef USE_IDN ++ int rc; ++ ++ rc = idna_to_ascii_lz(target, &idn, 0); ++ ++ if (rc != IDNA_SUCCESS) { ++ fprintf(stderr, "arping: IDN encoding failed: %s\n", idna_strerror(rc)); ++ exit(2); ++ } ++#endif ++ ++ hp = gethostbyname2(idn, AF_INET); + if (!hp) { + fprintf(stderr, "arping: unknown host %s\n", target); + exit(2); + } ++ ++#ifdef USE_IDN ++ free(idn); ++#endif ++ + memcpy(&dst, hp->h_addr, 4); + } + +@@ -480,9 +1187,13 @@ main(int argc, char **argv) + perror("socket"); + exit(2); + } +- if (device) { +- if (setsockopt(probe_fd, SOL_SOCKET, SO_BINDTODEVICE, device, strlen(device)+1) == -1) ++ if (device.name) { ++ enable_capability_raw(); ++ ++ if (setsockopt(probe_fd, SOL_SOCKET, SO_BINDTODEVICE, device.name, strlen(device.name)+1) == -1) + perror("WARNING: interface is ignored"); ++ ++ disable_capability_raw(); + } + memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; +@@ -514,9 +1225,9 @@ main(int argc, char **argv) + close(probe_fd); + }; + +- me.sll_family = AF_PACKET; +- me.sll_ifindex = ifindex; +- me.sll_protocol = htons(ETH_P_ARP); ++ ((struct sockaddr_ll *)&me)->sll_family = AF_PACKET; ++ ((struct sockaddr_ll *)&me)->sll_ifindex = device.ifindex; ++ ((struct sockaddr_ll *)&me)->sll_protocol = htons(ETH_P_ARP); + if (bind(s, (struct sockaddr*)&me, sizeof(me)) == -1) { + perror("bind"); + exit(2); +@@ -529,18 +1240,20 @@ main(int argc, char **argv) + exit(2); + } + } +- if (me.sll_halen == 0) { ++ if (((struct sockaddr_ll *)&me)->sll_halen == 0) { + if (!quiet) +- printf("Interface \"%s\" is not ARPable (no ll address)\n", device); ++ printf("Interface \"%s\" is not ARPable (no ll address)\n", device.name); + exit(dad?0:2); + } + + he = me; +- memset(he.sll_addr, -1, he.sll_halen); ++ ++ set_device_broadcast(&device, ((struct sockaddr_ll *)&he)->sll_addr, ++ ((struct sockaddr_ll *)&he)->sll_halen); + + if (!quiet) { + printf("ARPING %s ", inet_ntoa(dst)); +- printf("from %s %s\n", inet_ntoa(src), device ? : ""); ++ printf("from %s %s\n", inet_ntoa(src), device.name ? 
: ""); + } + + if (!src.s_addr && !dad) { +@@ -548,6 +1261,8 @@ main(int argc, char **argv) + exit(2); + } + ++ drop_capabilities(); ++ + set_signal(SIGINT, finish); + set_signal(SIGALRM, catcher); + +@@ -556,7 +1271,7 @@ main(int argc, char **argv) + while(1) { + sigset_t sset, osset; + unsigned char packet[4096]; +- struct sockaddr_ll from; ++ struct sockaddr_storage from; + socklen_t alen = sizeof(from); + int cc; + +@@ -565,11 +1280,12 @@ main(int argc, char **argv) + perror("arping: recvfrom"); + continue; + } ++ + sigemptyset(&sset); + sigaddset(&sset, SIGALRM); + sigaddset(&sset, SIGINT); + sigprocmask(SIG_BLOCK, &sset, &osset); +- recv_pack(packet, cc, &from); ++ recv_pack(packet, cc, (struct sockaddr_ll *)&from); + sigprocmask(SIG_SETMASK, &osset, NULL); + } + } diff --git a/SOURCES/bz1263348-mysql-tmpfile-leak.patch b/SOURCES/bz1263348-mysql-tmpfile-leak.patch new file mode 100644 index 0000000..1117535 --- /dev/null +++ b/SOURCES/bz1263348-mysql-tmpfile-leak.patch @@ -0,0 +1,11 @@ +diff -uNr a/heartbeat/mysql b/heartbeat/mysql +--- a/heartbeat/mysql 2016-02-29 10:54:21.896786740 +0100 ++++ b/heartbeat/mysql 2016-02-29 10:59:13.377446910 +0100 +@@ -344,6 +344,7 @@ + + get_slave_info + rc=$? ++ rm -f $tmpfile + + if [ $rc -eq 0 ]; then + # show slave status is not empty diff --git a/SOURCES/bz1265527-sap_redhat_cluster_connector-hostnames-with-dash.patch b/SOURCES/bz1265527-sap_redhat_cluster_connector-hostnames-with-dash.patch new file mode 100644 index 0000000..33ac96c --- /dev/null +++ b/SOURCES/bz1265527-sap_redhat_cluster_connector-hostnames-with-dash.patch @@ -0,0 +1,37 @@ +diff -uNr a/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector b/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector +--- a/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector 2013-07-18 21:17:48.000000000 +0200 ++++ b/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector 2016-02-29 11:04:48.714352114 +0100 +@@ -251,13 +251,13 @@ + open CRMOUT, "$cmd_cibadmin --local -Q --xpath '//primitive[\@type=\"$sra\"]' --node-path 2>/dev/null |" || die "could not open cibadmin output"; + while () { + my $line = $_; +- if ($line =~ /primitive..id='([a-zA-Z0-9_]+)'/) { ++ if ($line =~ /primitive..id='([a-zA-Z0-9_-]+)'/) { + ($fname) = ($1); + } else { + next; + } + +- if ( $line =~ /[group|master|clone]..id='([a-zA-Z0-9_]+)'/) { ++ if ( $line =~ /[group|master|clone]..id='([a-zA-Z0-9_-]+)'/) { + ($fgname) = ($1); + } + +@@ -265,7 +265,7 @@ + open RESOURCE1_OUT, "$cmd_cibadmin -Q --xpath \"//primitive[\@id='$fname']//nvpair[\@name='$sparam']\" 2>/dev/null |" || die "could not open cibadmin output"; + while () { + my $result = $_; +- if ($result =~ /value="([a-zA-Z0-9_]+)"/) { ++ if ($result =~ /value="([a-zA-Z0-9_-]+)"/) { + my $finstance=$1; + if ( $1 =~ /^${sid}_[a-zA-Z0-9]+${ino}_[a-zA-Z0-9_-]+$/ ) { + $foundRes=1; +@@ -279,7 +279,7 @@ + open RESOURCE2_OUT, "$cmd_cibadmin -Q --xpath \"//primitive[\@id='$fname']//nvpair[\@name='$sparam2']\" 2>/dev/null |" || die "could not open cibadmin output"; + while () { + my $result = $_; +- if ($result =~ /value="([a-zA-Z0-9_]+)"/) { ++ if ($result =~ /value="([a-zA-Z0-9_-]+)"/) { + my $finstance=$1; + if ( $1 =~ /^${sid}_[a-zA-Z0-9]+${ino}_[a-zA-Z0-9_-]+$/ ) { + $foundRes=1; diff --git a/SOURCES/bz1276699-ipaddr2-use-ipv6-dad-for-collision-detection.patch b/SOURCES/bz1276699-ipaddr2-use-ipv6-dad-for-collision-detection.patch new file mode 100644 index 0000000..57748bf --- /dev/null +++ 
b/SOURCES/bz1276699-ipaddr2-use-ipv6-dad-for-collision-detection.patch @@ -0,0 +1,60 @@ +diff -uNr a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +--- a/heartbeat/IPaddr2 2016-02-29 10:54:21.909786575 +0100 ++++ b/heartbeat/IPaddr2 2016-02-29 14:38:48.502852067 +0100 +@@ -673,19 +673,35 @@ + # + run_send_ua() { + local i +- # Wait until the allocated IPv6 address gets ready by checking +- # "tentative" flag is disappeared, otherwise send_ua can not +- # send the unsolicited advertisement requests. +- for i in 1 2 3 4 5; do +- $IP2UTIL -o -f $FAMILY addr show dev $NIC \ +- | grep -q -e "$OCF_RESKEY_ip/$NETMASK .* tentative" +- [ $? -ne 0 ] && break +- if [ $i -eq 5 ]; then +- ocf_log warn "$OCF_RESKEY_ip still has 'tentative' status. (ignored)" ++ ++ # Duplicate Address Detection [DAD] ++ # Kernel will flag the IP as 'tentative' until it ensured that ++ # there is no duplicates. ++ # If there is, it will flag it as 'dadfailed' ++ for i in $(seq 1 10); do ++ ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK) ++ case "$ipstatus" in ++ *dadfailed*) ++ ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]" ++ $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK ++ if [ $? -ne 0 ]; then ++ ocf_log err "Could not delete IPv6 address" ++ fi ++ return $OCF_ERR_GENERIC ++ ;; ++ *tentative*) ++ if [ $i -eq 10 ]; then ++ ofc_log warn "IPv6 address : DAD is still in tentative" ++ fi ++ ;; ++ *) + break +- fi ++ ;; ++ esac + sleep 1 + done ++ # Now the address should be usable ++ + ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC" + ocf_log info "$SENDUA $ARGS" + $SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements." +@@ -838,6 +854,10 @@ + else + if [ -x $SENDUA ]; then + run_send_ua ++ if [ $? -ne 0 ]; then ++ ocf_exit_reason "run_send_ua failed." ++ exit $OCF_ERR_GENERIC ++ fi + fi + fi + ;; diff --git a/SOURCES/bz1282723-novacompute-novaevacuate-fix-evacute-typo.patch b/SOURCES/bz1282723-novacompute-novaevacuate-fix-evacute-typo.patch new file mode 100644 index 0000000..9178089 --- /dev/null +++ b/SOURCES/bz1282723-novacompute-novaevacuate-fix-evacute-typo.patch @@ -0,0 +1,728 @@ +diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am +--- a/doc/man/Makefile.am 2016-02-02 14:49:34.546698286 +0100 ++++ b/doc/man/Makefile.am 2016-02-02 14:50:29.893979453 +0100 +@@ -73,7 +73,7 @@ + ocf_heartbeat_MailTo.7 \ + ocf_heartbeat_ManageRAID.7 \ + ocf_heartbeat_ManageVE.7 \ +- ocf_heartbeat_NovaCompute.7 \ ++ ocf_heartbeat_nova-compute-wait.7 \ + ocf_heartbeat_NovaEvacuate.7 \ + ocf_heartbeat_Pure-FTPd.7 \ + ocf_heartbeat_Raid1.7 \ +diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am +--- a/heartbeat/Makefile.am 2016-02-02 14:49:34.546698286 +0100 ++++ b/heartbeat/Makefile.am 2016-02-02 14:50:29.894979440 +0100 +@@ -52,7 +52,7 @@ + IPv6addr_LDADD = -lplumb $(LIBNETLIBS) + send_ua_LDADD = $(LIBNETLIBS) + +-osp_SCRIPTS = NovaCompute \ ++osp_SCRIPTS = nova-compute-wait \ + NovaEvacuate + + ocf_SCRIPTS = ClusterMon \ +diff -uNr a/heartbeat/NovaCompute b/heartbeat/NovaCompute +--- a/heartbeat/NovaCompute 2016-02-02 14:49:34.541698351 +0100 ++++ b/heartbeat/NovaCompute 1970-01-01 01:00:00.000000000 +0100 +@@ -1,366 +0,0 @@ +-#!/bin/sh +-# +-# +-# NovaCompute agent manages compute daemons. 
+-# +-# Copyright (c) 2015 +-# +-# This program is free software; you can redistribute it and/or modify +-# it under the terms of version 2 of the GNU General Public License as +-# published by the Free Software Foundation. +-# +-# This program is distributed in the hope that it would be useful, but +-# WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +-# +-# Further, this software is distributed without any warranty that it is +-# free of the rightful claim of any third person regarding infringement +-# or the like. Any license provided herein, whether implied or +-# otherwise, applies only to this software file. Patent licenses, if +-# any, provided herein do not apply to combinations of this program with +-# other software, or any other product whatsoever. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, write the Free Software Foundation, +-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +-# +- +-####################################################################### +-# Initialization: +- +-### +-: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +-. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +-### +- +-: ${__OCF_ACTION=$1} +- +-####################################################################### +- +-meta_data() { +- cat < +- +- +-1.0 +- +- +-OpenStack Nova Compute Server. +- +-OpenStack Nova Compute Server +- +- +- +- +- +-Authorization URL for connecting to keystone in admin context +- +-Authorization URL +- +- +- +- +- +-Username for connecting to keystone in admin context +- +-Username +- +- +- +- +-Password for connecting to keystone in admin context +- +-Password +- +- +- +- +- +-Tenant name for connecting to keystone in admin context. +-Note that with Keystone V3 tenant names are only unique within a domain. +- +-Tenant name +- +- +- +- +- +-DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN +- +-DNS domain +- +- +- +- +- +-Nova API location (internal, public or admin URL) +- +-Nova API location (internal, public or admin URL) +- +- +- +- +- +-Disable shared storage recovery for instances. Use at your own risk! +- +-Disable shared storage recovery for instances +- +- +- +- +- +-How long to wait for nova to finish evacuating instances elsewhere +-before starting nova-compute. Only used when the agent detects +-evacuations might be in progress. +- +-You may need to increase the start timeout when increasing this value. +- +-Delay to allow evacuations time to complete +- +- +- +- +- +- +- +- +- +- +- +- +- +-END +-} +- +-####################################################################### +- +-# don't exit on TERM, to test that lrmd makes sure that we do exit +-trap sigterm_handler TERM +-sigterm_handler() { +- ocf_log info "They use TERM to bring us down. No such luck." +- return +-} +- +-nova_usage() { +- cat </dev/null) +- if [ $? = 1 ]; then +- if [ "x${OCF_RESKEY_domain}" != x ]; then +- NOVA_HOST=$(uname -n | awk -F. '{print $1}') +- else +- NOVA_HOST=$(uname -n) +- fi +- fi +- +- # We only need to check a configured value, calculated ones are fine +- openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null +- if [ $? = 0 ]; then +- if [ "x${OCF_RESKEY_domain}" != x ]; then +- short_host=$(uname -n | awk -F. 
'{print $1}') +- if [ "x$NOVA_HOST" != "x${short_host}" ]; then +- ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function" +- rc=$OCF_ERR_CONFIGURED +- fi +- +- elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then +- ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function" +- rc=$OCF_ERR_CONFIGURED +- fi +- fi +- +- if [ $rc != $OCF_SUCCESS ]; then +- exit $rc +- fi +- return $rc +-} +- +-: ${OCF_RESKEY_evacuation_delay=120} +-case $__OCF_ACTION in +-meta-data) meta_data +- exit $OCF_SUCCESS +- ;; +-usage|help) nova_usage +- exit $OCF_SUCCESS +- ;; +-esac +- +-case $__OCF_ACTION in +-start) nova_validate; nova_start;; +-stop) nova_stop;; +-monitor) nova_validate; nova_monitor;; +-notify) nova_notify;; +-validate-all) exit $OCF_SUCCESS;; +-*) nova_usage +- exit $OCF_ERR_UNIMPLEMENTED +- ;; +-esac +-rc=$? +-ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" +-exit $rc +diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait +--- a/heartbeat/nova-compute-wait 1970-01-01 01:00:00.000000000 +0100 ++++ b/heartbeat/nova-compute-wait 2016-02-02 14:50:29.894979440 +0100 +@@ -0,0 +1,304 @@ ++#!/bin/sh ++# ++# ++# nova-compute-wait agent manages compute daemons. ++# ++# Copyright (c) 2015 ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++ ++####################################################################### ++# Initialization: ++ ++### ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++### ++ ++: ${__OCF_ACTION=$1} ++ ++####################################################################### ++ ++meta_data() { ++ cat < ++ ++ ++1.0 ++ ++ ++OpenStack Nova Compute Server. ++ ++OpenStack Nova Compute Server ++ ++ ++ ++ ++ ++Authorization URL for connecting to keystone in admin context ++ ++Authorization URL ++ ++ ++ ++ ++ ++Username for connecting to keystone in admin context ++ ++Username ++ ++ ++ ++ ++Password for connecting to keystone in admin context ++ ++Password ++ ++ ++ ++ ++ ++Tenant name for connecting to keystone in admin context. ++Note that with Keystone V3 tenant names are only unique within a domain. ++ ++Tenant name ++ ++ ++ ++ ++ ++DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN ++ ++DNS domain ++ ++ ++ ++ ++ ++Nova API location (internal, public or admin URL) ++ ++Nova API location (internal, public or admin URL) ++ ++ ++ ++ ++ ++Disable shared storage recovery for instances. Use at your own risk! 
++ ++Disable shared storage recovery for instances ++ ++ ++ ++ ++ ++How long to wait for nova to finish evacuating instances elsewhere ++before starting nova-compute. Only used when the agent detects ++evacuations might be in progress. ++ ++You may need to increase the start timeout when increasing this value. ++ ++Delay to allow evacuations time to complete ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++} ++ ++####################################################################### ++ ++# don't exit on TERM, to test that lrmd makes sure that we do exit ++trap sigterm_handler TERM ++sigterm_handler() { ++ ocf_log info "They use TERM to bring us down. No such luck." ++ return ++} ++ ++nova_usage() { ++ cat </dev/null) ++ if [ $? = 1 ]; then ++ if [ "x${OCF_RESKEY_domain}" != x ]; then ++ NOVA_HOST=$(uname -n | awk -F. '{print $1}') ++ else ++ NOVA_HOST=$(uname -n) ++ fi ++ fi ++ ++ # We only need to check a configured value, calculated ones are fine ++ openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null ++ if [ $? = 0 ]; then ++ if [ "x${OCF_RESKEY_domain}" != x ]; then ++ short_host=$(uname -n | awk -F. '{print $1}') ++ if [ "x$NOVA_HOST" != "x${short_host}" ]; then ++ ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function" ++ rc=$OCF_ERR_CONFIGURED ++ fi ++ ++ elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then ++ ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function" ++ rc=$OCF_ERR_CONFIGURED ++ fi ++ fi ++ ++ if [ $rc != $OCF_SUCCESS ]; then ++ exit $rc ++ fi ++ return $rc ++} ++ ++: ${OCF_RESKEY_evacuation_delay=120} ++case $__OCF_ACTION in ++meta-data) meta_data ++ exit $OCF_SUCCESS ++ ;; ++usage|help) nova_usage ++ exit $OCF_SUCCESS ++ ;; ++esac ++ ++case $__OCF_ACTION in ++start) nova_validate; nova_start;; ++stop) nova_stop;; ++monitor) nova_validate; nova_monitor;; ++notify) nova_notify;; ++validate-all) exit $OCF_SUCCESS;; ++*) nova_usage ++ exit $OCF_ERR_UNIMPLEMENTED ++ ;; ++esac ++rc=$? ++ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" ++exit $rc +diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate +--- a/heartbeat/NovaEvacuate 2016-02-02 14:49:34.541698351 +0100 ++++ b/heartbeat/NovaEvacuate 2016-02-02 14:50:22.768072003 +0100 +@@ -141,7 +141,7 @@ + } + + update_evacuation() { +- attrd_updater -p -n evacute -Q -N ${1} -v ${2} ++ attrd_updater -p -n evacuate -Q -N ${1} -v ${2} + arc=$? + if [ ${arc} != 0 ]; then + ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}" +@@ -219,7 +219,12 @@ + return $OCF_NOT_RUNNING + fi + +- handle_evacuations $(attrd_updater -n evacute -A | tr '="' ' ' | awk '{print $4" "$6}') ++ handle_evacuations $( ++ attrd_updater -n evacuate -A | ++ sed 's/ value=""/ value="no"/' | ++ tr '="' ' ' | ++ awk '{print $4" "$6}' ++ ) + return $OCF_SUCCESS + } + diff --git a/SOURCES/bz1283877-virtualdomain-may-remove-config-file.patch b/SOURCES/bz1283877-virtualdomain-may-remove-config-file.patch deleted file mode 100644 index 3a9871b..0000000 --- a/SOURCES/bz1283877-virtualdomain-may-remove-config-file.patch +++ /dev/null @@ -1,40 +0,0 @@ -diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain ---- a/heartbeat/VirtualDomain 2015-11-20 11:52:58.314263831 +0100 -+++ b/heartbeat/VirtualDomain 2015-11-20 11:53:55.247196256 +0100 -@@ -340,13 +340,32 @@ - return $rc - } - -+# virsh undefine removes configuration files if they are in -+# directories which are managed by libvirt. 
such directories -+# include also subdirectories of /etc (for instance -+# /etc/libvirt/*) which may be surprising. VirtualDomain didn't -+# include the undefine call before, hence this wasn't an issue -+# before. -+# -+# There seems to be no way to find out which directories are -+# managed by libvirt. -+# - verify_undefined() { -- for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null`; do -- if [ "$dom" = "$DOMAIN_NAME" ]; then -+ local tmpf -+ if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME" -+ then -+ tmpf=$(mktemp -t vmcfgsave.XXXXXX) -+ if [ ! -r "$tmpf" ]; then -+ ocf_log warn "unable to create temp file, disk full?" -+ # we must undefine the domain - virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 -- return -+ else -+ cp -p $OCF_RESKEY_config $tmpf -+ virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1 -+ [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config -+ rm -f $tmpf - fi -- done -+ fi - } - - VirtualDomain_Start() { diff --git a/SOURCES/bz1284526-galera-crash-recovery.patch b/SOURCES/bz1284526-galera-crash-recovery.patch new file mode 100644 index 0000000..3e51ad0 --- /dev/null +++ b/SOURCES/bz1284526-galera-crash-recovery.patch @@ -0,0 +1,131 @@ +From d9833b68498e306d181be11adf9eee14b646a899 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Tue, 2 Feb 2016 14:34:36 +0100 +Subject: [PATCH] galera: force crash recovery if needed during last commit + detection + +--- + heartbeat/galera | 90 +++++++++++++++++++++++++++++++++++++------------------- + 1 file changed, 60 insertions(+), 30 deletions(-) + +diff --git a/heartbeat/galera b/heartbeat/galera +index 7be2b00..ca94c21 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -525,6 +525,58 @@ detect_first_master() + set_bootstrap_node $best_node + } + ++detect_last_commit() ++{ ++ local last_commit ++ local recover_args="--defaults-file=$OCF_RESKEY_config \ ++ --pid-file=$OCF_RESKEY_pid \ ++ --socket=$OCF_RESKEY_socket \ ++ --datadir=$OCF_RESKEY_datadir \ ++ --user=$OCF_RESKEY_user" ++ local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p' ++ ++ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" ++ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" ++ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then ++ local tmp=$(mktemp) ++ local tmperr=$(mktemp) ++ ++ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" ++ ++ ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr ++ ++ last_commit="$(cat $tmp | sed -n $recovered_position_regex)" ++ if [ -z "$last_commit" ]; then ++ # Galera uses InnoDB's 2pc transactions internally. If ++ # server was stopped in the middle of a replication, the ++ # recovery may find a "prepared" XA transaction in the ++ # redo log, and mysql won't recover automatically ++ ++ cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null ++ if [ $? -eq 0 ]; then ++ # we can only rollback the transaction, but that's OK ++ # since the DB will get resynchronized anyway ++ ocf_log warn "local node <${NODENAME}> was not shutdown properly. 
Rollback stuck transaction with --tc-heuristic-recover" ++ ${OCF_RESKEY_binary} $recover_args --wsrep-recover \ ++ --tc-heuristic-recover=rollback > $tmp 2>/dev/null ++ ++ last_commit="$(cat $tmp | sed -n $recovered_position_regex)" ++ fi ++ fi ++ rm -f $tmp $tmperr ++ fi ++ ++ if [ ! -z "$last_commit" ]; then ++ ocf_log info "Last commit version found: $last_commit" ++ set_last_commit $last_commit ++ return $OCF_SUCCESS ++ else ++ ocf_exit_reason "Unable to detect last known write sequence number" ++ clear_last_commit ++ return $OCF_ERR_GENERIC ++ fi ++} ++ + # For galera, promote is really start + galera_promote() + { +@@ -569,13 +620,15 @@ galera_demote() + clear_bootstrap_node + clear_last_commit + +- # record last commit by "starting" galera. start is just detection of the last sequence number +- galera_start ++ # record last commit for next promotion ++ detect_last_commit ++ rc=$? ++ return $rc + } + + galera_start() + { +- local last_commit ++ local rc + + echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME + if [ $? -ne 0 ]; then +@@ -591,34 +644,11 @@ galera_start() + + mysql_common_prepare_dirs + +- ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" +- last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" +- if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then +- ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" +- local tmp=$(mktemp) +- ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ +- --pid-file=$OCF_RESKEY_pid \ +- --socket=$OCF_RESKEY_socket \ +- --datadir=$OCF_RESKEY_datadir \ +- --user=$OCF_RESKEY_user \ +- --wsrep-recover > $tmp 2>&1 +- +- last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')" +- rm -f $tmp +- +- if [ "$last_commit" = "-1" ]; then +- last_commit="0" +- fi +- fi +- +- if [ -z "$last_commit" ]; then +- ocf_exit_reason "Unable to detect last known write sequence number" +- clear_last_commit +- return $OCF_ERR_GENERIC ++ detect_last_commit ++ rc=$? ++ if [ $rc -ne $OCF_SUCCESS ]; then ++ return $rc + fi +- ocf_log info "Last commit version found: $last_commit" +- +- set_last_commit $last_commit + + master_exists + if [ $? 
-eq 0 ]; then diff --git a/SOURCES/bz1284526-galera-heuristic-recovered.patch b/SOURCES/bz1284526-galera-heuristic-recovered.patch new file mode 100644 index 0000000..589fc11 --- /dev/null +++ b/SOURCES/bz1284526-galera-heuristic-recovered.patch @@ -0,0 +1,89 @@ +From 4d98bbcdadda60166faf7ccc512b9095b439e2bd Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Tue, 2 Feb 2016 16:29:10 +0100 +Subject: [PATCH] galera: prevent recovered nodes from bootstrapping cluster + when possible + +--- + heartbeat/README.galera | 19 ++++++++++++++++++- + heartbeat/galera | 41 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 59 insertions(+), 1 deletion(-) + +diff --git a/heartbeat/galera b/heartbeat/galera +index ca94c21..84c92fd 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -276,6 +276,22 @@ is_bootstrap() + + } + ++set_heuristic_recovered() ++{ ++ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true" ++} ++ ++clear_heuristic_recovered() ++{ ++ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D ++} ++ ++is_heuristic_recovered() ++{ ++ local node=$1 ++ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null ++} ++ + clear_last_commit() + { + ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D +@@ -398,8 +414,19 @@ detect_first_master() + local best_node="$NODENAME" + local last_commit=0 + local missing_nodes=0 ++ local nodes="" ++ local nodes_recovered="" + ++ # avoid selecting a recovered node as bootstrap if possible + for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do ++ if is_heuristic_recovered $node; then ++ nodes_recovered="$nodes_recovered $node" ++ else ++ nodes="$nodes $node" ++ fi ++ done ++ ++ for node in $nodes_recovered $nodes; do + last_commit=$(get_last_commit $node) + + if [ -z "$last_commit" ]; then +@@ -466,6 +493,12 @@ detect_last_commit() + --tc-heuristic-recover=rollback > $tmp 2>/dev/null + + last_commit="$(cat $tmp | sed -n $recovered_position_regex)" ++ if [ ! -z "$last_commit" ]; then ++ ocf_log warn "State recovered. force SST at next restart for full resynchronization" ++ rm -f ${OCF_RESKEY_datadir}/grastate.dat ++ # try not to use this node if bootstrap is needed ++ set_heuristic_recovered ++ fi + fi + fi + rm -f $tmp $tmperr +@@ -549,11 +582,17 @@ galera_promote() + if ocf_is_true $bootstrap; then + promote_everyone + clear_bootstrap_node ++ # clear attribute heuristic-recovered. if last shutdown was ++ # not clean, we cannot be extra-cautious by requesting a SST ++ # since this is the bootstrap node ++ clear_heuristic_recovered + ocf_log info "Bootstrap complete, promoting the rest of the galera instances." + else + # if this is not the bootstrap node, make sure this instance + # syncs with the rest of the cluster before promotion returns. 
+ wait_for_sync ++ # sync is done, clear info about last recovery ++ clear_heuristic_recovered + fi + + ocf_log info "Galera started" diff --git a/SOURCES/bz1284526-galera-no-grastate.patch b/SOURCES/bz1284526-galera-no-grastate.patch new file mode 100644 index 0000000..8f2ca23 --- /dev/null +++ b/SOURCES/bz1284526-galera-no-grastate.patch @@ -0,0 +1,113 @@ +From 422ef6a2018ebf9d6765e1f2965778f42c6a9d9c Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Tue, 15 Mar 2016 18:45:13 +0100 +Subject: [PATCH] galera: don't bootstrap from a node with no grastate.dat when + possible + +--- + heartbeat/README.galera | 9 ++++----- + heartbeat/galera | 36 ++++++++++++++++++++++-------------- + 2 files changed, 26 insertions(+), 19 deletions(-) + +diff --git a/heartbeat/galera b/heartbeat/galera +index 72add3c..e4495be 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -276,20 +276,20 @@ is_bootstrap() + + } + +-set_heuristic_recovered() ++set_no_grastate() + { +- ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true" ++ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -v "true" + } + +-clear_heuristic_recovered() ++clear_no_grastate() + { +- ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D ++ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -D + } + +-is_heuristic_recovered() ++is_no_grastate() + { + local node=$1 +- ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null ++ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -Q 2>/dev/null + } + + clear_last_commit() +@@ -419,7 +419,7 @@ detect_first_master() + + # avoid selecting a recovered node as bootstrap if possible + for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do +- if is_heuristic_recovered $node; then ++ if is_no_grastate $node; then + nodes_recovered="$nodes_recovered $node" + else + nodes="$nodes $node" +@@ -473,6 +473,12 @@ detect_last_commit() + local tmp=$(mktemp) + local tmperr=$(mktemp) + ++ # if we pass here because grastate.dat doesn't exist, ++ # try not to bootstrap from this node if possible ++ if [ ! -f ${OCF_RESKEY_datadir}/grastate.dat ]; then ++ set_no_grastate ++ fi ++ + ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" + + ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr +@@ -496,8 +502,8 @@ detect_last_commit() + if [ ! -z "$last_commit" ]; then + ocf_log warn "State recovered. force SST at next restart for full resynchronization" + rm -f ${OCF_RESKEY_datadir}/grastate.dat +- # try not to use this node if bootstrap is needed +- set_heuristic_recovered ++ # try not to bootstrap from this node if possible ++ set_no_grastate + fi + fi + fi +@@ -582,17 +588,17 @@ galera_promote() + if ocf_is_true $bootstrap; then + promote_everyone + clear_bootstrap_node +- # clear attribute heuristic-recovered. if last shutdown was ++ # clear attribute no-grastate. if last shutdown was + # not clean, we cannot be extra-cautious by requesting a SST + # since this is the bootstrap node +- clear_heuristic_recovered ++ clear_no_grastate + ocf_log info "Bootstrap complete, promoting the rest of the galera instances." 
+ else + # if this is not the bootstrap node, make sure this instance + # syncs with the rest of the cluster before promotion returns. + wait_for_sync +- # sync is done, clear info about last recovery +- clear_heuristic_recovered ++ # sync is done, clear info about last startup ++ clear_no_grastate + fi + + ocf_log info "Galera started" +@@ -611,6 +617,7 @@ galera_demote() + # if this node was previously a bootstrap node, that is no longer the case. + clear_bootstrap_node + clear_last_commit ++ clear_no_grastate + + # record last commit for next promotion + detect_last_commit +@@ -722,6 +729,7 @@ galera_stop() + clear_last_commit + clear_master_score + clear_bootstrap_node ++ clear_no_grastate + return $rc + } + diff --git a/SOURCES/bz1287303-novaevacuate-invoke-off-action.patch b/SOURCES/bz1287303-novaevacuate-invoke-off-action.patch new file mode 100644 index 0000000..99699bc --- /dev/null +++ b/SOURCES/bz1287303-novaevacuate-invoke-off-action.patch @@ -0,0 +1,31 @@ +From 5e9310bbbcd5086ea9a3edf85d523c4c2a57f1c3 Mon Sep 17 00:00:00 2001 +From: Andrew Beekhof +Date: Tue, 8 Dec 2015 13:54:12 +1100 +Subject: [PATCH] NovaEvacuate should invoke fence_compute with action 'off' + +Conceptually we are resurrecting in one direction only (off) and not +bringing it back to the current host afterwards (on) + +Also it will overwrite the attrd variable too soon. + +Change-Id: I9694945ca7eedae4f5cb6758fe1e8ce7f72ae808 +--- + ocf/NovaEvacuate | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate +index a17a159..0e22d7e 100644 +--- a/heartbeat/NovaEvacuate ++++ b/heartbeat/NovaEvacuate +@@ -198,7 +198,7 @@ handle_evacuations() { + return $OCF_SUCCESS + fi + +- fence_compute ${fence_options} -o reboot -n $node ++ fence_compute ${fence_options} -o off -n $node + rc=$? + + if [ $rc = 0 ]; then +-- +1.9.1 + diff --git a/SOURCES/bz1287314-novaevacuate-simplify-nova-check.patch b/SOURCES/bz1287314-novaevacuate-simplify-nova-check.patch new file mode 100644 index 0000000..24adb9c --- /dev/null +++ b/SOURCES/bz1287314-novaevacuate-simplify-nova-check.patch @@ -0,0 +1,23 @@ +diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate +--- a/heartbeat/NovaEvacuate 2016-02-29 10:54:21.933786269 +0100 ++++ b/heartbeat/NovaEvacuate 2016-02-29 13:29:27.000139496 +0100 +@@ -177,17 +177,10 @@ + esac + + if [ $need_evacuate = 1 ]; then +- found=0 + ocf_log notice "Initiating evacuation of $node" + +- for known in $(fence_compute ${fence_options} -o list | tr -d ','); do +- if [ ${known} = ${node} ]; then +- found=1 +- break +- fi +- done +- +- if [ $found = 0 ]; then ++ fence_compute ${fence_options} -o status -n ${node} ++ if [ $? 
!= 0 ]; then + ocf_log info "Nova does not know about ${node}" + # Dont mark as no because perhaps nova is unavailable right now + continue diff --git a/SOURCES/bz1289107-saphana-mcos-support.patch b/SOURCES/bz1289107-saphana-mcos-support.patch new file mode 100644 index 0000000..1532f94 --- /dev/null +++ b/SOURCES/bz1289107-saphana-mcos-support.patch @@ -0,0 +1,1778 @@ +diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana +--- a/heartbeat/SAPHana 2016-04-26 12:01:55.620889964 +0200 ++++ b/heartbeat/SAPHana 2016-04-26 12:03:17.240897137 +0200 +@@ -2,9 +2,9 @@ + # + # SAPHana + # +-# Description: Manages two single SAP HANA Instance in System Replication ++# Description: Manages two single SAP HANA Instance in System Replication + # Planned: do also manage scale-up scenarios +-# currently the SAPHana is dependent of the analysis of ++# currently the SAPHana is dependent of the analysis of + # SAPHanaTopology + # For supported scenarios please read the README file provided + # in the same software package (rpm) +@@ -16,16 +16,17 @@ + # Support: linux@sap.com + # License: GNU General Public License (GPL) + # Copyright: (c) 2013,2014 SUSE Linux Products GmbH ++# Copyright: (c) 2015 SUSE Linux GmbH + # +-# An example usage: ++# An example usage: + # See usage() function below for more details... + # + # OCF instance parameters: +-# OCF_RESKEY_SID +-# OCF_RESKEY_InstanceNumber +-# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) +-# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) +-# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) ++# OCF_RESKEY_SID ++# OCF_RESKEY_InstanceNumber ++# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) ++# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) ++# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) + # OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no) + # OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt)) + # OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes) +@@ -71,7 +72,7 @@ + info ) + case "$shf" in + all) skip=0 +- ;; ++ ;; + none ) + skip=1 + ;; +@@ -80,13 +81,13 @@ + mtype=${mtype#fh} + echo "$shf"| grep -iq ${mtype}; search=$? + if [ $search -eq 0 ]; then +- skip=0 ++ skip=0 + else + skip=1 + fi + ;; + esac +- ;; ++ ;; + esac + if [ $skip -eq 0 ]; then + ocf_log "$level" "$message" +@@ -103,8 +104,8 @@ + local rc=0 + methods=$(saphana_methods) + methods=$(echo $methods | tr ' ' '|') +- cat <<-! +- usage: $0 ($methods) ++ cat <<-EOF ++ usage: $0 ($methods) + + $0 manages a SAP HANA Instance as an HA resource. + +@@ -118,8 +119,17 @@ + The 'validate-all' operation reports whether the parameters are valid + The 'methods' operation reports on the methods $0 supports + +- ! 
+- return $rc ++EOF ++ return $rc ++} ++ ++function backup_global_and_nameserver() { ++ super_ocf_log info "FLOW $FUNCNAME ($*)" ++ local rc=0 ++ cp /hana/shared/LNX/global/hdb/custom/config/global.ini /hana/shared/LNX/global/hdb/custom/config/global.ini.$(date +"%s") ++ cp /hana/shared/LNX/global/hdb/custom/config/nameserver.ini /hana/shared/LNX/global/hdb/custom/config/nameserver.ini.$(date +"%s") ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc + } + + # +@@ -130,11 +140,12 @@ + function saphana_meta_data() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 +- cat < + + +-0.149.7 ++0.151.1 + + Manages two SAP HANA instances in system replication (SR). + +@@ -157,7 +168,7 @@ + 2. landscapeHostConfiguration + The interface is used to monitor a HANA system. The python script is named landscapeHostConfiguration.py. + landscapeHostConfiguration.py has some detailed output about HANA system status +- and node roles. For our monitor the overall status is relevant. This overall ++ and node roles. For our monitor the overall status is relevant. This overall + status is reported by the returncode of the script: + 0: Internal Fatal, 1: ERROR, 2: WARNING, 3: INFO, 4: OK + The SAPHana resource agent will interpret returncodes 0 as FATAL, 1 as not-running or ERROR and and returncodes 2+3+4 as RUNNING. +@@ -168,14 +179,14 @@ + system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register). + + 4. hdbsql / systemReplicationStatus +- Interface is SQL query into HANA (system replication table). The hdbsql query will be replaced by a python script ++ Interface is SQL query into HANA (system replication table). The hdbsql query will be replaced by a python script + "systemReplicationStatus.py" in SAP HANA SPS8 or 9. + As long as we need to use hdbsql you need to setup secure store users for linux user root to be able to + access the SAP HANA database. You need to configure a secure store user key "SAPHANA${SID}SR" which can connect the SAP +- HANA database: ++ HANA database: + + 5. saphostctrl +- The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the ++ The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the + SAP HANA instance. This is the hostname used during the HANA installation. + + +@@ -207,7 +218,7 @@ + + + Time difference needed between to primary time stamps, if a dual-primary situation occurs +- Time difference needed between to primary time stamps, ++ Time difference needed between to primary time stamps, + if a dual-primary situation occurs. If the time difference is + less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin + a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After +@@ -231,12 +242,8 @@ + + + +- Define SAPHana resource agent messages to be printed +- Define SAPHana resource agent messages to be printed. +- This parameter should only be set if requested by support. The default is sufficient for normal operation. 
+- Values: ra-act-lpa-dec-flow +- You could specify any combination of the above values like "ra-act-flow" +- ++ OUTDATED PARAMETER ++ OUTDATED PARAMETER + + + +@@ -271,7 +278,7 @@ + for m in start stop status monitor promote demote notify validate-all methods meta-data usage; do + echo "$m" + done +- return $rc ++ return $rc + } + + # +@@ -298,7 +305,7 @@ + local remoteNode="" + local rc=1 + for cl in ${otherNodes[@]}; do +- vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]}) ++ vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]} "$cl") + if [ "$vHost" = "$remoteHost" ]; then # we found the correct node + remoteNode=$cl + rc=0 +@@ -347,9 +354,10 @@ + } + + # +-# function: get_hana_attribute ++# function: get_hana_attribute + # params: NODE ATTR [STORE] + # globals: - ++# output: attribute value + # + function get_hana_attribute() + { +@@ -358,14 +366,20 @@ + local attr_node=$1 + local attr_name=$2 + local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter +- local attr_default=${5:-} ++ local attr_default=${4:-} ++ local dstr + local attr_val="" +- attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default"); rc=$? +- if [ $debug_attributes -eq 1 ]; then +- dstr=$(date) +- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE +- fi +- echo "$attr_val" ++ dstr=$(date) ++ case "$attr_store" in ++ reboot | forever ) ++ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE ++ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ props ) ++ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE ++ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -388,11 +402,17 @@ + attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$? + if [ "$attr_old" != "$attr_value" ]; then + super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc " +- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store; rc=$? +- if [ $debug_attributes -eq 1 ]; then +- dstr=$(date) +- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE +- fi ++ dstr=$(date) ++ case "$attr_store" in ++ reboot | forever ) ++ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE ++ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ props ) ++ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE ++ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? 
++ ;; ++ esac + else + super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}" + rc=0 +@@ -408,7 +428,8 @@ + # + function assert() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local err_msg=$1 local default_rc=$OCF_NOT_RUNNING ++ local err_msg=$1 ++ local default_rc=$OCF_NOT_RUNNING + # DONE: Check, if we need to destinguish between probe and others + if ocf_is_probe; then + default_exit=$OCF_NOT_RUNNING +@@ -435,7 +456,7 @@ + local score=0 + if [ -n "$1" ]; then + score=$1 +- fi ++ fi + # DONE: PRIO2: Only adjust master if value is really different (try to check that) + oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot) + if [ "$oldscore" != "$score" ]; then +@@ -452,7 +473,7 @@ + # + # function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER) + # params: NODE_ROLES NODE_SYNC_STATUS +-# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], ++# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], + # + scoring_crm_master() + { +@@ -467,7 +488,7 @@ + if grep "$rolePatt" <<< "$roles"; then + if grep "$syncPatt" <<< "$sync"; then + skip=1 +- myScore=$score ++ myScore=$score + fi + fi + fi +@@ -496,7 +517,7 @@ + # function: saphana_init - initialize variables for the resource agent + # params: InstanceName + # globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), +-# globals: sr_name(w), remoteHost(w), otherNodes(w) ++# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w) + # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w) + # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) + # globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w) +@@ -506,6 +527,8 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=$OCF_SUCCESS + local vName ++ local clN ++ # local site + # two parameter models (for transition only) + # OLD: InstanceName + # NEW: SID InstanceNumber +@@ -528,11 +551,10 @@ + # + # if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology + # +- vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]}); ++ vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME"); + fi + SAPVIRHOST=${vName} + PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER" +- SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" + AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" + LPA_DIRECTORY=/var/lib/SAPHanaRA + LPA_ATTR=("lpa_${sid}_lpt" "forever") +@@ -591,6 +613,8 @@ + *openais* ) otherNodes=($(crm_node -l | awk '$3 == "member" { if ($2 != me) { print $2 }}' me=${NODENAME}));; + *cman* ) otherNodes=($(crm_node -l | awk '{for (i=1; i<=NF; i++) { if ($i != me) { print $i }}}' me=${NODENAME}));; + esac ++ # ++ # + + remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]}); + if [ -z "$remoteHost" ]; then +@@ -611,9 +635,13 @@ + # ATTR_NAME_HANA_SITE + sr_name=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SITE[@]}); + sr_mode=$(get_hana_attribute "${NODENAME}" ${ATTR_NAME_HANA_SRMODE[@]}) ++ + if [ -z "$sr_mode" ]; then + sr_mode="sync" + fi ++ if [ -n "$remoteNode" ]; then ++ rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); ++ fi + super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode" + # optional OCF parameters, we try to 
guess which directories are correct + if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] +@@ -706,7 +734,7 @@ + then + runninginst=$(echo "$output" | grep '^0 : ' | cut -d' ' -f3) + if [ "$runninginst" != "$InstanceName" ] +- then ++ then + super_ocf_log warn "ACT: sapstartsrv is running for instance $runninginst, that service will be killed" + restart=1 + else +@@ -784,38 +812,113 @@ + node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) + node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') + super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" ++ # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files? ++ # This might also solve some problems when we could not figure-out the ilocal or remote site name + for i in 1 2 3 4 5 6 7 8 9; do + case "$node_status" in +- primary ) +- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY" +- return $HANA_STATE_PRIMARY;; ++ primary ) ++ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY" ++ return $HANA_STATE_PRIMARY;; + syncmem | sync | async ) +- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY" +- return $HANA_STATE_SECONDARY;; +- none ) # have seen that mode on second side BEFEORE we registered it as replica +- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE" +- return $HANA_STATE_STANDALONE;; ++ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY" ++ return $HANA_STATE_SECONDARY;; ++ none ) # have seen that mode on second side BEFEORE we registered it as replica ++ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE" ++ return $HANA_STATE_STANDALONE;; + * ) +- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" +- dump=$( echo $node_status | hexdump -C ); +- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" +- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) +- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') +- super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" +- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes ++ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" ++ dump=$( echo $node_status | hexdump -C ); ++ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" ++ node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) ++ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') ++ super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" ++ # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes + esac; + done + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } + ++# function: analyze_hana_sync_statusSRS ++# params: - ++# globals: DIR_EXECUTABLE(r), FULL_SR_STATUS(w), remoteNode ++# ++# systemReplicationStatus.py return-codes: ++# NoHSR = 10 ++# Error = 11 ++# Unkown = 12 ++# Initializing = 13 ++# Syncing = 14 ++# Active = 15 ++function analyze_hana_sync_statusSRS() ++{ ++ super_ocf_log info "FLOW $FUNCNAME ($*)" ++ local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam="" ++ if [ -n "$rem_SR_name" ]; then ++ siteParam="--site=$rem_SR_name" ++ fi ++ FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? 
++ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" ++ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" ++ # ++ # TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported) ++ # And is the check for return code capable for chains? ++ # ++ if [ $srRc -eq 15 ]; then ++ # Fix for a HANA BUG, where a non-working SR resulted in RC 15: ++ if grep -q "ACTIVE" <<< "$FULL_SR_STATUS"; then ++ super_ocf_log info "FLOW $FUNCNAME SOK" ++ set_hana_attribute "$remoteNode" "SOK" ${ATTR_NAME_HANA_SYNC_STATUS[@]} ++ super_ocf_log info "ACT site=$sr_name, seting SOK for secondary (1)" ++ lpa_set_lpt 30 "$remoteNode" ++ rc=0; ++ else ++ # ok we should be careful and set secondary to SFAIL ++ super_ocf_log info "FLOW $FUNCNAME SFAIL" ++ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} ++ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (6) - srRc=$srRc lss=$lss No ACTIVES found in cmd output" ++ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary ++ lpa_set_lpt 10 "$remoteNode" ++ fi ++ elif [ $srRc -le 11 ]; then # 11 and 10 ++ # if systemReplicationStatus is ERROR and landscapeHostConfiguration is down than do NOT set SFAIL ++ get_hana_landscape_status; lss=$? ++ if [ $lss -lt 2 ]; then ++ # keep everithing like it was ++ rc=2 ++ else ++ # ok we should be careful and set secondary to SFAIL ++ super_ocf_log info "FLOW $FUNCNAME SFAIL" ++ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} ++ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss" ++ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary ++ lpa_set_lpt 10 "$remoteNode" ++ rc=1 ++ fi ++ else ++ super_ocf_log info "FLOW $FUNCNAME SFAIL" ++ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} ++ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc" ++ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary ++ lpa_set_lpt 10 "$remoteNode" ++ rc=1; ++ fi ++ super_ocf_log info "FLOW $FUNCNAME PRIM+LPA" ++ super_ocf_log info "DBG PRIM" ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc ++} ++ + # +-# function: analyze_hana_sync_status - query and check hana system replication status ++#### ++#### OLD HDBSQL STUFF FOR SPS6,7,8 AND SCALE-UP ONLY ++#### ++# function: analyze_hana_sync_statusSQL - query and check hana system replication status + # params: - + # globals: DIR_EXECUTABLE(r), remoteHost(r) + # get the HANA sync status +-# +-function analyze_hana_sync_status() ++# ++function analyze_hana_sync_statusSQL() + { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local -a clusterNodes=() +@@ -863,35 +966,9 @@ + # TODO PRIO1: REMOVE remoteNode dependency - set SFAIL + set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} + fi +- # first get a list of all secondary hosts, than a list of all secondary hosts, if the is ANY failure at this site +- # TODO: PRIO9: for first we assume there is only ONE secondary site (like ROT) +- # TODO: PRIO3: should we loop over all cluster nodes fetching their roles-attribute? To minimize sql-queries? +- # +- all_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_secondaries ); sqlrc=$? 
+- all_secondary_hosts=$(echo $all_secondary_hosts | dequote); +- if [ "$sqlrc" -eq 0 ]; then +- all_broken_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_failed_secondaries); sqlrc=$? +- all_broken_secondary_hosts=$(echo $all_broken_secondary_hosts | dequote); +- if [ "$sqlrc" -eq 0 ]; then +- if [ -n "$all_broken_secondary_hosts" ]; then +- # +- # we have a broken secondary site - set all hosts to "SFAIL" +- # +- # Note: since HANA hostname can be different from nodename we need to check all vhost attributes +- for n in $all_broken_secondary_hosts; do +- for cl in ${otherNodes[@]}; do +- vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]}) +- if [ "$vHost" = "$n" ]; then # we found the correct node +- set_hana_attribute $cl "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} +- fi +- done +- done +- fi +- fi +- fi + else + case "$sqlrc" in +- 19 ) ++ 19 ) + # return codes 19: license error -> set SFAIL! + # DONE: PRIO1: We should NOT set SFAIL, if HDB is exactly broken now + # When HDB breaks during monitor this could prevent a prositive remote failover +@@ -901,7 +978,7 @@ + done + ;; + esac +- fi ++ fi + return $rc + } + +@@ -932,10 +1009,18 @@ + local remoteInstance=""; + remoteInstance=$InstanceNr + if ocf_is_true ${AUTOMATED_REGISTER}; then ++ # ++ # ++ # ++ # ++ # + super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" ++ # ++ # + su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? ++ # backup_global_and_nameserver + else +- super_ocf_log info "ACT: IGNORE REGISTER because AUTOMATED_REGISTER is set to FALSE" ++ super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE" + rc=1 + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -945,7 +1030,7 @@ + # + # function: saphana_status - pure status check + # params: - +-# globals: SIDInstanceName, OCF_*, ++# globals: SIDInstanceName, OCF_*, + function saphana_status() { + local binDeam="hdb.sap${SIDInstanceName}" rc=0 + binDeam=${binDeam:0:15} # Process name is limited to the first 15 characters +@@ -956,13 +1041,13 @@ + # + # function: saphana_start - start a hana instance + # params: - +-# globals: OCF_*, SAPCONTROL, InstanceNr, SID, InstanceName, ++# globals: OCF_*, SAPCONTROL, InstanceNr, SID, InstanceName, + # + function saphana_start() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=$OCF_NOT_RUNNING + local output="" +- local loopcount=0 ++ local loopcount=0 + check_sapstartsrv + rc=$? + # +@@ -1000,11 +1085,11 @@ + # saphana_stop: Stop the SAP instance + # + function saphana_stop() { +- super_ocf_log info "FLOW $FUNCNAME ($*)" +- local output="" +- local rc=0 +- check_sapstartsrv; rc=$? +- if [ $rc -eq $OCF_SUCCESS ]; then ++ super_ocf_log info "FLOW $FUNCNAME ($*)" ++ local output="" ++ local rc=0 ++ check_sapstartsrv; rc=$? ++ if [ $rc -eq $OCF_SUCCESS ]; then + output=$($SAPCONTROL -nr $InstanceNr -function Stop) + rc=$? 
+ super_ocf_log info "ACT: Stopping SAP Instance $SID-$InstanceName: $output" +@@ -1032,7 +1117,7 @@ + # function: saphana_validate - validation of (some) variables/parameters + # params: - + # globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), SAPVIRHOST(r) +-# saphana_validate: Check the symantic of the input parameters ++# saphana_validate: Check the symantic of the input parameters + # + function saphana_validate() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +@@ -1060,12 +1145,12 @@ + # + # function: saphana_start_primary - handle startup of PRIMARY in M/S + # params: +-# globals: OCF_*(r), NODENAME, ATTR_NAME_*, HANA_STATE_*, ++# globals: OCF_*(r), NODENAME, ATTR_NAME_*, HANA_STATE_*, + # + function saphana_start_primary() + { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING ++ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING + local lss sqlrc; + local rc=0 + local lpa_dec=4 +@@ -1074,7 +1159,7 @@ + # we will be a master (PRIMARY) so checking, if the is an OTHER master + # + super_ocf_log debug "DBG: saphana_primary - check_for_primary reports HANA_STATE_PRIMARY" +- # ++ # + lpa_init_lpt $HANA_STATE_PRIMARY + lpa_check_lpt_status; lpa_dec=$? + get_hana_landscape_status; lss=$? +@@ -1139,7 +1224,7 @@ + 1 ) # landcape says we are down, lets start and adjust scores and return code + super_ocf_log info "LPA: landcape: DOWN, LPA: start ==> start instance" + saphana_start +- rc=$? ++ rc=$? + LPTloc=$(date '+%s') + lpa_set_lpt $LPTloc + ;; +@@ -1152,7 +1237,7 @@ + # DONE: PRIO3: check if this reaction is correct - tell cluster about failed start + super_ocf_log info "LPA: landcape: UP, LPA: register ==> take down" + set_crm_master -inf +- rc=$OCF_NOT_RUNNING ++ rc=$OCF_NOT_RUNNING + ;; + 1 ) # lets try to register + # DONE: PRIO2: Like Action in start_secondary +@@ -1160,7 +1245,7 @@ + super_ocf_log info "DEC: AN OTHER HANA IS AVAILABLE ==> LETS REGISTER" + set_crm_master 0 + if wait_for_primary_master 1; then +- register_hana_secondary ++ register_hana_secondary + check_for_primary; primary_status=$? + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then + super_ocf_log info "ACT: Register successful" +@@ -1169,11 +1254,11 @@ + set_crm_master 0 + saphana_start_secondary + rc=$? +- lpa_set_lpt 30 ++ lpa_set_lpt 10 + else + super_ocf_log err "ACT: Register failed" + rc=$OCF_NOT_RUNNING +- fi ++ fi + else + # lets check next monitor, if we can register + rc=$OCF_SUCCESS +@@ -1185,6 +1270,9 @@ + case "$lss" in + 2 | 3 | 4 ) # as we ARE up we just keep it up + # TODO: PRIO3: I now change from "just keep it up to take that down" ++# TODO: PRIO1 differ lpt_advice!! ++# 2 => DOWN ++# 3 => KEEP + # TODO: PRIO3: OCF_SUCCESS, OCF_NOT_RUNNING or OCF_ERR_xxxx ? 
+ set_crm_master -9000 + #scoring_crm_master "$my_role" "$my_sync" +@@ -1193,7 +1281,7 @@ + 1 ) # we are down, so we should wait --> followup in next monitor + super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" + # TODO: PRIO3: Check, if WAITING is correct here +- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} + set_crm_master -9000 + rc=$OCF_SUCCESS + ;; +@@ -1202,7 +1290,7 @@ + fail ) # process a lpa FAIL + super_ocf_log info "LPA: LPA reports FAIL" + set_crm_master -inf +- rc=$OCF_NOT_RUNNING ++ rc=$OCF_NOT_RUNNING + ;; + esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +@@ -1278,12 +1366,12 @@ + # + # function: saphana_start_secondary - handle startup of PRIMARY in M/S + # params: +-# globals: OCF_*(r), NODENAME, ATTR_NAME_*, ++# globals: OCF_*(r), NODENAME, ATTR_NAME_*, + # + function saphana_start_secondary() + { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING ++ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING + local sqlrc; + set_crm_master 0 + # +@@ -1291,9 +1379,9 @@ + # + lpa_push_lpt 10 + lpa_set_lpt 10 +- # ++ # + ####### LPA - end +- # ++ # + # + # we would be slave (secondary) + # we first need to check, if there are Master Nodes, because the Scecondary only starts +@@ -1311,16 +1399,16 @@ + # It seams the stating secondary could not start because of stopping primary + # so this is a WAITING situation + super_ocf_log info "ACT: PRIMARY seams to be down now ==> WAITING" +- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_hana_attribute ${NODENAME} "WAITING4PRIM" ${ATTR_NAME_HANA_CLONE_STATE[@]} + set_crm_master -INFINITY + rc=$OCF_SUCCESS + fi + else +- lpa_set_lpt 30 ++ lpa_set_lpt 10 + fi + else + super_ocf_log info "ACT: wait_for_primary_master ==> WAITING" +- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]} ++ set_hana_attribute ${NODENAME} "WAITING4PRIM" ${ATTR_NAME_HANA_CLONE_STATE[@]} + set_crm_master -INFINITY + rc=$OCF_SUCCESS + fi +@@ -1329,11 +1417,71 @@ + } + + # ++# function: saphana_check_local_instance ++# params: ++# output: ++# rc: rc=0 (UP) rc=1 (DOWN) ++# globals: ++# ++function saphana_check_local_instance() ++{ ++ local rc=1 ++ local count=0 ++ local SERVNO ++ local output ++ local MONITOR_SERVICES="hdbnameserver|hdbdaemon" # TODO: PRIO1: exact list of Services ++ super_ocf_log info "FLOW $FUNCNAME ($*)" ++ check_sapstartsrv ++ rc=$? ++ if [ $rc -eq $OCF_SUCCESS ] ++ then ++ output=$($SAPCONTROL -nr $InstanceNr -function GetProcessList -format script) ++ # we have to parse the output, because the returncode doesn't tell anything about the instance status ++ for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u` ++ do ++ local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3` ++ local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3` ++ local STATE=0 ++ local SEARCH ++ ++ case $COLOR in ++ GREEN|YELLOW) STATE=$OCF_SUCCESS;; ++ *) STATE=$OCF_NOT_RUNNING;; ++ esac ++ ++ SEARCH=`echo "$MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` ++ if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] ++ then ++ if [ $STATE -eq $OCF_NOT_RUNNING ] ++ then ++ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" 
++ rc=$STATE ++ fi ++ count=1 ++ fi ++ done ++ ++ if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] ++ then ++ if ocf_is_probe ++ then ++ rc=1 ++ else ++ [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!" ++ rc=1 ++ fi ++ fi ++ fi ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc ++} ++ ++# + # function: lpa_get_lpt - get lpt from cluster + # params: NODE + # output: LPT + # rc: rc=0: OK, rc=1: InternalERROR, rc=2: ERROR +-# globals: LPA_ATTR_*, ++# globals: LPA_ATTR_*, + # + function lpa_get_lpt() { + super_ocf_log info "FLOW $FUNCNAME ($*)" +@@ -1348,7 +1496,7 @@ + rc=2 + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ return $rc + } + + # +@@ -1372,7 +1520,7 @@ + rc=0 + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ return $rc + } + + # +@@ -1398,7 +1546,7 @@ + rc=2 + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ return $rc + } + + # +@@ -1422,15 +1570,15 @@ + rc=2 + else + rc=0 +- fi ++ fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ return $rc + } + + # + # function: lpa_init_lpt - initialize local lpt, if needed + # params: HANA_STATE +-# globals: HANA_STATE_*(r), LPA_DIRECTORY(r), sid(r), NODENAME(r), ++# globals: HANA_STATE_*(r), LPA_DIRECTORY(r), sid(r), NODENAME(r), + # lpa_init_lpt + # + # Returncodes: +@@ -1439,7 +1587,7 @@ + # Initializing (if NO local LPT-file): + # SECONDARY sets to 0 + # PRIMARY sets to 1 +-# ++# + function lpa_init_lpt() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=1 +@@ -1458,11 +1606,11 @@ + LPTloc=10 + lpa_push_lpt "10"; rc=$? + else +- rc=2 ++ rc=2 + fi + lpa_set_lpt $LPTloc + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ return $rc + } + + # +@@ -1472,6 +1620,10 @@ + # lpa_check_lpt_status + # + # Returncodes: ++# 0: start ++# 1: register than start ++# 2: wait4gab ++# 3: wait4other + # + # Initializing (if NO local LPT-file): + # SECONDARY sets to 10 +@@ -1480,20 +1632,20 @@ + # LPRlocal OR LPTremore ARE real lpt (>1000) + # THEN: + # Bigger LPR wins, if delta-gab is OK +-# LPTlocal >> LPTremore ===> rc=0 (start) ++# LPTlocal >> LPTremore ===> rc=0 (start) + # LPTRemote >> LPTlocal ===> rc=1 (register) +-# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait) ++# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait4gab) + # LPRlocal AND LPTremore ARE NOT real lpt (<=1000) + # THEN: + # Bigger LPT wins +-# LPTlocal > LPTremore ===> rc=0 (start) ++# LPTlocal > LPTremore ===> rc=0 (start) + # LPTRemote > LPTlocal ===> rc=1 (register) +-# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait) ++# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait4gab) + # LPTRemote is not initialized or node not kown in cluster (crm_mon -l) (0) + # TODO: PRIO1: Need to introduce a return-code 3 for remote sides lpa not ready + # THEN: + # WAIT ==> like STALEMATE-HANDLING ===> rc=2 (wait) +-# ++# + function lpa_check_lpt_status() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 +@@ -1501,6 +1653,8 @@ + local LPTrem=-1 + local LPTMark=1000 + local delta=0 ++ local remSn_name="" ++ local remHost="" + # + # First GET LPT from ATTR-FILE-DEFAULT + # +@@ -1550,7 +1704,20 @@ + fi + fi + super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ return $rc ++} ++ ++# function: is_the_master_nameserver ++# params: - ++# rc: 0: yes, local node is THE master nameserver ++# 1: else ++# globals: ++function is_the_master_nameserver() ++{ ++ super_ocf_log 
info "FLOW $FUNCNAME ($*)" ++ local rc=0 ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc + } + + # +@@ -1574,11 +1741,12 @@ + check_for_primary; primary_status=$? + if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then + saphana_start_primary; rc=$? +- else ++ else ++ lpa_set_lpt 10 + saphana_start_secondary; rc=$? +- lpa_set_lpt 30 +- fi ++ fi + fi ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } + +@@ -1596,7 +1764,7 @@ + check_for_primary; primary_status=$? + if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then + lpa_set_lpt 10 +- fi ++ fi + saphana_stop; rc=$? + return $rc + } +@@ -1637,7 +1805,7 @@ + DEMOTED ) + promoted=0; + ;; +- WAITING ) ++ WAITING* ) + # DONE: lpa_check_lpt_status to come out of here :) + # DONE: PRIO2: CHECK IF THE FIX FOR COMING OUT OF WAITING IS CORRECT + get_hana_landscape_status; lss=$? +@@ -1648,7 +1816,8 @@ + lpa_set_lpt $LPTloc + fi + lpa_check_lpt_status; lparc=$? +- if [ $lparc -ne 2 ]; then ++ # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes ++ if [ $lparc -lt 2 ]; then + # lpa - no need to wait any longer - lets try a new start + saphana_start_clone + rc=$? +@@ -1663,7 +1832,7 @@ + super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" + fi + return $OCF_SUCCESS +- fi ++ fi + promoted=0; + ;; + UNDEFINED ) +@@ -1682,13 +1851,13 @@ + get_hana_landscape_status; lss=$? + super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" + case "$lss" in +- 0 ) # FATAL or ERROR ++ 0 ) # FATAL or ERROR + rc=$OCF_ERR_GENERIC + ;; +- 1 ) # DOWN or ERROR ++ 1 ) # DOWN or ERROR + # DONE: PRIO2: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error + if ocf_is_probe; then +- # ++ # + # leave master score untouched, only set return code + # + rc=$OCF_NOT_RUNNING +@@ -1699,7 +1868,7 @@ + # For Migration it would be good to decrease master score + # For Reload locally we should NOT adjust the master score + # ===> Should we rely on the migration threshold? +- # set_crm_master ++ # set_crm_master + if ocf_is_true "${PreferSiteTakeover}" ; then + # + # DONE: PRIO1: first check, if remote site is already (and still) in sync +@@ -1708,7 +1877,7 @@ + # TODO PRIO1: REMOVE remoteNode dependency - get_sync_status + remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) + case "$remoteSync" in +- SOK ) ++ SOK | PRIM ) + super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)" + set_crm_master 5 + if check_for_primary_master; then +@@ -1718,11 +1887,11 @@ + SFAIL ) + super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" + ;; +- * ) ++ * ) + super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" + ;; +- esac +- else ++ esac ++ else + # TODO: PRIO5: SCALE-OUT ONLY? 
Implement for local restart + # It maybe that for the local restart we only need to decrease the secondaries promotion score + #super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here" +@@ -1765,8 +1934,12 @@ + case "$my_role" in + [12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status + ;; +- [34]:P:*:master:* ) # primary is up and should now be able to anser hdbsql query +- analyze_hana_sync_status ++ [34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query ++ if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then ++ analyze_hana_sync_statusSRS ++ else ++ analyze_hana_sync_statusSQL ++ fi + ;; + esac + rem_role=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_ROLES[@]}) +@@ -1776,9 +1949,9 @@ + [234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster + lpa_check_lpt_status; again_lpa_rc=$? + if [ $again_lpa_rc -eq 2 ]; then +- super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" +- lpa_set_lpt 10 +- lpa_push_lpt 10 ++ super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" ++ lpa_set_lpt 10 ++ lpa_push_lpt 10 + rc=$OCF_NOT_RUNNING + fi + ;; +@@ -1812,13 +1985,13 @@ + function saphana_monitor_secondary() + { + super_ocf_log info "FLOW $FUNCNAME ($*)" +- local rc=$OCF_ERR_GENERIC +- local promoted=0 ++ local rc=$OCF_ERR_GENERIC ++ local promoted=0 + local init_attribute=0 + local lss + # + # OK, we are running as HANA SECONDARY +- # ++ # + if ! lpa_get_lpt ${NODENAME}; then + lpa_set_lpt 10 + lpa_push_lpt 10 +@@ -1863,7 +2036,7 @@ + super_ocf_log debug "DBG: saphana_monitor_clone: HANA_STATE_SECONDARY" + # + # old method was: saphana_monitor - new method is get_hana_landscape_status +- get_hana_landscape_status; lss=$? ++ get_hana_landscape_status; lss=$? 
+ super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" + case "$lss" in + 0 ) # FATAL +@@ -1919,11 +2092,11 @@ + # a) returning 7 here and force cluster a restart of the slave + # b) starting the instance here inside the monitor -> may result in longer runtime, timeouts + # +- # first check with the status function (OS tools) if there could be something like a SAP instance running +- # as we do not know here, if we are in master or slave state we do not want to start our monitoring +- # agents (sapstartsrv) on the wrong host +- local rc=$OCF_ERR_GENERIC +- local promoted=0 ++ # first check with the status function (OS tools) if there could be something like a SAP instance running ++ # as we do not know here, if we are in master or slave state we do not want to start our monitoring ++ # agents (sapstartsrv) on the wrong host ++ local rc=$OCF_ERR_GENERIC ++ local promoted=0 + local init_attribute=0 + local lpaRc=0 + local mRc=0 +@@ -1973,7 +2146,7 @@ + # function: saphana_promote_clone - promote a hana clone + # params: - + # globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), +-# saphana_promote_clone: ++# saphana_promote_clone: + # In a Master/Slave configuration get Master being the primary OR by running hana takeover + # + function saphana_promote_clone() { +@@ -2017,7 +2190,7 @@ + rc=$OCF_SUCCESS; + else + rc=$OCF_FAILED_MASTER +- fi ++ fi + ;; + * ) + super_ocf_log err "ACT: HANA SYNC STATUS IS NOT 'SOK' SO THIS HANA SITE COULD NOT BE PROMOTED" +@@ -2039,10 +2212,10 @@ + # + # function: saphana_demote_clone - demote a hana clone instance + # params: - +-# globals: OCF_*(r), NODENAME(r), ++# globals: OCF_*(r), NODENAME(r), + # saphana_demote_clone +-# the HANA System Replication (SR) runs in a Master/Slave +-# While we could not change a HANA instance to be really demoted, we only mark the status for ++# the HANA System Replication (SR) runs in a Master/Slave ++# While we could not change a HANA instance to be really demoted, we only mark the status for + # correct monitor return codes + # + function saphana_demote_clone() { +@@ -2056,9 +2229,9 @@ + } + + # +-# function: main - main function to operate ++# function: main - main function to operate + # params: ACTION +-# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), SAPVIRHOST(w), DIR_EXECUTABLE(w), ++# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), SAPVIRHOST(w), DIR_EXECUTABLE(w), + # globals: SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r) + # + +@@ -2073,7 +2246,7 @@ + SAPCONTROL="" + DIR_PROFILE="" + SAPSTARTPROFILE="" +-SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" ++SAPHanaFilter="ra-act-dec-lpa" + + NODENAME=$(crm_node -n) + +@@ -2100,7 +2273,7 @@ + exit $OCF_SUCCESS;; + *);; + esac +-saphana_init ++saphana_init + + if ! ocf_is_root + then +@@ -2141,7 +2314,7 @@ + saphana_$ACTION$CLACT + ra_rc=$? + ;; +- validate-all) ++ validate-all) + saphana_validate + ra_rc=$? + ;; +@@ -2149,12 +2322,13 @@ + lpa_check_lpt_status + ra_rc=$? 
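# --- Editor's aside (illustration only, not part of the patch series) ---
# lpa_check_lpt_status(), invoked here and in the start/monitor paths, is the
# last-primary-timestamp arbitration documented earlier in this patch. As a
# hedged summary of the return codes listed in that header comment:
#   0 - start                 (local LPT clearly newer)
#   1 - register, then start  (remote LPT clearly newer)
#   2 - wait4gab              (timestamps too close together: stalemate handling)
#   3 - wait4other            (intended for "remote side's LPA not ready", per the TODO)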
+ ;; +- *) # seams to be a unknown request +- saphana_methods ++ *) # seams to be a unknown request ++ saphana_methods + ra_rc=$OCF_ERR_UNIMPLEMENTED + ;; + esac + timeE=$(date '+%s') + (( timeR = timeE - timeB )) ++#super_ocf_log info "RA ==== SAPHanaFilter=$SAPHanaFilter" + super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($THE_VERSION) (${timeR}s)====" + exit ${ra_rc} +diff -uNr a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology +--- a/heartbeat/SAPHanaTopology 2016-04-26 12:01:55.620889964 +0200 ++++ b/heartbeat/SAPHanaTopology 2016-04-26 12:03:18.033887556 +0200 +@@ -16,7 +16,7 @@ + # Copyright: (c) 2014 SUSE Linux Products GmbH + # (c) 2015 SUSE Linux GmbH + # +-# An example usage: ++# An example usage: + # See usage() function below for more details... + # + # OCF instance parameters: +@@ -41,7 +41,6 @@ + HANA_STATE_DEFECT=3 + + debug_attributes=0 +- + SH=/bin/sh + + # +@@ -57,7 +56,7 @@ + local shf="${SAPHanaFilter:-all}" + #ocf_log "info" "super_ocf_log: f:$shf l:$level m:$message" + # message levels: (dbg)|info|warn|err|error +- # ++ # + # message types: (ACT|RA|FLOW|DBG|LPA|DEC + case "$level" in + dbg | debug | warn | err | error ) skip=0 +@@ -65,7 +64,7 @@ + info ) + case "$shf" in + all) skip=0 +- ;; ++ ;; + none ) + skip=1 + ;; +@@ -74,13 +73,13 @@ + mtype=${mtype#fh} + echo "$shf"| grep -iq ${mtype}; search=$? + if [ $search -eq 0 ]; then +- skip=0 ++ skip=0 + else + skip=1 + fi + ;; + esac +- ;; ++ ;; + esac + if [ $skip -eq 0 ]; then + ocf_log "$level" "$message" +@@ -126,15 +125,15 @@ + + + +- 0.149.6 ++ 0.151.1 + Analyzes SAP HANA System Replication Topology. + This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to + all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases. + In addition it starts and monitors the local saphostagent. + +-1. Interface to monitor a HANA system: landscapeHostConfiguration.py ++1. Interface to monitor a HANA system: landscapeHostConfiguration.py + landscapeHostConfiguration.py has some detailed output about HANA system status +-and node roles. For our monitor the overall status is relevant. This overall ++and node roles. For our monitor the overall status is relevant. This overall + status is reported by the returncode of the script: + 0: Internal Fatal + 1: ERROR +@@ -150,7 +149,7 @@ + system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register). + + 3. saphostctrl +- The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the ++ The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the + SAP HANA instance. This is the hostname used during the HANA installation. + + +@@ -172,13 +171,8 @@ + + + +- Define type of SAPHanaTopology RA messages to be printed +- Define type of SAPHanaTopology RA messages to be printed. +-Define SAPHana resource agent messages to be printed. +- This parameter should only be set if requested by support. The default is sufficient for normal operation. 
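# --- Editor's aside (illustration only, not part of the patch series) ---
# The hunks in this area retire SAPHanaFilter as a per-resource parameter (its
# description becomes "OUTDATED") and the agents instead read the message
# filter from a cluster-wide property, hana_${sid}_glob_filter, stored in
# crm_config (see the "props" branches of get_/set_hana_attribute further on).
# Hedged example of setting it by hand, mirroring that "props" branch; the
# SID "ha1" and the value shown are placeholders:
#   crm_attribute -v "ra-act-dec-lpa" -n "hana_ha1_glob_filter" -t crm_config -s SAPHanaSR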
+- Values: ra-act-lpa-dec-flow +- You could specify any combination of the above values like "ra-act-flow" +- ++ OUTDATED ++ OUTDATED + + + +@@ -197,7 +191,7 @@ + } + + # +-# function: get_hana_attribute ++# function: get_hana_attribute + # params: NODE ATTR [STORE] + # globals: - + # +@@ -208,16 +202,19 @@ + local attr_node=$1 + local attr_name=$2 + local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter +- local attr_val="" +- attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q); rc=$? +- if [ $debug_attributes -eq 1 ]; then +- dstr=$(date) +- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE +- fi +- echo "$attr_val" +- if [ $rc -ne 0 ]; then +- super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -G -n "$attr_name" -l $attr_store -q" +- fi ++ local attr_default=${4:-} ++ local dstr ++ dstr=$(date) ++ case "$attr_store" in ++ reboot | forever ) ++ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE ++ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ props ) ++ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE ++ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ esac + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -234,19 +231,24 @@ + local attr_value=$2 + local attr_name=$3 + local attr_store=${4:-reboot} # DONE: PRIO5 get this (optional) from parameter ++ local attr_default=${5:-} + local rc=1 +- local attr_old +- attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store); get_rc=$? ++ local attr_old="" ++ local dstr ++ dstr=$(date) ++ attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$? + if [ "$attr_old" != "$attr_value" ]; then + super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc " +- if [ $debug_attributes -eq 1 ]; then +- dstr=$(date) +- echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE +- fi +- crm_attribute -N $attr_node -v "$attr_value" -n "$attr_name" -l $attr_store; rc=$? +- if [ $rc -ne 0 ]; then +- super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store" +- fi ++ case "$attr_store" in ++ reboot | forever ) ++ echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE ++ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ props ) ++ echo "$dstr: SAPHanaTopology: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE ++ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? ++ ;; ++ esac + else + super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}" + rc=0 +@@ -299,7 +301,7 @@ + # + # yes it is a clone config - check, if its configured well + # +- if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then ++ if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then + super_ocf_log err "ACT: Clone options misconfigured. 
(expect: clone_node_max=1)" + exit $OCF_ERR_CONFIGURED + fi +@@ -314,8 +316,8 @@ + # + # function: sht_init - initialize variables for the resource agent + # params: - +-# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), +-# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) ++# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), ++# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) + # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w) + # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w) + # sht_init : Define global variables with default values, if optional parameters are not set +@@ -327,6 +329,8 @@ + local myInstanceName="" + local rc=$OCF_SUCCESS + local hdbANSWER="" ++ local siteID ++ local siteNAME + HOSTEXECNAME=saphostexec + USRSAP=/usr/sap + SAPSERVICE_PATH=${USRSAP}/sapservices +@@ -340,10 +344,9 @@ + super_ocf_log debug "DBG2: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)" + sid=$(echo "$SID" | tr [:upper:] [:lower:]) + sidadm="${sid}adm" +- SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" + ocf_env=$(env | grep 'OCF_RESKEY_CRM') + super_ocf_log debug "DBG3: OCF: $ocf_env" +- ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot") # SOK, SFAIL, UNKNOWN? ++ ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot") # SOK, SFAIL, UNKNOWN? + ATTR_NAME_HANA_PRIMARY_AT=("hana_${sid}_primary_at" "reboot") # Not really used + ATTR_NAME_HANA_CLONE_STATE=("hana_${sid}_clone_state" "reboot") # UKNOWN?, DEMOTED, PROMOTED + ATTR_NAME_HANA_REMOTEHOST=("hana_${sid}_remoteHost" "forever") +@@ -352,8 +355,14 @@ + ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") + ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") + ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") +- ++ # ++ # new "central" attributes ++ # ++ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa") + # optional OCF parameters, we try to guess which directories are correct ++ ++ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]}) ++ + if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] + then + DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" +@@ -387,19 +396,32 @@ + # we need: mode=primary|sync|syncmem|...; site name=; mapping/=/ (multiple lines) + case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in + *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; +- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; +- *cman* ) nodelist=$(crm_node -l);; ++ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; ++ *cman* ) nodelist=$(crm_node -l);; + esac + #### SAP-CALL +- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) +- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" +- site=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}') ++ # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode ++ for i in 1 2 3 4 5 6 7 8 9; do ++ hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) ++ super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" ++ srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') ++ case "$srmode" in ++ primary | syncmem | sync | async | 
none ) ++ # we can leave the loop as we already got a result ++ break ++ ;; ++ * ) ++ # lets pause a bit to give hdbnsutil a chance to answer next time ++ sleep 2 ++ ;; ++ esac ++ done ++ # TODO PRIO3: Implement a file lookup, if we did not get a result ++ siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}') ++ siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}') ++ site=$siteNAME + srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') +- if [ $debug_attributes -eq 1 ]; then +- dstr=$(date) +- echo "$dstr: SAPHanaTopology: srmode=$srmode" >> /var/log/fhATTRIBUTE +- fi +- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site) ++ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site) + super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING" + # + # filter all non-cluster mappings +@@ -413,12 +435,12 @@ + echo $hanaVHost; + fi; + done; +- done ) ++ done ) + super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" + super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" + super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" + return $OCF_SUCCESS +-} ++} + + # + # function: check_for_primary - check if local SAP HANA is configured as primary +@@ -428,32 +450,30 @@ + function check_for_primary() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 +- # DONE: Change stderr location!! +- #sidadm=lnxadm +- #node_status=$(check_for_primary_single) +- node_status=$srmode +- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status" +- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" +- for i in 1 2 3 4 5 6 7 8 9; do +- case "$node_status" in +- primary ) ++ node_status=$srmode ++ super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status" ++ super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" ++ for i in 1 2 3 4 5 6 7 8 9; do ++ case "$node_status" in ++ primary ) + super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY" + return $HANA_STATE_PRIMARY;; + syncmem | sync | async ) + super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY" + return $HANA_STATE_SECONDARY;; +- none ) # have seen that mode on second side BEFEORE we registered it as replica ++ none ) # have seen that mode on second side BEFEORE we registered it as replica + super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE" + return $HANA_STATE_STANDALONE;; + * ) +- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" +- dump=$( echo $node_status | hexdump -C ); +- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" +- #### SAP-CALL +- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) +- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') +- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status" +- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes ++ # TODO: PRIO1: Should we set SFAIL? 
++ # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes ++ dump=$( echo $node_status | hexdump -C ); ++ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>" ++ #### SAP-CALL ++ node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) ++ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') ++ super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status" ++ # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes + esac; + done + super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT" +@@ -464,7 +484,7 @@ + # + # function: start_saphostagent + # params: - +-# globals: ++# globals: HOSTEXEC_PATH(r), HOSTEXEC_PROFILE_PATH(r) + # + function start_saphostagent() + { +@@ -478,7 +498,7 @@ + # + # function: stop_saphostagent + # params: - +-# globals: ++# globals: HOSTEXEC_PATH(r) + # + function stop_saphostagent() + { +@@ -496,6 +516,8 @@ + function check_saphostagent() + { + local rc=1 ++ # TODO: PRIO3: should the path been removed like "saphostexec" instead of "/usr/sap/hostctrl/exe/saphostexec" ++ # or should we use ${HOSTEXEC_PATH} instead? + pgrep -f /usr/sap/hostctrl/exe/saphostexec; rc=$? + return $rc + } +@@ -509,15 +531,16 @@ + # sht_start : Start the SAP HANA instance + # + function sht_start() { +- + super_ocf_log info "FLOW $FUNCNAME ($*)" + + local rc=$OCF_NOT_RUNNING + local output="" +- local loopcount=0 ++ local loopcount=0 + +- mkdir -p /var/lib/SAPHana +- touch /var/lib/SAPHana/SAPTopologyON ++ # TODO: PRIO3: move the string "$HA_RSCTMP/SAPHana/SAPTopologyON" to a variable ++ # TODO: PRIO3: move the file to the clusters tmp directory? ++ mkdir -p $HA_RSCTMP/SAPHana ++ touch $HA_RSCTMP/SAPHana/SAPTopologyON + if ! check_saphostagent; then + start_saphostagent + fi +@@ -532,16 +555,16 @@ + # function: sht_stop - stop a hana instance + # params: - + # globals: OCF_*(r), SAPCONTROL(r), SID(r), InstanceName(r) +-# sht_stop: Stop the SAP instance ++# sht_stop: Stop the SAP HANA Topology Resource + # + function sht_stop() { + super_ocf_log info "FLOW $FUNCNAME ($*)" + local output="" + local rc=0 + +- rm /var/lib/SAPHana/SAPTopologyON ++ rm $HA_RSCTMP/SAPHana/SAPTopologyON + rc=$OCF_SUCCESS +- ++ + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -557,13 +580,13 @@ + super_ocf_log info "FLOW $FUNCNAME ($*)" + local rc=0 + +- if [ -f /var/lib/SAPHana/SAPTopologyON ]; then ++ if [ -f $HA_RSCTMP/SAPHana/SAPTopologyON ]; then + rc=$OCF_SUCCESS + else + rc=$OCF_NOT_RUNNING + fi + +- super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } + +@@ -575,37 +598,37 @@ + # sht_status: Lightweight check of SAP instance only with OS tools + # + function sht_status() { +- super_ocf_log info "FLOW $FUNCNAME ($*)" +- local rc=0 ++ super_ocf_log info "FLOW $FUNCNAME ($*)" ++ local rc=0 + +- sht_monitor; rc=$? +- return $rc ++ sht_monitor; rc=$? 
++ return $rc + } + + + # + # function: sht_validate - validation of (some) variables/parameters + # params: - +-# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), +-# sht_validate: Check the symantic of the input parameters ++# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), ++# sht_validate: Check the symantic of the input parameters + # + function sht_validate() { +- super_ocf_log info "FLOW $FUNCNAME ($*)" +- local rc=$OCF_SUCCESS +- if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ] +- then +- super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!" +- rc=$OCF_ERR_ARGS +- fi ++ super_ocf_log info "FLOW $FUNCNAME ($*)" ++ local rc=$OCF_SUCCESS ++ if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ] ++ then ++ super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!" ++ rc=$OCF_ERR_ARGS ++ fi + +- if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ] +- then +- super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!" +- rc=$OCF_ERR_ARGS +- fi ++ if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ] ++ then ++ super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!" ++ rc=$OCF_ERR_ARGS ++ fi + +- super_ocf_log info "FLOW $FUNCNAME rc=$rc" +- return $rc ++ super_ocf_log info "FLOW $FUNCNAME rc=$rc" ++ return $rc + } + + # +@@ -661,15 +684,15 @@ + + if ocf_is_probe; then + super_ocf_log debug "DBG2: PROBE ONLY" ++ sht_monitor; rc=$? + else + super_ocf_log debug "DBG2: REGULAR MONITOR" + if ! check_saphostagent; then + start_saphostagent + fi +- fi + # + # First check, if we are PRIMARY or SECONDARY +- # ++ # + super_ocf_log debug "DBG2: HANA SID $SID" + super_ocf_log debug "DBG2: HANA InstanceName $InstanceName" + super_ocf_log debug "DBG2: HANA InstanceNr $InstanceNr" +@@ -721,8 +744,8 @@ + set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]} + fi + case "$hanaPrim" in +- P ) ;; +- S ) # only secondary may propargate its sync status ++ P ) ;; ++ S ) # only secondary may propargate its sync status + case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in + *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; + *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; +@@ -732,8 +755,10 @@ + for n in ${nodelist}; do + set_hana_attribute ${n} "$srmode" ${ATTR_NAME_HANA_SRMODE[@]} + done +- ;; ++ ;; + esac ++ # ++ fi # end ocf_is_NOT_probe + super_ocf_log info "FLOW $FUNCNAME rc=$rc" + return $rc + } +@@ -752,7 +777,7 @@ + } + + # +-# function: main - main function to operate ++# function: main - main function to operate + # params: ACTION + # globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), DIR_EXECUTABLE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r) + # +@@ -763,7 +788,7 @@ + InstanceName="" + InstanceNr="" + DIR_EXECUTABLE="" +-SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" ++SAPHanaFilter="ra-act-dec-lpa" + NODENAME=$(crm_node -n) + + if [ $# -ne 1 ] +@@ -785,11 +810,11 @@ + exit $OCF_SUCCESS;; + notify) sht_notify + exit $OCF_SUCCESS;; +- admin-setup) admin-setup +- exit $OCF_SUCCESS;; ++ admin-setup) admin-setup ++ exit $OCF_SUCCESS;; + *);; + esac +-sht_init ++sht_init + + if ! ocf_is_root + then +@@ -810,7 +835,6 @@ + exit $OCF_ERR_ARGS + fi + +- + if is_clone + then + CLACT=_clone +@@ -830,12 +854,12 @@ + sht_$ACTION$CLACT + ra_rc=$? + ;; +- validate-all) ++ validate-all) + sht_validate + ra_rc=$? 
+ ;; +- *) # seams to be a unknown request +- sht_methods ++ *) # seams to be a unknown request ++ sht_methods + ra_rc=$OCF_ERR_UNIMPLEMENTED + ;; + esac diff --git a/SOURCES/bz1293355-novacompute-novaevacuate-fix-evacute-typo.patch b/SOURCES/bz1293355-novacompute-novaevacuate-fix-evacute-typo.patch deleted file mode 100644 index 7a8feb7..0000000 --- a/SOURCES/bz1293355-novacompute-novaevacuate-fix-evacute-typo.patch +++ /dev/null @@ -1,47 +0,0 @@ -diff -uNr a/heartbeat/NovaCompute b/heartbeat/NovaCompute ---- a/heartbeat/NovaCompute 2015-11-17 10:13:13.403119585 +0100 -+++ b/heartbeat/NovaCompute 2015-11-17 10:20:49.632620122 +0100 -@@ -159,7 +159,7 @@ - return $OCF_SUCCESS - fi - -- state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) -+ state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) - if [ "x$state" = x ]; then - : never been fenced - -@@ -171,7 +171,7 @@ - else - ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}" - while [ "x$state" != "xno" ]; do -- state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) -+ state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) - sleep 5 - done - -diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate ---- a/heartbeat/NovaEvacuate 2015-11-17 10:13:13.403119585 +0100 -+++ b/heartbeat/NovaEvacuate 2015-11-17 10:38:14.424846295 +0100 -@@ -141,7 +141,7 @@ - } - - update_evacuation() { -- attrd_updater -p -n evacute -Q -N ${1} -v ${2} -+ attrd_updater -p -n evacuate -Q -N ${1} -v ${2} - arc=$? - if [ ${arc} != 0 ]; then - ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}" -@@ -219,7 +219,12 @@ - return $OCF_NOT_RUNNING - fi - -- handle_evacuations $(attrd_updater -n evacute -A | tr '="' ' ' | awk '{print $4" "$6}') -+ handle_evacuations $( -+ attrd_updater -n evacuate -A | -+ sed 's/ value=""/ value="no"/' | -+ tr '="' ' ' | -+ awk '{print $4" "$6}' -+ ) - return $OCF_SUCCESS - } - diff --git a/SOURCES/bz1296406-virtualdomain-migration_speed-migration_downtime.patch b/SOURCES/bz1296406-virtualdomain-migration_speed-migration_downtime.patch new file mode 100644 index 0000000..fdbde38 --- /dev/null +++ b/SOURCES/bz1296406-virtualdomain-migration_speed-migration_downtime.patch @@ -0,0 +1,101 @@ +diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain +--- a/heartbeat/VirtualDomain 2016-03-04 14:41:22.001333979 +0100 ++++ b/heartbeat/VirtualDomain 2016-03-04 14:42:34.516395470 +0100 +@@ -17,12 +17,16 @@ + . 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + + # Defaults ++OCF_RESKEY_migration_downtime_default=0 ++OCF_RESKEY_migration_speed_default=0 + OCF_RESKEY_force_stop_default=0 + OCF_RESKEY_autoset_utilization_cpu_default="true" + OCF_RESKEY_autoset_utilization_hv_memory_default="true" + OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 )) + OCF_RESKEY_CRM_meta_timeout_default=90000 + ++: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}} ++: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}} + : ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}} + : ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} + : ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} +@@ -96,6 +100,22 @@ + + + ++ ++ ++Define max downtime during live migration in milliseconds ++ ++Live migration downtime ++ ++ ++ ++ ++ ++Define live migration speed per resource in MiB/s ++ ++Live migration speed ++ ++ ++ + + + Use a dedicated migration network. The migration URI is composed by +@@ -562,6 +582,7 @@ + local transport_suffix + local migrateuri + local migrate_opts ++ local migrate_pid + + target_node="$OCF_RESKEY_CRM_meta_migrate_target" + +@@ -586,9 +607,28 @@ + # Scared of that sed expression? So am I. :-) + remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,") + ++ # Live migration speed limit ++ if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then ++ ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})." ++ virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed} ++ fi ++ + # OK, we know where to connect to. Now do the actual migration. +- ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)." +- virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri ++ ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)." ++ virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri & ++ ++ migrate_pid=${!} ++ ++ # Live migration downtime interval ++ # Note: You can set downtime only while live migration is in progress ++ if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then ++ sleep 2 ++ ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})." ++ virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime} ++ fi ++ ++ wait ${migrate_pid} ++ + rc=$? + if [ $rc -ne 0 ]; then + ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc" +@@ -671,6 +711,18 @@ + return $OCF_ERR_INSTALLED + fi + fi ++ ++ # Check if migration_speed is a decimal value ++ if ! ocf_is_decimal ${OCF_RESKEY_migration_speed}; then ++ ocf_exit_reason "migration_speed has to be a decimal value" ++ return $OCF_ERR_CONFIGURED ++ fi ++ ++ # Check if migration_downtime is a decimal value ++ if ! 
ocf_is_decimal ${OCF_RESKEY_migration_downtime}; then ++ ocf_exit_reason "migration_downtime has to be a decimal value" ++ return $OCF_ERR_CONFIGURED ++ fi + } + + if [ $# -ne 1 ]; then diff --git a/SOURCES/bz1299404-galera-custom-host-port.patch b/SOURCES/bz1299404-galera-custom-host-port.patch new file mode 100644 index 0000000..fc3b901 --- /dev/null +++ b/SOURCES/bz1299404-galera-custom-host-port.patch @@ -0,0 +1,33 @@ +From cbccff5ed9b1fc5641063f05ad531f897d366fa4 Mon Sep 17 00:00:00 2001 +From: Mike Bayer +Date: Tue, 15 Sep 2015 14:54:05 -0400 +Subject: [PATCH] galera: add support for MYSQL_HOST and MYSQL_PORT from + clustercheck + +--- + heartbeat/galera | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/heartbeat/galera b/heartbeat/galera +index 920507b..1a1a4ce 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -704,6 +704,18 @@ if [ -n "${OCF_RESKEY_check_passwd}" ]; then + MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" + fi + ++# This value is automatically sourced from /etc/sysconfig/checkcluster if available ++if [ -n "${MYSQL_HOST}" ]; then ++ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}" ++fi ++ ++# This value is automatically sourced from /etc/sysconfig/checkcluster if available ++if [ -n "${MYSQL_PORT}" ]; then ++ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}" ++fi ++ ++ ++ + # What kind of method was invoked? + case "$1" in + start) galera_start;; diff --git a/SOURCES/bz1301189-virtualdomain-fix-locale.patch b/SOURCES/bz1301189-virtualdomain-fix-locale.patch new file mode 100644 index 0000000..904cc07 --- /dev/null +++ b/SOURCES/bz1301189-virtualdomain-fix-locale.patch @@ -0,0 +1,35 @@ +diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain +--- a/heartbeat/VirtualDomain 2016-01-25 12:05:30.437008638 +0100 ++++ b/heartbeat/VirtualDomain 2016-01-25 12:25:06.850256377 +0100 +@@ -282,12 +282,13 @@ + status="no state" + while [ "$status" = "no state" ]; do + try=$(($try + 1 )) +- status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') ++ status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z') + case "$status" in +- *"error:"*"domain not found"*|"shut off") ++ *"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off") + # shut off: domain is defined, but not started, will not happen if + # domain is created but not defined +- # Domain not found: domain is not defined and thus not started ++ # "Domain not found" or "failed to get domain": domain is not defined ++ # and thus not started + ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)" + rc=$OCF_NOT_RUNNING + ;; +@@ -415,11 +416,12 @@ + local status=0 + + ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}." +- out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z') ++ out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z') + ex=$? 
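# --- Editor's aside (illustration only, not part of the patch series) ---
# The LANG=C added to the virsh calls above matters because the case patterns
# match English error strings ("domain not found", "failed to get domain");
# under a non-English locale libvirt localizes those messages and the patterns
# would never match. Hedged one-liner illustration (the domain name "testvm"
# is a placeholder):
#   LANG=C virsh domstate testvm 2>&1 | tr 'A-Z' 'a-z' | \
#       grep -Eq "domain not found|failed to get domain" && echo "not defined"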
+ echo >&2 "$out" + case $ex$out in +- *"error:"*"domain is not running"*|*"error:"*"domain not found"*) ++ *"error:"*"domain is not running"*|*"error:"*"domain not found"*|\ ++ *"error:"*"failed to get domain"*) + : ;; # unexpected path to the intended outcome, all is well + [!0]*) + ocf_exit_reason "forced stop failed" diff --git a/SOURCES/bz1303037-1-portblock.patch b/SOURCES/bz1303037-1-portblock.patch new file mode 100644 index 0000000..9ed8cd1 --- /dev/null +++ b/SOURCES/bz1303037-1-portblock.patch @@ -0,0 +1,207 @@ +diff -uNr a/heartbeat/portblock b/heartbeat/portblock +--- a/heartbeat/portblock 2013-06-18 15:22:27.000000000 +0200 ++++ b/heartbeat/portblock 2016-02-29 13:51:22.205860012 +0100 +@@ -24,8 +24,10 @@ + + # Defaults + OCF_RESKEY_ip_default="0.0.0.0/0" ++OCF_RESKEY_reset_local_on_unblock_stop_default="false" + + : ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}} ++: ${OCF_RESKEY_reset_local_on_unblock_stop=${OCF_RESKEY_reset_local_on_unblock_stop_default}} + ####################################################################### + CMD=`basename $0` + TICKLETCP=$HA_BIN/tickle_tcp +@@ -37,16 +39,22 @@ + + $CMD is used to temporarily block ports using iptables. + +- It can be used to turn off a port before bringing ++ It can be used to blackhole a port before bringing + up an IP address, and enable it after a service is started. +- To do that for samba, the following resource line can be used: ++ To do that for samba, the following can be used: + +- $CMD::tcp::137,138::block \\ +- 10.10.10.20 \\ +- nmbd smbd \\ +- $CMD::tcp::137,138::unblock ++ crm configure < + portno +- ++ + + + +@@ -149,6 +160,26 @@ + + + ++ ++ ++(try to) reset server TCP sessions when unblock stops ++ ++If for some reason the long lived server side TCP sessions won't be cleaned up ++by a reconfiguration/flush/stop of whatever services this portblock protects, ++they would linger in the connection table, even after the IP is gone ++and services have been switched over to an other node. ++ ++An example would be the default NFS kernel server. ++ ++These "known" connections may seriously confuse and delay a later switchback. ++ ++Enabling this option will cause this agent to try to get rid of these connections ++by injecting a temporary iptables rule to TCP-reset outgoing packets from the ++blocked ports, and additionally tickle them locally, ++just before it starts to DROP incoming packets on "unblock stop". ++ ++ ++ + + + The IP address used to be blocked/unblocked. +@@ -233,12 +264,34 @@ + fi + } + +-run_tickle_tcp() ++tickle_remote() + { + [ -z "$OCF_RESKEY_tickle_dir" ] && return + echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle + f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip +- [ -f $f ] && cat $f | $TICKLETCP -n 3 ++ [ -r $f ] || return ++ $TICKLETCP -n 3 < $f ++} ++ ++tickle_local() ++{ ++ [ -z "$OCF_RESKEY_tickle_dir" ] && return ++ f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip ++ [ -r $f ] || return ++ # swap "local" and "remote" address, ++ # so we tickle ourselves. ++ # We set up a REJECT with tcp-reset before we do so, so we get rid of ++ # the no longer wanted potentially long lived "ESTABLISHED" connection ++ # entries on the IP we are going to delet in a sec. These would get in ++ # the way if we switch-over and then switch-back in quick succession. 
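# --- Editor's aside (illustration only, not part of the patch series) ---
# The comment block above is the rationale for the awk swap that follows:
# each saved connection is replayed with the two address columns exchanged so
# that, together with the temporary "REJECT --reject-with tcp-reset" rule
# inserted by IptablesBLOCK(), the stale ESTABLISHED entries on the departing
# IP are torn down. Illustration of the swap only; the file path, addresses
# and the assumed "local remote" pair layout are not taken from the agent:
#   $ cat $OCF_RESKEY_tickle_dir/10.0.0.10
#   10.0.0.10:2049 192.168.1.23:33456        (assumed: local remote)
#   $ awk '{ print $2, $1; }' $OCF_RESKEY_tickle_dir/10.0.0.10
#   192.168.1.23:33456 10.0.0.10:2049        (swapped: "tickle ourselves")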
++ local i ++ awk '{ print $2, $1; }' $f | $TICKLETCP ++ netstat -tn | grep -Fw $OCF_RESKEY_ip || return ++ for i in 0.1 0.5 1 2 4 ; do ++ sleep $i ++ awk '{ print $2, $1; }' $f | $TICKLETCP ++ netstat -tn | grep -Fw $OCF_RESKEY_ip || break ++ done + } + + SayActive() +@@ -304,15 +357,30 @@ + #IptablesBLOCK {udp|tcp} portno,portno ip + IptablesBLOCK() + { ++ local rc=0 ++ local try_reset=false ++ if [ "$1/$4/$__OCF_ACTION" = tcp/unblock/stop ] && ++ ocf_is_true $reset_local_on_unblock_stop ++ then ++ try_reset=true ++ fi + if + chain_isactive "$1" "$2" "$3" + then + : OK -- chain already active + else ++ if $try_reset ; then ++ $IPTABLES -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset ++ tickle_local ++ fi + $IPTABLES -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP ++ rc=$? ++ if $try_reset ; then ++ $IPTABLES -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset ++ fi + fi + +- return $? ++ return $rc + } + + #IptablesUNBLOCK {udp|tcp} portno,portno ip +@@ -338,7 +406,7 @@ + unblock) + IptablesUNBLOCK "$@" + rc=$? +- run_tickle_tcp ++ tickle_remote + #ignore run_tickle_tcp exit code! + return $rc + ;; +@@ -411,6 +479,17 @@ + exit $OCF_ERR_CONFIGURED + ;; + esac ++ ++ if ocf_is_true $reset_local_on_unblock_stop; then ++ if [ $action != unblock ] ; then ++ ocf_log err "reset_local_on_unblock_stop is only relevant with action=unblock" ++ exit $OCF_ERR_CONFIGURED ++ fi ++ if [ -z $OCF_RESKEY_tickle_dir ] ; then ++ ocf_log warn "reset_local_on_unblock_stop works best with tickle_dir enabled as well" ++ fi ++ fi ++ + return $OCF_SUCCESS + } + +@@ -451,6 +530,7 @@ + portno=$OCF_RESKEY_portno + action=$OCF_RESKEY_action + ip=$OCF_RESKEY_ip ++reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop + + case $1 in + start) diff --git a/SOURCES/bz1303037-2-portblock.patch b/SOURCES/bz1303037-2-portblock.patch new file mode 100644 index 0000000..96d71cd --- /dev/null +++ b/SOURCES/bz1303037-2-portblock.patch @@ -0,0 +1,31 @@ +From 8ac05986ac7ef354456253edbd22cbb4a2d96e90 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 16 Sep 2016 10:19:38 +0200 +Subject: [PATCH] portblock: create tickle_dir if it doesnt exist + +--- + heartbeat/portblock | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/portblock b/heartbeat/portblock +index c480954..c97488b 100755 +--- a/heartbeat/portblock ++++ b/heartbeat/portblock +@@ -466,8 +466,7 @@ IptablesValidateAll() + exit $OCF_ERR_CONFIGURED + fi + if [ ! -d "$OCF_RESKEY_tickle_dir" ]; then +- ocf_log err "The tickle dir doesn't exist!" 
+- exit $OCF_ERR_INSTALLED ++ mkdir -p $OCF_RESKEY_tickle_dir + fi + fi + +@@ -534,6 +533,7 @@ reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop + + case $1 in + start) ++ IptablesValidateAll + IptablesStart $protocol $portno $ip $action + ;; + diff --git a/SOURCES/bz1303803-Backup-and-restore-rabbitmq-users-during-resource-re.patch b/SOURCES/bz1303803-Backup-and-restore-rabbitmq-users-during-resource-re.patch new file mode 100644 index 0000000..222a840 --- /dev/null +++ b/SOURCES/bz1303803-Backup-and-restore-rabbitmq-users-during-resource-re.patch @@ -0,0 +1,45 @@ +From: Peter Lemenkov +Date: Mon, 29 Feb 2016 12:46:50 +0100 +Subject: [PATCH] Backup and restore rabbitmq users during resource restart + +Signed-off-by: Peter Lemenkov + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index cc45f09..4545495 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -289,7 +289,19 @@ rmq_start() { + rmq_stop + rmq_wipe_data + rmq_join_existing "$join_list" +- if [ $? -ne 0 ]; then ++ rc=$? ++ ++ # Restore users (if any) ++ BaseDataDir=`dirname $RMQ_DATA_DIR` ++ if [ -f $BaseDataDir/users.erl ] ; then ++ rabbitmqctl eval " ++ {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), ++ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, X) end, Users). ++ " ++ rm -f $BaseDataDir/users.erl ++ fi ++ ++ if [ $rc -ne 0 ]; then + ocf_log info "node failed to join even after reseting local data. Check SELINUX policy" + return $OCF_ERR_GENERIC + fi +@@ -299,6 +311,13 @@ rmq_start() { + } + + rmq_stop() { ++ # Backup users ++ BaseDataDir=`dirname $RMQ_DATA_DIR` ++ rabbitmqctl eval " ++ Users = mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]), ++ file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). ++ " ++ + rmq_monitor + if [ $? -eq $OCF_NOT_RUNNING ]; then + return $OCF_SUCCESS diff --git a/SOURCES/bz1304019-novaevacuate-invoke-off-action.patch b/SOURCES/bz1304019-novaevacuate-invoke-off-action.patch deleted file mode 100644 index 99699bc..0000000 --- a/SOURCES/bz1304019-novaevacuate-invoke-off-action.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 5e9310bbbcd5086ea9a3edf85d523c4c2a57f1c3 Mon Sep 17 00:00:00 2001 -From: Andrew Beekhof -Date: Tue, 8 Dec 2015 13:54:12 +1100 -Subject: [PATCH] NovaEvacuate should invoke fence_compute with action 'off' - -Conceptually we are resurrecting in one direction only (off) and not -bringing it back to the current host afterwards (on) - -Also it will overwrite the attrd variable too soon. - -Change-Id: I9694945ca7eedae4f5cb6758fe1e8ce7f72ae808 ---- - ocf/NovaEvacuate | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate -index a17a159..0e22d7e 100644 ---- a/heartbeat/NovaEvacuate -+++ b/heartbeat/NovaEvacuate -@@ -198,7 +198,7 @@ handle_evacuations() { - return $OCF_SUCCESS - fi - -- fence_compute ${fence_options} -o reboot -n $node -+ fence_compute ${fence_options} -o off -n $node - rc=$? 
- - if [ $rc = 0 ]; then --- -1.9.1 - diff --git a/SOURCES/bz1304370-1-nfsserver-fix-systemd-status-detection.patch b/SOURCES/bz1304370-1-nfsserver-fix-systemd-status-detection.patch deleted file mode 100644 index 7e2d3b9..0000000 --- a/SOURCES/bz1304370-1-nfsserver-fix-systemd-status-detection.patch +++ /dev/null @@ -1,474 +0,0 @@ -diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver ---- a/heartbeat/nfsserver 2016-02-05 09:04:19.350003826 +0100 -+++ b/heartbeat/nfsserver 2016-02-05 09:04:58.463395839 +0100 -@@ -208,9 +208,9 @@ - - - -- -- -- -+ -+ -+ - - - -@@ -327,11 +327,12 @@ - nfs_exec() - { - local cmd=$1 -+ local svc=$2 - set_exec_mode - - case $EXEC_MODE in - 1) ${OCF_RESKEY_nfs_init_script} $cmd;; -- 2) systemctl $cmd nfs-server.service ;; -+ 2) systemctl $cmd ${svc}.service ;; - esac - } - -@@ -353,21 +354,117 @@ - - nfsserver_monitor () - { -+ # Skip trying to start processes once before failing -+ # when run from nfsserver_start () -+ if [ "$1" == "fromstart" ]; then -+ ocf_log info "fromstart" -+ fromstart=1 -+ else -+ tries=1 -+ fi -+ -+ # systemd -+ if [ "$EXEC_MODE" -eq "2" ]; then -+ ocf_log info "Status: rpcbind" -+ rpcinfo &> /dev/null -+ rc=$? -+ if [ "$rc" -ne "0" ]; then -+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -+ nfsserver_start frommonitor -+ rc=$? -+ let tries=$tries-1 -+ fi -+ if [ "$rc" -ne "0" ]; then -+ ocf_exit_reason "rpcbind is not running" -+ return $OCF_NOT_RUNNING -+ fi -+ fi -+ -+ ocf_log info "Status: nfs-mountd" -+ rpcinfo -t localhost 100005 &> /dev/null -+ rc=$? -+ if [ "$rc" -ne "0" ]; then -+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -+ nfsserver_start frommonitor -+ rc=$? -+ let tries=$tries-1 -+ fi -+ if [ "$rc" -ne "0" ]; then -+ ocf_exit_reason "nfs-mountd is not running" -+ return $OCF_NOT_RUNNING -+ fi -+ fi -+ -+ ocf_log info "Status: nfs-idmapd" -+ fn=`mktemp` -+ nfs_exec status nfs-idmapd > $fn 2>&1 -+ rc=$? -+ ocf_log debug "$(cat $fn)" -+ rm -f $fn -+ if [ "$rc" -ne "0" ]; then -+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -+ nfsserver_start frommonitor -+ rc=$? -+ ocf_log info "Tried to start services: rc: $rc" -+ let tries=$tries-1 -+ fi -+ if [ "$rc" -ne "0" ]; then -+ ocf_exit_reason "nfs-idmapd is not running" -+ return $OCF_NOT_RUNNING -+ fi -+ fi -+ -+ ocf_log info "Status: rpc-statd" -+ rpcinfo -t localhost 100024 &> /dev/null -+ rc=$? -+ if [ "$rc" -ne "0" ]; then -+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -+ nfsserver_start frommonitor -+ rc=$? -+ let tries=$tries-1 -+ fi -+ if [ "$rc" -ne "0" ]; then -+ ocf_exit_reason "rpc-statd is not running" -+ return $OCF_NOT_RUNNING -+ fi -+ fi -+ fi -+ - fn=`mktemp` -- nfs_exec status > $fn 2>&1 -+ nfs_exec status nfs-server > $fn 2>&1 - rc=$? - ocf_log debug "$(cat $fn)" - rm -f $fn - -- #Adapte LSB status code to OCF return code -+ tfn="/proc/fs/nfsd/threads" -+ if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then -+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -+ nfsserver_start frommonitor -+ rc=$? -+ let tries=$tries-1 -+ fi -+ if [ "$rc" -ne "0" ]; then -+ ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads" -+ return $OCF_NOT_RUNNING -+ fi -+ fi -+ -+ #Adapt LSB status code to OCF return code - if [ $rc -eq 0 ]; then - # don't report success if nfs servers are up - # without locking daemons. - v3locking_exec "status" - rc=$? - if [ $rc -ne 0 ]; then -- ocf_exit_reason "NFS server is up, but the locking daemons are down" -- rc=$OCF_ERR_GENERIC -+ if [ ! 
"$fromstart" ] && [ $tries -gt "0" ]; then -+ nfsserver_start frommonitor -+ rc=$? -+ let tries=$tries-1 -+ fi -+ if [ "$rc" -ne "0" ]; then -+ ocf_exit_reason "NFS server is up, but the locking daemons are down" -+ rc=$OCF_ERR_GENERIC -+ fi - fi - return $rc - elif [ $rc -eq 3 ]; then -@@ -391,12 +488,7 @@ - # only write to the tmp /etc/sysconfig/nfs if sysconfig exists. - # otherwise this distro does not support setting these options. - if [ -d "/etc/sysconfig" ]; then -- # replace if the value exists, append otherwise -- if grep "^\s*${key}=" $file ; then -- sed -i "s/\s*${key}=.*$/${key}=\"${value}\"/" $file -- else -- echo "${key}=\"${value}\"" >> $file -- fi -+ echo "${key}=\"${value}\"" >> $file - elif [ "$requires_sysconfig" = "true" ]; then - ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args." - fi -@@ -409,11 +501,6 @@ - local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX) - local statd_args - -- if [ -f "$NFS_SYSCONFIG" ]; then -- ## Take the $NFS_SYSCONFIG file as our skeleton -- cp $NFS_SYSCONFIG $tmpconfig -- fi -- - # nfsd args - set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true" - -@@ -444,20 +531,14 @@ - - # override local nfs config. preserve previous local config though. - if [ -s $tmpconfig ]; then -- cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 -+ cat $NFS_SYSCONFIG | grep -e "$NFS_SYSCONFIG_AUTOGEN_TAG" - if [ $? -ne 0 ]; then - # backup local nfs config if it doesn't have our HA autogen tag in it. - mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP - fi -- -- cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1 -- if [ $? -ne 0 ]; then -- echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG -- echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG -- cat $tmpconfig >> $NFS_SYSCONFIG -- else -- cat $tmpconfig > $NFS_SYSCONFIG -- fi -+ echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG -+ echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG -+ cat $tmpconfig >> $NFS_SYSCONFIG - fi - rm -f $tmpconfig - } -@@ -476,14 +557,13 @@ - [ -d "$fp/$STATD_DIR/sm" ] || mkdir -p "$fp/$STATD_DIR/sm" - [ -d "$fp/$STATD_DIR/sm.ha" ] || mkdir -p "$fp/$STATD_DIR/sm.ha" - [ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak" -- [ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] && -- chown -R rpcuser.rpcuser "$fp/$STATD_DIR" -+ [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown -R rpcuser.rpcuser "$fp/$STATD_DIR" - - [ -f "$fp/etab" ] || touch "$fp/etab" - [ -f "$fp/xtab" ] || touch "$fp/xtab" - [ -f "$fp/rmtab" ] || touch "$fp/rmtab" - -- dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 >/dev/null 2>&1 -+ dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 &> /dev/null - [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state" - [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp" - } -@@ -563,15 +643,15 @@ - - terminate() - { -- local pids -- local i=0 -+ declare pids -+ declare i=0 - - while : ; do - pids=$(binary_status $1) - [ -z "$pids" ] && return 0 - kill $pids - sleep 1 -- i=$((i + 1)) -+ ((i++)) - [ $i -gt 3 ] && return 1 - done - } -@@ -579,22 +659,22 @@ - - killkill() - { -- local pids -- local i=0 -+ declare pids -+ declare i=0 - - while : ; do - pids=$(binary_status $1) - [ -z "$pids" ] && return 0 - kill -9 $pids - sleep 1 -- i=$((i + 1)) -+ ((i++)) - [ $i -gt 3 ] && return 1 - done - } - - stop_process() - { 
-- local process=$1 -+ declare process=$1 - - ocf_log info "Stopping $process" - if terminate $process; then -@@ -665,9 +745,14 @@ - - nfsserver_start () - { -+ # Skip monitor check when run from nfsserver_monitor () -+ if [ "$1" == "frommonitor" ]; then -+ frommonitor=1 -+ fi -+ - local rc; - -- if nfsserver_monitor; then -+ if [ ! "$frommonitor" ] && nfsserver_monitor fromstart; then - ocf_log debug "NFS server is already started" - return $OCF_SUCCESS - fi -@@ -693,11 +778,32 @@ - modprobe nfsd - fi - -+ # systemd -+ if [ "$EXEC_MODE" -eq "2" ]; then -+ nfs_exec start rpcbind -+ local i=10 -+ while [ "$i" -gt 0 ]; do -+ ocf_log info "Start: rpcbind i: $i" -+ rpcinfo &> /dev/null -+ rc=$? -+ if [ "$rc" -eq "0" ]; then -+ break; -+ fi -+ sleep 1 -+ let i=$i-1 -+ done -+ if [ "$i" -eq 0 ]; then -+ ocf_exit_reason "Failed to start rpcbind" -+ return $OCF_ERR_GENERIC -+ fi -+ fi -+ - # check to see if we need to start rpc.statd - v3locking_exec "status" - if [ $? -ne $OCF_SUCCESS ]; then - v3locking_exec "start" - rc=$? -+ ocf_log info "Start: v3locking: $rc" - if [ $rc -ne 0 ]; then - ocf_exit_reason "Failed to start NFS server locking daemons" - return $rc -@@ -706,8 +812,65 @@ - ocf_log info "rpc.statd already up" - fi - -+ # systemd -+ if [ "$EXEC_MODE" -eq "2" ]; then -+ nfs_exec start nfs-mountd -+ local i=10 -+ while [ "$i" -gt 0 ]; do -+ ocf_log info "Start: nfs-mountd i: $i" -+ rpcinfo -t localhost 100005 &> /dev/null -+ rc=$? -+ if [ "$rc" -eq "0" ]; then -+ break; -+ fi -+ sleep 1 -+ let i=$i-1 -+ done -+ if [ "$i" -eq 0 ]; then -+ ocf_exit_reason "Failed to start nfs-mountd" -+ return $OCF_ERR_GENERIC -+ fi -+ -+ nfs_exec start nfs-idmapd -+ local i=10 -+ while [ "$i" -gt 0 ]; do -+ ocf_log info "Start: nfs-idmapd i: $i" -+ fn=`mktemp` -+ nfs_exec status nfs-idmapd > $fn 2>&1 -+ rc=$? -+ ocf_log debug "$(cat $fn)" -+ rm -f $fn -+ if [ "$rc" -eq "0" ]; then -+ break; -+ fi -+ sleep 1 -+ let i=$i-1 -+ done -+ if [ "$i" -eq 0 ]; then -+ ocf_exit_reason "Failed to start nfs-idmapd" -+ return $OCF_ERR_GENERIC -+ fi -+ -+ nfs_exec start rpc-statd -+ local i=10 -+ while [ "$i" -gt 0 ]; do -+ ocf_log info "Start: rpc-statd i: $i" -+ rpcinfo -t localhost 100024 &> /dev/null -+ rc=$? -+ if [ "$rc" -eq "0" ]; then -+ break; -+ fi -+ sleep 1 -+ let i=$i-1 -+ done -+ if [ "$i" -eq 0 ]; then -+ ocf_exit_reason "Failed to start rpc-statd" -+ return $OCF_ERR_GENERIC -+ fi -+ fi -+ - fn=`mktemp` -- nfs_exec start > $fn 2>&1 -+ nfs_exec start nfs-server > $fn 2>&1 - rc=$? - ocf_log debug "$(cat $fn)" - rm -f $fn -@@ -717,6 +880,12 @@ - return $rc - fi - -+ tfn="/proc/fs/nfsd/threads" -+ if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then -+ ocf_exit_reason "Failed to start NFS server: /proc/fs/nfsd/threads" -+ return $OCF_ERR_GENERIC -+ fi -+ - notify_locks - - ocf_log info "NFS server started" -@@ -733,24 +902,71 @@ - cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1 - - fn=`mktemp` -- nfs_exec stop > $fn 2>&1 -+ nfs_exec stop nfs-server > $fn 2>&1 - rc=$? 
- ocf_log debug "$(cat $fn)" - rm -f $fn - -+ if [ $rc -ne 0 ]; then -+ ocf_exit_reason "Failed to stop NFS server" -+ return $rc -+ fi -+ -+ # systemd -+ if [ "$EXEC_MODE" -eq "2" ]; then -+ ocf_log info "Stop: threads" -+ tfn="/proc/fs/nfsd/threads" -+ if [ -f "$tfn" ] && [ "$(cat $tfn)" -gt "0" ]; then -+ ocf_exit_reason "NFS server failed to stop: /proc/fs/nfsd/threads" -+ return $OCF_ERR_GENERIC -+ fi -+ -+ nfs_exec stop rpc-statd &> /dev/null -+ ocf_log info "Stop: rpc-statd" -+ rpcinfo -t localhost 100024 &> /dev/null -+ rc=$? -+ if [ "$rc" -eq "0" ]; then -+ ocf_exit_reason "Failed to stop rpc-statd" -+ return $OCF_ERR_GENERIC -+ fi -+ -+ nfs_exec stop nfs-idmapd &> /dev/null -+ ocf_log info "Stop: nfs-idmapd" -+ fn=`mktemp` -+ nfs_exec status nfs-idmapd > $fn 2>&1 -+ rc=$? -+ ocf_log debug "$(cat $fn)" -+ rm -f $fn -+ if [ "$rc" -eq "0" ]; then -+ ocf_exit_reason "Failed to stop nfs-idmapd" -+ return $OCF_ERR_GENERIC -+ fi -+ -+ nfs_exec stop nfs-mountd &> /dev/null -+ ocf_log info "Stop: nfs-mountd" -+ rpcinfo -t localhost 100005 &> /dev/null -+ rc=$? -+ if [ "$rc" -eq "0" ]; then -+ ocf_exit_reason "Failed to stop nfs-mountd" -+ return $OCF_ERR_GENERIC -+ fi -+ fi -+ - v3locking_exec "stop" - if [ $? -ne 0 ]; then - ocf_exit_reason "Failed to stop NFS locking daemons" - rc=$OCF_ERR_GENERIC - fi - -- if [ $rc -eq 0 ]; then -- unbind_tree -- ocf_log info "NFS server stopped" -- else -- ocf_exit_reason "Failed to stop NFS server" -+ # systemd -+ if [ "$EXEC_MODE" -eq "2" ]; then -+ nfs_exec stop rpcbind &> /dev/null -+ ocf_log info "Stop: rpcbind" - fi -- return $rc -+ -+ unbind_tree -+ ocf_log info "NFS server stopped" -+ return 0 - } - - nfsserver_validate () diff --git a/SOURCES/bz1304370-2-nfsserver-fix-systemd-status-detection.patch b/SOURCES/bz1304370-2-nfsserver-fix-systemd-status-detection.patch deleted file mode 100644 index 74ec413..0000000 --- a/SOURCES/bz1304370-2-nfsserver-fix-systemd-status-detection.patch +++ /dev/null @@ -1,337 +0,0 @@ -diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver ---- a/heartbeat/nfsserver 2016-07-21 12:40:55.417326145 +0200 -+++ b/heartbeat/nfsserver 2016-07-21 12:04:49.000000000 +0200 -@@ -352,45 +352,22 @@ - - nfsserver_monitor () - { -- # Skip trying to start processes once before failing -- # when run from nfsserver_start () -- if [ "$1" == "fromstart" ]; then -- ocf_log info "fromstart" -- fromstart=1 -- else -- tries=1 -- fi -- - # systemd - if [ "$EXEC_MODE" -eq "2" ]; then - ocf_log info "Status: rpcbind" -- rpcinfo &> /dev/null -+ rpcinfo > /dev/null 2>&1 - rc=$? - if [ "$rc" -ne "0" ]; then -- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -- nfsserver_start frommonitor -- rc=$? -- let tries=$tries-1 -- fi -- if [ "$rc" -ne "0" ]; then -- ocf_exit_reason "rpcbind is not running" -- return $OCF_NOT_RUNNING -- fi -+ ocf_exit_reason "rpcbind is not running" -+ return $OCF_NOT_RUNNING - fi - - ocf_log info "Status: nfs-mountd" -- rpcinfo -t localhost 100005 &> /dev/null -+ rpcinfo -t localhost 100005 > /dev/null 2>&1 - rc=$? - if [ "$rc" -ne "0" ]; then -- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -- nfsserver_start frommonitor -- rc=$? -- let tries=$tries-1 -- fi -- if [ "$rc" -ne "0" ]; then -- ocf_exit_reason "nfs-mountd is not running" -- return $OCF_NOT_RUNNING -- fi -+ ocf_exit_reason "nfs-mountd is not running" -+ return $OCF_NOT_RUNNING - fi - - ocf_log info "Status: nfs-idmapd" -@@ -400,31 +377,16 @@ - ocf_log debug "$(cat $fn)" - rm -f $fn - if [ "$rc" -ne "0" ]; then -- if [ ! 
"$fromstart" ] && [ "$tries" -gt "0" ]; then -- nfsserver_start frommonitor -- rc=$? -- ocf_log info "Tried to start services: rc: $rc" -- let tries=$tries-1 -- fi -- if [ "$rc" -ne "0" ]; then -- ocf_exit_reason "nfs-idmapd is not running" -- return $OCF_NOT_RUNNING -- fi -+ ocf_exit_reason "nfs-idmapd is not running" -+ return $OCF_NOT_RUNNING - fi - - ocf_log info "Status: rpc-statd" -- rpcinfo -t localhost 100024 &> /dev/null -+ rpcinfo -t localhost 100024 > /dev/null 2>&1 - rc=$? - if [ "$rc" -ne "0" ]; then -- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -- nfsserver_start frommonitor -- rc=$? -- let tries=$tries-1 -- fi -- if [ "$rc" -ne "0" ]; then -- ocf_exit_reason "rpc-statd is not running" -- return $OCF_NOT_RUNNING -- fi -+ ocf_exit_reason "rpc-statd is not running" -+ return $OCF_NOT_RUNNING - fi - fi - -@@ -436,15 +398,8 @@ - - tfn="/proc/fs/nfsd/threads" - if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then -- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then -- nfsserver_start frommonitor -- rc=$? -- let tries=$tries-1 -- fi -- if [ "$rc" -ne "0" ]; then -- ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads" -- return $OCF_NOT_RUNNING -- fi -+ ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads" -+ return $OCF_NOT_RUNNING - fi - - #Adapt LSB status code to OCF return code -@@ -454,15 +409,8 @@ - v3locking_exec "status" - rc=$? - if [ $rc -ne 0 ]; then -- if [ ! "$fromstart" ] && [ $tries -gt "0" ]; then -- nfsserver_start frommonitor -- rc=$? -- let tries=$tries-1 -- fi -- if [ "$rc" -ne "0" ]; then -- ocf_exit_reason "NFS server is up, but the locking daemons are down" -- rc=$OCF_ERR_GENERIC -- fi -+ ocf_exit_reason "NFS server is up, but the locking daemons are down" -+ rc=$OCF_ERR_GENERIC - fi - return $rc - elif [ $rc -eq 3 ]; then -@@ -561,7 +509,7 @@ - [ -f "$fp/xtab" ] || touch "$fp/xtab" - [ -f "$fp/rmtab" ] || touch "$fp/rmtab" - -- dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 &> /dev/null -+ dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 > /dev/null 2>&1 - [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state" - [ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp" - } -@@ -656,15 +604,15 @@ - - terminate() - { -- declare pids -- declare i=0 -+ local pids -+ local i=0 - - while : ; do - pids=$(binary_status $1) - [ -z "$pids" ] && return 0 - kill $pids - sleep 1 -- ((i++)) -+ i=$((i + 1)) - [ $i -gt 3 ] && return 1 - done - } -@@ -672,22 +620,22 @@ - - killkill() - { -- declare pids -- declare i=0 -+ local pids -+ local i=0 - - while : ; do - pids=$(binary_status $1) - [ -z "$pids" ] && return 0 - kill -9 $pids - sleep 1 -- ((i++)) -+ i=$((i + 1)) - [ $i -gt 3 ] && return 1 - done - } - - stop_process() - { -- declare process=$1 -+ local process=$1 - - ocf_log info "Stopping $process" - if terminate $process; then -@@ -758,14 +706,9 @@ - - nfsserver_start () - { -- # Skip monitor check when run from nfsserver_monitor () -- if [ "$1" == "frommonitor" ]; then -- frommonitor=1 -- fi -- - local rc; - -- if [ ! "$frommonitor" ] && nfsserver_monitor fromstart; then -+ if nfsserver_monitor; then - ocf_log debug "NFS server is already started" - return $OCF_SUCCESS - fi -@@ -796,21 +739,17 @@ - # systemd - if [ "$EXEC_MODE" -eq "2" ]; then - nfs_exec start rpcbind -- local i=10 -- while [ "$i" -gt 0 ]; do -+ local i=1 -+ while : ; do - ocf_log info "Start: rpcbind i: $i" -- rpcinfo &> /dev/null -+ rpcinfo > /dev/null 2>&1 - rc=$? 
- if [ "$rc" -eq "0" ]; then - break; - fi - sleep 1 -- let i=$i-1 -+ i=$((i + 1)) - done -- if [ "$i" -eq 0 ]; then -- ocf_exit_reason "Failed to start rpcbind" -- return $OCF_ERR_GENERIC -- fi - fi - - # check to see if we need to start rpc.statd -@@ -830,25 +769,21 @@ - # systemd - if [ "$EXEC_MODE" -eq "2" ]; then - nfs_exec start nfs-mountd -- local i=10 -- while [ "$i" -gt 0 ]; do -+ local i=1 -+ while : ; do - ocf_log info "Start: nfs-mountd i: $i" -- rpcinfo -t localhost 100005 &> /dev/null -+ rpcinfo -t localhost 100005 > /dev/null 2>&1 - rc=$? - if [ "$rc" -eq "0" ]; then - break; - fi - sleep 1 -- let i=$i-1 -+ i=$((i + 1)) - done -- if [ "$i" -eq 0 ]; then -- ocf_exit_reason "Failed to start nfs-mountd" -- return $OCF_ERR_GENERIC -- fi - - nfs_exec start nfs-idmapd -- local i=10 -- while [ "$i" -gt 0 ]; do -+ local i=1 -+ while : ; do - ocf_log info "Start: nfs-idmapd i: $i" - fn=`mktemp` - nfs_exec status nfs-idmapd > $fn 2>&1 -@@ -859,29 +794,21 @@ - break; - fi - sleep 1 -- let i=$i-1 -+ i=$((i + 1)) - done -- if [ "$i" -eq 0 ]; then -- ocf_exit_reason "Failed to start nfs-idmapd" -- return $OCF_ERR_GENERIC -- fi - - nfs_exec start rpc-statd -- local i=10 -- while [ "$i" -gt 0 ]; do -+ local i=1 -+ while : ; do - ocf_log info "Start: rpc-statd i: $i" -- rpcinfo -t localhost 100024 &> /dev/null -+ rpcinfo -t localhost 100024 > /dev/null 2>&1 - rc=$? - if [ "$rc" -eq "0" ]; then - break; - fi - sleep 1 -- let i=$i-1 -+ i=$((i + 1)) - done -- if [ "$i" -eq 0 ]; then -- ocf_exit_reason "Failed to start rpc-statd" -- return $OCF_ERR_GENERIC -- fi - fi - - fn=`mktemp` -@@ -936,16 +863,16 @@ - return $OCF_ERR_GENERIC - fi - -- nfs_exec stop rpc-statd &> /dev/null -+ nfs_exec stop rpc-statd > /dev/null 2>&1 - ocf_log info "Stop: rpc-statd" -- rpcinfo -t localhost 100024 &> /dev/null -+ rpcinfo -t localhost 100024 > /dev/null 2>&1 - rc=$? - if [ "$rc" -eq "0" ]; then - ocf_exit_reason "Failed to stop rpc-statd" - return $OCF_ERR_GENERIC - fi - -- nfs_exec stop nfs-idmapd &> /dev/null -+ nfs_exec stop nfs-idmapd > /dev/null 2>&1 - ocf_log info "Stop: nfs-idmapd" - fn=`mktemp` - nfs_exec status nfs-idmapd > $fn 2>&1 -@@ -957,9 +884,9 @@ - return $OCF_ERR_GENERIC - fi - -- nfs_exec stop nfs-mountd &> /dev/null -+ nfs_exec stop nfs-mountd > /dev/null 2>&1 - ocf_log info "Stop: nfs-mountd" -- rpcinfo -t localhost 100005 &> /dev/null -+ rpcinfo -t localhost 100005 > /dev/null 2>&1 - rc=$? 
- if [ "$rc" -eq "0" ]; then - ocf_exit_reason "Failed to stop nfs-mountd" -@@ -975,8 +902,11 @@ - - # systemd - if [ "$EXEC_MODE" -eq "2" ]; then -- nfs_exec stop rpcbind &> /dev/null -+ nfs_exec stop rpcbind > /dev/null 2>&1 - ocf_log info "Stop: rpcbind" -+ -+ nfs_exec stop rpc-gssd > /dev/null 2>&1 -+ ocf_log info "Stop: rpc-gssd" - fi - - unbind_tree diff --git a/SOURCES/bz1304711-galera-custom-host-port.patch b/SOURCES/bz1304711-galera-custom-host-port.patch deleted file mode 100644 index fc3b901..0000000 --- a/SOURCES/bz1304711-galera-custom-host-port.patch +++ /dev/null @@ -1,33 +0,0 @@ -From cbccff5ed9b1fc5641063f05ad531f897d366fa4 Mon Sep 17 00:00:00 2001 -From: Mike Bayer -Date: Tue, 15 Sep 2015 14:54:05 -0400 -Subject: [PATCH] galera: add support for MYSQL_HOST and MYSQL_PORT from - clustercheck - ---- - heartbeat/galera | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/heartbeat/galera b/heartbeat/galera -index 920507b..1a1a4ce 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -704,6 +704,18 @@ if [ -n "${OCF_RESKEY_check_passwd}" ]; then - MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" - fi - -+# This value is automatically sourced from /etc/sysconfig/checkcluster if available -+if [ -n "${MYSQL_HOST}" ]; then -+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}" -+fi -+ -+# This value is automatically sourced from /etc/sysconfig/checkcluster if available -+if [ -n "${MYSQL_PORT}" ]; then -+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}" -+fi -+ -+ -+ - # What kind of method was invoked? - case "$1" in - start) galera_start;; diff --git a/SOURCES/bz1304811-replace-novacompute-with-nova-compute-wait.patch b/SOURCES/bz1304811-replace-novacompute-with-nova-compute-wait.patch deleted file mode 100644 index 55c2676..0000000 --- a/SOURCES/bz1304811-replace-novacompute-with-nova-compute-wait.patch +++ /dev/null @@ -1,702 +0,0 @@ -diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am ---- a/doc/man/Makefile.am 2016-01-13 09:03:39.443177797 +0100 -+++ b/doc/man/Makefile.am 2016-01-13 09:12:15.932795618 +0100 -@@ -73,7 +73,7 @@ - ocf_heartbeat_MailTo.7 \ - ocf_heartbeat_ManageRAID.7 \ - ocf_heartbeat_ManageVE.7 \ -- ocf_heartbeat_NovaCompute.7 \ -+ ocf_heartbeat_nova-compute-wait.7 \ - ocf_heartbeat_NovaEvacuate.7 \ - ocf_heartbeat_Pure-FTPd.7 \ - ocf_heartbeat_Raid1.7 \ -diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am ---- a/heartbeat/Makefile.am 2016-01-13 09:03:39.443177797 +0100 -+++ b/heartbeat/Makefile.am 2016-01-13 09:11:11.085604509 +0100 -@@ -52,7 +52,7 @@ - IPv6addr_LDADD = -lplumb $(LIBNETLIBS) - send_ua_LDADD = $(LIBNETLIBS) - --osp_SCRIPTS = NovaCompute \ -+osp_SCRIPTS = nova-compute-wait \ - NovaEvacuate - - ocf_SCRIPTS = ClusterMon \ -diff -uNr a/heartbeat/NovaCompute b/heartbeat/NovaCompute ---- a/heartbeat/NovaCompute 2016-01-13 09:03:39.439177858 +0100 -+++ b/heartbeat/NovaCompute 1970-01-01 01:00:00.000000000 +0100 -@@ -1,366 +0,0 @@ --#!/bin/sh --# --# --# NovaCompute agent manages compute daemons. --# --# Copyright (c) 2015 --# --# This program is free software; you can redistribute it and/or modify --# it under the terms of version 2 of the GNU General Public License as --# published by the Free Software Foundation. --# --# This program is distributed in the hope that it would be useful, but --# WITHOUT ANY WARRANTY; without even the implied warranty of --# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
--# --# Further, this software is distributed without any warranty that it is --# free of the rightful claim of any third person regarding infringement --# or the like. Any license provided herein, whether implied or --# otherwise, applies only to this software file. Patent licenses, if --# any, provided herein do not apply to combinations of this program with --# other software, or any other product whatsoever. --# --# You should have received a copy of the GNU General Public License --# along with this program; if not, write the Free Software Foundation, --# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. --# -- --####################################################################### --# Initialization: -- --### --: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} --. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs --### -- --: ${__OCF_ACTION=$1} -- --####################################################################### -- --meta_data() { -- cat < -- -- --1.0 -- -- --OpenStack Nova Compute Server. -- --OpenStack Nova Compute Server -- -- -- -- -- --Authorization URL for connecting to keystone in admin context -- --Authorization URL -- -- -- -- -- --Username for connecting to keystone in admin context -- --Username -- -- -- -- --Password for connecting to keystone in admin context -- --Password -- -- -- -- -- --Tenant name for connecting to keystone in admin context. --Note that with Keystone V3 tenant names are only unique within a domain. -- --Tenant name -- -- -- -- -- --DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN -- --DNS domain -- -- -- -- -- --Nova API location (internal, public or admin URL) -- --Nova API location (internal, public or admin URL) -- -- -- -- -- --Disable shared storage recovery for instances. Use at your own risk! -- --Disable shared storage recovery for instances -- -- -- -- -- --How long to wait for nova to finish evacuating instances elsewhere --before starting nova-compute. Only used when the agent detects --evacuations might be in progress. -- --You may need to increase the start timeout when increasing this value. -- --Delay to allow evacuations time to complete -- -- -- -- -- -- -- -- -- -- -- -- -- --END --} -- --####################################################################### -- --# don't exit on TERM, to test that lrmd makes sure that we do exit --trap sigterm_handler TERM --sigterm_handler() { -- ocf_log info "They use TERM to bring us down. No such luck." -- return --} -- --nova_usage() { -- cat </dev/null) -- if [ $? = 1 ]; then -- if [ "x${OCF_RESKEY_domain}" != x ]; then -- NOVA_HOST=$(uname -n | awk -F. '{print $1}') -- else -- NOVA_HOST=$(uname -n) -- fi -- fi -- -- # We only need to check a configured value, calculated ones are fine -- openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null -- if [ $? = 0 ]; then -- if [ "x${OCF_RESKEY_domain}" != x ]; then -- short_host=$(uname -n | awk -F. 
'{print $1}') -- if [ "x$NOVA_HOST" != "x${short_host}" ]; then -- ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function" -- rc=$OCF_ERR_CONFIGURED -- fi -- -- elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then -- ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function" -- rc=$OCF_ERR_CONFIGURED -- fi -- fi -- -- if [ $rc != $OCF_SUCCESS ]; then -- exit $rc -- fi -- return $rc --} -- --: ${OCF_RESKEY_evacuation_delay=120} --case $__OCF_ACTION in --meta-data) meta_data -- exit $OCF_SUCCESS -- ;; --usage|help) nova_usage -- exit $OCF_SUCCESS -- ;; --esac -- --case $__OCF_ACTION in --start) nova_validate; nova_start;; --stop) nova_stop;; --monitor) nova_validate; nova_monitor;; --notify) nova_notify;; --validate-all) exit $OCF_SUCCESS;; --*) nova_usage -- exit $OCF_ERR_UNIMPLEMENTED -- ;; --esac --rc=$? --ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" --exit $rc -diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait ---- a/heartbeat/nova-compute-wait 1970-01-01 01:00:00.000000000 +0100 -+++ b/heartbeat/nova-compute-wait 2016-01-12 16:09:20.863425170 +0100 -@@ -0,0 +1,304 @@ -+#!/bin/sh -+# -+# -+# nova-compute-wait agent manages compute daemons. -+# -+# Copyright (c) 2015 -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of version 2 of the GNU General Public License as -+# published by the Free Software Foundation. -+# -+# This program is distributed in the hope that it would be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -+# -+# Further, this software is distributed without any warranty that it is -+# free of the rightful claim of any third person regarding infringement -+# or the like. Any license provided herein, whether implied or -+# otherwise, applies only to this software file. Patent licenses, if -+# any, provided herein do not apply to combinations of this program with -+# other software, or any other product whatsoever. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, write the Free Software Foundation, -+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. -+# -+ -+####################################################################### -+# Initialization: -+ -+### -+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} -+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -+### -+ -+: ${__OCF_ACTION=$1} -+ -+####################################################################### -+ -+meta_data() { -+ cat < -+ -+ -+1.0 -+ -+ -+OpenStack Nova Compute Server. -+ -+OpenStack Nova Compute Server -+ -+ -+ -+ -+ -+Authorization URL for connecting to keystone in admin context -+ -+Authorization URL -+ -+ -+ -+ -+ -+Username for connecting to keystone in admin context -+ -+Username -+ -+ -+ -+ -+Password for connecting to keystone in admin context -+ -+Password -+ -+ -+ -+ -+ -+Tenant name for connecting to keystone in admin context. -+Note that with Keystone V3 tenant names are only unique within a domain. -+ -+Tenant name -+ -+ -+ -+ -+ -+DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN -+ -+DNS domain -+ -+ -+ -+ -+ -+Nova API location (internal, public or admin URL) -+ -+Nova API location (internal, public or admin URL) -+ -+ -+ -+ -+ -+Disable shared storage recovery for instances. Use at your own risk! 
-+ -+Disable shared storage recovery for instances -+ -+ -+ -+ -+ -+How long to wait for nova to finish evacuating instances elsewhere -+before starting nova-compute. Only used when the agent detects -+evacuations might be in progress. -+ -+You may need to increase the start timeout when increasing this value. -+ -+Delay to allow evacuations time to complete -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+END -+} -+ -+####################################################################### -+ -+# don't exit on TERM, to test that lrmd makes sure that we do exit -+trap sigterm_handler TERM -+sigterm_handler() { -+ ocf_log info "They use TERM to bring us down. No such luck." -+ return -+} -+ -+nova_usage() { -+ cat </dev/null) -+ if [ $? = 1 ]; then -+ if [ "x${OCF_RESKEY_domain}" != x ]; then -+ NOVA_HOST=$(uname -n | awk -F. '{print $1}') -+ else -+ NOVA_HOST=$(uname -n) -+ fi -+ fi -+ -+ # We only need to check a configured value, calculated ones are fine -+ openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null -+ if [ $? = 0 ]; then -+ if [ "x${OCF_RESKEY_domain}" != x ]; then -+ short_host=$(uname -n | awk -F. '{print $1}') -+ if [ "x$NOVA_HOST" != "x${short_host}" ]; then -+ ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function" -+ rc=$OCF_ERR_CONFIGURED -+ fi -+ -+ elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then -+ ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function" -+ rc=$OCF_ERR_CONFIGURED -+ fi -+ fi -+ -+ if [ $rc != $OCF_SUCCESS ]; then -+ exit $rc -+ fi -+ return $rc -+} -+ -+: ${OCF_RESKEY_evacuation_delay=120} -+case $__OCF_ACTION in -+meta-data) meta_data -+ exit $OCF_SUCCESS -+ ;; -+usage|help) nova_usage -+ exit $OCF_SUCCESS -+ ;; -+esac -+ -+case $__OCF_ACTION in -+start) nova_validate; nova_start;; -+stop) nova_stop;; -+monitor) nova_validate; nova_monitor;; -+notify) nova_notify;; -+validate-all) exit $OCF_SUCCESS;; -+*) nova_usage -+ exit $OCF_ERR_UNIMPLEMENTED -+ ;; -+esac -+rc=$? 
-+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" -+exit $rc diff --git a/SOURCES/bz1307160-virtualdomain-fix-unnecessary-error-when-probing-nonexistent-domain.patch b/SOURCES/bz1307160-virtualdomain-fix-unnecessary-error-when-probing-nonexistent-domain.patch new file mode 100644 index 0000000..406be02 --- /dev/null +++ b/SOURCES/bz1307160-virtualdomain-fix-unnecessary-error-when-probing-nonexistent-domain.patch @@ -0,0 +1,20 @@ +diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain +--- a/heartbeat/VirtualDomain 2016-04-26 12:22:22.345053246 +0200 ++++ b/heartbeat/VirtualDomain 2016-04-26 12:24:27.479535075 +0200 +@@ -263,8 +263,6 @@ + + if [ -n "$emulator" ]; then + basename $emulator +- else +- ocf_log error "Unable to determine emulator for $DOMAIN_NAME" + fi + } + +@@ -301,6 +299,7 @@ + ;; + # This can be expanded to check for additional emulators + *) ++ ocf_log error "Unable to determine emulator for $DOMAIN_NAME" + ;; + esac + diff --git a/SOURCES/bz1311180-rabbitmq-cluster-forget-stopped-cluster-nodes.patch b/SOURCES/bz1311180-rabbitmq-cluster-forget-stopped-cluster-nodes.patch deleted file mode 100644 index 585b8b3..0000000 --- a/SOURCES/bz1311180-rabbitmq-cluster-forget-stopped-cluster-nodes.patch +++ /dev/null @@ -1,92 +0,0 @@ -diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster ---- a/heartbeat/rabbitmq-cluster 2016-02-22 11:09:48.989128414 +0100 -+++ b/heartbeat/rabbitmq-cluster 2016-02-22 11:10:12.011835745 +0100 -@@ -39,7 +39,14 @@ - RMQ_LOG_DIR="/var/log/rabbitmq" - NODENAME=$(ocf_local_nodename) - -+# this attr represents the current active local rmq node name. -+# when rmq stops or the node is fenced, this attr disappears - RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}" -+# this attr represents the last known active local rmq node name -+# when rmp stops or the node is fenced, the attr stays forever so -+# we can continue to map an offline pcmk node to it's rmq node name -+# equivalent. -+RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}" - - meta_data() { - cat < -Date: Mon, 29 Feb 2016 12:46:50 +0100 -Subject: [PATCH] Backup and restore rabbitmq users during resource restart - -Signed-off-by: Peter Lemenkov - -diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster -index cc45f09..4545495 100755 ---- a/heartbeat/rabbitmq-cluster -+++ b/heartbeat/rabbitmq-cluster -@@ -289,7 +289,19 @@ rmq_start() { - rmq_stop - rmq_wipe_data - rmq_join_existing "$join_list" -- if [ $? -ne 0 ]; then -+ rc=$? -+ -+ # Restore users (if any) -+ BaseDataDir=`dirname $RMQ_DATA_DIR` -+ if [ -f $BaseDataDir/users.erl ] ; then -+ rabbitmqctl eval " -+ {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), -+ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, X) end, Users). -+ " -+ rm -f $BaseDataDir/users.erl -+ fi -+ -+ if [ $rc -ne 0 ]; then - ocf_log info "node failed to join even after reseting local data. Check SELINUX policy" - return $OCF_ERR_GENERIC - fi -@@ -299,6 +311,13 @@ rmq_start() { - } - - rmq_stop() { -+ # Backup users -+ BaseDataDir=`dirname $RMQ_DATA_DIR` -+ rabbitmqctl eval " -+ Users = mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]), -+ file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). -+ " -+ - rmq_monitor - if [ $? 
-eq $OCF_NOT_RUNNING ]; then - return $OCF_SUCCESS diff --git a/SOURCES/bz1317578-oralsnr-fails-if-username-is-longer-than-8-chars.patch b/SOURCES/bz1317578-oralsnr-fails-if-username-is-longer-than-8-chars.patch new file mode 100644 index 0000000..17c7001 --- /dev/null +++ b/SOURCES/bz1317578-oralsnr-fails-if-username-is-longer-than-8-chars.patch @@ -0,0 +1,15 @@ +diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr +index c47f121..622138c 100755 +--- a/heartbeat/oralsnr ++++ b/heartbeat/oralsnr +@@ -267,8 +267,8 @@ oralsnr_validate_all() { + + # used in ora-common.sh + show_procs() { +- ps -e -o pid,user,args | +- grep '[t]nslsnr' | grep -i -w "$listener" | grep -w "$ORACLE_OWNER" ++ ps -U "$ORACLE_OWNER" -o pid,user,args | ++ grep '[t]nslsnr' | grep -i -w "$listener" + } + proc_pids() { show_procs | awk '{print $1}'; } + PROCS_CLEANUP_TIME="10" diff --git a/SOURCES/bz1318744-galera-crash-recovery.patch b/SOURCES/bz1318744-galera-crash-recovery.patch deleted file mode 100644 index 3e51ad0..0000000 --- a/SOURCES/bz1318744-galera-crash-recovery.patch +++ /dev/null @@ -1,131 +0,0 @@ -From d9833b68498e306d181be11adf9eee14b646a899 Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Tue, 2 Feb 2016 14:34:36 +0100 -Subject: [PATCH] galera: force crash recovery if needed during last commit - detection - ---- - heartbeat/galera | 90 +++++++++++++++++++++++++++++++++++++------------------- - 1 file changed, 60 insertions(+), 30 deletions(-) - -diff --git a/heartbeat/galera b/heartbeat/galera -index 7be2b00..ca94c21 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -525,6 +525,58 @@ detect_first_master() - set_bootstrap_node $best_node - } - -+detect_last_commit() -+{ -+ local last_commit -+ local recover_args="--defaults-file=$OCF_RESKEY_config \ -+ --pid-file=$OCF_RESKEY_pid \ -+ --socket=$OCF_RESKEY_socket \ -+ --datadir=$OCF_RESKEY_datadir \ -+ --user=$OCF_RESKEY_user" -+ local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p' -+ -+ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" -+ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" -+ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then -+ local tmp=$(mktemp) -+ local tmperr=$(mktemp) -+ -+ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" -+ -+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr -+ -+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)" -+ if [ -z "$last_commit" ]; then -+ # Galera uses InnoDB's 2pc transactions internally. If -+ # server was stopped in the middle of a replication, the -+ # recovery may find a "prepared" XA transaction in the -+ # redo log, and mysql won't recover automatically -+ -+ cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null -+ if [ $? -eq 0 ]; then -+ # we can only rollback the transaction, but that's OK -+ # since the DB will get resynchronized anyway -+ ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover" -+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover \ -+ --tc-heuristic-recover=rollback > $tmp 2>/dev/null -+ -+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)" -+ fi -+ fi -+ rm -f $tmp $tmperr -+ fi -+ -+ if [ ! 
-z "$last_commit" ]; then -+ ocf_log info "Last commit version found: $last_commit" -+ set_last_commit $last_commit -+ return $OCF_SUCCESS -+ else -+ ocf_exit_reason "Unable to detect last known write sequence number" -+ clear_last_commit -+ return $OCF_ERR_GENERIC -+ fi -+} -+ - # For galera, promote is really start - galera_promote() - { -@@ -569,13 +620,15 @@ galera_demote() - clear_bootstrap_node - clear_last_commit - -- # record last commit by "starting" galera. start is just detection of the last sequence number -- galera_start -+ # record last commit for next promotion -+ detect_last_commit -+ rc=$? -+ return $rc - } - - galera_start() - { -- local last_commit -+ local rc - - echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME - if [ $? -ne 0 ]; then -@@ -591,34 +644,11 @@ galera_start() - - mysql_common_prepare_dirs - -- ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" -- last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" -- if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then -- ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" -- local tmp=$(mktemp) -- ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ -- --pid-file=$OCF_RESKEY_pid \ -- --socket=$OCF_RESKEY_socket \ -- --datadir=$OCF_RESKEY_datadir \ -- --user=$OCF_RESKEY_user \ -- --wsrep-recover > $tmp 2>&1 -- -- last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')" -- rm -f $tmp -- -- if [ "$last_commit" = "-1" ]; then -- last_commit="0" -- fi -- fi -- -- if [ -z "$last_commit" ]; then -- ocf_exit_reason "Unable to detect last known write sequence number" -- clear_last_commit -- return $OCF_ERR_GENERIC -+ detect_last_commit -+ rc=$? -+ if [ $rc -ne $OCF_SUCCESS ]; then -+ return $rc - fi -- ocf_log info "Last commit version found: $last_commit" -- -- set_last_commit $last_commit - - master_exists - if [ $? 
-eq 0 ]; then diff --git a/SOURCES/bz1318744-galera-heuristic-recovered.patch b/SOURCES/bz1318744-galera-heuristic-recovered.patch deleted file mode 100644 index 589fc11..0000000 --- a/SOURCES/bz1318744-galera-heuristic-recovered.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 4d98bbcdadda60166faf7ccc512b9095b439e2bd Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Tue, 2 Feb 2016 16:29:10 +0100 -Subject: [PATCH] galera: prevent recovered nodes from bootstrapping cluster - when possible - ---- - heartbeat/README.galera | 19 ++++++++++++++++++- - heartbeat/galera | 41 +++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 59 insertions(+), 1 deletion(-) - -diff --git a/heartbeat/galera b/heartbeat/galera -index ca94c21..84c92fd 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -276,6 +276,22 @@ is_bootstrap() - - } - -+set_heuristic_recovered() -+{ -+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true" -+} -+ -+clear_heuristic_recovered() -+{ -+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D -+} -+ -+is_heuristic_recovered() -+{ -+ local node=$1 -+ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null -+} -+ - clear_last_commit() - { - ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D -@@ -398,8 +414,19 @@ detect_first_master() - local best_node="$NODENAME" - local last_commit=0 - local missing_nodes=0 -+ local nodes="" -+ local nodes_recovered="" - -+ # avoid selecting a recovered node as bootstrap if possible - for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do -+ if is_heuristic_recovered $node; then -+ nodes_recovered="$nodes_recovered $node" -+ else -+ nodes="$nodes $node" -+ fi -+ done -+ -+ for node in $nodes_recovered $nodes; do - last_commit=$(get_last_commit $node) - - if [ -z "$last_commit" ]; then -@@ -466,6 +493,12 @@ detect_last_commit() - --tc-heuristic-recover=rollback > $tmp 2>/dev/null - - last_commit="$(cat $tmp | sed -n $recovered_position_regex)" -+ if [ ! -z "$last_commit" ]; then -+ ocf_log warn "State recovered. force SST at next restart for full resynchronization" -+ rm -f ${OCF_RESKEY_datadir}/grastate.dat -+ # try not to use this node if bootstrap is needed -+ set_heuristic_recovered -+ fi - fi - fi - rm -f $tmp $tmperr -@@ -549,11 +582,17 @@ galera_promote() - if ocf_is_true $bootstrap; then - promote_everyone - clear_bootstrap_node -+ # clear attribute heuristic-recovered. if last shutdown was -+ # not clean, we cannot be extra-cautious by requesting a SST -+ # since this is the bootstrap node -+ clear_heuristic_recovered - ocf_log info "Bootstrap complete, promoting the rest of the galera instances." - else - # if this is not the bootstrap node, make sure this instance - # syncs with the rest of the cluster before promotion returns. 
- wait_for_sync -+ # sync is done, clear info about last recovery -+ clear_heuristic_recovered - fi - - ocf_log info "Galera started" diff --git a/SOURCES/bz1318744-galera-no-grastate.patch b/SOURCES/bz1318744-galera-no-grastate.patch deleted file mode 100644 index 8f2ca23..0000000 --- a/SOURCES/bz1318744-galera-no-grastate.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 422ef6a2018ebf9d6765e1f2965778f42c6a9d9c Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Tue, 15 Mar 2016 18:45:13 +0100 -Subject: [PATCH] galera: don't bootstrap from a node with no grastate.dat when - possible - ---- - heartbeat/README.galera | 9 ++++----- - heartbeat/galera | 36 ++++++++++++++++++++++-------------- - 2 files changed, 26 insertions(+), 19 deletions(-) - -diff --git a/heartbeat/galera b/heartbeat/galera -index 72add3c..e4495be 100755 ---- a/heartbeat/galera -+++ b/heartbeat/galera -@@ -276,20 +276,20 @@ is_bootstrap() - - } - --set_heuristic_recovered() -+set_no_grastate() - { -- ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true" -+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -v "true" - } - --clear_heuristic_recovered() -+clear_no_grastate() - { -- ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D -+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -D - } - --is_heuristic_recovered() -+is_no_grastate() - { - local node=$1 -- ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null -+ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -Q 2>/dev/null - } - - clear_last_commit() -@@ -419,7 +419,7 @@ detect_first_master() - - # avoid selecting a recovered node as bootstrap if possible - for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do -- if is_heuristic_recovered $node; then -+ if is_no_grastate $node; then - nodes_recovered="$nodes_recovered $node" - else - nodes="$nodes $node" -@@ -473,6 +473,12 @@ detect_last_commit() - local tmp=$(mktemp) - local tmperr=$(mktemp) - -+ # if we pass here because grastate.dat doesn't exist, -+ # try not to bootstrap from this node if possible -+ if [ ! -f ${OCF_RESKEY_datadir}/grastate.dat ]; then -+ set_no_grastate -+ fi -+ - ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" - - ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr -@@ -496,8 +502,8 @@ detect_last_commit() - if [ ! -z "$last_commit" ]; then - ocf_log warn "State recovered. force SST at next restart for full resynchronization" - rm -f ${OCF_RESKEY_datadir}/grastate.dat -- # try not to use this node if bootstrap is needed -- set_heuristic_recovered -+ # try not to bootstrap from this node if possible -+ set_no_grastate - fi - fi - fi -@@ -582,17 +588,17 @@ galera_promote() - if ocf_is_true $bootstrap; then - promote_everyone - clear_bootstrap_node -- # clear attribute heuristic-recovered. if last shutdown was -+ # clear attribute no-grastate. if last shutdown was - # not clean, we cannot be extra-cautious by requesting a SST - # since this is the bootstrap node -- clear_heuristic_recovered -+ clear_no_grastate - ocf_log info "Bootstrap complete, promoting the rest of the galera instances." 
- else - # if this is not the bootstrap node, make sure this instance - # syncs with the rest of the cluster before promotion returns. - wait_for_sync -- # sync is done, clear info about last recovery -- clear_heuristic_recovered -+ # sync is done, clear info about last startup -+ clear_no_grastate - fi - - ocf_log info "Galera started" -@@ -611,6 +617,7 @@ galera_demote() - # if this node was previously a bootstrap node, that is no longer the case. - clear_bootstrap_node - clear_last_commit -+ clear_no_grastate - - # record last commit for next promotion - detect_last_commit -@@ -722,6 +729,7 @@ galera_stop() - clear_last_commit - clear_master_score - clear_bootstrap_node -+ clear_no_grastate - return $rc - } - diff --git a/SOURCES/bz1318985-oracle-fix-unable-to-start-because-of-ORA-01081.patch b/SOURCES/bz1318985-oracle-fix-unable-to-start-because-of-ORA-01081.patch new file mode 100644 index 0000000..eba8e5b --- /dev/null +++ b/SOURCES/bz1318985-oracle-fix-unable-to-start-because-of-ORA-01081.patch @@ -0,0 +1,22 @@ +From e1ce1963da082992494916f9548065ec8ff326f1 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 18 Mar 2016 11:10:17 +0100 +Subject: [PATCH] oracle: "shutdown immediate;" is needed after cleanup to be + able to recover from the ORA-01081 error + +--- + heartbeat/oracle | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/heartbeat/oracle b/heartbeat/oracle +index 951221c..5a8aca8 100755 +--- a/heartbeat/oracle ++++ b/heartbeat/oracle +@@ -611,6 +611,7 @@ oracle_start() { + if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then + ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)" + ora_cleanup ++ output=`dbasql dbstop_immediate` + output=`dbasql dbstart_mount` + fi + fi diff --git a/SOURCES/bz1320783-nova-compute-wait-fix-invalid-hostname-issue.patch b/SOURCES/bz1320783-nova-compute-wait-fix-invalid-hostname-issue.patch new file mode 100644 index 0000000..4f9e7cb --- /dev/null +++ b/SOURCES/bz1320783-nova-compute-wait-fix-invalid-hostname-issue.patch @@ -0,0 +1,169 @@ +diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait +--- a/heartbeat/nova-compute-wait 2016-05-13 11:50:54.434532591 +0200 ++++ b/heartbeat/nova-compute-wait 2016-05-13 12:04:41.997856291 +0200 +@@ -52,34 +52,25 @@ + + + +- +-Authorization URL for connecting to keystone in admin context +- +-Authorization URL ++Deprecated - do not use anymore. ++Deprecated - do not use anymore + + + + +- +-Username for connecting to keystone in admin context +- +-Username ++Deprecated - do not use anymore. ++Deprecated - do not use anymore + + + +- +-Password for connecting to keystone in admin context +- +-Password ++Deprecated - do not use anymore. ++Deprecated - do not use anymore + + + + +- +-Tenant name for connecting to keystone in admin context. +-Note that with Keystone V3 tenant names are only unique within a domain. +- +-Tenant name ++Deprecated - do not use anymore. ++Deprecated - do not use anymore + + + +@@ -92,18 +83,14 @@ + + + +- +-Nova API location (internal, public or admin URL) +- +-Nova API location (internal, public or admin URL) ++Deprecated - do not use anymore. ++Deprecated - do not use anymore + + + + +- +-Disable shared storage recovery for instances. Use at your own risk! +- +-Disable shared storage recovery for instances ++Deprecated - do not use anymore. 
++Deprecated - do not use anymore + + + +@@ -186,9 +173,8 @@ + + nova_validate() { + rc=$OCF_SUCCESS +- fence_options="" + +- check_binary openstack-config ++ check_binary crudini + check_binary nova-compute + + if [ ! -f /etc/nova/nova.conf ]; then +@@ -196,81 +182,14 @@ + exit $OCF_ERR_CONFIGURED + fi + +- if [ -z "${OCF_RESKEY_auth_url}" ]; then +- ocf_exit_reason "auth_url not configured" +- exit $OCF_ERR_CONFIGURED +- fi +- +- fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}" +- +- if [ -z "${OCF_RESKEY_username}" ]; then +- ocf_exit_reason "username not configured" +- exit $OCF_ERR_CONFIGURED +- fi +- +- fence_options="${fence_options} -l ${OCF_RESKEY_username}" +- +- if [ -z "${OCF_RESKEY_password}" ]; then +- ocf_exit_reason "password not configured" +- exit $OCF_ERR_CONFIGURED +- fi +- +- fence_options="${fence_options} -p ${OCF_RESKEY_password}" +- +- if [ -z "${OCF_RESKEY_tenant_name}" ]; then +- ocf_exit_reason "tenant_name not configured" +- exit $OCF_ERR_CONFIGURED +- fi +- +- fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}" +- +- if [ -n "${OCF_RESKEY_domain}" ]; then +- fence_options="${fence_options} -d ${OCF_RESKEY_domain}" +- fi +- +- if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then +- if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then +- fence_options="${fence_options} --no-shared-storage" +- fi +- fi +- +- if [ -n "${OCF_RESKEY_endpoint_type}" ]; then +- case ${OCF_RESKEY_endpoint_type} in +- adminURL|publicURL|internalURL) ;; +- *) +- ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL" +- exit $OCF_ERR_CONFIGURED +- ;; +- esac +- fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}" +- fi +- +- # we take a chance here and hope that host is either not configured +- # or configured in nova.conf +- +- NOVA_HOST=$(openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null) ++ NOVA_HOST=$(crudini --get /etc/nova/nova.conf DEFAULT host 2>/dev/null) + if [ $? = 1 ]; then +- if [ "x${OCF_RESKEY_domain}" != x ]; then +- NOVA_HOST=$(uname -n | awk -F. '{print $1}') +- else +- NOVA_HOST=$(uname -n) +- fi +- fi +- +- # We only need to check a configured value, calculated ones are fine +- openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null +- if [ $? = 0 ]; then +- if [ "x${OCF_RESKEY_domain}" != x ]; then +- short_host=$(uname -n | awk -F. '{print $1}') +- if [ "x$NOVA_HOST" != "x${short_host}" ]; then +- ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function" +- rc=$OCF_ERR_CONFIGURED +- fi +- +- elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then +- ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function" +- rc=$OCF_ERR_CONFIGURED +- fi ++ short_host=$(uname -n | awk -F. 
'{print $1}') ++ if [ "x${OCF_RESKEY_domain}" != x ]; then ++ NOVA_HOST=${short_host}.${OCF_RESKEY_domain} ++ else ++ NOVA_HOST=$(uname -n) ++ fi + fi + + if [ $rc != $OCF_SUCCESS ]; then diff --git a/SOURCES/bz1325453-nfsserver-var-lib-nfs-fix.patch b/SOURCES/bz1325453-nfsserver-var-lib-nfs-fix.patch new file mode 100644 index 0000000..7b96ee8 --- /dev/null +++ b/SOURCES/bz1325453-nfsserver-var-lib-nfs-fix.patch @@ -0,0 +1,29 @@ +diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver +--- a/heartbeat/nfsserver 2016-04-11 10:28:05.988977035 +0200 ++++ b/heartbeat/nfsserver 2016-04-11 16:31:50.150445968 +0200 +@@ -332,7 +332,11 @@ + + case $EXEC_MODE in + 1) ${OCF_RESKEY_nfs_init_script} $cmd;; +- 2) systemctl $cmd ${svc}.service ;; ++ 2) if ! echo $svc | grep -q "\."; then ++ svc="${svc}.service" ++ fi ++ systemctl $cmd $svc ++ ;; + esac + } + +@@ -587,6 +591,12 @@ + ocf_log debug "$fp is already bound to /var/lib/nfs" + return 0 + fi ++ ++ if nfs_exec status var-lib-nfs-rpc_pipefs.mount; then ++ ocf_log debug "/var/lib/nfs/rpc_pipefs already mounted. Unmounting in preparation to bind mount nfs dir" ++ nfs_exec stop var-lib-nfs-rpc_pipefs.mount ++ fi ++ + mount --bind $fp /var/lib/nfs + [ $SELINUX_ENABLED -eq 0 ] && restorecon /var/lib/nfs + } diff --git a/SOURCES/bz1328018-garbd-Introduces-garbd-resource-agent.patch b/SOURCES/bz1328018-garbd-Introduces-garbd-resource-agent.patch new file mode 100644 index 0000000..af18286 --- /dev/null +++ b/SOURCES/bz1328018-garbd-Introduces-garbd-resource-agent.patch @@ -0,0 +1,474 @@ +From beb8dd713fa3a15ca01738de33f2031d1e5925d9 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Wed, 1 Jun 2016 17:14:04 +0200 +Subject: [PATCH 1/2] garbd: Introduces garbd resource-agent + +--- + heartbeat/garbd | 417 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 417 insertions(+) + create mode 100755 heartbeat/garbd + +diff --git a/heartbeat/garbd b/heartbeat/garbd +new file mode 100755 +index 0000000..950df76 +--- /dev/null ++++ b/heartbeat/garbd +@@ -0,0 +1,417 @@ ++#!/bin/sh ++# ++# Copyright (c) 2015 Damien Ciabrini ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++ ++## ++# README. ++# ++# Resource agent for garbd, the Galera arbitrator ++# ++# You can use this agent if you run an even number of galera nodes, ++# and you want an additional node to avoid split-brain situations. 
++# ++# garbd requires that a Galera cluster is running, so make sure to ++# add a proper ordering constraint to the cluster, e.g.: ++# ++# pcs constraint order galera-master then garbd ++# ++# If you add garbd to the cluster while Galera is not running, you ++# might want to disable it before setting up ordering constraint, e.g.: ++# ++# pcs resource create garbd garbd \ ++# wsrep_cluster_address=gcomm://node1:4567,node2:4567 \ ++# meta target-role=stopped ++# ++# Use location constraints to avoid running galera and garbd on ++# the same node, e.g.: ++# ++# pcs constraint colocation add garbd with galera-master -INFINITY ++# pcs constraint location garbd prefers node3=INFINITY ++# ++## ++ ++####################################################################### ++# Initialization: ++ ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++####################################################################### ++# Set default paramenter values ++ ++OCF_RESKEY_binary_default="/usr/sbin/garbd" ++OCF_RESKEY_log_default="/var/log/garbd.log" ++OCF_RESKEY_pid_default="/var/run/garbd.pid" ++OCF_RESKEY_user_default="mysql" ++if [ "X${HOSTOS}" = "XOpenBSD" ];then ++ OCF_RESKEY_group_default="_mysql" ++else ++ OCF_RESKEY_group_default="mysql" ++fi ++ ++: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} ++: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} ++: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} ++: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} ++: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} ++ ++usage() { ++ cat < ++ ++ ++1.0 ++ ++ ++Resource script for managing Galera arbitrator. ++ ++Manages a galera arbitrator instance ++ ++ ++ ++ ++Location of the Galera arbitrator binary ++ ++garbd server binary ++ ++ ++ ++ ++ ++User running the garbd process ++ ++garbd user ++ ++ ++ ++ ++ ++Group running garbd (for logfile permissions) ++ ++garbd group ++ ++ ++ ++ ++ ++The logfile to be used for garbd. ++ ++Galera arbitrator log file ++ ++ ++ ++ ++ ++The pidfile to be used for garbd. ++ ++Galera arbitrator pidfile ++ ++ ++ ++ ++ ++Additional parameters which are passed to garbd on startup. ++ ++Additional parameters to pass to garbd ++ ++ ++ ++ ++ ++The galera cluster address. This takes the form of: ++gcomm://node:port,node:port,node:port ++ ++Unlike Galera servers, port is mandatory for garbd. ++ ++Galera cluster address ++ ++ ++ ++ ++ ++The group name of the Galera cluster to connect to. ++ ++Galera cluster name ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++} ++ ++ ++garbd_start() ++{ ++ local rc ++ local pid ++ local start_wait ++ local garbd_params ++ ++ garbd_status info ++ rc=$? ++ if [ $rc -eq $OCF_SUCCESS ]; then ++ ocf_exit_reason "garbd started outside of the cluster's control" ++ return $OCF_ERR_GENERIC; ++ fi ++ ++ touch $OCF_RESKEY_log ++ chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log ++ chmod 0640 $OCF_RESKEY_log ++ [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log ++ ++ garbd_params="--address=${OCF_RESKEY_wsrep_cluster_address} \ ++ --group ${OCF_RESKEY_wsrep_cluster_name} \ ++ --log ${OCF_RESKEY_log}" ++ ++ if [ ! 
-z "${OCF_RESKEY_options}" ]; then ++ garbd_params="${garbd_params} --options=${OCF_RESKEY_options}" ++ fi ++ ++ # garbd has no parameter to run as a specific user, ++ # so we need to start it by our own means ++ pid=$(su - -s /bin/sh $OCF_RESKEY_user -c "${OCF_RESKEY_binary} ${garbd_params} >/dev/null 2>&1 & echo \$!") ++ ++ # garbd doesn't create a pidfile either, so we create our own ++ echo $pid > $OCF_RESKEY_pid ++ if [ $? -ne 0 ]; then ++ ocf_exit_reason "Cannot create pidfile for garbd at $OCF_RESKEY_pid (rc=$?), please check your installation" ++ return $OCF_ERR_GENERIC ++ fi ++ ++ # Spin waiting for garbd to connect to the cluster. ++ # Let the CRM/LRM time us out if required. ++ start_wait=1 ++ while [ $start_wait -eq 1 ]; do ++ garbd_monitor info ++ rc=$? ++ if [ $rc -eq $OCF_NOT_RUNNING ]; then ++ ocf_exit_reason "garbd failed to start (pid=$pid), check logs in ${OCF_RESKEY_log}" ++ return $OCF_ERR_GENERIC ++ elif [ $rc -eq $OCF_SUCCESS ]; then ++ start_wait=0 ++ fi ++ sleep 2 ++ done ++ ++ ocf_log info "garbd connected to cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" ++ return $OCF_SUCCESS ++} ++ ++garbd_status() ++{ ++ local loglevel=$1 ++ local rc ++ ocf_pidfile_status $OCF_RESKEY_pid ++ rc=$? ++ ++ if [ $rc -eq 0 ]; then ++ return $OCF_SUCCESS ++ elif [ $rc -eq 2 ]; then ++ return $OCF_NOT_RUNNING ++ else ++ # clean up if pidfile is stale ++ if [ $rc -eq 1 ]; then ++ ocf_log $loglevel "garbd not running: removing old PID file" ++ rm -f $OCF_RESKEY_pid ++ fi ++ return $OCF_ERR_GENERIC ++ fi ++} ++ ++garbd_monitor() ++{ ++ local rc ++ local pid ++ local loglevel=$1 ++ ++ # Set loglevel to info during probe ++ if ocf_is_probe; then ++ loglevel="info" ++ fi ++ ++ garbd_status $loglevel ++ rc=$? ++ ++ # probe just wants to know if garbd is running or not ++ if [ ocf_is_probe -a $rc -ne $OCF_SUCCESS ]; then ++ rc=$OCF_NOT_RUNNING ++ fi ++ ++ # Consider garbd is working if it's connected to at least ++ # one node in the galera cluster. ++ # Note: a Galera node in Non-Primary state will be ++ # stopped by the galera RA. So we can assume that ++ # garbd will always be connected to the right partition ++ if [ $rc -eq $OCF_SUCCESS ]; then ++ pid=`cat $OCF_RESKEY_pid 2> /dev/null ` ++ netstat -tnp 2>/dev/null | grep -s -q "ESTABLISHED.*${pid}/" ++ if [ $? -ne 0 ]; then ++ ocf_log $loglevel "garbd disconnected from cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" ++ rc=$OCF_ERR_GENERIC ++ fi ++ fi ++ ++ return $rc ++} ++ ++garbd_stop() ++{ ++ local rc ++ local pid ++ ++ if [ ! -f $OCF_RESKEY_pid ]; then ++ ocf_log info "garbd is not running" ++ return $OCF_SUCCESS ++ fi ++ ++ pid=`cat $OCF_RESKEY_pid 2> /dev/null ` ++ ++ ocf_log info "stopping garbd" ++ ++ # make sure the process is stopped ++ ocf_stop_processes TERM 10 $pid ++ rc=$? ++ ++ if [ $rc -ne 0 ]; then ++ return $OCF_ERR_GENERIC ++ else ++ rm -f $OCF_RESKEY_pid ++ ocf_log info "garbd stopped" ++ return $OCF_SUCCESS ++ fi ++} ++ ++garbd_validate() ++{ ++ if ! have_binary "$OCF_RESKEY_binary"; then ++ ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary" ++ return $OCF_ERR_INSTALLED; ++ fi ++ ++ if ! have_binary "netstat"; then ++ ocf_exit_reason "Setup problem: couldn't find command: netstat" ++ return $OCF_ERR_INSTALLED; ++ fi ++ ++ if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then ++ ocf_exit_reason "garbd must be configured with a wsrep_cluster_address value." 
++ return $OCF_ERR_CONFIGURED ++ fi ++ ++ # unlike galera RA, ports must be set in cluster address for garbd ++ # https://github.com/codership/galera/issues/98 ++ for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do ++ echo $node | grep -s -q ':[1-9][0-9]*$' ++ if [ $? -ne 0 ]; then ++ ocf_exit_reason "wsrep_cluster_address must specify ports (gcomm://node1:port,node2:port)." ++ return $OCF_ERR_CONFIGURED ++ fi ++ done ++ ++ # Ensure that the encryption method is set if garbd is configured ++ # to use SSL. ++ echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_(key|cert)=' ++ if [ $? -eq 0 ]; then ++ echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_cipher=' ++ if [ $? -ne 0 ]; then ++ ocf_exit_reason "option socket.ssl_cipher must be set if SSL is enabled." ++ return $OCF_ERR_CONFIGURED ++ fi ++ fi ++ ++ if [ -z "$OCF_RESKEY_wsrep_cluster_name" ]; then ++ ocf_exit_reason "garbd must be configured with a wsrep_cluster_name value." ++ return $OCF_ERR_CONFIGURED ++ fi ++ ++ if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then ++ ocf_exit_reason "User $OCF_RESKEY_user doesn't exist" ++ return $OCF_ERR_INSTALLED ++ fi ++ ++ if ! getent group $OCF_RESKEY_group >/dev/null 2>&1; then ++ ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist" ++ return $OCF_ERR_INSTALLED ++ fi ++ ++ return $OCF_SUCCESS ++} ++ ++case "$1" in ++ meta-data) meta_data ++ exit $OCF_SUCCESS;; ++ usage|help) usage ++ exit $OCF_SUCCESS;; ++esac ++ ++garbd_validate ++rc=$? ++ ++# trap configuration errors early, but don't block stop in such cases ++LSB_STATUS_STOPPED=3 ++if [ $rc -ne 0 ]; then ++ case "$1" in ++ stop) exit $OCF_SUCCESS;; ++ status) exit $LSB_STATUS_STOPPED;; ++ *) exit $rc;; ++ esac ++fi ++ ++# What kind of method was invoked? 
++case "$1" in ++ start) garbd_start;; ++ stop) garbd_stop;; ++ status) garbd_status err;; ++ monitor) garbd_monitor err;; ++ promote) garbd_promote;; ++ demote) garbd_demote;; ++ validate-all) exit $OCF_SUCCESS;; ++ ++ *) usage ++ exit $OCF_ERR_UNIMPLEMENTED;; ++esac +-- +2.5.5 + + +From f36298aa97fc4cbed3e2eff28d6821f4314becbe Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Fri, 3 Jun 2016 18:27:38 +0200 +Subject: [PATCH 2/2] garbd: fix install and man page + +--- + doc/man/Makefile.am | 1 + + heartbeat/Makefile.am | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am +index 5e28895..25fb29b 100644 +--- a/doc/man/Makefile.am ++++ b/doc/man/Makefile.am +@@ -105,6 +105,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_exportfs.7 \ + ocf_heartbeat_fio.7 \ + ocf_heartbeat_galera.7 \ ++ ocf_heartbeat_garbd.7 \ + ocf_heartbeat_iSCSILogicalUnit.7 \ + ocf_heartbeat_iSCSITarget.7 \ + ocf_heartbeat_iface-bridge.7 \ +diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am +index b70c104..df0e3b8 100644 +--- a/heartbeat/Makefile.am ++++ b/heartbeat/Makefile.am +@@ -76,6 +76,7 @@ ocf_SCRIPTS = ClusterMon \ + Filesystem \ + fio \ + galera \ ++ garbd \ + ids \ + iscsi \ + ICP \ +-- +2.5.5 + diff --git a/SOURCES/bz1328386-1-oracle-monprofile-container-databases.patch b/SOURCES/bz1328386-1-oracle-monprofile-container-databases.patch new file mode 100644 index 0000000..975cce2 --- /dev/null +++ b/SOURCES/bz1328386-1-oracle-monprofile-container-databases.patch @@ -0,0 +1,24 @@ +From 8ec7eb9fa6ccc242555eea7e3f0ebd7537799943 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 19 Apr 2016 14:27:39 +0200 +Subject: [PATCH] oracle: inform user that monprofile must start with C## for + container databases + +--- + heartbeat/oracle | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/heartbeat/oracle b/heartbeat/oracle +index 5a8aca8..d68fa6e 100755 +--- a/heartbeat/oracle ++++ b/heartbeat/oracle +@@ -402,6 +402,9 @@ check_mon_profile() { + output=`dbasql mk_mon_profile show_mon_profile` + if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then + return 0 ++ elif echo "$output" | grep ORA-65140 >/dev/null 2>&1; then ++ ocf_exit_reason "monprofile must start with C## for container databases" ++ return $OCF_ERR_CONFIGURED + else + ocf_log err "could not create $MONPROFILE oracle profile" + ocf_log err "sqlplus output: $output" diff --git a/SOURCES/bz1328386-2-oracle-monprofile-container-databases.patch b/SOURCES/bz1328386-2-oracle-monprofile-container-databases.patch new file mode 100644 index 0000000..c0774d9 --- /dev/null +++ b/SOURCES/bz1328386-2-oracle-monprofile-container-databases.patch @@ -0,0 +1,49 @@ +From f84cdaa6ecf0febb6d33733bfdb30f3d41f615e1 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Mon, 29 Aug 2016 16:54:30 +0200 +Subject: [PATCH] oracle: fix issue with C## in monprofile + +--- + heartbeat/oracle | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/oracle b/heartbeat/oracle +index e8e6148..da322a7 100755 +--- a/heartbeat/oracle ++++ b/heartbeat/oracle +@@ -371,7 +371,7 @@ show_mon_profile() { + } + mk_mon_profile() { + cat< +Date: Mon, 29 Aug 2016 17:33:01 +0200 +Subject: [PATCH] oracle: add quotes for monuser and monpassword and inform + user to start monuser with C## if it's a container database + +--- + heartbeat/oracle | 17 ++++++++++------- + 1 file changed, 10 insertions(+), 7 deletions(-) + +diff --git a/heartbeat/oracle b/heartbeat/oracle +index 
da322a7..6fad5bc 100755 +--- a/heartbeat/oracle ++++ b/heartbeat/oracle +@@ -291,7 +291,7 @@ dbasql() { + runsql "connect / as sysdba" $* + } + monsql() { +- runsql "connect $MONUSR/\"$MONPWD\"" $* ++ runsql "connect \"$MONUSR\"/\"$MONPWD\"" $* + } + # use dbasql_one if the query should result in a single line output + # at times people stuff commands in oracle .profile +@@ -379,19 +379,19 @@ show_mon_user() { + } + mk_mon_user() { + cat</dev/null; then + return 0 ++ elif echo "$output" | grep ORA-65096 >/dev/null 2>&1; then ++ ocf_exit_reason "monuser must start with C## for container databases" ++ return $OCF_ERR_CONFIGURED + else + ocf_exit_reason "could not create $MONUSR oracle user" + ocf_log err "sqlplus output: $output" +@@ -757,7 +760,7 @@ MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default} + MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default} + MONPROFILE=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default} + +-MONUSR=$(echo $MONUSR | awk '{print toupper($0)}') ++MONUSR=$(echo "$MONUSR" | awk '{print toupper($0)}') + MONPROFILE=$(echo "$MONPROFILE" | awk '{print toupper($0)}') + OCF_REQUIRED_PARAMS="sid" + OCF_REQUIRED_BINARIES="sqlplus" diff --git a/SOURCES/bz1332435-nfsserver-var-lib-nfs-fix.patch b/SOURCES/bz1332435-nfsserver-var-lib-nfs-fix.patch deleted file mode 100644 index 3ecc3bf..0000000 --- a/SOURCES/bz1332435-nfsserver-var-lib-nfs-fix.patch +++ /dev/null @@ -1,29 +0,0 @@ -diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver ---- a/heartbeat/nfsserver 2016-04-11 10:28:05.988977035 +0200 -+++ b/heartbeat/nfsserver 2016-04-11 16:31:50.150445968 +0200 -@@ -332,7 +332,11 @@ - - case $EXEC_MODE in - 1) ${OCF_RESKEY_nfs_init_script} $cmd;; -- 2) systemctl $cmd ${svc}.service ;; -+ 2) if ! echo $svc | grep -q "\."; then -+ svc="${svc}.service" -+ fi -+ systemctl $cmd $svc -+ ;; - esac - } - -@@ -587,6 +591,12 @@ - ocf_log debug "$fp is already bound to /var/lib/nfs" - return 0 - fi -+ -+ if nfs_exec status var-lib-nfs-rpc_pipefs.mount; then -+ ocf_log debug "/var/lib/nfs/rpc_pipefs already mounted. 
Unmounting in preparation to bind mount nfs dir" -+ systemctl stop var-lib-nfs-rpc_pipefs.mount -+ fi -+ - mount --bind $fp /var/lib/nfs - [ $SELINUX_ENABLED -eq 0 ] && restorecon /var/lib/nfs - } diff --git a/SOURCES/bz1337109-tickle_tcp-fix.patch b/SOURCES/bz1337109-tickle_tcp-fix.patch new file mode 100644 index 0000000..fd2363e --- /dev/null +++ b/SOURCES/bz1337109-tickle_tcp-fix.patch @@ -0,0 +1,23 @@ +From 1e3c0b11d68b8713f20abe12d6997eb853def797 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 19 Apr 2016 10:15:50 +0200 +Subject: [PATCH] tickle_tcp: Fix "Failed to open raw socket (Invalid + argument)" issue + +--- + tools/tickle_tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/tickle_tcp.c b/tools/tickle_tcp.c +index cf0bdcb..7c5a537 100644 +--- a/tools/tickle_tcp.c ++++ b/tools/tickle_tcp.c +@@ -245,7 +245,7 @@ int send_tickle_ack(const sock_addr *dst, + ip4pkt.tcp.window = htons(1234); + ip4pkt.tcp.check = tcp_checksum((uint16_t *)&ip4pkt.tcp, sizeof(ip4pkt.tcp), &ip4pkt.ip); + +- s = socket(AF_INET, SOCK_RAW, htons(IPPROTO_RAW)); ++ s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (s == -1) { + fprintf(stderr, "Failed to open raw socket (%s)\n", strerror(errno)); + return -1; diff --git a/SOURCES/bz1337124-mysql-use-replication_port-parameter.patch b/SOURCES/bz1337124-mysql-use-replication_port-parameter.patch new file mode 100644 index 0000000..ef3aa56 --- /dev/null +++ b/SOURCES/bz1337124-mysql-use-replication_port-parameter.patch @@ -0,0 +1,55 @@ +From 98e235caa31c7cc73a834d8046e6f2ec2d04b832 Mon Sep 17 00:00:00 2001 +From: Marian Marinov +Date: Sun, 10 Apr 2016 22:44:16 +0300 +Subject: [PATCH] heartbeat/mysql: Handle non-standard mysql server port + +Signed-off-by: Marian Marinov +--- + heartbeat/mysql | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/heartbeat/mysql b/heartbeat/mysql +index e2d54dd..be914d3 100755 +--- a/heartbeat/mysql ++++ b/heartbeat/mysql +@@ -549,6 +549,7 @@ set_master() { + + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \ + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \ ++ MASTER_PORT=$OCF_RESKEY_replication_port, \ + MASTER_USER='$OCF_RESKEY_replication_user', \ + MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params" + rm -f $tmpfile +-- +2.5.5 + +From e78f106cc5edabc50eb3622ce384ed2493250ec5 Mon Sep 17 00:00:00 2001 +From: Mathieu Peltier +Date: Thu, 6 Nov 2014 17:16:38 +0100 +Subject: [PATCH] Modified replication_user description: RELOAD privilege is + required for RESET SLAVE or RESET SLAVE ALL command. + +--- + heartbeat/mysql | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/mysql b/heartbeat/mysql +index d895369..df65502 100755 +--- a/heartbeat/mysql ++++ b/heartbeat/mysql +@@ -211,9 +211,9 @@ Additional parameters which are passed to the mysqld on startup. + MySQL replication user. This user is used for starting and stopping + MySQL replication, for setting and resetting the master host, and for + setting and unsetting read-only mode. Because of that, this user must +-have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, and PROCESS +-privileges on all nodes within the cluster. Mandatory if you define +-a master-slave resource. ++have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD ++privileges on all nodes within the cluster. Mandatory if you define a ++master-slave resource. 
+ + MySQL replication user + +-- +2.5.5 + diff --git a/SOURCES/bz1337615-nfsserver-rpcpipefs_dir.patch b/SOURCES/bz1337615-nfsserver-rpcpipefs_dir.patch new file mode 100644 index 0000000..45a802d --- /dev/null +++ b/SOURCES/bz1337615-nfsserver-rpcpipefs_dir.patch @@ -0,0 +1,60 @@ +diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver +--- a/heartbeat/nfsserver 2016-07-21 12:38:01.298076314 +0200 ++++ b/heartbeat/nfsserver 2016-07-21 12:39:05.345432538 +0200 +@@ -245,14 +245,8 @@ + fp="$OCF_RESKEY_nfs_shared_infodir" + : ${OCF_RESKEY_nfs_notify_cmd="$DEFAULT_NOTIFY_CMD"} + : ${OCF_RESKEY_nfs_notify_foreground="$DEFAULT_NOTIFY_FOREGROUND"} +- +-if [ -z ${OCF_RESKEY_rpcpipefs_dir} ]; then +- rpcpipefs_make_dir=$fp/rpc_pipefs +- rpcpipefs_umount_dir=${DEFAULT_RPCPIPEFS_DIR} +-else +- rpcpipefs_make_dir=${OCF_RESKEY_rpcpipefs_dir} +- rpcpipefs_umount_dir=${OCF_RESKEY_rpcpipefs_dir} +-fi ++: ${OCF_RESKEY_rpcpipefs_dir="$DEFAULT_RPCPIPEFS_DIR"} ++OCF_RESKEY_rpcpipefs_dir=${OCF_RESKEY_rpcpipefs_dir%/} + + # Use statd folder if it exists + if [ -d "/var/lib/nfs/statd" ]; then +@@ -554,7 +548,7 @@ + fi + + [ -d "$fp" ] || mkdir -p $fp +- [ -d "$rpcpipefs_make_dir" ] || mkdir -p $rpcpipefs_make_dir ++ [ -d "$OCF_RESKEY_rpcpipefs_dir" ] || mkdir -p $OCF_RESKEY_rpcpipefs_dir + [ -d "$fp/v4recovery" ] || mkdir -p $fp/v4recovery + + [ -d "$fp/$STATD_DIR" ] || mkdir -p "$fp/$STATD_DIR" +@@ -603,9 +597,18 @@ + + unbind_tree () + { +- if `mount | grep -q " on $rpcpipefs_umount_dir"`; then +- umount -t rpc_pipefs $rpcpipefs_umount_dir +- fi ++ local i=1 ++ while `mount | grep -q " on $OCF_RESKEY_rpcpipefs_dir"` && [ "$i" -le 10 ]; do ++ ocf_log info "Stop: umount ($i/10 attempts)" ++ umount -t rpc_pipefs $OCF_RESKEY_rpcpipefs_dir ++ sleep 1 ++ i=$((i + 1)) ++ done ++ ++ case $EXEC_MODE in ++ [23]) nfs_exec stop var-lib-nfs-rpc_pipefs.mount;; ++ esac ++ + if is_bound /var/lib/nfs; then + umount /var/lib/nfs + fi +@@ -771,6 +774,8 @@ + prepare_directory + bind_tree + ++ mount -t rpc_pipefs sunrpc $OCF_RESKEY_rpcpipefs_dir ++ + # remove the sm-notify pid so sm-notify will be allowed to run again without requiring a reboot. 
+ rm -f /var/run/sm-notify.pid + # diff --git a/SOURCES/bz1342478-rabbitmq-cluster-return-code-69-not-running.patch b/SOURCES/bz1342478-rabbitmq-cluster-return-code-69-not-running.patch new file mode 100644 index 0000000..7fc59b1 --- /dev/null +++ b/SOURCES/bz1342478-rabbitmq-cluster-return-code-69-not-running.patch @@ -0,0 +1,73 @@ +diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +--- a/heartbeat/rabbitmq-cluster 2016-06-03 16:17:09.794967156 +0200 ++++ b/heartbeat/rabbitmq-cluster 2016-06-03 16:27:29.777803932 +0200 +@@ -167,8 +167,13 @@ + rmq_delete_nodename + return $OCF_NOT_RUNNING + ;; ++ 69) ++ ocf_log info "RabbitMQ server is not running" ++ rmq_delete_nodename ++ return $OCF_NOT_RUNNING ++ ;; + *) +- ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc" ++ ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc" + rmq_delete_nodename + return $OCF_ERR_GENERIC + ;; +From 41657b4108211725878b6b46883ff6cc72e44fa9 Mon Sep 17 00:00:00 2001 +From: Peter Lemenkov +Date: Mon, 4 Jul 2016 17:09:16 +0200 +Subject: [PATCH] More RabbitMQ POSIX error codes + +We must add the following POSIX error codes in order to detect node +failure: + +* 68 - EX_NOHOST +* 69 - EX_UNAVAILABLE +* 70 - EX_SOFTWARE +* 75 - EX_TEMPFAIL +* 78 - EX_CONFIG + +The following commits introduced these return values: + +* rabbitmq/rabbitmq-server@7984540175d0b8852025165b6b6a0ac05d692c98 +* rabbitmq/rabbitmq-common@92ae50e5964d4f079c7b2abed1caaa8ab54a439b + +For the error codes meanings go to: + +* http://www.sbras.ru/cgi-bin/www/unix_help/unix-man?sysexits+3 +* http://linux.die.net/include/sysexits.h +* https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=misc/sysexits.h;hb=HEAD + +Note that the following error valies do not mean that the node is +stopped and therefore doesn't covered by this commit: + +* 64 - EX_USAGE +* 65 - EX_DATAERR +* 67 - EX_NOUSER + +Signed-off-by: Peter Lemenkov +--- + heartbeat/rabbitmq-cluster | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index b9ae38e..651b837 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -162,12 +162,7 @@ rmq_monitor() { + + return $OCF_SUCCESS + ;; +- 2) +- ocf_log info "RabbitMQ server is not running" +- rmq_delete_nodename +- return $OCF_NOT_RUNNING +- ;; +- 69) ++ 2|68|69|70|75|78) + ocf_log info "RabbitMQ server is not running" + rmq_delete_nodename + return $OCF_NOT_RUNNING diff --git a/SOURCES/bz1343905-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch b/SOURCES/bz1343905-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch new file mode 100644 index 0000000..47975b4 --- /dev/null +++ b/SOURCES/bz1343905-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch @@ -0,0 +1,102 @@ +From f00a952bd5e133cad30689d9edcc98f5d33a71a9 Mon Sep 17 00:00:00 2001 +From: Peter Lemenkov +Date: Thu, 16 Jun 2016 16:44:48 +0200 +Subject: [PATCH] Enable dump/restore users from RabbitMQ ver. 3.6.x + +RabbitMQ changed internal_users scheme since ver. 3.6.0. 
See the +following links for further details: + +* rabbitmq/rabbitmq-server#270 +* rabbitmq/rabbitmq-server#310 +* rabbitmq/rabbitmq-common@9c86a7401cf464dc20527890192c5dc0fe43b6c8 +* rabbitmq/rabbitmq-server@93b5a3a8092f52063cbca3ab661c7c6bae43c512 + +CC @oalbrigt + +Signed-off-by: Peter Lemenkov +--- + heartbeat/rabbitmq-cluster | 64 ++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 50 insertions(+), 14 deletions(-) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index 0724901..facca35 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -342,14 +342,40 @@ rmq_start() { + rmq_join_existing "$join_list" + rc=$? + +- # Restore users (if any) +- BaseDataDir=`dirname $RMQ_DATA_DIR` +- if [ -f $BaseDataDir/users.erl ] ; then +- rabbitmqctl eval " +- {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), +- lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, X) end, Users). +- " +- rm -f $BaseDataDir/users.erl ++ # Restore users (if any) ++ BaseDataDir=`dirname $RMQ_DATA_DIR` ++ if [ -f $BaseDataDir/users.erl ] ; then ++ rabbitmqctl eval " ++ ++ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), ++ ++ %% Read users first ++ {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), ++ ++ Upgrade = fun ++ ({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5}; ++ ({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D} ++ end, ++ ++ Downgrade = fun ++ ({internal_user, A, B, C}) -> {internal_user, A, B, C}; ++ ({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C}; ++ %% Incompatible scheme, so we will loose user's password ('B' value) during conversion. ++ %% Unfortunately, this case will require manual intervention - user have to run: ++ %% rabbitmqctl change_password ++ ({internal_user, A, B, C, _}) -> {internal_user, A, B, C} ++ end, ++ ++ case WildPattern of ++ %% Version < 3.6.0 ++ {internal_user,'_','_','_'} -> ++ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Downgrade(X)) end, Users); ++ %% Version >= 3.6.0 ++ {internal_user,'_','_','_','_'} -> ++ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Upgrade(X)) end, Users) ++ end. ++ " ++ rm -f $BaseDataDir/users.erl + fi + + if [ $rc -ne 0 ]; then +@@ -362,12 +388,22 @@ rmq_start() { + } + + rmq_stop() { +- # Backup users +- BaseDataDir=`dirname $RMQ_DATA_DIR` +- rabbitmqctl eval " +- Users = mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]), +- file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). +- " ++ # Backup users ++ BaseDataDir=`dirname $RMQ_DATA_DIR` ++ rabbitmqctl eval " ++ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), ++ ++ Users = case WildPattern of ++ %% Version < 3.6.0 ++ {internal_user,'_','_','_'} -> ++ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); ++ %% Version >= 3.6.0 ++ {internal_user,'_','_','_','_'} -> ++ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) ++ end, ++ ++ file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). ++ " + + rmq_monitor + if [ $? 
-eq $OCF_NOT_RUNNING ]; then diff --git a/SOURCES/bz1343905-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch b/SOURCES/bz1343905-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch new file mode 100644 index 0000000..32a05c3 --- /dev/null +++ b/SOURCES/bz1343905-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch @@ -0,0 +1,37 @@ +From 74b3cff4fce5483d126b16131db53f8bd5804c82 Mon Sep 17 00:00:00 2001 +From: Peter Lemenkov +Date: Tue, 21 Jun 2016 15:48:07 +0200 +Subject: [PATCH] Don't run scriptlets if Mnesia isn't available + +See this rhbz for further details and symptoms: + +https://bugzilla.redhat.com/1343905 + +Signed-off-by: Peter Lemenkov +--- + heartbeat/rabbitmq-cluster | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index facca35..18e3206 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -346,6 +346,8 @@ rmq_start() { + BaseDataDir=`dirname $RMQ_DATA_DIR` + if [ -f $BaseDataDir/users.erl ] ; then + rabbitmqctl eval " ++ %% Run only if Mnesia is ready, otherwise exit. ++ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), + + [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), + +@@ -391,6 +393,9 @@ rmq_stop() { + # Backup users + BaseDataDir=`dirname $RMQ_DATA_DIR` + rabbitmqctl eval " ++ %% Run only if Mnesia is still available, otherwise exit. ++ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), ++ + [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), + + Users = case WildPattern of diff --git a/SOURCES/bz1343905-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch b/SOURCES/bz1343905-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch new file mode 100644 index 0000000..2d1abe8 --- /dev/null +++ b/SOURCES/bz1343905-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch @@ -0,0 +1,53 @@ +From 279bae7ec9a571a4d52b0d876850e27772eb0933 Mon Sep 17 00:00:00 2001 +From: Jiri Stransky +Date: Thu, 23 Jun 2016 12:55:06 +0200 +Subject: [PATCH] RabbitMQ: Forget node before 2nd joining attempt + +If a first attempt at joining an existing cluster has failed and we +resort to wiping the local RabbitMQ data, make sure we also request the +local node to be forgotten from the existing cluster before we make the +join attempt, otherwise the node will be rejected. +--- + heartbeat/rabbitmq-cluster | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index 0724901..b9ae38e 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -279,6 +279,22 @@ rmq_join_existing() + return $OCF_SUCCESS + } + ++rmq_forget_cluster_node_remotely() { ++ local running_cluster_nodes="$1" ++ local node_to_forget="$2" ++ ++ ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]." ++ for running_cluster_node in $running_cluster_nodes; do ++ rabbitmqctl -n $running_cluster_node forget_cluster_node $node_to_forget ++ if [ $? = 0 ]; then ++ ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node." ++ return ++ else ++ ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node." 
++ fi ++ done ++} ++ + rmq_notify() { + node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}" + mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" +@@ -336,9 +352,12 @@ rmq_start() { + rmq_join_existing "$join_list" + if [ $? -ne 0 ]; then + ocf_log info "node failed to join, wiping data directory and trying again" ++ local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" ++ + # if the graceful join fails, use the hammer and reset all the data. + rmq_stop + rmq_wipe_data ++ rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node" + rmq_join_existing "$join_list" + rc=$? + diff --git a/SOURCES/bz1343905-rabbitmq-automatic-cluster-recovery.patch b/SOURCES/bz1343905-rabbitmq-automatic-cluster-recovery.patch new file mode 100644 index 0000000..d51cfe7 --- /dev/null +++ b/SOURCES/bz1343905-rabbitmq-automatic-cluster-recovery.patch @@ -0,0 +1,39 @@ +commit 1621dbb60454840d469f3a0e317a97d94510f7ab +Author: John Eckersberg +Date: Tue Jul 26 13:47:39 2016 -0400 + + rabbitmq: Allow automatic cluster recovery before forcing it + + When joining a node into an existing cluster, check to see if it is + already clustered before force removing it from the cluster and + re-adding. If the clustering is already functional there's no need to + force it again. + +diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster +index 651b837..966dd64 100755 +--- a/heartbeat/rabbitmq-cluster ++++ b/heartbeat/rabbitmq-cluster +@@ -238,6 +238,11 @@ rmq_start_first() + return $rc + } + ++rmq_is_clustered() ++{ ++ $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true ++} ++ + rmq_join_existing() + { + local join_list="$1" +@@ -249,6 +254,11 @@ rmq_join_existing() + return $OCF_ERR_GENERIC + fi + ++ if rmq_is_clustered; then ++ ocf_log info "Successfully re-joined existing rabbitmq cluster automatically" ++ return $OCF_SUCCESS ++ fi ++ + # unconditionally join the cluster + $RMQ_CTL stop_app > /dev/null 2>&1 + for node in $(echo "$join_list"); do diff --git a/SOURCES/bz1344225-garbd-Introduces-garbd-resource-agent.patch b/SOURCES/bz1344225-garbd-Introduces-garbd-resource-agent.patch deleted file mode 100644 index af18286..0000000 --- a/SOURCES/bz1344225-garbd-Introduces-garbd-resource-agent.patch +++ /dev/null @@ -1,474 +0,0 @@ -From beb8dd713fa3a15ca01738de33f2031d1e5925d9 Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Wed, 1 Jun 2016 17:14:04 +0200 -Subject: [PATCH 1/2] garbd: Introduces garbd resource-agent - ---- - heartbeat/garbd | 417 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 417 insertions(+) - create mode 100755 heartbeat/garbd - -diff --git a/heartbeat/garbd b/heartbeat/garbd -new file mode 100755 -index 0000000..950df76 ---- /dev/null -+++ b/heartbeat/garbd -@@ -0,0 +1,417 @@ -+#!/bin/sh -+# -+# Copyright (c) 2015 Damien Ciabrini -+# All Rights Reserved. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of version 2 of the GNU General Public License as -+# published by the Free Software Foundation. -+# -+# This program is distributed in the hope that it would be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -+# -+# Further, this software is distributed without any warranty that it is -+# free of the rightful claim of any third person regarding infringement -+# or the like. 
Any license provided herein, whether implied or -+# otherwise, applies only to this software file. Patent licenses, if -+# any, provided herein do not apply to combinations of this program with -+# other software, or any other product whatsoever. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, write the Free Software Foundation, -+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. -+# -+ -+## -+# README. -+# -+# Resource agent for garbd, the Galera arbitrator -+# -+# You can use this agent if you run an even number of galera nodes, -+# and you want an additional node to avoid split-brain situations. -+# -+# garbd requires that a Galera cluster is running, so make sure to -+# add a proper ordering constraint to the cluster, e.g.: -+# -+# pcs constraint order galera-master then garbd -+# -+# If you add garbd to the cluster while Galera is not running, you -+# might want to disable it before setting up ordering constraint, e.g.: -+# -+# pcs resource create garbd garbd \ -+# wsrep_cluster_address=gcomm://node1:4567,node2:4567 \ -+# meta target-role=stopped -+# -+# Use location constraints to avoid running galera and garbd on -+# the same node, e.g.: -+# -+# pcs constraint colocation add garbd with galera-master -INFINITY -+# pcs constraint location garbd prefers node3=INFINITY -+# -+## -+ -+####################################################################### -+# Initialization: -+ -+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} -+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -+ -+####################################################################### -+# Set default paramenter values -+ -+OCF_RESKEY_binary_default="/usr/sbin/garbd" -+OCF_RESKEY_log_default="/var/log/garbd.log" -+OCF_RESKEY_pid_default="/var/run/garbd.pid" -+OCF_RESKEY_user_default="mysql" -+if [ "X${HOSTOS}" = "XOpenBSD" ];then -+ OCF_RESKEY_group_default="_mysql" -+else -+ OCF_RESKEY_group_default="mysql" -+fi -+ -+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} -+: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}} -+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} -+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} -+: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} -+ -+usage() { -+ cat < -+ -+ -+1.0 -+ -+ -+Resource script for managing Galera arbitrator. -+ -+Manages a galera arbitrator instance -+ -+ -+ -+ -+Location of the Galera arbitrator binary -+ -+garbd server binary -+ -+ -+ -+ -+ -+User running the garbd process -+ -+garbd user -+ -+ -+ -+ -+ -+Group running garbd (for logfile permissions) -+ -+garbd group -+ -+ -+ -+ -+ -+The logfile to be used for garbd. -+ -+Galera arbitrator log file -+ -+ -+ -+ -+ -+The pidfile to be used for garbd. -+ -+Galera arbitrator pidfile -+ -+ -+ -+ -+ -+Additional parameters which are passed to garbd on startup. -+ -+Additional parameters to pass to garbd -+ -+ -+ -+ -+ -+The galera cluster address. This takes the form of: -+gcomm://node:port,node:port,node:port -+ -+Unlike Galera servers, port is mandatory for garbd. -+ -+Galera cluster address -+ -+ -+ -+ -+ -+The group name of the Galera cluster to connect to. -+ -+Galera cluster name -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+END -+} -+ -+ -+garbd_start() -+{ -+ local rc -+ local pid -+ local start_wait -+ local garbd_params -+ -+ garbd_status info -+ rc=$? 
-+ if [ $rc -eq $OCF_SUCCESS ]; then -+ ocf_exit_reason "garbd started outside of the cluster's control" -+ return $OCF_ERR_GENERIC; -+ fi -+ -+ touch $OCF_RESKEY_log -+ chown $OCF_RESKEY_user:$OCF_RESKEY_group $OCF_RESKEY_log -+ chmod 0640 $OCF_RESKEY_log -+ [ -x /sbin/restorecon ] && /sbin/restorecon $OCF_RESKEY_log -+ -+ garbd_params="--address=${OCF_RESKEY_wsrep_cluster_address} \ -+ --group ${OCF_RESKEY_wsrep_cluster_name} \ -+ --log ${OCF_RESKEY_log}" -+ -+ if [ ! -z "${OCF_RESKEY_options}" ]; then -+ garbd_params="${garbd_params} --options=${OCF_RESKEY_options}" -+ fi -+ -+ # garbd has no parameter to run as a specific user, -+ # so we need to start it by our own means -+ pid=$(su - -s /bin/sh $OCF_RESKEY_user -c "${OCF_RESKEY_binary} ${garbd_params} >/dev/null 2>&1 & echo \$!") -+ -+ # garbd doesn't create a pidfile either, so we create our own -+ echo $pid > $OCF_RESKEY_pid -+ if [ $? -ne 0 ]; then -+ ocf_exit_reason "Cannot create pidfile for garbd at $OCF_RESKEY_pid (rc=$?), please check your installation" -+ return $OCF_ERR_GENERIC -+ fi -+ -+ # Spin waiting for garbd to connect to the cluster. -+ # Let the CRM/LRM time us out if required. -+ start_wait=1 -+ while [ $start_wait -eq 1 ]; do -+ garbd_monitor info -+ rc=$? -+ if [ $rc -eq $OCF_NOT_RUNNING ]; then -+ ocf_exit_reason "garbd failed to start (pid=$pid), check logs in ${OCF_RESKEY_log}" -+ return $OCF_ERR_GENERIC -+ elif [ $rc -eq $OCF_SUCCESS ]; then -+ start_wait=0 -+ fi -+ sleep 2 -+ done -+ -+ ocf_log info "garbd connected to cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" -+ return $OCF_SUCCESS -+} -+ -+garbd_status() -+{ -+ local loglevel=$1 -+ local rc -+ ocf_pidfile_status $OCF_RESKEY_pid -+ rc=$? -+ -+ if [ $rc -eq 0 ]; then -+ return $OCF_SUCCESS -+ elif [ $rc -eq 2 ]; then -+ return $OCF_NOT_RUNNING -+ else -+ # clean up if pidfile is stale -+ if [ $rc -eq 1 ]; then -+ ocf_log $loglevel "garbd not running: removing old PID file" -+ rm -f $OCF_RESKEY_pid -+ fi -+ return $OCF_ERR_GENERIC -+ fi -+} -+ -+garbd_monitor() -+{ -+ local rc -+ local pid -+ local loglevel=$1 -+ -+ # Set loglevel to info during probe -+ if ocf_is_probe; then -+ loglevel="info" -+ fi -+ -+ garbd_status $loglevel -+ rc=$? -+ -+ # probe just wants to know if garbd is running or not -+ if [ ocf_is_probe -a $rc -ne $OCF_SUCCESS ]; then -+ rc=$OCF_NOT_RUNNING -+ fi -+ -+ # Consider garbd is working if it's connected to at least -+ # one node in the galera cluster. -+ # Note: a Galera node in Non-Primary state will be -+ # stopped by the galera RA. So we can assume that -+ # garbd will always be connected to the right partition -+ if [ $rc -eq $OCF_SUCCESS ]; then -+ pid=`cat $OCF_RESKEY_pid 2> /dev/null ` -+ netstat -tnp 2>/dev/null | grep -s -q "ESTABLISHED.*${pid}/" -+ if [ $? -ne 0 ]; then -+ ocf_log $loglevel "garbd disconnected from cluster \"${OCF_RESKEY_wsrep_cluster_name}\"" -+ rc=$OCF_ERR_GENERIC -+ fi -+ fi -+ -+ return $rc -+} -+ -+garbd_stop() -+{ -+ local rc -+ local pid -+ -+ if [ ! -f $OCF_RESKEY_pid ]; then -+ ocf_log info "garbd is not running" -+ return $OCF_SUCCESS -+ fi -+ -+ pid=`cat $OCF_RESKEY_pid 2> /dev/null ` -+ -+ ocf_log info "stopping garbd" -+ -+ # make sure the process is stopped -+ ocf_stop_processes TERM 10 $pid -+ rc=$? -+ -+ if [ $rc -ne 0 ]; then -+ return $OCF_ERR_GENERIC -+ else -+ rm -f $OCF_RESKEY_pid -+ ocf_log info "garbd stopped" -+ return $OCF_SUCCESS -+ fi -+} -+ -+garbd_validate() -+{ -+ if ! 
have_binary "$OCF_RESKEY_binary"; then -+ ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary" -+ return $OCF_ERR_INSTALLED; -+ fi -+ -+ if ! have_binary "netstat"; then -+ ocf_exit_reason "Setup problem: couldn't find command: netstat" -+ return $OCF_ERR_INSTALLED; -+ fi -+ -+ if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then -+ ocf_exit_reason "garbd must be configured with a wsrep_cluster_address value." -+ return $OCF_ERR_CONFIGURED -+ fi -+ -+ # unlike galera RA, ports must be set in cluster address for garbd -+ # https://github.com/codership/galera/issues/98 -+ for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do -+ echo $node | grep -s -q ':[1-9][0-9]*$' -+ if [ $? -ne 0 ]; then -+ ocf_exit_reason "wsrep_cluster_address must specify ports (gcomm://node1:port,node2:port)." -+ return $OCF_ERR_CONFIGURED -+ fi -+ done -+ -+ # Ensure that the encryption method is set if garbd is configured -+ # to use SSL. -+ echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_(key|cert)=' -+ if [ $? -eq 0 ]; then -+ echo $OCF_RESKEY_options | grep -s -q -i -E '\bsocket.ssl_cipher=' -+ if [ $? -ne 0 ]; then -+ ocf_exit_reason "option socket.ssl_cipher must be set if SSL is enabled." -+ return $OCF_ERR_CONFIGURED -+ fi -+ fi -+ -+ if [ -z "$OCF_RESKEY_wsrep_cluster_name" ]; then -+ ocf_exit_reason "garbd must be configured with a wsrep_cluster_name value." -+ return $OCF_ERR_CONFIGURED -+ fi -+ -+ if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then -+ ocf_exit_reason "User $OCF_RESKEY_user doesn't exist" -+ return $OCF_ERR_INSTALLED -+ fi -+ -+ if ! getent group $OCF_RESKEY_group >/dev/null 2>&1; then -+ ocf_exit_reason "Group $OCF_RESKEY_group doesn't exist" -+ return $OCF_ERR_INSTALLED -+ fi -+ -+ return $OCF_SUCCESS -+} -+ -+case "$1" in -+ meta-data) meta_data -+ exit $OCF_SUCCESS;; -+ usage|help) usage -+ exit $OCF_SUCCESS;; -+esac -+ -+garbd_validate -+rc=$? -+ -+# trap configuration errors early, but don't block stop in such cases -+LSB_STATUS_STOPPED=3 -+if [ $rc -ne 0 ]; then -+ case "$1" in -+ stop) exit $OCF_SUCCESS;; -+ status) exit $LSB_STATUS_STOPPED;; -+ *) exit $rc;; -+ esac -+fi -+ -+# What kind of method was invoked? 
-+case "$1" in -+ start) garbd_start;; -+ stop) garbd_stop;; -+ status) garbd_status err;; -+ monitor) garbd_monitor err;; -+ promote) garbd_promote;; -+ demote) garbd_demote;; -+ validate-all) exit $OCF_SUCCESS;; -+ -+ *) usage -+ exit $OCF_ERR_UNIMPLEMENTED;; -+esac --- -2.5.5 - - -From f36298aa97fc4cbed3e2eff28d6821f4314becbe Mon Sep 17 00:00:00 2001 -From: Damien Ciabrini -Date: Fri, 3 Jun 2016 18:27:38 +0200 -Subject: [PATCH 2/2] garbd: fix install and man page - ---- - doc/man/Makefile.am | 1 + - heartbeat/Makefile.am | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am -index 5e28895..25fb29b 100644 ---- a/doc/man/Makefile.am -+++ b/doc/man/Makefile.am -@@ -105,6 +105,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ - ocf_heartbeat_exportfs.7 \ - ocf_heartbeat_fio.7 \ - ocf_heartbeat_galera.7 \ -+ ocf_heartbeat_garbd.7 \ - ocf_heartbeat_iSCSILogicalUnit.7 \ - ocf_heartbeat_iSCSITarget.7 \ - ocf_heartbeat_iface-bridge.7 \ -diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am -index b70c104..df0e3b8 100644 ---- a/heartbeat/Makefile.am -+++ b/heartbeat/Makefile.am -@@ -76,6 +76,7 @@ ocf_SCRIPTS = ClusterMon \ - Filesystem \ - fio \ - galera \ -+ garbd \ - ids \ - iscsi \ - ICP \ --- -2.5.5 - diff --git a/SOURCES/bz1344228-rabbitmq-cluster-return-code-69-not-running.patch b/SOURCES/bz1344228-rabbitmq-cluster-return-code-69-not-running.patch deleted file mode 100644 index 7fc59b1..0000000 --- a/SOURCES/bz1344228-rabbitmq-cluster-return-code-69-not-running.patch +++ /dev/null @@ -1,73 +0,0 @@ -diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster ---- a/heartbeat/rabbitmq-cluster 2016-06-03 16:17:09.794967156 +0200 -+++ b/heartbeat/rabbitmq-cluster 2016-06-03 16:27:29.777803932 +0200 -@@ -167,8 +167,13 @@ - rmq_delete_nodename - return $OCF_NOT_RUNNING - ;; -+ 69) -+ ocf_log info "RabbitMQ server is not running" -+ rmq_delete_nodename -+ return $OCF_NOT_RUNNING -+ ;; - *) -- ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc" -+ ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc" - rmq_delete_nodename - return $OCF_ERR_GENERIC - ;; -From 41657b4108211725878b6b46883ff6cc72e44fa9 Mon Sep 17 00:00:00 2001 -From: Peter Lemenkov -Date: Mon, 4 Jul 2016 17:09:16 +0200 -Subject: [PATCH] More RabbitMQ POSIX error codes - -We must add the following POSIX error codes in order to detect node -failure: - -* 68 - EX_NOHOST -* 69 - EX_UNAVAILABLE -* 70 - EX_SOFTWARE -* 75 - EX_TEMPFAIL -* 78 - EX_CONFIG - -The following commits introduced these return values: - -* rabbitmq/rabbitmq-server@7984540175d0b8852025165b6b6a0ac05d692c98 -* rabbitmq/rabbitmq-common@92ae50e5964d4f079c7b2abed1caaa8ab54a439b - -For the error codes meanings go to: - -* http://www.sbras.ru/cgi-bin/www/unix_help/unix-man?sysexits+3 -* http://linux.die.net/include/sysexits.h -* https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=misc/sysexits.h;hb=HEAD - -Note that the following error valies do not mean that the node is -stopped and therefore doesn't covered by this commit: - -* 64 - EX_USAGE -* 65 - EX_DATAERR -* 67 - EX_NOUSER - -Signed-off-by: Peter Lemenkov ---- - heartbeat/rabbitmq-cluster | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster -index b9ae38e..651b837 100755 ---- a/heartbeat/rabbitmq-cluster -+++ b/heartbeat/rabbitmq-cluster -@@ -162,12 +162,7 @@ rmq_monitor() { - - return $OCF_SUCCESS - ;; -- 2) -- ocf_log info 
"RabbitMQ server is not running" -- rmq_delete_nodename -- return $OCF_NOT_RUNNING -- ;; -- 69) -+ 2|68|69|70|75|78) - ocf_log info "RabbitMQ server is not running" - rmq_delete_nodename - return $OCF_NOT_RUNNING diff --git a/SOURCES/bz1347536-saphana-mcos-support.patch b/SOURCES/bz1347536-saphana-mcos-support.patch deleted file mode 100644 index 1532f94..0000000 --- a/SOURCES/bz1347536-saphana-mcos-support.patch +++ /dev/null @@ -1,1778 +0,0 @@ -diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana ---- a/heartbeat/SAPHana 2016-04-26 12:01:55.620889964 +0200 -+++ b/heartbeat/SAPHana 2016-04-26 12:03:17.240897137 +0200 -@@ -2,9 +2,9 @@ - # - # SAPHana - # --# Description: Manages two single SAP HANA Instance in System Replication -+# Description: Manages two single SAP HANA Instance in System Replication - # Planned: do also manage scale-up scenarios --# currently the SAPHana is dependent of the analysis of -+# currently the SAPHana is dependent of the analysis of - # SAPHanaTopology - # For supported scenarios please read the README file provided - # in the same software package (rpm) -@@ -16,16 +16,17 @@ - # Support: linux@sap.com - # License: GNU General Public License (GPL) - # Copyright: (c) 2013,2014 SUSE Linux Products GmbH -+# Copyright: (c) 2015 SUSE Linux GmbH - # --# An example usage: -+# An example usage: - # See usage() function below for more details... - # - # OCF instance parameters: --# OCF_RESKEY_SID --# OCF_RESKEY_InstanceNumber --# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) --# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) --# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) -+# OCF_RESKEY_SID -+# OCF_RESKEY_InstanceNumber -+# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) -+# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default) -+# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) - # OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no) - # OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt)) - # OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes) -@@ -71,7 +72,7 @@ - info ) - case "$shf" in - all) skip=0 -- ;; -+ ;; - none ) - skip=1 - ;; -@@ -80,13 +81,13 @@ - mtype=${mtype#fh} - echo "$shf"| grep -iq ${mtype}; search=$? - if [ $search -eq 0 ]; then -- skip=0 -+ skip=0 - else - skip=1 - fi - ;; - esac -- ;; -+ ;; - esac - if [ $skip -eq 0 ]; then - ocf_log "$level" "$message" -@@ -103,8 +104,8 @@ - local rc=0 - methods=$(saphana_methods) - methods=$(echo $methods | tr ' ' '|') -- cat <<-! -- usage: $0 ($methods) -+ cat <<-EOF -+ usage: $0 ($methods) - - $0 manages a SAP HANA Instance as an HA resource. - -@@ -118,8 +119,17 @@ - The 'validate-all' operation reports whether the parameters are valid - The 'methods' operation reports on the methods $0 supports - -- ! 
-- return $rc -+EOF -+ return $rc -+} -+ -+function backup_global_and_nameserver() { -+ super_ocf_log info "FLOW $FUNCNAME ($*)" -+ local rc=0 -+ cp /hana/shared/LNX/global/hdb/custom/config/global.ini /hana/shared/LNX/global/hdb/custom/config/global.ini.$(date +"%s") -+ cp /hana/shared/LNX/global/hdb/custom/config/nameserver.ini /hana/shared/LNX/global/hdb/custom/config/nameserver.ini.$(date +"%s") -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ return $rc - } - - # -@@ -130,11 +140,12 @@ - function saphana_meta_data() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 -- cat < - - --0.149.7 -+0.151.1 - - Manages two SAP HANA instances in system replication (SR). - -@@ -157,7 +168,7 @@ - 2. landscapeHostConfiguration - The interface is used to monitor a HANA system. The python script is named landscapeHostConfiguration.py. - landscapeHostConfiguration.py has some detailed output about HANA system status -- and node roles. For our monitor the overall status is relevant. This overall -+ and node roles. For our monitor the overall status is relevant. This overall - status is reported by the returncode of the script: - 0: Internal Fatal, 1: ERROR, 2: WARNING, 3: INFO, 4: OK - The SAPHana resource agent will interpret returncodes 0 as FATAL, 1 as not-running or ERROR and and returncodes 2+3+4 as RUNNING. -@@ -168,14 +179,14 @@ - system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register). - - 4. hdbsql / systemReplicationStatus -- Interface is SQL query into HANA (system replication table). The hdbsql query will be replaced by a python script -+ Interface is SQL query into HANA (system replication table). The hdbsql query will be replaced by a python script - "systemReplicationStatus.py" in SAP HANA SPS8 or 9. - As long as we need to use hdbsql you need to setup secure store users for linux user root to be able to - access the SAP HANA database. You need to configure a secure store user key "SAPHANA${SID}SR" which can connect the SAP -- HANA database: -+ HANA database: - - 5. saphostctrl -- The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the -+ The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the - SAP HANA instance. This is the hostname used during the HANA installation. - - -@@ -207,7 +218,7 @@ - - - Time difference needed between to primary time stamps, if a dual-primary situation occurs -- Time difference needed between to primary time stamps, -+ Time difference needed between to primary time stamps, - if a dual-primary situation occurs. If the time difference is - less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin - a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After -@@ -231,12 +242,8 @@ - - - -- Define SAPHana resource agent messages to be printed -- Define SAPHana resource agent messages to be printed. -- This parameter should only be set if requested by support. The default is sufficient for normal operation. 
-- Values: ra-act-lpa-dec-flow -- You could specify any combination of the above values like "ra-act-flow" -- -+ OUTDATED PARAMETER -+ OUTDATED PARAMETER - - - -@@ -271,7 +278,7 @@ - for m in start stop status monitor promote demote notify validate-all methods meta-data usage; do - echo "$m" - done -- return $rc -+ return $rc - } - - # -@@ -298,7 +305,7 @@ - local remoteNode="" - local rc=1 - for cl in ${otherNodes[@]}; do -- vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]}) -+ vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]} "$cl") - if [ "$vHost" = "$remoteHost" ]; then # we found the correct node - remoteNode=$cl - rc=0 -@@ -347,9 +354,10 @@ - } - - # --# function: get_hana_attribute -+# function: get_hana_attribute - # params: NODE ATTR [STORE] - # globals: - -+# output: attribute value - # - function get_hana_attribute() - { -@@ -358,14 +366,20 @@ - local attr_node=$1 - local attr_name=$2 - local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter -- local attr_default=${5:-} -+ local attr_default=${4:-} -+ local dstr - local attr_val="" -- attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default"); rc=$? -- if [ $debug_attributes -eq 1 ]; then -- dstr=$(date) -- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE -- fi -- echo "$attr_val" -+ dstr=$(date) -+ case "$attr_store" in -+ reboot | forever ) -+ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE -+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ props ) -+ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE -+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ esac - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -388,11 +402,17 @@ - attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$? - if [ "$attr_old" != "$attr_value" ]; then - super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc " -- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store; rc=$? -- if [ $debug_attributes -eq 1 ]; then -- dstr=$(date) -- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE -- fi -+ dstr=$(date) -+ case "$attr_store" in -+ reboot | forever ) -+ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE -+ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ props ) -+ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE -+ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? 
-+ ;; -+ esac - else - super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}" - rc=0 -@@ -408,7 +428,8 @@ - # - function assert() { - super_ocf_log info "FLOW $FUNCNAME ($*)" -- local err_msg=$1 local default_rc=$OCF_NOT_RUNNING -+ local err_msg=$1 -+ local default_rc=$OCF_NOT_RUNNING - # DONE: Check, if we need to destinguish between probe and others - if ocf_is_probe; then - default_exit=$OCF_NOT_RUNNING -@@ -435,7 +456,7 @@ - local score=0 - if [ -n "$1" ]; then - score=$1 -- fi -+ fi - # DONE: PRIO2: Only adjust master if value is really different (try to check that) - oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot) - if [ "$oldscore" != "$score" ]; then -@@ -452,7 +473,7 @@ - # - # function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER) - # params: NODE_ROLES NODE_SYNC_STATUS --# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], -+# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], - # - scoring_crm_master() - { -@@ -467,7 +488,7 @@ - if grep "$rolePatt" <<< "$roles"; then - if grep "$syncPatt" <<< "$sync"; then - skip=1 -- myScore=$score -+ myScore=$score - fi - fi - fi -@@ -496,7 +517,7 @@ - # function: saphana_init - initialize variables for the resource agent - # params: InstanceName - # globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), --# globals: sr_name(w), remoteHost(w), otherNodes(w) -+# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w) - # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w) - # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) - # globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w) -@@ -506,6 +527,8 @@ - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=$OCF_SUCCESS - local vName -+ local clN -+ # local site - # two parameter models (for transition only) - # OLD: InstanceName - # NEW: SID InstanceNumber -@@ -528,11 +551,10 @@ - # - # if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology - # -- vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]}); -+ vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME"); - fi - SAPVIRHOST=${vName} - PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER" -- SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" - AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" - LPA_DIRECTORY=/var/lib/SAPHanaRA - LPA_ATTR=("lpa_${sid}_lpt" "forever") -@@ -591,6 +613,8 @@ - *openais* ) otherNodes=($(crm_node -l | awk '$3 == "member" { if ($2 != me) { print $2 }}' me=${NODENAME}));; - *cman* ) otherNodes=($(crm_node -l | awk '{for (i=1; i<=NF; i++) { if ($i != me) { print $i }}}' me=${NODENAME}));; - esac -+ # -+ # - - remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]}); - if [ -z "$remoteHost" ]; then -@@ -611,9 +635,13 @@ - # ATTR_NAME_HANA_SITE - sr_name=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SITE[@]}); - sr_mode=$(get_hana_attribute "${NODENAME}" ${ATTR_NAME_HANA_SRMODE[@]}) -+ - if [ -z "$sr_mode" ]; then - sr_mode="sync" - fi -+ if [ -n "$remoteNode" ]; then -+ rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); -+ fi - super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode" - # optional OCF parameters, we try to 
guess which directories are correct - if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] -@@ -706,7 +734,7 @@ - then - runninginst=$(echo "$output" | grep '^0 : ' | cut -d' ' -f3) - if [ "$runninginst" != "$InstanceName" ] -- then -+ then - super_ocf_log warn "ACT: sapstartsrv is running for instance $runninginst, that service will be killed" - restart=1 - else -@@ -784,38 +812,113 @@ - node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) - node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') - super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" -+ # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files? -+ # This might also solve some problems when we could not figure-out the ilocal or remote site name - for i in 1 2 3 4 5 6 7 8 9; do - case "$node_status" in -- primary ) -- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY" -- return $HANA_STATE_PRIMARY;; -+ primary ) -+ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY" -+ return $HANA_STATE_PRIMARY;; - syncmem | sync | async ) -- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY" -- return $HANA_STATE_SECONDARY;; -- none ) # have seen that mode on second side BEFEORE we registered it as replica -- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE" -- return $HANA_STATE_STANDALONE;; -+ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY" -+ return $HANA_STATE_SECONDARY;; -+ none ) # have seen that mode on second side BEFEORE we registered it as replica -+ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE" -+ return $HANA_STATE_STANDALONE;; - * ) -- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" -- dump=$( echo $node_status | hexdump -C ); -- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" -- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) -- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -- super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" -- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes -+ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" -+ dump=$( echo $node_status | hexdump -C ); -+ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" -+ node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) -+ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -+ super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" -+ # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes - esac; - done - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } - -+# function: analyze_hana_sync_statusSRS -+# params: - -+# globals: DIR_EXECUTABLE(r), FULL_SR_STATUS(w), remoteNode -+# -+# systemReplicationStatus.py return-codes: -+# NoHSR = 10 -+# Error = 11 -+# Unkown = 12 -+# Initializing = 13 -+# Syncing = 14 -+# Active = 15 -+function analyze_hana_sync_statusSRS() -+{ -+ super_ocf_log info "FLOW $FUNCNAME ($*)" -+ local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam="" -+ if [ -n "$rem_SR_name" ]; then -+ siteParam="--site=$rem_SR_name" -+ fi -+ FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? 
-+ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" -+ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" -+ # -+ # TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported) -+ # And is the check for return code capable for chains? -+ # -+ if [ $srRc -eq 15 ]; then -+ # Fix for a HANA BUG, where a non-working SR resulted in RC 15: -+ if grep -q "ACTIVE" <<< "$FULL_SR_STATUS"; then -+ super_ocf_log info "FLOW $FUNCNAME SOK" -+ set_hana_attribute "$remoteNode" "SOK" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -+ super_ocf_log info "ACT site=$sr_name, seting SOK for secondary (1)" -+ lpa_set_lpt 30 "$remoteNode" -+ rc=0; -+ else -+ # ok we should be careful and set secondary to SFAIL -+ super_ocf_log info "FLOW $FUNCNAME SFAIL" -+ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -+ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (6) - srRc=$srRc lss=$lss No ACTIVES found in cmd output" -+ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary -+ lpa_set_lpt 10 "$remoteNode" -+ fi -+ elif [ $srRc -le 11 ]; then # 11 and 10 -+ # if systemReplicationStatus is ERROR and landscapeHostConfiguration is down than do NOT set SFAIL -+ get_hana_landscape_status; lss=$? -+ if [ $lss -lt 2 ]; then -+ # keep everithing like it was -+ rc=2 -+ else -+ # ok we should be careful and set secondary to SFAIL -+ super_ocf_log info "FLOW $FUNCNAME SFAIL" -+ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -+ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss" -+ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary -+ lpa_set_lpt 10 "$remoteNode" -+ rc=1 -+ fi -+ else -+ super_ocf_log info "FLOW $FUNCNAME SFAIL" -+ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -+ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc" -+ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary -+ lpa_set_lpt 10 "$remoteNode" -+ rc=1; -+ fi -+ super_ocf_log info "FLOW $FUNCNAME PRIM+LPA" -+ super_ocf_log info "DBG PRIM" -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ return $rc -+} -+ - # --# function: analyze_hana_sync_status - query and check hana system replication status -+#### -+#### OLD HDBSQL STUFF FOR SPS6,7,8 AND SCALE-UP ONLY -+#### -+# function: analyze_hana_sync_statusSQL - query and check hana system replication status - # params: - - # globals: DIR_EXECUTABLE(r), remoteHost(r) - # get the HANA sync status --# --function analyze_hana_sync_status() -+# -+function analyze_hana_sync_statusSQL() - { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local -a clusterNodes=() -@@ -863,35 +966,9 @@ - # TODO PRIO1: REMOVE remoteNode dependency - set SFAIL - set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} - fi -- # first get a list of all secondary hosts, than a list of all secondary hosts, if the is ANY failure at this site -- # TODO: PRIO9: for first we assume there is only ONE secondary site (like ROT) -- # TODO: PRIO3: should we loop over all cluster nodes fetching their roles-attribute? To minimize sql-queries? -- # -- all_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_secondaries ); sqlrc=$? 
-- all_secondary_hosts=$(echo $all_secondary_hosts | dequote); -- if [ "$sqlrc" -eq 0 ]; then -- all_broken_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_failed_secondaries); sqlrc=$? -- all_broken_secondary_hosts=$(echo $all_broken_secondary_hosts | dequote); -- if [ "$sqlrc" -eq 0 ]; then -- if [ -n "$all_broken_secondary_hosts" ]; then -- # -- # we have a broken secondary site - set all hosts to "SFAIL" -- # -- # Note: since HANA hostname can be different from nodename we need to check all vhost attributes -- for n in $all_broken_secondary_hosts; do -- for cl in ${otherNodes[@]}; do -- vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]}) -- if [ "$vHost" = "$n" ]; then # we found the correct node -- set_hana_attribute $cl "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} -- fi -- done -- done -- fi -- fi -- fi - else - case "$sqlrc" in -- 19 ) -+ 19 ) - # return codes 19: license error -> set SFAIL! - # DONE: PRIO1: We should NOT set SFAIL, if HDB is exactly broken now - # When HDB breaks during monitor this could prevent a prositive remote failover -@@ -901,7 +978,7 @@ - done - ;; - esac -- fi -+ fi - return $rc - } - -@@ -932,10 +1009,18 @@ - local remoteInstance=""; - remoteInstance=$InstanceNr - if ocf_is_true ${AUTOMATED_REGISTER}; then -+ # -+ # -+ # -+ # -+ # - super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" -+ # -+ # - su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? -+ # backup_global_and_nameserver - else -- super_ocf_log info "ACT: IGNORE REGISTER because AUTOMATED_REGISTER is set to FALSE" -+ super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE" - rc=1 - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -@@ -945,7 +1030,7 @@ - # - # function: saphana_status - pure status check - # params: - --# globals: SIDInstanceName, OCF_*, -+# globals: SIDInstanceName, OCF_*, - function saphana_status() { - local binDeam="hdb.sap${SIDInstanceName}" rc=0 - binDeam=${binDeam:0:15} # Process name is limited to the first 15 characters -@@ -956,13 +1041,13 @@ - # - # function: saphana_start - start a hana instance - # params: - --# globals: OCF_*, SAPCONTROL, InstanceNr, SID, InstanceName, -+# globals: OCF_*, SAPCONTROL, InstanceNr, SID, InstanceName, - # - function saphana_start() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=$OCF_NOT_RUNNING - local output="" -- local loopcount=0 -+ local loopcount=0 - check_sapstartsrv - rc=$? - # -@@ -1000,11 +1085,11 @@ - # saphana_stop: Stop the SAP instance - # - function saphana_stop() { -- super_ocf_log info "FLOW $FUNCNAME ($*)" -- local output="" -- local rc=0 -- check_sapstartsrv; rc=$? -- if [ $rc -eq $OCF_SUCCESS ]; then -+ super_ocf_log info "FLOW $FUNCNAME ($*)" -+ local output="" -+ local rc=0 -+ check_sapstartsrv; rc=$? -+ if [ $rc -eq $OCF_SUCCESS ]; then - output=$($SAPCONTROL -nr $InstanceNr -function Stop) - rc=$? 
- super_ocf_log info "ACT: Stopping SAP Instance $SID-$InstanceName: $output" -@@ -1032,7 +1117,7 @@ - # function: saphana_validate - validation of (some) variables/parameters - # params: - - # globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), SAPVIRHOST(r) --# saphana_validate: Check the symantic of the input parameters -+# saphana_validate: Check the symantic of the input parameters - # - function saphana_validate() { - super_ocf_log info "FLOW $FUNCNAME ($*)" -@@ -1060,12 +1145,12 @@ - # - # function: saphana_start_primary - handle startup of PRIMARY in M/S - # params: --# globals: OCF_*(r), NODENAME, ATTR_NAME_*, HANA_STATE_*, -+# globals: OCF_*(r), NODENAME, ATTR_NAME_*, HANA_STATE_*, - # - function saphana_start_primary() - { - super_ocf_log info "FLOW $FUNCNAME ($*)" -- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING -+ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING - local lss sqlrc; - local rc=0 - local lpa_dec=4 -@@ -1074,7 +1159,7 @@ - # we will be a master (PRIMARY) so checking, if the is an OTHER master - # - super_ocf_log debug "DBG: saphana_primary - check_for_primary reports HANA_STATE_PRIMARY" -- # -+ # - lpa_init_lpt $HANA_STATE_PRIMARY - lpa_check_lpt_status; lpa_dec=$? - get_hana_landscape_status; lss=$? -@@ -1139,7 +1224,7 @@ - 1 ) # landcape says we are down, lets start and adjust scores and return code - super_ocf_log info "LPA: landcape: DOWN, LPA: start ==> start instance" - saphana_start -- rc=$? -+ rc=$? - LPTloc=$(date '+%s') - lpa_set_lpt $LPTloc - ;; -@@ -1152,7 +1237,7 @@ - # DONE: PRIO3: check if this reaction is correct - tell cluster about failed start - super_ocf_log info "LPA: landcape: UP, LPA: register ==> take down" - set_crm_master -inf -- rc=$OCF_NOT_RUNNING -+ rc=$OCF_NOT_RUNNING - ;; - 1 ) # lets try to register - # DONE: PRIO2: Like Action in start_secondary -@@ -1160,7 +1245,7 @@ - super_ocf_log info "DEC: AN OTHER HANA IS AVAILABLE ==> LETS REGISTER" - set_crm_master 0 - if wait_for_primary_master 1; then -- register_hana_secondary -+ register_hana_secondary - check_for_primary; primary_status=$? - if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then - super_ocf_log info "ACT: Register successful" -@@ -1169,11 +1254,11 @@ - set_crm_master 0 - saphana_start_secondary - rc=$? -- lpa_set_lpt 30 -+ lpa_set_lpt 10 - else - super_ocf_log err "ACT: Register failed" - rc=$OCF_NOT_RUNNING -- fi -+ fi - else - # lets check next monitor, if we can register - rc=$OCF_SUCCESS -@@ -1185,6 +1270,9 @@ - case "$lss" in - 2 | 3 | 4 ) # as we ARE up we just keep it up - # TODO: PRIO3: I now change from "just keep it up to take that down" -+# TODO: PRIO1 differ lpt_advice!! -+# 2 => DOWN -+# 3 => KEEP - # TODO: PRIO3: OCF_SUCCESS, OCF_NOT_RUNNING or OCF_ERR_xxxx ? 
- set_crm_master -9000 - #scoring_crm_master "$my_role" "$my_sync" -@@ -1193,7 +1281,7 @@ - 1 ) # we are down, so we should wait --> followup in next monitor - super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" - # TODO: PRIO3: Check, if WAITING is correct here -- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} - set_crm_master -9000 - rc=$OCF_SUCCESS - ;; -@@ -1202,7 +1290,7 @@ - fail ) # process a lpa FAIL - super_ocf_log info "LPA: LPA reports FAIL" - set_crm_master -inf -- rc=$OCF_NOT_RUNNING -+ rc=$OCF_NOT_RUNNING - ;; - esac - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -@@ -1278,12 +1366,12 @@ - # - # function: saphana_start_secondary - handle startup of PRIMARY in M/S - # params: --# globals: OCF_*(r), NODENAME, ATTR_NAME_*, -+# globals: OCF_*(r), NODENAME, ATTR_NAME_*, - # - function saphana_start_secondary() - { - super_ocf_log info "FLOW $FUNCNAME ($*)" -- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING -+ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING - local sqlrc; - set_crm_master 0 - # -@@ -1291,9 +1379,9 @@ - # - lpa_push_lpt 10 - lpa_set_lpt 10 -- # -+ # - ####### LPA - end -- # -+ # - # - # we would be slave (secondary) - # we first need to check, if there are Master Nodes, because the Scecondary only starts -@@ -1311,16 +1399,16 @@ - # It seams the stating secondary could not start because of stopping primary - # so this is a WAITING situation - super_ocf_log info "ACT: PRIMARY seams to be down now ==> WAITING" -- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ set_hana_attribute ${NODENAME} "WAITING4PRIM" ${ATTR_NAME_HANA_CLONE_STATE[@]} - set_crm_master -INFINITY - rc=$OCF_SUCCESS - fi - else -- lpa_set_lpt 30 -+ lpa_set_lpt 10 - fi - else - super_ocf_log info "ACT: wait_for_primary_master ==> WAITING" -- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]} -+ set_hana_attribute ${NODENAME} "WAITING4PRIM" ${ATTR_NAME_HANA_CLONE_STATE[@]} - set_crm_master -INFINITY - rc=$OCF_SUCCESS - fi -@@ -1329,11 +1417,71 @@ - } - - # -+# function: saphana_check_local_instance -+# params: -+# output: -+# rc: rc=0 (UP) rc=1 (DOWN) -+# globals: -+# -+function saphana_check_local_instance() -+{ -+ local rc=1 -+ local count=0 -+ local SERVNO -+ local output -+ local MONITOR_SERVICES="hdbnameserver|hdbdaemon" # TODO: PRIO1: exact list of Services -+ super_ocf_log info "FLOW $FUNCNAME ($*)" -+ check_sapstartsrv -+ rc=$? -+ if [ $rc -eq $OCF_SUCCESS ] -+ then -+ output=$($SAPCONTROL -nr $InstanceNr -function GetProcessList -format script) -+ # we have to parse the output, because the returncode doesn't tell anything about the instance status -+ for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u` -+ do -+ local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3` -+ local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3` -+ local STATE=0 -+ local SEARCH -+ -+ case $COLOR in -+ GREEN|YELLOW) STATE=$OCF_SUCCESS;; -+ *) STATE=$OCF_NOT_RUNNING;; -+ esac -+ -+ SEARCH=`echo "$MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'` -+ if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ] -+ then -+ if [ $STATE -eq $OCF_NOT_RUNNING ] -+ then -+ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" 
-+ rc=$STATE -+ fi -+ count=1 -+ fi -+ done -+ -+ if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ] -+ then -+ if ocf_is_probe -+ then -+ rc=1 -+ else -+ [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!" -+ rc=1 -+ fi -+ fi -+ fi -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ return $rc -+} -+ -+# - # function: lpa_get_lpt - get lpt from cluster - # params: NODE - # output: LPT - # rc: rc=0: OK, rc=1: InternalERROR, rc=2: ERROR --# globals: LPA_ATTR_*, -+# globals: LPA_ATTR_*, - # - function lpa_get_lpt() { - super_ocf_log info "FLOW $FUNCNAME ($*)" -@@ -1348,7 +1496,7 @@ - rc=2 - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ return $rc - } - - # -@@ -1372,7 +1520,7 @@ - rc=0 - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ return $rc - } - - # -@@ -1398,7 +1546,7 @@ - rc=2 - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ return $rc - } - - # -@@ -1422,15 +1570,15 @@ - rc=2 - else - rc=0 -- fi -+ fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ return $rc - } - - # - # function: lpa_init_lpt - initialize local lpt, if needed - # params: HANA_STATE --# globals: HANA_STATE_*(r), LPA_DIRECTORY(r), sid(r), NODENAME(r), -+# globals: HANA_STATE_*(r), LPA_DIRECTORY(r), sid(r), NODENAME(r), - # lpa_init_lpt - # - # Returncodes: -@@ -1439,7 +1587,7 @@ - # Initializing (if NO local LPT-file): - # SECONDARY sets to 0 - # PRIMARY sets to 1 --# -+# - function lpa_init_lpt() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=1 -@@ -1458,11 +1606,11 @@ - LPTloc=10 - lpa_push_lpt "10"; rc=$? - else -- rc=2 -+ rc=2 - fi - lpa_set_lpt $LPTloc - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ return $rc - } - - # -@@ -1472,6 +1620,10 @@ - # lpa_check_lpt_status - # - # Returncodes: -+# 0: start -+# 1: register than start -+# 2: wait4gab -+# 3: wait4other - # - # Initializing (if NO local LPT-file): - # SECONDARY sets to 10 -@@ -1480,20 +1632,20 @@ - # LPRlocal OR LPTremore ARE real lpt (>1000) - # THEN: - # Bigger LPR wins, if delta-gab is OK --# LPTlocal >> LPTremore ===> rc=0 (start) -+# LPTlocal >> LPTremore ===> rc=0 (start) - # LPTRemote >> LPTlocal ===> rc=1 (register) --# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait) -+# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait4gab) - # LPRlocal AND LPTremore ARE NOT real lpt (<=1000) - # THEN: - # Bigger LPT wins --# LPTlocal > LPTremore ===> rc=0 (start) -+# LPTlocal > LPTremore ===> rc=0 (start) - # LPTRemote > LPTlocal ===> rc=1 (register) --# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait) -+# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait4gab) - # LPTRemote is not initialized or node not kown in cluster (crm_mon -l) (0) - # TODO: PRIO1: Need to introduce a return-code 3 for remote sides lpa not ready - # THEN: - # WAIT ==> like STALEMATE-HANDLING ===> rc=2 (wait) --# -+# - function lpa_check_lpt_status() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 -@@ -1501,6 +1653,8 @@ - local LPTrem=-1 - local LPTMark=1000 - local delta=0 -+ local remSn_name="" -+ local remHost="" - # - # First GET LPT from ATTR-FILE-DEFAULT - # -@@ -1550,7 +1704,20 @@ - fi - fi - super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ return $rc -+} -+ -+# function: is_the_master_nameserver -+# params: - -+# rc: 0: yes, local node is THE master nameserver -+# 1: else -+# globals: -+function is_the_master_nameserver() -+{ -+ super_ocf_log 
info "FLOW $FUNCNAME ($*)" -+ local rc=0 -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ return $rc - } - - # -@@ -1574,11 +1741,12 @@ - check_for_primary; primary_status=$? - if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then - saphana_start_primary; rc=$? -- else -+ else -+ lpa_set_lpt 10 - saphana_start_secondary; rc=$? -- lpa_set_lpt 30 -- fi -+ fi - fi -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } - -@@ -1596,7 +1764,7 @@ - check_for_primary; primary_status=$? - if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then - lpa_set_lpt 10 -- fi -+ fi - saphana_stop; rc=$? - return $rc - } -@@ -1637,7 +1805,7 @@ - DEMOTED ) - promoted=0; - ;; -- WAITING ) -+ WAITING* ) - # DONE: lpa_check_lpt_status to come out of here :) - # DONE: PRIO2: CHECK IF THE FIX FOR COMING OUT OF WAITING IS CORRECT - get_hana_landscape_status; lss=$? -@@ -1648,7 +1816,8 @@ - lpa_set_lpt $LPTloc - fi - lpa_check_lpt_status; lparc=$? -- if [ $lparc -ne 2 ]; then -+ # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes -+ if [ $lparc -lt 2 ]; then - # lpa - no need to wait any longer - lets try a new start - saphana_start_clone - rc=$? -@@ -1663,7 +1832,7 @@ - super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" - fi - return $OCF_SUCCESS -- fi -+ fi - promoted=0; - ;; - UNDEFINED ) -@@ -1682,13 +1851,13 @@ - get_hana_landscape_status; lss=$? - super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" - case "$lss" in -- 0 ) # FATAL or ERROR -+ 0 ) # FATAL or ERROR - rc=$OCF_ERR_GENERIC - ;; -- 1 ) # DOWN or ERROR -+ 1 ) # DOWN or ERROR - # DONE: PRIO2: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error - if ocf_is_probe; then -- # -+ # - # leave master score untouched, only set return code - # - rc=$OCF_NOT_RUNNING -@@ -1699,7 +1868,7 @@ - # For Migration it would be good to decrease master score - # For Reload locally we should NOT adjust the master score - # ===> Should we rely on the migration threshold? -- # set_crm_master -+ # set_crm_master - if ocf_is_true "${PreferSiteTakeover}" ; then - # - # DONE: PRIO1: first check, if remote site is already (and still) in sync -@@ -1708,7 +1877,7 @@ - # TODO PRIO1: REMOVE remoteNode dependency - get_sync_status - remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) - case "$remoteSync" in -- SOK ) -+ SOK | PRIM ) - super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)" - set_crm_master 5 - if check_for_primary_master; then -@@ -1718,11 +1887,11 @@ - SFAIL ) - super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" - ;; -- * ) -+ * ) - super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" - ;; -- esac -- else -+ esac -+ else - # TODO: PRIO5: SCALE-OUT ONLY? 
Implement for local restart - # It maybe that for the local restart we only need to decrease the secondaries promotion score - #super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here" -@@ -1765,8 +1934,12 @@ - case "$my_role" in - [12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status - ;; -- [34]:P:*:master:* ) # primary is up and should now be able to anser hdbsql query -- analyze_hana_sync_status -+ [34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query -+ if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then -+ analyze_hana_sync_statusSRS -+ else -+ analyze_hana_sync_statusSQL -+ fi - ;; - esac - rem_role=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_ROLES[@]}) -@@ -1776,9 +1949,9 @@ - [234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster - lpa_check_lpt_status; again_lpa_rc=$? - if [ $again_lpa_rc -eq 2 ]; then -- super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" -- lpa_set_lpt 10 -- lpa_push_lpt 10 -+ super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" -+ lpa_set_lpt 10 -+ lpa_push_lpt 10 - rc=$OCF_NOT_RUNNING - fi - ;; -@@ -1812,13 +1985,13 @@ - function saphana_monitor_secondary() - { - super_ocf_log info "FLOW $FUNCNAME ($*)" -- local rc=$OCF_ERR_GENERIC -- local promoted=0 -+ local rc=$OCF_ERR_GENERIC -+ local promoted=0 - local init_attribute=0 - local lss - # - # OK, we are running as HANA SECONDARY -- # -+ # - if ! lpa_get_lpt ${NODENAME}; then - lpa_set_lpt 10 - lpa_push_lpt 10 -@@ -1863,7 +2036,7 @@ - super_ocf_log debug "DBG: saphana_monitor_clone: HANA_STATE_SECONDARY" - # - # old method was: saphana_monitor - new method is get_hana_landscape_status -- get_hana_landscape_status; lss=$? -+ get_hana_landscape_status; lss=$? 
- super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" - case "$lss" in - 0 ) # FATAL -@@ -1919,11 +2092,11 @@ - # a) returning 7 here and force cluster a restart of the slave - # b) starting the instance here inside the monitor -> may result in longer runtime, timeouts - # -- # first check with the status function (OS tools) if there could be something like a SAP instance running -- # as we do not know here, if we are in master or slave state we do not want to start our monitoring -- # agents (sapstartsrv) on the wrong host -- local rc=$OCF_ERR_GENERIC -- local promoted=0 -+ # first check with the status function (OS tools) if there could be something like a SAP instance running -+ # as we do not know here, if we are in master or slave state we do not want to start our monitoring -+ # agents (sapstartsrv) on the wrong host -+ local rc=$OCF_ERR_GENERIC -+ local promoted=0 - local init_attribute=0 - local lpaRc=0 - local mRc=0 -@@ -1973,7 +2146,7 @@ - # function: saphana_promote_clone - promote a hana clone - # params: - - # globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), --# saphana_promote_clone: -+# saphana_promote_clone: - # In a Master/Slave configuration get Master being the primary OR by running hana takeover - # - function saphana_promote_clone() { -@@ -2017,7 +2190,7 @@ - rc=$OCF_SUCCESS; - else - rc=$OCF_FAILED_MASTER -- fi -+ fi - ;; - * ) - super_ocf_log err "ACT: HANA SYNC STATUS IS NOT 'SOK' SO THIS HANA SITE COULD NOT BE PROMOTED" -@@ -2039,10 +2212,10 @@ - # - # function: saphana_demote_clone - demote a hana clone instance - # params: - --# globals: OCF_*(r), NODENAME(r), -+# globals: OCF_*(r), NODENAME(r), - # saphana_demote_clone --# the HANA System Replication (SR) runs in a Master/Slave --# While we could not change a HANA instance to be really demoted, we only mark the status for -+# the HANA System Replication (SR) runs in a Master/Slave -+# While we could not change a HANA instance to be really demoted, we only mark the status for - # correct monitor return codes - # - function saphana_demote_clone() { -@@ -2056,9 +2229,9 @@ - } - - # --# function: main - main function to operate -+# function: main - main function to operate - # params: ACTION --# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), SAPVIRHOST(w), DIR_EXECUTABLE(w), -+# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), SAPVIRHOST(w), DIR_EXECUTABLE(w), - # globals: SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r) - # - -@@ -2073,7 +2246,7 @@ - SAPCONTROL="" - DIR_PROFILE="" - SAPSTARTPROFILE="" --SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" -+SAPHanaFilter="ra-act-dec-lpa" - - NODENAME=$(crm_node -n) - -@@ -2100,7 +2273,7 @@ - exit $OCF_SUCCESS;; - *);; - esac --saphana_init -+saphana_init - - if ! ocf_is_root - then -@@ -2141,7 +2314,7 @@ - saphana_$ACTION$CLACT - ra_rc=$? - ;; -- validate-all) -+ validate-all) - saphana_validate - ra_rc=$? - ;; -@@ -2149,12 +2322,13 @@ - lpa_check_lpt_status - ra_rc=$? 
- ;; -- *) # seams to be a unknown request -- saphana_methods -+ *) # seams to be a unknown request -+ saphana_methods - ra_rc=$OCF_ERR_UNIMPLEMENTED - ;; - esac - timeE=$(date '+%s') - (( timeR = timeE - timeB )) -+#super_ocf_log info "RA ==== SAPHanaFilter=$SAPHanaFilter" - super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($THE_VERSION) (${timeR}s)====" - exit ${ra_rc} -diff -uNr a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology ---- a/heartbeat/SAPHanaTopology 2016-04-26 12:01:55.620889964 +0200 -+++ b/heartbeat/SAPHanaTopology 2016-04-26 12:03:18.033887556 +0200 -@@ -16,7 +16,7 @@ - # Copyright: (c) 2014 SUSE Linux Products GmbH - # (c) 2015 SUSE Linux GmbH - # --# An example usage: -+# An example usage: - # See usage() function below for more details... - # - # OCF instance parameters: -@@ -41,7 +41,6 @@ - HANA_STATE_DEFECT=3 - - debug_attributes=0 -- - SH=/bin/sh - - # -@@ -57,7 +56,7 @@ - local shf="${SAPHanaFilter:-all}" - #ocf_log "info" "super_ocf_log: f:$shf l:$level m:$message" - # message levels: (dbg)|info|warn|err|error -- # -+ # - # message types: (ACT|RA|FLOW|DBG|LPA|DEC - case "$level" in - dbg | debug | warn | err | error ) skip=0 -@@ -65,7 +64,7 @@ - info ) - case "$shf" in - all) skip=0 -- ;; -+ ;; - none ) - skip=1 - ;; -@@ -74,13 +73,13 @@ - mtype=${mtype#fh} - echo "$shf"| grep -iq ${mtype}; search=$? - if [ $search -eq 0 ]; then -- skip=0 -+ skip=0 - else - skip=1 - fi - ;; - esac -- ;; -+ ;; - esac - if [ $skip -eq 0 ]; then - ocf_log "$level" "$message" -@@ -126,15 +125,15 @@ - - - -- 0.149.6 -+ 0.151.1 - Analyzes SAP HANA System Replication Topology. - This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to - all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases. - In addition it starts and monitors the local saphostagent. - --1. Interface to monitor a HANA system: landscapeHostConfiguration.py -+1. Interface to monitor a HANA system: landscapeHostConfiguration.py - landscapeHostConfiguration.py has some detailed output about HANA system status --and node roles. For our monitor the overall status is relevant. This overall -+and node roles. For our monitor the overall status is relevant. This overall - status is reported by the returncode of the script: - 0: Internal Fatal - 1: ERROR -@@ -150,7 +149,7 @@ - system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register). - - 3. saphostctrl -- The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the -+ The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the - SAP HANA instance. This is the hostname used during the HANA installation. - - -@@ -172,13 +171,8 @@ - - - -- Define type of SAPHanaTopology RA messages to be printed -- Define type of SAPHanaTopology RA messages to be printed. --Define SAPHana resource agent messages to be printed. -- This parameter should only be set if requested by support. The default is sufficient for normal operation. 
-- Values: ra-act-lpa-dec-flow -- You could specify any combination of the above values like "ra-act-flow" -- -+ OUTDATED -+ OUTDATED - - - -@@ -197,7 +191,7 @@ - } - - # --# function: get_hana_attribute -+# function: get_hana_attribute - # params: NODE ATTR [STORE] - # globals: - - # -@@ -208,16 +202,19 @@ - local attr_node=$1 - local attr_name=$2 - local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter -- local attr_val="" -- attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q); rc=$? -- if [ $debug_attributes -eq 1 ]; then -- dstr=$(date) -- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE -- fi -- echo "$attr_val" -- if [ $rc -ne 0 ]; then -- super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -G -n "$attr_name" -l $attr_store -q" -- fi -+ local attr_default=${4:-} -+ local dstr -+ dstr=$(date) -+ case "$attr_store" in -+ reboot | forever ) -+ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE -+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ props ) -+ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE -+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ esac - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -234,19 +231,24 @@ - local attr_value=$2 - local attr_name=$3 - local attr_store=${4:-reboot} # DONE: PRIO5 get this (optional) from parameter -+ local attr_default=${5:-} - local rc=1 -- local attr_old -- attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store); get_rc=$? -+ local attr_old="" -+ local dstr -+ dstr=$(date) -+ attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$? - if [ "$attr_old" != "$attr_value" ]; then - super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc " -- if [ $debug_attributes -eq 1 ]; then -- dstr=$(date) -- echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE -- fi -- crm_attribute -N $attr_node -v "$attr_value" -n "$attr_name" -l $attr_store; rc=$? -- if [ $rc -ne 0 ]; then -- super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store" -- fi -+ case "$attr_store" in -+ reboot | forever ) -+ echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE -+ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ props ) -+ echo "$dstr: SAPHanaTopology: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE -+ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? -+ ;; -+ esac - else - super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}" - rc=0 -@@ -299,7 +301,7 @@ - # - # yes it is a clone config - check, if its configured well - # -- if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then -+ if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then - super_ocf_log err "ACT: Clone options misconfigured. 
(expect: clone_node_max=1)" - exit $OCF_ERR_CONFIGURED - fi -@@ -314,8 +316,8 @@ - # - # function: sht_init - initialize variables for the resource agent - # params: - --# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), --# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) -+# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), -+# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) - # globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w) - # globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w) - # sht_init : Define global variables with default values, if optional parameters are not set -@@ -327,6 +329,8 @@ - local myInstanceName="" - local rc=$OCF_SUCCESS - local hdbANSWER="" -+ local siteID -+ local siteNAME - HOSTEXECNAME=saphostexec - USRSAP=/usr/sap - SAPSERVICE_PATH=${USRSAP}/sapservices -@@ -340,10 +344,9 @@ - super_ocf_log debug "DBG2: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)" - sid=$(echo "$SID" | tr [:upper:] [:lower:]) - sidadm="${sid}adm" -- SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" - ocf_env=$(env | grep 'OCF_RESKEY_CRM') - super_ocf_log debug "DBG3: OCF: $ocf_env" -- ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot") # SOK, SFAIL, UNKNOWN? -+ ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot") # SOK, SFAIL, UNKNOWN? - ATTR_NAME_HANA_PRIMARY_AT=("hana_${sid}_primary_at" "reboot") # Not really used - ATTR_NAME_HANA_CLONE_STATE=("hana_${sid}_clone_state" "reboot") # UKNOWN?, DEMOTED, PROMOTED - ATTR_NAME_HANA_REMOTEHOST=("hana_${sid}_remoteHost" "forever") -@@ -352,8 +355,14 @@ - ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") - ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") - ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") -- -+ # -+ # new "central" attributes -+ # -+ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa") - # optional OCF parameters, we try to guess which directories are correct -+ -+ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]}) -+ - if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ] - then - DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe" -@@ -387,19 +396,32 @@ - # we need: mode=primary|sync|syncmem|...; site name=; mapping/=/ (multiple lines) - case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in - *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; -- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; -- *cman* ) nodelist=$(crm_node -l);; -+ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; -+ *cman* ) nodelist=$(crm_node -l);; - esac - #### SAP-CALL -- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) -- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" -- site=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}') -+ # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode -+ for i in 1 2 3 4 5 6 7 8 9; do -+ hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) -+ super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" -+ srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') -+ case "$srmode" in -+ primary | syncmem | sync | async | 
none ) -+ # we can leave the loop as we already got a result -+ break -+ ;; -+ * ) -+ # lets pause a bit to give hdbnsutil a chance to answer next time -+ sleep 2 -+ ;; -+ esac -+ done -+ # TODO PRIO3: Implement a file lookup, if we did not get a result -+ siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}') -+ siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}') -+ site=$siteNAME - srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') -- if [ $debug_attributes -eq 1 ]; then -- dstr=$(date) -- echo "$dstr: SAPHanaTopology: srmode=$srmode" >> /var/log/fhATTRIBUTE -- fi -- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site) -+ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site) - super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING" - # - # filter all non-cluster mappings -@@ -413,12 +435,12 @@ - echo $hanaVHost; - fi; - done; -- done ) -+ done ) - super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" - super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" - super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" - return $OCF_SUCCESS --} -+} - - # - # function: check_for_primary - check if local SAP HANA is configured as primary -@@ -428,32 +450,30 @@ - function check_for_primary() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 -- # DONE: Change stderr location!! -- #sidadm=lnxadm -- #node_status=$(check_for_primary_single) -- node_status=$srmode -- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status" -- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" -- for i in 1 2 3 4 5 6 7 8 9; do -- case "$node_status" in -- primary ) -+ node_status=$srmode -+ super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status" -+ super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" -+ for i in 1 2 3 4 5 6 7 8 9; do -+ case "$node_status" in -+ primary ) - super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY" - return $HANA_STATE_PRIMARY;; - syncmem | sync | async ) - super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY" - return $HANA_STATE_SECONDARY;; -- none ) # have seen that mode on second side BEFEORE we registered it as replica -+ none ) # have seen that mode on second side BEFEORE we registered it as replica - super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE" - return $HANA_STATE_STANDALONE;; - * ) -- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" -- dump=$( echo $node_status | hexdump -C ); -- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" -- #### SAP-CALL -- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) -- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status" -- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes -+ # TODO: PRIO1: Should we set SFAIL? 
-+ # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes -+ dump=$( echo $node_status | hexdump -C ); -+ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>" -+ #### SAP-CALL -+ node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) -+ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') -+ super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status" -+ # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes - esac; - done - super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT" -@@ -464,7 +484,7 @@ - # - # function: start_saphostagent - # params: - --# globals: -+# globals: HOSTEXEC_PATH(r), HOSTEXEC_PROFILE_PATH(r) - # - function start_saphostagent() - { -@@ -478,7 +498,7 @@ - # - # function: stop_saphostagent - # params: - --# globals: -+# globals: HOSTEXEC_PATH(r) - # - function stop_saphostagent() - { -@@ -496,6 +516,8 @@ - function check_saphostagent() - { - local rc=1 -+ # TODO: PRIO3: should the path been removed like "saphostexec" instead of "/usr/sap/hostctrl/exe/saphostexec" -+ # or should we use ${HOSTEXEC_PATH} instead? - pgrep -f /usr/sap/hostctrl/exe/saphostexec; rc=$? - return $rc - } -@@ -509,15 +531,16 @@ - # sht_start : Start the SAP HANA instance - # - function sht_start() { -- - super_ocf_log info "FLOW $FUNCNAME ($*)" - - local rc=$OCF_NOT_RUNNING - local output="" -- local loopcount=0 -+ local loopcount=0 - -- mkdir -p /var/lib/SAPHana -- touch /var/lib/SAPHana/SAPTopologyON -+ # TODO: PRIO3: move the string "$HA_RSCTMP/SAPHana/SAPTopologyON" to a variable -+ # TODO: PRIO3: move the file to the clusters tmp directory? -+ mkdir -p $HA_RSCTMP/SAPHana -+ touch $HA_RSCTMP/SAPHana/SAPTopologyON - if ! check_saphostagent; then - start_saphostagent - fi -@@ -532,16 +555,16 @@ - # function: sht_stop - stop a hana instance - # params: - - # globals: OCF_*(r), SAPCONTROL(r), SID(r), InstanceName(r) --# sht_stop: Stop the SAP instance -+# sht_stop: Stop the SAP HANA Topology Resource - # - function sht_stop() { - super_ocf_log info "FLOW $FUNCNAME ($*)" - local output="" - local rc=0 - -- rm /var/lib/SAPHana/SAPTopologyON -+ rm $HA_RSCTMP/SAPHana/SAPTopologyON - rc=$OCF_SUCCESS -- -+ - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -557,13 +580,13 @@ - super_ocf_log info "FLOW $FUNCNAME ($*)" - local rc=0 - -- if [ -f /var/lib/SAPHana/SAPTopologyON ]; then -+ if [ -f $HA_RSCTMP/SAPHana/SAPTopologyON ]; then - rc=$OCF_SUCCESS - else - rc=$OCF_NOT_RUNNING - fi - -- super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } - -@@ -575,37 +598,37 @@ - # sht_status: Lightweight check of SAP instance only with OS tools - # - function sht_status() { -- super_ocf_log info "FLOW $FUNCNAME ($*)" -- local rc=0 -+ super_ocf_log info "FLOW $FUNCNAME ($*)" -+ local rc=0 - -- sht_monitor; rc=$? -- return $rc -+ sht_monitor; rc=$? 
-+ return $rc - } - - - # - # function: sht_validate - validation of (some) variables/parameters - # params: - --# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), --# sht_validate: Check the symantic of the input parameters -+# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), -+# sht_validate: Check the symantic of the input parameters - # - function sht_validate() { -- super_ocf_log info "FLOW $FUNCNAME ($*)" -- local rc=$OCF_SUCCESS -- if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ] -- then -- super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!" -- rc=$OCF_ERR_ARGS -- fi -+ super_ocf_log info "FLOW $FUNCNAME ($*)" -+ local rc=$OCF_SUCCESS -+ if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ] -+ then -+ super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!" -+ rc=$OCF_ERR_ARGS -+ fi - -- if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ] -- then -- super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!" -- rc=$OCF_ERR_ARGS -- fi -+ if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ] -+ then -+ super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!" -+ rc=$OCF_ERR_ARGS -+ fi - -- super_ocf_log info "FLOW $FUNCNAME rc=$rc" -- return $rc -+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" -+ return $rc - } - - # -@@ -661,15 +684,15 @@ - - if ocf_is_probe; then - super_ocf_log debug "DBG2: PROBE ONLY" -+ sht_monitor; rc=$? - else - super_ocf_log debug "DBG2: REGULAR MONITOR" - if ! check_saphostagent; then - start_saphostagent - fi -- fi - # - # First check, if we are PRIMARY or SECONDARY -- # -+ # - super_ocf_log debug "DBG2: HANA SID $SID" - super_ocf_log debug "DBG2: HANA InstanceName $InstanceName" - super_ocf_log debug "DBG2: HANA InstanceNr $InstanceNr" -@@ -721,8 +744,8 @@ - set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]} - fi - case "$hanaPrim" in -- P ) ;; -- S ) # only secondary may propargate its sync status -+ P ) ;; -+ S ) # only secondary may propargate its sync status - case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in - *corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; - *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; -@@ -732,8 +755,10 @@ - for n in ${nodelist}; do - set_hana_attribute ${n} "$srmode" ${ATTR_NAME_HANA_SRMODE[@]} - done -- ;; -+ ;; - esac -+ # -+ fi # end ocf_is_NOT_probe - super_ocf_log info "FLOW $FUNCNAME rc=$rc" - return $rc - } -@@ -752,7 +777,7 @@ - } - - # --# function: main - main function to operate -+# function: main - main function to operate - # params: ACTION - # globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), DIR_EXECUTABLE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r) - # -@@ -763,7 +788,7 @@ - InstanceName="" - InstanceNr="" - DIR_EXECUTABLE="" --SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}" -+SAPHanaFilter="ra-act-dec-lpa" - NODENAME=$(crm_node -n) - - if [ $# -ne 1 ] -@@ -785,11 +810,11 @@ - exit $OCF_SUCCESS;; - notify) sht_notify - exit $OCF_SUCCESS;; -- admin-setup) admin-setup -- exit $OCF_SUCCESS;; -+ admin-setup) admin-setup -+ exit $OCF_SUCCESS;; - *);; - esac --sht_init -+sht_init - - if ! ocf_is_root - then -@@ -810,7 +835,6 @@ - exit $OCF_ERR_ARGS - fi - -- - if is_clone - then - CLACT=_clone -@@ -830,12 +854,12 @@ - sht_$ACTION$CLACT - ra_rc=$? - ;; -- validate-all) -+ validate-all) - sht_validate - ra_rc=$? 
- ;; -- *) # seams to be a unknown request -- sht_methods -+ *) # seams to be a unknown request -+ sht_methods - ra_rc=$OCF_ERR_UNIMPLEMENTED - ;; - esac diff --git a/SOURCES/bz1351446-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch b/SOURCES/bz1351446-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch deleted file mode 100644 index 47975b4..0000000 --- a/SOURCES/bz1351446-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch +++ /dev/null @@ -1,102 +0,0 @@ -From f00a952bd5e133cad30689d9edcc98f5d33a71a9 Mon Sep 17 00:00:00 2001 -From: Peter Lemenkov -Date: Thu, 16 Jun 2016 16:44:48 +0200 -Subject: [PATCH] Enable dump/restore users from RabbitMQ ver. 3.6.x - -RabbitMQ changed internal_users scheme since ver. 3.6.0. See the -following links for further details: - -* rabbitmq/rabbitmq-server#270 -* rabbitmq/rabbitmq-server#310 -* rabbitmq/rabbitmq-common@9c86a7401cf464dc20527890192c5dc0fe43b6c8 -* rabbitmq/rabbitmq-server@93b5a3a8092f52063cbca3ab661c7c6bae43c512 - -CC @oalbrigt - -Signed-off-by: Peter Lemenkov ---- - heartbeat/rabbitmq-cluster | 64 ++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 50 insertions(+), 14 deletions(-) - -diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster -index 0724901..facca35 100755 ---- a/heartbeat/rabbitmq-cluster -+++ b/heartbeat/rabbitmq-cluster -@@ -342,14 +342,40 @@ rmq_start() { - rmq_join_existing "$join_list" - rc=$? - -- # Restore users (if any) -- BaseDataDir=`dirname $RMQ_DATA_DIR` -- if [ -f $BaseDataDir/users.erl ] ; then -- rabbitmqctl eval " -- {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), -- lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, X) end, Users). -- " -- rm -f $BaseDataDir/users.erl -+ # Restore users (if any) -+ BaseDataDir=`dirname $RMQ_DATA_DIR` -+ if [ -f $BaseDataDir/users.erl ] ; then -+ rabbitmqctl eval " -+ -+ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), -+ -+ %% Read users first -+ {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"), -+ -+ Upgrade = fun -+ ({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5}; -+ ({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D} -+ end, -+ -+ Downgrade = fun -+ ({internal_user, A, B, C}) -> {internal_user, A, B, C}; -+ ({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C}; -+ %% Incompatible scheme, so we will loose user's password ('B' value) during conversion. -+ %% Unfortunately, this case will require manual intervention - user have to run: -+ %% rabbitmqctl change_password -+ ({internal_user, A, B, C, _}) -> {internal_user, A, B, C} -+ end, -+ -+ case WildPattern of -+ %% Version < 3.6.0 -+ {internal_user,'_','_','_'} -> -+ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Downgrade(X)) end, Users); -+ %% Version >= 3.6.0 -+ {internal_user,'_','_','_','_'} -> -+ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, Upgrade(X)) end, Users) -+ end. -+ " -+ rm -f $BaseDataDir/users.erl - fi - - if [ $rc -ne 0 ]; then -@@ -362,12 +388,22 @@ rmq_start() { - } - - rmq_stop() { -- # Backup users -- BaseDataDir=`dirname $RMQ_DATA_DIR` -- rabbitmqctl eval " -- Users = mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]), -- file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). 
-- " -+ # Backup users -+ BaseDataDir=`dirname $RMQ_DATA_DIR` -+ rabbitmqctl eval " -+ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), -+ -+ Users = case WildPattern of -+ %% Version < 3.6.0 -+ {internal_user,'_','_','_'} -> -+ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]); -+ %% Version >= 3.6.0 -+ {internal_user,'_','_','_','_'} -> -+ mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]) -+ end, -+ -+ file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])). -+ " - - rmq_monitor - if [ $? -eq $OCF_NOT_RUNNING ]; then diff --git a/SOURCES/bz1351446-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch b/SOURCES/bz1351446-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch deleted file mode 100644 index 32a05c3..0000000 --- a/SOURCES/bz1351446-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 74b3cff4fce5483d126b16131db53f8bd5804c82 Mon Sep 17 00:00:00 2001 -From: Peter Lemenkov -Date: Tue, 21 Jun 2016 15:48:07 +0200 -Subject: [PATCH] Don't run scriptlets if Mnesia isn't available - -See this rhbz for further details and symptoms: - -https://bugzilla.redhat.com/1343905 - -Signed-off-by: Peter Lemenkov ---- - heartbeat/rabbitmq-cluster | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster -index facca35..18e3206 100755 ---- a/heartbeat/rabbitmq-cluster -+++ b/heartbeat/rabbitmq-cluster -@@ -346,6 +346,8 @@ rmq_start() { - BaseDataDir=`dirname $RMQ_DATA_DIR` - if [ -f $BaseDataDir/users.erl ] ; then - rabbitmqctl eval " -+ %% Run only if Mnesia is ready, otherwise exit. -+ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), - - [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), - -@@ -391,6 +393,9 @@ rmq_stop() { - # Backup users - BaseDataDir=`dirname $RMQ_DATA_DIR` - rabbitmqctl eval " -+ %% Run only if Mnesia is still available, otherwise exit. -+ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) orelse halt(), -+ - [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]), - - Users = case WildPattern of diff --git a/SOURCES/bz1351446-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch b/SOURCES/bz1351446-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch deleted file mode 100644 index 2d1abe8..0000000 --- a/SOURCES/bz1351446-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 279bae7ec9a571a4d52b0d876850e27772eb0933 Mon Sep 17 00:00:00 2001 -From: Jiri Stransky -Date: Thu, 23 Jun 2016 12:55:06 +0200 -Subject: [PATCH] RabbitMQ: Forget node before 2nd joining attempt - -If a first attempt at joining an existing cluster has failed and we -resort to wiping the local RabbitMQ data, make sure we also request the -local node to be forgotten from the existing cluster before we make the -join attempt, otherwise the node will be rejected. 
---- - heartbeat/rabbitmq-cluster | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster -index 0724901..b9ae38e 100755 ---- a/heartbeat/rabbitmq-cluster -+++ b/heartbeat/rabbitmq-cluster -@@ -279,6 +279,22 @@ rmq_join_existing() - return $OCF_SUCCESS - } - -+rmq_forget_cluster_node_remotely() { -+ local running_cluster_nodes="$1" -+ local node_to_forget="$2" -+ -+ ocf_log info "Forgetting $node_to_forget via nodes [ $(echo $running_cluster_nodes | tr '\n' ' ') ]." -+ for running_cluster_node in $running_cluster_nodes; do -+ rabbitmqctl -n $running_cluster_node forget_cluster_node $node_to_forget -+ if [ $? = 0 ]; then -+ ocf_log info "Succeeded forgetting $node_to_forget via $running_cluster_node." -+ return -+ else -+ ocf_log err "Failed to forget node $node_to_forget via $running_cluster_node." -+ fi -+ done -+} -+ - rmq_notify() { - node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}" - mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" -@@ -336,9 +352,12 @@ rmq_start() { - rmq_join_existing "$join_list" - if [ $? -ne 0 ]; then - ocf_log info "node failed to join, wiping data directory and trying again" -+ local local_rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)" -+ - # if the graceful join fails, use the hammer and reset all the data. - rmq_stop - rmq_wipe_data -+ rmq_forget_cluster_node_remotely "$join_list" "$local_rmq_node" - rmq_join_existing "$join_list" - rc=$? - diff --git a/SOURCES/bz1351446-4-rabbitmq-automatic-cluster-recovery.patch b/SOURCES/bz1351446-4-rabbitmq-automatic-cluster-recovery.patch deleted file mode 100644 index d51cfe7..0000000 --- a/SOURCES/bz1351446-4-rabbitmq-automatic-cluster-recovery.patch +++ /dev/null @@ -1,39 +0,0 @@ -commit 1621dbb60454840d469f3a0e317a97d94510f7ab -Author: John Eckersberg -Date: Tue Jul 26 13:47:39 2016 -0400 - - rabbitmq: Allow automatic cluster recovery before forcing it - - When joining a node into an existing cluster, check to see if it is - already clustered before force removing it from the cluster and - re-adding. If the clustering is already functional there's no need to - force it again. - -diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster -index 651b837..966dd64 100755 ---- a/heartbeat/rabbitmq-cluster -+++ b/heartbeat/rabbitmq-cluster -@@ -238,6 +238,11 @@ rmq_start_first() - return $rc - } - -+rmq_is_clustered() -+{ -+ $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' 
| grep -q true -+} -+ - rmq_join_existing() - { - local join_list="$1" -@@ -249,6 +254,11 @@ rmq_join_existing() - return $OCF_ERR_GENERIC - fi - -+ if rmq_is_clustered; then -+ ocf_log info "Successfully re-joined existing rabbitmq cluster automatically" -+ return $OCF_SUCCESS -+ fi -+ - # unconditionally join the cluster - $RMQ_CTL stop_app > /dev/null 2>&1 - for node in $(echo "$join_list"); do diff --git a/SOURCES/bz1358895-oracle-fix-monprofile.patch b/SOURCES/bz1358895-oracle-fix-monprofile.patch new file mode 100644 index 0000000..1ef61d4 --- /dev/null +++ b/SOURCES/bz1358895-oracle-fix-monprofile.patch @@ -0,0 +1,22 @@ +From 5f1088e7e6b7d15e6615d57dcf77834df9ded690 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 22 Jul 2016 10:39:59 +0200 +Subject: [PATCH] oracle: fix MONPROFILE to use monprofile parameter when set + +--- + heartbeat/oracle | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/oracle b/heartbeat/oracle +index d68fa6e..e8e6148 100755 +--- a/heartbeat/oracle ++++ b/heartbeat/oracle +@@ -755,7 +755,7 @@ PROCS_CLEANUP_TIME="30" + + MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default} + MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default} +-MONPROFILE=${OCF_RESKEY_monprofile_default:-$OCF_RESKEY_monprofile_default} ++MONPROFILE=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default} + + MONUSR=$(echo $MONUSR | awk '{print toupper($0)}') + MONPROFILE=$(echo $MONPROFILE | awk '{print toupper($0)}') diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec index ddc7528..04b8f00 100644 --- a/SPECS/resource-agents.spec +++ b/SPECS/resource-agents.spec @@ -32,7 +32,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 3.9.5 -Release: 54%{?dist}.17 +Release: 82%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} @@ -125,26 +125,49 @@ Patch80: bz1168251-SAPHana-agents-update2.patch Patch81: bz1168251-SAPHana-agents-update3.patch Patch82: bz1168251-SAPHana-agents_update4.patch Patch83: bz1251484-redis-client-passwd-support.patch -Patch84: bz1283877-virtualdomain-may-remove-config-file.patch -Patch85: bz1293355-novacompute-novaevacuate-fix-evacute-typo.patch -Patch86: bz1304019-novaevacuate-invoke-off-action.patch -Patch87: bz1304370-1-nfsserver-fix-systemd-status-detection.patch -Patch88: bz1304711-galera-custom-host-port.patch -Patch89: bz1304811-replace-novacompute-with-nova-compute-wait.patch -Patch90: bz1311180-rabbitmq-cluster-forget-stopped-cluster-nodes.patch -Patch91: bz1316633-backup-and-restore-rabbitmq-users-during-resource-re.patch -Patch92: bz1318744-galera-crash-recovery.patch -Patch93: bz1318744-galera-heuristic-recovered.patch -Patch94: bz1318744-galera-no-grastate.patch -Patch95: bz1332435-nfsserver-var-lib-nfs-fix.patch -Patch96: bz1344225-garbd-Introduces-garbd-resource-agent.patch -Patch97: bz1344228-rabbitmq-cluster-return-code-69-not-running.patch -Patch98: bz1347536-saphana-mcos-support.patch -Patch99: bz1351446-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch -Patch100: bz1351446-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch -Patch101: bz1351446-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch -Patch102: bz1351446-4-rabbitmq-automatic-cluster-recovery.patch -Patch103: bz1304370-2-nfsserver-fix-systemd-status-detection.patch +Patch84: bz1282723-novacompute-novaevacuate-fix-evacute-typo.patch +Patch85: bz1287303-novaevacuate-invoke-off-action.patch 
+Patch86: bz1126073-1-nfsserver-fix-systemd-status-detection.patch
+Patch87: bz1299404-galera-custom-host-port.patch
+Patch88: bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch
+Patch89: bz1249430-1-tomcat-fix-selinux-enforced.patch
+Patch90: bz1250728-send_arp-fix-buffer-overflow-on-infiniband.patch
+Patch91: bz1263348-mysql-tmpfile-leak.patch
+Patch92: bz1242181-virtualdomain-migrate_options.patch
+Patch93: bz1242558-virtualdomain-may-remove-config-file.patch
+Patch94: bz1301189-virtualdomain-fix-locale.patch
+Patch95: bz1276699-ipaddr2-use-ipv6-dad-for-collision-detection.patch
+Patch96: bz1212632-nagios.patch
+Patch97: bz1303803-Backup-and-restore-rabbitmq-users-during-resource-re.patch
+Patch98: bz1265527-sap_redhat_cluster_connector-hostnames-with-dash.patch
+Patch99: bz1287314-novaevacuate-simplify-nova-check.patch
+Patch100: bz1303037-1-portblock.patch
+Patch101: bz1284526-galera-crash-recovery.patch
+Patch102: bz1284526-galera-heuristic-recovered.patch
+Patch103: bz1284526-galera-no-grastate.patch
+Patch104: bz1289107-saphana-mcos-support.patch
+Patch105: bz1296406-virtualdomain-migration_speed-migration_downtime.patch
+Patch106: bz1307160-virtualdomain-fix-unnecessary-error-when-probing-nonexistent-domain.patch
+Patch107: bz1317578-oralsnr-fails-if-username-is-longer-than-8-chars.patch
+Patch108: bz1318985-oracle-fix-unable-to-start-because-of-ORA-01081.patch
+Patch109: bz1325453-nfsserver-var-lib-nfs-fix.patch
+Patch110: bz1320783-nova-compute-wait-fix-invalid-hostname-issue.patch
+Patch111: bz1328018-garbd-Introduces-garbd-resource-agent.patch
+Patch112: bz1337109-tickle_tcp-fix.patch
+Patch113: bz1337615-nfsserver-rpcpipefs_dir.patch
+Patch114: bz1337124-mysql-use-replication_port-parameter.patch
+Patch115: bz1328386-1-oracle-monprofile-container-databases.patch
+Patch116: bz1342478-rabbitmq-cluster-return-code-69-not-running.patch
+Patch117: bz1343905-1-rabbitmq-cluster-dump-restore-users-3.6.x.patch
+Patch118: bz1343905-2-rabbitmq-cluster-dump-restore-users-3.6.x.patch
+Patch119: bz1343905-3-rabbitmq-cluster-dump-restore-users-3.6.x.patch
+Patch120: bz1126073-2-nfsserver-fix-systemd-status-detection.patch
+Patch121: bz1358895-oracle-fix-monprofile.patch
+Patch122: bz1343905-rabbitmq-automatic-cluster-recovery.patch
+Patch123: bz1328386-2-oracle-monprofile-container-databases.patch
+Patch124: bz1328386-3-oracle-monprofile-container-databases.patch
+Patch125: bz1303037-2-portblock.patch
+Patch126: bz1249430-2-tomcat-fix-selinux-enforced.patch
 Obsoletes: heartbeat-resources <= %{version}
 Provides: heartbeat-resources = %{version}
@@ -347,14 +370,37 @@ exit 1
 %patch93 -p1
 %patch94 -p1
 %patch95 -p1
-%patch96 -p1 -F2
+%patch96 -p1
 %patch97 -p1
 %patch98 -p1
 %patch99 -p1
 %patch100 -p1
-%patch101 -p1 -F2
+%patch101 -p1
 %patch102 -p1
 %patch103 -p1
+%patch104 -p1
+%patch105 -p1
+%patch106 -p1
+%patch107 -p1
+%patch108 -p1
+%patch109 -p1
+%patch110 -p1
+%patch111 -p1 -F2
+%patch112 -p1
+%patch113 -p1
+%patch114 -p1
+%patch115 -p1
+%patch116 -p1
+%patch117 -p1
+%patch118 -p1
+%patch119 -p1 -F2
+%patch120 -p1
+%patch121 -p1
+%patch122 -p1
+%patch123 -p1
+%patch124 -p1 -F2
+%patch125 -p1
+%patch126 -p1
 %build
 if [ ! -f configure ]; then
@@ -364,6 +410,7 @@ fi
 chmod 755 heartbeat/galera
 chmod 755 heartbeat/garbd
 chmod 755 heartbeat/mysql-common.sh
+chmod 755 heartbeat/nagios
 chmod 755 heartbeat/nfsnotify
 chmod 755 heartbeat/docker
 chmod 755 heartbeat/rabbitmq-cluster
@@ -513,7 +560,6 @@ rm -rf %{buildroot}
 %exclude /usr/lib/ocf/resource.d/heartbeat/ldirectord
 %exclude /usr/lib/ocf/resource.d/heartbeat/lxc
 %exclude /usr/lib/ocf/resource.d/heartbeat/pingd
-%exclude /usr/lib/ocf/resource.d/heartbeat/portblock
 %exclude /usr/lib/ocf/resource.d/heartbeat/pound
 %exclude /usr/lib/ocf/resource.d/heartbeat/proftpd
 %exclude /usr/lib/ocf/resource.d/heartbeat/scsi2reservation
@@ -554,7 +600,6 @@ rm -rf %{buildroot}
 %exclude %{_mandir}/man7/ocf_heartbeat_jboss.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_lxc.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_pingd.7.gz
-%exclude %{_mandir}/man7/ocf_heartbeat_portblock.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_pound.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_proftpd.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_scsi2reservation.7.gz
@@ -610,79 +655,161 @@ ccs_update_schema > /dev/null 2>&1 ||:
 %endif
 %changelog
-* Fri Aug 26 2016 Oyvind Albrigtsen - 3.9.5-54.17
-- nfsserver: fix monitor issues causing NFS to start on "debug-monitor"
-  and "resource cleanup"
+* Tue Sep 20 2016 Oyvind Albrigtsen - 3.9.5-82
+- portblock: create tickle_dir if it doesn't exist
+- tomcat: use systemd if available
-  Resolves: rhbz#1370385
+  Resolves: rhbz#1303037
+  Resolves: rhbz#1249430
-* Wed Jul 27 2016 Andrew Beekhof - 3.9.5-54.16
+* Mon Aug 29 2016 Oyvind Albrigtsen - 3.9.5-81
+- oracle: fix issue with C## in monprofile and inform user that
+  monuser must start with C## as well for container databases
+
+  Resolves: rhbz#1328386
+
+* Wed Jul 27 2016 Andrew Beekhof - 3.9.5-80
 - rabbit: Allow automatic cluster recovery before forcing it
-  Resolves: rhbz#1351446
+  Resolves: rhbz#1343905
+
+* Fri Jul 22 2016 Oyvind Albrigtsen - 3.9.5-79
+- oracle: use monprofile parameter
+
+  Resolves: rhbz#1358895
+
+* Thu Jul 21 2016 Oyvind Albrigtsen - 3.9.5-78
+- nfsserver: fix monitor issues causing NFS to start on
+  "debug-monitor" and "resource cleanup"
+- nfsserver: remove "up to 10 tries" on start to avoid issues with
+  some services taking longer to start
+- nfsserver: stop rpc-gssd to allow unmounting of "rpcpipefs_dir"
-* Tue Jul 5 2016 Oyvind Albrigtsen - 3.9.5-54.15
+  Resolves: rhbz#1356866
+  Resolves: rhbz#1126073
+  Resolves: rhbz#1346733
+
+* Tue Jul 5 2016 Oyvind Albrigtsen - 3.9.5-77
 - rabbitmq-cluster: add return codes for not running
-  Resolves: rhbz#1344228
+  Resolves: rhbz#1342478
-* Thu Jun 30 2016 Oyvind Albrigtsen - 3.9.5-54.14
+* Fri Jun 24 2016 Oyvind Albrigtsen - 3.9.5-76
 - rabbitmq-cluster: support dump/restore users for RabbitMQ v. 3.6.x
-  Resolves: rhbz#1351446
+  Resolves: rhbz#1343905
-* Fri Jun 17 2016 Oyvind Albrigtsen - 3.9.5-54.13
-- SAP HANA: add Multiple Components One System (MCOS) support
+* Mon Jun 6 2016 Oyvind Albrigtsen - 3.9.5-73
+- portblock: fix tickle_tcp bug
+- nfsserver: use rpcpipefs_dir variable
+- mysql: use replication_port variable
+- oracle: inform user that monprofile must start with C## for
+  container databases
-  Resolves: rhbz#1347536
+  Resolves: rhbz#1337109
+  Resolves: rhbz#1337615
+  Resolves: rhbz#1337124
+  Resolves: rhbz#1328386
-* Thu Jun 9 2016 Oyvind Albrigtsen - 3.9.5-54.12
+* Fri Jun 3 2016 Damien Ciabrini - 3.9.5-72
 - garbd: Introduces garbd resource-agent
-  Resolves: rhbz#1344225
+  Resolves: rhbz#1328018
+
+* Fri May 13 2016 Oyvind Albrigtsen - 3.9.5-71
+- nova-compute-wait: fix "Invalid Nova host name" issue
-* Tue May 3 2016 Oyvind Albrigtsen - 3.9.5-54.10
-- nfsserver: fix nfs-idmapd fails to start due to var-lib-nfs-rpc_pipefs.mount being active
+  Resolves: rhbz#1320783
-  Resolves: rhbz#1332435
+* Tue May 3 2016 Oyvind Albrigtsen - 3.9.5-70
+- nfsserver: fix nfs-idmapd fails to start due to
+  var-lib-nfs-rpc_pipefs.mount being active
-* Thu Mar 17 2016 Damien Ciabrini - 3.9.5-54.9
+  Resolves: rhbz#1325453
+
+* Tue Apr 26 2016 Oyvind Albrigtsen - 3.9.5-69
+- SAP HANA: add Multiple Components One System (MCOS) support
+- VirtualDomain: add migration_speed and migration_downtime options
+- VirtualDomain: fix unnecessary error when probing nonexistent domain
+- oralsnr: fix status check fail when username is more than 8 characters long
+- oracle: fix unable to start because of ORA-01081
+
+  Resolves: rhbz#1289107
+  Resolves: rhbz#1296406
+  Resolves: rhbz#1307160
+  Resolves: rhbz#1317578
+  Resolves: rhbz#1318985
+
+* Thu Mar 17 2016 Damien Ciabrini - 3.9.5-68
 - galera: recover blocked nodes with --tc-heuristics-recover
-  Resolves: rhbz#1318744
+  Resolves: rhbz#1284526
-* Fri Mar 11 2016 Oyvind Albrigtsen - 3.9.5-54.8
-- rabbitmq-cluster: keep users during resource reload
+* Tue Mar 1 2016 Oyvind Albrigtsen - 3.9.5-67
+- sap_redhat_cluster_connector: add support for hostnames with "-"
+- NovaEvacuate: simplify nova check
+- portblock: new resource agent
-  Resolves: rhbz#1316633
+  Resolves: rhbz#1265527
+  Resolves: rhbz#1287314
+  Resolves: rhbz#1303037
-* Tue Feb 23 2016 Oyvind Albrigtsen - 3.9.5-54.7
-- rabbitmq-cluster: fix to forget stopped cluster nodes
+* Tue Mar 1 2016 Peter Lemenkov - 3.9.5-65
+- RabbitMQ: keep users during resource reload (small regression fix)
+
+  Resolves: rhbz#1303803
+
+* Tue Mar 1 2016 Peter Lemenkov - 3.9.5-64
+- RabbitMQ: keep users during resource reload
+
+  Resolves: rhbz#1303803
+
+* Tue Mar 1 2016 Oyvind Albrigtsen - 3.9.5-63
+- IPaddr2: use IPv6 DAD for collision detection
+- nagios: new resource agent
-  Resolves: rhbz#1311180
+  Resolves: rhbz#1276699
+  Resolves: rhbz#1212632
-* Fri Feb 5 2016 Oyvind Albrigtsen - 3.9.5-54.6
+* Mon Feb 29 2016 Oyvind Albrigtsen - 3.9.5-62
+- tomcat: fix for SELinux enforced mode
+- send_arp: fix buffer overflow on infiniband devices
+- mysql: fix tmpfile leak
+- VirtualDomain: add migrate_options parameter
+- VirtualDomain: fix issue where config file might get removed
+- VirtualDomain: fix locale in stop and status functions()
+
+  Resolves: rhbz#1249430
+  Resolves: rhbz#1250728
+  Resolves: rhbz#1263348
+  Resolves: rhbz#1242181
+  Resolves: rhbz#1242558
+  Resolves: rhbz#1301189
+
+* Mon Feb 22 2016 Oyvind Albrigtsen - 3.9.5-60
+- rabbitmq-cluster: fix to forget stopped cluster nodes
 - nfsserver: fix systemd status detection
+
+  Resolves: rhbz#1247303
+  Resolves: rhbz#1126073
+
+* Wed Feb 3 2016 Oyvind Albrigtsen - 3.9.5-57
 - Replace NovaCompute with nova-compute-wait which lets systemd manage the nova-compute process
-  Resolves: rhbz#1304370
-  Resolves: rhbz#1304811
+  Resolves: rhbz#1304011
-* Thu Feb 4 2016 Oyvind Albrigtsen - 3.9.5-54.4
-- NovaEvacuate: invoke off action
+* Wed Feb 3 2016 Oyvind Albrigtsen - 3.9.5-56
 - galera: add custom host/port support
-  Resolves: rhbz#1304019
-  Resolves: rhbz#1304711
+  Resolves: rhbz#1299404
-* Wed Jan 20 2016 Oyvind Albrigtsen - 3.9.5-54.3
+* Tue Feb 2 2016 Oyvind Albrigtsen - 3.9.5-55
 - NovaCompute/NovaEvacuate: Fix 'evacute' typo
-  Resolves: rhbz#1293355
+- NovaEvacuate invoke off action
-* Fri Nov 20 2015 Oyvind Albrigtsen - 3.9.5-54.1
-- Fix VirtualDomain may remove config file
-  Resolves: rhbz#1283877
+  Resolves: rhbz#1282723
+  Resolves: rhbz#1287303
 * Mon Sep 7 2015 Fabio M. Di Nitto - 3.9.5-54
 - Fix redis client password regexp
@@ -1049,10 +1176,10 @@ Resolves: rhbz# 773395
 * Sat Jul 21 2012 Fedora Release Engineering - 3.9.2-3.5
 - Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild
-* Thu Jul 05 2012 Chris Feist - 3.9.2-3.4
+* Thu Jul 5 2012 Chris Feist - 3.9.2-3.4
 - Fix location of lvm (change from /sbin to /usr/sbin)
-* Wed Apr 04 2012 Jon Ciesla - 3.9.2-3.3
+* Wed Apr 4 2012 Jon Ciesla - 3.9.2-3.3
 - Rebuilt to fix rawhide dependency issues (caused by move of fsck from /sbin to /usr/sbin).
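
Note on the bz1358895-oracle-fix-monprofile.patch hunk above: the pre-patch assignment expanded ${OCF_RESKEY_monprofile_default:-$OCF_RESKEY_monprofile_default}, which tests the default variable against itself, so a user-supplied monprofile resource parameter was silently ignored. A minimal sketch of the shell parameter expansion involved (the variable values below are hypothetical stand-ins, not taken from the agent):

#!/bin/sh
# Hypothetical stand-ins for the pacemaker-provided resource parameters.
OCF_RESKEY_monprofile="MYPROFILE"          # value configured by the user
OCF_RESKEY_monprofile_default="MONPROFILE" # built-in default of the agent

# Pre-patch expansion: the *default* variable is the one being tested,
# so the result is always the default, regardless of the user setting.
old=${OCF_RESKEY_monprofile_default:-$OCF_RESKEY_monprofile_default}

# Patched expansion: fall back to the default only when monprofile is
# unset or empty.
new=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default}

echo "pre-patch:  $old"   # prints MONPROFILE
echo "post-patch: $new"   # prints MYPROFILE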