Blame SOURCES/bz1745713-rabbitmq-cluster-1-monitor-mnesia-status.patch

b4b3ce
From fcaa52bb98a8686d993550c6f4ab7867625c8059 Mon Sep 17 00:00:00 2001
b4b3ce
From: John Eckersberg <jeckersb@redhat.com>
b4b3ce
Date: Wed, 29 Aug 2018 16:18:55 -0400
b4b3ce
Subject: [PATCH] rabbitmq-cluster: get cluster status from mnesia during
b4b3ce
 monitor
b4b3ce
b4b3ce
If mnesia is not running (for example if `rabbitmqctl stop_app` has
b4b3ce
been called, or the service has paused during partition due to the
b4b3ce
pause_minority strategy) then the cluster_status command to
b4b3ce
rabbitmqctl will read the cached cluster status from disk and the
b4b3ce
command returns 0 even though the service isn't really running at all.
b4b3ce
b4b3ce
Instead, force the cluster status to be read from mnesia.  If mnesia
b4b3ce
is not running due to the above or similar circumstances, the command
b4b3ce
will catch that and properly fail the monitor action.
b4b3ce
b4b3ce
Resolves: RHBZ#1595753
b4b3ce
---
b4b3ce
 heartbeat/rabbitmq-cluster | 20 +++++---------------
b4b3ce
 1 file changed, 5 insertions(+), 15 deletions(-)
b4b3ce
b4b3ce
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
b4b3ce
index a7d2db614..204917475 100755
b4b3ce
--- a/heartbeat/rabbitmq-cluster
b4b3ce
+++ b/heartbeat/rabbitmq-cluster
b4b3ce
@@ -181,26 +181,16 @@ remove_pid () {
b4b3ce
 rmq_monitor() {
b4b3ce
 	local rc
b4b3ce
 
b4b3ce
-	$RMQ_CTL cluster_status > /dev/null 2>&1
b4b3ce
-	rc=$?
b4b3ce
-	case "$rc" in
b4b3ce
-	0)
b4b3ce
+	if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
b4b3ce
 		ocf_log debug "RabbitMQ server is running normally"
b4b3ce
 		rmq_write_nodename
b4b3ce
-		
b4b3ce
+
b4b3ce
 		return $OCF_SUCCESS
b4b3ce
-	;;
b4b3ce
-	2|68|69|70|75|78)
b4b3ce
-		ocf_log info "RabbitMQ server is not running"
b4b3ce
+	else
b4b3ce
+		ocf_log info "RabbitMQ server could not get cluster status from mnesia"
b4b3ce
 		rmq_delete_nodename
b4b3ce
 		return $OCF_NOT_RUNNING
b4b3ce
-	;;
b4b3ce
-	*)
b4b3ce
-		ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc"
b4b3ce
-		rmq_delete_nodename
b4b3ce
-		return $OCF_ERR_GENERIC
b4b3ce
-	;;
b4b3ce
-	esac
b4b3ce
+	fi
b4b3ce
 }
b4b3ce
 
b4b3ce
 rmq_init_and_wait()