|
|
a89620 |
From fcaa52bb98a8686d993550c6f4ab7867625c8059 Mon Sep 17 00:00:00 2001
|
|
|
a89620 |
From: John Eckersberg <jeckersb@redhat.com>
|
|
|
a89620 |
Date: Wed, 29 Aug 2018 16:18:55 -0400
|
|
|
a89620 |
Subject: [PATCH] rabbitmq-cluster: get cluster status from mnesia during
|
|
|
a89620 |
monitor
|
|
|
a89620 |
|
|
|
a89620 |
If mnesia is not running (for example if `rabbitmqctl stop_app` has
|
|
|
a89620 |
been called, or the service has paused during partition due to the
|
|
|
a89620 |
pause_minority strategy) then the cluster_status command to
|
|
|
a89620 |
rabbitmqctl will read the cached cluster status from disk and the
|
|
|
a89620 |
command returns 0 even though the service isn't really running at all.
|
|
|
a89620 |
|
|
|
a89620 |
Instead, force the cluster status to be read from mnesia. If mnesia
|
|
|
a89620 |
is not running due to the above or similar circumstances, the command
|
|
|
a89620 |
will catch that and properly fail the monitor action.
|
|
|
a89620 |
|
|
|
a89620 |
Resolves: RHBZ#1595753
|
|
|
a89620 |
---
|
|
|
a89620 |
heartbeat/rabbitmq-cluster | 20 +++++---------------
|
|
|
a89620 |
1 file changed, 5 insertions(+), 15 deletions(-)
|
|
|
a89620 |
|
|
|
a89620 |
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
|
|
|
a89620 |
index a7d2db614..204917475 100755
|
|
|
a89620 |
--- a/heartbeat/rabbitmq-cluster
|
|
|
a89620 |
+++ b/heartbeat/rabbitmq-cluster
|
|
|
a89620 |
@@ -181,26 +181,16 @@ remove_pid () {
|
|
|
a89620 |
rmq_monitor() {
|
|
|
a89620 |
local rc
|
|
|
a89620 |
|
|
|
a89620 |
- $RMQ_CTL cluster_status > /dev/null 2>&1
|
|
|
a89620 |
- rc=$?
|
|
|
a89620 |
- case "$rc" in
|
|
|
a89620 |
- 0)
|
|
|
a89620 |
+ if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
|
|
|
a89620 |
ocf_log debug "RabbitMQ server is running normally"
|
|
|
a89620 |
rmq_write_nodename
|
|
|
a89620 |
-
|
|
|
a89620 |
+
|
|
|
a89620 |
return $OCF_SUCCESS
|
|
|
a89620 |
- ;;
|
|
|
a89620 |
- 2|68|69|70|75|78)
|
|
|
a89620 |
- ocf_log info "RabbitMQ server is not running"
|
|
|
a89620 |
+ else
|
|
|
a89620 |
+ ocf_log info "RabbitMQ server could not get cluster status from mnesia"
|
|
|
a89620 |
rmq_delete_nodename
|
|
|
a89620 |
return $OCF_NOT_RUNNING
|
|
|
a89620 |
- ;;
|
|
|
a89620 |
- *)
|
|
|
a89620 |
- ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc"
|
|
|
a89620 |
- rmq_delete_nodename
|
|
|
a89620 |
- return $OCF_ERR_GENERIC
|
|
|
a89620 |
- ;;
|
|
|
a89620 |
- esac
|
|
|
a89620 |
+ fi
|
|
|
a89620 |
}
|
|
|
a89620 |
|
|
|
a89620 |
rmq_init_and_wait()
|