Blame SOURCES/bz1435982-rabbitmq-cluster-pacemaker-remote.patch

aa0b36
From 51b03e5e892cd2446c84dc78e17b0ad3bdbe76d2 Mon Sep 17 00:00:00 2001
aa0b36
From: Michele Baldessari <michele@acksyn.org>
aa0b36
Date: Tue, 28 Mar 2017 16:21:52 +0200
aa0b36
Subject: [PATCH] Allow the rabbitmq cluster to work on pacemaker remote nodes
aa0b36
aa0b36
This was first observed via
aa0b36
https://bugzilla.redhat.com/show_bug.cgi?id=1435982. Due to the way
aa0b36
the resource agent looks for attrd entries, it will filter out any
aa0b36
node which does not have the @crmd=online attribute. This is the
aa0b36
case for pacemaker-remote nodes. To fix this we chose the more
aa0b36
conservative approach and only do an additional query when the first
aa0b36
one returned no entries. Note that this issue manifests itself
aa0b36
when 'pcs status' reports rabbitmq started on a bunch of nodes:
aa0b36
Clone Set: rabbitmq-clone [rabbitmq]
aa0b36
    Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 ]
aa0b36
aa0b36
But the cluster_status command returns a single node:
aa0b36
[root@overcloud-rabbit-1 ~]# rabbitmqctl cluster_status
aa0b36
Cluster status of node 'rabbit@overcloud-rabbit-1' ...
aa0b36
[{nodes,[{disc,['rabbit@overcloud-rabbit-1']}]},
aa0b36
 {running_nodes,['rabbit@overcloud-rabbit-1']},
aa0b36
 {cluster_name,<<"rabbit@overcloud-rabbit-1.localdomain">>},
aa0b36
 {partitions,[]},
aa0b36
 {alarms,[{'rabbit@overcloud-rabbit-1',[]}]}]
aa0b36
aa0b36
Also add some text in the help explaining that currently a mixture of
aa0b36
pacemaker-remote and pacemaker nodes is not supported.
aa0b36
aa0b36
We tested this change on a pacemaker-remote only setup successfully:
aa0b36
Clone Set: rabbitmq-clone [rabbitmq]
aa0b36
    Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 ]
aa0b36
aa0b36
[root@overcloud-rabbit-0 ~]# rabbitmqctl cluster_status
aa0b36
Cluster status of node 'rabbit@overcloud-rabbit-0' ...
aa0b36
[{nodes,[{disc,['rabbit@overcloud-rabbit-0','rabbit@overcloud-rabbit-1',
aa0b36
                'rabbit@overcloud-rabbit-2']}]},
aa0b36
 {running_nodes,['rabbit@overcloud-rabbit-2','rabbit@overcloud-rabbit-1',
aa0b36
                 'rabbit@overcloud-rabbit-0']},
aa0b36
 {cluster_name,<<"rabbit@overcloud-rabbit-0.localdomain">>},
aa0b36
 {partitions,[]},
aa0b36
 {alarms,[{'rabbit@overcloud-rabbit-2',[]},
aa0b36
          {'rabbit@overcloud-rabbit-1',[]},
aa0b36
          {'rabbit@overcloud-rabbit-0',[]}]}]
aa0b36
aa0b36
Signed-Off-By: Michele Baldessari <michele@acksyn.org>
aa0b36
Signed-Off-By: Damien Ciabrini <dciabrin@redhat.com>
aa0b36
---
aa0b36
 heartbeat/rabbitmq-cluster | 24 ++++++++++++++++++++++--
aa0b36
 1 file changed, 22 insertions(+), 2 deletions(-)
aa0b36
aa0b36
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
aa0b36
index 74378be..6a17590 100755
aa0b36
--- a/heartbeat/rabbitmq-cluster
aa0b36
+++ b/heartbeat/rabbitmq-cluster
aa0b36
@@ -56,7 +56,9 @@ meta_data() {
aa0b36
 <version>1.0</version>
aa0b36
 
aa0b36
 <longdesc lang="en">
aa0b36
-Starts cloned rabbitmq cluster instance
aa0b36
+Starts cloned rabbitmq cluster instance. NB: note that this RA
aa0b36
+cannot be spawned across a mix of pacemaker and pacemaker-remote nodes.
aa0b36
+Only on pacemaker *or* pacemaker-remote nodes exclusively.
aa0b36
 </longdesc>
aa0b36
 <shortdesc lang="en">rabbitmq clustered</shortdesc>
aa0b36
 
aa0b36
@@ -111,7 +113,25 @@ rmq_local_node()
aa0b36
 
aa0b36
 rmq_join_list()
aa0b36
 {
aa0b36
-    cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
aa0b36
+	local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
aa0b36
+	# If join_list is empty we want to check if there are any remote nodes
aa0b36
+	# where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector)
aa0b36
+	if [ -z "$join_list" ]; then
aa0b36
+		# Get all the nodes written in the ATTR_COOKIE no matter if
aa0b36
+		# they are online or not. This will be one line per node like
aa0b36
+		# rabbit@overcloud-rabbit-0
aa0b36
+		# rabbit@overcloud-rabbit-1
aa0b36
+		# ...
aa0b36
+		local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
aa0b36
+		# The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...'
aa0b36
+		local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}')
aa0b36
+		# export the intersection which gives us only the nodes that
aa0b36
+		# a) wrote their namein the cib attrd
aa0b36
+		# b) run on nodes where pacemaker_remote is enabled
aa0b36
+		join_list="$(echo $remote_join_list | grep $filter)"
aa0b36
+	fi
aa0b36
+
aa0b36
+	echo $join_list
aa0b36
 }
aa0b36
 
aa0b36
 rmq_write_nodename()