From 51b03e5e892cd2446c84dc78e17b0ad3bdbe76d2 Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Tue, 28 Mar 2017 16:21:52 +0200
Subject: [PATCH] Allow the rabbitmq cluster to work on pacemaker remote nodes
This was first observed via
https://bugzilla.redhat.com/show_bug.cgi?id=1435982. Due to the way
the resource agent looks for attrd entries, it will filter out any
node which does not have the @crmd=online attribute. This is the
case for pacemaker-remote nodes. To fix this we chose the more
conservative approach and only do an additional query when the first
one returned no entries. Note that this issue exhibits itself
when 'pcs status' reports rabbitmq started on a bunch of nodes:
Clone Set: rabbitmq-clone [rabbitmq]
Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2
But the cluster_status command returns a single node:
[root@overcloud-rabbit-1 ~]# rabbitmqctl cluster_status
Cluster status of node 'rabbit@overcloud-rabbit-1' ...
[{nodes,[{disc,['rabbit@overcloud-rabbit-1']}]},
{running_nodes,['rabbit@overcloud-rabbit-1']},
{cluster_name,<<"rabbit@overcloud-rabbit-1.localdomain">>},
{partitions,[]},
{alarms,[{'rabbit@overcloud-rabbit-1',[]}]}]
Also add some text in the help explaining that currently a mixture of
pacemaker-remote and pacemaker nodes is not supported.
We tested this change on a pacemaker-remote only setup successfully:
Clone Set: rabbitmq-clone [rabbitmq]
Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2
[root@overcloud-rabbit-0 ~]# rabbitmqctl cluster_status
Cluster status of node 'rabbit@overcloud-rabbit-0' ...
[{nodes,[{disc,['rabbit@overcloud-rabbit-0','rabbit@overcloud-rabbit-1',
'rabbit@overcloud-rabbit-2']}]},
{running_nodes,['rabbit@overcloud-rabbit-2','rabbit@overcloud-rabbit-1',
'rabbit@overcloud-rabbit-0']},
{cluster_name,<<"rabbit@overcloud-rabbit-0.localdomain">>},
{partitions,[]},
{alarms,[{'rabbit@overcloud-rabbit-2',[]},
{'rabbit@overcloud-rabbit-1',[]},
{'rabbit@overcloud-rabbit-0',[]}]}]
Signed-Off-By: Michele Baldessari <michele@acksyn.org>
Signed-Off-By: Damien Ciabrini <dciabrin@redhat.com>
---
heartbeat/rabbitmq-cluster | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 74378be..6a17590 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -56,7 +56,9 @@ meta_data() {
<version>1.0</version>
<longdesc lang="en">
-Starts cloned rabbitmq cluster instance
+Starts cloned rabbitmq cluster instance. NB: note that this RA
+cannot be spawned across a mix of pacemaker and pacemaker-remote nodes.
+Only on pacemaker *or* pacemaker-remote nodes exclusively.
</longdesc>
<shortdesc lang="en">rabbitmq clustered</shortdesc>
@@ -111,7 +113,25 @@ rmq_local_node()
rmq_join_list()
{
- cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
+ local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
+ # If join_list is empty we want to check if there are any remote nodes
+ # where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector)
+ if [ -z "$join_list" ]; then
+ # Get all the nodes written in the ATTR_COOKIE no matter if
+ # they are online or not. This will be one line per node like
+ # rabbit@overcloud-rabbit-0
+ # rabbit@overcloud-rabbit-1
+ # ...
+ local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p")
+ # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...'
+ local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}')
+ # export the intersection which gives us only the nodes that
+ # a) wrote their namein the cib attrd
+ # b) run on nodes where pacemaker_remote is enabled
+ join_list="$(echo $remote_join_list | grep $filter)"
+ fi
+
+ echo $join_list
}
rmq_write_nodename()