From 51b03e5e892cd2446c84dc78e17b0ad3bdbe76d2 Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Tue, 28 Mar 2017 16:21:52 +0200 Subject: [PATCH] Allow the rabbitmq cluster to work on pacemaker remote nodes This was first observed via https://bugzilla.redhat.com/show_bug.cgi?id=1435982. Due to the way the resource agent looks for attrd entries, it will filter out any node which does not have the @crmd=online attribute. This is the case for pacemaker-remote nodes. To fix this we chose the more conservative approach and only do an additional query when the first one returned no entries. Note that this issue manifests itself when 'pcs status' reports rabbitmq started on a bunch of nodes: Clone Set: rabbitmq-clone [rabbitmq] Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 ] But the cluster_status command returns a single node: [root@overcloud-rabbit-1 ~]# rabbitmqctl cluster_status Cluster status of node 'rabbit@overcloud-rabbit-1' ... [{nodes,[{disc,['rabbit@overcloud-rabbit-1']}]}, {running_nodes,['rabbit@overcloud-rabbit-1']}, {cluster_name,<<"rabbit@overcloud-rabbit-1.localdomain">>}, {partitions,[]}, {alarms,[{'rabbit@overcloud-rabbit-1',[]}]}] Also add some text in the help explaining that currently a mixture of pacemaker-remote and pacemaker nodes is not supported. We tested this change on a pacemaker-remote only setup successfully: Clone Set: rabbitmq-clone [rabbitmq] Started: [ overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 ] [root@overcloud-rabbit-0 ~]# rabbitmqctl cluster_status Cluster status of node 'rabbit@overcloud-rabbit-0' ...
[{nodes,[{disc,['rabbit@overcloud-rabbit-0','rabbit@overcloud-rabbit-1', 'rabbit@overcloud-rabbit-2']}]}, {running_nodes,['rabbit@overcloud-rabbit-2','rabbit@overcloud-rabbit-1', 'rabbit@overcloud-rabbit-0']}, {cluster_name,<<"rabbit@overcloud-rabbit-0.localdomain">>}, {partitions,[]}, {alarms,[{'rabbit@overcloud-rabbit-2',[]}, {'rabbit@overcloud-rabbit-1',[]}, {'rabbit@overcloud-rabbit-0',[]}]}] Signed-Off-By: Michele Baldessari Signed-Off-By: Damien Ciabrini --- heartbeat/rabbitmq-cluster | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster index 74378be..6a17590 100755 --- a/heartbeat/rabbitmq-cluster +++ b/heartbeat/rabbitmq-cluster @@ -56,7 +56,9 @@ meta_data() { 1.0 -Starts cloned rabbitmq cluster instance +Starts cloned rabbitmq cluster instance. NB: note that this RA +cannot be spawned across a mix of pacemaker and pacemaker-remote nodes. +Only on pacemaker *or* pacemaker-remote nodes exclusively. rabbitmq clustered @@ -111,7 +113,25 @@ rmq_local_node() rmq_join_list() { - cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p" + local join_list=$(cibadmin -Q --xpath "//node_state[@crmd='online']//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") + # If join_list is empty we want to check if there are any remote nodes + # where rabbitmq is allowed to run (i.e. nodes without the crmd=online selector) + if [ -z "$join_list" ]; then + # Get all the nodes written in the ATTR_COOKIE no matter if + # they are online or not. This will be one line per node like + # rabbit@overcloud-rabbit-0 + # rabbit@overcloud-rabbit-1 + # ...
+ local remote_join_list=$(cibadmin -Q --xpath "//node_state//nvpair[@name='$RMQ_CRM_ATTR_COOKIE']" | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p") + # The following expression prepares a filter like '-e overcloud-rabbit-0 -e overcloud-rabbit-1 -e ...' + local filter=$(crm_mon -r --as-xml | xmllint --format --xpath "//nodes//node[@online='true' and @standby='false']/@name" - | xargs -n1 echo | awk -F= '{print "-e "$2}') + # export the intersection (grep filters one node per line, so the list is expanded quoted to keep its newlines) which gives us only the nodes that + # a) wrote their name in the cib attrd + # b) are on nodes that crm_mon reports as online and not in standby + join_list="$(echo "$remote_join_list" | grep $filter)" + fi + + echo $join_list } rmq_write_nodename()