Blame SOURCES/rabbitmq-cluster.patch

150f7b
From a06ce7c166f4a7801b1fb7d50c77dead8a0c7a1d Mon Sep 17 00:00:00 2001
150f7b
From: David Vossel <dvossel@redhat.com>
150f7b
Date: Wed, 21 Jan 2015 18:00:18 -0500
150f7b
Subject: [PATCH] High: introducing rabbitmq clustering agent
150f7b
150f7b
---
150f7b
 doc/man/Makefile.am        |   1 +
150f7b
 heartbeat/Makefile.am      |   1 +
150f7b
 heartbeat/rabbitmq-cluster | 370 +++++++++++++++++++++++++++++++++++++++++++++
150f7b
 3 files changed, 372 insertions(+)
150f7b
 create mode 100755 heartbeat/rabbitmq-cluster
150f7b
150f7b
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
150f7b
index eafb2d1..62e619a 100644
150f7b
--- a/doc/man/Makefile.am
150f7b
+++ b/doc/man/Makefile.am
150f7b
@@ -127,6 +127,7 @@ man_MANS	       = ocf_heartbeat_AoEtarget.7 \
150f7b
                           ocf_heartbeat_postfix.7 \
150f7b
                           ocf_heartbeat_pound.7 \
150f7b
                           ocf_heartbeat_proftpd.7 \
150f7b
+                          ocf_heartbeat_rabbitmq-cluster.7 \
150f7b
                           ocf_heartbeat_rsyncd.7 \
150f7b
                           ocf_heartbeat_rsyslog.7 \
150f7b
                           ocf_heartbeat_scsi2reservation.7 \
150f7b
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
150f7b
index 330b7f7..66dcff2 100644
150f7b
--- a/heartbeat/Makefile.am
150f7b
+++ b/heartbeat/Makefile.am
150f7b
@@ -106,6 +106,7 @@ ocf_SCRIPTS	     =  ClusterMon		\
150f7b
 			pgsql			\
150f7b
 			proftpd			\
150f7b
 			Pure-FTPd		\
150f7b
+			rabbitmq-cluster	\
150f7b
 			Raid1			\
150f7b
 			Route			\
150f7b
 			rsyncd			\
150f7b
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
150f7b
new file mode 100755
150f7b
index 0000000..b9dcfc3
150f7b
--- /dev/null
150f7b
+++ b/heartbeat/rabbitmq-cluster
150f7b
@@ -0,0 +1,370 @@
150f7b
+#!/bin/sh
150f7b
+#
150f7b
+# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
150f7b
+#                    All Rights Reserved.
150f7b
+#
150f7b
+# This program is free software; you can redistribute it and/or modify
150f7b
+# it under the terms of version 2 of the GNU General Public License as
150f7b
+# published by the Free Software Foundation.
150f7b
+#
150f7b
+# This program is distributed in the hope that it would be useful, but
150f7b
+# WITHOUT ANY WARRANTY; without even the implied warranty of
150f7b
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
150f7b
+#
150f7b
+# Further, this software is distributed without any warranty that it is
150f7b
+# free of the rightful claim of any third person regarding infringement
150f7b
+# or the like.  Any license provided herein, whether implied or
150f7b
+# otherwise, applies only to this software file.  Patent licenses, if
150f7b
+# any, provided herein do not apply to combinations of this program with
150f7b
+# other software, or any other product whatsoever.
150f7b
+#
150f7b
+# You should have received a copy of the GNU General Public License
150f7b
+# along with this program; if not, write the Free Software Foundation,
150f7b
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
150f7b
+#
150f7b
+
150f7b
+#######################################################################
150f7b
+# Initialization:
150f7b
+
150f7b
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
150f7b
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
150f7b
+
150f7b
+#######################################################################
150f7b
+
150f7b
+RMQ_SERVER=/usr/sbin/rabbitmq-server
150f7b
+RMQ_CTL=/usr/sbin/rabbitmqctl
150f7b
+RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
150f7b
+RMQ_PID_DIR="/var/run/rabbitmq"
150f7b
+RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
150f7b
+RMQ_LOG_DIR="/var/log/rabbitmq"
150f7b
+NODENAME=$(ocf_local_nodename)
150f7b
+
150f7b
+RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
150f7b
+
150f7b
+meta_data() {
150f7b
+	cat <<END
150f7b
+<?xml version="1.0"?>
150f7b
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
150f7b
+<resource-agent name="rabbitmq-cluster" version="0.9">
150f7b
+<version>1.0</version>
150f7b
+
150f7b
+<longdesc lang="en">
150f7b
+Starts cloned rabbitmq cluster instance
150f7b
+</longdesc>
150f7b
+<shortdesc lang="en">rabbitmq clustered</shortdesc>
150f7b
+
150f7b
+<parameters>
150f7b
+<parameter name="set_policy" unique="1">
150f7b
+<longdesc lang="en">
150f7b
+Policy string to pass to 'rabbitmqctl set_policy' right after bootstrapping the first rabbitmq instance.
150f7b
+</longdesc>
150f7b
+<shortdesc lang="en">rabbitmqctl set_policy args</shortdesc>
150f7b
+<content type="string" default="" />
150f7b
+</parameter>
150f7b
+
150f7b
+</parameters>
150f7b
+
150f7b
+<actions>
150f7b
+<action name="start"        timeout="100" />
150f7b
+<action name="stop"         timeout="90" />
150f7b
+<action name="monitor"      timeout="40" interval="10" depth="0" />
150f7b
+<action name="meta-data"    timeout="10" />
150f7b
+<action name="validate-all"   timeout="20" />
150f7b
+</actions>
150f7b
+</resource-agent>
150f7b
+END
150f7b
+}
150f7b
+
150f7b
+#######################################################################
150f7b
+
150f7b
+rmq_usage() {
150f7b
+	cat <<END
150f7b
+usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
150f7b
+
150f7b
+Expects to have a fully populated OCF RA-compliant environment set.
150f7b
+END
150f7b
+}
150f7b
+
150f7b
+rmq_wipe_data()
150f7b
+{
150f7b
+	rm -rf $RMQ_DATA_DIR > /dev/null 2>&1 
150f7b
+}
150f7b
+
150f7b
+rmq_local_node()
150f7b
+{
150f7b
+
150f7b
+	local node_name=$(rabbitmqctl status 2>&1 | sed -n -e "s/^.*[S|s]tatus of node \(.*\)\s.*$/\1/p" | tr -d "'")
150f7b
+
150f7b
+	if [ -z "$node_name" ]; then
150f7b
+		node_name=$(cat /etc/rabbitmq/rabbitmq-env.conf 2>/dev/null | grep "\s*RABBITMQ_NODENAME=" | awk -F= '{print $2}')
150f7b
+	fi
150f7b
+
150f7b
+	echo "$node_name"
150f7b
+}
150f7b
+
150f7b
+rmq_join_list()
150f7b
+{
150f7b
+	cibadmin -Q 2>/dev/null | grep "$RMQ_CRM_ATTR_COOKIE" | sed -n -e "s/^.*value=.\(.*\)\".*$/\1/p"
150f7b
+}
150f7b
+
150f7b
+rmq_write_nodename()
150f7b
+{
150f7b
+	local node_name=$(rmq_local_node)
150f7b
+
150f7b
+	if [ -z "$node_name" ]; then
150f7b
+		ocf_log err "Failed to determine rabbitmq node name, exiting"
150f7b
+		exit $OCF_ERR_GENERIC
150f7b
+	fi
150f7b
+
150f7b
+	# store the pcmknode to rmq node mapping as an attribute
150f7b
+	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name"
150f7b
+}
150f7b
+
150f7b
+rmq_delete_nodename()
150f7b
+{
150f7b
+	# remove node-name
150f7b
+	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -D
150f7b
+}
150f7b
+
150f7b
+prepare_dir () {
150f7b
+	if [ ! -d ${1} ] ; then
150f7b
+		mkdir -p ${1}
150f7b
+		chown -R rabbitmq:rabbitmq ${1}
150f7b
+		chmod 755 ${1}
150f7b
+	fi
150f7b
+}
150f7b
+
150f7b
+remove_pid () {
150f7b
+	rm -f ${RMQ_PID_FILE} > /dev/null 2>&1
150f7b
+}
150f7b
+
150f7b
+rmq_monitor() {
150f7b
+	local rc
150f7b
+
150f7b
+	$RMQ_CTL cluster_status > /dev/null 2>&1
150f7b
+	rc=$?
150f7b
+	case "$rc" in
150f7b
+	0)
150f7b
+		ocf_log debug "RabbitMQ server is running normally"
150f7b
+		rmq_write_nodename
150f7b
+		
150f7b
+		return $OCF_SUCCESS
150f7b
+	;;
150f7b
+	2)
150f7b
+		ocf_log info "RabbitMQ server is not running"
150f7b
+		rmq_delete_nodename
150f7b
+		return $OCF_NOT_RUNNING
150f7b
+	;;
150f7b
+	*)
150f7b
+		ocf_log err "Unexpected return code from '$RMQ_CTL cluster status' exit code: $rc"
150f7b
+		rmq_delete_nodename
150f7b
+		return $OCF_ERR_GENERIC
150f7b
+	;;
150f7b
+	esac
150f7b
+}
150f7b
+
150f7b
+rmq_init_and_wait()
150f7b
+{
150f7b
+	local rc
150f7b
+
150f7b
+	prepare_dir $RMQ_PID_DIR
150f7b
+	prepare_dir $RMQ_LOG_DIR
150f7b
+	remove_pid
150f7b
+
150f7b
+	# the server startup script uses this environment variable
150f7b
+	export RABBITMQ_PID_FILE="$RMQ_PID_FILE"
150f7b
+
150f7b
+	setsid sh -c "$RMQ_SERVER > ${RMQ_LOG_DIR}/startup_log 2> ${RMQ_LOG_DIR}/startup_err" &
150f7b
+
150f7b
+	ocf_log info "Waiting for server to start"
150f7b
+	$RMQ_CTL wait $RMQ_PID_FILE
150f7b
+	rc=$?
150f7b
+	if [ $rc -ne $OCF_SUCCESS ]; then
150f7b
+		remove_pid
150f7b
+		ocf_log info "rabbitmq-server start failed: $rc"
150f7b
+		return $OCF_ERR_GENERIC
150f7b
+	fi
150f7b
+
150f7b
+	rmq_monitor
150f7b
+	return $?
150f7b
+}
150f7b
+
150f7b
+rmq_set_policy()
150f7b
+{
150f7b
+	$RMQ_CTL set_policy $@ > /dev/null 2>&1
150f7b
+}
150f7b
+
150f7b
+rmq_start_first()
150f7b
+{
150f7b
+	local rc
150f7b
+
150f7b
+	ocf_log info "Bootstrapping rabbitmq cluster"
150f7b
+	rmq_wipe_data
150f7b
+	rmq_init_and_wait
150f7b
+	rc=$?
150f7b
+
150f7b
+	if [ $rc -eq 0 ]; then
150f7b
+		rc=$OCF_SUCCESS
150f7b
+		ocf_log info "cluster bootstrapped"
150f7b
+
150f7b
+		if [ -n "$OCF_RESKEY_set_policy" ]; then
150f7b
+			# do not quote set_policy, we are passing in arguments
150f7b
+			rmq_set_policy $OCF_RESKEY_set_policy > /dev/null 2>&1
150f7b
+			if [ $? -ne 0 ]; then
150f7b
+				ocf_log err "Failed to set policy: $OCF_RESKEY_set_policy"
150f7b
+				rc=$OCF_ERR_GENERIC
150f7b
+			else 
150f7b
+				ocf_log info "Policy set: $OCF_RESKEY_set_policy"
150f7b
+			fi
150f7b
+		fi
150f7b
+
150f7b
+	else
150f7b
+		ocf_log info "failed to bootstrap cluster. Check SELINUX policy"
150f7b
+		rc=$OCF_ERR_GENERIC
150f7b
+	fi
150f7b
+
150f7b
+	return $rc
150f7b
+}
150f7b
+
150f7b
+rmq_join_existing()
150f7b
+{
150f7b
+	local join_list="$1"
150f7b
+	local rc=$OCF_ERR_GENERIC
150f7b
+
150f7b
+	ocf_log info "Joining existing cluster with [ $(echo $join_list | tr '\n' ' ') ] nodes."
150f7b
+	rmq_init_and_wait
150f7b
+	if [ $? -ne 0 ]; then
150f7b
+		return $OCF_ERR_GENERIC
150f7b
+	fi
150f7b
+
150f7b
+	# unconditionally join the cluster
150f7b
+	$RMQ_CTL stop_app > /dev/null 2>&1
150f7b
+	for node in $(echo "$join_list"); do
150f7b
+		ocf_log info "Attempting to join cluster with target node $node"
150f7b
+		$RMQ_CTL join_cluster $node
150f7b
+		if [ $? -eq 0 ]; then
150f7b
+			ocf_log info "Joined cluster by connecting to node $node, starting app"
150f7b
+			$RMQ_CTL start_app
150f7b
+			rc=$?
150f7b
+			if [ $rc -ne 0 ]; then
150f7b
+				ocf_log err "'$RMQ_CTL start_app' failed"
150f7b
+			fi
150f7b
+			break;
150f7b
+		fi
150f7b
+	done
150f7b
+
150f7b
+	if [ "$rc" -ne 0 ]; then
150f7b
+		ocf_log info "Join process incomplete, shutting down."
150f7b
+		return $OCF_ERR_GENERIC
150f7b
+	fi
150f7b
+
150f7b
+	ocf_log info "Successfully joined existing rabbitmq cluster"
150f7b
+	return $OCF_SUCCESS
150f7b
+}
150f7b
+
150f7b
+rmq_start() {
150f7b
+	local join_list=""
150f7b
+	local rc
150f7b
+
150f7b
+	rmq_monitor
150f7b
+	if [ $? -eq $OCF_SUCCESS ]; then
150f7b
+		return $OCF_SUCCESS
150f7b
+	fi
150f7b
+
150f7b
+	join_list=$(rmq_join_list)
150f7b
+
150f7b
+	# No join list means no active instances are up. This instance
150f7b
+	# is the first, so it needs to bootstrap the rest
150f7b
+	if [ -z "$join_list" ]; then
150f7b
+		rmq_start_first
150f7b
+		rc=$?
150f7b
+		return $rc
150f7b
+	fi
150f7b
+
150f7b
+	# first try to join without wiping mnesia data
150f7b
+	rmq_join_existing "$join_list"
150f7b
+	if [ $? -ne 0 ]; then
150f7b
+		ocf_log info "node failed to join, wiping data directory and trying again"
150f7b
+		# if the graceful join fails, use the hammer and reset all the data.
150f7b
+		rmq_stop 
150f7b
+		rmq_wipe_data
150f7b
+		rmq_join_existing "$join_list"
150f7b
+		if [ $? -ne 0 ]; then
150f7b
+			ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
150f7b
+			return $OCF_ERR_GENERIC
150f7b
+		fi
150f7b
+	fi
150f7b
+
150f7b
+	return $OCF_SUCCESS
150f7b
+}
150f7b
+
150f7b
+rmq_stop() {
150f7b
+	rmq_monitor
150f7b
+	if [ $? -eq $OCF_NOT_RUNNING ]; then
150f7b
+		return $OCF_SUCCESS
150f7b
+	fi
150f7b
+
150f7b
+	$RMQ_CTL stop
150f7b
+	rc=$?
150f7b
+
150f7b
+	if [ $rc -ne 0 ]; then
150f7b
+		ocf_log err "rabbitmq-server stop command failed: $RMQ_CTL stop, $rc"
150f7b
+		return $rc
150f7b
+	fi
150f7b
+
150f7b
+	#TODO add kill logic
150f7b
+	stop_wait=1
150f7b
+	while [ $stop_wait = 1 ]; do
150f7b
+		rmq_monitor
150f7b
+		rc=$?
150f7b
+		if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
150f7b
+			stop_wait=0
150f7b
+			break
150f7b
+		elif [ "$rc" -ne $OCF_SUCCESS ]; then
150f7b
+			ocf_log info "rabbitmq-server stop failed: $rc"
150f7b
+			exit $OCF_ERR_GENERIC
150f7b
+		fi
150f7b
+		sleep 1
150f7b
+	done
150f7b
+
150f7b
+	remove_pid
150f7b
+	return $OCF_SUCCESS
150f7b
+}
150f7b
+
150f7b
+rmq_validate() {
150f7b
+	check_binary $RMQ_SERVER
150f7b
+	check_binary $RMQ_CTL
150f7b
+
150f7b
+	# This resource only makes sense as a clone right now. at some point
150f7b
+	# we may want to verify the following.
150f7b
+	#TODO verify cloned
150f7b
+	#TODO verify ordered=true
150f7b
+
150f7b
+	# Given that this resource does the cluster join explicitly,
150f7b
+	# having a cluster_nodes list in the static config file will
150f7b
+	# likely conflict with this agent. 
150f7b
+	#TODO verify no cluster list in rabbitmq conf
150f7b
+	#cat /etc/rabbitmq/rabbitmq.config | grep "cluster_nodes"
150f7b
+
150f7b
+	return $OCF_SUCCESS
150f7b
+}
150f7b
+
150f7b
+case $__OCF_ACTION in
150f7b
+meta-data)	meta_data
150f7b
+		exit $OCF_SUCCESS
150f7b
+		;;
150f7b
+start)		rmq_start;;
150f7b
+stop)		rmq_stop;;
150f7b
+monitor)	rmq_monitor;;
150f7b
+validate-all)	rmq_validate;;
150f7b
+usage|help)	rmq_usage
150f7b
+		exit $OCF_SUCCESS
150f7b
+		;;
150f7b
+*)		rmq_usage
150f7b
+		exit $OCF_ERR_UNIMPLEMENTED
150f7b
+		;;
150f7b
+esac
150f7b
+rc=$?
150f7b
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
150f7b
+exit $rc
150f7b
+
150f7b
-- 
150f7b
1.8.4.2
150f7b