Blame SOURCES/bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch

f784e8
diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
f784e8
--- a/heartbeat/rabbitmq-cluster	2016-02-22 11:09:48.989128414 +0100
f784e8
+++ b/heartbeat/rabbitmq-cluster	2016-02-22 11:10:12.011835745 +0100
f784e8
@@ -39,7 +39,14 @@
f784e8
 RMQ_LOG_DIR="/var/log/rabbitmq"
f784e8
 NODENAME=$(ocf_local_nodename)
f784e8
 
f784e8
+# this attr represents the current active local rmq node name.
f784e8
+# when rmq stops or the node is fenced, this attr disappears
f784e8
 RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
f784e8
+# this attr represents the last known active local rmq node name
f784e8
+# when rmq stops or the node is fenced, the attr stays forever so
f784e8
+# we can continue to map an offline pcmk node to its rmq node name
f784e8
+# equivalent. 
f784e8
+RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}"
f784e8
 
f784e8
 meta_data() {
f784e8
 	cat <
f784e8
@@ -79,7 +86,7 @@
f784e8
 
f784e8
 rmq_usage() {
f784e8
 	cat <
f784e8
-usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
f784e8
+usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
f784e8
 
f784e8
 Expects to have a fully populated OCF RA-compliant environment set.
f784e8
 END
f784e8
@@ -116,8 +123,13 @@
f784e8
 		exit $OCF_ERR_GENERIC
f784e8
 	fi
f784e8
 
f784e8
-	# store the pcmknode to rmq node mapping as an attribute
f784e8
+	# store the pcmknode to rmq node mapping as a transient attribute. This allows
f784e8
+	# us to retrieve the join list with a simple xpath.
f784e8
 	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name"
f784e8
+
f784e8
+	# store the pcmknode to rmq node mapping as a permanent attribute as well. this lets
f784e8
+	# us continue to map offline nodes to their equivalent rmq node name
f784e8
+	${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$node_name"
f784e8
 }
f784e8
 
f784e8
 rmq_delete_nodename()
f784e8
@@ -262,6 +274,41 @@
f784e8
 	return $OCF_SUCCESS
f784e8
 }
f784e8
 
f784e8
+
f784e8
+rmq_notify() {
f784e8
+	node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}"
f784e8
+	mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
f784e8
+
f784e8
+
f784e8
+	# Notify handler. Only "post-stop" notifications are acted on: this agent
f784e8
+	# "forgets" the rmq nodes of the stopped pcmk nodes. This is thought to resolve
f784e8
+	# issues where rabbitmq blocks trying to sync with an offline node after fencing.
f784e8
+	if ! [ "${mode}" = "post-stop" ]; then
f784e8
+		return $OCF_SUCCESS
f784e8
+	fi
f784e8
+
f784e8
+	rmq_monitor
f784e8
+	if [ $? -ne $OCF_SUCCESS ]; then
f784e8
+		# only run forget when rmq_monitor succeeds, i.e. we are for sure active
f784e8
+		return $OCF_SUCCESS
f784e8
+	fi
f784e8
+
f784e8
+	# forget the rmq node last known for each stopped pcmk node in the list.
f784e8
+	for node in $(echo "$node_list"); do
f784e8
+		local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $node -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
f784e8
+		if [ -z "$rmq_node" ]; then
f784e8
+			ocf_log warn "Unable to map pcmk node $node to a known rmq node."
f784e8
+			continue	
f784e8
+		fi
f784e8
+		ocf_log notice "Forgetting stopped node $rmq_node"
f784e8
+		$RMQ_CTL forget_cluster_node $rmq_node
f784e8
+		if [ $? -ne 0 ]; then
f784e8
+			ocf_log warn "Unable to forget offline node $rmq_node."
f784e8
+		fi
f784e8
+	done
f784e8
+	return $OCF_SUCCESS
f784e8
+}
f784e8
+
f784e8
 rmq_start() {
f784e8
 	local join_list=""
f784e8
 	local rc
f784e8
@@ -357,6 +404,7 @@
f784e8
 stop)		rmq_stop;;
f784e8
 monitor)	rmq_monitor;;
f784e8
 validate-all)	rmq_validate;;
f784e8
+notify)		rmq_notify;;
f784e8
 usage|help)	rmq_usage
f784e8
 		exit $OCF_SUCCESS
f784e8
 		;;