Blob Blame History Raw
From d83b9a9394ef69ca2801c84dee46094a224ca654 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 5 Mar 2015 13:47:58 -0600
Subject: [PATCH] redis agent support

---
 doc/man/Makefile.am   |   1 +
 heartbeat/Makefile.am |   1 +
 heartbeat/redis       | 519 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 521 insertions(+)
 create mode 100644 heartbeat/redis

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 43d60d9..653e818 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -125,6 +125,7 @@ man_MANS	       = ocf_heartbeat_AoEtarget.7 \
                           ocf_heartbeat_pound.7 \
                           ocf_heartbeat_proftpd.7 \
                           ocf_heartbeat_rabbitmq-cluster.7 \
+                          ocf_heartbeat_redis.7 \
                           ocf_heartbeat_rsyncd.7 \
                           ocf_heartbeat_rsyslog.7 \
                           ocf_heartbeat_scsi2reservation.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 3bcf2d9..e4ed4fd 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -105,6 +105,7 @@ ocf_SCRIPTS	     =  ClusterMon		\
 			rabbitmq-cluster	\
 			Raid1			\
 			Route			\
+			redis			\
 			rsyncd			\
 			rsyslog			\
 			SAPDatabase		\
diff --git a/heartbeat/redis b/heartbeat/redis
new file mode 100644
index 0000000..6b479b2
--- /dev/null
+++ b/heartbeat/redis
@@ -0,0 +1,519 @@
+#!/bin/bash
+
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
+
+: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
+: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
+: ${OCF_RESKEY_user:=redis}
+: ${OCF_RESKEY_rundir:=/var/run/redis}
+: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
+: ${OCF_RESKEY_socket_name:=redis.sock}
+: ${OCF_RESKEY_port:=6379}
+
+if [ -z "$OCF_RESKEY_config" ]; then
+	if [ -f "/etc/redis.conf" ]; then
+		OCF_RESKEY_config="/etc/redis.conf"
+	else
+		OCF_RESKEY_config="/etc/redis/redis.conf"
+	fi
+fi
+
+CHECK_SLAVE_STATE=0
+
+REDIS_SERVER="$OCF_RESKEY_bin"
+REDIS_CLIENT="$OCF_RESKEY_client_bin"
+REDIS_CONFIG="$OCF_RESKEY_config"
+REDIS_USER="$OCF_RESKEY_user"
+REDIS_RUNDIR="$OCF_RESKEY_rundir"
+REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
+REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
+REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
+
+function meta_data() {
+	cat <<EOI
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="redis">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent script for redis server. 
+
+This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
+When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
+</longdesc>
+
+<shortdesc lang="en">Redis server</shortdesc>
+
+<parameters>
+<parameter name="bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-server\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_bin}" />
+</parameter>
+
+<parameter name="client_bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-cli\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_client_bin}" />
+</parameter>
+
+<parameter name="config" unique="1" required="0">
+<longdesc lang="en">
+Path to 'redis.conf'
+</longdesc>
+<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
+<content type="string" default="${OCF_RESKEY_config}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User to run redis as
+</longdesc>
+<shortdesc lang="en">Redis user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user}" />
+</parameter>
+
+<parameter name="rundir" unique="1" required="0">
+<longdesc lang="en">
+Directory to store socket and pid file in
+</longdesc>
+<shortdesc lang="en">Redis var/run dir</shortdesc>
+<content type="string" default="${OCF_RESKEY_rundir}"/>
+</parameter>
+
+<parameter name="pidfile_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the pidfile. Will be created in the rundir. 
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis pidfile name</shortdesc>
+<content type="string" default="${OCF_RESKEY_pidfile_name}"/>
+</parameter>
+
+<parameter name="socket_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the socket. Will be crated in the rundir. 
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis socket name</shortdesc>
+<content type="string" default="${OCF_RESKEY_socket_name}"/>
+</parameter>
+
+<parameter name="port" unique="0" required="0">
+<longdesc lang="en">
+Port for replication client to connect to on remote server
+</longdesc>
+<shortdesc lang="en">Replication port</shortdesc>
+<content type="string" default="${OCF_RESKEY_port}"/>
+</parameter>
+
+<parameter name="wait_last_known_master" unique="0" required="0">
+<longdesc lang="en">
+During redis cluster bootstrap, wait for the last known master to be
+promoted before allowing any other instances in the cluster to be
+promoted. This lessens the risk of data loss when persistent data
+is in use.
+</longdesc>
+<shortdesc lang="en">Wait for last known master</shortdesc>
+<content type="boolean" default="false"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120" />
+<action name="stop" timeout="120" />
+<action name="status" timeout="60" />
+<action name="monitor" depth="0" timeout="60" interval="45" />
+<action name="monitor" role="Master" depth="0" timeout="60" interval="20" />
+<action name="monitor" role="Slave" depth="0" timeout="60" interval="60" />
+<action name="promote" timeout="120" />
+<action name="demote" timeout="120" />
+<action name="notify" timeout="90" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+EOI
+}
+
+INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"
+MASTER_HOST=""
+MASTER_ACTIVE_CACHED=""
+MASTER_ACTIVE=""
+
+master_is_active()
+{
+	if [ -z "$MASTER_ACTIVE_CACHED" ]; then
+		# determine if a master instance is already up and is healthy
+		crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
+		MASTER_ACTIVE=$?
+		MASTER_ACTIVE_CACHED="true"
+	fi
+	return $MASTER_ACTIVE
+}
+
+function set_master()
+{
+	MASTER_HOST="$1"
+	${CRM_ATTR_REPL_INFO} -v "$1" -q
+}
+
+function last_known_master()
+{
+	if [ -z "$MASTER_HOST" ]; then
+		MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query  -q  2>/dev/null)"
+	fi
+	echo "$MASTER_HOST"
+}
+
+function crm_master_reboot() {
+	"${HA_SBIN_DIR}/crm_master" -l reboot "$@"
+}
+
+function calculate_score()
+{
+	perf_score="$1"
+	connected_clients="$2"
+
+    	if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
+		# only set perferred score by slave_priority if
+		# we are not waiting for the last known master. Otherwise
+		# we want the agent to have complete control over the scoring.
+		perf_score=""
+		connected_clients="0"
+	fi
+
+	if [[ -z "$perf_score" ]]; then
+		if [[ "$(last_known_master)" == "$NODENAME" ]]; then
+			perf_score=1000
+		else
+			perf_score=1
+		fi
+	fi
+	perf_score=$(( perf_score + connected_clients ))
+	echo "$perf_score"
+}
+
+function set_score()
+{
+	local score="$1"
+
+    	if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then
+		local last_master="$(last_known_master)"
+		if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then
+			ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
+			return
+		fi
+	fi
+
+	ocf_log debug "monitor: Setting master score to '$score'"
+	crm_master_reboot -v "$score"
+}
+
+function redis_client() {
+	ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $@"
+	"$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
+}
+
+function simple_status() {
+	local pid
+
+	if ! [ -f "$REDIS_PIDFILE" ]; then
+		return $OCF_NOT_RUNNING
+	fi
+
+	pid="$(<"$REDIS_PIDFILE")"
+	pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING
+
+	ocf_log debug "monitor: redis-server running under pid $pid"
+
+	return $OCF_SUCCESS
+}
+
+function monitor() {
+	local res
+
+	simple_status
+	res=$?
+	if (( res != OCF_SUCCESS )); then
+		return $res
+	fi
+
+	typeset -A info
+	while read line; do
+		[[ "$line" == "#"* ]] && continue
+		[[ "$line" != *":"* ]] && continue
+		IFS=':' read -r key value <<< "$line"
+		info[$key]="$value"
+	done < <(redis_client info)
+	if [[ -z "${info[role]}" ]]; then
+		ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
+		return $OCF_ERR_GENERIC
+	fi
+
+	if ocf_is_ms; then
+		# Here we see if a score has already been set.
+		# If score isn't set we the redis setting 'slave_priority'.
+		# If that isn't set, we default to 1000 for a master, and 1 for slave.
+		# We then add 1 for each connected client
+		score="$(crm_master_reboot --get-value --quiet 2>/dev/null)"
+		if [[ -z "$score" ]]; then
+			score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
+			set_score "$score"
+		fi
+
+		if [[ "${info[role]}" == "master" ]]; then
+			if ocf_is_probe; then
+				set_master "$NODENAME"
+			fi
+			return $OCF_RUNNING_MASTER
+		fi
+
+		if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
+			if [[ "${info[master_link_status]}" != "up" ]]; then
+				ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
+				return $OCF_ERR_GENERIC
+			fi
+			if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
+				ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
+				return $OCF_ERR_GENERIC
+			fi
+		fi
+	fi
+	return $OCF_SUCCESS
+}
+
+function start() {
+	monitor
+	status=$?
+
+	if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+		ocf_log info "start: redis is already running"
+		return $OCF_SUCCESS
+	fi
+
+	[[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
+	chown -R "$REDIS_USER" "$REDIS_RUNDIR"
+
+	ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
+	output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
+
+	while true; do
+		# wait for redis to start
+		typeset -A info
+		while read line; do
+			[[ "$line" == "#"* ]] && continue
+			[[ "$line" != *":"* ]] && continue
+			IFS=':' read -r key value <<< "$line"
+			info[$key]="$value"
+		done < <(redis_client info)
+
+		if (( info[loading] == 0 )); then
+			break
+		elif (( info[loading] == 1 )); then
+			sleep "${info[loading_eta_seconds]}"
+		elif pidof "$REDIS_SERVER" >/dev/null; then
+			# unknown error, but the process still exists.
+			# This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
+			#   See https://github.com/antirez/redis/issues/2368
+			# It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
+			sleep 1
+		else
+			ocf_log err "start: Unknown error waiting for redis to start"
+			return $OCF_ERR_GENERIC
+		fi
+	done
+
+	ocf_is_ms && demote # pacemaker expects resources to start in slave mode
+
+	monitor
+	status=$?
+	if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+		return $OCF_SUCCESS
+	fi
+
+	ocf_log err "start: Unknown error starting redis. output=${output//$'\n'/; }"
+	return $status
+}
+
+function stop() {
+	monitor
+	status=$?
+
+	if (( status == OCF_NOT_RUNNING )); then
+		ocf_log info "stop: redis is already stopped"
+		crm_master_reboot -D
+		return $OCF_SUCCESS
+	fi
+
+	pid="$(<"$REDIS_PIDFILE")"
+	kill -TERM "$pid"
+
+	while true; do
+		simple_status
+		status=$?
+		if (( status == OCF_NOT_RUNNING )); then
+			crm_master_reboot -D
+			return $OCF_SUCCESS
+		fi
+		sleep 1
+	done
+}
+
+function promote() {
+	monitor
+	status=$?
+
+	if (( status == OCF_RUNNING_MASTER )); then
+		ocf_log info "promote: Already running as master"
+		set_master "$NODENAME"
+		return $OCF_SUCCESS
+	elif (( status != OCF_SUCCESS )); then
+		ocf_log err "promote: Node is not running as a slave"
+		return $OCF_ERR_GENERIC
+	fi
+
+	redis_client slaveof no one
+
+	monitor
+	status=$?
+	if (( status == OCF_RUNNING_MASTER )); then
+		set_master "$NODENAME"
+		return $OCF_SUCCESS
+	fi
+
+	ocf_log err "promote: Unknown error while promoting to master (status=$status)"
+	return $OCF_ERR_GENERIC
+}
+
+function demote() {
+	local master_host
+	local master_port
+
+	CHECK_SLAVE_STATE=1
+	monitor
+	status=$?
+
+	if (( status == OCF_SUCCESS )); then
+		ocf_log info "demote: Already running as slave"
+		return $OCF_SUCCESS
+	elif (( status == OCF_NOT_RUNNING )); then
+		ocf_log err "demote: Failed to demote, redis not running."
+		return $OCF_NOT_RUNNING
+	fi
+
+	master_host="$(last_known_master)"
+	master_port="${REDIS_REPLICATION_PORT}"
+
+	# The elected master has to remain a slave during startup.
+	# During this period a placeholder master host is assigned.
+	if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
+		CHECK_SLAVE_STATE=0
+		master_host="no-such-master"
+	elif ! master_is_active; then
+		# no master has been promoted yet. we'll be notified when the
+		# master starts.
+		CHECK_SLAVE_STATE=0
+		master_host="no-such-master"
+	fi
+
+	ocf_log info "demote: Setting master to '$master_host'"
+
+	redis_client slaveof "$master_host" "$master_port"
+
+	# wait briefly for the slave to connect to the master	
+	for (( c=1; c <= 20; c++ ))
+	do
+		monitor
+		status=$?
+		if (( status == OCF_SUCCESS )); then
+			return $OCF_SUCCESS
+		fi
+		sleep 1
+	done
+
+	ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
+	return $OCF_ERR_GENERIC
+}
+
+function notify() {
+	mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+	case "$mode" in
+		post-demote|post-promote) # change the master
+			monitor
+			status=$?
+			if (( status == OCF_SUCCESS )); then # were a slave
+				# calling demote updates the slave's connection
+				# to the newly appointed Master instance.
+				demote
+			fi
+			;;
+	esac
+	return $OCF_SUCCESS
+}
+
+function validate() {
+	if [[ -x "$REDIS_SERVER" ]]; then
+		ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
+		return $OCF_ERR_INSTALLED
+	fi
+	if [[ -x "$REDIS_CLIENT" ]]; then
+		ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
+		return $OCF_ERR_INSTALLED
+	fi
+	if [[ -f "$REDIS_CONFIG" ]]; then
+		ocf_log err "validate: $REDIS_CONFIG does not exist"
+		return $OCF_ERR_CONFIGURED
+	fi
+	if !  getent passwd "$REDIS_USER" &>/dev/null; then
+		ocf_log err "validate: $REDIS_USER is not a valid user"
+		return $OCF_ERR_CONFIGURED
+	fi
+}
+
+NODENAME=$(ocf_local_nodename)
+
+ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"
+
+case "${1:-$__OCF_ACTION}" in
+	status|monitor)
+		monitor
+		;;
+	start)
+		start
+		;;
+	stop)
+		stop
+		;;
+	restart)
+		stop && start
+		;;
+	promote)
+		promote
+		;;
+	demote)
+		demote
+		;;
+	notify)
+		notify
+		;;
+	meta-data)
+		meta_data
+		;;
+	validate-all)
+		validate
+		;;
+	*)
+		echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
+		exit $OCF_ERR_UNIMPLEMENTED
+		;;
+esac
+status=$?
+ocf_log debug "exit_status=$status"
+exit $status
-- 
1.8.4.2