Blob Blame History Raw
From 640c2b57f0f3e7256d587ddd5960341cb38b1982 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Sun, 13 Dec 2020 14:58:34 -0800
Subject: [PATCH] LVM-activate: Fix return codes

OCF_ERR_ARGS should be used when the configuration isn't valid for the
**local** node, and so the resource should not attempt to start again
locally until the issue is corrected.

OCF_ERR_CONFIGURED should be used when the configuration isn't valid on
**any** node, and so the resource should not attempt to start again
anywhere until the issue is corrected.

One remaining gray area: Should lvmlockd/lvmetad/clvmd improperly
running (or improperly not running) be an OCF_ERR_GENERIC or
OCF_ERR_ARGS? The fact that it's a state issue rather than a config
issue suggests OCF_ERR_GENERIC. The fact that it won't be fixed without
user intervention suggests OCF_ERR_ARGS. The approach here is to use
GENERIC for all of these. One can make the case that "improperly
running" should use ARGS, since a process must be manually stopped to
fix the issue, and that "improperly not running" should use GENERIC,
since there's a small chance the process died and will be recovered in
some way.

More info about return code meanings:
  - https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#how-are-ocf-return-codes-interpreted

Resolves: RHBZ#1905820

Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
 heartbeat/LVM-activate | 47 +++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c86606637..e951a08e9 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -333,8 +333,7 @@ config_verify()
 	real=$(lvmconfig "$name" | cut -d'=' -f2)
 	if [ "$real" != "$expect" ]; then
 		ocf_exit_reason "config item $name: expect=$expect but real=$real"
-		exit $OCF_ERR_CONFIGURED
-
+		exit $OCF_ERR_ARGS
 	fi
 
 	return $OCF_SUCCESS
@@ -366,12 +365,12 @@ lvmlockd_check()
 		fi
 
 		ocf_exit_reason "lvmlockd daemon is not running!"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_GENERIC
 	fi
 
 	if pgrep clvmd >/dev/null 2>&1 ; then
 		ocf_exit_reason "clvmd daemon is running unexpectedly."
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_GENERIC
 	fi
 
 	return $OCF_SUCCESS
@@ -402,17 +401,17 @@ clvmd_check()
 	# Good: clvmd is running, and lvmlockd is not running
 	if ! pgrep clvmd >/dev/null 2>&1 ; then
 		ocf_exit_reason "clvmd daemon is not running!"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_GENERIC
 	fi
 
 	if pgrep lvmetad >/dev/null 2>&1 ; then
 		ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_GENERIC
 	fi
 
 	if pgrep lvmlockd >/dev/null 2>&1 ; then
 		ocf_exit_reason "lvmlockd daemon is running unexpectedly."
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_GENERIC
 	fi
 
 	return $OCF_SUCCESS
@@ -424,12 +423,12 @@ systemid_check()
 	source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
 	if [ "$source" = "" ] || [ "$source" = "none" ]; then
 		ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_ARGS
 	fi
 
 	if [ -z ${SYSTEM_ID} ]; then
 		ocf_exit_reason "local/system_id is not set!"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_ARGS
 	fi
 
 	return $OCF_SUCCESS
@@ -441,18 +440,18 @@ tagging_check()
 	# The volume_list must be initialized to something in order to
 	# guarantee our tag will be filtered on startup
 	if ! lvm dumpconfig activation/volume_list; then
-		ocf_log err  "LVM: Improper setup detected"
+		ocf_log err "LVM: Improper setup detected"
 		ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_ARGS
 	fi
 
 	# Our tag must _NOT_ be in the volume_list.  This agent
 	# overrides the volume_list during activation using the
 	# special tag reserved for cluster activation
 	if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\"";  then
-		ocf_log err "LVM:  Improper setup detected"
+		ocf_log err "LVM: Improper setup detected"
 		ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_ARGS
 	fi
 
 	return $OCF_SUCCESS
@@ -463,13 +462,13 @@ read_parameters()
 	if [ -z "$VG" ]
 	then
 		ocf_exit_reason "You must identify the volume group name!"
-		exit $OCF_ERR_ARGS
+		exit $OCF_ERR_CONFIGURED
 	fi
 
 	if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
 	then
 		ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
-		exit $OCF_ERR_ARGS
+		exit $OCF_ERR_CONFIGURED
 	fi
 
 	# Convert VG_access_mode from string to index
@@ -519,8 +518,10 @@ lvm_validate() {
 			exit $OCF_NOT_RUNNING
 		fi
 
+		# Could be a transient error (e.g., iSCSI connection
+		# issue) so use OCF_ERR_GENERIC
 		ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
-		exit $OCF_ERR_CONFIGURED
+		exit $OCF_ERR_GENERIC
 	fi
 
 	# Inconsistency might be due to missing physical volumes, which doesn't
@@ -549,7 +550,7 @@ lvm_validate() {
 	mode=$?
 	if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
 		ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
-		exit $OCF_ERR_ARGS
+		exit $OCF_ERR_CONFIGURED
 	fi
 
 	# Nothing to do if the VG has no logical volume
@@ -561,11 +562,11 @@ lvm_validate() {
 
 	# Check if the given $LV is in the $VG
 	if [ -n "$LV" ]; then
-		OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
+		output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
 		if [ $? -ne 0 ]; then
-			ocf_log err "lvs: ${OUT}"
+			ocf_log err "lvs: ${output}"
 			ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
-			exit $OCF_ERR_ARGS
+			exit $OCF_ERR_CONFIGURED
 		fi
 	fi
 
@@ -580,7 +581,6 @@ lvm_validate() {
 	3)
 		systemid_check
 		;;
-
 	4)
 		tagging_check
 		;;
@@ -808,10 +808,9 @@ lvm_status() {
 			dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
 				2>&1
 			if [ $? -ne 0 ]; then
-				return $OCF_NOT_RUNNING
-			else
-				return $OCF_SUCCESS
+				return $OCF_ERR_GENERIC
 			fi
+			return $OCF_SUCCESS
 			;;
 		*)
 			ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"