Blame SOURCES/bz1064519-pcmk-controld-handles-dlm-startup-fencing.patch

7100e8
commit 1b0fa29aa0e52fa5cb28ef80139e657119b15fca
7100e8
Author: David Vossel <dvossel@redhat.com>
7100e8
Date:   Mon Feb 17 15:19:27 2014 -0600
7100e8
7100e8
    High: controld: handling startup fencing within the controld agent, not the dlm
7100e8
    
7100e8
    Resolves: rhbz#1064519
7100e8
    (cherry picked from commit 11160a94b76ada9844c277128054719445acf22f)
7100e8
7100e8
diff --git a/extra/resources/controld b/extra/resources/controld
7100e8
index bdd31bb..bf5466d 100644
7100e8
--- a/extra/resources/controld
7100e8
+++ b/extra/resources/controld
7100e8
@@ -34,6 +34,12 @@
7100e8
 
7100e8
 #######################################################################
7100e8
 
7100e8
+if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then
7100e8
+    ocf_log info "Using heartbeat controld agent"
7100e8
+    $OCF_ROOT/resource.d/heartbeat/controld $1
7100e8
+    exit $?
7100e8
+fi
7100e8
+
7100e8
 meta_data() {
7100e8
 	cat <<END
7100e8
 <?xml version="1.0"?>
7100e8
@@ -89,6 +95,8 @@ END
7100e8
 
7100e8
 #######################################################################
7100e8
 
7100e8
+DLM_SYSFS_DIR="/sys/kernel/dlm"
7100e8
+
7100e8
 controld_usage() {
7100e8
 	cat <<END
7100e8
 usage: $0 {start|stop|monitor|validate-all|meta-data}
7100e8
@@ -97,6 +105,21 @@ Expects to have a fully populated OCF RA-compliant environment set.
7100e8
 END
7100e8
 }
7100e8
 
7100e8
+check_uncontrolled_locks()
7100e8
+{
7100e8
+    local tmp
7100e8
+    tmp=$(ls $DLM_SYSFS_DIR 2>&1)
7100e8
+    if [ $? -eq 0 ]; then
7100e8
+        if [ -n "$tmp" ]; then
7100e8
+
7100e8
+            ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing"
7100e8
+            stonith_admin --reboot=$(crm_node -n) --tag controld
7100e8
+
7100e8
+            exit $OCF_ERR_GENERIC
7100e8
+        fi
7100e8
+    fi
7100e8
+}
7100e8
+
7100e8
 controld_start() {
7100e8
     controld_monitor; rc=$?
7100e8
 
7100e8
@@ -180,13 +203,22 @@ controld_stop() {
7100e8
 }
7100e8
 
7100e8
 controld_monitor() {
7100e8
+    local rc
7100e8
     killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; rc=$?
7100e8
 
7100e8
     case $rc in
7100e8
-      0) return $OCF_SUCCESS;;
7100e8
-      1) return $OCF_NOT_RUNNING;;
7100e8
-      *) return $OCF_ERR_GENERIC;;
7100e8
+      0) rc=$OCF_SUCCESS;;
7100e8
+      1) rc=$OCF_NOT_RUNNING;;
7100e8
+      *) rc=$OCF_ERR_GENERIC;;
7100e8
     esac
7100e8
+
7100e8
+    # if the dlm is not successfully running, but
7100e8
+    # dlm lockspace bits are left over, we must self-fence.
7100e8
+    if [ $rc -ne $OCF_SUCCESS ]; then
7100e8
+        check_uncontrolled_locks
7100e8
+    fi
7100e8
+
7100e8
+    return $rc
7100e8
 }
7100e8
 
7100e8
 controld_validate() {
7100e8
@@ -219,11 +251,11 @@ case "$OCF_RESOURCE_INSTANCE" in
7100e8
 	: ${OCF_RESKEY_daemon=gfs_controld${daemon_ext}}
7100e8
 	;;
7100e8
     *[dD][lL][mM]*)
7100e8
-	: ${OCF_RESKEY_args=-q 0}
7100e8
+	: ${OCF_RESKEY_args=-q 0 -s 0}
7100e8
 	: ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}
7100e8
 	;;
7100e8
     *)
7100e8
-	: ${OCF_RESKEY_args=-q 0}
7100e8
+	: ${OCF_RESKEY_args=-q 0 -s 0}
7100e8
 	: ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}
7100e8
 esac
7100e8