Blob Blame History Raw
commit 1b0fa29aa0e52fa5cb28ef80139e657119b15fca
Author: David Vossel <dvossel@redhat.com>
Date:   Mon Feb 17 15:19:27 2014 -0600

    High: controld: handling startup fencing within the controld agent, not the dlm
    
    Resolves: rhbz#1064519
    (cherry picked from commit 11160a94b76ada9844c277128054719445acf22f)

diff --git a/extra/resources/controld b/extra/resources/controld
index bdd31bb..bf5466d 100644
--- a/extra/resources/controld
+++ b/extra/resources/controld
@@ -34,6 +34,12 @@
 
 #######################################################################
 
+if [ -e "$OCF_ROOT/resource.d/heartbeat/controld" ]; then  # a heartbeat controld agent file exists: hand over to it
+    ocf_log info "Using heartbeat controld agent"
+    "$OCF_ROOT/resource.d/heartbeat/controld" "$@"  # quoted so the path and arguments are never re-split or globbed
+    exit $?  # propagate the delegated agent's exit status
+fi
+
 meta_data() {
 	cat <<END
 <?xml version="1.0"?>
@@ -89,6 +95,8 @@ END
 
 #######################################################################
 
+DLM_SYSFS_DIR="/sys/kernel/dlm"
+
 controld_usage() {
 	cat <<END
 usage: $0 {start|stop|monitor|validate-all|meta-data}
@@ -97,6 +105,21 @@ Expects to have a fully populated OCF RA-compliant environment set.
 END
 }
 
+# Request a reboot of this node (via stonith_admin) and exit with
+# $OCF_ERR_GENERIC if $DLM_SYSFS_DIR can be listed and is not empty.
+check_uncontrolled_locks()
+{
+    local tmp
+    tmp=$(ls "$DLM_SYSFS_DIR" 2>&1)
+    if [ $? -eq 0 ] && [ -n "$tmp" ]; then
+        # ls succeeded and printed entries: treated as a leftover lockspace.
+        ocf_log err "Uncontrolled lockspace exists, system must reboot. Executing suicide fencing"
+        stonith_admin --reboot="$(crm_node -n)" --tag controld
+        # exit (not return): ends the whole agent invocation.
+        exit $OCF_ERR_GENERIC
+    fi
+}
+
 controld_start() {
     controld_monitor; rc=$?
 
@@ -180,13 +203,22 @@ controld_stop() {
 }
 
 controld_monitor() {
+    local rc
     killall -0 ${OCF_RESKEY_daemon} >/dev/null 2>&1 ; rc=$?
 
     case $rc in
-      0) return $OCF_SUCCESS;;
-      1) return $OCF_NOT_RUNNING;;
-      *) return $OCF_ERR_GENERIC;;
+      0) rc=$OCF_SUCCESS;;
+      1) rc=$OCF_NOT_RUNNING;;
+      *) rc=$OCF_ERR_GENERIC;;
     esac
+
+    # If the dlm is not running successfully but dlm lockspace
+    # bits are left over, this node must fence itself.
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        check_uncontrolled_locks
+    fi
+
+    return $rc
 }
 
 controld_validate() {
@@ -219,11 +251,11 @@ case "$OCF_RESOURCE_INSTANCE" in
 	: ${OCF_RESKEY_daemon=gfs_controld${daemon_ext}}
 	;;
     *[dD][lL][mM]*)
-	: ${OCF_RESKEY_args=-q 0}
+	: ${OCF_RESKEY_args=-q 0 -s 0}
 	: ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}
 	;;
     *)
-	: ${OCF_RESKEY_args=-q 0}
+	: ${OCF_RESKEY_args=-q 0 -s 0}
 	: ${OCF_RESKEY_daemon=dlm_controld${daemon_ext}}
 esac