Blame SOURCES/bz1170376-galera-no-readonly.patch

8d4b3d
diff --git a/heartbeat/galera b/heartbeat/galera
8d4b3d
index 994aad0..d74a70d 100755
8d4b3d
--- a/heartbeat/galera
8d4b3d
+++ b/heartbeat/galera
8d4b3d
@@ -342,6 +342,14 @@ is_readonly()
8d4b3d
 
8d4b3d
 master_exists()
8d4b3d
 {
8d4b3d
+    if [ "$__OCF_ACTION" = "demote" ]; then
8d4b3d
+        # We don't want to detect master instances during demote.
8d4b3d
+        # 1. we could be detecting ourselves as being master, which is no longer the case.
8d4b3d
+        # 2. we could be detecting other master instances that are in the process of shutting down.
8d4b3d
+        # By not detecting other master instances in "demote", we defer this check
8d4b3d
+        # to the next recurring monitor operation, which will be much more accurate.
8d4b3d
+        return 1
8d4b3d
+    fi
8d4b3d
     # determine if a master instance is already up and is healthy
8d4b3d
     crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
8d4b3d
     return $?
8d4b3d
@@ -441,20 +449,24 @@ galera_promote()
8d4b3d
             extra_opts="--wsrep-cluster-address=gcomm://"
8d4b3d
         else
8d4b3d
             ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
8d4b3d
+            clear_last_commit
8d4b3d
             return $OCF_ERR_GENERIC
8d4b3d
         fi
8d4b3d
-
8d4b3d
     fi
8d4b3d
 
8d4b3d
-    # make sure the read only instance is stopped
8d4b3d
-    mysql_common_stop
8d4b3d
-    rc=$?
8d4b3d
-    if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
8d4b3d
-        ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master"
8d4b3d
-        return $rc
8d4b3d
+    galera_monitor
8d4b3d
+    if [ $? -eq $OCF_RUNNING_MASTER ]; then
8d4b3d
+        if ocf_is_true $bootstrap; then
8d4b3d
+            promote_everyone
8d4b3d
+            clear_bootstrap_node
8d4b3d
+            ocf_log info "boostrap node already up, promoting the rest of the galera instances."
8d4b3d
+        fi
8d4b3d
+        clear_last_commit
8d4b3d
+        return $OCF_SUCCESS
8d4b3d
     fi
8d4b3d
 
8d4b3d
-    sleep 4
8d4b3d
+    # last commit is no longer relevant once promoted
8d4b3d
+    clear_last_commit
8d4b3d
 
8d4b3d
     mysql_common_prepare_dirs
8d4b3d
     mysql_common_start "$extra_opts"
8d4b3d
@@ -492,9 +504,6 @@ galera_promote()
8d4b3d
         wait_for_sync
8d4b3d
     fi
8d4b3d
 
8d4b3d
-    # last commit is no longer relevant once promoted
8d4b3d
-    clear_last_commit
8d4b3d
-
8d4b3d
     ocf_log info "Galera started"
8d4b3d
     return $OCF_SUCCESS
8d4b3d
 }
8d4b3d
@@ -510,14 +519,14 @@ galera_demote()
8d4b3d
 
8d4b3d
     # if this node was previously a bootstrap node, that is no longer the case.
8d4b3d
     clear_bootstrap_node
8d4b3d
+    clear_last_commit
8d4b3d
 
8d4b3d
-    # start again in slave mode so the new last commit is recorded
8d4b3d
+    # Record the last commit by "starting" galera; start only detects the last sequence number.
8d4b3d
     galera_start
8d4b3d
 }
8d4b3d
 
8d4b3d
 galera_start()
8d4b3d
 {
8d4b3d
-    local extra_opts='--read-only=true'
8d4b3d
     local last_commit
8d4b3d
 
8d4b3d
     echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
8d4b3d
@@ -526,22 +535,39 @@ galera_start()
8d4b3d
         return $OCF_ERR_CONFIGURED
8d4b3d
     fi
8d4b3d
 
8d4b3d
-    mysql_common_prepare_dirs
8d4b3d
-    mysql_common_start "$extra_opts"
8d4b3d
-
8d4b3d
-    is_readonly
8d4b3d
-    if [ $? -ne 0 ]; then
8d4b3d
-        ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set."
8d4b3d
+    galera_monitor
8d4b3d
+    if [ $? -eq $OCF_RUNNING_MASTER ]; then
8d4b3d
+        ocf_exit_reason "master galera instance started outside of the cluster's control"
8d4b3d
         return $OCF_ERR_GENERIC
8d4b3d
     fi
8d4b3d
 
8d4b3d
-    ocf_log info "attempting to detect last commit version"
8d4b3d
-    while [ -z "$last_commit" ]; do
8d4b3d
-        last_commit=$(get_status_variable "wsrep_last_committed")
8d4b3d
-        if [ -z "$last_commit" ]; then
8d4b3d
-            sleep 1
8d4b3d
+    mysql_common_prepare_dirs
8d4b3d
+
8d4b3d
+    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
8d4b3d
+    last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
8d4b3d
+    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
8d4b3d
+        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
8d4b3d
+        local tmp=$(mktemp)
8d4b3d
+        ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
8d4b3d
+            --pid-file=$OCF_RESKEY_pid \
8d4b3d
+            --socket=$OCF_RESKEY_socket \
8d4b3d
+            --datadir=$OCF_RESKEY_datadir \
8d4b3d
+            --user=$OCF_RESKEY_user \
8d4b3d
+            --wsrep-recover > $tmp 2>&1
8d4b3d
+
8d4b3d
+        last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
8d4b3d
+        rm -f $tmp
8d4b3d
+
8d4b3d
+        if [ "$last_commit" = "-1" ]; then
8d4b3d
+            last_commit="0"
8d4b3d
         fi
8d4b3d
-    done
8d4b3d
+    fi
8d4b3d
+
8d4b3d
+    if [ -z "$last_commit" ]; then
8d4b3d
+        ocf_exit_reason "Unable to detect last known write sequence number"
8d4b3d
+        clear_last_commit
8d4b3d
+        return $OCF_ERR_GENERIC
8d4b3d
+    fi
8d4b3d
     ocf_log info "Last commit version found:  $last_commit"
8d4b3d
 
8d4b3d
     set_last_commit $last_commit
8d4b3d
@@ -567,28 +593,40 @@ galera_monitor()
8d4b3d
     if ocf_is_probe; then
8d4b3d
         status_loglevel="info"
8d4b3d
     fi
8d4b3d
- 
8d4b3d
+
8d4b3d
     mysql_common_status $status_loglevel
8d4b3d
     rc=$?
8d4b3d
 
8d4b3d
-    # If status returned an error, return that immediately
8d4b3d
-    if [ $rc -ne $OCF_SUCCESS ]; then
8d4b3d
+    if [ $rc -eq $OCF_NOT_RUNNING ]; then
8d4b3d
+        last_commit=$(get_last_commit $node)
8d4b3d
+        if [ -n "$last_commit" ]; then
8d4b3d
+            # if last commit is set, this instance is considered started in slave mode
8d4b3d
+            rc=$OCF_SUCCESS
8d4b3d
+            master_exists
8d4b3d
+            if [ $? -ne 0 ]; then
8d4b3d
+                detect_first_master
8d4b3d
+            else
8d4b3d
+                # a master instance exists and is healthy, promote this
8d4b3d
+                # local read only instance
8d4b3d
+                # so it can join the master galera cluster.
8d4b3d
+                set_master_score
8d4b3d
+            fi
8d4b3d
+        fi
8d4b3d
+        return $rc
8d4b3d
+    elif [ $rc -ne $OCF_SUCCESS ]; then
8d4b3d
         return $rc
8d4b3d
     fi
8d4b3d
 
8d4b3d
+    # if we make it here, mysql is running. Check cluster status now.
8d4b3d
+
8d4b3d
     echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
8d4b3d
     if [ $? -ne 0 ]; then
8d4b3d
         ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
8d4b3d
         return $OCF_ERR_GENERIC
8d4b3d
     fi
8d4b3d
 
8d4b3d
-    is_readonly
8d4b3d
-    if [ $? -ne 0 ]; then
8d4b3d
-        is_primary
8d4b3d
-        if [ $? -ne 0 ]; then
8d4b3d
-            ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state."
8d4b3d
-            return $OCF_ERR_GENERIC
8d4b3d
-        fi
8d4b3d
+    is_primary
8d4b3d
+    if [ $? -eq 0 ]; then
8d4b3d
 
8d4b3d
         if ocf_is_probe; then
8d4b3d
             # restore master score during probe
8d4b3d
@@ -596,18 +634,10 @@ galera_monitor()
8d4b3d
             set_master_score
8d4b3d
         fi
8d4b3d
         rc=$OCF_RUNNING_MASTER
8d4b3d
-    else 
8d4b3d
-        master_exists
8d4b3d
-        if [ $? -ne 0 ]; then
8d4b3d
-            detect_first_master
8d4b3d
-        else
8d4b3d
-            # a master instance exists and is healthy, promote this
8d4b3d
-            # local read only instance
8d4b3d
-            # so it can join the master galera cluster.
8d4b3d
-            set_master_score
8d4b3d
-        fi
8d4b3d
+    else
8d4b3d
+        ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
8d4b3d
+        rc=$OCF_ERR_GENERIC
8d4b3d
     fi
8d4b3d
-    # TODO look at what is done in the wait script
8d4b3d
 
8d4b3d
     return $rc
8d4b3d
 }