|
|
8d4b3d |
diff --git a/heartbeat/galera b/heartbeat/galera
|
|
|
8d4b3d |
index 994aad0..d74a70d 100755
|
|
|
8d4b3d |
--- a/heartbeat/galera
|
|
|
8d4b3d |
+++ b/heartbeat/galera
|
|
|
8d4b3d |
@@ -342,6 +342,14 @@ is_readonly()
|
|
|
8d4b3d |
|
|
|
8d4b3d |
master_exists()
|
|
|
8d4b3d |
{
|
|
|
8d4b3d |
+ if [ "$__OCF_ACTION" = "demote" ]; then
|
|
|
8d4b3d |
+ # We don't want to detect master instances during demote.
|
|
|
8d4b3d |
+ # 1. we could be detecting ourselves as being master, which is no longer the case.
|
|
|
8d4b3d |
+ # 2. we could be detecting other master instances that are in the process of shutting down.
|
|
|
8d4b3d |
+ # by not detecting other master instances in "demote" we are deferring this check
|
|
|
8d4b3d |
+ # to the next recurring monitor operation which will be much more accurate
|
|
|
8d4b3d |
+ return 1
|
|
|
8d4b3d |
+ fi
|
|
|
8d4b3d |
# determine if a master instance is already up and is healthy
|
|
|
8d4b3d |
crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
|
|
|
8d4b3d |
return $?
|
|
|
8d4b3d |
@@ -441,20 +449,24 @@ galera_promote()
|
|
|
8d4b3d |
extra_opts="--wsrep-cluster-address=gcomm://"
|
|
|
8d4b3d |
else
|
|
|
8d4b3d |
ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
|
|
|
8d4b3d |
+ clear_last_commit
|
|
|
8d4b3d |
return $OCF_ERR_GENERIC
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
-
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- # make sure the read only instance is stopped
|
|
|
8d4b3d |
- mysql_common_stop
|
|
|
8d4b3d |
- rc=$?
|
|
|
8d4b3d |
- if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
|
|
|
8d4b3d |
- ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master"
|
|
|
8d4b3d |
- return $rc
|
|
|
8d4b3d |
+ galera_monitor
|
|
|
8d4b3d |
+ if [ $? -eq $OCF_RUNNING_MASTER ]; then
|
|
|
8d4b3d |
+ if ocf_is_true $bootstrap; then
|
|
|
8d4b3d |
+ promote_everyone
|
|
|
8d4b3d |
+ clear_bootstrap_node
|
|
|
8d4b3d |
+ ocf_log info "boostrap node already up, promoting the rest of the galera instances."
|
|
|
8d4b3d |
+ fi
|
|
|
8d4b3d |
+ clear_last_commit
|
|
|
8d4b3d |
+ return $OCF_SUCCESS
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- sleep 4
|
|
|
8d4b3d |
+ # last commit is no longer relevant once promoted
|
|
|
8d4b3d |
+ clear_last_commit
|
|
|
8d4b3d |
|
|
|
8d4b3d |
mysql_common_prepare_dirs
|
|
|
8d4b3d |
mysql_common_start "$extra_opts"
|
|
|
8d4b3d |
@@ -492,9 +504,6 @@ galera_promote()
|
|
|
8d4b3d |
wait_for_sync
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- # last commit is no longer relevant once promoted
|
|
|
8d4b3d |
- clear_last_commit
|
|
|
8d4b3d |
-
|
|
|
8d4b3d |
ocf_log info "Galera started"
|
|
|
8d4b3d |
return $OCF_SUCCESS
|
|
|
8d4b3d |
}
|
|
|
8d4b3d |
@@ -510,14 +519,14 @@ galera_demote()
|
|
|
8d4b3d |
|
|
|
8d4b3d |
# if this node was previously a bootstrap node, that is no longer the case.
|
|
|
8d4b3d |
clear_bootstrap_node
|
|
|
8d4b3d |
+ clear_last_commit
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- # start again in slave mode so the new last commit is recorded
|
|
|
8d4b3d |
+ # record last commit by "starting" galera. start is just detection of the last sequence number
|
|
|
8d4b3d |
galera_start
|
|
|
8d4b3d |
}
|
|
|
8d4b3d |
|
|
|
8d4b3d |
galera_start()
|
|
|
8d4b3d |
{
|
|
|
8d4b3d |
- local extra_opts='--read-only=true'
|
|
|
8d4b3d |
local last_commit
|
|
|
8d4b3d |
|
|
|
8d4b3d |
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
|
|
|
8d4b3d |
@@ -526,22 +535,39 @@ galera_start()
|
|
|
8d4b3d |
return $OCF_ERR_CONFIGURED
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- mysql_common_prepare_dirs
|
|
|
8d4b3d |
- mysql_common_start "$extra_opts"
|
|
|
8d4b3d |
-
|
|
|
8d4b3d |
- is_readonly
|
|
|
8d4b3d |
- if [ $? -ne 0 ]; then
|
|
|
8d4b3d |
- ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set."
|
|
|
8d4b3d |
+ galera_monitor
|
|
|
8d4b3d |
+ if [ $? -eq $OCF_RUNNING_MASTER ]; then
|
|
|
8d4b3d |
+ ocf_exit_reason "master galera instance started outside of the cluster's control"
|
|
|
8d4b3d |
return $OCF_ERR_GENERIC
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- ocf_log info "attempting to detect last commit version"
|
|
|
8d4b3d |
- while [ -z "$last_commit" ]; do
|
|
|
8d4b3d |
- last_commit=$(get_status_variable "wsrep_last_committed")
|
|
|
8d4b3d |
- if [ -z "$last_commit" ]; then
|
|
|
8d4b3d |
- sleep 1
|
|
|
8d4b3d |
+ mysql_common_prepare_dirs
|
|
|
8d4b3d |
+
|
|
|
8d4b3d |
+ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
|
|
|
8d4b3d |
+ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
|
|
|
8d4b3d |
+ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
|
|
|
8d4b3d |
+ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
|
|
|
8d4b3d |
+ local tmp=$(mktemp)
|
|
|
8d4b3d |
+ ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
|
|
|
8d4b3d |
+ --pid-file=$OCF_RESKEY_pid \
|
|
|
8d4b3d |
+ --socket=$OCF_RESKEY_socket \
|
|
|
8d4b3d |
+ --datadir=$OCF_RESKEY_datadir \
|
|
|
8d4b3d |
+ --user=$OCF_RESKEY_user \
|
|
|
8d4b3d |
+ --wsrep-recover > $tmp 2>&1
|
|
|
8d4b3d |
+
|
|
|
8d4b3d |
+ last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
|
|
|
8d4b3d |
+ rm -f $tmp
|
|
|
8d4b3d |
+
|
|
|
8d4b3d |
+ if [ "$last_commit" = "-1" ]; then
|
|
|
8d4b3d |
+ last_commit="0"
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
- done
|
|
|
8d4b3d |
+ fi
|
|
|
8d4b3d |
+
|
|
|
8d4b3d |
+ if [ -z "$last_commit" ]; then
|
|
|
8d4b3d |
+ ocf_exit_reason "Unable to detect last known write sequence number"
|
|
|
8d4b3d |
+ clear_last_commit
|
|
|
8d4b3d |
+ return $OCF_ERR_GENERIC
|
|
|
8d4b3d |
+ fi
|
|
|
8d4b3d |
ocf_log info "Last commit version found: $last_commit"
|
|
|
8d4b3d |
|
|
|
8d4b3d |
set_last_commit $last_commit
|
|
|
8d4b3d |
@@ -567,28 +593,40 @@ galera_monitor()
|
|
|
8d4b3d |
if ocf_is_probe; then
|
|
|
8d4b3d |
status_loglevel="info"
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
-
|
|
|
8d4b3d |
+
|
|
|
8d4b3d |
mysql_common_status $status_loglevel
|
|
|
8d4b3d |
rc=$?
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- # If status returned an error, return that immediately
|
|
|
8d4b3d |
- if [ $rc -ne $OCF_SUCCESS ]; then
|
|
|
8d4b3d |
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
|
|
|
8d4b3d |
+ last_commit=$(get_last_commit $node)
|
|
|
8d4b3d |
+ if [ -n "$last_commit" ]; then
|
|
|
8d4b3d |
+ # if last commit is set, this instance is considered started in slave mode
|
|
|
8d4b3d |
+ rc=$OCF_SUCCESS
|
|
|
8d4b3d |
+ master_exists
|
|
|
8d4b3d |
+ if [ $? -ne 0 ]; then
|
|
|
8d4b3d |
+ detect_first_master
|
|
|
8d4b3d |
+ else
|
|
|
8d4b3d |
+ # a master instance exists and is healthy, promote this
|
|
|
8d4b3d |
+ # local read only instance
|
|
|
8d4b3d |
+ # so it can join the master galera cluster.
|
|
|
8d4b3d |
+ set_master_score
|
|
|
8d4b3d |
+ fi
|
|
|
8d4b3d |
+ fi
|
|
|
8d4b3d |
+ return $rc
|
|
|
8d4b3d |
+ elif [ $rc -ne $OCF_SUCCESS ]; then
|
|
|
8d4b3d |
return $rc
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
+ # if we make it here, mysql is running. Check cluster status now.
|
|
|
8d4b3d |
+
|
|
|
8d4b3d |
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
|
|
|
8d4b3d |
if [ $? -ne 0 ]; then
|
|
|
8d4b3d |
ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
|
|
|
8d4b3d |
return $OCF_ERR_GENERIC
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
|
|
|
8d4b3d |
- is_readonly
|
|
|
8d4b3d |
- if [ $? -ne 0 ]; then
|
|
|
8d4b3d |
- is_primary
|
|
|
8d4b3d |
- if [ $? -ne 0 ]; then
|
|
|
8d4b3d |
- ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state."
|
|
|
8d4b3d |
- return $OCF_ERR_GENERIC
|
|
|
8d4b3d |
- fi
|
|
|
8d4b3d |
+ is_primary
|
|
|
8d4b3d |
+ if [ $? -eq 0 ]; then
|
|
|
8d4b3d |
|
|
|
8d4b3d |
if ocf_is_probe; then
|
|
|
8d4b3d |
# restore master score during probe
|
|
|
8d4b3d |
@@ -596,18 +634,10 @@ galera_monitor()
|
|
|
8d4b3d |
set_master_score
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
rc=$OCF_RUNNING_MASTER
|
|
|
8d4b3d |
- else
|
|
|
8d4b3d |
- master_exists
|
|
|
8d4b3d |
- if [ $? -ne 0 ]; then
|
|
|
8d4b3d |
- detect_first_master
|
|
|
8d4b3d |
- else
|
|
|
8d4b3d |
- # a master instance exists and is healthy, promote this
|
|
|
8d4b3d |
- # local read only instance
|
|
|
8d4b3d |
- # so it can join the master galera cluster.
|
|
|
8d4b3d |
- set_master_score
|
|
|
8d4b3d |
- fi
|
|
|
8d4b3d |
+ else
|
|
|
8d4b3d |
+ ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
|
|
|
8d4b3d |
+ rc=$OCF_ERR_GENERIC
|
|
|
8d4b3d |
fi
|
|
|
8d4b3d |
- # TODO look at what is done in the wait script
|
|
|
8d4b3d |
|
|
|
8d4b3d |
return $rc
|
|
|
8d4b3d |
}
|