|
|
7a920e |
From d9833b68498e306d181be11adf9eee14b646a899 Mon Sep 17 00:00:00 2001
|
|
|
7a920e |
From: Damien Ciabrini <dciabrin@redhat.com>
|
|
|
7a920e |
Date: Tue, 2 Feb 2016 14:34:36 +0100
|
|
|
7a920e |
Subject: [PATCH] galera: force crash recovery if needed during last commit
|
|
|
7a920e |
detection
|
|
|
7a920e |
|
|
|
7a920e |
---
|
|
|
7a920e |
heartbeat/galera | 90 +++++++++++++++++++++++++++++++++++++-------------------
|
|
|
7a920e |
1 file changed, 60 insertions(+), 30 deletions(-)
|
|
|
7a920e |
|
|
|
7a920e |
diff --git a/heartbeat/galera b/heartbeat/galera
|
|
|
7a920e |
index 7be2b00..ca94c21 100755
|
|
|
7a920e |
--- a/heartbeat/galera
|
|
|
7a920e |
+++ b/heartbeat/galera
|
|
|
7a920e |
@@ -525,6 +525,58 @@ detect_first_master()
|
|
|
7a920e |
set_bootstrap_node $best_node
|
|
|
7a920e |
}
|
|
|
7a920e |
|
|
|
7a920e |
+detect_last_commit()
|
|
|
7a920e |
+{
|
|
|
7a920e |
+ local last_commit
|
|
|
7a920e |
+ local recover_args="--defaults-file=$OCF_RESKEY_config \
|
|
|
7a920e |
+ --pid-file=$OCF_RESKEY_pid \
|
|
|
7a920e |
+ --socket=$OCF_RESKEY_socket \
|
|
|
7a920e |
+ --datadir=$OCF_RESKEY_datadir \
|
|
|
7a920e |
+ --user=$OCF_RESKEY_user"
|
|
|
7a920e |
+ local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
|
|
|
7a920e |
+
|
|
|
7a920e |
+ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
|
|
|
7a920e |
+ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
|
|
|
7a920e |
+ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
|
|
|
7a920e |
+ local tmp=$(mktemp)
|
|
|
7a920e |
+ local tmperr=$(mktemp)
|
|
|
7a920e |
+
|
|
|
7a920e |
+ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
|
|
|
7a920e |
+
|
|
|
7a920e |
+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr
|
|
|
7a920e |
+
|
|
|
7a920e |
+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
|
|
|
7a920e |
+ if [ -z "$last_commit" ]; then
|
|
|
7a920e |
+ # Galera uses InnoDB's 2pc transactions internally. If
|
|
|
7a920e |
+ # server was stopped in the middle of a replication, the
|
|
|
7a920e |
+ # recovery may find a "prepared" XA transaction in the
|
|
|
7a920e |
+ # redo log, and mysql won't recover automatically
|
|
|
7a920e |
+
|
|
|
7a920e |
+ cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null
|
|
|
7a920e |
+ if [ $? -eq 0 ]; then
|
|
|
7a920e |
+ # we can only rollback the transaction, but that's OK
|
|
|
7a920e |
+ # since the DB will get resynchronized anyway
|
|
|
7a920e |
+ ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
|
|
|
7a920e |
+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
|
|
|
7a920e |
+ --tc-heuristic-recover=rollback > $tmp 2>/dev/null
|
|
|
7a920e |
+
|
|
|
7a920e |
+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
|
|
|
7a920e |
+ fi
|
|
|
7a920e |
+ fi
|
|
|
7a920e |
+ rm -f $tmp $tmperr
|
|
|
7a920e |
+ fi
|
|
|
7a920e |
+
|
|
|
7a920e |
+ if [ ! -z "$last_commit" ]; then
|
|
|
7a920e |
+ ocf_log info "Last commit version found: $last_commit"
|
|
|
7a920e |
+ set_last_commit $last_commit
|
|
|
7a920e |
+ return $OCF_SUCCESS
|
|
|
7a920e |
+ else
|
|
|
7a920e |
+ ocf_exit_reason "Unable to detect last known write sequence number"
|
|
|
7a920e |
+ clear_last_commit
|
|
|
7a920e |
+ return $OCF_ERR_GENERIC
|
|
|
7a920e |
+ fi
|
|
|
7a920e |
+}
|
|
|
7a920e |
+
|
|
|
7a920e |
# For galera, promote is really start
|
|
|
7a920e |
galera_promote()
|
|
|
7a920e |
{
|
|
|
7a920e |
@@ -569,13 +620,15 @@ galera_demote()
|
|
|
7a920e |
clear_bootstrap_node
|
|
|
7a920e |
clear_last_commit
|
|
|
7a920e |
|
|
|
7a920e |
- # record last commit by "starting" galera. start is just detection of the last sequence number
|
|
|
7a920e |
- galera_start
|
|
|
7a920e |
+ # record last commit for next promotion
|
|
|
7a920e |
+ detect_last_commit
|
|
|
7a920e |
+ rc=$?
|
|
|
7a920e |
+ return $rc
|
|
|
7a920e |
}
|
|
|
7a920e |
|
|
|
7a920e |
galera_start()
|
|
|
7a920e |
{
|
|
|
7a920e |
- local last_commit
|
|
|
7a920e |
+ local rc
|
|
|
7a920e |
|
|
|
7a920e |
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
|
|
|
7a920e |
if [ $? -ne 0 ]; then
|
|
|
7a920e |
@@ -591,34 +644,11 @@ galera_start()
|
|
|
7a920e |
|
|
|
7a920e |
mysql_common_prepare_dirs
|
|
|
7a920e |
|
|
|
7a920e |
- ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
|
|
|
7a920e |
- last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
|
|
|
7a920e |
- if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
|
|
|
7a920e |
- ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
|
|
|
7a920e |
- local tmp=$(mktemp)
|
|
|
7a920e |
- ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
|
|
|
7a920e |
- --pid-file=$OCF_RESKEY_pid \
|
|
|
7a920e |
- --socket=$OCF_RESKEY_socket \
|
|
|
7a920e |
- --datadir=$OCF_RESKEY_datadir \
|
|
|
7a920e |
- --user=$OCF_RESKEY_user \
|
|
|
7a920e |
- --wsrep-recover > $tmp 2>&1
|
|
|
7a920e |
-
|
|
|
7a920e |
- last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
|
|
|
7a920e |
- rm -f $tmp
|
|
|
7a920e |
-
|
|
|
7a920e |
- if [ "$last_commit" = "-1" ]; then
|
|
|
7a920e |
- last_commit="0"
|
|
|
7a920e |
- fi
|
|
|
7a920e |
- fi
|
|
|
7a920e |
-
|
|
|
7a920e |
- if [ -z "$last_commit" ]; then
|
|
|
7a920e |
- ocf_exit_reason "Unable to detect last known write sequence number"
|
|
|
7a920e |
- clear_last_commit
|
|
|
7a920e |
- return $OCF_ERR_GENERIC
|
|
|
7a920e |
+ detect_last_commit
|
|
|
7a920e |
+ rc=$?
|
|
|
7a920e |
+ if [ $rc -ne $OCF_SUCCESS ]; then
|
|
|
7a920e |
+ return $rc
|
|
|
7a920e |
fi
|
|
|
7a920e |
- ocf_log info "Last commit version found: $last_commit"
|
|
|
7a920e |
-
|
|
|
7a920e |
- set_last_commit $last_commit
|
|
|
7a920e |
|
|
|
7a920e |
master_exists
|
|
|
7a920e |
if [ $? -eq 0 ]; then
|