From a05eb8673bd1d5d3d41f2ed39df2650b19681d08 Mon Sep 17 00:00:00 2001 From: Damien Ciabrini Date: Fri, 3 Mar 2017 15:31:30 +0100 Subject: [PATCH] galera: fix the first bootstrap when cluster has no data The resource agent selects the first node to go into Master state based on the biggest commit version found on each node. If case no data were written yet into the galera cluster, the current node is selected as a "fallback" node to bootstrap the cluster. The way the "fallback" node is selected is wrong because every node takes a different decision, and this ultimately yields to 3 single-node galera clusters being started. To fix that, let the "fallback" node be the last one in the wsrep_cluster_address, so that the selection algorithm yields coherent results across nodes. --- heartbeat/galera | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/heartbeat/galera b/heartbeat/galera index decbaa2..475a8ba 100755 --- a/heartbeat/galera +++ b/heartbeat/galera @@ -451,14 +451,24 @@ pcmk_to_galera_name() detect_first_master() { local best_commit=0 - local best_node="$NODENAME" local last_commit=0 local missing_nodes=0 local nodes="" local nodes_recovered="" + local all_nodes + local best_node_gcomm + local best_node + + all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ') + best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') + best_node=$(galera_to_pcmk_name $best_node_gcomm) + if [ -z "$best_node" ]; then + ocf_log error "Could not determine initial best node from galera name <${best_node_gcomm}>." + return + fi # avoid selecting a recovered node as bootstrap if possible - for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do + for node in $all_nodes; do local pcmk_node=$(galera_to_pcmk_name $node) if [ -z "$pcmk_node" ]; then ocf_log error "Could not determine pacemaker node from galera name <${node}>."