diff --git a/SOURCES/bz1170376-galera-no-readonly.patch b/SOURCES/bz1170376-galera-no-readonly.patch new file mode 100644 index 0000000..e96196c --- /dev/null +++ b/SOURCES/bz1170376-galera-no-readonly.patch @@ -0,0 +1,204 @@ +diff --git a/heartbeat/galera b/heartbeat/galera +index 994aad0..d74a70d 100755 +--- a/heartbeat/galera ++++ b/heartbeat/galera +@@ -342,6 +342,14 @@ is_readonly() + + master_exists() + { ++ if [ "$__OCF_ACTION" = "demote" ]; then ++ # We don't want to detect master instances during demote. ++ # 1. we could be detecting ourselves as being master, which is no longer the case. ++ # 2. we could be detecting other master instances that are in the process of shutting down. ++ # by not detecting other master instances in "demote" we are deferring this check ++ # to the next recurring monitor operation which will be much more accurate ++ return 1 ++ fi + # determine if a master instance is already up and is healthy + crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 + return $? +@@ -441,20 +449,24 @@ galera_promote() + extra_opts="--wsrep-cluster-address=gcomm://" + else + ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected." ++ clear_last_commit + return $OCF_ERR_GENERIC + fi +- + fi + +- # make sure the read only instance is stopped +- mysql_common_stop +- rc=$? +- if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then +- ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master" +- return $rc ++ galera_monitor ++ if [ $? -eq $OCF_RUNNING_MASTER ]; then ++ if ocf_is_true $bootstrap; then ++ promote_everyone ++ clear_bootstrap_node ++ ocf_log info "boostrap node already up, promoting the rest of the galera instances." ++ fi ++ clear_last_commit ++ return $OCF_SUCCESS + fi + +- sleep 4 ++ # last commit is no longer relevant once promoted ++ clear_last_commit + + mysql_common_prepare_dirs + mysql_common_start "$extra_opts" +@@ -492,9 +504,6 @@ galera_promote() + wait_for_sync + fi + +- # last commit is no longer relevant once promoted +- clear_last_commit +- + ocf_log info "Galera started" + return $OCF_SUCCESS + } +@@ -510,14 +519,14 @@ galera_demote() + + # if this node was previously a bootstrap node, that is no longer the case. + clear_bootstrap_node ++ clear_last_commit + +- # start again in slave mode so the new last commit is recorded ++ # record last commit by "starting" galera. start is just detection of the last sequence number + galera_start + } + + galera_start() + { +- local extra_opts='--read-only=true' + local last_commit + + echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME +@@ -526,22 +535,39 @@ galera_start() + return $OCF_ERR_CONFIGURED + fi + +- mysql_common_prepare_dirs +- mysql_common_start "$extra_opts" +- +- is_readonly +- if [ $? -ne 0 ]; then +- ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set." ++ galera_monitor ++ if [ $? -eq $OCF_RUNNING_MASTER ]; then ++ ocf_exit_reason "master galera instance started outside of the cluster's control" + return $OCF_ERR_GENERIC + fi + +- ocf_log info "attempting to detect last commit version" +- while [ -z "$last_commit" ]; do +- last_commit=$(get_status_variable "wsrep_last_committed") +- if [ -z "$last_commit" ]; then +- sleep 1 ++ mysql_common_prepare_dirs ++ ++ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat" ++ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')" ++ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then ++ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'" ++ local tmp=$(mktemp) ++ ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \ ++ --pid-file=$OCF_RESKEY_pid \ ++ --socket=$OCF_RESKEY_socket \ ++ --datadir=$OCF_RESKEY_datadir \ ++ --user=$OCF_RESKEY_user \ ++ --wsrep-recover > $tmp 2>&1 ++ ++ last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')" ++ rm -f $tmp ++ ++ if [ "$last_commit" = "-1" ]; then ++ last_commit="0" + fi +- done ++ fi ++ ++ if [ -z "$last_commit" ]; then ++ ocf_exit_reason "Unable to detect last known write sequence number" ++ clear_last_commit ++ return $OCF_ERR_GENERIC ++ fi + ocf_log info "Last commit version found: $last_commit" + + set_last_commit $last_commit +@@ -567,28 +593,40 @@ galera_monitor() + if ocf_is_probe; then + status_loglevel="info" + fi +- ++ + mysql_common_status $status_loglevel + rc=$? + +- # If status returned an error, return that immediately +- if [ $rc -ne $OCF_SUCCESS ]; then ++ if [ $rc -eq $OCF_NOT_RUNNING ]; then ++ last_commit=$(get_last_commit $node) ++ if [ -n "$last_commit" ]; then ++ # if last commit is set, this instance is considered started in slave mode ++ rc=$OCF_SUCCESS ++ master_exists ++ if [ $? -ne 0 ]; then ++ detect_first_master ++ else ++ # a master instance exists and is healthy, promote this ++ # local read only instance ++ # so it can join the master galera cluster. ++ set_master_score ++ fi ++ fi ++ return $rc ++ elif [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + ++ # if we make it here, mysql is running. Check cluster status now. ++ + echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME + if [ $? -ne 0 ]; then + ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>" + return $OCF_ERR_GENERIC + fi + +- is_readonly +- if [ $? -ne 0 ]; then +- is_primary +- if [ $? -ne 0 ]; then +- ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state." +- return $OCF_ERR_GENERIC +- fi ++ is_primary ++ if [ $? -eq 0 ]; then + + if ocf_is_probe; then + # restore master score during probe +@@ -596,18 +634,10 @@ galera_monitor() + set_master_score + fi + rc=$OCF_RUNNING_MASTER +- else +- master_exists +- if [ $? -ne 0 ]; then +- detect_first_master +- else +- # a master instance exists and is healthy, promote this +- # local read only instance +- # so it can join the master galera cluster. +- set_master_score +- fi ++ else ++ ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state." ++ rc=$OCF_ERR_GENERIC + fi +- # TODO look at what is done in the wait script + + return $rc + } diff --git a/SOURCES/bz1214360-NovaCompute-update1.patch b/SOURCES/bz1214360-NovaCompute-update1.patch new file mode 100644 index 0000000..2dabe0b --- /dev/null +++ b/SOURCES/bz1214360-NovaCompute-update1.patch @@ -0,0 +1,494 @@ +From 8c92227bce9cc4fe177eea5b2f7c9016e96434f9 Mon Sep 17 00:00:00 2001 +From: David Vossel +Date: Mon, 29 Jun 2015 13:03:17 -0500 +Subject: [PATCH 1/3] bz1214360-NovaCompute-update1.patch + +--- + doc/man/Makefile.am | 1 + + heartbeat/Makefile.am | 3 +- + heartbeat/NovaCompute | 73 ++++++------ + heartbeat/NovaEvacuate | 311 +++++++++++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 352 insertions(+), 36 deletions(-) + create mode 100755 heartbeat/NovaEvacuate + +diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am +index 42a57fe..d32426b 100644 +--- a/doc/man/Makefile.am ++++ b/doc/man/Makefile.am +@@ -74,6 +74,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_ManageRAID.7 \ + ocf_heartbeat_ManageVE.7 \ + ocf_heartbeat_NovaCompute.7 \ ++ ocf_heartbeat_NovaEvacuate.7 \ + ocf_heartbeat_Pure-FTPd.7 \ + ocf_heartbeat_Raid1.7 \ + ocf_heartbeat_Route.7 \ +diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am +index 0bebf97..1034632 100644 +--- a/heartbeat/Makefile.am ++++ b/heartbeat/Makefile.am +@@ -52,7 +52,8 @@ send_ua_SOURCES = send_ua.c IPv6addr_utils.c + IPv6addr_LDADD = -lplumb $(LIBNETLIBS) + send_ua_LDADD = $(LIBNETLIBS) + +-osp_SCRIPTS = NovaCompute ++osp_SCRIPTS = NovaCompute \ ++ NovaEvacuate + + ocf_SCRIPTS = ClusterMon \ + CTDB \ +diff --git a/heartbeat/NovaCompute b/heartbeat/NovaCompute +index f71abeb..09eee38 100644 +--- a/heartbeat/NovaCompute ++++ b/heartbeat/NovaCompute +@@ -107,15 +107,26 @@ Disable shared storage recovery for instances. Use at your own risk! + + + ++ ++ ++How long to wait for nova to finish evacuating instances elsewhere ++before starting nova-compute. Only used when the agent detects ++evacuations might be in progress. ++ ++You may need to increase the start timeout when increasing this value. ++ ++Delay to allow evacuations time to complete ++ ++ ++ + + + +- ++ + + + + +- + + + END +@@ -132,7 +143,7 @@ sigterm_handler() { + + nova_usage() { + cat < ++ ++ ++1.0 ++ ++ ++Facility for tacking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged. ++ ++Evacuator for OpenStack Nova Compute Server ++ ++ ++ ++ ++ ++Authorization URL for connecting to keystone in admin context ++ ++Authorization URL ++ ++ ++ ++ ++ ++Username for connecting to keystone in admin context ++ ++Username ++ ++ ++ ++ ++Password for connecting to keystone in admin context ++ ++Password ++ ++ ++ ++ ++ ++Tenant name for connecting to keystone in admin context. ++Note that with Keystone V3 tenant names are only unique within a domain. ++ ++Tenant name ++ ++ ++ ++ ++ ++Nova API location (internal, public or admin URL) ++ ++Nova API location (internal, public or admin URL) ++ ++ ++ ++ ++ ++Disable shared storage recovery for instances. Use at your own risk! ++ ++Disable shared storage recovery for instances ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++END ++} ++ ++####################################################################### ++ ++# don't exit on TERM, to test that lrmd makes sure that we do exit ++trap sigterm_handler TERM ++sigterm_handler() { ++ ocf_log info "They use TERM to bring us down. No such luck." ++ return ++} ++ ++evacuate_usage() { ++ cat <= 11 || 0%{?centos_version} > 5 || 0%{?rhel} > 5 CFLAGS="$(echo '%{optflags}')" @@ -497,6 +502,17 @@ ccs_update_schema > /dev/null 2>&1 ||: %changelog +* Mon Jul 13 2015 David Vossel - 3.9.5-40.6 +- Improve galera resource-agent to not require use of read-only + mode to retrieve last known write sequence number. + + Resolves: rhbz#1242339 + +* Mon Jul 6 2015 David Vossel - 3.9.5-40.5 +- NovaCompute and NovaEvacuate updates + + Resolves: rhbz#1238716 + * Thu Jun 11 2015 David Vossel - 3.9.5-40.4 - Support for NovaCompute resource-agent