diff --git a/SOURCES/bz1170347-1-Set-RR-priority-by-default.patch b/SOURCES/bz1170347-1-Set-RR-priority-by-default.patch new file mode 100644 index 0000000..e8104aa --- /dev/null +++ b/SOURCES/bz1170347-1-Set-RR-priority-by-default.patch @@ -0,0 +1,68 @@ +From 177ef0e5240b4060ff5b14eab6f2eefee3aa777d Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Fri, 2 Jan 2015 12:39:09 +0100 +Subject: [PATCH] Set RR priority by default + +Experience with larger production clusters showed that setting RR +priority for corosync is viable for prevent random fencing, ... + +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/main.c | 7 ++++--- + man/corosync.8 | 4 ++-- + 2 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/exec/main.c b/exec/main.c +index e423c97..cd972b5 100644 +--- a/exec/main.c ++++ b/exec/main.c +@@ -1200,7 +1200,7 @@ int main (int argc, char **argv, char **envp) + /* default configuration + */ + background = 1; +- setprio = 0; ++ setprio = 1; + testonly = 0; + + while ((ch = getopt (argc, argv, "fprtv")) != EOF) { +@@ -1210,6 +1210,7 @@ int main (int argc, char **argv, char **envp) + background = 0; + break; + case 'p': ++ setprio = 0; + break; + case 'r': + setprio = 1; +@@ -1228,9 +1229,9 @@ int main (int argc, char **argv, char **envp) + fprintf(stderr, \ + "usage:\n"\ + " -f : Start application in foreground.\n"\ +- " -p : Does nothing. \n"\ ++ " -p : Do not set process priority.\n"\ + " -t : Test configuration and exit.\n"\ +- " -r : Set round robin realtime scheduling \n"\ ++ " -r : Set round robin realtime scheduling (default).\n"\ + " -v : Display version and SVN revision of Corosync and exit.\n"); + logsys_system_fini(); + return EXIT_FAILURE; +diff --git a/man/corosync.8 b/man/corosync.8 +index 559b392..41f634f 100644 +--- a/man/corosync.8 ++++ b/man/corosync.8 +@@ -45,10 +45,10 @@ Corosync provides clustering infracture such as membership, messaging and quorum + Start application in foreground. 
+ .TP + .B -p +-Does nothing (was: "Do not set process priority" - this is now the default). ++Do not set process priority. + .TP + .B -r +-Set round robin realtime scheduling. ++Set round robin realtime scheduling (default). + .TP + .B -t + Test configuration and then exit. +-- +1.7.1 + diff --git a/SOURCES/bz1197091-1-Votequorum-Fix-auto_tie_breaker-default.patch b/SOURCES/bz1197091-1-Votequorum-Fix-auto_tie_breaker-default.patch new file mode 100644 index 0000000..7745d0c --- /dev/null +++ b/SOURCES/bz1197091-1-Votequorum-Fix-auto_tie_breaker-default.patch @@ -0,0 +1,51 @@ +From 314a01c98e5f98ff686333966dbe675935b7b6a8 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Mon, 2 Mar 2015 15:48:01 +0000 +Subject: [PATCH] Votequorum: Fix auto_tie_breaker default + +The default for auto_tie_breaker should be 'lowest' - which is what it +was before the extended ATB functionality of auto_tie_breaker_node was +added, and what the documentation states. + +However this was broken so that if auto_tie_breaker_node was not +specified then auto_tie_breaker itself was ignored. This patch fixes +that. + +It also fixes a typo in a comment. 
+ +Signed-Off-By: Christine Caulfield +Reviewed-by: Jan Friesse +--- + exec/votequorum.c | 9 +++++++-- + 1 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/exec/votequorum.c b/exec/votequorum.c +index 6caccaf..3757b53 100644 +--- a/exec/votequorum.c ++++ b/exec/votequorum.c +@@ -628,7 +628,7 @@ static int is_in_nodelist(int nodeid, unsigned int *members, int entries) + } + + /* +- * The algorithm for a list of time-breaker nodes is: ++ * The algorithm for a list of tie-breaker nodes is: + * travel the list of nodes in the auto_tie_breaker list, + * if the node IS in our current partition, check if the + * nodes earlier in the atb list are in the 'previous' partition; +@@ -1290,7 +1290,12 @@ static char *votequorum_readconfig(int runtime) + icmap_get_uint8("quorum.auto_tie_breaker", &atb); + icmap_get_string("quorum.auto_tie_breaker_node", &atb_string); + +- if (!atb) { ++ /* auto_tie_breaker defaults to LOWEST */ ++ if (atb) { ++ auto_tie_breaker = ATB_LOWEST; ++ icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); ++ } ++ else { + auto_tie_breaker = ATB_NONE; + if (atb_string) { + log_printf(LOGSYS_LEVEL_WARNING, +-- +1.7.1 + diff --git a/SOURCES/bz1197671-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch b/SOURCES/bz1197671-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch new file mode 100644 index 0000000..d380bd9 --- /dev/null +++ b/SOURCES/bz1197671-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch @@ -0,0 +1,40 @@ +From c832ade034fa737561ccabefbe417c9d7855d970 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Mon, 2 Mar 2015 15:50:21 +0000 +Subject: [PATCH] Don't allow both two_node and auto_tie_breaker in corosync.conf + +The two_node and auto_tie_breaker options are incompatible as they +specify conflicting methods of determining the quorate half of a cluster +partition. 
+ +This patch detects this error in corosync.conf, issues a message and +disables two_node if auto_tie_breaker is present. + +Signed-Off-By: Christine Caulfield +Reviewed-by: Jan Friesse +--- + exec/votequorum.c | 8 ++++++++ + 1 files changed, 8 insertions(+), 0 deletions(-) + +diff --git a/exec/votequorum.c b/exec/votequorum.c +index 3757b53..2ff0b43 100644 +--- a/exec/votequorum.c ++++ b/exec/votequorum.c +@@ -1323,6 +1323,14 @@ static char *votequorum_readconfig(int runtime) + + } + ++ /* two_node and auto_tie_breaker are not compatible as two_node uses ++ * a fence race to decide quorum whereas ATB decides based on node id ++ */ ++ if (two_node && auto_tie_breaker != ATB_NONE) { ++ log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible."); ++ log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); ++ two_node = 0; ++ } + /* + * quorum device is not compatible with last_man_standing and auto_tie_breaker + * neither lms or atb can be set at runtime, so there is no need to check for +-- +1.7.1 + diff --git a/SOURCES/bz1205336-1-Votequorum-Fix-auto_tie_breaker-default.patch b/SOURCES/bz1205336-1-Votequorum-Fix-auto_tie_breaker-default.patch deleted file mode 100644 index 7745d0c..0000000 --- a/SOURCES/bz1205336-1-Votequorum-Fix-auto_tie_breaker-default.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 314a01c98e5f98ff686333966dbe675935b7b6a8 Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Mon, 2 Mar 2015 15:48:01 +0000 -Subject: [PATCH] Votequorum: Fix auto_tie_breaker default - -The default for auto_tie_breaker should be 'lowest' - which is what it -was before the extended ATB functionality of auto_tie_breaker_node was -added, and what the documentation states. - -However this was broken so that if auto_tie_breaker_node was not -specified then auto_tie_breaker itself was ignored. This patch fixes -that. - -It also fixes a typo in a comment. 
- -Signed-Off-By: Christine Caulfield -Reviewed-by: Jan Friesse ---- - exec/votequorum.c | 9 +++++++-- - 1 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/exec/votequorum.c b/exec/votequorum.c -index 6caccaf..3757b53 100644 ---- a/exec/votequorum.c -+++ b/exec/votequorum.c -@@ -628,7 +628,7 @@ static int is_in_nodelist(int nodeid, unsigned int *members, int entries) - } - - /* -- * The algorithm for a list of time-breaker nodes is: -+ * The algorithm for a list of tie-breaker nodes is: - * travel the list of nodes in the auto_tie_breaker list, - * if the node IS in our current partition, check if the - * nodes earlier in the atb list are in the 'previous' partition; -@@ -1290,7 +1290,12 @@ static char *votequorum_readconfig(int runtime) - icmap_get_uint8("quorum.auto_tie_breaker", &atb); - icmap_get_string("quorum.auto_tie_breaker_node", &atb_string); - -- if (!atb) { -+ /* auto_tie_breaker defaults to LOWEST */ -+ if (atb) { -+ auto_tie_breaker = ATB_LOWEST; -+ icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); -+ } -+ else { - auto_tie_breaker = ATB_NONE; - if (atb_string) { - log_printf(LOGSYS_LEVEL_WARNING, --- -1.7.1 - diff --git a/SOURCES/bz1205338-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch b/SOURCES/bz1205338-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch deleted file mode 100644 index d380bd9..0000000 --- a/SOURCES/bz1205338-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch +++ /dev/null @@ -1,40 +0,0 @@ -From c832ade034fa737561ccabefbe417c9d7855d970 Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Mon, 2 Mar 2015 15:50:21 +0000 -Subject: [PATCH] Don't allow both two_node and auto_tie_breaker in corosync.conf - -The two_node and auto_tie_breaker options are incompatible as they -specify conflicting methods of determining the quorate half of a cluster -partition. 
- -This patch detects this error in corosync.conf, issues a message and -disables two_node if auto_tie_breaker is present. - -Signed-Off-By: Christine Caulfield -Reviewed-by: Jan Friesse ---- - exec/votequorum.c | 8 ++++++++ - 1 files changed, 8 insertions(+), 0 deletions(-) - -diff --git a/exec/votequorum.c b/exec/votequorum.c -index 3757b53..2ff0b43 100644 ---- a/exec/votequorum.c -+++ b/exec/votequorum.c -@@ -1323,6 +1323,14 @@ static char *votequorum_readconfig(int runtime) - - } - -+ /* two_node and auto_tie_breaker are not compatible as two_node uses -+ * a fence race to decide quorum whereas ATB decides based on node id -+ */ -+ if (two_node && auto_tie_breaker != ATB_NONE) { -+ log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible."); -+ log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); -+ two_node = 0; -+ } - /* - * quorum device is not compatible with last_man_standing and auto_tie_breaker - * neither lms or atb can be set at runtime, so there is no need to check for --- -1.7.1 - diff --git a/SOURCES/bz1225441-1-Log-Add-logrotate-configuration-file.patch b/SOURCES/bz1225441-1-Log-Add-logrotate-configuration-file.patch new file mode 100644 index 0000000..ef2248e --- /dev/null +++ b/SOURCES/bz1225441-1-Log-Add-logrotate-configuration-file.patch @@ -0,0 +1,161 @@ +From aabbace625b3c68332b4356887378fca81f8f387 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Fri, 19 Jun 2015 17:42:09 +0200 +Subject: [PATCH] Log: Add logrotate configuration file + +In cman era corosync was depending on logrotate file distributed by +cman. It's good idea to logrotate also on systems without cman (new +clusters). + +Signed-off-by: Jan Friesse +Reviewed-by: Fabio M. 
Di Nitto +--- + conf/Makefile.am | 2 + + conf/logrotate/Makefile.am | 45 ++++++++++++++++++++++++++++++++++++++++++++ + conf/logrotate/corosync.in | 9 ++++++++ + configure.ac | 17 +++++++++++++++- + corosync.spec.in | 1 + + 5 files changed, 73 insertions(+), 1 deletions(-) + create mode 100644 conf/logrotate/Makefile.am + create mode 100644 conf/logrotate/corosync.in + +diff --git a/conf/Makefile.am b/conf/Makefile.am +index 69f7ff9..807b6cc 100644 +--- a/conf/Makefile.am ++++ b/conf/Makefile.am +@@ -69,3 +69,5 @@ if INSTALL_DBUSCONF + dbusdir = $(sysconfdir)/dbus-1/system.d + dbus_DATA = corosync-signals.conf + endif ++ ++SUBDIRS = logrotate +diff --git a/conf/logrotate/Makefile.am b/conf/logrotate/Makefile.am +new file mode 100644 +index 0000000..a22079b +--- /dev/null ++++ b/conf/logrotate/Makefile.am +@@ -0,0 +1,45 @@ ++# Copyright (c) 2009 Red Hat, Inc. ++# ++# Authors:Jan Friesse (jfriesse@redhat.com) ++# Andrew Beekhof ++# Steven Dake (sdake@redhat.com) ++# ++# This software licensed under BSD license, the text of which follows: ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions are met: ++# ++# - Redistributions of source code must retain the above copyright notice, ++# this list of conditions and the following disclaimer. ++# - Redistributions in binary form must reproduce the above copyright notice, ++# this list of conditions and the following disclaimer in the documentation ++# and/or other materials provided with the distribution. ++# - Neither the name of the MontaVista Software, Inc. nor the names of its ++# contributors may be used to endorse or promote products derived from this ++# software without specific prior written permission. 
++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ++# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ++# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ++# THE POSSIBILITY OF SUCH DAMAGE. ++ ++MAINTAINERCLEANFILES = Makefile.in ++ ++EXTRA_DIST = corosync.in ++ ++corosync: corosync.in ++ sed -e 's#@''LOGDIR@#${LOGDIR}#g' \ ++ $< > $@ ++ ++logrotatecorosyncdir = ${LOGROTATEDIR} ++logrotatecorosync_DATA = corosync ++ ++clean-local: ++ rm -f corosync +diff --git a/conf/logrotate/corosync.in b/conf/logrotate/corosync.in +new file mode 100644 +index 0000000..cba17b0 +--- /dev/null ++++ b/conf/logrotate/corosync.in +@@ -0,0 +1,9 @@ ++@LOGDIR@/corosync.log { ++ missingok ++ compress ++ copytruncate ++ daily ++ rotate 31 ++ minsize 2048 ++ notifempty ++} +diff --git a/configure.ac b/configure.ac +index b394329..4b640fc 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -199,7 +199,8 @@ AC_CONFIG_FILES([Makefile + tools/Makefile + conf/Makefile + qdevices/Makefile +- Doxyfile]) ++ Doxyfile ++ conf/logrotate/Makefile]) + + ### Local business + +@@ -369,6 +370,16 @@ AC_ARG_WITH([initwrappersdir], + [ INITWRAPPERSDIR="$withval" ], + [ INITWRAPPERSDIR="$datarootdir/corosync" ]) + ++AC_ARG_WITH([logdir], ++ [ --logdir=DIR : the base directory for corosync logging files. 
], ++ [ LOGDIR="$withval" ], ++ [ LOGDIR="$localstatedir/log/cluster" ]) ++ ++AC_ARG_WITH([logrotatedir], ++ [ --logrotatedir=DIR : the base directory for logrorate.d files. ], ++ [ LOGROTATEDIR="$withval" ], ++ [ LOGROTATEDIR="$sysconfdir/logrotate.d" ]) ++ + AC_ARG_ENABLE([snmp], + [ --enable-snmp : SNMP protocol support ], + [ default="no" ]) +@@ -674,6 +685,8 @@ AC_SUBST([SYSTEMDDIR]) + AC_SUBST([UPSTARTDIR]) + INITWRAPPERSDIR=$(eval echo ${INITWRAPPERSDIR}) + AC_SUBST([INITWRAPPERSDIR]) ++AC_SUBST([LOGDIR]) ++AC_SUBST([LOGROTATEDIR]) + + AC_SUBST([SOMAJOR]) + AC_SUBST([SOMINOR]) +@@ -716,6 +729,8 @@ AC_MSG_RESULT([ System init.d directory = ${INITDDIR}]) + AC_MSG_RESULT([ System systemd directory = ${SYSTEMDDIR}]) + AC_MSG_RESULT([ System upstart directory = ${UPSTARTDIR}]) + AC_MSG_RESULT([ System init wraps dir = ${INITWRAPPERSDIR}]) ++AC_MSG_RESULT([ Log directory = ${LOGDIR}]) ++AC_MSG_RESULT([ Log rotate directory = ${LOGROTATEDIR}]) + AC_MSG_RESULT([ corosync config dir = ${COROSYSCONFDIR}]) + AC_MSG_RESULT([ Features =${PACKAGE_FEATURES}]) + AC_MSG_RESULT([]) +diff --git a/corosync.spec.in b/corosync.spec.in +index a2ba584..12979f6 100644 +--- a/corosync.spec.in ++++ b/corosync.spec.in +@@ -196,6 +196,7 @@ fi + %config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example.udpu + %config(noreplace) %{_sysconfdir}/sysconfig/corosync-notifyd + %config(noreplace) %{_sysconfdir}/sysconfig/corosync ++%config(noreplace) %{_sysconfdir}/logrotate.d/corosync + %if %{with dbus} + %{_sysconfdir}/dbus-1/system.d/corosync-signals.conf + %endif +-- +1.7.1 + diff --git a/SOURCES/bz1226842-1-Add-note-about-rrp-active-beeing-unsupported.patch b/SOURCES/bz1226842-1-Add-note-about-rrp-active-beeing-unsupported.patch new file mode 100644 index 0000000..30aa3e7 --- /dev/null +++ b/SOURCES/bz1226842-1-Add-note-about-rrp-active-beeing-unsupported.patch @@ -0,0 +1,31 @@ +From 219965f4fe694eaaf2eb4ea05cdc7e35f5146114 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Fri, 19 
Jun 2015 16:16:18 +0200 +Subject: [PATCH] Add note about rrp active beeing unsupported + +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + man/corosync.conf.5 | 6 ++++-- + 1 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 +index 8e774c1..2283cd4 100644 +--- a/man/corosync.conf.5 ++++ b/man/corosync.conf.5 +@@ -199,8 +199,10 @@ crypto_cipher and crypto_hash. + .TP + rrp_mode + This specifies the mode of redundant ring, which may be none, active, or +-passive. Active replication offers slightly lower latency from transmit +-to delivery in faulty network environments but with less performance. ++passive. Currently only 'passive' is supported or tested ++(using 'active' is not recommended). Active replication offers ++slightly lower latency from transmit to delivery in faulty network ++environments but with less performance. + Passive replication may nearly double the speed of the totem protocol + if the protocol doesn't become cpu bound. The final option is none, in + which case only one network interface will be used to operate the totem +-- +1.7.1 + diff --git a/SOURCES/bz1229194-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch b/SOURCES/bz1229194-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch new file mode 100644 index 0000000..9186720 --- /dev/null +++ b/SOURCES/bz1229194-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch @@ -0,0 +1,114 @@ +From 82526d2fe9137e8b604f1bbae6d6e39ba41377f9 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Mon, 16 Mar 2015 11:37:52 +0000 +Subject: [PATCH] quorum: don't allow quorum_trackstart to be called twice + +If quorum_trackstart() or votequorum_trackstart() are called twice with +CS_TRACK_CHANGES then the client gets added twice to the notifications +list effectively corrupting it. Users have reported segfaults in +corosync when they did this (by mistake!). 
+ +As there's already a tracking_enabled flag in the private-data, we check +that before adding to the list again and return an error if +the process is already registered. + +Signed-off-by: Christine Caulfield +Reviewed-by: Jan Friesse +--- + exec/votequorum.c | 12 ++++++++++-- + exec/vsf_quorum.c | 11 +++++++++-- + 2 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/exec/votequorum.c b/exec/votequorum.c +index 2ff0b43..f6faa25 100644 +--- a/exec/votequorum.c ++++ b/exec/votequorum.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2009-2014 Red Hat, Inc. ++ * Copyright (c) 2009-2015 Red Hat, Inc. + * + * All rights reserved. + * +@@ -2615,8 +2615,10 @@ static void message_handler_req_lib_votequorum_trackstart (void *conn, + const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message; + struct res_lib_votequorum_status res_lib_votequorum_status; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); ++ cs_error_t error = CS_OK; + + ENTER(); ++ + /* + * If an immediate listing of the current cluster membership + * is requested, generate membership list +@@ -2627,6 +2629,11 @@ static void message_handler_req_lib_votequorum_trackstart (void *conn, + votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context); + } + ++ if (quorum_pd->tracking_enabled) { ++ error = CS_ERR_EXIST; ++ goto response_send; ++ } ++ + /* + * Record requests for tracking + */ +@@ -2640,9 +2647,10 @@ static void message_handler_req_lib_votequorum_trackstart (void *conn, + list_add (&quorum_pd->list, &trackers_list); + } + ++response_send: + res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); + res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; +- res_lib_votequorum_status.header.error = CS_OK; ++ res_lib_votequorum_status.header.error = error; + corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); + + LEAVE(); 
+diff --git a/exec/vsf_quorum.c b/exec/vsf_quorum.c +index 2a3a263..a6c739d 100644 +--- a/exec/vsf_quorum.c ++++ b/exec/vsf_quorum.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2008-2012 Red Hat, Inc. ++ * Copyright (c) 2008-2015 Red Hat, Inc. + * + * All rights reserved. + * +@@ -409,6 +409,7 @@ static void message_handler_req_lib_quorum_trackstart (void *conn, + const struct req_lib_quorum_trackstart *req_lib_quorum_trackstart = msg; + struct qb_ipc_response_header res; + struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); ++ cs_error_t error = CS_OK; + + log_printf(LOGSYS_LEVEL_DEBUG, "got trackstart request on %p", conn); + +@@ -422,6 +423,11 @@ static void message_handler_req_lib_quorum_trackstart (void *conn, + send_library_notification(conn); + } + ++ if (quorum_pd->tracking_enabled) { ++ error = CS_ERR_EXIST; ++ goto response_send; ++ } ++ + /* + * Record requests for tracking + */ +@@ -434,10 +440,11 @@ static void message_handler_req_lib_quorum_trackstart (void *conn, + list_add (&quorum_pd->list, &lib_trackers_list); + } + ++response_send: + /* send status */ + res.size = sizeof(res); + res.id = MESSAGE_RES_QUORUM_TRACKSTART; +- res.error = CS_OK; ++ res.error = error; + corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header)); + } + +-- +1.7.1 + diff --git a/SOURCES/bz1229194-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch b/SOURCES/bz1229194-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch new file mode 100644 index 0000000..b4bd454 --- /dev/null +++ b/SOURCES/bz1229194-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch @@ -0,0 +1,77 @@ +From b9f5c290b7dedd0a677cdfc25db7dd111245a745 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Thu, 18 Jun 2015 09:57:59 +0100 +Subject: [PATCH] votequorum: Fix auto_tie_breaker behaviour in odd-sized clusters + +auto_tie_breaker can behave incorrectly in the case of a cluster +with an odd number of nodes. 
It's possible for a partition to +have quorum while the other side has the ATB node, and both will +continue working. (Of course in a properly configured cluster one side +will be fenced but that becomes an indeterminate race .. just what ATB +is supposed to avoid). + +This patch prevents ATB from running in a partition if the 'other' +partition might have quorum, and also mandates the use of wait_for_all +in clusters with an odd number of nodes so that a quorate partition +cannot start services or fence an existing partition with the tie +breaker node. + +Signed-Off-By: Christine Caulfield +Reviewed-by: Jan Friesse +--- + exec/votequorum.c | 31 +++++++++++++++++++++++++++++++ + 1 files changed, 31 insertions(+), 0 deletions(-) + +diff --git a/exec/votequorum.c b/exec/votequorum.c +index f6faa25..62c8cf3 100644 +--- a/exec/votequorum.c ++++ b/exec/votequorum.c +@@ -1011,7 +1011,10 @@ static void are_we_quorate(unsigned int total_votes) + } + + if ((auto_tie_breaker != ATB_NONE) && ++ /* Must be a half (or half-1) split */ + (total_votes == (us->expected_votes / 2)) && ++ /* If the 'other' partition in a split might have quorum then we can't run ATB */ ++ (previous_quorum_members_entries - quorum_members_entries < quorum) && + (check_auto_tie_breaker() == 1)) { + quorate = 1; + } +@@ -1331,6 +1334,34 @@ static char *votequorum_readconfig(int runtime) + log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); + two_node = 0; + } ++ ++ /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs ++ * to be set so that an isolated half+1 without the tie breaker node ++ * does not have quorum on reboot. ++ */ ++ if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) && ++ (!wait_for_all)) { ++ if (last_man_standing) { ++ /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what ++ * they might want so we'll just quit. 
++ */ ++ log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n"); ++ log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n"); ++ log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n"); ++ log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n"); ++ log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n"); ++ error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster"; ++ goto out; ++ } ++ else { ++ log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n"); ++ log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n"); ++ log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n"); ++ auto_tie_breaker = ATB_NONE; ++ icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); ++ } ++ } ++ + /* + * quorum device is not compatible with last_man_standing and auto_tie_breaker + * neither lms or atb can be set at runtime, so there is no need to check for +-- +1.7.1 + diff --git a/SOURCES/bz1234261-1-Reset-timer_problem_decrementer-on-fault.patch b/SOURCES/bz1234261-1-Reset-timer_problem_decrementer-on-fault.patch new file mode 100644 index 0000000..75afeaa --- /dev/null +++ b/SOURCES/bz1234261-1-Reset-timer_problem_decrementer-on-fault.patch @@ -0,0 +1,48 @@ +From 8f284b26b3331e1ab252969ba65543e6d9217ab1 Mon Sep 17 00:00:00 2001 +From: Jason +Date: Mon, 8 Dec 2014 16:24:22 +0100 +Subject: [PATCH] Reset timer_problem_decrementer on fault + +After a heartbeat link's FAULTY and its auto re-enable, +active_instance->timer_problem_decrementer did not reset to zero. 
So in +the next timer_function_active_token_expired() round, +active_timer_problem_decrementer_start() will not be called. As a +result, the active_instance->counter_problems of this link can +not be decreased any more, causing rrp to lose the ability to tolerate +network fluctuation. + +This problem can be reproduced by the following sequence: +1) Set RRP in active mode, configure at least 2 heartbeat links. +2) Unplug one link till corosync-cfgtool -s shows it is FAULTY. +3) Re-plug this link then corosync-cfgtool -s shows it is active with +no faults. +4) Unplug this link again but quickly re-plug it before it becomes +FAULTY. +5) Finally, you can see corosync-cfgtool -s shows it is in +"Incrementing problem counter" state even though it is currently physically +healthy. + +It can be solved by not forgetting to reset timer_problem_decrementer to +zero in active_timer_problem_decrementer_cancel(). + +Signed-off-by: Jason +Reviewed-by: Jan Friesse +--- + exec/totemrrp.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/exec/totemrrp.c b/exec/totemrrp.c +index 95a789e..eee9d26 100644 +--- a/exec/totemrrp.c ++++ b/exec/totemrrp.c +@@ -1542,6 +1542,7 @@ static void active_timer_problem_decrementer_cancel ( + qb_loop_timer_del ( + active_instance->rrp_instance->poll_handle, + active_instance->timer_problem_decrementer); ++ active_instance->timer_problem_decrementer = 0; + } + + +-- +1.7.1 + diff --git a/SOURCES/bz1234261-2-totem-Ignore-duplicated-commit-tokens-in-recovery.patch b/SOURCES/bz1234261-2-totem-Ignore-duplicated-commit-tokens-in-recovery.patch new file mode 100644 index 0000000..c929676 --- /dev/null +++ b/SOURCES/bz1234261-2-totem-Ignore-duplicated-commit-tokens-in-recovery.patch @@ -0,0 +1,104 @@ +From 4ee84c51fa73c4ec7cbee922111a140a3aaf75df Mon Sep 17 00:00:00 2001 +From: Jason +Date: Sat, 10 Jan 2015 17:35:47 +0800 +Subject: [PATCH] totem: Ignore duplicated commit tokens in recovery + +In active rrp mode, commit tokens are treated
as mcast data messages, +thus, rrp directly delivers them to srp layer by active_mcast_recv(). +This will result in duplicated commit tokens being received by srp from +different heartbeat links. If node is in recovery state and has already +sent out the initial orf token, those duplicated commit tokens will +cause message_handler_memb_commit_token() to send initial orf token +again! This is wrong because it resets the orf token content in +instance->orf_token_retransmit, which breaks the token retransmission +state. + +Furthermore, by sending those initial orf tokens again and again, +it may lead active_token_recv() to drop some subsequent orf tokens. +It is OK for rrp because srp will do token retransmission, +but as said above, srp retransmission state has already been broken, +so finally we meet a "token lost in recovery state" condition caused +by software. If token timeout value is large, then it will take a long +time to create a new ring. + +This can be reproduced by having two nodes set to active rrp mode, with +two heartbeat links. Then with one node always on, let the other one do +stop/start again and again. It has a low probability to reproduce. +In theory, I think, the more heartbeat links used, the more easily it +can be reproduced. + +This problem can be resolved by letting +message_handler_memb_commit_token() ignore duplicated commit tokens +in recovery state if node (the ring representation) has already sent +out the initial orf token. + +Different from the previous take, this version does not depend on stored token +data but uses originated_orf_token in totemsrp_instance to remember +if initial orf token has been already originated for current membership. 
+ +Signed-off-by: Jason +Reviewed-by: Steven Dake +Reviewed-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemsrp.c | 16 ++++++++++++++++ + 1 files changed, 16 insertions(+), 0 deletions(-) + +diff --git a/exec/totemsrp.c b/exec/totemsrp.c +index 95736b1..b05773a 100644 +--- a/exec/totemsrp.c ++++ b/exec/totemsrp.c +@@ -508,6 +508,8 @@ struct totemsrp_instance { + + uint32_t orf_token_discard; + ++ uint32_t originated_orf_token; ++ + uint32_t threaded_mode_enabled; + + uint32_t waiting_trans_ack; +@@ -731,6 +733,8 @@ static void totemsrp_instance_initialize (struct totemsrp_instance *instance) + + instance->orf_token_discard = 0; + ++ instance->originated_orf_token = 0; ++ + instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage; + + instance->my_id.no_addrs = INTERFACE_MAX; +@@ -1834,6 +1838,8 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + char left_node_msg[1024]; + char joined_node_msg[1024]; + ++ instance->originated_orf_token = 0; ++ + memb_consensus_reset (instance); + + old_ring_state_reset (instance); +@@ -2045,6 +2051,8 @@ static void memb_state_gather_enter ( + { + instance->orf_token_discard = 1; + ++ instance->originated_orf_token = 0; ++ + memb_set_merge ( + &instance->my_id, 1, + instance->my_proc_list, &instance->my_proc_list_entries); +@@ -4510,6 +4518,14 @@ static int message_handler_memb_commit_token ( + + case MEMB_STATE_RECOVERY: + if (totemip_equal (&instance->my_id.addr[0], &instance->my_ring_id.rep)) { ++ ++ /* Filter out duplicated tokens */ ++ if (instance->originated_orf_token) { ++ break; ++ } ++ ++ instance->originated_orf_token = 1; ++ + log_printf (instance->totemsrp_log_level_debug, + "Sending initial ORF token"); + +-- +1.7.1 + diff --git a/SOURCES/bz1234266-1-corosync_ring_id_store-Use-safer-permissions.patch b/SOURCES/bz1234266-1-corosync_ring_id_store-Use-safer-permissions.patch new file mode 100644 index 0000000..3428d3c --- /dev/null +++ 
b/SOURCES/bz1234266-1-corosync_ring_id_store-Use-safer-permissions.patch @@ -0,0 +1,36 @@ +From 252b38ab8a62ff083e83b1d6f514109f7b7cbb42 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Tue, 20 Jan 2015 10:24:34 +0100 +Subject: [PATCH] corosync_ring_id_store: Use safer permissions + +corosync_ring_id_store should use same (safer) permissions as +corosync_ring_id_create_or_load for (eventually) newly created ringid +file. + +Credit to Sjerek for finding this problem. + +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/main.c | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/exec/main.c b/exec/main.c +index cd972b5..85c74ee 100644 +--- a/exec/main.c ++++ b/exec/main.c +@@ -782,9 +782,9 @@ static void corosync_ring_id_store ( + snprintf (filename, sizeof(filename), "%s/ringid_%s", + get_run_dir(), totemip_print (addr)); + +- fd = open (filename, O_WRONLY, 0777); ++ fd = open (filename, O_WRONLY, 0700); + if (fd == -1) { +- fd = open (filename, O_CREAT|O_RDWR, 0777); ++ fd = open (filename, O_CREAT|O_RDWR, 0700); + } + if (fd == -1) { + LOGSYS_PERROR(errno, LOGSYS_LEVEL_ERROR, +-- +1.7.1 + diff --git a/SOURCES/bz1260002-1-Set-RR-priority-by-default.patch b/SOURCES/bz1260002-1-Set-RR-priority-by-default.patch deleted file mode 100644 index e8104aa..0000000 --- a/SOURCES/bz1260002-1-Set-RR-priority-by-default.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 177ef0e5240b4060ff5b14eab6f2eefee3aa777d Mon Sep 17 00:00:00 2001 -From: Jan Friesse -Date: Fri, 2 Jan 2015 12:39:09 +0100 -Subject: [PATCH] Set RR priority by default - -Experience with larger production clusters showed that setting RR -priority for corosync is viable for prevent random fencing, ... 
- -Signed-off-by: Jan Friesse -Reviewed-by: Christine Caulfield ---- - exec/main.c | 7 ++++--- - man/corosync.8 | 4 ++-- - 2 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/exec/main.c b/exec/main.c -index e423c97..cd972b5 100644 ---- a/exec/main.c -+++ b/exec/main.c -@@ -1200,7 +1200,7 @@ int main (int argc, char **argv, char **envp) - /* default configuration - */ - background = 1; -- setprio = 0; -+ setprio = 1; - testonly = 0; - - while ((ch = getopt (argc, argv, "fprtv")) != EOF) { -@@ -1210,6 +1210,7 @@ int main (int argc, char **argv, char **envp) - background = 0; - break; - case 'p': -+ setprio = 0; - break; - case 'r': - setprio = 1; -@@ -1228,9 +1229,9 @@ int main (int argc, char **argv, char **envp) - fprintf(stderr, \ - "usage:\n"\ - " -f : Start application in foreground.\n"\ -- " -p : Does nothing. \n"\ -+ " -p : Do not set process priority.\n"\ - " -t : Test configuration and exit.\n"\ -- " -r : Set round robin realtime scheduling \n"\ -+ " -r : Set round robin realtime scheduling (default).\n"\ - " -v : Display version and SVN revision of Corosync and exit.\n"); - logsys_system_fini(); - return EXIT_FAILURE; -diff --git a/man/corosync.8 b/man/corosync.8 -index 559b392..41f634f 100644 ---- a/man/corosync.8 -+++ b/man/corosync.8 -@@ -45,10 +45,10 @@ Corosync provides clustering infracture such as membership, messaging and quorum - Start application in foreground. - .TP - .B -p --Does nothing (was: "Do not set process priority" - this is now the default). -+Do not set process priority. - .TP - .B -r --Set round robin realtime scheduling. -+Set round robin realtime scheduling (default). - .TP - .B -t - Test configuration and then exit. 
--- -1.7.1 - diff --git a/SOURCES/bz1260719-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch b/SOURCES/bz1260719-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch deleted file mode 100644 index 9186720..0000000 --- a/SOURCES/bz1260719-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 82526d2fe9137e8b604f1bbae6d6e39ba41377f9 Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Mon, 16 Mar 2015 11:37:52 +0000 -Subject: [PATCH] quorum: don't allow quorum_trackstart to be called twice - -If quorum_trackstart() or votequorum_trackstart() are called twice with -CS_TRACK_CHANGES then the client gets added twice to the notifications -list effectively corrupting it. Users have reported segfaults in -corosync when they did this (by mistake!). - -As there's already a tracking_enabled flag in the private-data, we check -that before adding to the list again and return an error if -the process is already registered. - -Signed-off-by: Christine Caulfield -Reviewed-by: Jan Friesse ---- - exec/votequorum.c | 12 ++++++++++-- - exec/vsf_quorum.c | 11 +++++++++-- - 2 files changed, 19 insertions(+), 4 deletions(-) - -diff --git a/exec/votequorum.c b/exec/votequorum.c -index 2ff0b43..f6faa25 100644 ---- a/exec/votequorum.c -+++ b/exec/votequorum.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2009-2014 Red Hat, Inc. -+ * Copyright (c) 2009-2015 Red Hat, Inc. - * - * All rights reserved. 
- * -@@ -2615,8 +2615,10 @@ static void message_handler_req_lib_votequorum_trackstart (void *conn, - const struct req_lib_votequorum_trackstart *req_lib_votequorum_trackstart = message; - struct res_lib_votequorum_status res_lib_votequorum_status; - struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); -+ cs_error_t error = CS_OK; - - ENTER(); -+ - /* - * If an immediate listing of the current cluster membership - * is requested, generate membership list -@@ -2627,6 +2629,11 @@ static void message_handler_req_lib_votequorum_trackstart (void *conn, - votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context); - } - -+ if (quorum_pd->tracking_enabled) { -+ error = CS_ERR_EXIST; -+ goto response_send; -+ } -+ - /* - * Record requests for tracking - */ -@@ -2640,9 +2647,10 @@ static void message_handler_req_lib_votequorum_trackstart (void *conn, - list_add (&quorum_pd->list, &trackers_list); - } - -+response_send: - res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status); - res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS; -- res_lib_votequorum_status.header.error = CS_OK; -+ res_lib_votequorum_status.header.error = error; - corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status)); - - LEAVE(); -diff --git a/exec/vsf_quorum.c b/exec/vsf_quorum.c -index 2a3a263..a6c739d 100644 ---- a/exec/vsf_quorum.c -+++ b/exec/vsf_quorum.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2008-2012 Red Hat, Inc. -+ * Copyright (c) 2008-2015 Red Hat, Inc. - * - * All rights reserved. 
- * -@@ -409,6 +409,7 @@ static void message_handler_req_lib_quorum_trackstart (void *conn, - const struct req_lib_quorum_trackstart *req_lib_quorum_trackstart = msg; - struct qb_ipc_response_header res; - struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn); -+ cs_error_t error = CS_OK; - - log_printf(LOGSYS_LEVEL_DEBUG, "got trackstart request on %p", conn); - -@@ -422,6 +423,11 @@ static void message_handler_req_lib_quorum_trackstart (void *conn, - send_library_notification(conn); - } - -+ if (quorum_pd->tracking_enabled) { -+ error = CS_ERR_EXIST; -+ goto response_send; -+ } -+ - /* - * Record requests for tracking - */ -@@ -434,10 +440,11 @@ static void message_handler_req_lib_quorum_trackstart (void *conn, - list_add (&quorum_pd->list, &lib_trackers_list); - } - -+response_send: - /* send status */ - res.size = sizeof(res); - res.id = MESSAGE_RES_QUORUM_TRACKSTART; -- res.error = CS_OK; -+ res.error = error; - corosync_api->ipc_response_send(conn, &res, sizeof(struct qb_ipc_response_header)); - } - --- -1.7.1 - diff --git a/SOURCES/bz1260719-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch b/SOURCES/bz1260719-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch deleted file mode 100644 index b4bd454..0000000 --- a/SOURCES/bz1260719-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch +++ /dev/null @@ -1,77 +0,0 @@ -From b9f5c290b7dedd0a677cdfc25db7dd111245a745 Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Thu, 18 Jun 2015 09:57:59 +0100 -Subject: [PATCH] votequorum: Fix auto_tie_breaker behaviour in odd-sized clusters - -auto_tie_breaker can behave incorrectly in the case of a cluster -with an odd number of nodes. It's possible for a partition to -have quorum while the other side has the ATB node, and both will -continue working. (Of course in a properly configured cluster one side -will be fenced but that becomes an indeterminate race .. just what ATB -is supposed to avoid). 
- -This patch prevents ATB from running in a partition if the 'other' -partition might have quorum, and also mandates the use of wait_for_all -in clusters with an odd number of nodes so that a quorate partition -cannot start services or fence an existing partition with the tie -breaker node. - -Signed-Off-By: Christine Caulfield -Reviewed-by: Jan Friesse ---- - exec/votequorum.c | 31 +++++++++++++++++++++++++++++++ - 1 files changed, 31 insertions(+), 0 deletions(-) - -diff --git a/exec/votequorum.c b/exec/votequorum.c -index f6faa25..62c8cf3 100644 ---- a/exec/votequorum.c -+++ b/exec/votequorum.c -@@ -1011,7 +1011,10 @@ static void are_we_quorate(unsigned int total_votes) - } - - if ((auto_tie_breaker != ATB_NONE) && -+ /* Must be a half (or half-1) split */ - (total_votes == (us->expected_votes / 2)) && -+ /* If the 'other' partition in a split might have quorum then we can't run ATB */ -+ (previous_quorum_members_entries - quorum_members_entries < quorum) && - (check_auto_tie_breaker() == 1)) { - quorate = 1; - } -@@ -1331,6 +1334,34 @@ static char *votequorum_readconfig(int runtime) - log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf"); - two_node = 0; - } -+ -+ /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs -+ * to be set so that an isolated half+1 without the tie breaker node -+ * does not have quorum on reboot. -+ */ -+ if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) && -+ (!wait_for_all)) { -+ if (last_man_standing) { -+ /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what -+ * they might want so we'll just quit. -+ */ -+ log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n"); -+ log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. 
With this situation a better\n"); -+ log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n"); -+ log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n"); -+ log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n"); -+ error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster"; -+ goto out; -+ } -+ else { -+ log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n"); -+ log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n"); -+ log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n"); -+ auto_tie_breaker = ATB_NONE; -+ icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker); -+ } -+ } -+ - /* - * quorum device is not compatible with last_man_standing and auto_tie_breaker - * neither lms or atb can be set at runtime, so there is no need to check for --- -1.7.1 - diff --git a/SOURCES/bz682771-1-cpg-Add-support-for-messages-larger-than-1Mb.patch b/SOURCES/bz682771-1-cpg-Add-support-for-messages-larger-than-1Mb.patch new file mode 100644 index 0000000..2c99a81 --- /dev/null +++ b/SOURCES/bz682771-1-cpg-Add-support-for-messages-larger-than-1Mb.patch @@ -0,0 +1,717 @@ +From 8cc8e513633a1a8b12c416e32fb5362fcf4d65dd Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Thu, 5 Mar 2015 16:45:15 +0000 +Subject: [PATCH] cpg: Add support for messages larger than 1Mb + +If a cpg client sends a message larger than 1Mb (actually slightly +less to allow for internal buffers) cpg will now fragment that into +several corosync messages before sending it around the ring. 
+ +cpg_mcast_joined() can now return CS_ERR_INTERRUPT which means that the +cpg membership was disrupted during the send operation and the message +needs to be resent. + +The new API call cpg_max_atomic_msgsize_get() returns the maximum size +of a message that will not be fragmented internally. + +New test program cpghum was written to stress test this functionality, +it checks message integrity and order of receipt. + +Signed-off-by: Christine Caulfield +Reviewed-by: Jan Friesse +--- + configure.ac | 1 + + corosync.spec.in | 1 + + exec/cpg.c | 182 +++++++++++++++++++++++++++++++++++++++++++- + include/corosync/cpg.h | 7 ++ + include/corosync/ipc_cpg.h | 35 ++++++++- + lib/cpg.c | 171 ++++++++++++++++++++++++++++++++++++++++- + test/Makefile.am | 3 +- + 7 files changed, 393 insertions(+), 7 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 0c371aa..b394329 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -163,6 +163,7 @@ AC_CHECK_LIB([pthread], [pthread_create]) + AC_CHECK_LIB([socket], [socket]) + AC_CHECK_LIB([nsl], [t_open]) + AC_CHECK_LIB([rt], [sched_getscheduler]) ++AC_CHECK_LIB([z], [crc32]) + + # Checks for library functions. + AC_FUNC_ALLOCA +diff --git a/corosync.spec.in b/corosync.spec.in +index 3ca75b7..a2ba584 100644 +--- a/corosync.spec.in ++++ b/corosync.spec.in +@@ -40,6 +40,7 @@ Conflicts: openais <= 0.89, openais-devel <= 0.89 + BuildRequires: groff + BuildRequires: libqb-devel + BuildRequires: nss-devel ++BuildRequires: zlib-devel + %if %{with runautogen} + BuildRequires: autoconf automake libtool + %endif +diff --git a/exec/cpg.c b/exec/cpg.c +index 1c6fbb9..a18b850 100644 +--- a/exec/cpg.c ++++ b/exec/cpg.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2006-2012 Red Hat, Inc. ++ * Copyright (c) 2006-2015 Red Hat, Inc. + * + * All rights reserved. 
+ * +@@ -83,7 +83,8 @@ enum cpg_message_req_types { + MESSAGE_REQ_EXEC_CPG_JOINLIST = 2, + MESSAGE_REQ_EXEC_CPG_MCAST = 3, + MESSAGE_REQ_EXEC_CPG_DOWNLIST_OLD = 4, +- MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5 ++ MESSAGE_REQ_EXEC_CPG_DOWNLIST = 5, ++ MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST = 6, + }; + + struct zcb_mapped { +@@ -156,6 +157,8 @@ struct cpg_pd { + enum cpd_state cpd_state; + unsigned int flags; + int initial_totem_conf_sent; ++ uint64_t transition_counter; /* These two are used when sending fragmented messages */ ++ uint64_t initial_transition_counter; + struct list_head list; + struct list_head iteration_instance_list_head; + struct list_head zcb_mapped_list_head; +@@ -224,6 +227,10 @@ static void message_handler_req_exec_cpg_mcast ( + const void *message, + unsigned int nodeid); + ++static void message_handler_req_exec_cpg_partial_mcast ( ++ const void *message, ++ unsigned int nodeid); ++ + static void message_handler_req_exec_cpg_downlist_old ( + const void *message, + unsigned int nodeid); +@@ -238,6 +245,8 @@ static void exec_cpg_joinlist_endian_convert (void *msg); + + static void exec_cpg_mcast_endian_convert (void *msg); + ++static void exec_cpg_partial_mcast_endian_convert (void *msg); ++ + static void exec_cpg_downlist_endian_convert_old (void *msg); + + static void exec_cpg_downlist_endian_convert (void *msg); +@@ -250,6 +259,8 @@ static void message_handler_req_lib_cpg_finalize (void *conn, const void *messag + + static void message_handler_req_lib_cpg_mcast (void *conn, const void *message); + ++static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message); ++ + static void message_handler_req_lib_cpg_membership (void *conn, + const void *message); + +@@ -383,7 +394,10 @@ static struct corosync_lib_handler cpg_lib_engine[] = + .lib_handler_fn = message_handler_req_lib_cpg_zc_execute, + .flow_control = CS_LIB_FLOW_CONTROL_REQUIRED + }, +- ++ { /* 12 */ ++ .lib_handler_fn = message_handler_req_lib_cpg_partial_mcast, ++ 
.flow_control = CS_LIB_FLOW_CONTROL_REQUIRED ++ }, + + }; + +@@ -413,6 +427,10 @@ static struct corosync_exec_handler cpg_exec_engine[] = + .exec_handler_fn = message_handler_req_exec_cpg_downlist, + .exec_endian_convert_fn = exec_cpg_downlist_endian_convert + }, ++ { /* 6 - MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST */ ++ .exec_handler_fn = message_handler_req_exec_cpg_partial_mcast, ++ .exec_endian_convert_fn = exec_cpg_partial_mcast_endian_convert ++ }, + }; + + struct corosync_service_engine cpg_service_engine = { +@@ -457,6 +475,17 @@ struct req_exec_cpg_mcast { + mar_uint8_t message[] __attribute__((aligned(8))); + }; + ++struct req_exec_cpg_partial_mcast { ++ struct qb_ipc_request_header header __attribute__((aligned(8))); ++ mar_cpg_name_t group_name __attribute__((aligned(8))); ++ mar_uint32_t msglen __attribute__((aligned(8))); ++ mar_uint32_t fraglen __attribute__((aligned(8))); ++ mar_uint32_t pid __attribute__((aligned(8))); ++ mar_uint32_t type __attribute__((aligned(8))); ++ mar_message_source_t source __attribute__((aligned(8))); ++ mar_uint8_t message[] __attribute__((aligned(8))); ++}; ++ + struct req_exec_cpg_downlist_old { + struct qb_ipc_request_header header __attribute__((aligned(8))); + mar_uint32_t left_nodes __attribute__((aligned(8))); +@@ -740,6 +769,7 @@ static int notify_lib_joinlist( + cpd->cpd_state == CPD_STATE_LEAVE_STARTED) { + + api->ipc_dispatch_send (cpd->conn, buf, size); ++ cpd->transition_counter++; + } + if (left_list_entries) { + if (left_list[0].pid == cpd->pid && +@@ -1186,6 +1216,19 @@ static void exec_cpg_mcast_endian_convert (void *msg) + swab_mar_message_source_t (&req_exec_cpg_mcast->source); + } + ++static void exec_cpg_partial_mcast_endian_convert (void *msg) ++{ ++ struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = msg; ++ ++ swab_coroipc_request_header_t (&req_exec_cpg_mcast->header); ++ swab_mar_cpg_name_t (&req_exec_cpg_mcast->group_name); ++ req_exec_cpg_mcast->pid = swab32(req_exec_cpg_mcast->pid); ++ 
req_exec_cpg_mcast->msglen = swab32(req_exec_cpg_mcast->msglen); ++ req_exec_cpg_mcast->fraglen = swab32(req_exec_cpg_mcast->fraglen); ++ req_exec_cpg_mcast->type = swab32(req_exec_cpg_mcast->type); ++ swab_mar_message_source_t (&req_exec_cpg_mcast->source); ++} ++ + static struct process_info *process_info_find(const mar_cpg_name_t *group_name, uint32_t pid, unsigned int nodeid) { + struct list_head *iter; + +@@ -1453,6 +1496,68 @@ static void message_handler_req_exec_cpg_mcast ( + } + } + ++static void message_handler_req_exec_cpg_partial_mcast ( ++ const void *message, ++ unsigned int nodeid) ++{ ++ const struct req_exec_cpg_partial_mcast *req_exec_cpg_mcast = message; ++ struct res_lib_cpg_partial_deliver_callback res_lib_cpg_mcast; ++ int msglen = req_exec_cpg_mcast->fraglen; ++ struct list_head *iter, *pi_iter; ++ struct cpg_pd *cpd; ++ struct iovec iovec[2]; ++ int known_node = 0; ++ ++ log_printf(LOGSYS_LEVEL_DEBUG, "Got fragmented message from node %d, size = %d bytes\n", nodeid, msglen); ++ ++ res_lib_cpg_mcast.header.id = MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK; ++ res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast) + msglen; ++ res_lib_cpg_mcast.fraglen = msglen; ++ res_lib_cpg_mcast.msglen = req_exec_cpg_mcast->msglen; ++ res_lib_cpg_mcast.pid = req_exec_cpg_mcast->pid; ++ res_lib_cpg_mcast.type = req_exec_cpg_mcast->type; ++ res_lib_cpg_mcast.nodeid = nodeid; ++ ++ memcpy(&res_lib_cpg_mcast.group_name, &req_exec_cpg_mcast->group_name, ++ sizeof(mar_cpg_name_t)); ++ iovec[0].iov_base = (void *)&res_lib_cpg_mcast; ++ iovec[0].iov_len = sizeof (res_lib_cpg_mcast); ++ ++ iovec[1].iov_base = (char*)message+sizeof(*req_exec_cpg_mcast); ++ iovec[1].iov_len = msglen; ++ ++ for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; ) { ++ cpd = list_entry(iter, struct cpg_pd, list); ++ iter = iter->next; ++ ++ if ((cpd->cpd_state == CPD_STATE_LEAVE_STARTED || cpd->cpd_state == CPD_STATE_JOIN_COMPLETED) ++ && (mar_name_compare (&cpd->group_name, 
&req_exec_cpg_mcast->group_name) == 0)) { ++ ++ if (!known_node) { ++ /* Try to find, if we know the node */ ++ for (pi_iter = process_info_list_head.next; ++ pi_iter != &process_info_list_head; pi_iter = pi_iter->next) { ++ ++ struct process_info *pi = list_entry (pi_iter, struct process_info, list); ++ ++ if (pi->nodeid == nodeid && ++ mar_name_compare (&pi->group, &req_exec_cpg_mcast->group_name) == 0) { ++ known_node = 1; ++ break; ++ } ++ } ++ } ++ ++ if (!known_node) { ++ log_printf(LOGSYS_LEVEL_WARNING, "Unknown node -> we will not deliver message"); ++ return ; ++ } ++ ++ api->ipc_dispatch_iov_send (cpd->conn, iovec, 2); ++ } ++ } ++} ++ + + static int cpg_exec_send_downlist(void) + { +@@ -1864,6 +1969,77 @@ static void message_handler_req_lib_cpg_zc_free ( + res_header.size); + } + ++/* Fragmented mcast message from the library */ ++static void message_handler_req_lib_cpg_partial_mcast (void *conn, const void *message) ++{ ++ const struct req_lib_cpg_partial_mcast *req_lib_cpg_mcast = message; ++ struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn); ++ mar_cpg_name_t group_name = cpd->group_name; ++ ++ struct iovec req_exec_cpg_iovec[2]; ++ struct req_exec_cpg_partial_mcast req_exec_cpg_mcast; ++ struct res_lib_cpg_partial_send res_lib_cpg_partial_send; ++ int msglen = req_lib_cpg_mcast->fraglen; ++ int result; ++ cs_error_t error = CS_ERR_NOT_EXIST; ++ ++ log_printf(LOGSYS_LEVEL_TRACE, "got fragmented mcast request on %p", conn); ++ log_printf(LOGSYS_LEVEL_DEBUG, "Sending fragmented message size = %d bytes\n", msglen); ++ ++ switch (cpd->cpd_state) { ++ case CPD_STATE_UNJOINED: ++ error = CS_ERR_NOT_EXIST; ++ break; ++ case CPD_STATE_LEAVE_STARTED: ++ error = CS_ERR_NOT_EXIST; ++ break; ++ case CPD_STATE_JOIN_STARTED: ++ error = CS_OK; ++ break; ++ case CPD_STATE_JOIN_COMPLETED: ++ error = CS_OK; ++ break; ++ } ++ ++ res_lib_cpg_partial_send.header.size = sizeof(res_lib_cpg_partial_send); ++ res_lib_cpg_partial_send.header.id = 
MESSAGE_RES_CPG_PARTIAL_SEND; ++ ++ if (req_lib_cpg_mcast->type == LIBCPG_PARTIAL_FIRST) { ++ cpd->initial_transition_counter = cpd->transition_counter; ++ } ++ if (cpd->transition_counter != cpd->initial_transition_counter) { ++ error = CS_ERR_INTERRUPT; ++ } ++ ++ if (error == CS_OK) { ++ req_exec_cpg_mcast.header.size = sizeof(req_exec_cpg_mcast) + msglen; ++ req_exec_cpg_mcast.header.id = SERVICE_ID_MAKE(CPG_SERVICE, ++ MESSAGE_REQ_EXEC_CPG_PARTIAL_MCAST); ++ req_exec_cpg_mcast.pid = cpd->pid; ++ req_exec_cpg_mcast.msglen = req_lib_cpg_mcast->msglen; ++ req_exec_cpg_mcast.type = req_lib_cpg_mcast->type; ++ req_exec_cpg_mcast.fraglen = req_lib_cpg_mcast->fraglen; ++ api->ipc_source_set (&req_exec_cpg_mcast.source, conn); ++ memcpy(&req_exec_cpg_mcast.group_name, &group_name, ++ sizeof(mar_cpg_name_t)); ++ ++ req_exec_cpg_iovec[0].iov_base = (char *)&req_exec_cpg_mcast; ++ req_exec_cpg_iovec[0].iov_len = sizeof(req_exec_cpg_mcast); ++ req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message; ++ req_exec_cpg_iovec[1].iov_len = msglen; ++ ++ result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED); ++ assert(result == 0); ++ } else { ++ log_printf(LOGSYS_LEVEL_ERROR, "*** %p can't mcast to group %s state:%d, error:%d", ++ conn, group_name.value, cpd->cpd_state, error); ++ } ++ ++ res_lib_cpg_partial_send.header.error = error; ++ api->ipc_response_send (conn, &res_lib_cpg_partial_send, ++ sizeof (res_lib_cpg_partial_send)); ++} ++ + /* Mcast message from the library */ + static void message_handler_req_lib_cpg_mcast (void *conn, const void *message) + { +diff --git a/include/corosync/cpg.h b/include/corosync/cpg.h +index 55fc4b8..f66fb14 100644 +--- a/include/corosync/cpg.h ++++ b/include/corosync/cpg.h +@@ -186,6 +186,13 @@ cs_error_t cpg_fd_get ( + int *fd); + + /** ++ * Get maximum size of a message that will not be fragmented ++ */ ++cs_error_t cpg_max_atomic_msgsize_get ( ++ cpg_handle_t handle, ++ uint32_t *size); ++ ++/** + * Get contexts 
for a CPG handle + */ + cs_error_t cpg_context_get ( +diff --git a/include/corosync/ipc_cpg.h b/include/corosync/ipc_cpg.h +index a95335a..5008acf 100644 +--- a/include/corosync/ipc_cpg.h ++++ b/include/corosync/ipc_cpg.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2006-2011 Red Hat, Inc. ++ * Copyright (c) 2006-2015 Red Hat, Inc. + * + * All rights reserved. + * +@@ -55,6 +55,7 @@ enum req_cpg_types { + MESSAGE_REQ_CPG_ZC_ALLOC = 9, + MESSAGE_REQ_CPG_ZC_FREE = 10, + MESSAGE_REQ_CPG_ZC_EXECUTE = 11, ++ MESSAGE_REQ_CPG_PARTIAL_MCAST = 12, + }; + + enum res_cpg_types { +@@ -75,6 +76,8 @@ enum res_cpg_types { + MESSAGE_RES_CPG_ZC_ALLOC = 14, + MESSAGE_RES_CPG_ZC_FREE = 15, + MESSAGE_RES_CPG_ZC_EXECUTE = 16, ++ MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK = 17, ++ MESSAGE_RES_CPG_PARTIAL_SEND = 18, + }; + + enum lib_cpg_confchg_reason { +@@ -85,6 +88,12 @@ enum lib_cpg_confchg_reason { + CONFCHG_CPG_REASON_PROCDOWN = 5 + }; + ++enum lib_cpg_partial_types { ++ LIBCPG_PARTIAL_FIRST = 1, ++ LIBCPG_PARTIAL_CONTINUED = 2, ++ LIBCPG_PARTIAL_LAST = 3, ++}; ++ + typedef struct { + uint32_t length __attribute__((aligned(8))); + char value[CPG_MAX_NAME_LENGTH] __attribute__((aligned(8))); +@@ -200,6 +209,10 @@ struct res_lib_cpg_local_get { + mar_uint32_t local_nodeid __attribute__((aligned(8))); + }; + ++struct res_lib_cpg_partial_send { ++ struct qb_ipc_response_header header __attribute__((aligned(8))); ++}; ++ + struct req_lib_cpg_mcast { + struct qb_ipc_response_header header __attribute__((aligned(8))); + mar_uint32_t guarantee __attribute__((aligned(8))); +@@ -207,6 +220,15 @@ struct req_lib_cpg_mcast { + mar_uint8_t message[] __attribute__((aligned(8))); + }; + ++struct req_lib_cpg_partial_mcast { ++ struct qb_ipc_response_header header __attribute__((aligned(8))); ++ mar_uint32_t guarantee __attribute__((aligned(8))); ++ mar_uint32_t msglen __attribute__((aligned(8))); ++ mar_uint32_t fraglen __attribute__((aligned(8))); ++ mar_uint32_t type __attribute__((aligned(8))); ++ 
mar_uint8_t message[] __attribute__((aligned(8))); ++}; ++ + struct res_lib_cpg_mcast { + struct qb_ipc_response_header header __attribute__((aligned(8))); + }; +@@ -223,6 +245,17 @@ struct res_lib_cpg_deliver_callback { + mar_uint8_t message[] __attribute__((aligned(8))); + }; + ++struct res_lib_cpg_partial_deliver_callback { ++ struct qb_ipc_response_header header __attribute__((aligned(8))); ++ mar_cpg_name_t group_name __attribute__((aligned(8))); ++ mar_uint32_t msglen __attribute__((aligned(8))); ++ mar_uint32_t fraglen __attribute__((aligned(8))); ++ mar_uint32_t nodeid __attribute__((aligned(8))); ++ mar_uint32_t pid __attribute__((aligned(8))); ++ mar_uint32_t type __attribute__((aligned(8))); ++ mar_uint8_t message[] __attribute__((aligned(8))); ++}; ++ + struct res_lib_cpg_flowcontrol_callback { + struct qb_ipc_response_header header __attribute__((aligned(8))); + mar_uint32_t flow_control_state __attribute__((aligned(8))); +diff --git a/lib/cpg.c b/lib/cpg.c +index 4b92f44..037e8a9 100644 +--- a/lib/cpg.c ++++ b/lib/cpg.c +@@ -1,7 +1,7 @@ + /* + * vi: set autoindent tabstop=4 shiftwidth=4 : + * +- * Copyright (c) 2006-2012 Red Hat, Inc. ++ * Copyright (c) 2006-2015 Red Hat, Inc. + * + * All rights reserved. + * +@@ -70,6 +70,12 @@ + #endif + + /* ++ * Maximum number of times to retry a send when transmitting ++ * a large message fragment ++ */ ++#define MAX_RETRIES 100 ++ ++/* + * ZCB files have following umask (umask is same as used in libqb) + */ + #define CPG_MEMORY_MAP_UMASK 077 +@@ -83,6 +89,14 @@ struct cpg_inst { + cpg_model_v1_data_t model_v1_data; + }; + struct list_head iteration_list_head; ++ uint32_t max_msg_size; ++ char *assembly_buf; ++ uint32_t assembly_buf_ptr; ++ int assembling; /* Flag that says we have started assembling a message. ++ * It's here to catch the situation where a node joins ++ * the cluster/group in the middle of a CPG message send ++ * so we don't pass on a partial message to the client. 
++ */ + }; + static void cpg_inst_free (void *inst); + +@@ -210,6 +224,8 @@ cs_error_t cpg_model_initialize ( + } + } + ++ /* Allow space for corosync internal headers */ ++ cpg_inst->max_msg_size = IPC_REQUEST_SIZE - 1024; + cpg_inst->model_data.model = model; + cpg_inst->context = context; + +@@ -291,6 +307,25 @@ cs_error_t cpg_fd_get ( + return (error); + } + ++cs_error_t cpg_max_atomic_msgsize_get ( ++ cpg_handle_t handle, ++ uint32_t *size) ++{ ++ cs_error_t error; ++ struct cpg_inst *cpg_inst; ++ ++ error = hdb_error_to_cs (hdb_handle_get (&cpg_handle_t_db, handle, (void *)&cpg_inst)); ++ if (error != CS_OK) { ++ return (error); ++ } ++ ++ *size = cpg_inst->max_msg_size; ++ ++ hdb_handle_put (&cpg_handle_t_db, handle); ++ ++ return (error); ++} ++ + cs_error_t cpg_context_get ( + cpg_handle_t handle, + void **context) +@@ -339,6 +374,7 @@ cs_error_t cpg_dispatch ( + struct cpg_inst *cpg_inst; + struct res_lib_cpg_confchg_callback *res_cpg_confchg_callback; + struct res_lib_cpg_deliver_callback *res_cpg_deliver_callback; ++ struct res_lib_cpg_partial_deliver_callback *res_cpg_partial_deliver_callback; + struct res_lib_cpg_totem_confchg_callback *res_cpg_totem_confchg_callback; + struct cpg_inst cpg_inst_copy; + struct qb_ipc_response_header *dispatch_data; +@@ -361,7 +397,7 @@ cs_error_t cpg_dispatch ( + + /* + * Timeout instantly for CS_DISPATCH_ONE_NONBLOCKING or CS_DISPATCH_ALL and +- * wait indefinately for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING ++ * wait indefinitely for CS_DISPATCH_ONE or CS_DISPATCH_BLOCKING + */ + if (dispatch_types == CS_DISPATCH_ALL || dispatch_types == CS_DISPATCH_ONE_NONBLOCKING) { + timeout = 0; +@@ -428,6 +464,43 @@ cs_error_t cpg_dispatch ( + res_cpg_deliver_callback->msglen); + break; + ++ case MESSAGE_RES_CPG_PARTIAL_DELIVER_CALLBACK: ++ res_cpg_partial_deliver_callback = (struct res_lib_cpg_partial_deliver_callback *)dispatch_data; ++ ++ marshall_from_mar_cpg_name_t ( ++ &group_name, ++ 
&res_cpg_partial_deliver_callback->group_name); ++ ++ if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_FIRST) { ++ /* ++ * Allocate a buffer to contain a full message. ++ */ ++ cpg_inst->assembly_buf = malloc(res_cpg_partial_deliver_callback->msglen); ++ if (!cpg_inst->assembly_buf) { ++ error = CS_ERR_NO_MEMORY; ++ goto error_put; ++ } ++ cpg_inst->assembling = 1; ++ cpg_inst->assembly_buf_ptr = 0; ++ } ++ if (cpg_inst->assembling) { ++ memcpy(cpg_inst->assembly_buf + cpg_inst->assembly_buf_ptr, ++ res_cpg_partial_deliver_callback->message, res_cpg_partial_deliver_callback->fraglen); ++ cpg_inst->assembly_buf_ptr += res_cpg_partial_deliver_callback->fraglen; ++ ++ if (res_cpg_partial_deliver_callback->type == LIBCPG_PARTIAL_LAST) { ++ cpg_inst_copy.model_v1_data.cpg_deliver_fn (handle, ++ &group_name, ++ res_cpg_partial_deliver_callback->nodeid, ++ res_cpg_partial_deliver_callback->pid, ++ cpg_inst->assembly_buf, ++ res_cpg_partial_deliver_callback->msglen); ++ free(cpg_inst->assembly_buf); ++ cpg_inst->assembling = 0; ++ } ++ } ++ break; ++ + case MESSAGE_RES_CPG_CONFCHG_CALLBACK: + if (cpg_inst_copy.model_v1_data.cpg_confchg_fn == NULL) { + break; +@@ -921,6 +994,12 @@ cs_error_t cpg_zcb_mcast_joined ( + if (error != CS_OK) { + return (error); + } ++ ++ if (msg_len > IPC_REQUEST_SIZE) { ++ error = CS_ERR_TOO_BIG; ++ goto error_exit; ++ } ++ + req_lib_cpg_mcast = (struct req_lib_cpg_mcast *)(((char *)msg) - sizeof (struct req_lib_cpg_mcast)); + req_lib_cpg_mcast->header.size = sizeof (struct req_lib_cpg_mcast) + + msg_len; +@@ -957,6 +1036,88 @@ error_exit: + return (error); + } + ++static cs_error_t send_fragments ( ++ struct cpg_inst *cpg_inst, ++ cpg_guarantee_t guarantee, ++ size_t msg_len, ++ const struct iovec *iovec, ++ unsigned int iov_len) ++{ ++ int i; ++ cs_error_t error = CS_OK; ++ struct iovec iov[2]; ++ struct req_lib_cpg_partial_mcast req_lib_cpg_mcast; ++ struct res_lib_cpg_partial_send res_lib_cpg_partial_send; ++ size_t sent = 0; ++ 
size_t iov_sent = 0; ++ int retry_count; ++ ++ req_lib_cpg_mcast.header.id = MESSAGE_REQ_CPG_PARTIAL_MCAST; ++ req_lib_cpg_mcast.guarantee = guarantee; ++ req_lib_cpg_mcast.msglen = msg_len; ++ ++ iov[0].iov_base = (void *)&req_lib_cpg_mcast; ++ iov[0].iov_len = sizeof (struct req_lib_cpg_partial_mcast); ++ ++ i=0; ++ iov_sent = 0 ; ++ qb_ipcc_fc_enable_max_set(cpg_inst->c, 2); ++ ++ while (error == CS_OK && sent < msg_len) { ++ ++ retry_count = 0; ++ if ( (iovec[i].iov_len - iov_sent) > cpg_inst->max_msg_size) { ++ iov[1].iov_len = cpg_inst->max_msg_size; ++ } ++ else { ++ iov[1].iov_len = iovec[i].iov_len - iov_sent; ++ } ++ ++ if (sent == 0) { ++ req_lib_cpg_mcast.type = LIBCPG_PARTIAL_FIRST; ++ } ++ else if ((sent + iov[1].iov_len) == msg_len) { ++ req_lib_cpg_mcast.type = LIBCPG_PARTIAL_LAST; ++ } ++ else { ++ req_lib_cpg_mcast.type = LIBCPG_PARTIAL_CONTINUED; ++ } ++ ++ req_lib_cpg_mcast.fraglen = iov[1].iov_len; ++ req_lib_cpg_mcast.header.size = sizeof (struct req_lib_cpg_partial_mcast) + iov[1].iov_len; ++ iov[1].iov_base = (char *)iovec[i].iov_base + iov_sent; ++ ++ resend: ++ error = coroipcc_msg_send_reply_receive (cpg_inst->c, iov, 2, ++ &res_lib_cpg_partial_send, ++ sizeof (res_lib_cpg_partial_send)); ++ ++ if (error == CS_ERR_TRY_AGAIN) { ++ fprintf(stderr, "sleep. 
counter=%d\n", retry_count); ++ if (++retry_count > MAX_RETRIES) { ++ goto error_exit; ++ } ++ usleep(10000); ++ goto resend; ++ } ++ ++ iov_sent += iov[1].iov_len; ++ sent += iov[1].iov_len; ++ ++ /* Next iovec */ ++ if (iov_sent >= iovec[i].iov_len) { ++ i++; ++ iov_sent = 0; ++ } ++ error = res_lib_cpg_partial_send.header.error; ++ } ++error_exit: ++ qb_ipcc_fc_enable_max_set(cpg_inst->c, 1); ++ ++ return error; ++} ++ ++ + cs_error_t cpg_mcast_joined ( + cpg_handle_t handle, + cpg_guarantee_t guarantee, +@@ -979,6 +1140,11 @@ cs_error_t cpg_mcast_joined ( + msg_len += iovec[i].iov_len; + } + ++ if (msg_len > cpg_inst->max_msg_size) { ++ error = send_fragments(cpg_inst, guarantee, msg_len, iovec, iov_len); ++ goto error_exit; ++ } ++ + req_lib_cpg_mcast.header.size = sizeof (struct req_lib_cpg_mcast) + + msg_len; + +@@ -994,6 +1160,7 @@ cs_error_t cpg_mcast_joined ( + error = qb_to_cs_error(qb_ipcc_sendv(cpg_inst->c, iov, iov_len + 1)); + qb_ipcc_fc_enable_max_set(cpg_inst->c, 1); + ++error_exit: + hdb_handle_put (&cpg_handle_t_db, handle); + + return (error); +diff --git a/test/Makefile.am b/test/Makefile.am +index c19e506..bb11518 100644 +--- a/test/Makefile.am ++++ b/test/Makefile.am +@@ -34,7 +34,7 @@ MAINTAINERCLEANFILES = Makefile.in + + EXTRA_DIST = ploadstart.sh + +-noinst_PROGRAMS = cpgverify testcpg testcpg2 cpgbench \ ++noinst_PROGRAMS = cpgverify testcpg testcpg2 cpgbench cpghum \ + testquorum testvotequorum1 testvotequorum2 \ + stress_cpgfdget stress_cpgcontext cpgbound testsam \ + testcpgzc cpgbenchzc testzcgc stress_cpgzc +@@ -48,6 +48,7 @@ testzcgc_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + stress_cpgzc_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + stress_cpgfdget_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + stress_cpgcontext_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la ++cpghum_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la -lz + testquorum_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libquorum.la + 
testvotequorum1_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libvotequorum.la + testvotequorum2_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libvotequorum.la +-- +1.7.1 + diff --git a/SOURCES/bz682771-2-Really-add-cpghum.patch b/SOURCES/bz682771-2-Really-add-cpghum.patch new file mode 100644 index 0000000..8bf2df1 --- /dev/null +++ b/SOURCES/bz682771-2-Really-add-cpghum.patch @@ -0,0 +1,452 @@ +From 3842ba6080e00fd9484a2a875d982e149f67bc44 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Tue, 10 Mar 2015 13:20:37 +0100 +Subject: [PATCH] Really add cpghum + +Signed-off-by: Jan Friesse +--- + test/cpghum.c | 432 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 432 insertions(+), 0 deletions(-) + create mode 100644 test/cpghum.c + +diff --git a/test/cpghum.c b/test/cpghum.c +new file mode 100644 +index 0000000..79184e8 +--- /dev/null ++++ b/test/cpghum.c +@@ -0,0 +1,432 @@ ++/* ++ * Copyright (c) 2015 Red Hat, Inc. ++ * ++ * All rights reserved. ++ * ++ * Author: Christine Caulfield ++ * ++ * This software licensed under BSD license, the text of which follows: ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * - Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * - Neither the name of the MontaVista Software, Inc. nor the names of its ++ * contributors may be used to endorse or promote products derived from this ++ * software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF ++ * THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++ ++static cpg_handle_t handle; ++ ++static pthread_t thread; ++ ++#ifndef timersub ++#define timersub(a, b, result) \ ++ do { \ ++ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ ++ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ ++ if ((result)->tv_usec < 0) { \ ++ --(result)->tv_sec; \ ++ (result)->tv_usec += 1000000; \ ++ } \ ++ } while (0) ++#endif /* timersub */ ++ ++static int alarm_notice; ++#define ONE_MEG 1048576 ++#define DATASIZE (ONE_MEG*20) ++static char data[DATASIZE]; ++static int send_counter = 0; ++static int do_syslog = 0; ++static int quiet = 0; ++static volatile int stopped; ++ ++// stats ++static unsigned int length_errors=0; ++static unsigned int crc_errors=0; ++static unsigned int sequence_errors=0; ++static unsigned int packets_sent=0; ++static unsigned int packets_recvd=0; ++static unsigned int send_retries=0; ++static unsigned int send_fails=0; ++ ++static void 
cpg_bm_confchg_fn ( ++ cpg_handle_t handle_in, ++ const struct cpg_name *group_name, ++ const struct cpg_address *member_list, size_t member_list_entries, ++ const struct cpg_address *left_list, size_t left_list_entries, ++ const struct cpg_address *joined_list, size_t joined_list_entries) ++{ ++} ++ ++static unsigned int g_recv_count; ++static unsigned int g_recv_length; ++static unsigned int g_write_size; ++static int g_recv_counter = 0; ++ ++static void cpg_bm_deliver_fn ( ++ cpg_handle_t handle_in, ++ const struct cpg_name *group_name, ++ uint32_t nodeid, ++ uint32_t pid, ++ void *msg, ++ size_t msg_len) ++{ ++ int *value = msg; ++ uLong crc=0; ++ ulong recv_crc = value[1] & 0xFFFFFFFF; ++ ++ packets_recvd++; ++ g_recv_length = msg_len; ++ ++ // Basic check, packets should all be the right size ++ if (g_write_size && (msg_len != g_write_size)) { ++ length_errors++; ++ fprintf(stderr, "%s: message sizes don't match. got %lu, expected %u\n", group_name->value, msg_len, g_write_size); ++ if (do_syslog) { ++ syslog(LOG_ERR, "%s: message sizes don't match. got %lu, expected %u\n", group_name->value, msg_len, g_write_size); ++ } ++ } ++ ++ // Sequence counters are incrementing in step? ++ if (*value != g_recv_counter) { ++ sequence_errors++; ++ fprintf(stderr, "%s: counters don't match. got %d, expected %d\n", group_name->value, *value, g_recv_counter); ++ if (do_syslog) { ++ syslog(LOG_ERR, "%s: counters don't match. got %d, expected %d\n", group_name->value, *value, g_recv_counter); ++ } ++ // Catch up or we'll be printing errors for ever ++ g_recv_counter = *value +1; ++ } else { ++ g_recv_counter++; ++ } ++ ++ // Check crc ++ crc = crc32(0, NULL, 0); ++ crc = crc32(crc, (Bytef *)&value[2], msg_len-sizeof(int)*2) & 0xFFFFFFFF; ++ if (crc != recv_crc) { ++ crc_errors++; ++ fprintf(stderr, "%s: CRCs don't match. got %lx, expected %lx\n", group_name->value, recv_crc, crc); ++ if (do_syslog) { ++ syslog(LOG_ERR, "%s: CRCs don't match. 
got %lx, expected %lx\n", group_name->value, recv_crc, crc); ++ } ++ } ++ ++ g_recv_count++; ++ ++} ++ ++static cpg_model_v1_data_t model1_data = { ++ .cpg_deliver_fn = cpg_bm_deliver_fn, ++ .cpg_confchg_fn = cpg_bm_confchg_fn, ++}; ++ ++static cpg_callbacks_t callbacks = { ++ .cpg_deliver_fn = cpg_bm_deliver_fn, ++ .cpg_confchg_fn = cpg_bm_confchg_fn ++}; ++ ++static struct cpg_name group_name = { ++ .value = "cpghum", ++ .length = 7 ++}; ++ ++static void cpg_test ( ++ cpg_handle_t handle_in, ++ int write_size, ++ int delay_time, ++ int print_time) ++{ ++ struct timeval tv1, tv2, tv_elapsed; ++ struct iovec iov; ++ unsigned int res; ++ int i; ++ unsigned int *dataint = (unsigned int *)data; ++ uLong crc; ++ ++ alarm_notice = 0; ++ iov.iov_base = data; ++ iov.iov_len = write_size; ++ ++ g_recv_count = 0; ++ alarm (print_time); ++ ++ gettimeofday (&tv1, NULL); ++ do { ++ dataint[0] = send_counter++; ++ for (i=2; i<(DATASIZE-sizeof(int)*2)/4; i++) { ++ dataint[i] = rand(); ++ } ++ crc = crc32(0, NULL, 0); ++ dataint[1] = crc32(crc, (Bytef*)&dataint[2], write_size-sizeof(int)*2); ++ resend: ++ res = cpg_mcast_joined (handle_in, CPG_TYPE_AGREED, &iov, 1); ++ if (res == CS_ERR_TRY_AGAIN) { ++ usleep(10000); ++ send_retries++; ++ goto resend; ++ } ++ if (res != CS_OK) { ++ fprintf(stderr, "send failed: %d\n", res); ++ send_fails++; ++ } ++ else { ++ packets_sent++; ++ } ++ usleep(delay_time*1000); ++ } while (alarm_notice == 0 && (res == CS_OK || res == CS_ERR_TRY_AGAIN) && stopped == 0); ++ gettimeofday (&tv2, NULL); ++ timersub (&tv2, &tv1, &tv_elapsed); ++ ++ if (!quiet) { ++ printf ("%s: %5d message%s received, ", group_name.value, g_recv_count, g_recv_count==1?"":"s"); ++ printf ("%5d bytes per write\n", write_size); ++ } ++ ++} ++ ++static void sigalrm_handler (int num) ++{ ++ alarm_notice = 1; ++} ++ ++static void sigint_handler (int num) ++{ ++ stopped = 1; ++} ++ ++static void* dispatch_thread (void *arg) ++{ ++ cpg_dispatch (handle, CS_DISPATCH_BLOCKING); ++ 
return NULL; ++} ++ ++static void usage(char *cmd) ++{ ++ fprintf(stderr, "%s [OPTIONS]\n", cmd); ++ fprintf(stderr, "\n"); ++ fprintf(stderr, "%s sends CPG messages to all registered users of the CPG.\n", cmd); ++ fprintf(stderr, "The messages have a sequence number and a CRC so that missing or\n"); ++ fprintf(stderr, "corrupted messages will be detected and reported.\n"); ++ fprintf(stderr, "\n"); ++ fprintf(stderr, "%s can also be asked to simply listen for (and check) packets\n", cmd); ++ fprintf(stderr, "so that there is another node in the cluster connected to the CPG.\n"); ++ fprintf(stderr, "\n"); ++ fprintf(stderr, "When -l is present, packet size is only checked if specified by -w or -W\n"); ++ fprintf(stderr, "and it, obviously, must match that of the sender.\n"); ++ fprintf(stderr, "\n"); ++ fprintf(stderr, "Multiple copies, in different CPGs, can also be run on the same or\n"); ++ fprintf(stderr, "different nodes by using the -n option.\n"); ++ fprintf(stderr, "\n"); ++ fprintf(stderr, "%s can't handle more than 1 sender in the same CPG as it messes with the\n", cmd); ++ fprintf(stderr, "sequence numbers.\n"); ++ fprintf(stderr, "\n"); ++ fprintf(stderr, " -w Write size in Kbytes, default 4\n"); ++ fprintf(stderr, " -W Write size in bytes, default 4096\n"); ++ fprintf(stderr, " -n CPG name to use, default 'cpghum'\n"); ++ fprintf(stderr, " -d Delay between sending packets (mS), default 1000\n"); ++ fprintf(stderr, " -r Number of repetitions, default 100\n"); ++ fprintf(stderr, " -p Delay between printing output(S), default 10s\n"); ++ fprintf(stderr, " -l Listen and check CRCs only, don't send (^C to quit)\n"); ++ fprintf(stderr, " -m cpg_initialise() model. Default 1.\n"); ++ fprintf(stderr, " -s Also send errors to syslog (for daemon log correlation).\n"); ++ fprintf(stderr, " -q Quiet. 
Don't print messages every 10 seconds (see also -p)\n"); ++ fprintf(stderr, "\n"); ++} ++ ++int main (int argc, char *argv[]) { ++ int i; ++ unsigned int res; ++ uint32_t maxsize; ++ int opt; ++ int bs; ++ int write_size = 4096; ++ int delay_time = 1000; ++ int repetitions = 100; ++ int print_time = 10; ++ int have_size = 0; ++ int listen_only = 0; ++ int model = 1; ++ ++ while ( (opt = getopt(argc, argv, "qlsn:d:r:p:m:w:W:")) != -1 ) { ++ switch (opt) { ++ case 'w': // Write size in K ++ bs = atoi(optarg); ++ if (bs > 0) { ++ write_size = bs*1024; ++ have_size = 1; ++ } ++ break; ++ case 'W': // Write size in bytes ++ bs = atoi(optarg); ++ if (bs > 0) { ++ write_size = bs; ++ have_size = 1; ++ } ++ break; ++ case 'n': ++ strcpy(group_name.value, optarg); ++ group_name.length = strlen(group_name.value); ++ break; ++ case 'd': ++ delay_time = atoi(optarg); ++ break; ++ case 'r': ++ repetitions = atoi(optarg); ++ break; ++ case 'p': ++ print_time = atoi(optarg); ++ break; ++ case 'l': ++ listen_only = 1; ++ break; ++ case 's': ++ do_syslog = 1; ++ break; ++ case 'q': ++ quiet = 1; ++ break; ++ case 'm': ++ model = atoi(optarg); ++ if (model < 0 || model > 1) { ++ fprintf(stderr, "%s: Model must be 0-1\n", argv[0]); ++ exit(1); ++ } ++ break; ++ case '?': ++ usage(basename(argv[0])); ++ exit(0); ++ } ++ } ++ ++ qb_log_init("cpghum", LOG_USER, LOG_EMERG); ++ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); ++ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, ++ QB_LOG_FILTER_FILE, "*", LOG_DEBUG); ++ qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); ++ ++ g_write_size = write_size; ++ ++ signal (SIGALRM, sigalrm_handler); ++ signal (SIGINT, sigint_handler); ++ switch (model) { ++ case 0: ++ res = cpg_initialize (&handle, &callbacks); ++ break; ++ case 1: ++ res = cpg_model_initialize (&handle, CPG_MODEL_V1, (cpg_model_data_t *)&model1_data, NULL); ++ break; ++ default: ++ res=999; // can't get here but it keeps the compiler happy ++ break; ++ } ++ ++ 
if (res != CS_OK) { ++ printf ("cpg_initialize failed with result %d\n", res); ++ exit (1); ++ } ++ pthread_create (&thread, NULL, dispatch_thread, NULL); ++ ++ res = cpg_join (handle, &group_name); ++ if (res != CS_OK) { ++ printf ("cpg_join failed with result %d\n", res); ++ exit (1); ++ } ++ ++ if (listen_only) { ++ int secs; ++ if (!quiet) { ++ printf("-- Listening on CPG %s\n", group_name.value); ++ printf("-- Ignore any starting \"counters don't match\" error while we catch up\n"); ++ } ++ ++ /* Only check packet size if specified on the command-line */ ++ if (!have_size) { ++ g_write_size = 0; ++ } ++ ++ while (!stopped) { ++ sleep(1); ++ if (++secs > print_time && !quiet) { ++ printf ("%s: %5d message%s received. %d bytes\n", group_name.value, g_recv_count, g_recv_count==1?"":"s", g_recv_length); ++ secs = 0; ++ g_recv_count = 0; ++ } ++ } ++ } ++ else { ++ cpg_max_atomic_msgsize_get (handle, &maxsize); ++ if ( write_size > maxsize) { ++ fprintf(stderr, "INFO: packet size (%d) is larger than the maximum atomic size (%d), libcpg will fragment\n", ++ write_size, maxsize); ++ } ++ for (i = 0; i < repetitions && !stopped; i++) { ++ cpg_test (handle, write_size, delay_time, print_time); ++ signal (SIGALRM, sigalrm_handler); ++ } ++ } ++ ++ res = cpg_finalize (handle); ++ if (res != CS_OK) { ++ printf ("cpg_finalize failed with result %d\n", res); ++ exit (1); ++ } ++ ++ printf("\n"); ++ printf("Stats:\n"); ++ if (!listen_only) { ++ printf(" packets sent: %d\n", packets_sent); ++ printf(" send failures: %d\n", send_fails); ++ printf(" send retries: %d\n", send_retries); ++ } ++ if (have_size) { ++ printf(" length errors: %d\n", length_errors); ++ } ++ printf(" packets recvd: %d\n", packets_recvd); ++ printf(" sequence errors: %d\n", sequence_errors); ++ printf(" crc errors: %d\n", crc_errors); ++ printf("\n"); ++ return (0); ++} +-- +1.7.1 + diff --git a/SOURCES/bz682771-3-Don-t-link-with-libz-when-not-needed.patch 
b/SOURCES/bz682771-3-Don-t-link-with-libz-when-not-needed.patch new file mode 100644 index 0000000..6db67de --- /dev/null +++ b/SOURCES/bz682771-3-Don-t-link-with-libz-when-not-needed.patch @@ -0,0 +1,68 @@ +From 4d228554d977327fe089b8ec3cb2ced649091560 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Mon, 22 Jun 2015 16:00:07 +0200 +Subject: [PATCH] Don't link with libz when not needed + +Commit 8cc8e513633a1a8b12c416e32fb5362fcf4d65dd added check for libz +resulting in linking with lib z for all libraries. This is not expected +behavior. Patch solves it by making defining automake conditional so +cpghum is linked only if libz is available and LIBS variable is not +modified at all. + +Signed-off-by: Jan Friesse +--- + configure.ac | 4 +++- + test/Makefile.am | 8 ++++++-- + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 4b640fc..922b439 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -163,7 +163,9 @@ AC_CHECK_LIB([pthread], [pthread_create]) + AC_CHECK_LIB([socket], [socket]) + AC_CHECK_LIB([nsl], [t_open]) + AC_CHECK_LIB([rt], [sched_getscheduler]) +-AC_CHECK_LIB([z], [crc32]) ++AC_CHECK_LIB([z], [crc32], ++ AM_CONDITIONAL([BUILD_CPGHUM], true), ++ AM_CONDITIONAL([BUILD_CPGHUM], false)) + + # Checks for library functions. 
+ AC_FUNC_ALLOCA +diff --git a/test/Makefile.am b/test/Makefile.am +index bb11518..ad1489d 100644 +--- a/test/Makefile.am ++++ b/test/Makefile.am +@@ -34,7 +34,7 @@ MAINTAINERCLEANFILES = Makefile.in + + EXTRA_DIST = ploadstart.sh + +-noinst_PROGRAMS = cpgverify testcpg testcpg2 cpgbench cpghum \ ++noinst_PROGRAMS = cpgverify testcpg testcpg2 cpgbench \ + testquorum testvotequorum1 testvotequorum2 \ + stress_cpgfdget stress_cpgcontext cpgbound testsam \ + testcpgzc cpgbenchzc testzcgc stress_cpgzc +@@ -48,7 +48,6 @@ testzcgc_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + stress_cpgzc_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + stress_cpgfdget_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + stress_cpgcontext_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la +-cpghum_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la -lz + testquorum_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libquorum.la + testvotequorum1_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libvotequorum.la + testvotequorum2_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libvotequorum.la +@@ -59,6 +58,11 @@ cpgbench_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + cpgbenchzc_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la + testsam_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libsam.la + ++if BUILD_CPGHUM ++noinst_PROGRAMS += cpghum ++cpghum_LDADD = $(LIBQB_LIBS) $(top_builddir)/lib/libcpg.la -lz ++endif ++ + ploadstart: ploadstart.sh + sed -e 's#@''BASHPATH@#${BASHPATH}#g' $< > $@ + chmod 755 $@ +-- +1.7.1 + diff --git a/SOURCES/bz773464-1-config-Make-sure-user-doesn-t-mix-IPv6-and-IPv4.patch b/SOURCES/bz773464-1-config-Make-sure-user-doesn-t-mix-IPv6-and-IPv4.patch new file mode 100644 index 0000000..535d35e --- /dev/null +++ b/SOURCES/bz773464-1-config-Make-sure-user-doesn-t-mix-IPv6-and-IPv4.patch @@ -0,0 +1,49 @@ +From 6c028d4d9c53decaa9469c792ac68fd2a886e7d9 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Mon, 24 Nov 2014 10:25:05 +0100 +Subject: [PATCH] config: Make sure 
user doesn't mix IPv6 and IPv4 + +Checking code was there, sadly not correct, so it was possible to enter +one bindnet addr as IPv4 and second as IPv6. Fix is trivial. + +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemconfig.c | 11 ++++++----- + 1 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index daf0719..3bcf49d 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -1055,21 +1055,22 @@ int totem_config_validate ( + goto parse_error; + } + +- if (totem_config->broadcast_use == 0 && totem_config->transport_number == 0) { ++ if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) { + if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { + error_reason = "Multicast address family does not match bind address family"; + goto parse_error; + } + +- if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { +- error_reason = "Not all bind address belong to the same IP family"; +- goto parse_error; +- } + if (totemip_is_mcast (&totem_config->interfaces[i].mcast_addr) != 0) { + error_reason = "mcastaddr is not a correct multicast address."; + goto parse_error; + } + } ++ ++ if (totem_config->interfaces[0].bindnet.family != totem_config->interfaces[i].bindnet.family) { ++ error_reason = "Not all bind address belong to the same IP family"; ++ goto parse_error; ++ } + } + + if (totem_config->version != 2) { +-- +1.7.1 + diff --git a/SOURCES/bz773464-2-config-Process-broadcast-option-consistently.patch b/SOURCES/bz773464-2-config-Process-broadcast-option-consistently.patch new file mode 100644 index 0000000..c3f42a2 --- /dev/null +++ b/SOURCES/bz773464-2-config-Process-broadcast-option-consistently.patch @@ -0,0 +1,69 @@ +From 70bd35fc06e68a010d780dfa39bd68d4bd2f7da7 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Mon, 24 Nov 2014 10:32:03 +0100 +Subject: 
[PATCH] config: Process broadcast option consistently + +Broadcast option is global but in config set in interface section. When +more interfaces are defined, only broadcast from last section was used. + +Solution is to use broadcast whenever at least one interface use +broadcast. + +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemconfig.c | 20 ++++++++++++++++---- + 1 files changed, 16 insertions(+), 4 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index 3bcf49d..9cb9b07 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -812,6 +812,12 @@ extern int totem_config_read ( + free(str); + } + ++ /* ++ * Broadcast option is global but set in interface section, ++ * so reset before processing interfaces. ++ */ ++ totem_config->broadcast_use = 0; ++ + iter = icmap_iter_init("totem.interface."); + while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { + res = sscanf(iter_key, "totem.interface.%[^.].%s", ringnumber_key, tmp_key); +@@ -866,14 +872,10 @@ extern int totem_config_read ( + &totem_config->interfaces[ringnumber].mcast_addr); + } + +- totem_config->broadcast_use = 0; + snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "totem.interface.%u.broadcast", ringnumber); + if (icmap_get_string(tmp_key, &str) == CS_OK) { + if (strcmp (str, "yes") == 0) { + totem_config->broadcast_use = 1; +- totemip_parse ( +- &totem_config->interfaces[ringnumber].mcast_addr, +- "255.255.255.255", totem_config->ip_version); + } + free(str); + } +@@ -927,6 +929,16 @@ extern int totem_config_read ( + icmap_iter_finalize(iter); + + /* ++ * Use broadcast is global, so if set, make sure to fill mcast addr correctly ++ */ ++ if (totem_config->broadcast_use) { ++ for (ringnumber = 0; ringnumber < totem_config->interface_count; ringnumber++) { ++ totemip_parse (&totem_config->interfaces[ringnumber].mcast_addr, ++ "255.255.255.255", 0); ++ } ++ } ++ ++ /* + * Store automatically generated items back to icmap + */ + for (i = 0; i < 
totem_config->interface_count; i++) { +-- +1.7.1 + diff --git a/SOURCES/bz773464-3-config-Ensure-mcast-address-port-differs-for-rrp.patch b/SOURCES/bz773464-3-config-Ensure-mcast-address-port-differs-for-rrp.patch new file mode 100644 index 0000000..2bbe2b9 --- /dev/null +++ b/SOURCES/bz773464-3-config-Ensure-mcast-address-port-differs-for-rrp.patch @@ -0,0 +1,72 @@ +From 6449bea835c90045baa23e3e041fed1df2abf070 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Mon, 24 Nov 2014 11:54:20 +0100 +Subject: [PATCH] config: Ensure mcast address/port differs for rrp + +When using multiple interfaces, it's necessary to use different +multicast address/port pair for each interface to make +rrp work correctly. This is now checked in parser. + +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemconfig.c | 19 ++++++++++++++++++- + man/corosync.conf.5 | 4 ++++ + 2 files changed, 22 insertions(+), 1 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index 9cb9b07..2acee2a 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -1022,8 +1022,9 @@ int totem_config_validate ( + static char local_error_reason[512]; + char parse_error[512]; + const char *error_reason = local_error_reason; +- int i; ++ int i, j; + unsigned int interface_max = INTERFACE_MAX; ++ unsigned int port1, port2; + + if (totem_config->interface_count == 0) { + error_reason = "No interfaces defined"; +@@ -1083,6 +1084,22 @@ int totem_config_validate ( + error_reason = "Not all bind address belong to the same IP family"; + goto parse_error; + } ++ ++ /* ++ * Ensure mcast address/port differs ++ */ ++ if (totem_config->transport_number == TOTEM_TRANSPORT_UDP) { ++ for (j = i + 1; j < totem_config->interface_count; j++) { ++ port1 = totem_config->interfaces[i].ip_port; ++ port2 = totem_config->interfaces[j].ip_port; ++ if (totemip_equal(&totem_config->interfaces[i].mcast_addr, ++ &totem_config->interfaces[j].mcast_addr) && ++ (((port1 > port2 ? 
port1 : port2) - (port1 < port2 ? port1 : port2)) <= 1)) { ++ error_reason = "Interfaces multicast address/port pair must differ"; ++ goto parse_error; ++ } ++ } ++ } + } + + if (totem_config->version != 2) { +diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 +index 95eca2d..8e774c1 100644 +--- a/man/corosync.conf.5 ++++ b/man/corosync.conf.5 +@@ -213,6 +213,10 @@ be chosen. + The maximum number of interface directives that is allowed for either + modes (active or passive) is 2. + ++When using multiple interfaces, make sure to use different multicast ++address/port (port for same address must differ by at least two) pair ++for each interface (this is checked by parser) to make rrp works. ++ + .TP + netmtu + This specifies the network maximum transmit unit. To set this value beyond +-- +1.7.1 + diff --git a/SOURCES/bz773464-4-automake-Check-minimum-automake-version.patch b/SOURCES/bz773464-4-automake-Check-minimum-automake-version.patch new file mode 100644 index 0000000..8db1be5 --- /dev/null +++ b/SOURCES/bz773464-4-automake-Check-minimum-automake-version.patch @@ -0,0 +1,30 @@ +From 114b826c67126fe1f690ad976b5217a8487994a4 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Fri, 2 Jan 2015 12:27:48 +0100 +Subject: [PATCH] automake: Check minimum automake version + +Corosync needs automake version at least 1.11. Patch adds minimum +version check. 
+ +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + configure.ac | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 4f374fc..0c371aa 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -10,7 +10,7 @@ AC_INIT([corosync], + + AC_USE_SYSTEM_EXTENSIONS + +-AM_INIT_AUTOMAKE([-Wno-portability]) ++AM_INIT_AUTOMAKE([-Wno-portability 1.11]) + + LT_PREREQ([2.2.6]) + LT_INIT +-- +1.7.1 + diff --git a/SOURCES/bz773464-5-Log-auto-recovery-of-ring-only-once.patch b/SOURCES/bz773464-5-Log-auto-recovery-of-ring-only-once.patch new file mode 100644 index 0000000..705008d --- /dev/null +++ b/SOURCES/bz773464-5-Log-auto-recovery-of-ring-only-once.patch @@ -0,0 +1,49 @@ +From e0ac861efdc32831366a2b5f5cc1d61e2ffa5504 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Thu, 2 Oct 2014 12:09:42 +0000 +Subject: [PATCH] Log auto-recovery of ring only once + +Make sure to log auto-recovery of ring only once. Every +MESSAGE_TYPE_RING_TEST_ACTIVATE receive is logged, but with lower +priority and more detailed information. 
+ +Signed-off-by: Jan Friesse +Reviewed-by: Christine Caulfield +--- + exec/totemrrp.c | 14 +++++++++++--- + 1 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/exec/totemrrp.c b/exec/totemrrp.c +index eee9d26..0d9a58d 100644 +--- a/exec/totemrrp.c ++++ b/exec/totemrrp.c +@@ -1910,15 +1910,23 @@ void rrp_deliver_fn ( + } + } else + if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) { +- log_printf ( +- rrp_instance->totemrrp_log_level_notice, +- "Automatically recovered ring %d", hdr->ring_number); + + if (hdr->endian_detector != ENDIAN_LOCAL) { + test_active_msg_endian_convert(hdr, &tmp_msg); + hdr = &tmp_msg; + } + ++ log_printf ( ++ rrp_instance->totemrrp_log_level_debug, ++ "Received ring test activate message for ring %d sent by node %u", ++ hdr->ring_number, ++ hdr->nodeid_activator); ++ ++ if (rrp_instance->stats.faulty[deliver_fn_context->iface_no]) { ++ log_printf (rrp_instance->totemrrp_log_level_notice, ++ "Automatically recovered ring %d", hdr->ring_number); ++ } ++ + totemrrp_ring_reenable (rrp_instance, deliver_fn_context->iface_no); + if (hdr->nodeid_activator != rrp_instance->my_nodeid) { + totemnet_token_send ( +-- +1.7.1 + diff --git a/SOURCES/bz773464-6-totemsrp-Format-member-list-log-as-unsigned-int.patch b/SOURCES/bz773464-6-totemsrp-Format-member-list-log-as-unsigned-int.patch new file mode 100644 index 0000000..f34c2ed --- /dev/null +++ b/SOURCES/bz773464-6-totemsrp-Format-member-list-log-as-unsigned-int.patch @@ -0,0 +1,36 @@ +From 5d9acc5604eb4e8a739cb37a4ad46bcc5ad8deb6 Mon Sep 17 00:00:00 2001 +From: Andrey N. Groshev +Date: Tue, 3 Mar 2015 05:56:12 +0300 +Subject: [PATCH] totemsrp: Format member list log as unsigned int + +Signed-off-by: Andrey N. 
Groshev +Reviewed-by: Jan Friesse +--- + exec/totemsrp.c | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/exec/totemsrp.c b/exec/totemsrp.c +index b05773a..6357f5a 100644 +--- a/exec/totemsrp.c ++++ b/exec/totemsrp.c +@@ -1999,7 +1999,7 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + int sptr = 0; + sptr += snprintf(joined_node_msg, sizeof(joined_node_msg)-sptr, " joined:"); + for (i=0; i< joined_list_entries; i++) { +- sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " %d", joined_list_totemip[i]); ++ sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " %u", joined_list_totemip[i]); + } + } + else { +@@ -2010,7 +2010,7 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + int sptr = 0; + sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:"); + for (i=0; i< instance->my_left_memb_entries; i++) { +- sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " %d", left_list[i]); ++ sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " %u", left_list[i]); + } + } + else { +-- +1.7.1 + diff --git a/SOURCES/bz773464-7-totemconfig-Check-for-duplicate-nodeids.patch b/SOURCES/bz773464-7-totemconfig-Check-for-duplicate-nodeids.patch new file mode 100644 index 0000000..cd3d4b5 --- /dev/null +++ b/SOURCES/bz773464-7-totemconfig-Check-for-duplicate-nodeids.patch @@ -0,0 +1,159 @@ +From 997074cc3e1ea425ca63e453b7e2181741bdcef0 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Fri, 10 Apr 2015 14:22:07 +0100 +Subject: [PATCH] totemconfig: Check for duplicate nodeids + +Having duplicate nodeids in corosync.conf can play havoc with a cluster, +so (as suggested by someone on this list) here is some code to check +that all nodeids are unique. Even if a nodeid is not specified it will +check to be sure that the ID generated from the IP address (ipv4 only) +does not clash with one that is provided. 
+ +It logs all non-unique nodeids to syslog, but only the last is reported +on the command-line to the user which should be enough to get them to +check further. At startup this will cause corosync to fail to start. + +Signed-off-by: Christine Caulfield +--- + exec/totemconfig.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 118 insertions(+), 0 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index b678752..f232ea8 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -481,6 +481,120 @@ static int get_cluster_mcast_addr ( + return (err); + } + ++static unsigned int generate_nodeid_for_duplicate_test( ++ struct totem_config *totem_config, ++ char *addr) ++{ ++ unsigned int nodeid; ++ struct totem_ip_address totemip; ++ ++ /* AF_INET hard-coded here because auto-generated nodeids ++ are only for IPv4 */ ++ if (totemip_parse(&totemip, addr, AF_INET) != 0) ++ return -1; ++ ++ memcpy (&nodeid, &totemip.addr, sizeof (unsigned int)); ++ ++#if __BYTE_ORDER == __LITTLE_ENDIAN ++ nodeid = swab32 (nodeid); ++#endif ++ ++ if (totem_config->clear_node_high_bit) { ++ nodeid &= 0x7FFFFFFF; ++ } ++ return nodeid; ++} ++ ++static int check_for_duplicate_nodeids( ++ struct totem_config *totem_config, ++ const char **error_string) ++{ ++ icmap_iter_t iter; ++ icmap_iter_t subiter; ++ const char *iter_key; ++ int res = 0; ++ int retval = 0; ++ char tmp_key[ICMAP_KEYNAME_MAXLEN]; ++ char *ring0_addr=NULL; ++ char *ring0_addr1=NULL; ++ unsigned int node_pos; ++ unsigned int node_pos1; ++ unsigned int nodeid; ++ unsigned int nodeid1; ++ int autogenerated; ++ ++ iter = icmap_iter_init("nodelist.node."); ++ while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) { ++ res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key); ++ if (res != 2) { ++ continue; ++ } ++ ++ if (strcmp(tmp_key, "ring0_addr") != 0) { ++ continue; ++ } ++ ++ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos); ++ 
autogenerated = 0; ++ if (icmap_get_uint32(tmp_key, &nodeid) != CS_OK) { ++ ++ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos); ++ if (icmap_get_string(tmp_key, &ring0_addr) != CS_OK) { ++ continue; ++ } ++ ++ /* Generate nodeid so we can check that auto-generated nodeids don't clash either */ ++ nodeid = generate_nodeid_for_duplicate_test(totem_config, ring0_addr); ++ if (nodeid == -1) { ++ continue; ++ } ++ autogenerated = 1; ++ } ++ ++ node_pos1 = 0; ++ subiter = icmap_iter_init("nodelist.node."); ++ while (((iter_key = icmap_iter_next(subiter, NULL, NULL)) != NULL) && (node_pos1 < node_pos)) { ++ res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos1, tmp_key); ++ if ((res != 2) || (node_pos1 >= node_pos)) { ++ continue; ++ } ++ ++ if (strcmp(tmp_key, "ring0_addr") != 0) { ++ continue; ++ } ++ ++ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.nodeid", node_pos1); ++ if (icmap_get_uint32(tmp_key, &nodeid1) != CS_OK) { ++ ++ snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.ring0_addr", node_pos1); ++ if (icmap_get_string(tmp_key, &ring0_addr1) != CS_OK) { ++ continue; ++ } ++ nodeid1 = generate_nodeid_for_duplicate_test(totem_config, ring0_addr1); ++ if (nodeid1 == -1) { ++ continue; ++ } ++ } ++ ++ if (nodeid == nodeid1) { ++ retval = -1; ++ snprintf (error_string_response, sizeof(error_string_response), ++ "Nodeid %u%s%s%s appears twice in corosync.conf", nodeid, ++ autogenerated?"(autogenerated from ":"", ++ autogenerated?ring0_addr:"", ++ autogenerated?")":""); ++ log_printf (LOGSYS_LEVEL_ERROR, error_string_response); ++ *error_string = error_string_response; ++ break; ++ } ++ } ++ icmap_iter_finalize(subiter); ++ } ++ icmap_iter_finalize(iter); ++ return retval; ++} ++ ++ + static int find_local_node_in_nodelist(struct totem_config *totem_config) + { + icmap_iter_t iter; +@@ -1236,6 +1350,10 @@ int totem_config_validate ( + return (-1); + } + ++ if (check_for_duplicate_nodeids(totem_config, 
error_string) == -1) { ++ return (-1); ++ } ++ + /* + * RRP values validation + */ +-- +1.7.1 + diff --git a/SOURCES/bz773464-8-totem-Log-a-message-if-JOIN-or-LEAVE-message-is-igno.patch b/SOURCES/bz773464-8-totem-Log-a-message-if-JOIN-or-LEAVE-message-is-igno.patch new file mode 100644 index 0000000..c8e836d --- /dev/null +++ b/SOURCES/bz773464-8-totem-Log-a-message-if-JOIN-or-LEAVE-message-is-igno.patch @@ -0,0 +1,37 @@ +From 53f67a2a7914228f1a406aad61ea6768525e11b0 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Fri, 17 Apr 2015 15:49:53 +0100 +Subject: [PATCH] totem: Log a message if JOIN or LEAVE message is ignored + +As per recent email thread, this patch adds a log message if a JOIN or +LEAVE message is discarded while corosync is flushing the receive queue. + +While ignoring a JOIN message is harmless (it will be resent), ignoring +a LEAVE message can cause a longer state transition as it is treated as +a node crashing rather than leaving gracefully, so the system admin +might be confused as to the cause. + +Unfortunately, we can't (at the totemudp level) distinguish between JOIN +or LEAVE messages without a lot more protocol-specific code creeping in +the lower layer so the message is left ambiguous. 
+ +Signed-off-by: Christine Caulfield +--- + exec/totemudp.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/exec/totemudp.c b/exec/totemudp.c +index 86059af..8ed87ce 100644 +--- a/exec/totemudp.c ++++ b/exec/totemudp.c +@@ -511,6 +511,7 @@ static int net_deliver_fn ( + */ + message_type = (char *)iovec->iov_base; + if (instance->flushing == 1 && *message_type == MESSAGE_TYPE_MEMB_JOIN) { ++ log_printf(instance->totemudp_log_level_warning, "JOIN or LEAVE message was thrown away during flush operation."); + iovec->iov_len = FRAME_SIZE_MAX; + return (0); + } +-- +1.7.1 + diff --git a/SOURCES/bz773464-9-totemsrp-Improve-logging-of-left-down-nodes.patch b/SOURCES/bz773464-9-totemsrp-Improve-logging-of-left-down-nodes.patch new file mode 100644 index 0000000..8d530ed --- /dev/null +++ b/SOURCES/bz773464-9-totemsrp-Improve-logging-of-left-down-nodes.patch @@ -0,0 +1,219 @@ +From ab8942f6260fde93824ed2a18e09e572b59ceb25 Mon Sep 17 00:00:00 2001 +From: Christine Caulfield +Date: Fri, 12 Jun 2015 16:16:45 +0100 +Subject: [PATCH] totemsrp: Improve logging of left/down nodes + +This patch from Hideo Yamauchi improves the logging of +whether nodes leave the cluster cleanly or uncleanly, +making it easier to determine if a node was shut down +by the operator. There is also the possibility that a +LEAVE message could get missed (due to the node being +in flush state) so this can also make that clearer. + +The modifications are as follows. + +Change 1) I added the list which maintained LEAVE node to totemsrp. +Change 2) I added registration, a search, the handling of to clear LEAVE +node. +Change 3) I added the output to log. +Change 4) I changed an output level of the log.
+ +Signed-off-by: Hideo Yamauchi +Reviewed-by: Christine Caulfield +Reviewed-by: Jan Friesse +--- + exec/totemsrp.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 files changed, 104 insertions(+), 1 deletions(-) + +diff --git a/exec/totemsrp.c b/exec/totemsrp.c +index 6357f5a..3aa61cc 100644 +--- a/exec/totemsrp.c ++++ b/exec/totemsrp.c +@@ -316,6 +316,8 @@ struct totemsrp_instance { + + struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX]; + ++ unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX]; ++ + int my_proc_list_entries; + + int my_failed_list_entries; +@@ -329,6 +331,8 @@ struct totemsrp_instance { + int my_deliver_memb_entries; + + int my_left_memb_entries; ++ ++ int my_leave_memb_entries; + + struct memb_ring_id my_ring_id; + +@@ -513,6 +517,8 @@ struct totemsrp_instance { + uint32_t threaded_mode_enabled; + + uint32_t waiting_trans_ack; ++ ++ int flushing; + + void * token_recv_event_handle; + void * token_sent_event_handle; +@@ -1476,6 +1482,52 @@ static void memb_set_print ( + } + } + #endif ++static void my_leave_memb_clear( ++ struct totemsrp_instance *instance) ++{ ++ memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list)); ++ instance->my_leave_memb_entries = 0; ++} ++ ++static unsigned int my_leave_memb_match( ++ struct totemsrp_instance *instance, ++ unsigned int nodeid) ++{ ++ int i; ++ unsigned int ret = 0; ++ ++ for (i = 0; i < instance->my_leave_memb_entries; i++){ ++ if (instance->my_leave_memb_list[i] == nodeid){ ++ ret = nodeid; ++ break; ++ } ++ } ++ return ret; ++} ++ ++static void my_leave_memb_set( ++ struct totemsrp_instance *instance, ++ unsigned int nodeid) ++{ ++ int i, found = 0; ++ for (i = 0; i < instance->my_leave_memb_entries; i++){ ++ if (instance->my_leave_memb_list[i] == nodeid){ ++ found = 1; ++ break; ++ } ++ } ++ if (found == 1) { ++ return; ++ } ++ if (instance->my_leave_memb_entries < (PROCESSOR_COUNT_MAX - 1)) { ++ 
instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid; ++ instance->my_leave_memb_entries++; ++ } else { ++ log_printf (instance->totemsrp_log_level_warning, ++ "Cannot set LEAVE nodeid=%d", nodeid); ++ } ++} ++ + + static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance) + { +@@ -1837,6 +1889,7 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + unsigned int res; + char left_node_msg[1024]; + char joined_node_msg[1024]; ++ char failed_node_msg[1024]; + + instance->originated_orf_token = 0; + +@@ -2008,15 +2061,30 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + + if (instance->my_left_memb_entries) { + int sptr = 0; ++ int sptr2 = 0; + sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:"); + for (i=0; i< instance->my_left_memb_entries; i++) { + sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " %u", left_list[i]); + } ++ for (i=0; i< instance->my_left_memb_entries; i++) { ++ if (my_leave_memb_match(instance, left_list[i]) == 0) { ++ if (sptr2 == 0) { ++ sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:"); ++ } ++ sptr2 += snprintf(failed_node_msg+sptr2, sizeof(left_node_msg)-sptr2, " %u", left_list[i]); ++ } ++ } ++ if (sptr2 == 0) { ++ failed_node_msg[0] = '\0'; ++ } + } + else { + left_node_msg[0] = '\0'; ++ failed_node_msg[0] = '\0'; + } + ++ my_leave_memb_clear(instance); ++ + log_printf (instance->totemsrp_log_level_debug, + "entering OPERATIONAL state."); + log_printf (instance->totemsrp_log_level_notice, +@@ -2025,6 +2093,13 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) + instance->my_ring_id.seq, + joined_node_msg, + left_node_msg); ++ ++ if (strlen(failed_node_msg)) { ++ log_printf (instance->totemsrp_log_level_notice, ++ "Failed to receive the leave message.%s", ++ failed_node_msg); ++ } ++ + instance->memb_state = MEMB_STATE_OPERATIONAL; + + 
instance->stats.operational_entered++; +@@ -3597,8 +3672,9 @@ static int message_handler_orf_token ( + return (0); + } + #endif +- ++ instance->flushing = 1; + totemrrp_recv_flush (instance->totemrrp_context); ++ instance->flushing = 0; + + /* + * Determine if we should hold (in reality drop) the token +@@ -4130,6 +4206,32 @@ static void memb_join_process ( + memb_set_print ("my_faillist", instance->my_failed_list, instance->my_failed_list_entries); + -*/ + ++ if (memb_join->header.type == MESSAGE_TYPE_MEMB_JOIN) { ++ if (instance->flushing) { ++ if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) { ++ log_printf (instance->totemsrp_log_level_warning, ++ "Discarding LEAVE message during flush, nodeid=%u", ++ memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID); ++ if (memb_join->failed_list_entries > 0) { ++ my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid); ++ } ++ } else { ++ log_printf (instance->totemsrp_log_level_warning, ++ "Discarding JOIN message during flush, nodeid=%d", memb_join->header.nodeid); ++ } ++ return; ++ } else { ++ if (memb_join->header.nodeid == LEAVE_DUMMY_NODEID) { ++ log_printf (instance->totemsrp_log_level_debug, ++ "Recieve LEAVE message from %u", memb_join->failed_list_entries > 0 ? 
failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid : LEAVE_DUMMY_NODEID); ++ if (memb_join->failed_list_entries > 0) { ++ my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].addr[0].nodeid); ++ } ++ } ++ } ++ ++ } ++ + if (memb_set_equal (proc_list, + memb_join->proc_list_entries, + instance->my_proc_list, +@@ -4573,6 +4675,7 @@ void main_deliver_fn ( + return; + } + ++ + switch (message_header->type) { + case MESSAGE_TYPE_ORF_TOKEN: + instance->stats.orf_token_rx++; +-- +1.7.1 + diff --git a/SPECS/corosync.spec b/SPECS/corosync.spec index be64b43..40d1334 100644 --- a/SPECS/corosync.spec +++ b/SPECS/corosync.spec @@ -21,7 +21,7 @@ Name: corosync Summary: The Corosync Cluster Engine and Application Programming Interfaces Version: 2.3.4 -Release: 4%{?gitver}%{?dist}.3 +Release: 7%{?gitver}%{?dist} License: BSD Group: System Environment/Base URL: http://www.corosync.org/ @@ -34,11 +34,28 @@ Patch3: bz1078361-1-manpage-Fix-English.patch Patch4: bz1078361-2-Store-configuration-values-used-by-totem-to-cmap.patch Patch5: bz1078361-3-man-page-Improve-description-of-token-timeout.patch Patch6: bz1184154-1-Handle-adding-and-removing-UDPU-members-atomically.patch -Patch7: bz1205336-1-Votequorum-Fix-auto_tie_breaker-default.patch -Patch8: bz1205338-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch -Patch9: bz1260002-1-Set-RR-priority-by-default.patch -Patch10: bz1260719-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch -Patch11: bz1260719-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch +Patch7: bz1197091-1-Votequorum-Fix-auto_tie_breaker-default.patch +Patch8: bz1197671-1-Don-t-allow-both-two_node-and-auto_tie_breaker-in-co.patch +Patch9: bz773464-1-config-Make-sure-user-doesn-t-mix-IPv6-and-IPv4.patch +Patch10: bz773464-2-config-Process-broadcast-option-consistently.patch +Patch11: bz773464-3-config-Ensure-mcast-address-port-differs-for-rrp.patch +Patch12: 
bz1234261-1-Reset-timer_problem_decrementer-on-fault.patch +Patch13: bz773464-4-automake-Check-minimum-automake-version.patch +Patch14: bz1170347-1-Set-RR-priority-by-default.patch +Patch15: bz773464-5-Log-auto-recovery-of-ring-only-once.patch +Patch16: bz1234261-2-totem-Ignore-duplicated-commit-tokens-in-recovery.patch +Patch17: bz1234266-1-corosync_ring_id_store-Use-safer-permissions.patch +Patch18: bz773464-6-totemsrp-Format-member-list-log-as-unsigned-int.patch +Patch19: bz682771-1-cpg-Add-support-for-messages-larger-than-1Mb.patch +Patch20: bz682771-2-Really-add-cpghum.patch +Patch21: bz1229194-1-quorum-don-t-allow-quorum_trackstart-to-be-called-tw.patch +Patch22: bz773464-7-totemconfig-Check-for-duplicate-nodeids.patch +Patch23: bz773464-8-totem-Log-a-message-if-JOIN-or-LEAVE-message-is-igno.patch +Patch24: bz773464-9-totemsrp-Improve-logging-of-left-down-nodes.patch +Patch25: bz1229194-2-votequorum-Fix-auto_tie_breaker-behaviour-in-odd-siz.patch +Patch26: bz1226842-1-Add-note-about-rrp-active-beeing-unsupported.patch +Patch27: bz1225441-1-Log-Add-logrotate-configuration-file.patch +Patch28: bz682771-3-Don-t-link-with-libz-when-not-needed.patch %if 0%{?rhel} ExclusiveArch: i686 x86_64 s390x @@ -57,6 +74,7 @@ Obsoletes: cman, clusterlib, clusterlib-devel BuildRequires: groff BuildRequires: libqb-devel >= 0.14.2 BuildRequires: nss-devel +BuildRequires: zlib-devel %if %{with runautogen} BuildRequires: autoconf automake libtool %endif @@ -93,11 +111,28 @@ BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) %patch4 -p1 -b .bz1078361-2 %patch5 -p1 -b .bz1078361-3 %patch6 -p1 -b .bz1184154-1 -%patch7 -p1 -b .bz1205336-1 -%patch8 -p1 -b .bz1205338-1 -%patch9 -p1 -b .bz1260002-1 -%patch10 -p1 -b .bz1260719-1 -%patch11 -p1 -b .bz1260719-2 +%patch7 -p1 -b .bz1197091-1 +%patch8 -p1 -b .bz1197671-1 +%patch9 -p1 -b .bz773464-1 +%patch10 -p1 -b .bz773464-2 +%patch11 -p1 -b .bz773464-3 +%patch12 -p1 -b .bz1234261-1 +%patch13 -p1 -b .bz773464-4 +%patch14 
-p1 -b .bz1170347-1 +%patch15 -p1 -b .bz773464-5 +%patch16 -p1 -b .bz1234261-2 +%patch17 -p1 -b .bz1234266-1 +%patch18 -p1 -b .bz773464-6 +%patch19 -p1 -b .bz682771-1 +%patch20 -p1 -b .bz682771-2 +%patch21 -p1 -b .bz1229194-1 +%patch22 -p1 -b .bz773464-7 +%patch23 -p1 -b .bz773464-8 +%patch24 -p1 -b .bz773464-9 +%patch25 -p1 -b .bz1229194-2 +%patch26 -p1 -b .bz1226842-1 +%patch27 -p1 -b .bz1225441-1 +%patch28 -p1 -b .bz682771-3 %build %if %{with runautogen} @@ -224,6 +259,7 @@ fi %config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example.udpu %config(noreplace) %{_sysconfdir}/sysconfig/corosync-notifyd %config(noreplace) %{_sysconfdir}/sysconfig/corosync +%config(noreplace) %{_sysconfdir}/logrotate.d/corosync %if %{with dbus} %{_sysconfdir}/dbus-1/system.d/corosync-signals.conf %endif @@ -360,28 +396,69 @@ The Corosync Cluster Engine APIs. %{_mandir}/man8/quorum_overview.8* %changelog -* Mon Sep 07 2015 Jan Friesse 2.3.4-4.3 -- Resolves: rhbz#1260719 - -- quorum: don't allow quorum_trackstart to be called twice (rhbz#1260719) -- merge upstream commit 82526d2fe9137e8b604f1bbae6d6e39ba41377f9 (rhbz#1260719) -- votequorum: Fix auto_tie_breaker behaviour in odd-sized clusters (rhbz#1260719) -- merge upstream commit b9f5c290b7dedd0a677cdfc25db7dd111245a745 (rhbz#1260719) - -* Fri Sep 04 2015 Jan Friesse 2.3.4-4.2 -- Resolves: rhbz#1260002 - -- Set RR priority by default (rhbz#1260002) -- merge upstream commit 177ef0e5240b4060ff5b14eab6f2eefee3aa777d (rhbz#1260002) - -* Wed Mar 25 2015 Jan Friesse 2.3.4-4.1 -- Resolves: rhbz#1205336 -- Resolves: rhbz#1205338 - -- Votequorum: Fix auto_tie_breaker default (rhbz#1205336) -- merge upstream commit 314a01c98e5f98ff686333966dbe675935b7b6a8 (rhbz#1205336) -- Don't allow both two_node and auto_tie_breaker in corosync.conf (rhbz#1205338) -- merge upstream commit c832ade034fa737561ccabefbe417c9d7855d970 (rhbz#1205338) +* Mon Jun 22 2015 Jan Friesse 2.3.4-7 +- Related: rhbz#682771 + +- Don't link with libz when not needed 
(rhbz#682771) +- merge upstream commit 145f9279d12cf0b981494bbd4dabbc9c3641378e (rhbz#682771) + +* Mon Jun 22 2015 Jan Friesse 2.3.4-6 +- Resolves: rhbz#1170347 +- Resolves: rhbz#1225441 +- Resolves: rhbz#1226842 +- Resolves: rhbz#1229194 +- Resolves: rhbz#1234261 +- Resolves: rhbz#1234266 +- Resolves: rhbz#682771 +- Resolves: rhbz#773464 + +- config: Make sure user doesn't mix IPv6 and IPv4 (rhbz#773464) +- merge upstream commit 6c028d4d9c53decaa9469c792ac68fd2a886e7d9 (rhbz#773464) +- config: Process broadcast option consistently (rhbz#773464) +- merge upstream commit 70bd35fc06e68a010d780dfa39bd68d4bd2f7da7 (rhbz#773464) +- config: Ensure mcast address/port differs for rrp (rhbz#773464) +- merge upstream commit 6449bea835c90045baa23e3e041fed1df2abf070 (rhbz#773464) +- Reset timer_problem_decrementer on fault (rhbz#1234261) +- merge upstream commit 8f284b26b3331e1ab252969ba65543e6d9217ab1 (rhbz#1234261) +- automake: Check minimum automake version (rhbz#773464) +- merge upstream commit 114b826c67126fe1f690ad976b5217a8487994a4 (rhbz#773464) +- Set RR priority by default (rhbz#1170347) +- merge upstream commit 177ef0e5240b4060ff5b14eab6f2eefee3aa777d (rhbz#1170347) +- Log auto-recovery of ring only once (rhbz#773464) +- merge upstream commit e0ac861efdc32831366a2b5f5cc1d61e2ffa5504 (rhbz#773464) +- totem: Ignore duplicated commit tokens in recovery (rhbz#1234261) +- merge upstream commit 4ee84c51fa73c4ec7cbee922111a140a3aaf75df (rhbz#1234261) +- corosync_ring_id_store: Use safer permissions (rhbz#1234266) +- merge upstream commit 252b38ab8a62ff083e83b1d6f514109f7b7cbb42 (rhbz#1234266) +- totemsrp: Format member list log as unsigned int (rhbz#773464) +- merge upstream commit 5d9acc5604eb4e8a739cb37a4ad46bcc5ad8deb6 (rhbz#773464) +- cpg: Add support for messages larger than 1Mb (rhbz#682771) +- merge upstream commit 8cc8e513633a1a8b12c416e32fb5362fcf4d65dd (rhbz#682771) +- Really add cpghum (rhbz#682771) +- merge upstream commit 
3842ba6080e00fd9484a2a875d982e149f67bc44 (rhbz#682771) +- quorum: don't allow quorum_trackstart to be called twice (rhbz#1229194) +- merge upstream commit 82526d2fe9137e8b604f1bbae6d6e39ba41377f9 (rhbz#1229194) +- totemconfig: Check for duplicate nodeids (rhbz#773464) +- merge upstream commit 997074cc3e1ea425ca63e453b7e2181741bdcef0 (rhbz#773464) +- totem: Log a message if JOIN or LEAVE message is ignored (rhbz#773464) +- merge upstream commit 53f67a2a7914228f1a406aad61ea6768525e11b0 (rhbz#773464) +- totemsrp: Improve logging of left/down nodes (rhbz#773464) +- merge upstream commit ab8942f6260fde93824ed2a18e09e572b59ceb25 (rhbz#773464) +- votequorum: Fix auto_tie_breaker behaviour in odd-sized clusters (rhbz#1229194) +- merge upstream commit b9f5c290b7dedd0a677cdfc25db7dd111245a745 (rhbz#1229194) +- Add note about rrp active beeing unsupported (rhbz#1226842) +- merge upstream commit 219965f4fe694eaaf2eb4ea05cdc7e35f5146114 (rhbz#1226842) +- Log: Add logrotate configuration file (rhbz#1225441) +- merge upstream commit aabbace625b3c68332b4356887378fca81f8f387 (rhbz#1225441) + +* Wed Mar 25 2015 Jan Friesse 2.3.4-5 +- Resolves: rhbz#1197091 +- Resolves: rhbz#1197671 + +- Votequorum: Fix auto_tie_breaker default (rhbz#1197091) +- merge upstream commit 314a01c98e5f98ff686333966dbe675935b7b6a8 (rhbz#1197091) +- Don't allow both two_node and auto_tie_breaker in corosync.conf (rhbz#1197671) +- merge upstream commit c832ade034fa737561ccabefbe417c9d7855d970 (rhbz#1197671) * Wed Jan 21 2015 Jan Friesse 2.3.4-4 - Resolves: rhbz#1184154