|
|
1ef286 |
From 1061804d09565363aba73e369faf310a7d2c4d86 Mon Sep 17 00:00:00 2001
|
|
|
1ef286 |
From: Jan Friesse <jfriesse@redhat.com>
|
|
|
1ef286 |
Date: Mon, 15 Jul 2019 14:08:39 +0200
|
|
|
1ef286 |
Subject: [PATCH] totem: Increase ring_id seq after load
|
|
|
1ef286 |
|
|
|
1ef286 |
This patch handles the situation where the leader
|
|
|
1ef286 |
node (the node with lowest node_id) crashes and is started again
|
|
|
1ef286 |
before token timeout of the rest of the cluster.
|
|
|
1ef286 |
The newly restarted node restores the ringid of the old ring from
|
|
|
1ef286 |
stable storage, so it has the same ringid as the rest of the nodes,
|
|
|
1ef286 |
but ARU is zero. If the node is able to create a singleton membership
|
|
|
1ef286 |
before receiving the joinlist from the rest of the cluster,
|
|
|
1ef286 |
everything works as expected, because the ring id gets increased
|
|
|
1ef286 |
correctly.
|
|
|
1ef286 |
|
|
|
1ef286 |
But if the node receives a joinlist from another cluster node before
|
|
|
1ef286 |
its own joinlist, then it continues as it would had it never left
|
|
|
1ef286 |
the cluster. This is not correct, because the new node should always
|
|
|
1ef286 |
create a singleton configuration first.
|
|
|
1ef286 |
|
|
|
1ef286 |
During the recovery phase, ARUs are compared and because they differ
|
|
|
1ef286 |
(the ARU of the old leader node is 0), the other nodes
|
|
|
1ef286 |
try to send all of their previous messages. This is impossible
|
|
|
1ef286 |
(even if it was correct), because other nodes have already freed most
|
|
|
1ef286 |
of those messages. The implementation uses an assert to limit the maximum
|
|
|
1ef286 |
number of messages sent during recovery (we could fix this,
|
|
|
1ef286 |
but it's not really the point).
|
|
|
1ef286 |
|
|
|
1ef286 |
The solution here is to increase the ring_id sequence number by 1 after
|
|
|
1ef286 |
loading it from storage. During creation of the commit token it is
|
|
|
1ef286 |
always increased by 4, so it will not collide with an existing
|
|
|
1ef286 |
sequence.
|
|
|
1ef286 |
|
|
|
1ef286 |
Thanks to Christine Caulfield <ccaulfie@redhat.com> for clarifying the commit
|
|
|
1ef286 |
message.
|
|
|
1ef286 |
|
|
|
1ef286 |
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
|
|
1ef286 |
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
|
|
1ef286 |
---
|
|
|
1ef286 |
exec/totemsrp.c | 8 ++++++++
|
|
|
1ef286 |
1 file changed, 8 insertions(+)
|
|
|
1ef286 |
|
|
|
1ef286 |
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
|
|
|
1ef286 |
index 0410ed9..c07bd43 100644
|
|
|
1ef286 |
--- a/exec/totemsrp.c
|
|
|
1ef286 |
+++ b/exec/totemsrp.c
|
|
|
1ef286 |
@@ -5094,6 +5094,14 @@ void main_iface_change_fn (
|
|
|
1ef286 |
if (instance->iface_changes++ == 0) {
|
|
|
1ef286 |
instance->memb_ring_id_create_or_load (&instance->my_ring_id,
|
|
|
1ef286 |
&instance->my_id.addr[0]);
|
|
|
1ef286 |
+ /*
|
|
|
1ef286 |
+ * Increase the ring_id sequence number. This doesn't follow specification.
|
|
|
1ef286 |
+ * Solves problem with restarted leader node (node with lowest nodeid) before
|
|
|
1ef286 |
+ * rest of the cluster forms new membership and guarantees unique ring_id for
|
|
|
1ef286 |
+ * new singleton configuration.
|
|
|
1ef286 |
+ */
|
|
|
1ef286 |
+ instance->my_ring_id.seq++;
|
|
|
1ef286 |
+
|
|
|
1ef286 |
instance->token_ring_id_seq = instance->my_ring_id.seq;
|
|
|
1ef286 |
log_printf (
|
|
|
1ef286 |
instance->totemsrp_log_level_debug,
|
|
|
1ef286 |
--
|
|
|
1ef286 |
1.8.3.1
|
|
|
1ef286 |
|