Blob Blame History Raw
From fbe8768f1bbab6d546023d70e7f7b91a9dc213b0 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Mon, 17 Feb 2014 15:36:19 +0100
Subject: [PATCH] cpg: Make sure left nodes are really removed

When a node is paused and a cpg process on another node exits in the
meantime, the paused node doesn't update its membership correctly after
resume, so the exited cpg process is still visible on that node.

The solution is to compare the join list with cpd and remove all pids
which are not included in the join list.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
 exec/cpg.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/exec/cpg.c b/exec/cpg.c
index db8d987..1c6fbb9 100644
--- a/exec/cpg.c
+++ b/exec/cpg.c
@@ -931,6 +931,56 @@ static void downlist_master_choose_and_send (void)
 	qb_map_destroy(group_map);
 }
 
+/*
+ * Issue do_proc_leave() for remote processes missing from the joinlist (they may have left while we were suspended).
+ */
+static void joinlist_remove_zombie_pi_entries (void)
+{
+	struct list_head *pi_iter;
+	struct list_head *jl_iter;
+	struct process_info *pi;
+	struct joinlist_msg *stored_msg;
+	int found;
+
+	for (pi_iter = process_info_list_head.next; pi_iter != &process_info_list_head; ) {
+		pi = list_entry (pi_iter, struct process_info, list);
+		pi_iter = pi_iter->next;	/* step first; do_proc_leave() below presumably unlinks pi -- TODO confirm */
+
+		/*
+		 * Skip entries of the local node; only remote entries are checked
+		 */
+		if (pi->nodeid == api->totem_nodeid_get()) {
+			continue ;
+		}
+
+		/*
+		 * Look for a stored joinlist message with the same nodeid, pid and group
+		 */
+		found = 0;
+		for (jl_iter = joinlist_messages_head.next;
+			jl_iter != &joinlist_messages_head;
+			jl_iter = jl_iter->next) {
+
+			stored_msg = list_entry(jl_iter, struct joinlist_msg, list);
+
+			if (stored_msg->sender_nodeid == api->totem_nodeid_get()) {
+				continue ;	/* message sent by the local node */
+			}
+
+			if (pi->nodeid == stored_msg->sender_nodeid &&
+			    pi->pid == stored_msg->pid &&
+			    mar_name_compare (&pi->group, &stored_msg->group_name) == 0) {
+				found = 1;
+				break ;
+			}
+		}
+
+		if (!found) {
+			do_proc_leave(&pi->group, pi->pid, pi->nodeid, CONFCHG_CPG_REASON_PROCDOWN);
+		}
+	}
+}
+
 static void joinlist_inform_clients (void)
 {
 	struct joinlist_msg *stored_msg;
@@ -957,6 +1007,8 @@ static void joinlist_inform_clients (void)
 		do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid,
 			CONFCHG_CPG_REASON_NODEUP);
 	}
+
+	joinlist_remove_zombie_pi_entries ();
 }
 
 static void downlist_messages_delete (void)
-- 
1.7.1