Blame SOURCES/053-ipc-eviction.patch

60de42
From 33b3b7e7004e3ec5501c32a2217ec32775a5a487 Mon Sep 17 00:00:00 2001
60de42
From: Ken Gaillot <kgaillot@redhat.com>
60de42
Date: Mon, 24 Apr 2017 10:38:50 -0500
60de42
Subject: [PATCH 1/6] Refactor: libcib: functionize destroying op callback
60de42
 table
60de42
60de42
reduces code duplication
60de42
---
60de42
 lib/cib/cib_client.c | 29 ++++++++++++-----------------
60de42
 1 file changed, 12 insertions(+), 17 deletions(-)
60de42
60de42
diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c
60de42
index 5ded943..0f53330 100644
60de42
--- a/lib/cib/cib_client.c
60de42
+++ b/lib/cib/cib_client.c
60de42
@@ -206,6 +206,15 @@ cib_destroy_op_callback(gpointer data)
60de42
     free(blob);
60de42
 }
60de42
 
60de42
+static void
60de42
+destroy_op_callback_table()
60de42
+{
60de42
+    if (cib_op_callback_table != NULL) {
60de42
+        g_hash_table_destroy(cib_op_callback_table);
60de42
+        cib_op_callback_table = NULL;
60de42
+    }
60de42
+}
60de42
+
60de42
 char *
60de42
 get_shadow_file(const char *suffix)
60de42
 {
60de42
@@ -348,14 +357,7 @@ cib_new_variant(void)
60de42
 
60de42
     new_cib = calloc(1, sizeof(cib_t));
60de42
 
60de42
-    if (cib_op_callback_table != NULL) {
60de42
-        g_hash_table_destroy(cib_op_callback_table);
60de42
-        cib_op_callback_table = NULL;
60de42
-    }
60de42
-    if (cib_op_callback_table == NULL) {
60de42
-        cib_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
60de42
-                                                      NULL, cib_destroy_op_callback);
60de42
-    }
60de42
+    remove_cib_op_callback(0, TRUE); /* remove all */
60de42
 
60de42
     new_cib->call_id = 1;
60de42
     new_cib->variant = cib_undefined;
60de42
@@ -419,10 +421,7 @@ cib_delete(cib_t * cib)
60de42
         free(client);
60de42
     }
60de42
 
60de42
-    if(cib_op_callback_table) {
60de42
-        g_hash_table_destroy(cib_op_callback_table);
60de42
-        cib_op_callback_table = NULL;
60de42
-    }
60de42
+    destroy_op_callback_table();
60de42
 
60de42
     if(cib) {
60de42
         cib->cmds->free(cib);
60de42
@@ -639,13 +638,9 @@ void
60de42
 remove_cib_op_callback(int call_id, gboolean all_callbacks)
60de42
 {
60de42
     if (all_callbacks) {
60de42
-        if (cib_op_callback_table != NULL) {
60de42
-            g_hash_table_destroy(cib_op_callback_table);
60de42
-        }
60de42
-
60de42
+        destroy_op_callback_table();
60de42
         cib_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal,
60de42
                                                       NULL, cib_destroy_op_callback);
60de42
-
60de42
     } else {
60de42
         g_hash_table_remove(cib_op_callback_table, GINT_TO_POINTER(call_id));
60de42
     }
60de42
-- 
60de42
1.8.3.1
60de42
60de42
60de42
From 360cf350722f9646581e47b4d73d971aa81e6cec Mon Sep 17 00:00:00 2001
60de42
From: Ken Gaillot <kgaillot@redhat.com>
60de42
Date: Fri, 21 Apr 2017 11:31:01 -0500
60de42
Subject: [PATCH 2/6] Low: crmd: avoid use-after-free when disconnecting from
60de42
 CIB
60de42
60de42
crmd_exit() freed the CIB connection, then drained the mainloop.
60de42
However, the mainloop could call functions (such as do_cib_control() or
60de42
cib_native_destroy()) that would use the connection object.
60de42
60de42
This would lead to a segfault, though the harm was minimal, since the crmd was
60de42
already exiting at this point.
60de42
60de42
Also log a notice comparable to what's done for the crmd's other disconnects.
60de42
---
60de42
 crmd/cib.c           | 17 ++++++-----------
60de42
 crmd/control.c       | 10 ++++++++--
60de42
 include/crm/cib.h    |  1 +
60de42
 lib/cib/cib_client.c | 35 ++++++++++++++++++++++++-----------
60de42
 4 files changed, 39 insertions(+), 24 deletions(-)
60de42
60de42
diff --git a/crmd/cib.c b/crmd/cib.c
60de42
index 40fed4a..41f4f3d 100644
60de42
--- a/crmd/cib.c
60de42
+++ b/crmd/cib.c
60de42
@@ -159,19 +159,16 @@ do_cib_replaced(const char *event, xmlNode * msg)
60de42
     register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
60de42
 }
60de42
 
60de42
-/*	 A_CIB_STOP, A_CIB_START, A_CIB_RESTART,	*/
60de42
+/* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */
60de42
 void
60de42
 do_cib_control(long long action,
60de42
                enum crmd_fsa_cause cause,
60de42
                enum crmd_fsa_state cur_state,
60de42
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
60de42
 {
60de42
-    struct crm_subsystem_s *this_subsys = cib_subsystem;
60de42
+    CRM_ASSERT(fsa_cib_conn != NULL);
60de42
 
60de42
-    long long stop_actions = A_CIB_STOP;
60de42
-    long long start_actions = A_CIB_START;
60de42
-
60de42
-    if (action & stop_actions) {
60de42
+    if (action & A_CIB_STOP) {
60de42
 
60de42
         if (fsa_cib_conn->state != cib_disconnected && last_resource_update != 0) {
60de42
             crm_info("Waiting for resource update %d to complete", last_resource_update);
60de42
@@ -181,7 +178,6 @@ do_cib_control(long long action,
60de42
 
60de42
         crm_info("Disconnecting CIB");
60de42
         clear_bit(fsa_input_register, R_CIB_CONNECTED);
60de42
-        CRM_ASSERT(fsa_cib_conn != NULL);
60de42
 
60de42
         fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated);
60de42
 
60de42
@@ -189,15 +185,14 @@ do_cib_control(long long action,
60de42
             fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local);
60de42
             fsa_cib_conn->cmds->signoff(fsa_cib_conn);
60de42
         }
60de42
+        crm_notice("Disconnected from the CIB");
60de42
     }
60de42
 
60de42
-    if (action & start_actions) {
60de42
+    if (action & A_CIB_START) {
60de42
         int rc = pcmk_ok;
60de42
 
60de42
-        CRM_ASSERT(fsa_cib_conn != NULL);
60de42
-
60de42
         if (cur_state == S_STOPPING) {
60de42
-            crm_err("Ignoring request to start %s after shutdown", this_subsys->name);
60de42
+            crm_err("Ignoring request to start the CIB after shutdown");
60de42
             return;
60de42
         }
60de42
 
60de42
diff --git a/crmd/control.c b/crmd/control.c
60de42
index af9c2c2..a9c0b73 100644
60de42
--- a/crmd/control.c
60de42
+++ b/crmd/control.c
60de42
@@ -355,8 +355,11 @@ crmd_exit(int rc)
60de42
     election_fini(fsa_election);
60de42
     fsa_election = NULL;
60de42
 
60de42
-    cib_delete(fsa_cib_conn);
60de42
-    fsa_cib_conn = NULL;
60de42
+    /* Tear down the CIB connection, but don't free it yet -- it could be used
60de42
+     * when we drain the mainloop later.
60de42
+     */
60de42
+    cib_free_callbacks(fsa_cib_conn);
60de42
+    fsa_cib_conn->cmds->signoff(fsa_cib_conn);
60de42
 
60de42
     verify_stopped(fsa_state, LOG_WARNING);
60de42
     clear_bit(fsa_input_register, R_LRM_CONNECTED);
60de42
@@ -485,6 +488,9 @@ crmd_exit(int rc)
60de42
         mainloop_destroy_signal(SIGCHLD);
60de42
     }
60de42
 
60de42
+    cib_delete(fsa_cib_conn);
60de42
+    fsa_cib_conn = NULL;
60de42
+
60de42
     /* Graceful */
60de42
     return rc;
60de42
 }
60de42
diff --git a/include/crm/cib.h b/include/crm/cib.h
60de42
index a1246d1..ec52602 100644
60de42
--- a/include/crm/cib.h
60de42
+++ b/include/crm/cib.h
60de42
@@ -172,6 +172,7 @@ cib_t *cib_new_no_shadow(void);
60de42
 char *get_shadow_file(const char *name);
60de42
 cib_t *cib_shadow_new(const char *name);
60de42
 
60de42
+void cib_free_callbacks(cib_t *cib);
60de42
 void cib_delete(cib_t * cib);
60de42
 
60de42
 void cib_dump_pending_callbacks(void);
60de42
diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c
60de42
index 0f53330..907bb5a 100644
60de42
--- a/lib/cib/cib_client.c
60de42
+++ b/lib/cib/cib_client.c
60de42
@@ -406,24 +406,37 @@ cib_new_variant(void)
60de42
     return new_cib;
60de42
 }
60de42
 
60de42
+/*!
60de42
+ * \brief Free all callbacks for a CIB connection
60de42
+ *
60de42
+ * \param[in] cib  CIB connection to clean up
60de42
+ */
60de42
 void
60de42
-cib_delete(cib_t * cib)
60de42
+cib_free_callbacks(cib_t *cib)
60de42
 {
60de42
-    GList *list = NULL;
60de42
-    if(cib) {
60de42
-        list = cib->notify_list;
60de42
-    }
60de42
+    if (cib) {
60de42
+        GList *list = cib->notify_list;
60de42
 
60de42
-    while (list != NULL) {
60de42
-        cib_notify_client_t *client = g_list_nth_data(list, 0);
60de42
+        while (list != NULL) {
60de42
+            cib_notify_client_t *client = g_list_nth_data(list, 0);
60de42
 
60de42
-        list = g_list_remove(list, client);
60de42
-        free(client);
60de42
+            list = g_list_remove(list, client);
60de42
+            free(client);
60de42
+        }
60de42
     }
60de42
-
60de42
     destroy_op_callback_table();
60de42
+}
60de42
 
60de42
-    if(cib) {
60de42
+/*!
60de42
+ * \brief Free all memory used by CIB connection
60de42
+ *
60de42
+ * \param[in] cib  CIB connection to delete
60de42
+ */
60de42
+void
60de42
+cib_delete(cib_t *cib)
60de42
+{
60de42
+    cib_free_callbacks(cib);
60de42
+    if (cib) {
60de42
         cib->cmds->free(cib);
60de42
     }
60de42
 }
60de42
-- 
60de42
1.8.3.1
60de42
60de42
60de42
From a6383a30d3258856efef1c067790faa78d1a5787 Mon Sep 17 00:00:00 2001
60de42
From: Ken Gaillot <kgaillot@redhat.com>
60de42
Date: Fri, 21 Apr 2017 11:33:53 -0500
60de42
Subject: [PATCH 3/6] Low: crmd: don't destroy election structure twice
60de42
60de42
didn't cause any harm, but unnecessary
60de42
---
60de42
 crmd/control.c | 1 -
60de42
 1 file changed, 1 deletion(-)
60de42
60de42
diff --git a/crmd/control.c b/crmd/control.c
60de42
index a9c0b73..5d91b1d 100644
60de42
--- a/crmd/control.c
60de42
+++ b/crmd/control.c
60de42
@@ -388,7 +388,6 @@ crmd_exit(int rc)
60de42
     free(integration_timer); integration_timer = NULL;
60de42
     free(finalization_timer); finalization_timer = NULL;
60de42
     free(election_trigger); election_trigger = NULL;
60de42
-    election_fini(fsa_election);
60de42
     free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
60de42
     free(wait_timer); wait_timer = NULL;
60de42
     free(recheck_timer); recheck_timer = NULL;
60de42
-- 
60de42
1.8.3.1
60de42
60de42
60de42
From 8183dbf57bc92a79b54c7e56942e4e3bda36654f Mon Sep 17 00:00:00 2001
60de42
From: Ken Gaillot <kgaillot@redhat.com>
60de42
Date: Mon, 1 May 2017 14:58:58 -0500
60de42
Subject: [PATCH 4/6] Log: libcib: downgrade ACL status message to trace
60de42
60de42
gets logged 1-3 times for every CIB op
60de42
---
60de42
 lib/cib/cib_utils.c | 2 +-
60de42
 1 file changed, 1 insertion(+), 1 deletion(-)
60de42
60de42
diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
60de42
index 1908401..ab48f16 100644
60de42
--- a/lib/cib/cib_utils.c
60de42
+++ b/lib/cib/cib_utils.c
60de42
@@ -282,7 +282,7 @@ cib_acl_enabled(xmlNode *xml, const char *user)
60de42
         g_hash_table_destroy(options);
60de42
     }
60de42
 
60de42
-    crm_debug("CIB ACL is %s", rc ? "enabled" : "disabled");
60de42
+    crm_trace("CIB ACL is %s", rc ? "enabled" : "disabled");
60de42
 #endif
60de42
     return rc;
60de42
 }
60de42
-- 
60de42
1.8.3.1
60de42
60de42
60de42
From f1f9bf7ceb3737ed4d731669c2e5744a9e234a04 Mon Sep 17 00:00:00 2001
60de42
From: Ken Gaillot <kgaillot@redhat.com>
60de42
Date: Mon, 1 May 2017 17:06:03 -0500
60de42
Subject: [PATCH 5/6] Low: libcrmcommon: don't delay next flush by more than 5
60de42
 seconds
60de42
60de42
If an IPC client falls behind on processing incoming events, we set a
60de42
progressive delay between queue flushes. However, at the maximum backlog
60de42
of 500 events, this would be 51 seconds. Cap this delay at 5 seconds.
60de42
---
60de42
 lib/common/ipc.c | 22 +++++++++++++++++++---
60de42
 1 file changed, 19 insertions(+), 3 deletions(-)
60de42
60de42
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
60de42
index 2949837..c9e3da2 100644
60de42
--- a/lib/common/ipc.c
60de42
+++ b/lib/common/ipc.c
60de42
@@ -463,12 +463,28 @@ crm_ipcs_flush_events_cb(gpointer data)
60de42
     return FALSE;
60de42
 }
60de42
 
60de42
+/*!
60de42
+ * \internal
60de42
+ * \brief Add progressive delay before next event queue flush
60de42
+ *
60de42
+ * \param[in,out] c          Client connection to add delay to
60de42
+ * \param[in]     queue_len  Current event queue length
60de42
+ */
60de42
+static inline void
60de42
+delay_next_flush(crm_client_t *c, unsigned int queue_len)
60de42
+{
60de42
+    /* Delay a maximum of 5 seconds */
60de42
+    guint delay = (queue_len < 40)? (1000 + 100 * queue_len) : 5000;
60de42
+
60de42
+    c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c);
60de42
+}
60de42
+
60de42
 ssize_t
60de42
 crm_ipcs_flush_events(crm_client_t * c)
60de42
 {
60de42
-    int sent = 0;
60de42
     ssize_t rc = 0;
60de42
-    int queue_len = 0;
60de42
+    unsigned int sent = 0;
60de42
+    unsigned int queue_len = 0;
60de42
 
60de42
     if (c == NULL) {
60de42
         return pcmk_ok;
60de42
@@ -523,8 +539,8 @@ crm_ipcs_flush_events(crm_client_t * c)
60de42
             qb_ipcs_disconnect(c->ipcs);
60de42
             return rc;
60de42
         }
60de42
+        delay_next_flush(c, queue_len);
60de42
 
60de42
-        c->event_timer = g_timeout_add(1000 + 100 * queue_len, crm_ipcs_flush_events_cb, c);
60de42
     }
60de42
 
60de42
     return rc;
60de42
-- 
60de42
1.8.3.1
60de42
60de42
60de42
From 5d94da7af947810ca4b12dc1d9dce18a7d638f73 Mon Sep 17 00:00:00 2001
60de42
From: Ken Gaillot <kgaillot@redhat.com>
60de42
Date: Mon, 1 May 2017 17:39:02 -0500
60de42
Subject: [PATCH 6/6] Fix: libcrmcommon: avoid evicting IPC client if messages
60de42
 spike briefly
60de42
60de42
Before, an IP server would evict a client if its event queue grew to 500
60de42
messages. Now, we avoid evicting if the backlog is new (a quick spike of many
60de42
messages that immediately crosses the threshold).
60de42
---
60de42
 include/crm/common/ipcs.h |  5 ++++-
60de42
 lib/common/ipc.c          | 41 ++++++++++++++++++++++++++++++++---------
60de42
 2 files changed, 36 insertions(+), 10 deletions(-)
60de42
60de42
diff --git a/include/crm/common/ipcs.h b/include/crm/common/ipcs.h
60de42
index 2fec931..ba1ccef 100644
60de42
--- a/include/crm/common/ipcs.h
60de42
+++ b/include/crm/common/ipcs.h
60de42
@@ -73,6 +73,8 @@ struct crm_client_s {
60de42
     char *name;
60de42
     char *user;
60de42
 
60de42
+    /* Provided for server use (not used by library) */
60de42
+    /* @TODO merge options, flags, and kind (reserving lower bits for server) */
60de42
     long long options;
60de42
 
60de42
     int request_id;
60de42
@@ -80,7 +82,7 @@ struct crm_client_s {
60de42
     void *userdata;
60de42
 
60de42
     int event_timer;
60de42
-    GList *event_queue;
60de42
+    GList *event_queue; /* @TODO use GQueue instead */
60de42
 
60de42
     /* Depending on the value of kind, only some of the following
60de42
      * will be populated/valid
60de42
@@ -91,6 +93,7 @@ struct crm_client_s {
60de42
 
60de42
     struct crm_remote_s *remote;        /* TCP/TLS */
60de42
 
60de42
+    unsigned int backlog_len; /* IPC queue length after last flush */
60de42
 };
60de42
 
60de42
 extern GHashTable *client_connections;
60de42
diff --git a/lib/common/ipc.c b/lib/common/ipc.c
60de42
index c9e3da2..d32e373 100644
60de42
--- a/lib/common/ipc.c
60de42
+++ b/lib/common/ipc.c
60de42
@@ -37,6 +37,9 @@
60de42
 
60de42
 #define PCMK_IPC_VERSION 1
60de42
 
60de42
+/* Evict clients whose event queue grows this large */
60de42
+#define PCMK_IPC_MAX_QUEUE 500
60de42
+
60de42
 struct crm_ipc_response_header {
60de42
     struct qb_ipc_response_header qb;
60de42
     uint32_t size_uncompressed;
60de42
@@ -523,24 +526,44 @@ crm_ipcs_flush_events(crm_client_t * c)
60de42
     }
60de42
 
60de42
     queue_len -= sent;
60de42
-    if (sent > 0 || c->event_queue) {
60de42
+    if (sent > 0 || queue_len) {
60de42
         crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)",
60de42
                   sent, queue_len, c->ipcs, c->pid,
60de42
                   pcmk_strerror(rc < 0 ? rc : 0), (long long) rc);
60de42
     }
60de42
 
60de42
-    if (c->event_queue) {
60de42
-        if (queue_len % 100 == 0 && queue_len > 99) {
60de42
-            crm_warn("Event queue for %p[%d] has grown to %d", c->ipcs, c->pid, queue_len);
60de42
+    if (queue_len) {
60de42
+        /* We want to allow clients to briefly fall behind on processing
60de42
+         * incoming messages, but drop completely unresponsive clients so the
60de42
+         * connection doesn't consume resources indefinitely.
60de42
+         *
60de42
+         * @TODO It is possible that the queue could reasonably grow large in a
60de42
+         * short time. An example is a reprobe of hundreds of resources on many
60de42
+         * nodes resulting in a surge of CIB replies to the crmd. We could
60de42
+         * possibly give cluster daemons a higher threshold here, and/or prevent
60de42
+         * such a surge by throttling LRM history writes in the crmd.
60de42
+         */
60de42
 
60de42
-        } else if (queue_len > 500) {
60de42
-            crm_err("Evicting slow client %p[%d]: event queue reached %d entries",
60de42
-                    c->ipcs, c->pid, queue_len);
60de42
-            qb_ipcs_disconnect(c->ipcs);
60de42
-            return rc;
60de42
+        if (queue_len > PCMK_IPC_MAX_QUEUE) {
60de42
+            if ((c->backlog_len <= 1) || (queue_len < c->backlog_len)) {
60de42
+                /* Don't evict for a new or shrinking backlog */
60de42
+                crm_warn("Client with process ID %u has a backlog of %u messages "
60de42
+                         CRM_XS " %p", c->pid, queue_len, c->ipcs);
60de42
+            } else {
60de42
+                crm_err("Evicting client with process ID %u due to backlog of %u messages "
60de42
+                         CRM_XS " %p", c->pid, queue_len, c->ipcs);
60de42
+                c->backlog_len = 0;
60de42
+                qb_ipcs_disconnect(c->ipcs);
60de42
+                return rc;
60de42
+            }
60de42
         }
60de42
+
60de42
+        c->backlog_len = queue_len;
60de42
         delay_next_flush(c, queue_len);
60de42
 
60de42
+    } else {
60de42
+        /* Event queue is empty, there is no backlog */
60de42
+        c->backlog_len = 0;
60de42
     }
60de42
 
60de42
     return rc;
60de42
-- 
60de42
1.8.3.1
60de42