Blame SOURCES/054-bundles-on-remotes.patch

60de42
From 26c59fb7d128f83d0b4d35ae9b9d088359103d31 Mon Sep 17 00:00:00 2001
60de42
From: Andrew Beekhof <andrew@beekhof.net>
60de42
Date: Wed, 12 Apr 2017 20:07:56 +1000
60de42
Subject: [PATCH 1/2] PE: Remote: Allow remote nodes that start containers with
60de42
 pacemaker remote inside
60de42
60de42
---
60de42
 include/crm/pengine/internal.h     |   2 +-
60de42
 include/crm/pengine/status.h       |   1 +
60de42
 lib/pengine/container.c            |   3 +-
60de42
 lib/pengine/unpack.c               | 275 ++++++++++++++++++-------------------
60de42
 lib/pengine/utils.c                |  39 ++++++
60de42
 pengine/container.c                |   8 ++
60de42
 pengine/test10/bug-cl-5247.summary |   4 +-
60de42
 7 files changed, 190 insertions(+), 142 deletions(-)
60de42
60de42
diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
60de42
index 0da02cc..1b6afd1 100644
60de42
--- a/include/crm/pengine/internal.h
60de42
+++ b/include/crm/pengine/internal.h
60de42
@@ -291,6 +291,6 @@ node_t *pe_create_node(const char *id, const char *uname, const char *type,
60de42
 bool remote_id_conflict(const char *remote_name, pe_working_set_t *data);
60de42
 void common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data);
60de42
 resource_t *find_container_child(const char *stem, resource_t * rsc, node_t *node);
60de42
-
60de42
+bool fix_remote_addr(resource_t * rsc);
60de42
 
60de42
 #endif
60de42
diff --git a/include/crm/pengine/status.h b/include/crm/pengine/status.h
60de42
index 00b20ab..4cc3919 100644
60de42
--- a/include/crm/pengine/status.h
60de42
+++ b/include/crm/pengine/status.h
60de42
@@ -142,6 +142,7 @@ struct node_shared_s {
60de42
     gboolean shutdown;
60de42
     gboolean expected_up;
60de42
     gboolean is_dc;
60de42
+    gboolean unpacked;
60de42
 
60de42
     int num_resources;
60de42
     GListPtr running_rsc;       /* resource_t* */
60de42
diff --git a/lib/pengine/container.c b/lib/pengine/container.c
60de42
index 127b144..d06997a 100644
60de42
--- a/lib/pengine/container.c
60de42
+++ b/lib/pengine/container.c
60de42
@@ -368,7 +368,8 @@ create_remote_resource(
60de42
         if(tuple->ipaddr) {
60de42
             create_nvp(xml_obj, "addr", tuple->ipaddr);
60de42
         } else {
60de42
-            create_nvp(xml_obj, "addr", "localhost");
60de42
+            // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
60de42
+            create_nvp(xml_obj, "addr", "#uname");
60de42
         }
60de42
 
60de42
         if(data->control_port) {
60de42
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
60de42
index 6aa51dd..29a1013 100644
60de42
--- a/lib/pengine/unpack.c
60de42
+++ b/lib/pengine/unpack.c
60de42
@@ -1017,6 +1017,133 @@ get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data)
60de42
     }
60de42
 }
60de42
 
60de42
+static void
60de42
+unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set) 
60de42
+{
60de42
+    const char *resource_discovery_enabled = NULL;
60de42
+    xmlNode *attrs = NULL;
60de42
+    resource_t *rsc = NULL;
60de42
+    const char *shutdown = NULL;
60de42
+
60de42
+    if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
60de42
+        return;
60de42
+    }
60de42
+
60de42
+    if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
60de42
+        return;
60de42
+    }
60de42
+    crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
60de42
+
60de42
+    this_node->details->remote_maintenance =
60de42
+        crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
60de42
+
60de42
+    rsc = this_node->details->remote_rsc;
60de42
+    if (this_node->details->remote_requires_reset == FALSE) {
60de42
+        this_node->details->unclean = FALSE;
60de42
+        this_node->details->unseen = FALSE;
60de42
+    }
60de42
+    attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
60de42
+    add_node_attrs(attrs, this_node, TRUE, data_set);
60de42
+
60de42
+    shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
60de42
+    if (shutdown != NULL && safe_str_neq("0", shutdown)) {
60de42
+        crm_info("Node %s is shutting down", this_node->details->uname);
60de42
+        this_node->details->shutdown = TRUE;
60de42
+        if (rsc) {
60de42
+            rsc->next_role = RSC_ROLE_STOPPED;
60de42
+        }
60de42
+    }
60de42
+ 
60de42
+    if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
60de42
+        crm_info("Node %s is in standby-mode", this_node->details->uname);
60de42
+        this_node->details->standby = TRUE;
60de42
+    }
60de42
+
60de42
+    if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance")) ||
60de42
+        (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
60de42
+        crm_info("Node %s is in maintenance-mode", this_node->details->uname);
60de42
+        this_node->details->maintenance = TRUE;
60de42
+    }
60de42
+
60de42
+    resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY);
60de42
+    if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
60de42
+        if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
+            crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
60de42
+                     XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
60de42
+        } else {
60de42
+            /* If we're here, this is either a baremetal node with fencing enabled,
60de42
+             * or a container node, for which it does not matter whether fencing is
60de42
+             * enabled: container nodes are 'fenced' by recovering the container resource
60de42
+             * regardless of whether fencing is enabled. */
60de42
+            crm_info("Node %s has resource discovery disabled", this_node->details->uname);
60de42
+            this_node->details->rsc_discovery_enabled = FALSE;
60de42
+        }
60de42
+    }
60de42
+}
60de42
+
60de42
+static bool
60de42
+unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set) 
60de42
+{
60de42
+    bool changed = false;
60de42
+    xmlNode *lrm_rsc = NULL;
60de42
+
60de42
+    for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
60de42
+        const char *id = NULL;
60de42
+        const char *uname = NULL;
60de42
+        node_t *this_node = NULL;
60de42
+        bool process = FALSE;
60de42
+
60de42
+        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
60de42
+            continue;
60de42
+        }
60de42
+
60de42
+        id = crm_element_value(state, XML_ATTR_ID);
60de42
+        uname = crm_element_value(state, XML_ATTR_UNAME);
60de42
+        this_node = pe_find_node_any(data_set->nodes, id, uname);
60de42
+
60de42
+        if (this_node == NULL) {
60de42
+            crm_info("Node %s is unknown", id);
60de42
+            continue;
60de42
+
60de42
+        } else if (this_node->details->unpacked) {
60de42
+            crm_info("Node %s is already processed", id);
60de42
+            continue;
60de42
+
60de42
+        } else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
+            // A redundant test, but preserves the order for regression tests
60de42
+            process = TRUE;
60de42
+
60de42
+        } else if (is_remote_node(this_node)) {
60de42
+            resource_t *rsc = this_node->details->remote_rsc;
60de42
+
60de42
+            if (fence || (rsc && rsc->role == RSC_ROLE_STARTED)) {
60de42
+                determine_remote_online_status(data_set, this_node);
60de42
+                unpack_handle_remote_attrs(this_node, state, data_set);
60de42
+                process = TRUE;
60de42
+            }
60de42
+
60de42
+        } else if (this_node->details->online) {
60de42
+            process = TRUE;
60de42
+
60de42
+        } else if (fence) {
60de42
+            process = TRUE;
60de42
+        }
60de42
+
60de42
+        if(process) {
60de42
+            crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
60de42
+                      fence?"un":"", is_remote_node(this_node)?" remote":"",
60de42
+                      this_node->details->uname);
60de42
+            changed = TRUE;
60de42
+            this_node->details->unpacked = TRUE;
60de42
+
60de42
+            lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
60de42
+            lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
60de42
+            unpack_lrm_resources(this_node, lrm_rsc, data_set);
60de42
+        }
60de42
+    }
60de42
+    return changed;
60de42
+}
60de42
+
60de42
 /* remove nodes that are down, stopping */
60de42
 /* create +ve rsc_to_node constraints between resources and the nodes they are running on */
60de42
 /* anything else? */
60de42
@@ -1027,7 +1154,6 @@ unpack_status(xmlNode * status, pe_working_set_t * data_set)
60de42
     const char *uname = NULL;
60de42
 
60de42
     xmlNode *state = NULL;
60de42
-    xmlNode *lrm_rsc = NULL;
60de42
     node_t *this_node = NULL;
60de42
 
60de42
     crm_trace("Beginning unpack");
60de42
@@ -1125,152 +1251,25 @@ unpack_status(xmlNode * status, pe_working_set_t * data_set)
60de42
         }
60de42
     }
60de42
 
60de42
-    /* Now that we know all node states, we can safely handle migration ops */
60de42
-    for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
60de42
-        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
60de42
-            continue;
60de42
-        }
60de42
-
60de42
-        id = crm_element_value(state, XML_ATTR_ID);
60de42
-        uname = crm_element_value(state, XML_ATTR_UNAME);
60de42
-        this_node = pe_find_node_any(data_set->nodes, id, uname);
60de42
-
60de42
-        if (this_node == NULL) {
60de42
-            crm_info("Node %s is unknown", id);
60de42
-            continue;
60de42
-
60de42
-        } else if (is_remote_node(this_node)) {
60de42
-
60de42
-            /* online status of remote node can not be determined until all other
60de42
-             * resource status is unpacked. */
60de42
-            continue;
60de42
-        } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
-            crm_trace("Processing lrm resource entries on healthy node: %s",
60de42
-                      this_node->details->uname);
60de42
-            lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
60de42
-            lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
60de42
-            unpack_lrm_resources(this_node, lrm_rsc, data_set);
60de42
-        }
60de42
-    }
60de42
-
60de42
-    /* now that the rest of the cluster's status is determined
60de42
-     * calculate remote-nodes */
60de42
-    unpack_remote_status(status, data_set);
60de42
 
60de42
-    return TRUE;
60de42
-}
60de42
-
60de42
-gboolean
60de42
-unpack_remote_status(xmlNode * status, pe_working_set_t * data_set)
60de42
-{
60de42
-    const char *id = NULL;
60de42
-    const char *uname = NULL;
60de42
-    const char *shutdown = NULL;
60de42
-    resource_t *rsc = NULL;
60de42
-
60de42
-    GListPtr gIter = NULL;
60de42
-
60de42
-    xmlNode *state = NULL;
60de42
-    xmlNode *lrm_rsc = NULL;
60de42
-    node_t *this_node = NULL;
60de42
-
60de42
-    if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) {
60de42
-        crm_trace("no remote nodes to unpack");
60de42
-        return TRUE;
60de42
+    while(unpack_node_loop(status, FALSE, data_set)) {
60de42
+        crm_trace("Start another loop");
60de42
     }
60de42
 
60de42
-    /* get online status */
60de42
-    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
60de42
-        this_node = gIter->data;
60de42
+    // Now catch any nodes we didn't see
60de42
+    unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
60de42
 
60de42
-        if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
60de42
-            continue;
60de42
-        }
60de42
-        determine_remote_online_status(data_set, this_node);
60de42
-    }
60de42
+    for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
60de42
+        node_t *this_node = gIter->data;
60de42
 
60de42
-    /* process attributes */
60de42
-    for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
60de42
-        const char *resource_discovery_enabled = NULL;
60de42
-        xmlNode *attrs = NULL;
60de42
-        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
60de42
-            continue;
60de42
-        }
60de42
-
60de42
-        id = crm_element_value(state, XML_ATTR_ID);
60de42
-        uname = crm_element_value(state, XML_ATTR_UNAME);
60de42
-        this_node = pe_find_node_any(data_set->nodes, id, uname);
60de42
-
60de42
-        if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
60de42
+        if (this_node == NULL) {
60de42
             continue;
60de42
-        }
60de42
-        crm_trace("Processing remote node id=%s, uname=%s", id, uname);
60de42
-
60de42
-        this_node->details->remote_maintenance =
60de42
-            crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
60de42
-
60de42
-        rsc = this_node->details->remote_rsc;
60de42
-        if (this_node->details->remote_requires_reset == FALSE) {
60de42
-            this_node->details->unclean = FALSE;
60de42
-            this_node->details->unseen = FALSE;
60de42
-        }
60de42
-        attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
60de42
-        add_node_attrs(attrs, this_node, TRUE, data_set);
60de42
-
60de42
-        shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN);
60de42
-        if (shutdown != NULL && safe_str_neq("0", shutdown)) {
60de42
-            crm_info("Node %s is shutting down", this_node->details->uname);
60de42
-            this_node->details->shutdown = TRUE;
60de42
-            if (rsc) {
60de42
-                rsc->next_role = RSC_ROLE_STOPPED;
60de42
-            }
60de42
-        }
60de42
- 
60de42
-        if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) {
60de42
-            crm_info("Node %s is in standby-mode", this_node->details->uname);
60de42
-            this_node->details->standby = TRUE;
60de42
-        }
60de42
-
60de42
-        if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance")) ||
60de42
-            (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
60de42
-            crm_info("Node %s is in maintenance-mode", this_node->details->uname);
60de42
-            this_node->details->maintenance = TRUE;
60de42
-        }
60de42
-
60de42
-        resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY);
60de42
-        if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
60de42
-            if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
60de42
-                crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
60de42
-                    XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
60de42
-            } else {
60de42
-                /* if we're here, this is either a baremetal node and fencing is enabled,
60de42
-                 * or this is a container node which we don't care if fencing is enabled 
60de42
-                 * or not on. container nodes are 'fenced' by recovering the container resource
60de42
-                 * regardless of whether fencing is enabled. */
60de42
-                crm_info("Node %s has resource discovery disabled", this_node->details->uname);
60de42
-                this_node->details->rsc_discovery_enabled = FALSE;
60de42
-            }
60de42
-        }
60de42
-    }
60de42
-
60de42
-    /* process node rsc status */
60de42
-    for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
60de42
-        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
60de42
+        } else if(is_remote_node(this_node) == FALSE) {
60de42
             continue;
60de42
-        }
60de42
-
60de42
-        id = crm_element_value(state, XML_ATTR_ID);
60de42
-        uname = crm_element_value(state, XML_ATTR_UNAME);
60de42
-        this_node = pe_find_node_any(data_set->nodes, id, uname);
60de42
-
60de42
-        if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
60de42
+        } else if(this_node->details->unpacked) {
60de42
             continue;
60de42
         }
60de42
-        crm_trace("Processing lrm resource entries on healthy remote node: %s",
60de42
-                  this_node->details->uname);
60de42
-        lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
60de42
-        lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
60de42
-        unpack_lrm_resources(this_node, lrm_rsc, data_set);
60de42
+        determine_remote_online_status(data_set, this_node);
60de42
     }
60de42
 
60de42
     return TRUE;
60de42
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
60de42
index bff5a4c..177fb84 100644
60de42
--- a/lib/pengine/utils.c
60de42
+++ b/lib/pengine/utils.c
60de42
@@ -1675,6 +1675,38 @@ filter_parameters(xmlNode * param_set, const char *param_string, bool need_prese
60de42
     }
60de42
 }
60de42
 
60de42
+bool fix_remote_addr(resource_t * rsc)
60de42
+{
60de42
+    const char *name;
60de42
+    const char *value;
60de42
+    const char *attr_list[] = {
60de42
+        XML_ATTR_TYPE,
60de42
+        XML_AGENT_ATTR_CLASS,
60de42
+        XML_AGENT_ATTR_PROVIDER
60de42
+    };
60de42
+    const char *value_list[] = {
60de42
+        "remote",
60de42
+        "ocf",
60de42
+        "pacemaker"
60de42
+    };
60de42
+
60de42
+    name = "addr";
60de42
+    value = g_hash_table_lookup(rsc->parameters, name);
60de42
+    if (safe_str_eq(value, "#uname") == FALSE) {
60de42
+        return FALSE;
60de42
+    }
60de42
+
60de42
+    for (int lpc = 0; rsc && lpc < DIMOF(attr_list); lpc++) {
60de42
+        name = attr_list[lpc];
60de42
+        value = crm_element_value(rsc->xml, attr_list[lpc]);
60de42
+        if (safe_str_eq(value, value_list[lpc]) == FALSE) {
60de42
+            return FALSE;
60de42
+        }
60de42
+    }
60de42
+
60de42
+    return TRUE;
60de42
+}
60de42
+
60de42
 op_digest_cache_t *
60de42
 rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
60de42
                       pe_working_set_t * data_set)
60de42
@@ -1724,6 +1756,13 @@ rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
60de42
                                              g_hash_destroy_str, g_hash_destroy_str);
60de42
     get_rsc_attributes(local_rsc_params, rsc, node, data_set);
60de42
     data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
60de42
+
60de42
+    if(fix_remote_addr(rsc) && node) {
60de42
+        // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
60de42
+        crm_xml_add(data->params_all, "addr", node->details->uname);
60de42
+        crm_trace("Fixing addr for %s on %s", rsc->id, node->details->uname);
60de42
+    }
60de42
+
60de42
     g_hash_table_foreach(local_rsc_params, hash2field, data->params_all);
60de42
     g_hash_table_foreach(action->extra, hash2field, data->params_all);
60de42
     g_hash_table_foreach(rsc->parameters, hash2field, data->params_all);
60de42
diff --git a/pengine/container.c b/pengine/container.c
60de42
index 3da19aa..8c70f54 100644
60de42
--- a/pengine/container.c
60de42
+++ b/pengine/container.c
60de42
@@ -263,7 +263,15 @@ container_expand(resource_t * rsc, pe_working_set_t * data_set)
60de42
     for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) {
60de42
         container_grouping_t *tuple = (container_grouping_t *)gIter->data;
60de42
 
60de42
+
60de42
         CRM_ASSERT(tuple);
60de42
+        if(fix_remote_addr(tuple->remote) && tuple->docker->allocated_to) {
60de42
+            // REMOTE_CONTAINER_HACK: Allow remote nodes that start containers with pacemaker remote inside
60de42
+            xmlNode *nvpair = get_xpath_object("//nvpair[@name='addr']", tuple->remote->xml, LOG_ERR);
60de42
+
60de42
+            g_hash_table_replace(tuple->remote->parameters, strdup("addr"), strdup(tuple->docker->allocated_to->details->uname));
60de42
+            crm_xml_add(nvpair, "value", tuple->docker->allocated_to->details->uname);
60de42
+        }
60de42
         if(tuple->ip) {
60de42
             tuple->ip->cmds->expand(tuple->ip, data_set);
60de42
         }
60de42
diff --git a/pengine/test10/bug-cl-5247.summary b/pengine/test10/bug-cl-5247.summary
60de42
index 09dc301..91ed8db 100644
60de42
--- a/pengine/test10/bug-cl-5247.summary
60de42
+++ b/pengine/test10/bug-cl-5247.summary
60de42
@@ -90,8 +90,8 @@ Containers: [ pgsr01:prmDB1 ]
60de42
  Resource Group: grpStonith2
60de42
      prmStonith2-2	(stonith:external/ipmi):	Started bl460g8n3
60de42
  Resource Group: master-group
60de42
-     vip-master	(ocf::heartbeat:Dummy):	FAILED[ pgsr02 pgsr01 ]
60de42
-     vip-rep	(ocf::heartbeat:Dummy):	FAILED[ pgsr02 pgsr01 ]
60de42
+     vip-master	(ocf::heartbeat:Dummy):	FAILED[ pgsr01 pgsr02 ]
60de42
+     vip-rep	(ocf::heartbeat:Dummy):	FAILED[ pgsr01 pgsr02 ]
60de42
  Master/Slave Set: msPostgresql [pgsql]
60de42
      Masters: [ pgsr01 ]
60de42
      Stopped: [ bl460g8n3 bl460g8n4 ]
60de42
-- 
60de42
1.8.3.1
60de42
60de42
60de42
From 8abdd82ba85ee384ab78ce1db617f51b692e9df6 Mon Sep 17 00:00:00 2001
60de42
From: Andrew Beekhof <andrew@beekhof.net>
60de42
Date: Wed, 19 Apr 2017 12:55:08 +1000
60de42
Subject: [PATCH 2/2] lrmd: Have pacemaker-remote reap zombies if it is running
60de42
 as pid 1
60de42
60de42
---
60de42
 configure.ac |  10 +++++
60de42
 lrmd/main.c  | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
60de42
 2 files changed, 131 insertions(+), 1 deletion(-)
60de42
60de42
diff --git a/configure.ac b/configure.ac
60de42
index ee98f9b..5e08e7b 100644
60de42
--- a/configure.ac
60de42
+++ b/configure.ac
60de42
@@ -823,6 +823,16 @@ if test "$ac_cv_header_libxslt_xslt_h" != "yes"; then
60de42
    AC_MSG_ERROR(The libxslt developement headers were not found)
60de42
 fi
60de42
 
60de42
+AC_CACHE_CHECK(whether __progname and __progname_full are available,
60de42
+                pf_cv_var_progname,
60de42
+                AC_TRY_LINK([extern char *__progname, *__progname_full;],
60de42
+                    [__progname = "foo"; __progname_full = "foo bar";],
60de42
+                    pf_cv_var_progname="yes", pf_cv_var_progname="no"))
60de42
+
60de42
+if test "$pf_cv_var_progname" = "yes"; then
60de42
+    AC_DEFINE(HAVE___PROGNAME,1,[ ])
60de42
+fi
60de42
+
60de42
 dnl ========================================================================
60de42
 dnl Structures
60de42
 dnl ========================================================================
60de42
diff --git a/lrmd/main.c b/lrmd/main.c
60de42
index ca8cdf2..412ce24 100644
60de42
--- a/lrmd/main.c
60de42
+++ b/lrmd/main.c
60de42
@@ -21,6 +21,11 @@
60de42
 
60de42
 #include <glib.h>
60de42
 #include <unistd.h>
60de42
+#include <signal.h>
60de42
+
60de42
+#include <sys/types.h>
60de42
+#include <sys/wait.h>
60de42
+#include <sys/prctl.h>
60de42
 
60de42
 #include <crm/crm.h>
60de42
 #include <crm/msg_xml.h>
60de42
@@ -391,6 +396,119 @@ void handle_shutdown_nack()
60de42
     crm_debug("Ignoring unexpected shutdown nack");
60de42
 }
60de42
 
60de42
+
60de42
+static pid_t main_pid = 0;
60de42
+static void
60de42
+sigdone(void)
60de42
+{
60de42
+    exit(0);
60de42
+}
60de42
+
60de42
+static void
60de42
+sigreap(void)
60de42
+{
60de42
+    pid_t pid = 0;
60de42
+    int status;
60de42
+    do {
60de42
+        /*
60de42
+         * Opinions seem to differ as to what to put here:
60de42
+         *  -1, any child process
60de42
+         *  0,  any child process whose process group ID is equal to that of the calling process
60de42
+         */
60de42
+        pid = waitpid(-1, &status, WNOHANG);
60de42
+        if(pid == main_pid) {
60de42
+            /* Exit when pacemaker-remote exits and use the same return code */
60de42
+            if (WIFEXITED(status)) {
60de42
+                exit(WEXITSTATUS(status));
60de42
+            }
60de42
+            exit(1);
60de42
+        }
60de42
+
60de42
+    } while (pid > 0);
60de42
+}
60de42
+
60de42
+static struct {
60de42
+	int sig;
60de42
+	void (*handler)(void);
60de42
+} sigmap[] = {
60de42
+	{ SIGCHLD, sigreap },
60de42
+	{ SIGINT,  sigdone },
60de42
+};
60de42
+
60de42
+static void spawn_pidone(int argc, char **argv, char **envp)
60de42
+{
60de42
+    sigset_t set;
60de42
+
60de42
+    if (getpid() != 1) {
60de42
+        return;
60de42
+    }
60de42
+
60de42
+    sigfillset(&set);
60de42
+    sigprocmask(SIG_BLOCK, &set, 0);
60de42
+
60de42
+    main_pid = fork();
60de42
+    switch (main_pid) {
60de42
+	case 0:
60de42
+            sigprocmask(SIG_UNBLOCK, &set, NULL);
60de42
+            setsid();
60de42
+            setpgid(0, 0);
60de42
+
60de42
+            /* Child remains as pacemaker_remoted */
60de42
+            return;
60de42
+	case -1:
60de42
+            perror("fork");
60de42
+    }
60de42
+
60de42
+    /* Parent becomes the reaper of zombie processes */
60de42
+    /* Safe to initialize logging now if needed */
60de42
+
60de42
+#ifdef HAVE___PROGNAME
60de42
+    /* Differentiate ourselves in the 'ps' output */
60de42
+    {
60de42
+        char *p;
60de42
+        int i, maxlen;
60de42
+        char *LastArgv = NULL;
60de42
+        const char *name = "pcmk-init";
60de42
+
60de42
+	for(i = 0; i < argc; i++) {
60de42
+		if(!i || (LastArgv + 1 == argv[i]))
60de42
+			LastArgv = argv[i] + strlen(argv[i]);
60de42
+	}
60de42
+
60de42
+	for(i = 0; envp[i] != NULL; i++) {
60de42
+		if((LastArgv + 1) == envp[i]) {
60de42
+			LastArgv = envp[i] + strlen(envp[i]);
60de42
+		}
60de42
+	}
60de42
+
60de42
+        maxlen = (LastArgv - argv[0]) - 2;
60de42
+
60de42
+        i = strlen(name);
60de42
+        /* We can overwrite individual argv[] arguments */
60de42
+        snprintf(argv[0], maxlen, "%s", name);
60de42
+
60de42
+        /* Now zero out everything else */
60de42
+        p = &argv[0][i];
60de42
+        while(p < LastArgv)
60de42
+            *p++ = '\0';
60de42
+        argv[1] = NULL;
60de42
+    }
60de42
+#endif /* HAVE___PROGNAME */
60de42
+
60de42
+    while (1) {
60de42
+	int sig;
60de42
+	size_t i;
60de42
+
60de42
+        sigwait(&set, &sig);
60de42
+        for (i = 0; i < DIMOF(sigmap); i++) {
60de42
+            if (sigmap[i].sig == sig) {
60de42
+                sigmap[i].handler();
60de42
+                break;
60de42
+            }
60de42
+        }
60de42
+    }
60de42
+}
60de42
+
60de42
 /* *INDENT-OFF* */
60de42
 static struct crm_option long_options[] = {
60de42
     /* Top-level Options */
60de42
@@ -410,12 +528,14 @@ static struct crm_option long_options[] = {
60de42
 /* *INDENT-ON* */
60de42
 
60de42
 int
60de42
-main(int argc, char **argv)
60de42
+main(int argc, char **argv, char **envp)
60de42
 {
60de42
     int flag = 0;
60de42
     int index = 0;
60de42
     const char *option = NULL;
60de42
 
60de42
+    /* If necessary, create PID1 now before any FDs are opened */
60de42
+    spawn_pidone(argc, argv, envp);
60de42
 
60de42
 #ifndef ENABLE_PCMK_REMOTE
60de42
     crm_log_preinit("lrmd", argc, argv);
60de42
-- 
60de42
1.8.3.1
60de42