From 749f6b256cb2864ce3e862442adc6d219eefeca3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 6 Dec 2019 12:17:03 -0600
Subject: [PATCH 03/10] Low: scheduler: respect shutdown locks when placing
inactive resources
When shutdown-lock is enabled, and we're either scheduling a resource stop
on a node that's cleanly shutting down or scheduling any action for a
previously locked resource, add "shutdown-lock=<shutdown-timestamp>" to the
graph action. The controller will be able to use this to know when to preserve
the lock (by adding the lock time to the resource state entry).
When the scheduler unpacks a resource state entry with a lock, it will remember
the lock node and lock time, which will trigger existing code for applying
shutdown locks.
---
lib/pengine/unpack.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
pengine/allocate.c | 17 ++++++++++++++++-
pengine/graph.c | 32 +++++++++++++++++++++++++++++++-
3 files changed, 91 insertions(+), 7 deletions(-)
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 7b0d837..bb5efa4 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -18,6 +18,7 @@
#include <crm_internal.h>
#include <glib.h>
+#include <time.h>
#include <crm/crm.h>
#include <crm/services.h>
@@ -1151,7 +1152,8 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
} else if (is_container_remote_node(this_node) == FALSE
- && rsc->role == RSC_ROLE_STARTED) {
+ && ((rsc->role == RSC_ROLE_STARTED)
+ || is_set(data_set->flags, pe_flag_shutdown_lock))) {
check = TRUE;
crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
}
@@ -1167,6 +1169,9 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
} else if (fence) {
process = TRUE;
+
+ } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
+ process = TRUE;
}
if(process) {
@@ -2286,6 +2291,28 @@ calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
}
}
+// If history entry has a nonzero, unexpired shutdown lock time, remember lock node and time
+static void
+unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node,
+ pe_working_set_t *data_set)
+{
+ time_t lock_time = 0; // When lock started (i.e. node shutdown time)
+
+ if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ &lock_time) == pcmk_ok) && (lock_time != 0)) { // Proceed only if the call reports pcmk_ok and the time is nonzero
+
+ if ((data_set->shutdown_lock > 0) // A positive value is used as a limit on lock age
+ && (get_effective_time(data_set)
+ > (lock_time + data_set->shutdown_lock))) { // Effective time is past lock start plus the limit
+ pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
+ rsc->id, node->details->uname); // Expired: this branch only calls pe_rsc_info; lock fields are not set
+ } else {
+ rsc->lock_node = node; // Not expired (or no positive limit): record where the lock is held
+ rsc->lock_time = lock_time; // ... and when it started
+ }
+ }
+}
+
static resource_t *
unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
@@ -2322,18 +2349,30 @@ unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data
}
}
- if (op_list == NULL) {
- /* if there are no operations, there is nothing to do */
- return NULL;
+ if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) {
+ if (op_list == NULL) {
+ // If there are no operations, there is nothing to do
+ return NULL;
+ }
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
if (rsc == NULL) {
- rsc = process_orphan_resource(rsc_entry, node, data_set);
+ if (op_list == NULL) {
+ // If there are no operations, there is nothing to do
+ return NULL;
+ } else {
+ rsc = process_orphan_resource(rsc_entry, node, data_set);
+ }
}
CRM_ASSERT(rsc != NULL);
+ // Check whether the resource is "shutdown-locked" to this node
+ if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
+ unpack_shutdown_lock(rsc_entry, rsc, node, data_set);
+ }
+
/* process operations */
saved_role = rsc->role;
on_fail = action_fail_ignore;
diff --git a/pengine/allocate.c b/pengine/allocate.c
index 09f9e51..7366716 100644
--- a/pengine/allocate.c
+++ b/pengine/allocate.c
@@ -1047,8 +1047,23 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
return;
}
+ if (rsc->lock_node != NULL) {
+ // The lock was obtained from resource history
+
+ if (rsc->running_on != NULL) {
+ /* The resource was started elsewhere even though it is now
+ * considered locked. This shouldn't be possible, but as a
+ * failsafe, we don't want to disturb the resource now.
+ */
+ pe_rsc_info(rsc,
+ "Cancelling shutdown lock because %s is already active",
+ rsc->id);
+ rsc->lock_node = NULL;
+ rsc->lock_time = 0;
+ }
+
// Only a resource active on exactly one node can be locked
- if (pcmk__list_of_1(rsc->running_on)) {
+ } else if (pcmk__list_of_1(rsc->running_on)) {
pe_node_t *node = rsc->running_on->data;
if (node->details->shutdown) {
diff --git a/pengine/graph.c b/pengine/graph.c
index cba30d0..33168ca 100644
--- a/pengine/graph.c
+++ b/pengine/graph.c
@@ -1,5 +1,7 @@
/*
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -998,6 +1000,26 @@ add_downed_nodes(xmlNode *xml, const action_t *action,
}
}
+static bool
+should_lock_action(pe_action_t *action) // true if action should carry the shutdown-lock attribute; action->rsc is dereferenced unchecked
+{
+ // Only actions taking place on resource's lock node are locked (nodes are matched by details pointer)
+ if ((action->rsc->lock_node == NULL) || (action->node == NULL)
+ || (action->node->details != action->rsc->lock_node->details)) {
+ return false; // No lock node, no action node, or the action runs on a different node
+ }
+
+ /* During shutdown, only stops are locked (otherwise, another action such as
+ * a demote would cause the controller to clear the lock)
+ */
+ if (action->node->details->shutdown && action->task
+ && strcmp(action->task, RSC_STOP)) { // Nonzero strcmp: task is not a stop; a NULL task skips this check
+ return false;
+ }
+
+ return true; // On the lock node, and either the node is not shutting down or the task is a stop (or NULL)
+}
+
static xmlNode *
action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
{
@@ -1104,6 +1126,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
XML_ATTR_TYPE
};
+ /* If a resource is locked to a node via shutdown-lock, mark its actions
+ * so the controller can preserve the lock when the action completes.
+ */
+ if (should_lock_action(action)) {
+ crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
+ (long long) action->rsc->lock_time);
+ }
+
// List affected resource
rsc_xml = create_xml_node(action_xml,
--
1.8.3.1