|
|
af918f |
From 749f6b256cb2864ce3e862442adc6d219eefeca3 Mon Sep 17 00:00:00 2001
|
|
|
af918f |
From: Ken Gaillot <kgaillot@redhat.com>
|
|
|
af918f |
Date: Fri, 6 Dec 2019 12:17:03 -0600
|
|
|
af918f |
Subject: [PATCH 03/10] Low: scheduler: respect shutdown locks when placing
|
|
|
af918f |
inactive resources
|
|
|
af918f |
|
|
|
af918f |
When shutdown-lock is enabled, and we're either scheduling a resource stop
|
|
|
af918f |
on a node that's cleanly shutting down or scheduling any action for a
|
|
|
af918f |
previously locked resource, add "shutdown-lock=<shutdown-timestamp>" to the
|
|
|
af918f |
graph action. The controller will be able to use this to know when to preserve
|
|
|
af918f |
the lock (by adding the lock time to the resource state entry).
|
|
|
af918f |
|
|
|
af918f |
When the scheduler unpacks a resource state entry with a lock, it will remember
|
|
|
af918f |
the lock node and lock time, which will trigger existing code for applying
|
|
|
af918f |
shutdown locks.
|
|
|
af918f |
---
|
|
|
af918f |
lib/pengine/unpack.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
|
|
|
af918f |
pengine/allocate.c | 17 ++++++++++++++++-
|
|
|
af918f |
pengine/graph.c | 32 +++++++++++++++++++++++++++++++-
|
|
|
af918f |
3 files changed, 91 insertions(+), 7 deletions(-)
|
|
|
af918f |
|
|
|
af918f |
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
|
|
|
af918f |
index 7b0d837..bb5efa4 100644
|
|
|
af918f |
--- a/lib/pengine/unpack.c
|
|
|
af918f |
+++ b/lib/pengine/unpack.c
|
|
|
af918f |
@@ -18,6 +18,7 @@
|
|
|
af918f |
#include <crm_internal.h>
|
|
|
af918f |
|
|
|
af918f |
#include <glib.h>
|
|
|
af918f |
+#include <time.h>
|
|
|
af918f |
|
|
|
af918f |
#include <crm/crm.h>
|
|
|
af918f |
#include <crm/services.h>
|
|
|
af918f |
@@ -1151,7 +1152,8 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
|
|
|
af918f |
crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
|
|
|
af918f |
|
|
|
af918f |
} else if (is_container_remote_node(this_node) == FALSE
|
|
|
af918f |
- && rsc->role == RSC_ROLE_STARTED) {
|
|
|
af918f |
+ && ((rsc->role == RSC_ROLE_STARTED)
|
|
|
af918f |
+ || is_set(data_set->flags, pe_flag_shutdown_lock))) {
|
|
|
af918f |
check = TRUE;
|
|
|
af918f |
crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
|
|
|
af918f |
}
|
|
|
af918f |
@@ -1167,6 +1169,9 @@ unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
|
|
|
af918f |
|
|
|
af918f |
} else if (fence) {
|
|
|
af918f |
process = TRUE;
|
|
|
af918f |
+
|
|
|
af918f |
+ } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
|
|
|
af918f |
+ process = TRUE;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
if(process) {
|
|
|
af918f |
@@ -2286,6 +2291,28 @@ calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
+// If resource history entry has an unexpired shutdown lock, remember lock node and time in rsc
|
|
|
af918f |
+static void
|
|
|
af918f |
+unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node, // rsc_entry is read for the lock attribute; rsc receives the lock; node becomes its lock node
|
|
|
af918f |
+ pe_working_set_t *data_set) // data_set supplies the shutdown_lock value and the effective time
|
|
|
af918f |
+{
|
|
|
af918f |
+ time_t lock_time = 0; // When lock started (i.e. node shutdown time)
|
|
|
af918f |
+
|
|
|
af918f |
+ if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
|
|
|
af918f |
+ &lock_time) == pcmk_ok) && (lock_time != 0)) { // Do nothing unless the attribute was read successfully and is nonzero
|
|
|
af918f |
+
|
|
|
af918f |
+ if ((data_set->shutdown_lock > 0) // A non-positive shutdown_lock never takes the expiry branch (presumably "no limit" -- confirm)
|
|
|
af918f |
+ && (get_effective_time(data_set)
|
|
|
af918f |
+ > (lock_time + data_set->shutdown_lock))) { // Effective time is past lock start plus shutdown_lock
|
|
|
af918f |
+ pe_rsc_info(rsc, "Shutdown lock for %s on %s expired", // Expired: only logged here; rsc's lock fields are left as they were
|
|
|
af918f |
+ rsc->id, node->details->uname);
|
|
|
af918f |
+ } else { // Lock still applies: record it on the resource
|
|
|
af918f |
+ rsc->lock_node = node;
|
|
|
af918f |
+ rsc->lock_time = lock_time;
|
|
|
af918f |
+ }
|
|
|
af918f |
+ }
|
|
|
af918f |
+}
|
|
|
af918f |
+
|
|
|
af918f |
static resource_t *
|
|
|
af918f |
unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
|
|
|
af918f |
{
|
|
|
af918f |
@@ -2322,18 +2349,30 @@ unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
- if (op_list == NULL) {
|
|
|
af918f |
- /* if there are no operations, there is nothing to do */
|
|
|
af918f |
- return NULL;
|
|
|
af918f |
+ if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) {
|
|
|
af918f |
+ if (op_list == NULL) {
|
|
|
af918f |
+ // If there are no operations, there is nothing to do
|
|
|
af918f |
+ return NULL;
|
|
|
af918f |
+ }
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
/* find the resource */
|
|
|
af918f |
rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
|
|
|
af918f |
if (rsc == NULL) {
|
|
|
af918f |
- rsc = process_orphan_resource(rsc_entry, node, data_set);
|
|
|
af918f |
+ if (op_list == NULL) {
|
|
|
af918f |
+ // If there are no operations, there is nothing to do
|
|
|
af918f |
+ return NULL;
|
|
|
af918f |
+ } else {
|
|
|
af918f |
+ rsc = process_orphan_resource(rsc_entry, node, data_set);
|
|
|
af918f |
+ }
|
|
|
af918f |
}
|
|
|
af918f |
CRM_ASSERT(rsc != NULL);
|
|
|
af918f |
|
|
|
af918f |
+ // Check whether the resource is "shutdown-locked" to this node
|
|
|
af918f |
+ if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
|
|
|
af918f |
+ unpack_shutdown_lock(rsc_entry, rsc, node, data_set);
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
/* process operations */
|
|
|
af918f |
saved_role = rsc->role;
|
|
|
af918f |
on_fail = action_fail_ignore;
|
|
|
af918f |
diff --git a/pengine/allocate.c b/pengine/allocate.c
|
|
|
af918f |
index 09f9e51..7366716 100644
|
|
|
af918f |
--- a/pengine/allocate.c
|
|
|
af918f |
+++ b/pengine/allocate.c
|
|
|
af918f |
@@ -1047,8 +1047,23 @@ apply_shutdown_lock(pe_resource_t *rsc, pe_working_set_t *data_set)
|
|
|
af918f |
return;
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
+ if (rsc->lock_node != NULL) {
|
|
|
af918f |
+ // The lock was obtained from resource history
|
|
|
af918f |
+
|
|
|
af918f |
+ if (rsc->running_on != NULL) {
|
|
|
af918f |
+ /* The resource was started elsewhere even though it is now
|
|
|
af918f |
+ * considered locked. This shouldn't be possible, but as a
|
|
|
af918f |
+ * failsafe, we don't want to disturb the resource now.
|
|
|
af918f |
+ */
|
|
|
af918f |
+ pe_rsc_info(rsc,
|
|
|
af918f |
+ "Cancelling shutdown lock because %s is already active",
|
|
|
af918f |
+ rsc->id);
|
|
|
af918f |
+ rsc->lock_node = NULL;
|
|
|
af918f |
+ rsc->lock_time = 0;
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
// Only a resource active on exactly one node can be locked
|
|
|
af918f |
- if (pcmk__list_of_1(rsc->running_on)) {
|
|
|
af918f |
+ } else if (pcmk__list_of_1(rsc->running_on)) {
|
|
|
af918f |
pe_node_t *node = rsc->running_on->data;
|
|
|
af918f |
|
|
|
af918f |
if (node->details->shutdown) {
|
|
|
af918f |
diff --git a/pengine/graph.c b/pengine/graph.c
|
|
|
af918f |
index cba30d0..33168ca 100644
|
|
|
af918f |
--- a/pengine/graph.c
|
|
|
af918f |
+++ b/pengine/graph.c
|
|
|
af918f |
@@ -1,5 +1,7 @@
|
|
|
af918f |
/*
|
|
|
af918f |
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
|
|
|
af918f |
+ * Copyright 2004-2020 the Pacemaker project contributors
|
|
|
af918f |
+ *
|
|
|
af918f |
+ * The version control history for this file may have further details.
|
|
|
af918f |
*
|
|
|
af918f |
* This program is free software; you can redistribute it and/or
|
|
|
af918f |
* modify it under the terms of the GNU General Public
|
|
|
af918f |
@@ -998,6 +1000,26 @@ add_downed_nodes(xmlNode *xml, const action_t *action,
|
|
|
af918f |
}
|
|
|
af918f |
}
|
|
|
af918f |
|
|
|
af918f |
+static bool // true if the action should be marked with the shutdown-lock attribute, false otherwise
|
|
|
af918f |
+should_lock_action(pe_action_t *action) // NOTE(review): action->rsc is dereferenced unchecked below -- confirm callers only pass resource actions
|
|
|
af918f |
+{
|
|
|
af918f |
+ // Only actions taking place on resource's lock node are locked
|
|
|
af918f |
+ if ((action->rsc->lock_node == NULL) || (action->node == NULL) // No lock recorded on the resource, or action has no node
|
|
|
af918f |
+ || (action->node->details != action->rsc->lock_node->details)) { // Nodes are matched by comparing their details pointers
|
|
|
af918f |
+ return false;
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
+ /* During shutdown, only stops are locked (otherwise, another action such as
|
|
|
af918f |
+ * a demote would cause the controller to clear the lock)
|
|
|
af918f |
+ */
|
|
|
af918f |
+ if (action->node->details->shutdown && action->task // A NULL task skips this check and falls through to "true"
|
|
|
af918f |
+ && strcmp(action->task, RSC_STOP)) { // Nonzero strcmp() result: task is something other than RSC_STOP
|
|
|
af918f |
+ return false;
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
+ return true; // On the lock node, and either the node is not shutting down or the action is a stop
|
|
|
af918f |
+}
|
|
|
af918f |
+
|
|
|
af918f |
static xmlNode *
|
|
|
af918f |
action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
|
|
|
af918f |
{
|
|
|
af918f |
@@ -1104,6 +1126,14 @@ action2xml(action_t * action, gboolean as_input, pe_working_set_t *data_set)
|
|
|
af918f |
XML_ATTR_TYPE
|
|
|
af918f |
};
|
|
|
af918f |
|
|
|
af918f |
+ /* If a resource is locked to a node via shutdown-lock, mark its actions
|
|
|
af918f |
+ * so the controller can preserve the lock when the action completes.
|
|
|
af918f |
+ */
|
|
|
af918f |
+ if (should_lock_action(action)) {
|
|
|
af918f |
+ crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
|
|
|
af918f |
+ (long long) action->rsc->lock_time);
|
|
|
af918f |
+ }
|
|
|
af918f |
+
|
|
|
af918f |
// List affected resource
|
|
|
af918f |
|
|
|
af918f |
rsc_xml = create_xml_node(action_xml,
|
|
|
af918f |
--
|
|
|
af918f |
1.8.3.1
|
|
|
af918f |
|