6db787
From b49f49576ef9d801a48ce7a01a78c72e65be7880 Mon Sep 17 00:00:00 2001
6db787
From: Klaus Wenninger <klaus.wenninger@aon.at>
6db787
Date: Fri, 30 Jul 2021 18:07:25 +0200
6db787
Subject: [PATCH 1/3] Fix, Refactor: fenced: add return value to
6db787
 get_agent_metadata
6db787
6db787
Used to distinguish between metadata that is empty by design,
6db787
a failure to get metadata that might succeed on a
6db787
retry, and a fatal failure.
6db787
Also fixes a regression that leads to endless retries getting
6db787
metadata for #watchdog - not super serious, as it happens with
6db787
delays in between, but still undesirable.
6db787
---
6db787
 daemons/fenced/fenced_commands.c | 92 +++++++++++++++++++-------------
6db787
 1 file changed, 55 insertions(+), 37 deletions(-)
6db787
6db787
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
6db787
index a778801b1..cd9968f1a 100644
6db787
--- a/daemons/fenced/fenced_commands.c
6db787
+++ b/daemons/fenced/fenced_commands.c
6db787
@@ -69,7 +69,7 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re
6db787
 static void search_devices_record_result(struct device_search_s *search, const char *device,
6db787
                                          gboolean can_fence);
6db787
 
6db787
-static xmlNode * get_agent_metadata(const char *agent);
6db787
+static int get_agent_metadata(const char *agent, xmlNode **metadata);
6db787
 static void read_action_metadata(stonith_device_t *device);
6db787
 
6db787
 typedef struct async_command_s {
6db787
@@ -323,19 +323,26 @@ fork_cb(GPid pid, gpointer user_data)
6db787
 static int
6db787
 get_agent_metadata_cb(gpointer data) {
6db787
     stonith_device_t *device = data;
6db787
+    guint period_ms;
6db787
 
6db787
-    device->agent_metadata = get_agent_metadata(device->agent);
6db787
-    if (device->agent_metadata) {
6db787
-        read_action_metadata(device);
6db787
-        stonith__device_parameter_flags(&(device->flags), device->id,
6db787
+    switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
6db787
+        case pcmk_rc_ok:
6db787
+            if (device->agent_metadata) {
6db787
+                read_action_metadata(device);
6db787
+                stonith__device_parameter_flags(&(device->flags), device->id,
6db787
                                         device->agent_metadata);
6db787
-        return G_SOURCE_REMOVE;
6db787
-    } else {
6db787
-        guint period_ms = pcmk__mainloop_timer_get_period(device->timer);
6db787
-        if (period_ms < 160 * 1000) {
6db787
-            mainloop_timer_set_period(device->timer, 2 * period_ms);
6db787
-        }
6db787
-        return G_SOURCE_CONTINUE;
6db787
+            }
6db787
+            return G_SOURCE_REMOVE;
6db787
+
6db787
+        case EAGAIN:
6db787
+            period_ms = pcmk__mainloop_timer_get_period(device->timer);
6db787
+            if (period_ms < 160 * 1000) {
6db787
+                mainloop_timer_set_period(device->timer, 2 * period_ms);
6db787
+            }
6db787
+            return G_SOURCE_CONTINUE;
6db787
+
6db787
+        default:
6db787
+            return G_SOURCE_REMOVE;
6db787
     }
6db787
 }
6db787
 
6db787
@@ -700,38 +707,41 @@ init_metadata_cache(void) {
6db787
     }
6db787
 }
6db787
 
6db787
-static xmlNode *
6db787
-get_agent_metadata(const char *agent)
6db787
+int
6db787
+get_agent_metadata(const char *agent, xmlNode ** metadata)
6db787
 {
6db787
-    xmlNode *xml = NULL;
6db787
     char *buffer = NULL;
6db787
 
6db787
+    if (metadata == NULL) {
6db787
+        return EINVAL;
6db787
+    }
6db787
+    *metadata = NULL;
6db787
+    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
6db787
+        return pcmk_rc_ok;
6db787
+    }
6db787
     init_metadata_cache();
6db787
     buffer = g_hash_table_lookup(metadata_cache, agent);
6db787
-    if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
6db787
-        return NULL;
6db787
-
6db787
-    } else if(buffer == NULL) {
6db787
+    if (buffer == NULL) {
6db787
         stonith_t *st = stonith_api_new();
6db787
         int rc;
6db787
 
6db787
         if (st == NULL) {
6db787
             crm_warn("Could not get agent meta-data: "
6db787
                      "API memory allocation failed");
6db787
-            return NULL;
6db787
+            return EAGAIN;
6db787
         }
6db787
-        rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
6db787
+        rc = st->cmds->metadata(st, st_opt_sync_call, agent,
6db787
+                                NULL, &buffer, 10);
6db787
         stonith_api_delete(st);
6db787
         if (rc || !buffer) {
6db787
             crm_err("Could not retrieve metadata for fencing agent %s", agent);
6db787
-            return NULL;
6db787
+            return EAGAIN;
6db787
         }
6db787
         g_hash_table_replace(metadata_cache, strdup(agent), buffer);
6db787
     }
6db787
 
6db787
-    xml = string2xml(buffer);
6db787
-
6db787
-    return xml;
6db787
+    *metadata = string2xml(buffer);
6db787
+    return pcmk_rc_ok;
6db787
 }
6db787
 
6db787
 static gboolean
6db787
@@ -962,19 +972,27 @@ build_device_from_xml(xmlNode * msg)
6db787
         g_list_free_full(device->targets, free);
6db787
         device->targets = NULL;
6db787
     }
6db787
-    device->agent_metadata = get_agent_metadata(device->agent);
6db787
-    if (device->agent_metadata) {
6db787
-        read_action_metadata(device);
6db787
-        stonith__device_parameter_flags(&(device->flags), device->id,
6db787
-                                        device->agent_metadata);
6db787
-    } else {
6db787
-        if (device->timer == NULL) {
6db787
-            device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
6db787
+    switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
6db787
+        case pcmk_rc_ok:
6db787
+            if (device->agent_metadata) {
6db787
+                read_action_metadata(device);
6db787
+                stonith__device_parameter_flags(&(device->flags), device->id,
6db787
+                                                device->agent_metadata);
6db787
+            }
6db787
+            break;
6db787
+
6db787
+        case EAGAIN:
6db787
+            if (device->timer == NULL) {
6db787
+                device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
6db787
                                            TRUE, get_agent_metadata_cb, device);
6db787
-        }
6db787
-        if (!mainloop_timer_running(device->timer)) {
6db787
-            mainloop_timer_start(device->timer);
6db787
-        }
6db787
+            }
6db787
+            if (!mainloop_timer_running(device->timer)) {
6db787
+                mainloop_timer_start(device->timer);
6db787
+            }
6db787
+            break;
6db787
+
6db787
+        default:
6db787
+            break;
6db787
     }
6db787
 
6db787
     value = g_hash_table_lookup(device->params, "nodeid");
6db787
-- 
6db787
2.27.0
6db787
6db787
6db787
From 5dd1e4459335764e0adf5fa78d81c875ae2332e9 Mon Sep 17 00:00:00 2001
6db787
From: Klaus Wenninger <klaus.wenninger@aon.at>
6db787
Date: Fri, 30 Jul 2021 18:15:10 +0200
6db787
Subject: [PATCH 2/3] feature: watchdog-fencing: allow restriction to certain
6db787
 nodes
6db787
6db787
Bump CRM_FEATURE_SET to 3.11.0 to encourage cluster being
6db787
fully upgraded to a version that supports the feature
6db787
before explicitly adding a watchdog-fence-device.
6db787
---
6db787
 configure.ac                        |   1 +
6db787
 daemons/controld/controld_control.c |   2 +-
6db787
 daemons/controld/controld_fencing.c |  14 ++
6db787
 daemons/controld/controld_fencing.h |   1 +
6db787
 daemons/fenced/Makefile.am          |   2 +-
6db787
 daemons/fenced/fence_watchdog.in    | 283 ++++++++++++++++++++++++++++
6db787
 daemons/fenced/fenced_commands.c    | 141 +++++++++++---
6db787
 daemons/fenced/fenced_remote.c      |  71 ++++---
6db787
 daemons/fenced/pacemaker-fenced.c   | 131 +++++++++----
6db787
 daemons/fenced/pacemaker-fenced.h   |   5 +-
6db787
 include/crm/crm.h                   |   2 +-
6db787
 include/crm/fencing/internal.h      |   8 +-
6db787
 lib/fencing/st_client.c             |  61 ++++++
6db787
 lib/lrmd/lrmd_client.c              |   6 +-
6db787
 rpm/pacemaker.spec.in               |   3 +
6db787
 16 files changed, 635 insertions(+), 97 deletions(-)
6db787
 create mode 100755 daemons/fenced/fence_watchdog.in
6db787
6db787
diff --git a/configure.ac b/configure.ac
6db787
index 436100c81..013562e46 100644
6db787
--- a/configure.ac
6db787
+++ b/configure.ac
6db787
@@ -1972,6 +1972,7 @@ CONFIG_FILES_EXEC([cts/cts-cli],
6db787
                   [cts/support/fence_dummy],
6db787
                   [cts/support/pacemaker-cts-dummyd],
6db787
                   [daemons/fenced/fence_legacy],
6db787
+                  [daemons/fenced/fence_watchdog],
6db787
                   [doc/abi-check],
6db787
                   [extra/resources/ClusterMon],
6db787
                   [extra/resources/HealthSMART],
6db787
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
6db787
index 45a70bb92..b5da6a46c 100644
6db787
--- a/daemons/controld/controld_control.c
6db787
+++ b/daemons/controld/controld_control.c
6db787
@@ -615,7 +615,7 @@ static pcmk__cluster_option_t crmd_opts[] = {
6db787
     },
6db787
     {
6db787
         "stonith-watchdog-timeout", NULL, "time", NULL,
6db787
-        "0", pcmk__valid_sbd_timeout,
6db787
+        "0", controld_verify_stonith_watchdog_timeout,
6db787
         "How long to wait before we can assume nodes are safely down "
6db787
             "when watchdog-based self-fencing via SBD is in use",
6db787
         "If nonzero, along with `have-watchdog=true` automatically set by the "
6db787
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
6db787
index 0fba6613b..6c2a6c550 100644
6db787
--- a/daemons/controld/controld_fencing.c
6db787
+++ b/daemons/controld/controld_fencing.c
6db787
@@ -11,6 +11,7 @@
6db787
 #include <crm/crm.h>
6db787
 #include <crm/msg_xml.h>
6db787
 #include <crm/common/xml.h>
6db787
+#include <crm/stonith-ng.h>
6db787
 #include <crm/fencing/internal.h>
6db787
 
6db787
 #include <pacemaker-controld.h>
6db787
@@ -886,6 +887,19 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
6db787
     return TRUE;
6db787
 }
6db787
 
6db787
+bool
6db787
+controld_verify_stonith_watchdog_timeout(const char *value)
6db787
+{
6db787
+    gboolean rv = TRUE;
6db787
+
6db787
+    if (stonith_api && (stonith_api->state != stonith_disconnected) &&
6db787
+        stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
6db787
+                                                       fsa_our_uname)) {
6db787
+        rv = pcmk__valid_sbd_timeout(value);
6db787
+    }
6db787
+    return rv;
6db787
+}
6db787
+
6db787
 /* end stonith API client functions */
6db787
 
6db787
 
6db787
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
6db787
index d0ecc8234..ef68a0c83 100644
6db787
--- a/daemons/controld/controld_fencing.h
6db787
+++ b/daemons/controld/controld_fencing.h
6db787
@@ -24,6 +24,7 @@ void update_stonith_max_attempts(const char* value);
6db787
 void controld_trigger_fencer_connect(void);
6db787
 void controld_disconnect_fencer(bool destroy);
6db787
 gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
6db787
+bool controld_verify_stonith_watchdog_timeout(const char *value);
6db787
 
6db787
 // stonith cleanup list
6db787
 void add_stonith_cleanup(const char *target);
6db787
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am
6db787
index 43413e11d..2923d7c9b 100644
6db787
--- a/daemons/fenced/Makefile.am
6db787
+++ b/daemons/fenced/Makefile.am
6db787
@@ -15,7 +15,7 @@ halibdir	= $(CRM_DAEMON_DIR)
6db787
 
6db787
 halib_PROGRAMS	= pacemaker-fenced cts-fence-helper
6db787
 
6db787
-sbin_SCRIPTS	= fence_legacy
6db787
+sbin_SCRIPTS	= fence_legacy fence_watchdog
6db787
 
6db787
 noinst_HEADERS	= pacemaker-fenced.h
6db787
 
6db787
diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in
6db787
new file mode 100755
6db787
index 000000000..c83304f1d
6db787
--- /dev/null
6db787
+++ b/daemons/fenced/fence_watchdog.in
6db787
@@ -0,0 +1,283 @@
6db787
+#!@PYTHON@
6db787
+"""Dummy watchdog fence agent for providing meta-data for the pacemaker internal agent
6db787
+"""
6db787
+
6db787
+__copyright__ = "Copyright 2012-2021 the Pacemaker project contributors"
6db787
+__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
6db787
+
6db787
+import io
6db787
+import os
6db787
+import re
6db787
+import sys
6db787
+import atexit
6db787
+import getopt
6db787
+
6db787
+SHORT_DESC = "Dummy watchdog fence agent"
6db787
+LONG_DESC = """fence_watchdog just provides
6db787
+meta-data - actual fencing is done by the pacemaker internal watchdog agent."""
6db787
+
6db787
+ALL_OPT = {
6db787
+    "version" : {
6db787
+        "getopt" : "V",
6db787
+        "longopt" : "version",
6db787
+        "help" : "-V, --version                  Display version information and exit",
6db787
+        "required" : "0",
6db787
+        "shortdesc" : "Display version information and exit",
6db787
+        "order" : 53
6db787
+        },
6db787
+    "help"    : {
6db787
+        "getopt" : "h",
6db787
+        "longopt" : "help",
6db787
+        "help" : "-h, --help                     Display this help and exit",
6db787
+        "required" : "0",
6db787
+        "shortdesc" : "Display help and exit",
6db787
+        "order" : 54
6db787
+        },
6db787
+    "action" : {
6db787
+        "getopt" : "o:",
6db787
+        "longopt" : "action",
6db787
+        "help" : "-o, --action=[action]          Action: metadata",
6db787
+        "required" : "1",
6db787
+        "shortdesc" : "Fencing Action",
6db787
+        "default" : "metadata",
6db787
+        "order" : 1
6db787
+        },
6db787
+    "nodename" : {
6db787
+        "getopt" : "N:",
6db787
+        "longopt" : "nodename",
6db787
+        "help" : "-N, --nodename                 Node name of fence victim (ignored)",
6db787
+        "required" : "0",
6db787
+        "shortdesc" : "Ignored",
6db787
+        "order" : 2
6db787
+        },
6db787
+    "plug" : {
6db787
+        "getopt" : "n:",
6db787
+        "longopt" : "plug",
6db787
+        "help" : "-n, --plug=[id]                Physical plug number on device (ignored)",
6db787
+        "required" : "1",
6db787
+        "shortdesc" : "Ignored",
6db787
+        "order" : 4
6db787
+        }
6db787
+}
6db787
+
6db787
+
6db787
+def agent():
6db787
+    """ Return name this file was run as. """
6db787
+
6db787
+    return os.path.basename(sys.argv[0])
6db787
+
6db787
+
6db787
+def fail_usage(message):
6db787
+    """ Print a usage message and exit. """
6db787
+
6db787
+    sys.exit("%s\nPlease use '-h' for usage" % message)
6db787
+
6db787
+
6db787
+def show_docs(options):
6db787
+    """ Handle informational options (display info and exit). """
6db787
+
6db787
+    device_opt = options["device_opt"]
6db787
+
6db787
+    if "-h" in options:
6db787
+        usage(device_opt)
6db787
+        sys.exit(0)
6db787
+
6db787
+    if "-o" in options and options["-o"].lower() == "metadata":
6db787
+        metadata(device_opt, options)
6db787
+        sys.exit(0)
6db787
+
6db787
+    if "-V" in options:
6db787
+        print(AGENT_VERSION)
6db787
+        sys.exit(0)
6db787
+
6db787
+
6db787
+def sorted_options(avail_opt):
6db787
+    """ Return a list of all options, in their internally specified order. """
6db787
+
6db787
+    sorted_list = [(key, ALL_OPT[key]) for key in avail_opt]
6db787
+    sorted_list.sort(key=lambda x: x[1]["order"])
6db787
+    return sorted_list
6db787
+
6db787
+
6db787
+def usage(avail_opt):
6db787
+    """ Print a usage message. """
6db787
+    print(LONG_DESC)
6db787
+    print()
6db787
+    print("Usage:")
6db787
+    print("\t" + agent() + " [options]")
6db787
+    print("Options:")
6db787
+
6db787
+    for dummy, value in sorted_options(avail_opt):
6db787
+        if len(value["help"]) != 0:
6db787
+            print("   " + value["help"])
6db787
+
6db787
+
6db787
+def metadata(avail_opt, options):
6db787
+    """ Print agent metadata. """
6db787
+
6db787
+    print("""<?xml version="1.0" ?>
6db787
+<resource-agent name="%s" shortdesc="%s">
6db787
+<longdesc>%s</longdesc>
6db787
+<parameters>""" % (agent(), SHORT_DESC, LONG_DESC))
6db787
+
6db787
+    for option, dummy in sorted_options(avail_opt):
6db787
+        if "shortdesc" in ALL_OPT[option]:
6db787
+            print('    <parameter name="' + option + '" unique="0" ' +
6db787
+                  'required="' + ALL_OPT[option]["required"] + '">')
6db787
+
6db787
+            default = ""
6db787
+            default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1]
6db787
+            default_name_no_arg = "-" + ALL_OPT[option]["getopt"]
6db787
+
6db787
+            if "default" in ALL_OPT[option]:
6db787
+                default = 'default="%s"' % str(ALL_OPT[option]["default"])
6db787
+            elif default_name_arg in options:
6db787
+                if options[default_name_arg]:
6db787
+                    try:
6db787
+                        default = 'default="%s"' % options[default_name_arg]
6db787
+                    except TypeError:
6db787
+                        ## @todo/@note: Currently there is no clean way how to handle lists
6db787
+                        ## we can create a string from it but we can't set it on command line
6db787
+                        default = 'default="%s"' % str(options[default_name_arg])
6db787
+            elif default_name_no_arg in options:
6db787
+                default = 'default="true"'
6db787
+
6db787
+            mixed = ALL_OPT[option]["help"]
6db787
+            ## split it between option and help text
6db787
+            res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed)
6db787
+            if None != res:
6db787
+                mixed = res.group(1)
6db787
+            mixed = mixed.replace("<", "&lt;").replace(">", "&gt;")
6db787
+            print('      <getopt mixed="' + mixed + '" />')
6db787
+
6db787
+            if ALL_OPT[option]["getopt"].count(":") > 0:
6db787
+                print('      <content type="string" ' + default + ' />')
6db787
+            else:
6db787
+                print('      <content type="boolean" ' + default + ' />')
6db787
+
6db787
+            print('      <shortdesc lang="en">' + ALL_OPT[option]["shortdesc"] + '</shortdesc>')
6db787
+            print('    </parameter>')
6db787
+
6db787
+    print('  </parameters>\n <actions>')
6db787
+    print('    <action name="on" />')
6db787
+    print('    <action name="off" />')
6db787
+    print('    <action name="reboot" />')
6db787
+    print('    <action name="monitor" />')
6db787
+    print('    <action name="list" />')
6db787
+    print('    <action name="metadata" />')
6db787
+    print('  </actions>')
6db787
+    print('</resource-agent>')
6db787
+
6db787
+
6db787
+def option_longopt(option):
6db787
+    """ Return the getopt-compatible long-option name of the given option. """
6db787
+
6db787
+    if ALL_OPT[option]["getopt"].endswith(":"):
6db787
+        return ALL_OPT[option]["longopt"] + "="
6db787
+    else:
6db787
+        return ALL_OPT[option]["longopt"]
6db787
+
6db787
+
6db787
+def opts_from_command_line(argv, avail_opt):
6db787
+    """ Read options from command-line arguments. """
6db787
+
6db787
+    # Prepare list of options for getopt
6db787
+    getopt_string = ""
6db787
+    longopt_list = []
6db787
+    for k in avail_opt:
6db787
+        if k in ALL_OPT:
6db787
+            getopt_string += ALL_OPT[k]["getopt"]
6db787
+        else:
6db787
+            fail_usage("Parse error: unknown option '" + k + "'")
6db787
+
6db787
+        if k in ALL_OPT and "longopt" in ALL_OPT[k]:
6db787
+            longopt_list.append(option_longopt(k))
6db787
+
6db787
+    try:
6db787
+        opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list)
6db787
+    except getopt.GetoptError as error:
6db787
+        fail_usage("Parse error: " + error.msg)
6db787
+
6db787
+    # Transform longopt to short one which are used in fencing agents
6db787
+    old_opt = opt
6db787
+    opt = {}
6db787
+    for old_option in dict(old_opt).keys():
6db787
+        if old_option.startswith("--"):
6db787
+            for option in ALL_OPT.keys():
6db787
+                if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option:
6db787
+                    opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option]
6db787
+        else:
6db787
+            opt[old_option] = dict(old_opt)[old_option]
6db787
+
6db787
+    return opt
6db787
+
6db787
+
6db787
+def opts_from_stdin(avail_opt):
6db787
+    """ Read options from standard input. """
6db787
+
6db787
+    opt = {}
6db787
+    name = ""
6db787
+    for line in sys.stdin.readlines():
6db787
+        line = line.strip()
6db787
+        if line.startswith("#") or (len(line) == 0):
6db787
+            continue
6db787
+
6db787
+        (name, value) = (line + "=").split("=", 1)
6db787
+        value = value[:-1]
6db787
+
6db787
+        if name not in avail_opt:
6db787
+            print("Parse error: Ignoring unknown option '%s'" % line,
6db787
+                  file=sys.stderr)
6db787
+            continue
6db787
+
6db787
+        if ALL_OPT[name]["getopt"].endswith(":"):
6db787
+            opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value
6db787
+        elif value.lower() in ["1", "yes", "on", "true"]:
6db787
+            opt["-"+ALL_OPT[name]["getopt"]] = "1"
6db787
+
6db787
+    return opt
6db787
+
6db787
+
6db787
+def process_input(avail_opt):
6db787
+    """ Set standard environment variables, and parse all options. """
6db787
+
6db787
+    # Set standard environment
6db787
+    os.putenv("LANG", "C")
6db787
+    os.putenv("LC_ALL", "C")
6db787
+
6db787
+    # Read options from command line or standard input
6db787
+    if len(sys.argv) > 1:
6db787
+        return opts_from_command_line(sys.argv[1:], avail_opt)
6db787
+    else:
6db787
+        return opts_from_stdin(avail_opt)
6db787
+
6db787
+
6db787
+def atexit_handler():
6db787
+    """ Close stdout on exit. """
6db787
+
6db787
+    try:
6db787
+        sys.stdout.close()
6db787
+        os.close(1)
6db787
+    except IOError:
6db787
+        sys.exit("%s failed to close standard output" % agent())
6db787
+
6db787
+
6db787
+def main():
6db787
+    """ Make it so! """
6db787
+
6db787
+    device_opt = ALL_OPT.keys()
6db787
+
6db787
+    ## Defaults for fence agent
6db787
+    atexit.register(atexit_handler)
6db787
+    options = process_input(device_opt)
6db787
+    options["device_opt"] = device_opt
6db787
+    show_docs(options)
6db787
+
6db787
+    print("Watchdog fencing may be initiated only by the cluster, not this agent.",
6db787
+          file=sys.stderr)
6db787
+
6db787
+    sys.exit(1)
6db787
+
6db787
+
6db787
+if __name__ == "__main__":
6db787
+    main()
6db787
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
6db787
index cd9968f1a..9470ea2c1 100644
6db787
--- a/daemons/fenced/fenced_commands.c
6db787
+++ b/daemons/fenced/fenced_commands.c
6db787
@@ -397,15 +397,13 @@ stonith_device_execute(stonith_device_t * device)
6db787
         return TRUE;
6db787
     }
6db787
 
6db787
-    if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
6db787
-        if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
6db787
-            pcmk__panic(__func__);
6db787
-            goto done;
6db787
-
6db787
-        } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
6db787
-            pcmk__panic(__func__);
6db787
-            goto done;
6db787
-
6db787
+    if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
6db787
+                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
6db787
+        if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) {
6db787
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
6db787
+                pcmk__panic(__func__);
6db787
+                goto done;
6db787
+            }
6db787
         } else {
6db787
             crm_info("Faking success for %s watchdog operation", cmd->action);
6db787
             cmd->done_cb(0, 0, NULL, cmd);
6db787
@@ -716,7 +714,7 @@ get_agent_metadata(const char *agent, xmlNode ** metadata)
6db787
         return EINVAL;
6db787
     }
6db787
     *metadata = NULL;
6db787
-    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
6db787
+    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
6db787
         return pcmk_rc_ok;
6db787
     }
6db787
     init_metadata_cache();
6db787
@@ -1050,24 +1048,6 @@ schedule_internal_command(const char *origin,
6db787
     schedule_stonith_command(cmd, device);
6db787
 }
6db787
 
6db787
-gboolean
6db787
-string_in_list(GList *list, const char *item)
6db787
-{
6db787
-    int lpc = 0;
6db787
-    int max = g_list_length(list);
6db787
-
6db787
-    for (lpc = 0; lpc < max; lpc++) {
6db787
-        const char *value = g_list_nth_data(list, lpc);
6db787
-
6db787
-        if (pcmk__str_eq(item, value, pcmk__str_casei)) {
6db787
-            return TRUE;
6db787
-        } else {
6db787
-            crm_trace("%d: '%s' != '%s'", lpc, item, value);
6db787
-        }
6db787
-    }
6db787
-    return FALSE;
6db787
-}
6db787
-
6db787
 static void
6db787
 status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
6db787
 {
6db787
@@ -1144,7 +1124,7 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
6db787
         if (!alias) {
6db787
             alias = search->host;
6db787
         }
6db787
-        if (string_in_list(dev->targets, alias)) {
6db787
+        if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
6db787
             can_fence = TRUE;
6db787
         }
6db787
     }
6db787
@@ -1215,9 +1195,62 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
6db787
     stonith_device_t *dup = NULL;
6db787
     stonith_device_t *device = build_device_from_xml(msg);
6db787
     guint ndevices = 0;
6db787
+    int rv = pcmk_ok;
6db787
 
6db787
     CRM_CHECK(device != NULL, return -ENOMEM);
6db787
 
6db787
+    /* do we have a watchdog-device? */
6db787
+    if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
6db787
+        pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
6db787
+                     STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
6db787
+        if (stonith_watchdog_timeout_ms <= 0) {
6db787
+            crm_err("Ignoring watchdog fence device without "
6db787
+                    "stonith-watchdog-timeout set.");
6db787
+            rv = -ENODEV;
6db787
+            /* fall through to cleanup & return */
6db787
+        } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
6db787
+                                 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
6db787
+            crm_err("Ignoring watchdog fence device with unknown "
6db787
+                    "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
6db787
+                    device->agent?device->agent:"");
6db787
+            rv = -ENODEV;
6db787
+            /* fall through to cleanup & return */
6db787
+        } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
6db787
+                                 pcmk__str_none)) {
6db787
+            crm_err("Ignoring watchdog fence device "
6db787
+                    "named %s !='"STONITH_WATCHDOG_ID"'.",
6db787
+                    device->id?device->id:"");
6db787
+            rv = -ENODEV;
6db787
+            /* fall through to cleanup & return */
6db787
+        } else {
6db787
+            if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
6db787
+                             pcmk__str_none)) {
6db787
+                /* this either has an empty list or the targets
6db787
+                   configured for watchdog-fencing
6db787
+                 */
6db787
+                g_list_free_full(stonith_watchdog_targets, free);
6db787
+                stonith_watchdog_targets = device->targets;
6db787
+                device->targets = NULL;
6db787
+            }
6db787
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
6db787
+                g_list_free_full(device->targets, free);
6db787
+                device->targets = stonith__parse_targets(stonith_our_uname);
6db787
+                g_hash_table_replace(device->params,
6db787
+                                     strdup(PCMK_STONITH_HOST_LIST),
6db787
+                                     strdup(stonith_our_uname));
6db787
+                /* proceed as with any other stonith-device */
6db787
+                break;
6db787
+            }
6db787
+
6db787
+            crm_debug("Skip registration of watchdog fence device on node not in host-list.");
6db787
+            /* cleanup and fall through to more cleanup and return */
6db787
+            device->targets = NULL;
6db787
+            stonith_device_remove(device->id, from_cib);
6db787
+        }
6db787
+        free_device(device);
6db787
+        return rv;
6db787
+    } while (0);
6db787
+
6db787
     dup = device_has_duplicate(device);
6db787
     if (dup) {
6db787
         ndevices = g_hash_table_size(device_list);
6db787
@@ -1598,6 +1631,39 @@ stonith_level_remove(xmlNode *msg, char **desc)
6db787
  *       (CIB registration is not sufficient), because monitor should not be
6db787
  *       possible unless the device is "started" (API registered).
6db787
  */
6db787
+
6db787
+static char *
6db787
+list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
6db787
+{
6db787
+    int max = g_list_length(list);
6db787
+    size_t delim_len = delim?strlen(delim):0;
6db787
+    size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
6db787
+    char *rv;
6db787
+    GList *gIter;
6db787
+
6db787
+    for (gIter = list; gIter != NULL; gIter = gIter->next) {
6db787
+        const char *value = (const char *) gIter->data;
6db787
+
6db787
+        alloc_size += strlen(value);
6db787
+    }
6db787
+    rv = calloc(alloc_size, sizeof(char));
6db787
+    if (rv) {
6db787
+        char *pos = rv;
6db787
+        const char *lead_delim = "";
6db787
+
6db787
+        for (gIter = list; gIter != NULL; gIter = gIter->next) {
6db787
+            const char *value = (const char *) gIter->data;
6db787
+
6db787
+            pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
6db787
+            lead_delim = delim;
6db787
+        }
6db787
+        if (max && terminate_with_delim) {
6db787
+            sprintf(pos, "%s", delim);
6db787
+        }
6db787
+    }
6db787
+    return rv;
6db787
+}
6db787
+
6db787
 static int
6db787
 stonith_device_action(xmlNode * msg, char **output)
6db787
 {
6db787
@@ -1615,6 +1681,19 @@ stonith_device_action(xmlNode * msg, char **output)
6db787
         return -EPROTO;
6db787
     }
6db787
 
6db787
+    if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
6db787
+        if (stonith_watchdog_timeout_ms <= 0) {
6db787
+            return -ENODEV;
6db787
+        } else {
6db787
+            if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
6db787
+                *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
6db787
+                return pcmk_ok;
6db787
+            } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
6db787
+                return pcmk_ok;
6db787
+            }
6db787
+        }
6db787
+    }
6db787
+
6db787
     device = g_hash_table_lookup(device_list, id);
6db787
     if ((device == NULL)
6db787
         || (!device->api_registered && !strcmp(action, "monitor"))) {
6db787
@@ -1742,7 +1821,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
6db787
          * Only use if all hosts on which the device can be active can always fence all listed hosts
6db787
          */
6db787
 
6db787
-        if (string_in_list(dev->targets, host)) {
6db787
+        if (pcmk__str_in_list(dev->targets, host, pcmk__str_casei)) {
6db787
             can = TRUE;
6db787
         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
6db787
                    && g_hash_table_lookup(dev->aliases, host)) {
6db787
@@ -1763,7 +1842,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
6db787
             return;
6db787
         }
6db787
 
6db787
-        if (string_in_list(dev->targets, alias)) {
6db787
+        if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
6db787
             can = TRUE;
6db787
         }
6db787
 
6db787
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
6db787
index cf91acaed..224f2baba 100644
6db787
--- a/daemons/fenced/fenced_remote.c
6db787
+++ b/daemons/fenced/fenced_remote.c
6db787
@@ -1522,6 +1522,25 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
6db787
     }
6db787
 }
6db787
 
6db787
+static gboolean
6db787
+check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
6db787
+{
6db787
+    if (node_does_watchdog_fencing(op->target)) {
6db787
+
6db787
+        crm_notice("Waiting %lds for %s to self-fence (%s) for "
6db787
+                   "client %s " CRM_XS " id=%.8s",
6db787
+                   (stonith_watchdog_timeout_ms / 1000),
6db787
+                   op->target, op->action, op->client_name, op->id);
6db787
+        op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
6db787
+                                         remote_op_watchdog_done, op);
6db787
+        return TRUE;
6db787
+    } else {
6db787
+        crm_debug("Skipping fallback to watchdog-fencing as %s is "
6db787
+                 "not in host-list", op->target);
6db787
+    }
6db787
+    return FALSE;
6db787
+}
6db787
+
6db787
 void
6db787
 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
6db787
 {
6db787
@@ -1592,26 +1611,33 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
6db787
             g_source_remove(op->op_timer_one);
6db787
         }
6db787
 
6db787
-        if(stonith_watchdog_timeout_ms > 0 && device && pcmk__str_eq(device, "watchdog", pcmk__str_casei)) {
6db787
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
6db787
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
6db787
-                       op->target, op->action, op->client_name, op->id);
6db787
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
6db787
-
6db787
-            /* TODO check devices to verify watchdog will be in use */
6db787
-        } else if(stonith_watchdog_timeout_ms > 0
6db787
-                  && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
6db787
-                  && !pcmk__str_eq(op->action, "on", pcmk__str_casei)) {
6db787
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
6db787
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
6db787
-                       op->target, op->action, op->client_name, op->id);
6db787
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
6db787
-
6db787
-        } else {
6db787
+        if (!(stonith_watchdog_timeout_ms > 0 && (
6db787
+                (pcmk__str_eq(device, STONITH_WATCHDOG_ID,
6db787
+                                        pcmk__str_none)) ||
6db787
+                (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
6db787
+                    && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) &&
6db787
+             check_watchdog_fencing_and_wait(op))) {
6db787
+
6db787
+            /* Some thoughts about self-fencing cases reaching this point:
6db787
+               - Actually check in check_watchdog_fencing_and_wait
6db787
+                 shouldn't fail if STONITH_WATCHDOG_ID is
6db787
+                 chosen as fencing-device and it being present implies
6db787
+                 watchdog-fencing is enabled anyway
6db787
+               - If watchdog-fencing is disabled either in general or for
6db787
+                 a specific target - detected in check_watchdog_fencing_and_wait -
6db787
+                 for some other kind of self-fencing we can't expect
6db787
+                 a success answer but timeout is fine if the node doesn't
6db787
+                 come back in between
6db787
+               - Delicate might be the case where we have watchdog-fencing
6db787
+                 enabled for a node but the watchdog-fencing-device isn't
6db787
+                 explicitly chosen for suicide. Local pe-execution in sbd
6db787
+                 may detect the node as unclean and lead to timely suicide.
6db787
+                 Otherwise the selection of stonith-watchdog-timeout at
6db787
+                 least is questionable.
6db787
+             */
6db787
             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
6db787
         }
6db787
 
6db787
-
6db787
         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
6db787
         peer->tried = TRUE;
6db787
         free_xml(remote_op);
6db787
@@ -1645,12 +1671,11 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
6db787
          * but we have all the expected replies, then no devices
6db787
          * are available to execute the fencing operation. */
6db787
 
6db787
-        if(stonith_watchdog_timeout_ms && pcmk__str_eq(device, "watchdog", pcmk__str_null_matches | pcmk__str_casei)) {
6db787
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
6db787
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
6db787
-                       op->target, op->action, op->client_name, op->id);
6db787
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
6db787
-            return;
6db787
+        if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
6db787
+           STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
6db787
+            if (check_watchdog_fencing_and_wait(op)) {
6db787
+                return;
6db787
+            }
6db787
         }
6db787
 
6db787
         if (op->state == st_query) {
6db787
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
6db787
index 39738d8be..7f8b427d9 100644
6db787
--- a/daemons/fenced/pacemaker-fenced.c
6db787
+++ b/daemons/fenced/pacemaker-fenced.c
6db787
@@ -42,6 +42,7 @@
6db787
 
6db787
 char *stonith_our_uname = NULL;
6db787
 long stonith_watchdog_timeout_ms = 0;
6db787
+GList *stonith_watchdog_targets = NULL;
6db787
 
6db787
 static GMainLoop *mainloop = NULL;
6db787
 
6db787
@@ -578,7 +579,44 @@ our_node_allowed_for(pe_resource_t *rsc)
6db787
 }
6db787
 
6db787
 static void
6db787
-watchdog_device_update(xmlNode *cib)
6db787
+watchdog_device_update(void)
6db787
+{
6db787
+    if (stonith_watchdog_timeout_ms > 0) {
6db787
+        if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
6db787
+            !stonith_watchdog_targets) {
6db787
+            /* getting here watchdog-fencing enabled, no device there yet
6db787
+               and reason isn't stonith_watchdog_targets preventing that
6db787
+             */
6db787
+            int rc;
6db787
+            xmlNode *xml;
6db787
+
6db787
+            xml = create_device_registration_xml(
6db787
+                    STONITH_WATCHDOG_ID,
6db787
+                    st_namespace_internal,
6db787
+                    STONITH_WATCHDOG_AGENT,
6db787
+                    NULL, /* stonith_device_register will add our
6db787
+                             own name as PCMK_STONITH_HOST_LIST param
6db787
+                             so we can skip that here
6db787
+                           */
6db787
+                    NULL);
6db787
+            rc = stonith_device_register(xml, NULL, TRUE);
6db787
+            free_xml(xml);
6db787
+            if (rc != pcmk_ok) {
6db787
+                crm_crit("Cannot register watchdog pseudo fence agent");
6db787
+                crm_exit(CRM_EX_FATAL);
6db787
+            }
6db787
+        }
6db787
+
6db787
+    } else {
6db787
+        /* be silent if no device - todo parameter to stonith_device_remove */
6db787
+        if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
6db787
+            stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
6db787
+        }
6db787
+    }
6db787
+}
6db787
+
6db787
+static void
6db787
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
6db787
 {
6db787
     xmlNode *stonith_enabled_xml = NULL;
6db787
     const char *stonith_enabled_s = NULL;
6db787
@@ -608,33 +646,7 @@ watchdog_device_update(xmlNode *cib)
6db787
         }
6db787
     }
6db787
 
6db787
-    if (timeout_ms != stonith_watchdog_timeout_ms) {
6db787
-        crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
6db787
-        stonith_watchdog_timeout_ms = timeout_ms;
6db787
-
6db787
-        if (stonith_watchdog_timeout_ms > 0) {
6db787
-            int rc;
6db787
-            xmlNode *xml;
6db787
-            stonith_key_value_t *params = NULL;
6db787
-
6db787
-            params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST,
6db787
-                                           stonith_our_uname);
6db787
-
6db787
-            xml = create_device_registration_xml("watchdog", st_namespace_internal,
6db787
-                                                 STONITH_WATCHDOG_AGENT, params,
6db787
-                                                 NULL);
6db787
-            stonith_key_value_freeall(params, 1, 1);
6db787
-            rc = stonith_device_register(xml, NULL, FALSE);
6db787
-            free_xml(xml);
6db787
-            if (rc != pcmk_ok) {
6db787
-                crm_crit("Cannot register watchdog pseudo fence agent");
6db787
-                crm_exit(CRM_EX_FATAL);
6db787
-            }
6db787
-
6db787
-        } else {
6db787
-            stonith_device_remove("watchdog", FALSE);
6db787
-        }
6db787
-    }
6db787
+    stonith_watchdog_timeout_ms = timeout_ms;
6db787
 }
6db787
 
6db787
 /*!
6db787
@@ -677,6 +689,16 @@ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
6db787
         return;
6db787
     }
6db787
 
6db787
+    /* if watchdog-fencing is disabled handle any watchdog-fence
6db787
+       resource as if it was disabled
6db787
+     */
6db787
+    if ((stonith_watchdog_timeout_ms <= 0) &&
6db787
+        pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
6db787
+        crm_info("Watchdog-fencing disabled thus handling "
6db787
+                 "device %s as disabled", rsc->id);
6db787
+        return;
6db787
+    }
6db787
+
6db787
     /* Check whether our node is allowed for this resource (and its parent if in a group) */
6db787
     node = our_node_allowed_for(rsc);
6db787
     if (rsc->parent && (rsc->parent->variant == pe_group)) {
6db787
@@ -772,6 +794,12 @@ cib_devices_update(void)
6db787
         }
6db787
     }
6db787
 
6db787
+    /* have list repopulated if cib has a watchdog-fencing-resource
6db787
+       TODO: keep a cached list for queries happening while we are refreshing
6db787
+     */
6db787
+    g_list_free_full(stonith_watchdog_targets, free);
6db787
+    stonith_watchdog_targets = NULL;
6db787
+
6db787
     for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) {
6db787
         cib_device_update(gIter->data, fenced_data_set);
6db787
     }
6db787
@@ -825,6 +853,8 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
6db787
             if (search != NULL) {
6db787
                 *search = 0;
6db787
                 stonith_device_remove(rsc_id, TRUE);
6db787
+                /* watchdog_device_update called afterwards
6db787
+                   to fall back to implicit definition if needed */
6db787
             } else {
6db787
                 crm_warn("Ignoring malformed CIB update (resource deletion)");
6db787
             }
6db787
@@ -968,6 +998,24 @@ node_has_attr(const char *node, const char *name, const char *value)
6db787
     return (match != NULL);
6db787
 }
6db787
 
6db787
+/*!
6db787
+ * \internal
6db787
+ * \brief Check whether a node does watchdog-fencing
6db787
+ *
6db787
+ * \param[in] node    Name of node to check
6db787
+ *
6db787
+ * \return TRUE if node found in stonith_watchdog_targets
6db787
+ *         or stonith_watchdog_targets is empty indicating
6db787
+ *         all nodes are doing watchdog-fencing
6db787
+ */
6db787
+gboolean
6db787
+node_does_watchdog_fencing(const char *node)
6db787
+{
6db787
+    return ((stonith_watchdog_targets == NULL) ||
6db787
+            pcmk__str_in_list(stonith_watchdog_targets, node, pcmk__str_casei));
6db787
+}
6db787
+
6db787
+
6db787
 static void
6db787
 update_fencing_topology(const char *event, xmlNode * msg)
6db787
 {
6db787
@@ -1073,6 +1121,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
6db787
     xmlNode *stonith_enabled_xml = NULL;
6db787
     const char *stonith_enabled_s = NULL;
6db787
     static gboolean stonith_enabled_saved = TRUE;
6db787
+    long timeout_ms_saved = stonith_watchdog_timeout_ms;
6db787
+    gboolean need_full_refresh = FALSE;
6db787
 
6db787
     if(!have_cib_devices) {
6db787
         crm_trace("Skipping updates until we get a full dump");
6db787
@@ -1127,6 +1177,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
6db787
     }
6db787
 
6db787
     pcmk__refresh_node_caches_from_cib(local_cib);
6db787
+    update_stonith_watchdog_timeout_ms(local_cib);
6db787
 
6db787
     stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
6db787
                                            local_cib, LOG_NEVER);
6db787
@@ -1134,23 +1185,30 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
6db787
         stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
6db787
     }
6db787
 
6db787
-    watchdog_device_update(local_cib);
6db787
-
6db787
     if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
6db787
         crm_trace("Ignoring CIB updates while fencing is disabled");
6db787
         stonith_enabled_saved = FALSE;
6db787
-        return;
6db787
 
6db787
     } else if (stonith_enabled_saved == FALSE) {
6db787
         crm_info("Updating fencing device and topology lists "
6db787
                  "now that fencing is enabled");
6db787
         stonith_enabled_saved = TRUE;
6db787
-        fencing_topology_init();
6db787
-        cib_devices_update();
6db787
+        need_full_refresh = TRUE;
6db787
 
6db787
     } else {
6db787
-        update_fencing_topology(event, msg);
6db787
-        update_cib_stonith_devices(event, msg);
6db787
+        if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
6db787
+            need_full_refresh = TRUE;
6db787
+        } else {
6db787
+            update_fencing_topology(event, msg);
6db787
+            update_cib_stonith_devices(event, msg);
6db787
+            watchdog_device_update();
6db787
+        }
6db787
+    }
6db787
+
6db787
+    if (need_full_refresh) {
6db787
+        fencing_topology_init();
6db787
+        cib_devices_update();
6db787
+        watchdog_device_update();
6db787
     }
6db787
 }
6db787
 
6db787
@@ -1162,10 +1220,11 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us
6db787
     local_cib = copy_xml(output);
6db787
 
6db787
     pcmk__refresh_node_caches_from_cib(local_cib);
6db787
+    update_stonith_watchdog_timeout_ms(local_cib);
6db787
 
6db787
     fencing_topology_init();
6db787
-    watchdog_device_update(local_cib);
6db787
     cib_devices_update();
6db787
+    watchdog_device_update();
6db787
 }
6db787
 
6db787
 static void
6db787
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
6db787
index d330fda4d..14e085e98 100644
6db787
--- a/daemons/fenced/pacemaker-fenced.h
6db787
+++ b/daemons/fenced/pacemaker-fenced.h
6db787
@@ -260,14 +260,15 @@ bool fencing_peer_active(crm_node_t *peer);
6db787
 
6db787
 int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
6db787
 
6db787
-gboolean string_in_list(GList *list, const char *item);
6db787
-
6db787
 gboolean node_has_attr(const char *node, const char *name, const char *value);
6db787
 
6db787
+gboolean node_does_watchdog_fencing(const char *node);
6db787
+
6db787
 extern char *stonith_our_uname;
6db787
 extern gboolean stand_alone;
6db787
 extern GHashTable *device_list;
6db787
 extern GHashTable *topology;
6db787
 extern long stonith_watchdog_timeout_ms;
6db787
+extern GList *stonith_watchdog_targets;
6db787
 
6db787
 extern GHashTable *stonith_remote_op_list;
6db787
diff --git a/include/crm/crm.h b/include/crm/crm.h
6db787
index ee52c3630..7861c160e 100644
6db787
--- a/include/crm/crm.h
6db787
+++ b/include/crm/crm.h
6db787
@@ -66,7 +66,7 @@ extern "C" {
6db787
  * >=3.0.13: Fail counts include operation name and interval
6db787
  * >=3.2.0:  DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
6db787
  */
6db787
-#  define CRM_FEATURE_SET		"3.10.2"
6db787
+#  define CRM_FEATURE_SET		"3.11.0"
6db787
 
6db787
 /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
6db787
  * recipient of a CPG message. This imposes an arbitrary limit on cluster node
6db787
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
6db787
index 8bcb544d8..f222edba3 100644
6db787
--- a/include/crm/fencing/internal.h
6db787
+++ b/include/crm/fencing/internal.h
6db787
@@ -164,7 +164,10 @@ void stonith__device_parameter_flags(uint32_t *device_flags,
6db787
 #  define STONITH_OP_LEVEL_ADD       "st_level_add"
6db787
 #  define STONITH_OP_LEVEL_DEL       "st_level_remove"
6db787
 
6db787
-#  define STONITH_WATCHDOG_AGENT  "#watchdog"
6db787
+#  define STONITH_WATCHDOG_AGENT          "fence_watchdog"
6db787
+/* Don't change the two definitions below as it would break rolling upgrades */
6db787
+#  define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog"
6db787
+#  define STONITH_WATCHDOG_ID             "watchdog"
6db787
 
6db787
 #  ifdef HAVE_STONITH_STONITH_H
6db787
 // utilities from st_lha.c
6db787
@@ -211,4 +214,7 @@ stonith__op_state_pending(enum op_state state)
6db787
     return state != st_failed && state != st_done;
6db787
 }
6db787
 
6db787
+gboolean stonith__watchdog_fencing_enabled_for_node(const char *node);
6db787
+gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node);
6db787
+
6db787
 #endif
6db787
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
6db787
index e285f51e2..0ff98157b 100644
6db787
--- a/lib/fencing/st_client.c
6db787
+++ b/lib/fencing/st_client.c
6db787
@@ -195,6 +195,67 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
6db787
     return st_namespace_invalid;
6db787
 }
6db787
 
6db787
+gboolean
6db787
+stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
6db787
+{
6db787
+    gboolean rv = FALSE;
6db787
+    stonith_t *stonith_api = st?st:stonith_api_new();
6db787
+    char *list = NULL;
6db787
+
6db787
+    if(stonith_api) {
6db787
+        if (stonith_api->state == stonith_disconnected) {
6db787
+            int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL);
6db787
+
6db787
+            if (rc != pcmk_ok) {
6db787
+                crm_err("Failed connecting to Stonith-API for watchdog-fencing-query.");
6db787
+            }
6db787
+        }
6db787
+
6db787
+        if (stonith_api->state != stonith_disconnected) {
6db787
+            /* caveat!!!
6db787
+             * this might fail when stonithd is just updating the device-list
6db787
+             * probably something we should fix as well for other api-calls */
6db787
+            int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0);
6db787
+            if ((rc != pcmk_ok) || (list == NULL)) {
6db787
+                /* due to the race described above we may end up
6db787
+                 * here - just warn and answer FALSE so that remote
6db787
+                 * nodes don't panic on that answer
6db787
+                 */
6db787
+                crm_warn("watchdog-fencing-query failed");
6db787
+            } else if (list[0] == '\0') {
6db787
+                crm_warn("watchdog-fencing-query returned an empty list - any node");
6db787
+                rv = TRUE;
6db787
+            } else {
6db787
+                GList *targets = stonith__parse_targets(list);
6db787
+                rv = pcmk__str_in_list(targets, node, pcmk__str_casei);
6db787
+                g_list_free_full(targets, free);
6db787
+            }
6db787
+            free(list);
6db787
+            if (!st) {
6db787
+                /* if we're provided the api we still might have done the
6db787
+                 * connection - but let's assume the caller won't bother
6db787
+                 */
6db787
+                stonith_api->cmds->disconnect(stonith_api);
6db787
+            }
6db787
+        }
6db787
+
6db787
+        if (!st) {
6db787
+            stonith_api_delete(stonith_api);
6db787
+        }
6db787
+    } else {
6db787
+        crm_err("Stonith-API for watchdog-fencing-query couldn't be created.");
6db787
+    }
6db787
+    crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.",
6db787
+              node, rv?"":"not ");
6db787
+    return rv;
6db787
+}
6db787
+
6db787
+gboolean
6db787
+stonith__watchdog_fencing_enabled_for_node(const char *node)
6db787
+{
6db787
+    return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
6db787
+}
6db787
+
6db787
 static void
6db787
 log_action(stonith_action_t *action, pid_t pid)
6db787
 {
6db787
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
6db787
index 87d050ed1..bf4bceb42 100644
6db787
--- a/lib/lrmd/lrmd_client.c
6db787
+++ b/lib/lrmd/lrmd_client.c
6db787
@@ -34,6 +34,7 @@
6db787
 #include <crm/msg_xml.h>
6db787
 
6db787
 #include <crm/stonith-ng.h>
6db787
+#include <crm/fencing/internal.h>
6db787
 
6db787
 #ifdef HAVE_GNUTLS_GNUTLS_H
6db787
 #  undef KEYFILE
6db787
@@ -934,7 +935,10 @@ lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash)
6db787
     crm_xml_add(data, F_LRMD_ORIGIN, __func__);
6db787
 
6db787
     value = g_hash_table_lookup(hash, "stonith-watchdog-timeout");
6db787
-    crm_xml_add(data, F_LRMD_WATCHDOG, value);
6db787
+    if ((value) &&
6db787
+        (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) {
6db787
+       crm_xml_add(data, F_LRMD_WATCHDOG, value);
6db787
+    }
6db787
 
6db787
     rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0,
6db787
                            (native->type == pcmk__client_ipc));
6db787
diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in
6db787
index 79e78ede9..f58357a77 100644
6db787
--- a/rpm/pacemaker.spec.in
6db787
+++ b/rpm/pacemaker.spec.in
6db787
@@ -744,6 +744,7 @@ exit 0
6db787
 %doc %{_mandir}/man8/crm_attribute.*
6db787
 %doc %{_mandir}/man8/crm_master.*
6db787
 %doc %{_mandir}/man8/fence_legacy.*
6db787
+%doc %{_mandir}/man8/fence_watchdog.*
6db787
 %doc %{_mandir}/man8/pacemakerd.*
6db787
 
6db787
 %doc %{_datadir}/pacemaker/alerts
6db787
@@ -796,6 +797,7 @@ exit 0
6db787
 %{_sbindir}/crm_simulate
6db787
 %{_sbindir}/crm_report
6db787
 %{_sbindir}/crm_ticket
6db787
+%{_sbindir}/fence_watchdog
6db787
 %{_sbindir}/stonith_admin
6db787
 # "dirname" is owned by -schemas, which is a prerequisite
6db787
 %{_datadir}/pacemaker/report.collector
6db787
@@ -822,6 +824,7 @@ exit 0
6db787
 %exclude %{_mandir}/man8/crm_attribute.*
6db787
 %exclude %{_mandir}/man8/crm_master.*
6db787
 %exclude %{_mandir}/man8/fence_legacy.*
6db787
+%exclude %{_mandir}/man8/fence_watchdog.*
6db787
 %exclude %{_mandir}/man8/pacemakerd.*
6db787
 %exclude %{_mandir}/man8/pacemaker-remoted.*
6db787
 
6db787
-- 
6db787
2.27.0
6db787
6db787
6db787
From 53dd360f096e5f005e3221e8d44d82d3654b5172 Mon Sep 17 00:00:00 2001
6db787
From: Klaus Wenninger <klaus.wenninger@aon.at>
6db787
Date: Wed, 4 Aug 2021 15:57:23 +0200
6db787
Subject: [PATCH 3/3] Fix: watchdog-fencing: Silence warning without node
6db787
 restriction
6db787
6db787
---
6db787
 lib/fencing/st_client.c | 1 -
6db787
 1 file changed, 1 deletion(-)
6db787
6db787
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
6db787
index 0ff98157b..14fa7b2a6 100644
6db787
--- a/lib/fencing/st_client.c
6db787
+++ b/lib/fencing/st_client.c
6db787
@@ -223,7 +223,6 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
6db787
                  */
6db787
                 crm_warn("watchdog-fencing-query failed");
6db787
             } else if (list[0] == '\0') {
6db787
-                crm_warn("watchdog-fencing-query returned an empty list - any node");
6db787
                 rv = TRUE;
6db787
             } else {
6db787
                 GList *targets = stonith__parse_targets(list);
6db787
-- 
6db787
2.27.0
6db787