599c7d
From b49f49576ef9d801a48ce7a01a78c72e65be7880 Mon Sep 17 00:00:00 2001
599c7d
From: Klaus Wenninger <klaus.wenninger@aon.at>
599c7d
Date: Fri, 30 Jul 2021 18:07:25 +0200
599c7d
Subject: [PATCH 1/3] Fix, Refactor: fenced: add return value to
599c7d
 get_agent_metadata
599c7d
599c7d
Used to distinguish between empty metadata per design,
599c7d
a failure to get metadata that might succeed on a
599c7d
retry and fatal failure.
599c7d
Also fixes a regression that leads to endless retries getting
599c7d
metadata for #watchdog - not very serious, as it happens with
599c7d
delays in between but still undesirable.
599c7d
---
599c7d
 daemons/fenced/fenced_commands.c | 92 +++++++++++++++++++-------------
599c7d
 1 file changed, 55 insertions(+), 37 deletions(-)
599c7d
599c7d
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
599c7d
index a778801b1..cd9968f1a 100644
599c7d
--- a/daemons/fenced/fenced_commands.c
599c7d
+++ b/daemons/fenced/fenced_commands.c
599c7d
@@ -69,7 +69,7 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re
599c7d
 static void search_devices_record_result(struct device_search_s *search, const char *device,
599c7d
                                          gboolean can_fence);
599c7d
 
599c7d
-static xmlNode * get_agent_metadata(const char *agent);
599c7d
+static int get_agent_metadata(const char *agent, xmlNode **metadata);
599c7d
 static void read_action_metadata(stonith_device_t *device);
599c7d
 
599c7d
 typedef struct async_command_s {
599c7d
@@ -323,19 +323,26 @@ fork_cb(GPid pid, gpointer user_data)
599c7d
 static int
599c7d
 get_agent_metadata_cb(gpointer data) {
599c7d
     stonith_device_t *device = data;
599c7d
+    guint period_ms;
599c7d
 
599c7d
-    device->agent_metadata = get_agent_metadata(device->agent);
599c7d
-    if (device->agent_metadata) {
599c7d
-        read_action_metadata(device);
599c7d
-        stonith__device_parameter_flags(&(device->flags), device->id,
599c7d
+    switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
599c7d
+        case pcmk_rc_ok:
599c7d
+            if (device->agent_metadata) {
599c7d
+                read_action_metadata(device);
599c7d
+                stonith__device_parameter_flags(&(device->flags), device->id,
599c7d
                                         device->agent_metadata);
599c7d
-        return G_SOURCE_REMOVE;
599c7d
-    } else {
599c7d
-        guint period_ms = pcmk__mainloop_timer_get_period(device->timer);
599c7d
-        if (period_ms < 160 * 1000) {
599c7d
-            mainloop_timer_set_period(device->timer, 2 * period_ms);
599c7d
-        }
599c7d
-        return G_SOURCE_CONTINUE;
599c7d
+            }
599c7d
+            return G_SOURCE_REMOVE;
599c7d
+
599c7d
+        case EAGAIN:
599c7d
+            period_ms = pcmk__mainloop_timer_get_period(device->timer);
599c7d
+            if (period_ms < 160 * 1000) {
599c7d
+                mainloop_timer_set_period(device->timer, 2 * period_ms);
599c7d
+            }
599c7d
+            return G_SOURCE_CONTINUE;
599c7d
+
599c7d
+        default:
599c7d
+            return G_SOURCE_REMOVE;
599c7d
     }
599c7d
 }
599c7d
 
599c7d
@@ -700,38 +707,41 @@ init_metadata_cache(void) {
599c7d
     }
599c7d
 }
599c7d
 
599c7d
-static xmlNode *
599c7d
-get_agent_metadata(const char *agent)
599c7d
+int
599c7d
+get_agent_metadata(const char *agent, xmlNode ** metadata)
599c7d
 {
599c7d
-    xmlNode *xml = NULL;
599c7d
     char *buffer = NULL;
599c7d
 
599c7d
+    if (metadata == NULL) {
599c7d
+        return EINVAL;
599c7d
+    }
599c7d
+    *metadata = NULL;
599c7d
+    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
599c7d
+        return pcmk_rc_ok;
599c7d
+    }
599c7d
     init_metadata_cache();
599c7d
     buffer = g_hash_table_lookup(metadata_cache, agent);
599c7d
-    if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
599c7d
-        return NULL;
599c7d
-
599c7d
-    } else if(buffer == NULL) {
599c7d
+    if (buffer == NULL) {
599c7d
         stonith_t *st = stonith_api_new();
599c7d
         int rc;
599c7d
 
599c7d
         if (st == NULL) {
599c7d
             crm_warn("Could not get agent meta-data: "
599c7d
                      "API memory allocation failed");
599c7d
-            return NULL;
599c7d
+            return EAGAIN;
599c7d
         }
599c7d
-        rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
599c7d
+        rc = st->cmds->metadata(st, st_opt_sync_call, agent,
599c7d
+                                NULL, &buffer, 10);
599c7d
         stonith_api_delete(st);
599c7d
         if (rc || !buffer) {
599c7d
             crm_err("Could not retrieve metadata for fencing agent %s", agent);
599c7d
-            return NULL;
599c7d
+            return EAGAIN;
599c7d
         }
599c7d
         g_hash_table_replace(metadata_cache, strdup(agent), buffer);
599c7d
     }
599c7d
 
599c7d
-    xml = string2xml(buffer);
599c7d
-
599c7d
-    return xml;
599c7d
+    *metadata = string2xml(buffer);
599c7d
+    return pcmk_rc_ok;
599c7d
 }
599c7d
 
599c7d
 static gboolean
599c7d
@@ -962,19 +972,27 @@ build_device_from_xml(xmlNode * msg)
599c7d
         g_list_free_full(device->targets, free);
599c7d
         device->targets = NULL;
599c7d
     }
599c7d
-    device->agent_metadata = get_agent_metadata(device->agent);
599c7d
-    if (device->agent_metadata) {
599c7d
-        read_action_metadata(device);
599c7d
-        stonith__device_parameter_flags(&(device->flags), device->id,
599c7d
-                                        device->agent_metadata);
599c7d
-    } else {
599c7d
-        if (device->timer == NULL) {
599c7d
-            device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
599c7d
+    switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
599c7d
+        case pcmk_rc_ok:
599c7d
+            if (device->agent_metadata) {
599c7d
+                read_action_metadata(device);
599c7d
+                stonith__device_parameter_flags(&(device->flags), device->id,
599c7d
+                                                device->agent_metadata);
599c7d
+            }
599c7d
+            break;
599c7d
+
599c7d
+        case EAGAIN:
599c7d
+            if (device->timer == NULL) {
599c7d
+                device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
599c7d
                                            TRUE, get_agent_metadata_cb, device);
599c7d
-        }
599c7d
-        if (!mainloop_timer_running(device->timer)) {
599c7d
-            mainloop_timer_start(device->timer);
599c7d
-        }
599c7d
+            }
599c7d
+            if (!mainloop_timer_running(device->timer)) {
599c7d
+                mainloop_timer_start(device->timer);
599c7d
+            }
599c7d
+            break;
599c7d
+
599c7d
+        default:
599c7d
+            break;
599c7d
     }
599c7d
 
599c7d
     value = g_hash_table_lookup(device->params, "nodeid");
599c7d
-- 
599c7d
2.27.0
599c7d
599c7d
599c7d
From 5dd1e4459335764e0adf5fa78d81c875ae2332e9 Mon Sep 17 00:00:00 2001
599c7d
From: Klaus Wenninger <klaus.wenninger@aon.at>
599c7d
Date: Fri, 30 Jul 2021 18:15:10 +0200
599c7d
Subject: [PATCH 2/3] feature: watchdog-fencing: allow restriction to certain
599c7d
 nodes
599c7d
599c7d
Bump CRM_FEATURE_SET to 3.11.0 to encourage the cluster to be
599c7d
fully upgraded to a version that supports the feature
599c7d
before explicitly adding a watchdog-fence-device.
599c7d
---
599c7d
 configure.ac                        |   1 +
599c7d
 daemons/controld/controld_control.c |   2 +-
599c7d
 daemons/controld/controld_fencing.c |  14 ++
599c7d
 daemons/controld/controld_fencing.h |   1 +
599c7d
 daemons/fenced/Makefile.am          |   2 +-
599c7d
 daemons/fenced/fence_watchdog.in    | 283 ++++++++++++++++++++++++++++
599c7d
 daemons/fenced/fenced_commands.c    | 141 +++++++++++---
599c7d
 daemons/fenced/fenced_remote.c      |  71 ++++---
599c7d
 daemons/fenced/pacemaker-fenced.c   | 131 +++++++++----
599c7d
 daemons/fenced/pacemaker-fenced.h   |   5 +-
599c7d
 include/crm/crm.h                   |   2 +-
599c7d
 include/crm/fencing/internal.h      |   8 +-
599c7d
 lib/fencing/st_client.c             |  61 ++++++
599c7d
 lib/lrmd/lrmd_client.c              |   6 +-
599c7d
 rpm/pacemaker.spec.in               |   3 +
599c7d
 16 files changed, 635 insertions(+), 97 deletions(-)
599c7d
 create mode 100755 daemons/fenced/fence_watchdog.in
599c7d
599c7d
diff --git a/configure.ac b/configure.ac
599c7d
index 436100c81..013562e46 100644
599c7d
--- a/configure.ac
599c7d
+++ b/configure.ac
599c7d
@@ -1972,6 +1972,7 @@ CONFIG_FILES_EXEC([cts/cts-cli],
599c7d
                   [cts/support/fence_dummy],
599c7d
                   [cts/support/pacemaker-cts-dummyd],
599c7d
                   [daemons/fenced/fence_legacy],
599c7d
+                  [daemons/fenced/fence_watchdog],
599c7d
                   [doc/abi-check],
599c7d
                   [extra/resources/ClusterMon],
599c7d
                   [extra/resources/HealthSMART],
599c7d
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
599c7d
index 45a70bb92..b5da6a46c 100644
599c7d
--- a/daemons/controld/controld_control.c
599c7d
+++ b/daemons/controld/controld_control.c
599c7d
@@ -615,7 +615,7 @@ static pcmk__cluster_option_t crmd_opts[] = {
599c7d
     },
599c7d
     {
599c7d
         "stonith-watchdog-timeout", NULL, "time", NULL,
599c7d
-        "0", pcmk__valid_sbd_timeout,
599c7d
+        "0", controld_verify_stonith_watchdog_timeout,
599c7d
         "How long to wait before we can assume nodes are safely down "
599c7d
             "when watchdog-based self-fencing via SBD is in use",
599c7d
         "If nonzero, along with `have-watchdog=true` automatically set by the "
599c7d
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
599c7d
index 0fba6613b..6c2a6c550 100644
599c7d
--- a/daemons/controld/controld_fencing.c
599c7d
+++ b/daemons/controld/controld_fencing.c
599c7d
@@ -11,6 +11,7 @@
599c7d
 #include <crm/crm.h>
599c7d
 #include <crm/msg_xml.h>
599c7d
 #include <crm/common/xml.h>
599c7d
+#include <crm/stonith-ng.h>
599c7d
 #include <crm/fencing/internal.h>
599c7d
 
599c7d
 #include <pacemaker-controld.h>
599c7d
@@ -886,6 +887,19 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
599c7d
     return TRUE;
599c7d
 }
599c7d
 
599c7d
+bool
599c7d
+controld_verify_stonith_watchdog_timeout(const char *value)
599c7d
+{
599c7d
+    gboolean rv = TRUE;
599c7d
+
599c7d
+    if (stonith_api && (stonith_api->state != stonith_disconnected) &&
599c7d
+        stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
599c7d
+                                                       fsa_our_uname)) {
599c7d
+        rv = pcmk__valid_sbd_timeout(value);
599c7d
+    }
599c7d
+    return rv;
599c7d
+}
599c7d
+
599c7d
 /* end stonith API client functions */
599c7d
 
599c7d
 
599c7d
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
599c7d
index d0ecc8234..ef68a0c83 100644
599c7d
--- a/daemons/controld/controld_fencing.h
599c7d
+++ b/daemons/controld/controld_fencing.h
599c7d
@@ -24,6 +24,7 @@ void update_stonith_max_attempts(const char* value);
599c7d
 void controld_trigger_fencer_connect(void);
599c7d
 void controld_disconnect_fencer(bool destroy);
599c7d
 gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
599c7d
+bool controld_verify_stonith_watchdog_timeout(const char *value);
599c7d
 
599c7d
 // stonith cleanup list
599c7d
 void add_stonith_cleanup(const char *target);
599c7d
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am
599c7d
index 43413e11d..2923d7c9b 100644
599c7d
--- a/daemons/fenced/Makefile.am
599c7d
+++ b/daemons/fenced/Makefile.am
599c7d
@@ -15,7 +15,7 @@ halibdir	= $(CRM_DAEMON_DIR)
599c7d
 
599c7d
 halib_PROGRAMS	= pacemaker-fenced cts-fence-helper
599c7d
 
599c7d
-sbin_SCRIPTS	= fence_legacy
599c7d
+sbin_SCRIPTS	= fence_legacy fence_watchdog
599c7d
 
599c7d
 noinst_HEADERS	= pacemaker-fenced.h
599c7d
 
599c7d
diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in
599c7d
new file mode 100755
599c7d
index 000000000..c83304f1d
599c7d
--- /dev/null
599c7d
+++ b/daemons/fenced/fence_watchdog.in
599c7d
@@ -0,0 +1,283 @@
599c7d
+#!@PYTHON@
599c7d
+"""Dummy watchdog fence agent for providing meta-data for the pacemaker internal agent
599c7d
+"""
599c7d
+
599c7d
+__copyright__ = "Copyright 2012-2021 the Pacemaker project contributors"
599c7d
+__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
599c7d
+
599c7d
+import io
599c7d
+import os
599c7d
+import re
599c7d
+import sys
599c7d
+import atexit
599c7d
+import getopt
599c7d
+
599c7d
+SHORT_DESC = "Dummy watchdog fence agent"
599c7d
+LONG_DESC = """fence_watchdog just provides
599c7d
+meta-data - actual fencing is done by the pacemaker internal watchdog agent."""
599c7d
+
599c7d
+ALL_OPT = {
599c7d
+    "version" : {
599c7d
+        "getopt" : "V",
599c7d
+        "longopt" : "version",
599c7d
+        "help" : "-V, --version                  Display version information and exit",
599c7d
+        "required" : "0",
599c7d
+        "shortdesc" : "Display version information and exit",
599c7d
+        "order" : 53
599c7d
+        },
599c7d
+    "help"    : {
599c7d
+        "getopt" : "h",
599c7d
+        "longopt" : "help",
599c7d
+        "help" : "-h, --help                     Display this help and exit",
599c7d
+        "required" : "0",
599c7d
+        "shortdesc" : "Display help and exit",
599c7d
+        "order" : 54
599c7d
+        },
599c7d
+    "action" : {
599c7d
+        "getopt" : "o:",
599c7d
+        "longopt" : "action",
599c7d
+        "help" : "-o, --action=[action]          Action: metadata",
599c7d
+        "required" : "1",
599c7d
+        "shortdesc" : "Fencing Action",
599c7d
+        "default" : "metadata",
599c7d
+        "order" : 1
599c7d
+        },
599c7d
+    "nodename" : {
599c7d
+        "getopt" : "N:",
599c7d
+        "longopt" : "nodename",
599c7d
+        "help" : "-N, --nodename                 Node name of fence victim (ignored)",
599c7d
+        "required" : "0",
599c7d
+        "shortdesc" : "Ignored",
599c7d
+        "order" : 2
599c7d
+        },
599c7d
+    "plug" : {
599c7d
+        "getopt" : "n:",
599c7d
+        "longopt" : "plug",
599c7d
+        "help" : "-n, --plug=[id]                Physical plug number on device (ignored)",
599c7d
+        "required" : "1",
599c7d
+        "shortdesc" : "Ignored",
599c7d
+        "order" : 4
599c7d
+        }
599c7d
+}
599c7d
+
599c7d
+
599c7d
+def agent():
599c7d
+    """ Return name this file was run as. """
599c7d
+
599c7d
+    return os.path.basename(sys.argv[0])
599c7d
+
599c7d
+
599c7d
+def fail_usage(message):
599c7d
+    """ Print a usage message and exit. """
599c7d
+
599c7d
+    sys.exit("%s\nPlease use '-h' for usage" % message)
599c7d
+
599c7d
+
599c7d
+def show_docs(options):
599c7d
+    """ Handle informational options (display info and exit). """
599c7d
+
599c7d
+    device_opt = options["device_opt"]
599c7d
+
599c7d
+    if "-h" in options:
599c7d
+        usage(device_opt)
599c7d
+        sys.exit(0)
599c7d
+
599c7d
+    if "-o" in options and options["-o"].lower() == "metadata":
599c7d
+        metadata(device_opt, options)
599c7d
+        sys.exit(0)
599c7d
+
599c7d
+    if "-V" in options:
599c7d
+        print(AGENT_VERSION)
599c7d
+        sys.exit(0)
599c7d
+
599c7d
+
599c7d
+def sorted_options(avail_opt):
599c7d
+    """ Return a list of all options, in their internally specified order. """
599c7d
+
599c7d
+    sorted_list = [(key, ALL_OPT[key]) for key in avail_opt]
599c7d
+    sorted_list.sort(key=lambda x: x[1]["order"])
599c7d
+    return sorted_list
599c7d
+
599c7d
+
599c7d
+def usage(avail_opt):
599c7d
+    """ Print a usage message. """
599c7d
+    print(LONG_DESC)
599c7d
+    print()
599c7d
+    print("Usage:")
599c7d
+    print("\t" + agent() + " [options]")
599c7d
+    print("Options:")
599c7d
+
599c7d
+    for dummy, value in sorted_options(avail_opt):
599c7d
+        if len(value["help"]) != 0:
599c7d
+            print("   " + value["help"])
599c7d
+
599c7d
+
599c7d
+def metadata(avail_opt, options):
599c7d
+    """ Print agent metadata. """
599c7d
+
599c7d
+    print("""
599c7d
+<resource-agent name="%s" shortdesc="%s">
599c7d
+<longdesc>%s</longdesc>
599c7d
+<parameters>""" % (agent(), SHORT_DESC, LONG_DESC))
599c7d
+
599c7d
+    for option, dummy in sorted_options(avail_opt):
599c7d
+        if "shortdesc" in ALL_OPT[option]:
599c7d
+            print('    <parameter name="' + option + '" ' +
599c7d
+                  'required="' + ALL_OPT[option]["required"] + '">')
599c7d
+
599c7d
+            default = ""
599c7d
+            default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1]
599c7d
+            default_name_no_arg = "-" + ALL_OPT[option]["getopt"]
599c7d
+
599c7d
+            if "default" in ALL_OPT[option]:
599c7d
+                default = 'default="%s"' % str(ALL_OPT[option]["default"])
599c7d
+            elif default_name_arg in options:
599c7d
+                if options[default_name_arg]:
599c7d
+                    try:
599c7d
+                        default = 'default="%s"' % options[default_name_arg]
599c7d
+                    except TypeError:
599c7d
+                        ## @todo/@note: Currently there is no clean way how to handle lists
599c7d
+                        ## we can create a string from it but we can't set it on command line
599c7d
+                        default = 'default="%s"' % str(options[default_name_arg])
599c7d
+            elif default_name_no_arg in options:
599c7d
+                default = 'default="true"'
599c7d
+
599c7d
+            mixed = ALL_OPT[option]["help"]
599c7d
+            ## split it between option and help text
599c7d
+            res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed)
599c7d
+            if None != res:
599c7d
+                mixed = res.group(1)
599c7d
+            mixed = mixed.replace("<", "&lt;").replace(">", "&gt;")
599c7d
+            print('      <getopt mixed="' + mixed + '" />')
599c7d
+
599c7d
+            if ALL_OPT[option]["getopt"].count(":") > 0:
599c7d
+                print('      <content type="string" ' + default + ' />')
599c7d
+            else:
599c7d
+                print('      <content type="boolean" ' + default + ' />')
599c7d
+
599c7d
+            print('      <shortdesc lang="en">' + ALL_OPT[option]["shortdesc"] + '</shortdesc>')
599c7d
+            print('    </parameter>')
599c7d
+
599c7d
+    print('  </parameters>\n <actions>')
599c7d
+    print('    <action name="on" />')
599c7d
+    print('    <action name="off" />')
599c7d
+    print('    <action name="reboot" />')
599c7d
+    print('    <action name="monitor" />')
599c7d
+    print('    <action name="list" />')
599c7d
+    print('    <action name="metadata" />')
599c7d
+    print('  </actions>')
599c7d
+    print('</resource-agent>')
599c7d
+
599c7d
+
599c7d
+def option_longopt(option):
599c7d
+    """ Return the getopt-compatible long-option name of the given option. """
599c7d
+
599c7d
+    if ALL_OPT[option]["getopt"].endswith(":"):
599c7d
+        return ALL_OPT[option]["longopt"] + "="
599c7d
+    else:
599c7d
+        return ALL_OPT[option]["longopt"]
599c7d
+
599c7d
+
599c7d
+def opts_from_command_line(argv, avail_opt):
599c7d
+    """ Read options from command-line arguments. """
599c7d
+
599c7d
+    # Prepare list of options for getopt
599c7d
+    getopt_string = ""
599c7d
+    longopt_list = []
599c7d
+    for k in avail_opt:
599c7d
+        if k in ALL_OPT:
599c7d
+            getopt_string += ALL_OPT[k]["getopt"]
599c7d
+        else:
599c7d
+            fail_usage("Parse error: unknown option '" + k + "'")
599c7d
+
599c7d
+        if k in ALL_OPT and "longopt" in ALL_OPT[k]:
599c7d
+            longopt_list.append(option_longopt(k))
599c7d
+
599c7d
+    try:
599c7d
+        opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list)
599c7d
+    except getopt.GetoptError as error:
599c7d
+        fail_usage("Parse error: " + error.msg)
599c7d
+
599c7d
+    # Transform longopt to short one which are used in fencing agents
599c7d
+    old_opt = opt
599c7d
+    opt = {}
599c7d
+    for old_option in dict(old_opt).keys():
599c7d
+        if old_option.startswith("--"):
599c7d
+            for option in ALL_OPT.keys():
599c7d
+                if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option:
599c7d
+                    opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option]
599c7d
+        else:
599c7d
+            opt[old_option] = dict(old_opt)[old_option]
599c7d
+
599c7d
+    return opt
599c7d
+
599c7d
+
599c7d
+def opts_from_stdin(avail_opt):
599c7d
+    """ Read options from standard input. """
599c7d
+
599c7d
+    opt = {}
599c7d
+    name = ""
599c7d
+    for line in sys.stdin.readlines():
599c7d
+        line = line.strip()
599c7d
+        if line.startswith("#") or (len(line) == 0):
599c7d
+            continue
599c7d
+
599c7d
+        (name, value) = (line + "=").split("=", 1)
599c7d
+        value = value[:-1]
599c7d
+
599c7d
+        if name not in avail_opt:
599c7d
+            print("Parse error: Ignoring unknown option '%s'" % line,
599c7d
+                  file=sys.stderr)
599c7d
+            continue
599c7d
+
599c7d
+        if ALL_OPT[name]["getopt"].endswith(":"):
599c7d
+            opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value
599c7d
+        elif value.lower() in ["1", "yes", "on", "true"]:
599c7d
+            opt["-"+ALL_OPT[name]["getopt"]] = "1"
599c7d
+
599c7d
+    return opt
599c7d
+
599c7d
+
599c7d
+def process_input(avail_opt):
599c7d
+    """ Set standard environment variables, and parse all options. """
599c7d
+
599c7d
+    # Set standard environment
599c7d
+    os.putenv("LANG", "C")
599c7d
+    os.putenv("LC_ALL", "C")
599c7d
+
599c7d
+    # Read options from command line or standard input
599c7d
+    if len(sys.argv) > 1:
599c7d
+        return opts_from_command_line(sys.argv[1:], avail_opt)
599c7d
+    else:
599c7d
+        return opts_from_stdin(avail_opt)
599c7d
+
599c7d
+
599c7d
+def atexit_handler():
599c7d
+    """ Close stdout on exit. """
599c7d
+
599c7d
+    try:
599c7d
+        sys.stdout.close()
599c7d
+        os.close(1)
599c7d
+    except IOError:
599c7d
+        sys.exit("%s failed to close standard output" % agent())
599c7d
+
599c7d
+
599c7d
+def main():
599c7d
+    """ Make it so! """
599c7d
+
599c7d
+    device_opt = ALL_OPT.keys()
599c7d
+
599c7d
+    ## Defaults for fence agent
599c7d
+    atexit.register(atexit_handler)
599c7d
+    options = process_input(device_opt)
599c7d
+    options["device_opt"] = device_opt
599c7d
+    show_docs(options)
599c7d
+
599c7d
+    print("Watchdog fencing may be initiated only by the cluster, not this agent.",
599c7d
+          file=sys.stderr)
599c7d
+
599c7d
+    sys.exit(1)
599c7d
+
599c7d
+
599c7d
+if __name__ == "__main__":
599c7d
+    main()
599c7d
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
599c7d
index cd9968f1a..9470ea2c1 100644
599c7d
--- a/daemons/fenced/fenced_commands.c
599c7d
+++ b/daemons/fenced/fenced_commands.c
599c7d
@@ -397,15 +397,13 @@ stonith_device_execute(stonith_device_t * device)
599c7d
         return TRUE;
599c7d
     }
599c7d
 
599c7d
-    if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
599c7d
-        if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
599c7d
-            pcmk__panic(__func__);
599c7d
-            goto done;
599c7d
-
599c7d
-        } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
599c7d
-            pcmk__panic(__func__);
599c7d
-            goto done;
599c7d
-
599c7d
+    if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
599c7d
+                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
599c7d
+        if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) {
599c7d
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
599c7d
+                pcmk__panic(__func__);
599c7d
+                goto done;
599c7d
+            }
599c7d
         } else {
599c7d
             crm_info("Faking success for %s watchdog operation", cmd->action);
599c7d
             cmd->done_cb(0, 0, NULL, cmd);
599c7d
@@ -716,7 +714,7 @@ get_agent_metadata(const char *agent, xmlNode ** metadata)
599c7d
         return EINVAL;
599c7d
     }
599c7d
     *metadata = NULL;
599c7d
-    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
599c7d
+    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
599c7d
         return pcmk_rc_ok;
599c7d
     }
599c7d
     init_metadata_cache();
599c7d
@@ -1050,24 +1048,6 @@ schedule_internal_command(const char *origin,
599c7d
     schedule_stonith_command(cmd, device);
599c7d
 }
599c7d
 
599c7d
-gboolean
599c7d
-string_in_list(GList *list, const char *item)
599c7d
-{
599c7d
-    int lpc = 0;
599c7d
-    int max = g_list_length(list);
599c7d
-
599c7d
-    for (lpc = 0; lpc < max; lpc++) {
599c7d
-        const char *value = g_list_nth_data(list, lpc);
599c7d
-
599c7d
-        if (pcmk__str_eq(item, value, pcmk__str_casei)) {
599c7d
-            return TRUE;
599c7d
-        } else {
599c7d
-            crm_trace("%d: '%s' != '%s'", lpc, item, value);
599c7d
-        }
599c7d
-    }
599c7d
-    return FALSE;
599c7d
-}
599c7d
-
599c7d
 static void
599c7d
 status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
599c7d
 {
599c7d
@@ -1144,7 +1124,7 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
599c7d
         if (!alias) {
599c7d
             alias = search->host;
599c7d
         }
599c7d
-        if (string_in_list(dev->targets, alias)) {
599c7d
+        if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
599c7d
             can_fence = TRUE;
599c7d
         }
599c7d
     }
599c7d
@@ -1215,9 +1195,62 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
599c7d
     stonith_device_t *dup = NULL;
599c7d
     stonith_device_t *device = build_device_from_xml(msg);
599c7d
     guint ndevices = 0;
599c7d
+    int rv = pcmk_ok;
599c7d
 
599c7d
     CRM_CHECK(device != NULL, return -ENOMEM);
599c7d
 
599c7d
+    /* do we have a watchdog-device? */
599c7d
+    if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
599c7d
+        pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
599c7d
+                     STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
599c7d
+        if (stonith_watchdog_timeout_ms <= 0) {
599c7d
+            crm_err("Ignoring watchdog fence device without "
599c7d
+                    "stonith-watchdog-timeout set.");
599c7d
+            rv = -ENODEV;
599c7d
+            /* fall through to cleanup & return */
599c7d
+        } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
599c7d
+                                 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
599c7d
+            crm_err("Ignoring watchdog fence device with unknown "
599c7d
+                    "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
599c7d
+                    device->agent?device->agent:"");
599c7d
+            rv = -ENODEV;
599c7d
+            /* fall through to cleanup & return */
599c7d
+        } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
599c7d
+                                 pcmk__str_none)) {
599c7d
+            crm_err("Ignoring watchdog fence device "
599c7d
+                    "named %s !='"STONITH_WATCHDOG_ID"'.",
599c7d
+                    device->id?device->id:"");
599c7d
+            rv = -ENODEV;
599c7d
+            /* fall through to cleanup & return */
599c7d
+        } else {
599c7d
+            if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
599c7d
+                             pcmk__str_none)) {
599c7d
+                /* this either has an empty list or the targets
599c7d
+                   configured for watchdog-fencing
599c7d
+                 */
599c7d
+                g_list_free_full(stonith_watchdog_targets, free);
599c7d
+                stonith_watchdog_targets = device->targets;
599c7d
+                device->targets = NULL;
599c7d
+            }
599c7d
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
599c7d
+                g_list_free_full(device->targets, free);
599c7d
+                device->targets = stonith__parse_targets(stonith_our_uname);
599c7d
+                g_hash_table_replace(device->params,
599c7d
+                                     strdup(PCMK_STONITH_HOST_LIST),
599c7d
+                                     strdup(stonith_our_uname));
599c7d
+                /* proceed as with any other stonith-device */
599c7d
+                break;
599c7d
+            }
599c7d
+
599c7d
+            crm_debug("Skip registration of watchdog fence device on node not in host-list.");
599c7d
+            /* cleanup and fall through to more cleanup and return */
599c7d
+            device->targets = NULL;
599c7d
+            stonith_device_remove(device->id, from_cib);
599c7d
+        }
599c7d
+        free_device(device);
599c7d
+        return rv;
599c7d
+    } while (0);
599c7d
+
599c7d
     dup = device_has_duplicate(device);
599c7d
     if (dup) {
599c7d
         ndevices = g_hash_table_size(device_list);
599c7d
@@ -1598,6 +1631,39 @@ stonith_level_remove(xmlNode *msg, char **desc)
599c7d
  *       (CIB registration is not sufficient), because monitor should not be
599c7d
  *       possible unless the device is "started" (API registered).
599c7d
  */
599c7d
+
599c7d
+static char *
599c7d
+list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
599c7d
+{
599c7d
+    int max = g_list_length(list);
599c7d
+    size_t delim_len = delim?strlen(delim):0;
599c7d
+    size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
599c7d
+    char *rv;
599c7d
+    GList *gIter;
599c7d
+
599c7d
+    for (gIter = list; gIter != NULL; gIter = gIter->next) {
599c7d
+        const char *value = (const char *) gIter->data;
599c7d
+
599c7d
+        alloc_size += strlen(value);
599c7d
+    }
599c7d
+    rv = calloc(alloc_size, sizeof(char));
599c7d
+    if (rv) {
599c7d
+        char *pos = rv;
599c7d
+        const char *lead_delim = "";
599c7d
+
599c7d
+        for (gIter = list; gIter != NULL; gIter = gIter->next) {
599c7d
+            const char *value = (const char *) gIter->data;
599c7d
+
599c7d
+            pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
599c7d
+            lead_delim = delim;
599c7d
+        }
599c7d
+        if (max && terminate_with_delim) {
599c7d
+            sprintf(pos, "%s", delim);
599c7d
+        }
599c7d
+    }
599c7d
+    return rv;
599c7d
+}
599c7d
+
599c7d
 static int
599c7d
 stonith_device_action(xmlNode * msg, char **output)
599c7d
 {
599c7d
@@ -1615,6 +1681,19 @@ stonith_device_action(xmlNode * msg, char **output)
599c7d
         return -EPROTO;
599c7d
     }
599c7d
 
599c7d
+    if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
599c7d
+        if (stonith_watchdog_timeout_ms <= 0) {
599c7d
+            return -ENODEV;
599c7d
+        } else {
599c7d
+            if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
599c7d
+                *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
599c7d
+                return pcmk_ok;
599c7d
+            } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
599c7d
+                return pcmk_ok;
599c7d
+            }
599c7d
+        }
599c7d
+    }
599c7d
+
599c7d
     device = g_hash_table_lookup(device_list, id);
599c7d
     if ((device == NULL)
599c7d
         || (!device->api_registered && !strcmp(action, "monitor"))) {
599c7d
@@ -1742,7 +1821,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
599c7d
          * Only use if all hosts on which the device can be active can always fence all listed hosts
599c7d
          */
599c7d
 
599c7d
-        if (string_in_list(dev->targets, host)) {
599c7d
+        if (pcmk__str_in_list(dev->targets, host, pcmk__str_casei)) {
599c7d
             can = TRUE;
599c7d
         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
599c7d
                    && g_hash_table_lookup(dev->aliases, host)) {
599c7d
@@ -1763,7 +1842,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
599c7d
             return;
599c7d
         }
599c7d
 
599c7d
-        if (string_in_list(dev->targets, alias)) {
599c7d
+        if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
599c7d
             can = TRUE;
599c7d
         }
599c7d
 
599c7d
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
599c7d
index cf91acaed..224f2baba 100644
599c7d
--- a/daemons/fenced/fenced_remote.c
599c7d
+++ b/daemons/fenced/fenced_remote.c
599c7d
@@ -1522,6 +1522,25 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
599c7d
     }
599c7d
 }
599c7d
 
599c7d
+static gboolean
599c7d
+check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
599c7d
+{
599c7d
+    if (node_does_watchdog_fencing(op->target)) {
599c7d
+
599c7d
+        crm_notice("Waiting %lds for %s to self-fence (%s) for "
599c7d
+                   "client %s " CRM_XS " id=%.8s",
599c7d
+                   (stonith_watchdog_timeout_ms / 1000),
599c7d
+                   op->target, op->action, op->client_name, op->id);
599c7d
+        op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
599c7d
+                                         remote_op_watchdog_done, op);
599c7d
+        return TRUE;
599c7d
+    } else {
599c7d
+        crm_debug("Skipping fallback to watchdog-fencing as %s is "
599c7d
+                 "not in host-list", op->target);
599c7d
+    }
599c7d
+    return FALSE;
599c7d
+}
599c7d
+
599c7d
 void
599c7d
 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
599c7d
 {
599c7d
@@ -1592,26 +1611,33 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
599c7d
             g_source_remove(op->op_timer_one);
599c7d
         }
599c7d
 
599c7d
-        if(stonith_watchdog_timeout_ms > 0 && device && pcmk__str_eq(device, "watchdog", pcmk__str_casei)) {
599c7d
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
599c7d
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
599c7d
-                       op->target, op->action, op->client_name, op->id);
599c7d
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
599c7d
-
599c7d
-            /* TODO check devices to verify watchdog will be in use */
599c7d
-        } else if(stonith_watchdog_timeout_ms > 0
599c7d
-                  && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
599c7d
-                  && !pcmk__str_eq(op->action, "on", pcmk__str_casei)) {
599c7d
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
599c7d
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
599c7d
-                       op->target, op->action, op->client_name, op->id);
599c7d
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
599c7d
-
599c7d
-        } else {
599c7d
+        if (!(stonith_watchdog_timeout_ms > 0 && (
599c7d
+                (pcmk__str_eq(device, STONITH_WATCHDOG_ID,
599c7d
+                                        pcmk__str_none)) ||
599c7d
+                (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
599c7d
+                    && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) &&
599c7d
+             check_watchdog_fencing_and_wait(op))) {
599c7d
+
599c7d
+            /* Some thoughts about self-fencing cases reaching this point:
599c7d
+               - Actually check in check_watchdog_fencing_and_wait
599c7d
+                 shouldn't fail if STONITH_WATCHDOG_ID is
599c7d
+                 chosen as fencing-device and it being present implies
599c7d
+                 watchdog-fencing is enabled anyway
599c7d
+               - If watchdog-fencing is disabled either in general or for
599c7d
+                 a specific target - detected in check_watchdog_fencing_and_wait -
599c7d
+                 for some other kind of self-fencing we can't expect
599c7d
+                 a success answer but timeout is fine if the node doesn't
599c7d
+                 come back in between
599c7d
+               - Delicate might be the case where we have watchdog-fencing
599c7d
+                 enabled for a node but the watchdog-fencing-device isn't
599c7d
+                 explicitly chosen for suicide. Local pe-execution in sbd
599c7d
+                 may detect the node as unclean and lead to timely suicide.
599c7d
+                 Otherwise the selection of stonith-watchdog-timeout at
599c7d
+                 least is questionable.
599c7d
+             */
599c7d
             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
599c7d
         }
599c7d
 
599c7d
-
599c7d
         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
599c7d
         peer->tried = TRUE;
599c7d
         free_xml(remote_op);
599c7d
@@ -1645,12 +1671,11 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
599c7d
          * but we have all the expected replies, then no devices
599c7d
          * are available to execute the fencing operation. */
599c7d
 
599c7d
-        if(stonith_watchdog_timeout_ms && pcmk__str_eq(device, "watchdog", pcmk__str_null_matches | pcmk__str_casei)) {
599c7d
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
599c7d
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
599c7d
-                       op->target, op->action, op->client_name, op->id);
599c7d
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
599c7d
-            return;
599c7d
+        if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
599c7d
+           STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
599c7d
+            if (check_watchdog_fencing_and_wait(op)) {
599c7d
+                return;
599c7d
+            }
599c7d
         }
599c7d
 
599c7d
         if (op->state == st_query) {
599c7d
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
599c7d
index 39738d8be..7f8b427d9 100644
599c7d
--- a/daemons/fenced/pacemaker-fenced.c
599c7d
+++ b/daemons/fenced/pacemaker-fenced.c
599c7d
@@ -42,6 +42,7 @@
599c7d
 
599c7d
 char *stonith_our_uname = NULL;
599c7d
 long stonith_watchdog_timeout_ms = 0;
599c7d
+GList *stonith_watchdog_targets = NULL;
599c7d
 
599c7d
 static GMainLoop *mainloop = NULL;
599c7d
 
599c7d
@@ -578,7 +579,44 @@ our_node_allowed_for(pe_resource_t *rsc)
599c7d
 }
599c7d
 
599c7d
 static void
599c7d
-watchdog_device_update(xmlNode *cib)
599c7d
+watchdog_device_update(void)
599c7d
+{
599c7d
+    if (stonith_watchdog_timeout_ms > 0) {
599c7d
+        if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
599c7d
+            !stonith_watchdog_targets) {
599c7d
+            /* getting here, watchdog-fencing is enabled, no device exists yet
599c7d
+               and stonith_watchdog_targets isn't what is preventing that
599c7d
+             */
599c7d
+            int rc;
599c7d
+            xmlNode *xml;
599c7d
+
599c7d
+            xml = create_device_registration_xml(
599c7d
+                    STONITH_WATCHDOG_ID,
599c7d
+                    st_namespace_internal,
599c7d
+                    STONITH_WATCHDOG_AGENT,
599c7d
+                    NULL, /* stonith_device_register will add our
599c7d
+                             own name as PCMK_STONITH_HOST_LIST param
599c7d
+                             so we can skip that here
599c7d
+                           */
599c7d
+                    NULL);
599c7d
+            rc = stonith_device_register(xml, NULL, TRUE);
599c7d
+            free_xml(xml);
599c7d
+            if (rc != pcmk_ok) {
599c7d
+                crm_crit("Cannot register watchdog pseudo fence agent");
599c7d
+                crm_exit(CRM_EX_FATAL);
599c7d
+            }
599c7d
+        }
599c7d
+
599c7d
+    } else {
599c7d
+        /* be silent if no device - todo parameter to stonith_device_remove */
599c7d
+        if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
599c7d
+            stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
599c7d
+        }
599c7d
+    }
599c7d
+}
599c7d
+
599c7d
+static void
599c7d
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
599c7d
 {
599c7d
     xmlNode *stonith_enabled_xml = NULL;
599c7d
     const char *stonith_enabled_s = NULL;
599c7d
@@ -608,33 +646,7 @@ watchdog_device_update(xmlNode *cib)
599c7d
         }
599c7d
     }
599c7d
 
599c7d
-    if (timeout_ms != stonith_watchdog_timeout_ms) {
599c7d
-        crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
599c7d
-        stonith_watchdog_timeout_ms = timeout_ms;
599c7d
-
599c7d
-        if (stonith_watchdog_timeout_ms > 0) {
599c7d
-            int rc;
599c7d
-            xmlNode *xml;
599c7d
-            stonith_key_value_t *params = NULL;
599c7d
-
599c7d
-            params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST,
599c7d
-                                           stonith_our_uname);
599c7d
-
599c7d
-            xml = create_device_registration_xml("watchdog", st_namespace_internal,
599c7d
-                                                 STONITH_WATCHDOG_AGENT, params,
599c7d
-                                                 NULL);
599c7d
-            stonith_key_value_freeall(params, 1, 1);
599c7d
-            rc = stonith_device_register(xml, NULL, FALSE);
599c7d
-            free_xml(xml);
599c7d
-            if (rc != pcmk_ok) {
599c7d
-                crm_crit("Cannot register watchdog pseudo fence agent");
599c7d
-                crm_exit(CRM_EX_FATAL);
599c7d
-            }
599c7d
-
599c7d
-        } else {
599c7d
-            stonith_device_remove("watchdog", FALSE);
599c7d
-        }
599c7d
-    }
599c7d
+    stonith_watchdog_timeout_ms = timeout_ms;
599c7d
 }
599c7d
 
599c7d
 /*!
599c7d
@@ -677,6 +689,16 @@ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
599c7d
         return;
599c7d
     }
599c7d
 
599c7d
+    /* if watchdog-fencing is disabled handle any watchdog-fence
599c7d
+       resource as if it was disabled
599c7d
+     */
599c7d
+    if ((stonith_watchdog_timeout_ms <= 0) &&
599c7d
+        pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
599c7d
+        crm_info("Watchdog-fencing disabled thus handling "
599c7d
+                 "device %s as disabled", rsc->id);
599c7d
+        return;
599c7d
+    }
599c7d
+
599c7d
     /* Check whether our node is allowed for this resource (and its parent if in a group) */
599c7d
     node = our_node_allowed_for(rsc);
599c7d
     if (rsc->parent && (rsc->parent->variant == pe_group)) {
599c7d
@@ -772,6 +794,12 @@ cib_devices_update(void)
599c7d
         }
599c7d
     }
599c7d
 
599c7d
+    /* have list repopulated if cib has a watchdog-fencing-resource
599c7d
+       TODO: keep a cached list for queries happening while we are refreshing
599c7d
+     */
599c7d
+    g_list_free_full(stonith_watchdog_targets, free);
599c7d
+    stonith_watchdog_targets = NULL;
599c7d
+
599c7d
     for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) {
599c7d
         cib_device_update(gIter->data, fenced_data_set);
599c7d
     }
599c7d
@@ -825,6 +853,8 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
599c7d
             if (search != NULL) {
599c7d
                 *search = 0;
599c7d
                 stonith_device_remove(rsc_id, TRUE);
599c7d
+                /* watchdog_device_update called afterwards
599c7d
+                   to fall back to implicit definition if needed */
599c7d
             } else {
599c7d
                 crm_warn("Ignoring malformed CIB update (resource deletion)");
599c7d
             }
599c7d
@@ -968,6 +998,24 @@ node_has_attr(const char *node, const char *name, const char *value)
599c7d
     return (match != NULL);
599c7d
 }
599c7d
 
599c7d
+/*!
599c7d
+ * \internal
599c7d
+ * \brief Check whether a node does watchdog-fencing
599c7d
+ *
599c7d
+ * \param[in] node    Name of node to check
599c7d
+ *
599c7d
+ * \return TRUE if node found in stonith_watchdog_targets
599c7d
+ *         or stonith_watchdog_targets is empty indicating
599c7d
+ *         all nodes are doing watchdog-fencing
599c7d
+ */
599c7d
+gboolean
599c7d
+node_does_watchdog_fencing(const char *node)
599c7d
+{
599c7d
+    return ((stonith_watchdog_targets == NULL) ||
599c7d
+            pcmk__str_in_list(stonith_watchdog_targets, node, pcmk__str_casei));
599c7d
+}
599c7d
+
599c7d
+
599c7d
 static void
599c7d
 update_fencing_topology(const char *event, xmlNode * msg)
599c7d
 {
599c7d
@@ -1073,6 +1121,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
599c7d
     xmlNode *stonith_enabled_xml = NULL;
599c7d
     const char *stonith_enabled_s = NULL;
599c7d
     static gboolean stonith_enabled_saved = TRUE;
599c7d
+    long timeout_ms_saved = stonith_watchdog_timeout_ms;
599c7d
+    gboolean need_full_refresh = FALSE;
599c7d
 
599c7d
     if(!have_cib_devices) {
599c7d
         crm_trace("Skipping updates until we get a full dump");
599c7d
@@ -1127,6 +1177,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
599c7d
     }
599c7d
 
599c7d
     pcmk__refresh_node_caches_from_cib(local_cib);
599c7d
+    update_stonith_watchdog_timeout_ms(local_cib);
599c7d
 
599c7d
     stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
599c7d
                                            local_cib, LOG_NEVER);
599c7d
@@ -1134,23 +1185,30 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
599c7d
         stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
599c7d
     }
599c7d
 
599c7d
-    watchdog_device_update(local_cib);
599c7d
-
599c7d
     if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
599c7d
         crm_trace("Ignoring CIB updates while fencing is disabled");
599c7d
         stonith_enabled_saved = FALSE;
599c7d
-        return;
599c7d
 
599c7d
     } else if (stonith_enabled_saved == FALSE) {
599c7d
         crm_info("Updating fencing device and topology lists "
599c7d
                  "now that fencing is enabled");
599c7d
         stonith_enabled_saved = TRUE;
599c7d
-        fencing_topology_init();
599c7d
-        cib_devices_update();
599c7d
+        need_full_refresh = TRUE;
599c7d
 
599c7d
     } else {
599c7d
-        update_fencing_topology(event, msg);
599c7d
-        update_cib_stonith_devices(event, msg);
599c7d
+        if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
599c7d
+            need_full_refresh = TRUE;
599c7d
+        } else {
599c7d
+            update_fencing_topology(event, msg);
599c7d
+            update_cib_stonith_devices(event, msg);
599c7d
+            watchdog_device_update();
599c7d
+        }
599c7d
+    }
599c7d
+
599c7d
+    if (need_full_refresh) {
599c7d
+        fencing_topology_init();
599c7d
+        cib_devices_update();
599c7d
+        watchdog_device_update();
599c7d
     }
599c7d
 }
599c7d
 
599c7d
@@ -1162,10 +1220,11 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us
599c7d
     local_cib = copy_xml(output);
599c7d
 
599c7d
     pcmk__refresh_node_caches_from_cib(local_cib);
599c7d
+    update_stonith_watchdog_timeout_ms(local_cib);
599c7d
 
599c7d
     fencing_topology_init();
599c7d
-    watchdog_device_update(local_cib);
599c7d
     cib_devices_update();
599c7d
+    watchdog_device_update();
599c7d
 }
599c7d
 
599c7d
 static void
599c7d
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
599c7d
index d330fda4d..14e085e98 100644
599c7d
--- a/daemons/fenced/pacemaker-fenced.h
599c7d
+++ b/daemons/fenced/pacemaker-fenced.h
599c7d
@@ -260,14 +260,15 @@ bool fencing_peer_active(crm_node_t *peer);
599c7d
 
599c7d
 int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
599c7d
 
599c7d
-gboolean string_in_list(GList *list, const char *item);
599c7d
-
599c7d
 gboolean node_has_attr(const char *node, const char *name, const char *value);
599c7d
 
599c7d
+gboolean node_does_watchdog_fencing(const char *node);
599c7d
+
599c7d
 extern char *stonith_our_uname;
599c7d
 extern gboolean stand_alone;
599c7d
 extern GHashTable *device_list;
599c7d
 extern GHashTable *topology;
599c7d
 extern long stonith_watchdog_timeout_ms;
599c7d
+extern GList *stonith_watchdog_targets;
599c7d
 
599c7d
 extern GHashTable *stonith_remote_op_list;
599c7d
diff --git a/include/crm/crm.h b/include/crm/crm.h
599c7d
index ee52c3630..7861c160e 100644
599c7d
--- a/include/crm/crm.h
599c7d
+++ b/include/crm/crm.h
599c7d
@@ -66,7 +66,7 @@ extern "C" {
599c7d
  * >=3.0.13: Fail counts include operation name and interval
599c7d
  * >=3.2.0:  DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
599c7d
  */
599c7d
-#  define CRM_FEATURE_SET		"3.10.2"
599c7d
+#  define CRM_FEATURE_SET		"3.11.0"
599c7d
 
599c7d
 /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
599c7d
  * recipient of a CPG message. This imposes an arbitrary limit on cluster node
599c7d
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
599c7d
index 8bcb544d8..f222edba3 100644
599c7d
--- a/include/crm/fencing/internal.h
599c7d
+++ b/include/crm/fencing/internal.h
599c7d
@@ -164,7 +164,10 @@ void stonith__device_parameter_flags(uint32_t *device_flags,
599c7d
 #  define STONITH_OP_LEVEL_ADD       "st_level_add"
599c7d
 #  define STONITH_OP_LEVEL_DEL       "st_level_remove"
599c7d
 
599c7d
-#  define STONITH_WATCHDOG_AGENT  "#watchdog"
599c7d
+#  define STONITH_WATCHDOG_AGENT          "fence_watchdog"
599c7d
+/* Don't change the 2 definitions below as that would break rolling upgrades */
599c7d
+#  define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog"
599c7d
+#  define STONITH_WATCHDOG_ID             "watchdog"
599c7d
 
599c7d
 #  ifdef HAVE_STONITH_STONITH_H
599c7d
 // utilities from st_lha.c
599c7d
@@ -211,4 +214,7 @@ stonith__op_state_pending(enum op_state state)
599c7d
     return state != st_failed && state != st_done;
599c7d
 }
599c7d
 
599c7d
+gboolean stonith__watchdog_fencing_enabled_for_node(const char *node);
599c7d
+gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node);
599c7d
+
599c7d
 #endif
599c7d
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
599c7d
index e285f51e2..0ff98157b 100644
599c7d
--- a/lib/fencing/st_client.c
599c7d
+++ b/lib/fencing/st_client.c
599c7d
@@ -195,6 +195,67 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
599c7d
     return st_namespace_invalid;
599c7d
 }
599c7d
 
599c7d
+gboolean
599c7d
+stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
599c7d
+{
599c7d
+    gboolean rv = FALSE;
599c7d
+    stonith_t *stonith_api = st?st:stonith_api_new();
599c7d
+    char *list = NULL;
599c7d
+
599c7d
+    if(stonith_api) {
599c7d
+        if (stonith_api->state == stonith_disconnected) {
599c7d
+            int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL);
599c7d
+
599c7d
+            if (rc != pcmk_ok) {
599c7d
+                crm_err("Failed connecting to Stonith-API for watchdog-fencing-query.");
599c7d
+            }
599c7d
+        }
599c7d
+
599c7d
+        if (stonith_api->state != stonith_disconnected) {
599c7d
+            /* caveat!!!
599c7d
+             * this might fail when stonithd is just updating the device-list
599c7d
+             * probably something we should fix as well for other api-calls */
599c7d
+            int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0);
599c7d
+            if ((rc != pcmk_ok) || (list == NULL)) {
599c7d
+                /* due to the race described above it can happen that
599c7d
+                 * we drop in here - so as not to make remote nodes
599c7d
+                 * panic on that answer
599c7d
+                 */
599c7d
+                crm_warn("watchdog-fencing-query failed");
599c7d
+            } else if (list[0] == '\0') {
599c7d
+                crm_warn("watchdog-fencing-query returned an empty list - any node");
599c7d
+                rv = TRUE;
599c7d
+            } else {
599c7d
+                GList *targets = stonith__parse_targets(list);
599c7d
+                rv = pcmk__str_in_list(targets, node, pcmk__str_casei);
599c7d
+                g_list_free_full(targets, free);
599c7d
+            }
599c7d
+            free(list);
599c7d
+            if (!st) {
599c7d
+                /* if we're provided the api we still might have done the
599c7d
+                 * connection - but let's assume the caller won't bother
599c7d
+                 */
599c7d
+                stonith_api->cmds->disconnect(stonith_api);
599c7d
+            }
599c7d
+        }
599c7d
+
599c7d
+        if (!st) {
599c7d
+            stonith_api_delete(stonith_api);
599c7d
+        }
599c7d
+    } else {
599c7d
+        crm_err("Stonith-API for watchdog-fencing-query couldn't be created.");
599c7d
+    }
599c7d
+    crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.",
599c7d
+              node, rv?"":"not ");
599c7d
+    return rv;
599c7d
+}
599c7d
+
599c7d
+gboolean
599c7d
+stonith__watchdog_fencing_enabled_for_node(const char *node)
599c7d
+{
599c7d
+    return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
599c7d
+}
599c7d
+
599c7d
 static void
599c7d
 log_action(stonith_action_t *action, pid_t pid)
599c7d
 {
599c7d
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
599c7d
index 87d050ed1..bf4bceb42 100644
599c7d
--- a/lib/lrmd/lrmd_client.c
599c7d
+++ b/lib/lrmd/lrmd_client.c
599c7d
@@ -34,6 +34,7 @@
599c7d
 #include <crm/msg_xml.h>
599c7d
 
599c7d
 #include <crm/stonith-ng.h>
599c7d
+#include <crm/fencing/internal.h>
599c7d
 
599c7d
 #ifdef HAVE_GNUTLS_GNUTLS_H
599c7d
 #  undef KEYFILE
599c7d
@@ -934,7 +935,10 @@ lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash)
599c7d
     crm_xml_add(data, F_LRMD_ORIGIN, __func__);
599c7d
 
599c7d
     value = g_hash_table_lookup(hash, "stonith-watchdog-timeout");
599c7d
-    crm_xml_add(data, F_LRMD_WATCHDOG, value);
599c7d
+    if ((value) &&
599c7d
+        (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) {
599c7d
+       crm_xml_add(data, F_LRMD_WATCHDOG, value);
599c7d
+    }
599c7d
 
599c7d
     rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0,
599c7d
                            (native->type == pcmk__client_ipc));
599c7d
diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in
599c7d
index 79e78ede9..f58357a77 100644
599c7d
--- a/rpm/pacemaker.spec.in
599c7d
+++ b/rpm/pacemaker.spec.in
599c7d
@@ -744,6 +744,7 @@ exit 0
599c7d
 %doc %{_mandir}/man8/crm_attribute.*
599c7d
 %doc %{_mandir}/man8/crm_master.*
599c7d
 %doc %{_mandir}/man8/fence_legacy.*
599c7d
+%doc %{_mandir}/man8/fence_watchdog.*
599c7d
 %doc %{_mandir}/man8/pacemakerd.*
599c7d
 
599c7d
 %doc %{_datadir}/pacemaker/alerts
599c7d
@@ -796,6 +797,7 @@ exit 0
599c7d
 %{_sbindir}/crm_simulate
599c7d
 %{_sbindir}/crm_report
599c7d
 %{_sbindir}/crm_ticket
599c7d
+%{_sbindir}/fence_watchdog
599c7d
 %{_sbindir}/stonith_admin
599c7d
 # "dirname" is owned by -schemas, which is a prerequisite
599c7d
 %{_datadir}/pacemaker/report.collector
599c7d
@@ -822,6 +824,7 @@ exit 0
599c7d
 %exclude %{_mandir}/man8/crm_attribute.*
599c7d
 %exclude %{_mandir}/man8/crm_master.*
599c7d
 %exclude %{_mandir}/man8/fence_legacy.*
599c7d
+%exclude %{_mandir}/man8/fence_watchdog.*
599c7d
 %exclude %{_mandir}/man8/pacemakerd.*
599c7d
 %exclude %{_mandir}/man8/pacemaker-remoted.*
599c7d
 
599c7d
-- 
599c7d
2.27.0
599c7d
599c7d
599c7d
From 53dd360f096e5f005e3221e8d44d82d3654b5172 Mon Sep 17 00:00:00 2001
599c7d
From: Klaus Wenninger <klaus.wenninger@aon.at>
599c7d
Date: Wed, 4 Aug 2021 15:57:23 +0200
599c7d
Subject: [PATCH 3/3] Fix: watchdog-fencing: Silence warning without node
599c7d
 restriction
599c7d
599c7d
---
599c7d
 lib/fencing/st_client.c | 1 -
599c7d
 1 file changed, 1 deletion(-)
599c7d
599c7d
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
599c7d
index 0ff98157b..14fa7b2a6 100644
599c7d
--- a/lib/fencing/st_client.c
599c7d
+++ b/lib/fencing/st_client.c
599c7d
@@ -223,7 +223,6 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
599c7d
                  */
599c7d
                 crm_warn("watchdog-fencing-query failed");
599c7d
             } else if (list[0] == '\0') {
599c7d
-                crm_warn("watchdog-fencing-query returned an empty list - any node");
599c7d
                 rv = TRUE;
599c7d
             } else {
599c7d
                 GList *targets = stonith__parse_targets(list);
599c7d
-- 
599c7d
2.27.0
599c7d