7ac83c
From b49f49576ef9d801a48ce7a01a78c72e65be7880 Mon Sep 17 00:00:00 2001
7ac83c
From: Klaus Wenninger <klaus.wenninger@aon.at>
7ac83c
Date: Fri, 30 Jul 2021 18:07:25 +0200
7ac83c
Subject: [PATCH 1/3] Fix, Refactor: fenced: add return value to
7ac83c
 get_agent_metadata
7ac83c
7ac83c
Used to distinguish between empty metadata per design,
7ac83c
a failure to get metadata that might succeed on a
7ac83c
retry, and a fatal failure.
7ac83c
Also fixes a regression that leads to endless retries getting
7ac83c
metadata for #watchdog - not super serious, as it happens with
7ac83c
delays in between but still undesirable.
7ac83c
---
7ac83c
 daemons/fenced/fenced_commands.c | 92 +++++++++++++++++++-------------
7ac83c
 1 file changed, 55 insertions(+), 37 deletions(-)
7ac83c
7ac83c
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
7ac83c
index a778801b1..cd9968f1a 100644
7ac83c
--- a/daemons/fenced/fenced_commands.c
7ac83c
+++ b/daemons/fenced/fenced_commands.c
7ac83c
@@ -69,7 +69,7 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re
7ac83c
 static void search_devices_record_result(struct device_search_s *search, const char *device,
7ac83c
                                          gboolean can_fence);
7ac83c
 
7ac83c
-static xmlNode * get_agent_metadata(const char *agent);
7ac83c
+static int get_agent_metadata(const char *agent, xmlNode **metadata);
7ac83c
 static void read_action_metadata(stonith_device_t *device);
7ac83c
 
7ac83c
 typedef struct async_command_s {
7ac83c
@@ -323,19 +323,26 @@ fork_cb(GPid pid, gpointer user_data)
7ac83c
 static int
7ac83c
 get_agent_metadata_cb(gpointer data) {
7ac83c
     stonith_device_t *device = data;
7ac83c
+    guint period_ms;
7ac83c
 
7ac83c
-    device->agent_metadata = get_agent_metadata(device->agent);
7ac83c
-    if (device->agent_metadata) {
7ac83c
-        read_action_metadata(device);
7ac83c
-        stonith__device_parameter_flags(&(device->flags), device->id,
7ac83c
+    switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
7ac83c
+        case pcmk_rc_ok:
7ac83c
+            if (device->agent_metadata) {
7ac83c
+                read_action_metadata(device);
7ac83c
+                stonith__device_parameter_flags(&(device->flags), device->id,
7ac83c
                                         device->agent_metadata);
7ac83c
-        return G_SOURCE_REMOVE;
7ac83c
-    } else {
7ac83c
-        guint period_ms = pcmk__mainloop_timer_get_period(device->timer);
7ac83c
-        if (period_ms < 160 * 1000) {
7ac83c
-            mainloop_timer_set_period(device->timer, 2 * period_ms);
7ac83c
-        }
7ac83c
-        return G_SOURCE_CONTINUE;
7ac83c
+            }
7ac83c
+            return G_SOURCE_REMOVE;
7ac83c
+
7ac83c
+        case EAGAIN:
7ac83c
+            period_ms = pcmk__mainloop_timer_get_period(device->timer);
7ac83c
+            if (period_ms < 160 * 1000) {
7ac83c
+                mainloop_timer_set_period(device->timer, 2 * period_ms);
7ac83c
+            }
7ac83c
+            return G_SOURCE_CONTINUE;
7ac83c
+
7ac83c
+        default:
7ac83c
+            return G_SOURCE_REMOVE;
7ac83c
     }
7ac83c
 }
7ac83c
 
7ac83c
@@ -700,38 +707,41 @@ init_metadata_cache(void) {
7ac83c
     }
7ac83c
 }
7ac83c
 
7ac83c
-static xmlNode *
7ac83c
-get_agent_metadata(const char *agent)
7ac83c
+int
7ac83c
+get_agent_metadata(const char *agent, xmlNode ** metadata)
7ac83c
 {
7ac83c
-    xmlNode *xml = NULL;
7ac83c
     char *buffer = NULL;
7ac83c
 
7ac83c
+    if (metadata == NULL) {
7ac83c
+        return EINVAL;
7ac83c
+    }
7ac83c
+    *metadata = NULL;
7ac83c
+    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
7ac83c
+        return pcmk_rc_ok;
7ac83c
+    }
7ac83c
     init_metadata_cache();
7ac83c
     buffer = g_hash_table_lookup(metadata_cache, agent);
7ac83c
-    if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
7ac83c
-        return NULL;
7ac83c
-
7ac83c
-    } else if(buffer == NULL) {
7ac83c
+    if (buffer == NULL) {
7ac83c
         stonith_t *st = stonith_api_new();
7ac83c
         int rc;
7ac83c
 
7ac83c
         if (st == NULL) {
7ac83c
             crm_warn("Could not get agent meta-data: "
7ac83c
                      "API memory allocation failed");
7ac83c
-            return NULL;
7ac83c
+            return EAGAIN;
7ac83c
         }
7ac83c
-        rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
7ac83c
+        rc = st->cmds->metadata(st, st_opt_sync_call, agent,
7ac83c
+                                NULL, &buffer, 10);
7ac83c
         stonith_api_delete(st);
7ac83c
         if (rc || !buffer) {
7ac83c
             crm_err("Could not retrieve metadata for fencing agent %s", agent);
7ac83c
-            return NULL;
7ac83c
+            return EAGAIN;
7ac83c
         }
7ac83c
         g_hash_table_replace(metadata_cache, strdup(agent), buffer);
7ac83c
     }
7ac83c
 
7ac83c
-    xml = string2xml(buffer);
7ac83c
-
7ac83c
-    return xml;
7ac83c
+    *metadata = string2xml(buffer);
7ac83c
+    return pcmk_rc_ok;
7ac83c
 }
7ac83c
 
7ac83c
 static gboolean
7ac83c
@@ -962,19 +972,27 @@ build_device_from_xml(xmlNode * msg)
7ac83c
         g_list_free_full(device->targets, free);
7ac83c
         device->targets = NULL;
7ac83c
     }
7ac83c
-    device->agent_metadata = get_agent_metadata(device->agent);
7ac83c
-    if (device->agent_metadata) {
7ac83c
-        read_action_metadata(device);
7ac83c
-        stonith__device_parameter_flags(&(device->flags), device->id,
7ac83c
-                                        device->agent_metadata);
7ac83c
-    } else {
7ac83c
-        if (device->timer == NULL) {
7ac83c
-            device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
7ac83c
+    switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
7ac83c
+        case pcmk_rc_ok:
7ac83c
+            if (device->agent_metadata) {
7ac83c
+                read_action_metadata(device);
7ac83c
+                stonith__device_parameter_flags(&(device->flags), device->id,
7ac83c
+                                                device->agent_metadata);
7ac83c
+            }
7ac83c
+            break;
7ac83c
+
7ac83c
+        case EAGAIN:
7ac83c
+            if (device->timer == NULL) {
7ac83c
+                device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
7ac83c
                                            TRUE, get_agent_metadata_cb, device);
7ac83c
-        }
7ac83c
-        if (!mainloop_timer_running(device->timer)) {
7ac83c
-            mainloop_timer_start(device->timer);
7ac83c
-        }
7ac83c
+            }
7ac83c
+            if (!mainloop_timer_running(device->timer)) {
7ac83c
+                mainloop_timer_start(device->timer);
7ac83c
+            }
7ac83c
+            break;
7ac83c
+
7ac83c
+        default:
7ac83c
+            break;
7ac83c
     }
7ac83c
 
7ac83c
     value = g_hash_table_lookup(device->params, "nodeid");
7ac83c
-- 
7ac83c
2.27.0
7ac83c
7ac83c
7ac83c
From 5dd1e4459335764e0adf5fa78d81c875ae2332e9 Mon Sep 17 00:00:00 2001
7ac83c
From: Klaus Wenninger <klaus.wenninger@aon.at>
7ac83c
Date: Fri, 30 Jul 2021 18:15:10 +0200
7ac83c
Subject: [PATCH 2/3] feature: watchdog-fencing: allow restriction to certain
7ac83c
 nodes
7ac83c
7ac83c
Bump CRM_FEATURE_SET to 3.11.0 to encourage the cluster being
7ac83c
fully upgraded to a version that supports the feature
7ac83c
before explicitly adding a watchdog-fence-device.
7ac83c
---
7ac83c
 configure.ac                        |   1 +
7ac83c
 daemons/controld/controld_control.c |   2 +-
7ac83c
 daemons/controld/controld_fencing.c |  14 ++
7ac83c
 daemons/controld/controld_fencing.h |   1 +
7ac83c
 daemons/fenced/Makefile.am          |   2 +-
7ac83c
 daemons/fenced/fence_watchdog.in    | 283 ++++++++++++++++++++++++++++
7ac83c
 daemons/fenced/fenced_commands.c    | 141 +++++++++++---
7ac83c
 daemons/fenced/fenced_remote.c      |  71 ++++---
7ac83c
 daemons/fenced/pacemaker-fenced.c   | 131 +++++++++----
7ac83c
 daemons/fenced/pacemaker-fenced.h   |   5 +-
7ac83c
 include/crm/crm.h                   |   2 +-
7ac83c
 include/crm/fencing/internal.h      |   8 +-
7ac83c
 lib/fencing/st_client.c             |  61 ++++++
7ac83c
 lib/lrmd/lrmd_client.c              |   6 +-
7ac83c
 rpm/pacemaker.spec.in               |   3 +
7ac83c
 16 files changed, 635 insertions(+), 97 deletions(-)
7ac83c
 create mode 100755 daemons/fenced/fence_watchdog.in
7ac83c
7ac83c
diff --git a/configure.ac b/configure.ac
7ac83c
index 436100c81..013562e46 100644
7ac83c
--- a/configure.ac
7ac83c
+++ b/configure.ac
7ac83c
@@ -1972,6 +1972,7 @@ CONFIG_FILES_EXEC([cts/cts-cli],
7ac83c
                   [cts/support/fence_dummy],
7ac83c
                   [cts/support/pacemaker-cts-dummyd],
7ac83c
                   [daemons/fenced/fence_legacy],
7ac83c
+                  [daemons/fenced/fence_watchdog],
7ac83c
                   [doc/abi-check],
7ac83c
                   [extra/resources/ClusterMon],
7ac83c
                   [extra/resources/HealthSMART],
7ac83c
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
7ac83c
index 45a70bb92..b5da6a46c 100644
7ac83c
--- a/daemons/controld/controld_control.c
7ac83c
+++ b/daemons/controld/controld_control.c
7ac83c
@@ -615,7 +615,7 @@ static pcmk__cluster_option_t crmd_opts[] = {
7ac83c
     },
7ac83c
     {
7ac83c
         "stonith-watchdog-timeout", NULL, "time", NULL,
7ac83c
-        "0", pcmk__valid_sbd_timeout,
7ac83c
+        "0", controld_verify_stonith_watchdog_timeout,
7ac83c
         "How long to wait before we can assume nodes are safely down "
7ac83c
             "when watchdog-based self-fencing via SBD is in use",
7ac83c
         "If nonzero, along with `have-watchdog=true` automatically set by the "
7ac83c
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
7ac83c
index 0fba6613b..6c2a6c550 100644
7ac83c
--- a/daemons/controld/controld_fencing.c
7ac83c
+++ b/daemons/controld/controld_fencing.c
7ac83c
@@ -11,6 +11,7 @@
7ac83c
 #include <crm/crm.h>
7ac83c
 #include <crm/msg_xml.h>
7ac83c
 #include <crm/common/xml.h>
7ac83c
+#include <crm/stonith-ng.h>
7ac83c
 #include <crm/fencing/internal.h>
7ac83c
 
7ac83c
 #include <pacemaker-controld.h>
7ac83c
@@ -886,6 +887,19 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
7ac83c
     return TRUE;
7ac83c
 }
7ac83c
 
7ac83c
+bool
7ac83c
+controld_verify_stonith_watchdog_timeout(const char *value)
7ac83c
+{
7ac83c
+    gboolean rv = TRUE;
7ac83c
+
7ac83c
+    if (stonith_api && (stonith_api->state != stonith_disconnected) &&
7ac83c
+        stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
7ac83c
+                                                       fsa_our_uname)) {
7ac83c
+        rv = pcmk__valid_sbd_timeout(value);
7ac83c
+    }
7ac83c
+    return rv;
7ac83c
+}
7ac83c
+
7ac83c
 /* end stonith API client functions */
7ac83c
 
7ac83c
 
7ac83c
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
7ac83c
index d0ecc8234..ef68a0c83 100644
7ac83c
--- a/daemons/controld/controld_fencing.h
7ac83c
+++ b/daemons/controld/controld_fencing.h
7ac83c
@@ -24,6 +24,7 @@ void update_stonith_max_attempts(const char* value);
7ac83c
 void controld_trigger_fencer_connect(void);
7ac83c
 void controld_disconnect_fencer(bool destroy);
7ac83c
 gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
7ac83c
+bool controld_verify_stonith_watchdog_timeout(const char *value);
7ac83c
 
7ac83c
 // stonith cleanup list
7ac83c
 void add_stonith_cleanup(const char *target);
7ac83c
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am
7ac83c
index 43413e11d..2923d7c9b 100644
7ac83c
--- a/daemons/fenced/Makefile.am
7ac83c
+++ b/daemons/fenced/Makefile.am
7ac83c
@@ -15,7 +15,7 @@ halibdir	= $(CRM_DAEMON_DIR)
7ac83c
 
7ac83c
 halib_PROGRAMS	= pacemaker-fenced cts-fence-helper
7ac83c
 
7ac83c
-sbin_SCRIPTS	= fence_legacy
7ac83c
+sbin_SCRIPTS	= fence_legacy fence_watchdog
7ac83c
 
7ac83c
 noinst_HEADERS	= pacemaker-fenced.h
7ac83c
 
7ac83c
diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in
7ac83c
new file mode 100755
7ac83c
index 000000000..c83304f1d
7ac83c
--- /dev/null
7ac83c
+++ b/daemons/fenced/fence_watchdog.in
7ac83c
@@ -0,0 +1,283 @@
7ac83c
+#!@PYTHON@
7ac83c
+"""Dummy watchdog fence agent for providing meta-data for the pacemaker internal agent
7ac83c
+"""
7ac83c
+
7ac83c
+__copyright__ = "Copyright 2012-2021 the Pacemaker project contributors"
7ac83c
+__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
7ac83c
+
7ac83c
+import io
7ac83c
+import os
7ac83c
+import re
7ac83c
+import sys
7ac83c
+import atexit
7ac83c
+import getopt
7ac83c
+
7ac83c
+SHORT_DESC = "Dummy watchdog fence agent"
7ac83c
+LONG_DESC = """fence_watchdog just provides
7ac83c
+meta-data - actual fencing is done by the pacemaker internal watchdog agent."""
7ac83c
+
7ac83c
+ALL_OPT = {
7ac83c
+    "version" : {
7ac83c
+        "getopt" : "V",
7ac83c
+        "longopt" : "version",
7ac83c
+        "help" : "-V, --version                  Display version information and exit",
7ac83c
+        "required" : "0",
7ac83c
+        "shortdesc" : "Display version information and exit",
7ac83c
+        "order" : 53
7ac83c
+        },
7ac83c
+    "help"    : {
7ac83c
+        "getopt" : "h",
7ac83c
+        "longopt" : "help",
7ac83c
+        "help" : "-h, --help                     Display this help and exit",
7ac83c
+        "required" : "0",
7ac83c
+        "shortdesc" : "Display help and exit",
7ac83c
+        "order" : 54
7ac83c
+        },
7ac83c
+    "action" : {
7ac83c
+        "getopt" : "o:",
7ac83c
+        "longopt" : "action",
7ac83c
+        "help" : "-o, --action=[action]          Action: metadata",
7ac83c
+        "required" : "1",
7ac83c
+        "shortdesc" : "Fencing Action",
7ac83c
+        "default" : "metadata",
7ac83c
+        "order" : 1
7ac83c
+        },
7ac83c
+    "nodename" : {
7ac83c
+        "getopt" : "N:",
7ac83c
+        "longopt" : "nodename",
7ac83c
+        "help" : "-N, --nodename                 Node name of fence victim (ignored)",
7ac83c
+        "required" : "0",
7ac83c
+        "shortdesc" : "Ignored",
7ac83c
+        "order" : 2
7ac83c
+        },
7ac83c
+    "plug" : {
7ac83c
+        "getopt" : "n:",
7ac83c
+        "longopt" : "plug",
7ac83c
+        "help" : "-n, --plug=[id]                Physical plug number on device (ignored)",
7ac83c
+        "required" : "1",
7ac83c
+        "shortdesc" : "Ignored",
7ac83c
+        "order" : 4
7ac83c
+        }
7ac83c
+}
7ac83c
+
7ac83c
+
7ac83c
+def agent():
7ac83c
+    """ Return name this file was run as. """
7ac83c
+
7ac83c
+    return os.path.basename(sys.argv[0])
7ac83c
+
7ac83c
+
7ac83c
+def fail_usage(message):
7ac83c
+    """ Print a usage message and exit. """
7ac83c
+
7ac83c
+    sys.exit("%s\nPlease use '-h' for usage" % message)
7ac83c
+
7ac83c
+
7ac83c
+def show_docs(options):
7ac83c
+    """ Handle informational options (display info and exit). """
7ac83c
+
7ac83c
+    device_opt = options["device_opt"]
7ac83c
+
7ac83c
+    if "-h" in options:
7ac83c
+        usage(device_opt)
7ac83c
+        sys.exit(0)
7ac83c
+
7ac83c
+    if "-o" in options and options["-o"].lower() == "metadata":
7ac83c
+        metadata(device_opt, options)
7ac83c
+        sys.exit(0)
7ac83c
+
7ac83c
+    if "-V" in options:
7ac83c
+        print(AGENT_VERSION)
7ac83c
+        sys.exit(0)
7ac83c
+
7ac83c
+
7ac83c
+def sorted_options(avail_opt):
7ac83c
+    """ Return a list of all options, in their internally specified order. """
7ac83c
+
7ac83c
+    sorted_list = [(key, ALL_OPT[key]) for key in avail_opt]
7ac83c
+    sorted_list.sort(key=lambda x: x[1]["order"])
7ac83c
+    return sorted_list
7ac83c
+
7ac83c
+
7ac83c
+def usage(avail_opt):
7ac83c
+    """ Print a usage message. """
7ac83c
+    print(LONG_DESC)
7ac83c
+    print()
7ac83c
+    print("Usage:")
7ac83c
+    print("\t" + agent() + " [options]")
7ac83c
+    print("Options:")
7ac83c
+
7ac83c
+    for dummy, value in sorted_options(avail_opt):
7ac83c
+        if len(value["help"]) != 0:
7ac83c
+            print("   " + value["help"])
7ac83c
+
7ac83c
+
7ac83c
+def metadata(avail_opt, options):
7ac83c
+    """ Print agent metadata. """
7ac83c
+
7ac83c
+    print("""<?xml version="1.0" ?>
7ac83c
+<resource-agent name="%s" shortdesc="%s">
7ac83c
+<longdesc>%s</longdesc>
7ac83c
+<parameters>""" % (agent(), SHORT_DESC, LONG_DESC))
7ac83c
+
7ac83c
+    for option, dummy in sorted_options(avail_opt):
7ac83c
+        if "shortdesc" in ALL_OPT[option]:
7ac83c
+            print('    <parameter name="' + option + '" unique="0" ' +
7ac83c
+                  'required="' + ALL_OPT[option]["required"] + '">')
7ac83c
+
7ac83c
+            default = ""
7ac83c
+            default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1]
7ac83c
+            default_name_no_arg = "-" + ALL_OPT[option]["getopt"]
7ac83c
+
7ac83c
+            if "default" in ALL_OPT[option]:
7ac83c
+                default = 'default="%s"' % str(ALL_OPT[option]["default"])
7ac83c
+            elif default_name_arg in options:
7ac83c
+                if options[default_name_arg]:
7ac83c
+                    try:
7ac83c
+                        default = 'default="%s"' % options[default_name_arg]
7ac83c
+                    except TypeError:
7ac83c
+                        ## @todo/@note: Currently there is no clean way how to handle lists
7ac83c
+                        ## we can create a string from it but we can't set it on command line
7ac83c
+                        default = 'default="%s"' % str(options[default_name_arg])
7ac83c
+            elif default_name_no_arg in options:
7ac83c
+                default = 'default="true"'
7ac83c
+
7ac83c
+            mixed = ALL_OPT[option]["help"]
7ac83c
+            ## split it between option and help text
7ac83c
+            res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed)
7ac83c
+            if None != res:
7ac83c
+                mixed = res.group(1)
7ac83c
+            mixed = mixed.replace("<", "&lt;").replace(">", "&gt;")
7ac83c
+            print('      <getopt mixed="' + mixed + '" />')
7ac83c
+
7ac83c
+            if ALL_OPT[option]["getopt"].count(":") > 0:
7ac83c
+                print('      <content type="string" ' + default + ' />')
7ac83c
+            else:
7ac83c
+                print('      <content type="boolean" ' + default + ' />')
7ac83c
+
7ac83c
+            print('      <shortdesc lang="en">' + ALL_OPT[option]["shortdesc"] + '</shortdesc>')
7ac83c
+            print('    </parameter>')
7ac83c
+
7ac83c
+    print('  </parameters>\n <actions>')
7ac83c
+    print('    <action name="on" />')
7ac83c
+    print('    <action name="off" />')
7ac83c
+    print('    <action name="reboot" />')
7ac83c
+    print('    <action name="monitor" />')
7ac83c
+    print('    <action name="list" />')
7ac83c
+    print('    <action name="metadata" />')
7ac83c
+    print('  </actions>')
7ac83c
+    print('</resource-agent>')
7ac83c
+
7ac83c
+
7ac83c
+def option_longopt(option):
7ac83c
+    """ Return the getopt-compatible long-option name of the given option. """
7ac83c
+
7ac83c
+    if ALL_OPT[option]["getopt"].endswith(":"):
7ac83c
+        return ALL_OPT[option]["longopt"] + "="
7ac83c
+    else:
7ac83c
+        return ALL_OPT[option]["longopt"]
7ac83c
+
7ac83c
+
7ac83c
+def opts_from_command_line(argv, avail_opt):
7ac83c
+    """ Read options from command-line arguments. """
7ac83c
+
7ac83c
+    # Prepare list of options for getopt
7ac83c
+    getopt_string = ""
7ac83c
+    longopt_list = []
7ac83c
+    for k in avail_opt:
7ac83c
+        if k in ALL_OPT:
7ac83c
+            getopt_string += ALL_OPT[k]["getopt"]
7ac83c
+        else:
7ac83c
+            fail_usage("Parse error: unknown option '" + k + "'")
7ac83c
+
7ac83c
+        if k in ALL_OPT and "longopt" in ALL_OPT[k]:
7ac83c
+            longopt_list.append(option_longopt(k))
7ac83c
+
7ac83c
+    try:
7ac83c
+        opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list)
7ac83c
+    except getopt.GetoptError as error:
7ac83c
+        fail_usage("Parse error: " + error.msg)
7ac83c
+
7ac83c
+    # Transform longopt to short one which are used in fencing agents
7ac83c
+    old_opt = opt
7ac83c
+    opt = {}
7ac83c
+    for old_option in dict(old_opt).keys():
7ac83c
+        if old_option.startswith("--"):
7ac83c
+            for option in ALL_OPT.keys():
7ac83c
+                if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option:
7ac83c
+                    opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option]
7ac83c
+        else:
7ac83c
+            opt[old_option] = dict(old_opt)[old_option]
7ac83c
+
7ac83c
+    return opt
7ac83c
+
7ac83c
+
7ac83c
+def opts_from_stdin(avail_opt):
7ac83c
+    """ Read options from standard input. """
7ac83c
+
7ac83c
+    opt = {}
7ac83c
+    name = ""
7ac83c
+    for line in sys.stdin.readlines():
7ac83c
+        line = line.strip()
7ac83c
+        if line.startswith("#") or (len(line) == 0):
7ac83c
+            continue
7ac83c
+
7ac83c
+        (name, value) = (line + "=").split("=", 1)
7ac83c
+        value = value[:-1]
7ac83c
+
7ac83c
+        if name not in avail_opt:
7ac83c
+            print("Parse error: Ignoring unknown option '%s'" % line,
7ac83c
+                  file=sys.stderr)
7ac83c
+            continue
7ac83c
+
7ac83c
+        if ALL_OPT[name]["getopt"].endswith(":"):
7ac83c
+            opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value
7ac83c
+        elif value.lower() in ["1", "yes", "on", "true"]:
7ac83c
+            opt["-"+ALL_OPT[name]["getopt"]] = "1"
7ac83c
+
7ac83c
+    return opt
7ac83c
+
7ac83c
+
7ac83c
+def process_input(avail_opt):
7ac83c
+    """ Set standard environment variables, and parse all options. """
7ac83c
+
7ac83c
+    # Set standard environment
7ac83c
+    os.putenv("LANG", "C")
7ac83c
+    os.putenv("LC_ALL", "C")
7ac83c
+
7ac83c
+    # Read options from command line or standard input
7ac83c
+    if len(sys.argv) > 1:
7ac83c
+        return opts_from_command_line(sys.argv[1:], avail_opt)
7ac83c
+    else:
7ac83c
+        return opts_from_stdin(avail_opt)
7ac83c
+
7ac83c
+
7ac83c
+def atexit_handler():
7ac83c
+    """ Close stdout on exit. """
7ac83c
+
7ac83c
+    try:
7ac83c
+        sys.stdout.close()
7ac83c
+        os.close(1)
7ac83c
+    except IOError:
7ac83c
+        sys.exit("%s failed to close standard output" % agent())
7ac83c
+
7ac83c
+
7ac83c
+def main():
7ac83c
+    """ Make it so! """
7ac83c
+
7ac83c
+    device_opt = ALL_OPT.keys()
7ac83c
+
7ac83c
+    ## Defaults for fence agent
7ac83c
+    atexit.register(atexit_handler)
7ac83c
+    options = process_input(device_opt)
7ac83c
+    options["device_opt"] = device_opt
7ac83c
+    show_docs(options)
7ac83c
+
7ac83c
+    print("Watchdog fencing may be initiated only by the cluster, not this agent.",
7ac83c
+          file=sys.stderr)
7ac83c
+
7ac83c
+    sys.exit(1)
7ac83c
+
7ac83c
+
7ac83c
+if __name__ == "__main__":
7ac83c
+    main()
7ac83c
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
7ac83c
index cd9968f1a..9470ea2c1 100644
7ac83c
--- a/daemons/fenced/fenced_commands.c
7ac83c
+++ b/daemons/fenced/fenced_commands.c
7ac83c
@@ -397,15 +397,13 @@ stonith_device_execute(stonith_device_t * device)
7ac83c
         return TRUE;
7ac83c
     }
7ac83c
 
7ac83c
-    if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
7ac83c
-        if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
7ac83c
-            pcmk__panic(__func__);
7ac83c
-            goto done;
7ac83c
-
7ac83c
-        } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
7ac83c
-            pcmk__panic(__func__);
7ac83c
-            goto done;
7ac83c
-
7ac83c
+    if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
7ac83c
+                         STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
7ac83c
+        if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) {
7ac83c
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
7ac83c
+                pcmk__panic(__func__);
7ac83c
+                goto done;
7ac83c
+            }
7ac83c
         } else {
7ac83c
             crm_info("Faking success for %s watchdog operation", cmd->action);
7ac83c
             cmd->done_cb(0, 0, NULL, cmd);
7ac83c
@@ -716,7 +714,7 @@ get_agent_metadata(const char *agent, xmlNode ** metadata)
7ac83c
         return EINVAL;
7ac83c
     }
7ac83c
     *metadata = NULL;
7ac83c
-    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
7ac83c
+    if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
7ac83c
         return pcmk_rc_ok;
7ac83c
     }
7ac83c
     init_metadata_cache();
7ac83c
@@ -1050,24 +1048,6 @@ schedule_internal_command(const char *origin,
7ac83c
     schedule_stonith_command(cmd, device);
7ac83c
 }
7ac83c
 
7ac83c
-gboolean
7ac83c
-string_in_list(GList *list, const char *item)
7ac83c
-{
7ac83c
-    int lpc = 0;
7ac83c
-    int max = g_list_length(list);
7ac83c
-
7ac83c
-    for (lpc = 0; lpc < max; lpc++) {
7ac83c
-        const char *value = g_list_nth_data(list, lpc);
7ac83c
-
7ac83c
-        if (pcmk__str_eq(item, value, pcmk__str_casei)) {
7ac83c
-            return TRUE;
7ac83c
-        } else {
7ac83c
-            crm_trace("%d: '%s' != '%s'", lpc, item, value);
7ac83c
-        }
7ac83c
-    }
7ac83c
-    return FALSE;
7ac83c
-}
7ac83c
-
7ac83c
 static void
7ac83c
 status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
7ac83c
 {
7ac83c
@@ -1144,7 +1124,7 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
7ac83c
         if (!alias) {
7ac83c
             alias = search->host;
7ac83c
         }
7ac83c
-        if (string_in_list(dev->targets, alias)) {
7ac83c
+        if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
7ac83c
             can_fence = TRUE;
7ac83c
         }
7ac83c
     }
7ac83c
@@ -1215,9 +1195,62 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
7ac83c
     stonith_device_t *dup = NULL;
7ac83c
     stonith_device_t *device = build_device_from_xml(msg);
7ac83c
     guint ndevices = 0;
7ac83c
+    int rv = pcmk_ok;
7ac83c
 
7ac83c
     CRM_CHECK(device != NULL, return -ENOMEM);
7ac83c
 
7ac83c
+    /* do we have a watchdog-device? */
7ac83c
+    if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
7ac83c
+        pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
7ac83c
+                     STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
7ac83c
+        if (stonith_watchdog_timeout_ms <= 0) {
7ac83c
+            crm_err("Ignoring watchdog fence device without "
7ac83c
+                    "stonith-watchdog-timeout set.");
7ac83c
+            rv = -ENODEV;
7ac83c
+            /* fall through to cleanup & return */
7ac83c
+        } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
7ac83c
+                                 STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
7ac83c
+            crm_err("Ignoring watchdog fence device with unknown "
7ac83c
+                    "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
7ac83c
+                    device->agent?device->agent:"");
7ac83c
+            rv = -ENODEV;
7ac83c
+            /* fall through to cleanup & return */
7ac83c
+        } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
7ac83c
+                                 pcmk__str_none)) {
7ac83c
+            crm_err("Ignoring watchdog fence device "
7ac83c
+                    "named %s !='"STONITH_WATCHDOG_ID"'.",
7ac83c
+                    device->id?device->id:"");
7ac83c
+            rv = -ENODEV;
7ac83c
+            /* fall through to cleanup & return */
7ac83c
+        } else {
7ac83c
+            if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
7ac83c
+                             pcmk__str_none)) {
7ac83c
+                /* this either has an empty list or the targets
7ac83c
+                   configured for watchdog-fencing
7ac83c
+                 */
7ac83c
+                g_list_free_full(stonith_watchdog_targets, free);
7ac83c
+                stonith_watchdog_targets = device->targets;
7ac83c
+                device->targets = NULL;
7ac83c
+            }
7ac83c
+            if (node_does_watchdog_fencing(stonith_our_uname)) {
7ac83c
+                g_list_free_full(device->targets, free);
7ac83c
+                device->targets = stonith__parse_targets(stonith_our_uname);
7ac83c
+                g_hash_table_replace(device->params,
7ac83c
+                                     strdup(PCMK_STONITH_HOST_LIST),
7ac83c
+                                     strdup(stonith_our_uname));
7ac83c
+                /* proceed as with any other stonith-device */
7ac83c
+                break;
7ac83c
+            }
7ac83c
+
7ac83c
+            crm_debug("Skip registration of watchdog fence device on node not in host-list.");
7ac83c
+            /* cleanup and fall through to more cleanup and return */
7ac83c
+            device->targets = NULL;
7ac83c
+            stonith_device_remove(device->id, from_cib);
7ac83c
+        }
7ac83c
+        free_device(device);
7ac83c
+        return rv;
7ac83c
+    } while (0);
7ac83c
+
7ac83c
     dup = device_has_duplicate(device);
7ac83c
     if (dup) {
7ac83c
         ndevices = g_hash_table_size(device_list);
7ac83c
@@ -1598,6 +1631,39 @@ stonith_level_remove(xmlNode *msg, char **desc)
7ac83c
  *       (CIB registration is not sufficient), because monitor should not be
7ac83c
  *       possible unless the device is "started" (API registered).
7ac83c
  */
7ac83c
+
7ac83c
+static char *
7ac83c
+list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
7ac83c
+{
7ac83c
+    int max = g_list_length(list);
7ac83c
+    size_t delim_len = delim?strlen(delim):0;
7ac83c
+    size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
7ac83c
+    char *rv;
7ac83c
+    GList *gIter;
7ac83c
+
7ac83c
+    for (gIter = list; gIter != NULL; gIter = gIter->next) {
7ac83c
+        const char *value = (const char *) gIter->data;
7ac83c
+
7ac83c
+        alloc_size += strlen(value);
7ac83c
+    }
7ac83c
+    rv = calloc(alloc_size, sizeof(char));
7ac83c
+    if (rv) {
7ac83c
+        char *pos = rv;
7ac83c
+        const char *lead_delim = "";
7ac83c
+
7ac83c
+        for (gIter = list; gIter != NULL; gIter = gIter->next) {
7ac83c
+            const char *value = (const char *) gIter->data;
7ac83c
+
7ac83c
+            pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
7ac83c
+            lead_delim = delim;
7ac83c
+        }
7ac83c
+        if (max && terminate_with_delim) {
7ac83c
+            sprintf(pos, "%s", delim);
7ac83c
+        }
7ac83c
+    }
7ac83c
+    return rv;
7ac83c
+}
7ac83c
+
7ac83c
 static int
7ac83c
 stonith_device_action(xmlNode * msg, char **output)
7ac83c
 {
7ac83c
@@ -1615,6 +1681,19 @@ stonith_device_action(xmlNode * msg, char **output)
7ac83c
         return -EPROTO;
7ac83c
     }
7ac83c
 
7ac83c
+    if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
7ac83c
+        if (stonith_watchdog_timeout_ms <= 0) {
7ac83c
+            return -ENODEV;
7ac83c
+        } else {
7ac83c
+            if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
7ac83c
+                *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
7ac83c
+                return pcmk_ok;
7ac83c
+            } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
7ac83c
+                return pcmk_ok;
7ac83c
+            }
7ac83c
+        }
7ac83c
+    }
7ac83c
+
7ac83c
     device = g_hash_table_lookup(device_list, id);
7ac83c
     if ((device == NULL)
7ac83c
         || (!device->api_registered && !strcmp(action, "monitor"))) {
7ac83c
@@ -1742,7 +1821,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
7ac83c
          * Only use if all hosts on which the device can be active can always fence all listed hosts
7ac83c
          */
7ac83c
 
7ac83c
-        if (string_in_list(dev->targets, host)) {
7ac83c
+        if (pcmk__str_in_list(dev->targets, host, pcmk__str_casei)) {
7ac83c
             can = TRUE;
7ac83c
         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
7ac83c
                    && g_hash_table_lookup(dev->aliases, host)) {
7ac83c
@@ -1763,7 +1842,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
7ac83c
             return;
7ac83c
         }
7ac83c
 
7ac83c
-        if (string_in_list(dev->targets, alias)) {
7ac83c
+        if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
7ac83c
             can = TRUE;
7ac83c
         }
7ac83c
 
7ac83c
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
7ac83c
index cf91acaed..224f2baba 100644
7ac83c
--- a/daemons/fenced/fenced_remote.c
7ac83c
+++ b/daemons/fenced/fenced_remote.c
7ac83c
@@ -1522,6 +1522,25 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
7ac83c
     }
7ac83c
 }
7ac83c
 
7ac83c
+static gboolean
7ac83c
+check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
7ac83c
+{
7ac83c
+    if (node_does_watchdog_fencing(op->target)) {
7ac83c
+
7ac83c
+        crm_notice("Waiting %lds for %s to self-fence (%s) for "
7ac83c
+                   "client %s " CRM_XS " id=%.8s",
7ac83c
+                   (stonith_watchdog_timeout_ms / 1000),
7ac83c
+                   op->target, op->action, op->client_name, op->id);
7ac83c
+        op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
7ac83c
+                                         remote_op_watchdog_done, op);
7ac83c
+        return TRUE;
7ac83c
+    } else {
7ac83c
+        crm_debug("Skipping fallback to watchdog-fencing as %s is "
7ac83c
+                 "not in host-list", op->target);
7ac83c
+    }
7ac83c
+    return FALSE;
7ac83c
+}
7ac83c
+
7ac83c
 void
7ac83c
 call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
7ac83c
 {
7ac83c
@@ -1592,26 +1611,33 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
7ac83c
             g_source_remove(op->op_timer_one);
7ac83c
         }
7ac83c
 
7ac83c
-        if(stonith_watchdog_timeout_ms > 0 && device && pcmk__str_eq(device, "watchdog", pcmk__str_casei)) {
7ac83c
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
7ac83c
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
7ac83c
-                       op->target, op->action, op->client_name, op->id);
7ac83c
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
7ac83c
-
7ac83c
-            /* TODO check devices to verify watchdog will be in use */
7ac83c
-        } else if(stonith_watchdog_timeout_ms > 0
7ac83c
-                  && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
7ac83c
-                  && !pcmk__str_eq(op->action, "on", pcmk__str_casei)) {
7ac83c
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
7ac83c
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
7ac83c
-                       op->target, op->action, op->client_name, op->id);
7ac83c
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
7ac83c
-
7ac83c
-        } else {
7ac83c
+        if (!(stonith_watchdog_timeout_ms > 0 && (
7ac83c
+                (pcmk__str_eq(device, STONITH_WATCHDOG_ID,
7ac83c
+                                        pcmk__str_none)) ||
7ac83c
+                (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
7ac83c
+                    && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) &&
7ac83c
+             check_watchdog_fencing_and_wait(op))) {
7ac83c
+
7ac83c
+            /* Some thoughts about self-fencing cases reaching this point:
7ac83c
+               - Actually check in check_watchdog_fencing_and_wait
7ac83c
+                 shouldn't fail if STONITH_WATCHDOG_ID is
7ac83c
+                 chosen as fencing-device and it being present implies
7ac83c
+                 watchdog-fencing is enabled anyway
7ac83c
+               - If watchdog-fencing is disabled either in general or for
7ac83c
+                 a specific target - detected in check_watchdog_fencing_and_wait -
7ac83c
+                 for some other kind of self-fencing we can't expect
7ac83c
+                 a success answer but timeout is fine if the node doesn't
7ac83c
+                 come back in between
7ac83c
+               - Delicate might be the case where we have watchdog-fencing
7ac83c
+                 enabled for a node but the watchdog-fencing-device isn't
7ac83c
+                 explicitly chosen for suicide. Local pe-execution in sbd
7ac83c
+                 may detect the node as unclean and lead to timely suicide.
7ac83c
+                 Otherwise the selection of stonith-watchdog-timeout at
7ac83c
+                 least is questionable.
7ac83c
+             */
7ac83c
             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
7ac83c
         }
7ac83c
 
7ac83c
-
7ac83c
         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
7ac83c
         peer->tried = TRUE;
7ac83c
         free_xml(remote_op);
7ac83c
@@ -1645,12 +1671,11 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
7ac83c
          * but we have all the expected replies, then no devices
7ac83c
          * are available to execute the fencing operation. */
7ac83c
 
7ac83c
-        if(stonith_watchdog_timeout_ms && pcmk__str_eq(device, "watchdog", pcmk__str_null_matches | pcmk__str_casei)) {
7ac83c
-            crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
7ac83c
-                       CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
7ac83c
-                       op->target, op->action, op->client_name, op->id);
7ac83c
-            op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
7ac83c
-            return;
7ac83c
+        if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
7ac83c
+           STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
7ac83c
+            if (check_watchdog_fencing_and_wait(op)) {
7ac83c
+                return;
7ac83c
+            }
7ac83c
         }
7ac83c
 
7ac83c
         if (op->state == st_query) {
7ac83c
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
7ac83c
index 39738d8be..7f8b427d9 100644
7ac83c
--- a/daemons/fenced/pacemaker-fenced.c
7ac83c
+++ b/daemons/fenced/pacemaker-fenced.c
7ac83c
@@ -42,6 +42,7 @@
7ac83c
 
7ac83c
 char *stonith_our_uname = NULL;
7ac83c
 long stonith_watchdog_timeout_ms = 0;
7ac83c
+GList *stonith_watchdog_targets = NULL;
7ac83c
 
7ac83c
 static GMainLoop *mainloop = NULL;
7ac83c
 
7ac83c
@@ -578,7 +579,44 @@ our_node_allowed_for(pe_resource_t *rsc)
7ac83c
 }
7ac83c
 
7ac83c
 static void
7ac83c
-watchdog_device_update(xmlNode *cib)
7ac83c
+watchdog_device_update(void)
7ac83c
+{
7ac83c
+    if (stonith_watchdog_timeout_ms > 0) {
7ac83c
+        if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
7ac83c
+            !stonith_watchdog_targets) {
7ac83c
+            /* getting here: watchdog-fencing is enabled, there is no device
7ac83c
+               yet, and stonith_watchdog_targets is not what prevents one
7ac83c
+             */
7ac83c
+            int rc;
7ac83c
+            xmlNode *xml;
7ac83c
+
7ac83c
+            xml = create_device_registration_xml(
7ac83c
+                    STONITH_WATCHDOG_ID,
7ac83c
+                    st_namespace_internal,
7ac83c
+                    STONITH_WATCHDOG_AGENT,
7ac83c
+                    NULL, /* stonith_device_register will add our
7ac83c
+                             own name as PCMK_STONITH_HOST_LIST param
7ac83c
+                             so we can skip that here
7ac83c
+                           */
7ac83c
+                    NULL);
7ac83c
+            rc = stonith_device_register(xml, NULL, TRUE);
7ac83c
+            free_xml(xml);
7ac83c
+            if (rc != pcmk_ok) {
7ac83c
+                crm_crit("Cannot register watchdog pseudo fence agent");
7ac83c
+                crm_exit(CRM_EX_FATAL);
7ac83c
+            }
7ac83c
+        }
7ac83c
+
7ac83c
+    } else {
7ac83c
+        /* be silent if no device - todo parameter to stonith_device_remove */
7ac83c
+        if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
7ac83c
+            stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
7ac83c
+        }
7ac83c
+    }
7ac83c
+}
7ac83c
+
7ac83c
+static void
7ac83c
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
7ac83c
 {
7ac83c
     xmlNode *stonith_enabled_xml = NULL;
7ac83c
     const char *stonith_enabled_s = NULL;
7ac83c
@@ -608,33 +646,7 @@ watchdog_device_update(xmlNode *cib)
7ac83c
         }
7ac83c
     }
7ac83c
 
7ac83c
-    if (timeout_ms != stonith_watchdog_timeout_ms) {
7ac83c
-        crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
7ac83c
-        stonith_watchdog_timeout_ms = timeout_ms;
7ac83c
-
7ac83c
-        if (stonith_watchdog_timeout_ms > 0) {
7ac83c
-            int rc;
7ac83c
-            xmlNode *xml;
7ac83c
-            stonith_key_value_t *params = NULL;
7ac83c
-
7ac83c
-            params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST,
7ac83c
-                                           stonith_our_uname);
7ac83c
-
7ac83c
-            xml = create_device_registration_xml("watchdog", st_namespace_internal,
7ac83c
-                                                 STONITH_WATCHDOG_AGENT, params,
7ac83c
-                                                 NULL);
7ac83c
-            stonith_key_value_freeall(params, 1, 1);
7ac83c
-            rc = stonith_device_register(xml, NULL, FALSE);
7ac83c
-            free_xml(xml);
7ac83c
-            if (rc != pcmk_ok) {
7ac83c
-                crm_crit("Cannot register watchdog pseudo fence agent");
7ac83c
-                crm_exit(CRM_EX_FATAL);
7ac83c
-            }
7ac83c
-
7ac83c
-        } else {
7ac83c
-            stonith_device_remove("watchdog", FALSE);
7ac83c
-        }
7ac83c
-    }
7ac83c
+    stonith_watchdog_timeout_ms = timeout_ms;
7ac83c
 }
7ac83c
 
7ac83c
 /*!
7ac83c
@@ -677,6 +689,16 @@ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
7ac83c
         return;
7ac83c
     }
7ac83c
 
7ac83c
+    /* if watchdog-fencing is disabled handle any watchdog-fence
7ac83c
+       resource as if it was disabled
7ac83c
+     */
7ac83c
+    if ((stonith_watchdog_timeout_ms <= 0) &&
7ac83c
+        pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
7ac83c
+        crm_info("Watchdog-fencing disabled thus handling "
7ac83c
+                 "device %s as disabled", rsc->id);
7ac83c
+        return;
7ac83c
+    }
7ac83c
+
7ac83c
     /* Check whether our node is allowed for this resource (and its parent if in a group) */
7ac83c
     node = our_node_allowed_for(rsc);
7ac83c
     if (rsc->parent && (rsc->parent->variant == pe_group)) {
7ac83c
@@ -772,6 +794,12 @@ cib_devices_update(void)
7ac83c
         }
7ac83c
     }
7ac83c
 
7ac83c
+    /* have list repopulated if cib has a watchdog-fencing-resource
7ac83c
+       TODO: keep a cached list for queries happening while we are refreshing
7ac83c
+     */
7ac83c
+    g_list_free_full(stonith_watchdog_targets, free);
7ac83c
+    stonith_watchdog_targets = NULL;
7ac83c
+
7ac83c
     for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) {
7ac83c
         cib_device_update(gIter->data, fenced_data_set);
7ac83c
     }
7ac83c
@@ -825,6 +853,8 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
7ac83c
             if (search != NULL) {
7ac83c
                 *search = 0;
7ac83c
                 stonith_device_remove(rsc_id, TRUE);
7ac83c
+                /* watchdog_device_update called afterwards
7ac83c
+                   to fall back to implicit definition if needed */
7ac83c
             } else {
7ac83c
                 crm_warn("Ignoring malformed CIB update (resource deletion)");
7ac83c
             }
7ac83c
@@ -968,6 +998,24 @@ node_has_attr(const char *node, const char *name, const char *value)
7ac83c
     return (match != NULL);
7ac83c
 }
7ac83c
 
7ac83c
+/*!
7ac83c
+ * \internal
7ac83c
+ * \brief Check whether a node does watchdog-fencing
7ac83c
+ *
7ac83c
+ * \param[in] node    Name of node to check
7ac83c
+ *
7ac83c
+ * \return TRUE if node found in stonith_watchdog_targets
7ac83c
+ *         or stonith_watchdog_targets is empty indicating
7ac83c
+ *         all nodes are doing watchdog-fencing
7ac83c
+ */
7ac83c
+gboolean
7ac83c
+node_does_watchdog_fencing(const char *node)
7ac83c
+{
7ac83c
+    return ((stonith_watchdog_targets == NULL) ||
7ac83c
+            pcmk__str_in_list(stonith_watchdog_targets, node, pcmk__str_casei));
7ac83c
+}
7ac83c
+
7ac83c
+
7ac83c
 static void
7ac83c
 update_fencing_topology(const char *event, xmlNode * msg)
7ac83c
 {
7ac83c
@@ -1073,6 +1121,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
7ac83c
     xmlNode *stonith_enabled_xml = NULL;
7ac83c
     const char *stonith_enabled_s = NULL;
7ac83c
     static gboolean stonith_enabled_saved = TRUE;
7ac83c
+    long timeout_ms_saved = stonith_watchdog_timeout_ms;
7ac83c
+    gboolean need_full_refresh = FALSE;
7ac83c
 
7ac83c
     if(!have_cib_devices) {
7ac83c
         crm_trace("Skipping updates until we get a full dump");
7ac83c
@@ -1127,6 +1177,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
7ac83c
     }
7ac83c
 
7ac83c
     pcmk__refresh_node_caches_from_cib(local_cib);
7ac83c
+    update_stonith_watchdog_timeout_ms(local_cib);
7ac83c
 
7ac83c
     stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
7ac83c
                                            local_cib, LOG_NEVER);
7ac83c
@@ -1134,23 +1185,30 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
7ac83c
         stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
7ac83c
     }
7ac83c
 
7ac83c
-    watchdog_device_update(local_cib);
7ac83c
-
7ac83c
     if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
7ac83c
         crm_trace("Ignoring CIB updates while fencing is disabled");
7ac83c
         stonith_enabled_saved = FALSE;
7ac83c
-        return;
7ac83c
 
7ac83c
     } else if (stonith_enabled_saved == FALSE) {
7ac83c
         crm_info("Updating fencing device and topology lists "
7ac83c
                  "now that fencing is enabled");
7ac83c
         stonith_enabled_saved = TRUE;
7ac83c
-        fencing_topology_init();
7ac83c
-        cib_devices_update();
7ac83c
+        need_full_refresh = TRUE;
7ac83c
 
7ac83c
     } else {
7ac83c
-        update_fencing_topology(event, msg);
7ac83c
-        update_cib_stonith_devices(event, msg);
7ac83c
+        if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
7ac83c
+            need_full_refresh = TRUE;
7ac83c
+        } else {
7ac83c
+            update_fencing_topology(event, msg);
7ac83c
+            update_cib_stonith_devices(event, msg);
7ac83c
+            watchdog_device_update();
7ac83c
+        }
7ac83c
+    }
7ac83c
+
7ac83c
+    if (need_full_refresh) {
7ac83c
+        fencing_topology_init();
7ac83c
+        cib_devices_update();
7ac83c
+        watchdog_device_update();
7ac83c
     }
7ac83c
 }
7ac83c
 
7ac83c
@@ -1162,10 +1220,11 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us
7ac83c
     local_cib = copy_xml(output);
7ac83c
 
7ac83c
     pcmk__refresh_node_caches_from_cib(local_cib);
7ac83c
+    update_stonith_watchdog_timeout_ms(local_cib);
7ac83c
 
7ac83c
     fencing_topology_init();
7ac83c
-    watchdog_device_update(local_cib);
7ac83c
     cib_devices_update();
7ac83c
+    watchdog_device_update();
7ac83c
 }
7ac83c
 
7ac83c
 static void
7ac83c
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
7ac83c
index d330fda4d..14e085e98 100644
7ac83c
--- a/daemons/fenced/pacemaker-fenced.h
7ac83c
+++ b/daemons/fenced/pacemaker-fenced.h
7ac83c
@@ -260,14 +260,15 @@ bool fencing_peer_active(crm_node_t *peer);
7ac83c
 
7ac83c
 int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
7ac83c
 
7ac83c
-gboolean string_in_list(GList *list, const char *item);
7ac83c
-
7ac83c
 gboolean node_has_attr(const char *node, const char *name, const char *value);
7ac83c
 
7ac83c
+gboolean node_does_watchdog_fencing(const char *node);
7ac83c
+
7ac83c
 extern char *stonith_our_uname;
7ac83c
 extern gboolean stand_alone;
7ac83c
 extern GHashTable *device_list;
7ac83c
 extern GHashTable *topology;
7ac83c
 extern long stonith_watchdog_timeout_ms;
7ac83c
+extern GList *stonith_watchdog_targets;
7ac83c
 
7ac83c
 extern GHashTable *stonith_remote_op_list;
7ac83c
diff --git a/include/crm/crm.h b/include/crm/crm.h
7ac83c
index ee52c3630..7861c160e 100644
7ac83c
--- a/include/crm/crm.h
7ac83c
+++ b/include/crm/crm.h
7ac83c
@@ -66,7 +66,7 @@ extern "C" {
7ac83c
  * >=3.0.13: Fail counts include operation name and interval
7ac83c
  * >=3.2.0:  DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
7ac83c
  */
7ac83c
-#  define CRM_FEATURE_SET		"3.10.2"
7ac83c
+#  define CRM_FEATURE_SET		"3.11.0"
7ac83c
 
7ac83c
 /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
7ac83c
  * recipient of a CPG message. This imposes an arbitrary limit on cluster node
7ac83c
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
7ac83c
index 8bcb544d8..f222edba3 100644
7ac83c
--- a/include/crm/fencing/internal.h
7ac83c
+++ b/include/crm/fencing/internal.h
7ac83c
@@ -164,7 +164,10 @@ void stonith__device_parameter_flags(uint32_t *device_flags,
7ac83c
 #  define STONITH_OP_LEVEL_ADD       "st_level_add"
7ac83c
 #  define STONITH_OP_LEVEL_DEL       "st_level_remove"
7ac83c
 
7ac83c
-#  define STONITH_WATCHDOG_AGENT  "#watchdog"
7ac83c
+#  define STONITH_WATCHDOG_AGENT          "fence_watchdog"
7ac83c
+/* Don't change the 2 values below as that would break rolling upgrades */
7ac83c
+#  define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog"
7ac83c
+#  define STONITH_WATCHDOG_ID             "watchdog"
7ac83c
 
7ac83c
 #  ifdef HAVE_STONITH_STONITH_H
7ac83c
 // utilities from st_lha.c
7ac83c
@@ -211,4 +214,7 @@ stonith__op_state_pending(enum op_state state)
7ac83c
     return state != st_failed && state != st_done;
7ac83c
 }
7ac83c
 
7ac83c
+gboolean stonith__watchdog_fencing_enabled_for_node(const char *node);
7ac83c
+gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node);
7ac83c
+
7ac83c
 #endif
7ac83c
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
7ac83c
index e285f51e2..0ff98157b 100644
7ac83c
--- a/lib/fencing/st_client.c
7ac83c
+++ b/lib/fencing/st_client.c
7ac83c
@@ -195,6 +195,67 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
7ac83c
     return st_namespace_invalid;
7ac83c
 }
7ac83c
 
7ac83c
+gboolean
7ac83c
+stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
7ac83c
+{
7ac83c
+    gboolean rv = FALSE;
7ac83c
+    stonith_t *stonith_api = st?st:stonith_api_new();
7ac83c
+    char *list = NULL;
7ac83c
+
7ac83c
+    if(stonith_api) {
7ac83c
+        if (stonith_api->state == stonith_disconnected) {
7ac83c
+            int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL);
7ac83c
+
7ac83c
+            if (rc != pcmk_ok) {
7ac83c
+                crm_err("Failed connecting to Stonith-API for watchdog-fencing-query.");
7ac83c
+            }
7ac83c
+        }
7ac83c
+
7ac83c
+        if (stonith_api->state != stonith_disconnected) {
7ac83c
+            /* caveat!!!
7ac83c
+             * this might fail when stonithd is just updating the device-list
7ac83c
+             * probably something we should fix as well for other api-calls */
7ac83c
+            int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0);
7ac83c
+            if ((rc != pcmk_ok) || (list == NULL)) {
7ac83c
+                /* due to the race described above it can happen that
7ac83c
+                 * we drop in here - so as not to make remote nodes
7ac83c
+                 * panic on that answer
7ac83c
+                 */
7ac83c
+                crm_warn("watchdog-fencing-query failed");
7ac83c
+            } else if (list[0] == '\0') {
7ac83c
+                crm_warn("watchdog-fencing-query returned an empty list - any node");
7ac83c
+                rv = TRUE;
7ac83c
+            } else {
7ac83c
+                GList *targets = stonith__parse_targets(list);
7ac83c
+                rv = pcmk__str_in_list(targets, node, pcmk__str_casei);
7ac83c
+                g_list_free_full(targets, free);
7ac83c
+            }
7ac83c
+            free(list);
7ac83c
+            if (!st) {
7ac83c
+                /* if we're provided the api we still might have done the
7ac83c
+                 * connection - but let's assume the caller won't bother
7ac83c
+                 */
7ac83c
+                stonith_api->cmds->disconnect(stonith_api);
7ac83c
+            }
7ac83c
+        }
7ac83c
+
7ac83c
+        if (!st) {
7ac83c
+            stonith_api_delete(stonith_api);
7ac83c
+        }
7ac83c
+    } else {
7ac83c
+        crm_err("Stonith-API for watchdog-fencing-query couldn't be created.");
7ac83c
+    }
7ac83c
+    crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.",
7ac83c
+              node, rv?"":"not ");
7ac83c
+    return rv;
7ac83c
+}
7ac83c
+
7ac83c
+gboolean
7ac83c
+stonith__watchdog_fencing_enabled_for_node(const char *node)
7ac83c
+{
7ac83c
+    return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
7ac83c
+}
7ac83c
+
7ac83c
 static void
7ac83c
 log_action(stonith_action_t *action, pid_t pid)
7ac83c
 {
7ac83c
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
7ac83c
index 87d050ed1..bf4bceb42 100644
7ac83c
--- a/lib/lrmd/lrmd_client.c
7ac83c
+++ b/lib/lrmd/lrmd_client.c
7ac83c
@@ -34,6 +34,7 @@
7ac83c
 #include <crm/msg_xml.h>
7ac83c
 
7ac83c
 #include <crm/stonith-ng.h>
7ac83c
+#include <crm/fencing/internal.h>
7ac83c
 
7ac83c
 #ifdef HAVE_GNUTLS_GNUTLS_H
7ac83c
 #  undef KEYFILE
7ac83c
@@ -934,7 +935,10 @@ lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash)
7ac83c
     crm_xml_add(data, F_LRMD_ORIGIN, __func__);
7ac83c
 
7ac83c
     value = g_hash_table_lookup(hash, "stonith-watchdog-timeout");
7ac83c
-    crm_xml_add(data, F_LRMD_WATCHDOG, value);
7ac83c
+    if ((value) &&
7ac83c
+        (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) {
7ac83c
+       crm_xml_add(data, F_LRMD_WATCHDOG, value);
7ac83c
+    }
7ac83c
 
7ac83c
     rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0,
7ac83c
                            (native->type == pcmk__client_ipc));
7ac83c
diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in
7ac83c
index 79e78ede9..f58357a77 100644
7ac83c
--- a/rpm/pacemaker.spec.in
7ac83c
+++ b/rpm/pacemaker.spec.in
7ac83c
@@ -744,6 +744,7 @@ exit 0
7ac83c
 %doc %{_mandir}/man8/crm_attribute.*
7ac83c
 %doc %{_mandir}/man8/crm_master.*
7ac83c
 %doc %{_mandir}/man8/fence_legacy.*
7ac83c
+%doc %{_mandir}/man8/fence_watchdog.*
7ac83c
 %doc %{_mandir}/man8/pacemakerd.*
7ac83c
 
7ac83c
 %doc %{_datadir}/pacemaker/alerts
7ac83c
@@ -796,6 +797,7 @@ exit 0
7ac83c
 %{_sbindir}/crm_simulate
7ac83c
 %{_sbindir}/crm_report
7ac83c
 %{_sbindir}/crm_ticket
7ac83c
+%{_sbindir}/fence_watchdog
7ac83c
 %{_sbindir}/stonith_admin
7ac83c
 # "dirname" is owned by -schemas, which is a prerequisite
7ac83c
 %{_datadir}/pacemaker/report.collector
7ac83c
@@ -822,6 +824,7 @@ exit 0
7ac83c
 %exclude %{_mandir}/man8/crm_attribute.*
7ac83c
 %exclude %{_mandir}/man8/crm_master.*
7ac83c
 %exclude %{_mandir}/man8/fence_legacy.*
7ac83c
+%exclude %{_mandir}/man8/fence_watchdog.*
7ac83c
 %exclude %{_mandir}/man8/pacemakerd.*
7ac83c
 %exclude %{_mandir}/man8/pacemaker-remoted.*
7ac83c
 
7ac83c
-- 
7ac83c
2.27.0
7ac83c
7ac83c
7ac83c
From 53dd360f096e5f005e3221e8d44d82d3654b5172 Mon Sep 17 00:00:00 2001
7ac83c
From: Klaus Wenninger <klaus.wenninger@aon.at>
7ac83c
Date: Wed, 4 Aug 2021 15:57:23 +0200
7ac83c
Subject: [PATCH 3/3] Fix: watchdog-fencing: Silence warning without node
7ac83c
 restriction
7ac83c
7ac83c
---
7ac83c
 lib/fencing/st_client.c | 1 -
7ac83c
 1 file changed, 1 deletion(-)
7ac83c
7ac83c
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
7ac83c
index 0ff98157b..14fa7b2a6 100644
7ac83c
--- a/lib/fencing/st_client.c
7ac83c
+++ b/lib/fencing/st_client.c
7ac83c
@@ -223,7 +223,6 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
7ac83c
                  */
7ac83c
                 crm_warn("watchdog-fencing-query failed");
7ac83c
             } else if (list[0] == '\0') {
7ac83c
-                crm_warn("watchdog-fencing-query returned an empty list - any node");
7ac83c
                 rv = TRUE;
7ac83c
             } else {
7ac83c
                 GList *targets = stonith__parse_targets(list);
7ac83c
-- 
7ac83c
2.27.0
7ac83c