commit 7aa42a14d8b06638f3e3e175fc59e16f730979a8
Author: David Vossel
Date:   Thu Apr 17 10:52:45 2014 -0500

    High: fencing: Execute all required fencing devices regardless of what topology level they are at

    (cherry picked from commit c922fd50f7c8deb337aad932cb0b8d1c26cbcd99)

diff --git a/fencing/commands.c b/fencing/commands.c
index 8efb156..b4cd862 100644
--- a/fencing/commands.c
+++ b/fencing/commands.c
@@ -1297,6 +1297,9 @@ stonith_query_capable_device_cb(GList * devices, void *user_data)
         crm_xml_add(dev, "namespace", device->namespace);
         crm_xml_add(dev, "agent", device->agent);
         crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
+        if (is_action_required(query->action, device)) {
+            crm_xml_add_int(dev, F_STONITH_DEVICE_REQUIRED, 1);
+        }
         if (action_specific_timeout) {
             crm_xml_add_int(dev, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
         }
diff --git a/fencing/internal.h b/fencing/internal.h
index 971a32f..3fcad20 100644
--- a/fencing/internal.h
+++ b/fencing/internal.h
@@ -113,6 +113,10 @@ typedef struct remote_fencing_op_s {
 
     /*! The current topology level being executed */
     guint level;
+
+    /*! List of required devices the topology must execute regardless of what
+     * topology level they exist at. */
+    GListPtr required_list;
     /*! The device list of all the devices at the current executing topology level. */
     GListPtr devices_list;
     /*! Current entry in the topology device list */
diff --git a/fencing/remote.c b/fencing/remote.c
index fd25025..10447e7 100644
--- a/fencing/remote.c
+++ b/fencing/remote.c
@@ -68,6 +68,12 @@ static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
 static int get_op_total_timeout(remote_fencing_op_t * op, st_query_result_t * chosen_peer,
                                 int default_timeout);
 
+static gint
+sort_strings(gconstpointer a, gconstpointer b)
+{
+    return strcmp(a, b);
+}
+
 static void
 free_remote_query(gpointer data)
 {
@@ -128,6 +134,10 @@ free_remote_op(gpointer data)
         g_list_free_full(op->devices_list, free);
         op->devices_list = NULL;
     }
+    if (op->required_list) {
+        g_list_free_full(op->required_list, free);
+        op->required_list = NULL;
+    }
     free(op);
 }
 
@@ -400,6 +410,36 @@ topology_is_empty(stonith_topology_t *tp)
     return TRUE;
 }
 
+/*!
+ * Mark a device as required for a fencing operation.
+ *
+ * A private copy of the id is stored in op->required_list unless it is
+ * already there. If a topology level's device list is currently loaded,
+ * a second copy is appended to op->devices_list when the device is missing.
+ *
+ * op      Fencing operation to update
+ * device  Id of the device a peer reported as required (must not be NULL)
+ */
+static void
+add_required_device(remote_fencing_op_t * op, const char *device)
+{
+    GListPtr match = g_list_find_custom(op->required_list, device, sort_strings);
+
+    if (match) {
+        /* device already marked required */
+        return;
+    }
+    op->required_list = g_list_prepend(op->required_list, strdup(device));
+
+    /* make sure the required device is in the current list of devices to be executed */
+    if (op->devices_list) {
+        match = g_list_find_custom(op->devices_list, device, sort_strings);
+        if (match == NULL) {
+            op->devices_list = g_list_append(op->devices_list, strdup(device));
+        }
+    }
+}
+
 /* deep copy the device list */
 static void
 set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
@@ -413,6 +453,18 @@ set_op_device_list(remote_fencing_op_t * op, GListPtr devices)
     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
         op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
     }
+
+    /* tack on whatever required devices have not been executed
+     * to the end of the current devices list. This ensures that
+     * the required devices will get executed regardless of what topology
+     * level they exist at. */
+    for (lpc = op->required_list; lpc != NULL; lpc = lpc->next) {
+        GListPtr match = g_list_find_custom(op->devices_list, lpc->data, sort_strings);
+        if (match == NULL) {
+            op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
+        }
+    }
+
     op->devices = op->devices_list;
 }
 
@@ -714,12 +766,6 @@ initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean ma
     return op;
 }
 
-static gint
-sort_strings(gconstpointer a, gconstpointer b)
-{
-    return strcmp(a, b);
-}
-
 enum find_best_peer_options {
     /*! Skip checking the target peer for capable fencing devices */
     FIND_PEER_SKIP_TARGET = 0x0001,
@@ -1165,11 +1211,13 @@ process_remote_stonith_query(xmlNode * msg)
         const char *device = ID(child);
         int action_timeout = 0;
         int verified = 0;
+        int required = 0;
 
         if (device) {
             result->device_list = g_list_prepend(result->device_list, strdup(device));
             crm_element_value_int(child, F_STONITH_ACTION_TIMEOUT, &action_timeout);
             crm_element_value_int(child, F_STONITH_DEVICE_VERIFIED, &verified);
+            crm_element_value_int(child, F_STONITH_DEVICE_REQUIRED, &required);
             if (action_timeout) {
                 crm_trace("Peer %s with device %s returned action timeout %d",
                           result->host, device, action_timeout);
@@ -1181,6 +1229,13 @@ process_remote_stonith_query(xmlNode * msg)
                 g_hash_table_insert(result->verified_devices,
                                     strdup(device), GINT_TO_POINTER(verified));
             }
+            if (required) {
+                crm_trace("Peer %s requires device %s to execute for action %s",
+                          result->host, device, op->action);
+                /* This matters when executing a topology. Required devices will get
+                 * executed regardless of their topology level. We use this for unfencing. */
+                add_required_device(op, device);
+            }
         }
     }
 
@@ -1312,10 +1367,23 @@ process_remote_stonith_exec(xmlNode * msg)
          * Continue the topology if more devices exist at the current level, otherwise
          * mark as done. */
         if (rc == pcmk_ok) {
+            GListPtr required_match = NULL;
+
+            if (device) {
+                required_match = g_list_find_custom(op->required_list, device, sort_strings);
+            }
             if (op->devices) {
                 /* Success, are there any more? */
                 op->devices = op->devices->next;
             }
+            if (required_match) {
+                /* The list owns a strdup()'d copy of the device id: unlink the
+                 * node first, then free the string so it is not leaked. */
+                char *required_id = required_match->data;
+
+                op->required_list = g_list_delete_link(op->required_list, required_match);
+                free(required_id);
+            }
             /* if no more devices at this fencing level, we are done,
              * else we need to contine with executing the next device in the list */
             if (op->devices == NULL) {
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index 2822e9a..3625cf9 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -66,6 +66,8 @@ xmlNode *create_device_registration_xml(const char *id, const char *namespace, c
 /*! Has this device been verified using a monitor type
  * operation (monitor, list, status) */
 #  define F_STONITH_DEVICE_VERIFIED   "st_monitor_verified"
+/*! device is required for this action */
+#  define F_STONITH_DEVICE_REQUIRED   "st_required"
 #  define F_STONITH_CALLBACK_TOKEN    "st_async_id"
 #  define F_STONITH_CLIENTNAME        "st_clientname"
 #  define F_STONITH_CLIENTNODE        "st_clientnode"