From f4e3d77c94906a062641c7bf34243049de521a87 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Wed, 3 Feb 2021 13:25:22 +0100 Subject: [PATCH] Fix: crm_mon: detect when run on remote-node --- daemons/execd/remoted_proxy.c | 17 +++++++ daemons/pacemakerd/pacemakerd.c | 6 +-- include/crm/common/ipc_internal.h | 2 + lib/common/ipc_server.c | 26 ++++++++++ tools/crm_mon.c | 99 ++++++++++++++++++++++++--------------- 5 files changed, 106 insertions(+), 44 deletions(-) diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index 9329fa6..0fe39bf 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -29,6 +29,7 @@ static qb_ipcs_service_t *cib_shm = NULL; static qb_ipcs_service_t *attrd_ipcs = NULL; static qb_ipcs_service_t *crmd_ipcs = NULL; static qb_ipcs_service_t *stonith_ipcs = NULL; +static qb_ipcs_service_t *pacemakerd_ipcs = NULL; // An IPC provider is a cluster node controller connecting as a client static GList *ipc_providers = NULL; @@ -126,6 +127,12 @@ stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) } static int32_t +pacemakerd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +{ + return -EREMOTEIO; +} + +static int32_t cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW); @@ -356,6 +363,14 @@ static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { .connection_destroyed = ipc_proxy_destroy }; +static struct qb_ipcs_service_handlers pacemakerd_proxy_callbacks = { + .connection_accept = pacemakerd_proxy_accept, + .connection_created = NULL, + .msg_process = NULL, + .connection_closed = NULL, + .connection_destroyed = NULL +}; + static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { .connection_accept = cib_proxy_accept_ro, .connection_created = NULL, @@ -422,6 +437,7 @@ ipc_proxy_init(void) &cib_proxy_callbacks_rw); pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks); 
pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks); + pcmk__serve_pacemakerd_ipc(&pacemakerd_ipcs, &pacemakerd_proxy_callbacks); crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks); if (crmd_ipcs == NULL) { crm_err("Failed to create controller: exiting and inhibiting respawn"); @@ -444,6 +460,7 @@ ipc_proxy_cleanup(void) pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm); qb_ipcs_destroy(attrd_ipcs); qb_ipcs_destroy(stonith_ipcs); + qb_ipcs_destroy(pacemakerd_ipcs); qb_ipcs_destroy(crmd_ipcs); cib_ro = NULL; cib_rw = NULL; diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index 509b0f8..4572b70 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -1287,11 +1287,7 @@ main(int argc, char **argv) // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks); - if (ipcs == NULL) { - crm_err("Couldn't start IPC server"); - crm_exit(CRM_EX_OSERR); - } + pcmk__serve_pacemakerd_ipc(&ipcs, &mcp_ipc_callbacks); #ifdef SUPPORT_COROSYNC /* Allows us to block shutdown */ diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h index cf935f3..fb82ce1 100644 --- a/include/crm/common/ipc_internal.h +++ b/include/crm/common/ipc_internal.h @@ -221,6 +221,8 @@ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); +void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, + struct qb_ipcs_service_handlers *cb); qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb); void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index 4d3e954..b3aaf8e 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -922,6 +922,32 @@ pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, } /*! 
+ * \internal + * \brief Add an IPC server to the main loop for the pacemakerd API + * + * \param[out] ipcs Where to store newly created IPC server + * \param[in] cb IPC callbacks + * \note This function exits with CRM_EX_OSERR if unable to create the server. + */ +void +pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, + struct qb_ipcs_service_handlers *cb) +{ + *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); + + if (*ipcs == NULL) { + crm_err("Couldn't start pacemakerd IPC server"); + crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); + /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL + * if we want to prevent pacemakerd from restarting them. + * With pacemakerd we leave the exit-code shown to e.g. systemd + * to what it was prior to moving the code here from pacemakerd.c + */ + crm_exit(CRM_EX_OSERR); + } +} + +/*! * \brief Check whether string represents a client name used by cluster daemons * * \param[in] name String to check diff --git a/tools/crm_mon.c b/tools/crm_mon.c index d4d4ac3..e58fed2 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -83,6 +83,8 @@ static gchar **processed_args = NULL; static time_t last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; +static gboolean on_remote_node = FALSE; + int interactive_fence_level = 0; static pcmk__supported_format_t formats[] = { @@ -988,48 +990,63 @@ pacemakerd_status(void) } pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state); rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_poll); - if (rc == pcmk_rc_ok) { - rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); - if (rc == pcmk_rc_ok) { - rc = pcmk_poll_ipc(pacemakerd_api, options.reconnect_msec/2); + switch (rc) { + case pcmk_rc_ok: + rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); if (rc == pcmk_rc_ok) { - pcmk_dispatch_ipc(pacemakerd_api); - rc = ENOTCONN; - if ((output_format == mon_output_console) || - (output_format == mon_output_plain)) { - switch (state) { - case
pcmk_pacemakerd_state_running: - rc = pcmk_rc_ok; - break; - case pcmk_pacemakerd_state_starting_daemons: - print_as(output_format ,"Pacemaker daemons starting ...\n"); - break; - case pcmk_pacemakerd_state_wait_for_ping: - print_as(output_format ,"Waiting for startup-trigger from SBD ...\n"); - break; - case pcmk_pacemakerd_state_shutting_down: - print_as(output_format ,"Pacemaker daemons shutting down ...\n"); - break; - case pcmk_pacemakerd_state_shutdown_complete: - /* assuming pacemakerd doesn't dispatch any pings after entering - * that state unless it is waiting for SBD - */ - print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n"); - break; - default: - break; - } - } else { - switch (state) { - case pcmk_pacemakerd_state_running: - rc = pcmk_rc_ok; - break; - default: - break; + rc = pcmk_poll_ipc(pacemakerd_api, options.reconnect_msec/2); + if (rc == pcmk_rc_ok) { + pcmk_dispatch_ipc(pacemakerd_api); + rc = ENOTCONN; + if ((output_format == mon_output_console) || + (output_format == mon_output_plain)) { + switch (state) { + case pcmk_pacemakerd_state_running: + rc = pcmk_rc_ok; + break; + case pcmk_pacemakerd_state_starting_daemons: + print_as(output_format ,"Pacemaker daemons starting ...\n"); + break; + case pcmk_pacemakerd_state_wait_for_ping: + print_as(output_format ,"Waiting for startup-trigger from SBD ...\n"); + break; + case pcmk_pacemakerd_state_shutting_down: + print_as(output_format ,"Pacemaker daemons shutting down ...\n"); + break; + case pcmk_pacemakerd_state_shutdown_complete: + /* assuming pacemakerd doesn't dispatch any pings after entering + * that state unless it is waiting for SBD + */ + print_as(output_format ,"Pacemaker daemons shut down - reporting to SBD ...\n"); + break; + default: + break; + } + } else { + switch (state) { + case pcmk_pacemakerd_state_running: + rc = pcmk_rc_ok; + break; + default: + break; + } } } } - } + break; + case EREMOTEIO: + rc = pcmk_rc_ok; + on_remote_node = TRUE; +#if 
CURSES_ENABLED + /* just show this if refresh is gonna remove all traces */ + if (output_format == mon_output_console) { + print_as(output_format , + "Running on remote-node waiting to be connected by cluster ...\n"); + } +#endif + break; + default: + break; } pcmk_free_ipc_api(pacemakerd_api); /* returning with ENOTCONN triggers a retry */ @@ -1348,7 +1365,11 @@ handle_connection_failures(int rc) pcmk_rc_str(rc)); rc = MON_STATUS_CRIT; } else if (rc == ENOTCONN) { - g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); + if (on_remote_node) { + g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster"); + } else { + g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); + } rc = pcmk_rc2exitc(rc); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc)); -- 1.8.3.1