diff --git a/SOURCES/013-rolling-upgrade-monitor.patch b/SOURCES/013-rolling-upgrade-monitor.patch new file mode 100644 index 0000000..ab67986 --- /dev/null +++ b/SOURCES/013-rolling-upgrade-monitor.patch @@ -0,0 +1,1978 @@ +From a35dfe0b76555f30dda4c9d96630866de40322b3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 14:40:24 -0500 +Subject: [PATCH 01/24] Low: fencing: use a default timeout with metadata and + validate + +If the caller did not specify a timeout, use a default in +stonith_api_operations_t:metadata() and validate(). (Timeout is currently +ignored past that point, so this has no effect yet.) + +Also, rename timeout argument for clarity. +--- + lib/fencing/st_client.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 2b0d308..28791ff 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -504,7 +504,8 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names + + static int + stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, +- const char *namespace, char **output, int timeout) ++ const char *namespace, char **output, ++ int timeout_sec) + { + /* By executing meta-data directly, we can get it from stonith_admin when + * the cluster is not running, which is important for higher-level tools. 
+@@ -512,16 +513,20 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a + + enum stonith_namespace ns = stonith_get_namespace(agent, namespace); + ++ if (timeout_sec <= 0) { ++ timeout_sec = CRMD_METADATA_CALL_TIMEOUT / 1000; // Constant is in ms ++ } ++ + crm_trace("Looking up metadata for %s agent %s", + stonith_namespace2text(ns), agent); + + switch (ns) { + case st_namespace_rhcs: +- return stonith__rhcs_metadata(agent, timeout, output); ++ return stonith__rhcs_metadata(agent, timeout_sec, output); + + #if HAVE_STONITH_STONITH_H + case st_namespace_lha: +- return stonith__lha_metadata(agent, timeout, output); ++ return stonith__lha_metadata(agent, timeout_sec, output); + #endif + + default: +@@ -1684,8 +1689,8 @@ stonith_api_delete(stonith_t * stonith) + static int + stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, + const char *namespace_s, const char *agent, +- stonith_key_value_t *params, int timeout, char **output, +- char **error_output) ++ stonith_key_value_t *params, int timeout_sec, ++ char **output, char **error_output) + { + /* Validation should be done directly via the agent, so we can get it from + * stonith_admin when the cluster is not running, which is important for +@@ -1731,17 +1736,21 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, + *error_output = NULL; + } + ++ if (timeout_sec <= 0) { ++ timeout_sec = CRMD_METADATA_CALL_TIMEOUT / 1000; // Questionable default; constant is in ms ++ } ++ + switch (stonith_get_namespace(agent, namespace_s)) { + case st_namespace_rhcs: + rc = stonith__rhcs_validate(st, call_options, target, agent, +- params_table, host_arg, timeout, ++ params_table, host_arg, timeout_sec, + output, error_output); + break; + + #if HAVE_STONITH_STONITH_H + case st_namespace_lha: + rc = stonith__lha_validate(st, call_options, target, agent, +- params_table, timeout, output, ++ params_table, timeout_sec, output, + error_output); + break; + #endif +-- +2.31.1 + +From c2a863b7daeb829c0210d87a2f1503c1cf4dc7a5 
Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 14:00:00 -0500 +Subject: [PATCH 02/24] Doc: fencer: improve + stonith_api_operations_t:metadata() description + +--- + include/crm/stonith-ng.h | 15 +++++++++++---- + lib/fencing/st_client.c | 7 ++++--- + 2 files changed, 15 insertions(+), 7 deletions(-) + +diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h +index 4fe52ef..a41d411 100644 +--- a/include/crm/stonith-ng.h ++++ b/include/crm/stonith-ng.h +@@ -206,14 +206,21 @@ typedef struct stonith_api_operations_s + stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list); + + /*! +- * \brief Get the metadata documentation for a resource. ++ * \brief Retrieve a fence agent's metadata + * +- * \note Value is returned in output. Output must be freed when set. ++ * \param[in,out] stonith Fencer connection ++ * \param[in] call_options Group of enum stonith_call_options ++ * (currently ignored) ++ * \param[in] agent Fence agent to query ++ * \param[in] namespace Namespace of fence agent to query (optional) ++ * \param[out] output Where to store metadata ++ * \param[in] timeout_sec Error if not complete within this time + * + * \return Legacy Pacemaker return code ++ * \note The caller is responsible for freeing *output using free(). + */ +- int (*metadata)(stonith_t *st, int options, +- const char *device, const char *provider, char **output, int timeout); ++ int (*metadata)(stonith_t *stonith, int call_options, const char *agent, ++ const char *namespace, char **output, int timeout_sec); + + /*! 
+ * \brief Retrieve a list of installed stonith agents +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 28791ff..6c252bc 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -502,10 +502,11 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names + return count; + } + ++// See stonith_api_operations_t:metadata() documentation + static int +-stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, +- const char *namespace, char **output, +- int timeout_sec) ++stonith_api_device_metadata(stonith_t *stonith, int call_options, ++ const char *agent, const char *namespace, ++ char **output, int timeout_sec) + { + /* By executing meta-data directly, we can get it from stonith_admin when + * the cluster is not running, which is important for higher-level tools. +-- +2.31.1 + +From 9beff34a0d39425ef470e59e251a8ca7c08e69a0 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 14:16:54 -0500 +Subject: [PATCH 03/24] Doc: fencing: add doxygen block for + stonith__action_create() + +... 
and rename a couple arguments for clarity +--- + include/crm/fencing/internal.h | 4 ++-- + lib/fencing/st_actions.c | 33 ++++++++++++++++++++++++--------- + 2 files changed, 26 insertions(+), 11 deletions(-) + +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index d2b49f8..e2ca85e 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -50,10 +50,10 @@ struct stonith_action_s; + typedef struct stonith_action_s stonith_action_t; + + stonith_action_t *stonith_action_create(const char *agent, +- const char *_action, ++ const char *action_name, + const char *victim, + uint32_t victim_nodeid, +- int timeout, ++ int timeout_sec, + GHashTable * device_args, + GHashTable * port_map, + const char * host_arg); +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index b3429f6..d16fa33 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -232,27 +232,42 @@ stonith__action_result(stonith_action_t *action) + } + + #define FAILURE_MAX_RETRIES 2 ++ ++/*! 
++ * \internal ++ * \brief Create a new fencing action to be executed ++ * ++ * \param[in] agent Fence agent to use ++ * \param[in] action_name Fencing action to be executed ++ * \param[in] victim Name of target of fencing action (if known) ++ * \param[in] victim_nodeid Node ID of target of fencing action (if known) ++ * \param[in] timeout_sec Timeout to be used when executing action ++ * \param[in] device_args Parameters to pass to fence agent ++ * \param[in] port_map Mapping of target names to device ports ++ * \param[in] host_arg Agent parameter used to pass target name ++ * ++ * \return Newly created fencing action (asserts on error, never NULL) ++ */ + stonith_action_t * + stonith_action_create(const char *agent, +- const char *_action, ++ const char *action_name, + const char *victim, + uint32_t victim_nodeid, +- int timeout, GHashTable * device_args, ++ int timeout_sec, GHashTable * device_args, + GHashTable * port_map, const char *host_arg) + { +- stonith_action_t *action; ++ stonith_action_t *action = calloc(1, sizeof(stonith_action_t)); + +- action = calloc(1, sizeof(stonith_action_t)); + CRM_ASSERT(action != NULL); + +- action->args = make_args(agent, _action, victim, victim_nodeid, ++ action->args = make_args(agent, action_name, victim, victim_nodeid, + device_args, port_map, host_arg); + crm_debug("Preparing '%s' action for %s using agent %s", +- _action, (victim? victim : "no target"), agent); ++ action_name, (victim? 
victim : "no target"), agent); + action->agent = strdup(agent); +- action->action = strdup(_action); ++ action->action = strdup(action_name); + pcmk__str_update(&action->victim, victim); +- action->timeout = action->remaining_timeout = timeout; ++ action->timeout = action->remaining_timeout = timeout_sec; + action->max_retries = FAILURE_MAX_RETRIES; + + pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, +@@ -262,7 +277,7 @@ stonith_action_create(const char *agent, + char buffer[512]; + const char *value = NULL; + +- snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); ++ snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name); + value = g_hash_table_lookup(device_args, buffer); + + if (value) { +-- +2.31.1 + +From 3001cb016eefff55c55e709247b0c14c331fb330 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 14:20:24 -0500 +Subject: [PATCH 04/24] Low: fencing: use requested timeout with RHCS metadata + actions + +... instead of hardcoded 5 seconds, and rename timeout argument for clarity +--- + lib/fencing/st_rhcs.c | 35 ++++++++++++++++------------------- + 1 file changed, 16 insertions(+), 19 deletions(-) + +diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c +index dfccff2..5e600d2 100644 +--- a/lib/fencing/st_rhcs.c ++++ b/lib/fencing/st_rhcs.c +@@ -112,25 +112,24 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter) + } + + /*! +- * \brief Execute RHCS-compatible agent's meta-data action ++ * \brief Execute RHCS-compatible agent's metadata action + * +- * \param[in] agent Agent to execute +- * \param[in] timeout Action timeout +- * \param[out] metadata Where to store output xmlNode (or NULL to ignore) +- * +- * \todo timeout is currently ignored; shouldn't we use it? 
++ * \param[in] agent Agent to execute ++ * \param[in] timeout_sec Action timeout ++ * \param[out] metadata Where to store output xmlNode (or NULL to ignore) + */ + static int +-stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) ++stonith__rhcs_get_metadata(const char *agent, int timeout_sec, ++ xmlNode **metadata) + { + xmlNode *xml = NULL; + xmlNode *actions = NULL; + xmlXPathObject *xpathObj = NULL; +- pcmk__action_result_t *result = NULL; +- stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, +- 5, NULL, NULL, NULL); ++ stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, ++ 0, timeout_sec, NULL, ++ NULL, NULL); + int rc = stonith__execute(action); +- result = stonith__action_result(action); ++ pcmk__action_result_t *result = stonith__action_result(action); + + if (result == NULL) { + if (rc < 0) { +@@ -208,21 +207,19 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) + } + + /*! +- * \brief Execute RHCS-compatible agent's meta-data action +- * +- * \param[in] agent Agent to execute +- * \param[in] timeout Action timeout +- * \param[out] output Where to store action output (or NULL to ignore) ++ * \brief Retrieve metadata for RHCS-compatible fence agent + * +- * \todo timeout is currently ignored; shouldn't we use it? 
++ * \param[in] agent Agent to execute ++ * \param[in] timeout_sec Action timeout ++ * \param[out] output Where to store action output (or NULL to ignore) + */ + int +-stonith__rhcs_metadata(const char *agent, int timeout, char **output) ++stonith__rhcs_metadata(const char *agent, int timeout_sec, char **output) + { + char *buffer = NULL; + xmlNode *xml = NULL; + +- int rc = stonith__rhcs_get_metadata(agent, timeout, &xml); ++ int rc = stonith__rhcs_get_metadata(agent, timeout_sec, &xml); + + if (rc != pcmk_ok) { + free_xml(xml); +-- +2.31.1 + +From 17dbf449d8b51ea27a89a13f47160a95b0a45149 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 14:32:44 -0500 +Subject: [PATCH 05/24] Refactor: fencing: make stonith_action_t:async bool + +--- + lib/fencing/st_actions.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index d16fa33..abd0d5a 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -9,6 +9,7 @@ + + #include + ++#include + #include + #include + #include +@@ -32,7 +33,7 @@ struct stonith_action_s { + char *victim; + GHashTable *args; + int timeout; +- int async; ++ bool async; + void *userdata; + void (*done_cb) (int pid, const pcmk__action_result_t *result, + void *user_data); +@@ -671,7 +672,7 @@ stonith_action_execute_async(stonith_action_t * action, + action->userdata = userdata; + action->done_cb = done; + action->fork_cb = fork_cb; +- action->async = 1; ++ action->async = true; + + return internal_stonith_action_execute(action); + } +-- +2.31.1 + +From 9b0f568dddc928104e6d2d54d5138e0c7ca5b537 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 14:59:28 -0500 +Subject: [PATCH 06/24] Refactor: fencing: rename + stonith_action_execute_async() + +... 
to stonith__execute_async(), since it's internal +--- + daemons/fenced/fenced_commands.c | 4 ++-- + include/crm/fencing/internal.h | 12 +++++------- + lib/fencing/st_actions.c | 11 +++++------ + 3 files changed, 12 insertions(+), 15 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 94aa6b8..41a1936 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -510,8 +510,8 @@ stonith_device_execute(stonith_device_t * device) + /* for async exec, exec_rc is negative for early error exit + otherwise handling of success/errors is done via callbacks */ + cmd->activating_on = device; +- exec_rc = stonith_action_execute_async(action, (void *)cmd, +- cmd->done_cb, fork_cb); ++ exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, ++ fork_cb); + if (exec_rc < 0) { + cmd->activating_on = NULL; + cmd->done_cb(0, stonith__action_result(action), cmd); +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index e2ca85e..1797d9a 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -64,13 +64,11 @@ void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result); + void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result); + xmlNode *stonith__find_xe_with_result(xmlNode *xml); + +-int +-stonith_action_execute_async(stonith_action_t * action, +- void *userdata, +- void (*done) (int pid, +- const pcmk__action_result_t *result, +- void *user_data), +- void (*fork_cb) (int pid, void *user_data)); ++int stonith__execute_async(stonith_action_t *action, void *userdata, ++ void (*done) (int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void (*fork_cb) (int pid, void *user_data)); + + xmlNode *create_level_registration_xml(const char *node, const char *pattern, + const char *attr, const char *value, +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index 
abd0d5a..c4e32bd 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -658,12 +658,11 @@ internal_stonith_action_execute(stonith_action_t * action) + * \return pcmk_ok if ownership of action has been taken, -errno otherwise + */ + int +-stonith_action_execute_async(stonith_action_t * action, +- void *userdata, +- void (*done) (int pid, +- const pcmk__action_result_t *result, +- void *user_data), +- void (*fork_cb) (int pid, void *user_data)) ++stonith__execute_async(stonith_action_t * action, void *userdata, ++ void (*done) (int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void (*fork_cb) (int pid, void *user_data)) + { + if (!action) { + return -EINVAL; +-- +2.31.1 + +From 1d8fbd12b302b5029a341f269bd00def79e6a0ea Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 16:43:57 -0500 +Subject: [PATCH 07/24] Refactor: fencing: add internal API for getting + metadata async + +Nothing uses it yet +--- + include/crm/fencing/internal.h | 6 +++ + lib/fencing/st_client.c | 80 ++++++++++++++++++++++++++++++++++ + 2 files changed, 86 insertions(+) + +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 1797d9a..513d1c4 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -70,6 +70,12 @@ int stonith__execute_async(stonith_action_t *action, void *userdata, + void *user_data), + void (*fork_cb) (int pid, void *user_data)); + ++int stonith__metadata_async(const char *agent, int timeout_sec, ++ void (*callback)(int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void *user_data); ++ + xmlNode *create_level_registration_xml(const char *node, const char *pattern, + const char *attr, const char *value, + int level, +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 6c252bc..91075bd 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -2386,6 +2386,86 @@ stonith__device_parameter_flags(uint32_t 
*device_flags, const char *device_name, + freeXpathObject(xpath); + } + ++/*! ++ * \internal ++ * \brief Retrieve fence agent meta-data asynchronously ++ * ++ * \param[in] agent Agent to execute ++ * \param[in] timeout_sec Error if not complete within this time ++ * \param[in] callback Function to call with result (this will always be ++ * called, whether by this function directly or later ++ * via the main loop, and on success the metadata will ++ * be in its result argument's action_stdout) ++ * \param[in] user_data User data to pass to callback ++ * ++ * \return Standard Pacemaker return code ++ * \note The caller must use a main loop. This function is not a ++ * stonith_api_operations_t method because it does not need a stonith_t ++ * object and does not go through the fencer, but executes the agent ++ * directly. ++ */ ++int ++stonith__metadata_async(const char *agent, int timeout_sec, ++ void (*callback)(int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void *user_data) ++{ ++ switch (stonith_get_namespace(agent, NULL)) { ++ case st_namespace_rhcs: ++ { ++ stonith_action_t *action = NULL; ++ int rc = pcmk_ok; ++ ++ action = stonith_action_create(agent, "metadata", NULL, 0, ++ timeout_sec, NULL, NULL, NULL); ++ ++ rc = stonith__execute_async(action, user_data, callback, NULL); ++ if (rc != pcmk_ok) { ++ callback(0, stonith__action_result(action), user_data); ++ stonith__destroy_action(action); ++ } ++ return pcmk_legacy2rc(rc); ++ } ++ ++#if HAVE_STONITH_STONITH_H ++ case st_namespace_lha: ++ // LHA metadata is simply synthesized, so simulate async ++ { ++ pcmk__action_result_t result = { ++ .exit_status = CRM_EX_OK, ++ .execution_status = PCMK_EXEC_DONE, ++ .exit_reason = NULL, ++ .action_stdout = NULL, ++ .action_stderr = NULL, ++ }; ++ ++ stonith__lha_metadata(agent, timeout_sec, ++ &result.action_stdout); ++ callback(0, &result, user_data); ++ pcmk__reset_result(&result); ++ return pcmk_rc_ok; ++ } ++#endif ++ ++ default: ++ { ++ 
pcmk__action_result_t result = { ++ .exit_status = CRM_EX_ERROR, ++ .execution_status = PCMK_EXEC_ERROR_HARD, ++ .exit_reason = crm_strdup_printf("No such agent '%s'", ++ agent), ++ .action_stdout = NULL, ++ .action_stderr = NULL, ++ }; ++ ++ callback(0, &result, user_data); ++ pcmk__reset_result(&result); ++ return ENOENT; ++ } ++ } ++} ++ + /*! + * \internal + * \brief Return the exit status from an async action callback +-- +2.31.1 + +From 1869cc181ef9599bd938fc545d302b2721169755 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 13 Sep 2022 17:33:10 -0500 +Subject: [PATCH 08/24] Refactor: liblrmd: add internal API for getting + metadata async + +Nothing uses it yet +--- + include/crm/lrmd_internal.h | 10 +++- + lib/lrmd/lrmd_client.c | 115 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 123 insertions(+), 2 deletions(-) + +diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h +index 284c4d6..5cb00d5 100644 +--- a/include/crm/lrmd_internal.h ++++ b/include/crm/lrmd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2015-2021 the Pacemaker project contributors ++ * Copyright 2015-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -17,7 +17,7 @@ + #include // mainloop_io_t, ipc_client_callbacks + #include // pcmk__output_t + #include // pcmk__remote_t +-#include // lrmd_t, lrmd_event_data_t ++#include // lrmd_t, lrmd_event_data_t, lrmd_rsc_info_t + + int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port); + +@@ -35,6 +35,12 @@ int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list, + int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, + const char *msg_type); + ++int lrmd__metadata_async(lrmd_rsc_info_t *rsc, ++ void (*callback)(int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void *user_data); ++ + void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, + int op_status, const char *exit_reason); + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 82afd6c..4b16bf0 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -2343,6 +2343,121 @@ lrmd_api_delete(lrmd_t * lrmd) + free(lrmd); + } + ++struct metadata_cb { ++ void (*callback)(int pid, const pcmk__action_result_t *result, ++ void *user_data); ++ void *user_data; ++}; ++ ++/*! ++ * \internal ++ * \brief Process asynchronous metadata completion ++ * ++ * \param[in] action Metadata action that completed ++ */ ++static void ++metadata_complete(svc_action_t *action) ++{ ++ struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data; ++ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; ++ ++ pcmk__set_result(&result, action->rc, action->status, ++ services__exit_reason(action)); ++ pcmk__set_result_output(&result, action->stdout_data, action->stderr_data); ++ ++ metadata_cb->callback(0, &result, metadata_cb->user_data); ++ result.action_stdout = NULL; // Prevent free, because action owns it ++ result.action_stderr = NULL; // Prevent free, because action owns it ++ pcmk__reset_result(&result); ++ free(metadata_cb); ++} ++ ++/*! 
++ * \internal ++ * \brief Retrieve agent metadata asynchronously ++ * ++ * \param[in] rsc Resource agent specification ++ * \param[in] callback Function to call with result (this will always be ++ * called, whether by this function directly or later via ++ * the main loop, and on success the metadata will be in ++ * its result argument's action_stdout) ++ * \param[in] user_data User data to pass to callback ++ * ++ * \return Standard Pacemaker return code ++ * \note This function is not a lrmd_api_operations_t method because it does not ++ * need an lrmd_t object and does not go through the executor, but ++ * executes the agent directly. ++ */ ++int ++lrmd__metadata_async(lrmd_rsc_info_t *rsc, ++ void (*callback)(int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void *user_data) ++{ ++ svc_action_t *action = NULL; ++ struct metadata_cb *metadata_cb = NULL; ++ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; ++ ++ CRM_CHECK(callback != NULL, return EINVAL); ++ ++ if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { ++ pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR, ++ "Invalid resource specification"); ++ callback(0, &result, user_data); ++ pcmk__reset_result(&result); ++ return EINVAL; ++ } ++ ++ if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) { ++ return stonith__metadata_async(rsc->type, ++ CRMD_METADATA_CALL_TIMEOUT / 1000, ++ callback, user_data); ++ } ++ ++ action = services__create_resource_action(rsc->type, rsc->standard, ++ rsc->provider, rsc->type, ++ CRMD_ACTION_METADATA, 0, ++ CRMD_METADATA_CALL_TIMEOUT, NULL, ++ 0); ++ if (action == NULL) { ++ pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, ++ "Out of memory"); ++ callback(0, &result, user_data); ++ pcmk__reset_result(&result); ++ return ENOMEM; ++ } ++ if (action->rc != PCMK_OCF_UNKNOWN) { ++ pcmk__set_result(&result, action->rc, action->status, ++ services__exit_reason(action)); ++ callback(0, &result, 
user_data); ++ pcmk__reset_result(&result); ++ services_action_free(action); ++ return EINVAL; ++ } ++ ++ action->cb_data = calloc(1, sizeof(struct metadata_cb)); ++ if (action->cb_data == NULL) { ++ services_action_free(action); ++ pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, ++ "Out of memory"); ++ callback(0, &result, user_data); ++ pcmk__reset_result(&result); ++ return ENOMEM; ++ } ++ ++ metadata_cb = (struct metadata_cb *) action->cb_data; ++ metadata_cb->callback = callback; ++ metadata_cb->user_data = user_data; ++ if (!services_action_async(action, metadata_complete)) { ++ services_action_free(action); ++ return pcmk_rc_error; // @TODO Derive from action->rc and ->status ++ } ++ ++ // The services library has taken responsibility for action ++ return pcmk_rc_ok; ++} ++ + /*! + * \internal + * \brief Set the result of an executor event +-- +2.31.1 + +From de89164053cde8f44ca74a007703e0827ffd67ec Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 14 Sep 2022 16:34:37 -0500 +Subject: [PATCH 09/24] Low: controller: ignore CRM_OP_LRM_REFRESH + +This was only sent by crm_resource --refresh in versions 1.1.9 and earlier. +Since the local crm_resource is the same version as the controller, and +Pacemaker Remote was introduced in 1.1.9, this means that only remote nodes +running 1.1.9 can possibly send it. + +It didn't really do anything useful anyway, so just ignore it. 
+--- + daemons/controld/controld_execd.c | 33 +++++----------------------- + daemons/controld/controld_messages.c | 2 +- + include/crm/crm.h | 2 +- + lib/pacemaker/pcmk_graph_producer.c | 3 +-- + lib/pengine/common.c | 2 -- + 5 files changed, 9 insertions(+), 33 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index fa411a6..719fab0 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1553,32 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, + lrmd_free_event(op); + } + +-static void +-handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, +- const char *from_host, const char *from_sys) +-{ +- int rc = pcmk_ok; +- xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); +- +- fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); +- crm_info("Forced a local resource history refresh: call=%d", rc); +- +- if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) { +- xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, +- from_sys, CRM_SYSTEM_LRMD, +- fsa_our_uuid); +- +- crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); +- +- if (relay_message(reply, TRUE) == FALSE) { +- crm_log_xml_err(reply, "Unable to route reply"); +- } +- free_xml(reply); +- } +- +- free_xml(fragment); +-} +- + static void + handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) + { +@@ -1787,7 +1761,12 @@ do_lrm_invoke(long long action, + } + + if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { +- handle_refresh_op(lrm_state, user_name, from_host, from_sys); ++ /* @COMPAT This can only be sent by crm_resource --refresh on a ++ * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely ++ * unlikely. It previously would cause the controller to re-write its ++ * resource history to the CIB. Just ignore it. 
++ */ ++ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); + + } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) { + handle_query_op(input->msg, lrm_state); +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 31d3524..957fc20 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -1061,7 +1061,7 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) + return handle_lrm_delete(stored_msg); + + } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) +- || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) ++ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT + || (strcmp(op, CRM_OP_REPROBE) == 0)) { + + crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); +diff --git a/include/crm/crm.h b/include/crm/crm.h +index 5ec66d2..f2e536e 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -146,7 +146,7 @@ extern char *crm_system_name; + # define CRM_OP_REGISTER "register" + # define CRM_OP_IPC_FWD "ipc_fwd" + # define CRM_OP_INVOKE_LRM "lrm_invoke" +-# define CRM_OP_LRM_REFRESH "lrm_refresh" /* Deprecated */ ++# define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10 + # define CRM_OP_LRM_QUERY "lrm_query" + # define CRM_OP_LRM_DELETE "lrm_delete" + # define CRM_OP_LRM_FAIL "lrm_fail" +diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c +index 4c1b5a6..0077719 100644 +--- a/lib/pacemaker/pcmk_graph_producer.c ++++ b/lib/pacemaker/pcmk_graph_producer.c +@@ -446,8 +446,7 @@ create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, + + } else if (pcmk__str_any_of(action->task, + CRM_OP_SHUTDOWN, +- CRM_OP_CLEAR_FAILCOUNT, +- CRM_OP_LRM_REFRESH, NULL)) { ++ CRM_OP_CLEAR_FAILCOUNT, NULL)) { + action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); + + } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { +diff --git a/lib/pengine/common.c 
b/lib/pengine/common.c +index 93ba3fe..7db9d0e 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -384,8 +384,6 @@ text2task(const char *task) + return no_action; + } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { + return no_action; +- } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { +- return no_action; + } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { + return no_action; + } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { +-- +2.31.1 + +From 406fbc52ed652915887e78138f8f3c2eeaeabfb6 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 14 Sep 2022 16:46:15 -0500 +Subject: [PATCH 10/24] API: libcrmcommon: deprecate CRM_OP_LRM_QUERY + +This has been unused since at least Pacemaker 1.0.0, and since we don't support +rolling upgrades from anything that old, and Pacemaker Remote didn't exist +then, we can just drop support for it entirely. +--- + daemons/controld/controld_execd.c | 17 ----------------- + include/crm/crm.h | 1 - + include/crm/crm_compat.h | 5 ++++- + 3 files changed, 4 insertions(+), 19 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 719fab0..54e6818 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1553,20 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, + lrmd_free_event(op); + } + +-static void +-handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) +-{ +- xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); +- xmlNode *reply = create_reply(msg, data); +- +- if (relay_message(reply, TRUE) == FALSE) { +- crm_err("Unable to route reply"); +- crm_log_xml_err(reply, "reply"); +- } +- free_xml(reply); +- free_xml(data); +-} +- + static void + handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, + const char *from_host, const char *user_name, +@@ -1768,9 +1754,6 @@ do_lrm_invoke(long 
long action, + */ + crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); + +- } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) { +- handle_query_op(input->msg, lrm_state); +- + // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op + } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) { + update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, +diff --git a/include/crm/crm.h b/include/crm/crm.h +index f2e536e..38915e3 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -147,7 +147,6 @@ extern char *crm_system_name; + # define CRM_OP_IPC_FWD "ipc_fwd" + # define CRM_OP_INVOKE_LRM "lrm_invoke" + # define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10 +-# define CRM_OP_LRM_QUERY "lrm_query" + # define CRM_OP_LRM_DELETE "lrm_delete" + # define CRM_OP_LRM_FAIL "lrm_fail" + # define CRM_OP_PROBED "probe_complete" +diff --git a/include/crm/crm_compat.h b/include/crm/crm_compat.h +index 3b35a5e..8a4b368 100644 +--- a/include/crm/crm_compat.h ++++ b/include/crm/crm_compat.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2021 the Pacemaker project contributors ++ * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -31,6 +31,9 @@ extern "C" { + //! \deprecated This defined constant will be removed in a future release + #define MAX_IPC_DELAY 120 + ++//! \deprecated This defined constant will be removed in a future release ++#define CRM_OP_LRM_QUERY "lrm_query" ++ + //!@{ + //! 
\deprecated This macro will be removed in a future release + +-- +2.31.1 + +From 7c3d2f58d387d2ec0d5c5d340f8816f324e816bf Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 14 Sep 2022 16:49:48 -0500 +Subject: [PATCH 11/24] Refactor: controller: drop do_lrm_query_internal() + +Now that there's only one (short) caller, just move its contents there +--- + daemons/controld/controld_execd.c | 28 +++++++++++----------------- + 1 file changed, 11 insertions(+), 17 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 54e6818..99c9193 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -811,19 +811,26 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) + return FALSE; + } + +-static xmlNode * +-do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) ++xmlNode * ++controld_query_executor_state(const char *node_name) + { + xmlNode *xml_state = NULL; + xmlNode *xml_data = NULL; + xmlNode *rsc_list = NULL; + crm_node_t *peer = NULL; ++ lrm_state_t *lrm_state = lrm_state_find(node_name); ++ ++ if (!lrm_state) { ++ crm_err("Could not find executor state for node %s", node_name); ++ return NULL; ++ } + + peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + +- xml_state = create_node_state_update(peer, update_flags, NULL, +- __func__); ++ xml_state = create_node_state_update(peer, ++ node_update_cluster|node_update_peer, ++ NULL, __func__); + if (xml_state == NULL) { + return NULL; + } +@@ -840,19 +847,6 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) + return xml_state; + } + +-xmlNode * +-controld_query_executor_state(const char *node_name) +-{ +- lrm_state_t *lrm_state = lrm_state_find(node_name); +- +- if (!lrm_state) { +- crm_err("Could not find executor state for node %s", node_name); +- return NULL; +- } +- return do_lrm_query_internal(lrm_state, +- 
node_update_cluster|node_update_peer); +-} +- + /*! + * \internal + * \brief Map standard Pacemaker return code to operation status and OCF code +-- +2.31.1 + +From 5cab259417a06f64a607f99c478459093ed1b5ed Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 14 Sep 2022 15:48:44 -0500 +Subject: [PATCH 12/24] Doc: controller: drop pointless comment + +It's (likely?) impossible for a live cluster to have been doing rolling +upgrades since 2006. +--- + daemons/controld/controld_execd.c | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 99c9193..53b1156 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -678,18 +678,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ + + target_rc = rsc_op_expected_rc(op); + +- /* there is a small risk in formerly mixed clusters that it will +- * be sub-optimal. +- * +- * however with our upgrade policy, the update we send should +- * still be completely supported anyway +- */ + caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); +- CRM_LOG_ASSERT(caller_version != NULL); +- +- if(caller_version == NULL) { +- caller_version = CRM_FEATURE_SET; +- } ++ CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); + + xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, + fsa_our_uname, src); +-- +2.31.1 + +From b4541d7ecd9551674c4546415751a223ff3013ed Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 15 Sep 2022 11:24:28 -0500 +Subject: [PATCH 13/24] Refactor: controller: move where reload actions get + remapped + +... 
from do_lrm_invoke() to do_lrm_rsc_op(), which will make planned changes +easier +--- + daemons/controld/controld_execd.c | 38 ++++++++++++++++--------------- + 1 file changed, 20 insertions(+), 18 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 53b1156..c9f0cc7 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -43,7 +43,8 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us + static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, + const char *rsc_id, const char *operation); + static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, +- const char *operation, xmlNode *msg); ++ const char *operation, xmlNode *msg, ++ struct ra_metadata_s *md); + + static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + int log_level); +@@ -1808,26 +1809,12 @@ do_lrm_invoke(long long action, + do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, + crm_rsc_delete, user_name); + +- } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, +- CRMD_ACTION_RELOAD_AGENT, NULL)) { +- /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs +- * will schedule reload-agent actions only. In either case, we need +- * to map that to whatever the resource agent actually supports. +- * Default to the OCF 1.1 name. 
+- */ ++ } else { + struct ra_metadata_s *md = NULL; +- const char *reload_name = CRMD_ACTION_RELOAD_AGENT; + + md = controld_get_rsc_metadata(lrm_state, rsc, + controld_metadata_from_cache); +- if ((md != NULL) +- && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { +- reload_name = CRMD_ACTION_RELOAD; +- } +- do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml); +- +- } else { +- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml); ++ do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md); + } + + lrmd_free_rsc_info(rsc); +@@ -2176,7 +2163,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t + + static void + do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, +- const char *operation, xmlNode *msg) ++ const char *operation, xmlNode *msg, struct ra_metadata_s *md) + { + int rc; + int call_id = 0; +@@ -2198,6 +2185,21 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + } + } + ++ if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, ++ CRMD_ACTION_RELOAD_AGENT, NULL)) { ++ /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs ++ * will schedule reload-agent actions only. In either case, we need ++ * to map that to whatever the resource agent actually supports. ++ * Default to the OCF 1.1 name. 
++ */ ++ if ((md != NULL) ++ && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { ++ operation = CRMD_ACTION_RELOAD; ++ } else { ++ operation = CRMD_ACTION_RELOAD_AGENT; ++ } ++ } ++ + op = construct_op(lrm_state, msg, rsc->id, operation); + CRM_CHECK(op != NULL, return); + +-- +2.31.1 + +From a4f6e394a61712da750aabffca2b6dd02f0c5ae6 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 15 Sep 2022 15:12:06 -0500 +Subject: [PATCH 14/24] Refactor: controller: drop operation argument to + do_lrm_rsc_op() + +It can be derived from the XML argument +--- + daemons/controld/controld_execd.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index c9f0cc7..89a993b 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -43,8 +43,7 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us + static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, + const char *rsc_id, const char *operation); + static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, +- const char *operation, xmlNode *msg, +- struct ra_metadata_s *md); ++ xmlNode *msg, struct ra_metadata_s *md); + + static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, + int log_level); +@@ -1814,7 +1813,7 @@ do_lrm_invoke(long long action, + + md = controld_get_rsc_metadata(lrm_state, rsc, + controld_metadata_from_cache); +- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md); ++ do_lrm_rsc_op(lrm_state, rsc, input->xml, md); + } + + lrmd_free_rsc_info(rsc); +@@ -2162,8 +2161,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t + } + + static void +-do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, +- const char *operation, xmlNode *msg, struct ra_metadata_s *md) ++do_lrm_rsc_op(lrm_state_t *lrm_state, 
lrmd_rsc_info_t *rsc, xmlNode *msg, ++ struct ra_metadata_s *md) + { + int rc; + int call_id = 0; +@@ -2172,17 +2171,18 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + lrmd_key_value_t *params = NULL; + fsa_data_t *msg_data = NULL; + const char *transition = NULL; ++ const char *operation = NULL; + gboolean stop_recurring = FALSE; + const char *nack_reason = NULL; + +- CRM_CHECK(rsc != NULL, return); +- CRM_CHECK(operation != NULL, return); ++ CRM_CHECK((rsc != NULL) && (msg != NULL), return); + +- if (msg != NULL) { +- transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); +- if (transition == NULL) { +- crm_log_xml_err(msg, "Missing transition number"); +- } ++ operation = crm_element_value(msg, XML_LRM_ATTR_TASK); ++ CRM_CHECK(!pcmk__str_empty(operation), return); ++ ++ transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); ++ if (pcmk__str_empty(transition)) { ++ crm_log_xml_err(msg, "Missing transition number"); + } + + if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, +@@ -2241,7 +2241,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + crm_notice("Requesting local execution of %s operation for %s on %s " + CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, + crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, +- transition, rsc->id, operation, op->interval_ms); ++ (transition != NULL ? transition : ""), rsc->id, operation, op->interval_ms); + + if (pcmk_is_set(fsa_input_register, R_SHUTDOWN) + && pcmk__str_eq(operation, RSC_START, pcmk__str_casei)) { +-- +2.31.1 + +From 486dbdf023f82a82a02207d8fb7921f8f2ac0588 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 15 Sep 2022 15:40:38 -0500 +Subject: [PATCH 15/24] Low: controller: add failsafe for no executor + connection + +... 
in do_lrm_rsc_op(), to make planned changes easier +--- + daemons/controld/controld_execd.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 89a993b..8986b9b 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -2185,6 +2185,17 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, + crm_log_xml_err(msg, "Missing transition number"); + } + ++ if (lrm_state == NULL) { ++ // This shouldn't be possible, but provide a failsafe just in case ++ crm_err("Cannot execute %s of %s: No executor connection " ++ CRM_XS " transition_key=%s", ++ operation, rsc->id, (transition != NULL ? transition : "")); ++ synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, ++ PCMK_OCF_UNKNOWN_ERROR, ++ "No executor connection"); ++ return; ++ } ++ + if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, + CRMD_ACTION_RELOAD_AGENT, NULL)) { + /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs +-- +2.31.1 + +From afd53bba7dfb5109d844318dff0f82e4687d9e32 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 15 Sep 2022 12:04:31 -0500 +Subject: [PATCH 16/24] Log: controller: improve messages when metadata cache + update fails + +Previously, metadata_cache_update() or ra_param_from_xml() would log an error, +then controld_get_rsc_metadata() (but not the other caller, +process_lrm_event()) would log another warning with the agent info. + +Combine these into a single message always logged by metadata_cache_update(), +which also has been renamed to controld_cache_metadata(). 
+--- + daemons/controld/controld_execd.c | 2 +- + daemons/controld/controld_metadata.c | 27 ++++++++++++--------------- + daemons/controld/controld_metadata.h | 6 +++--- + 3 files changed, 16 insertions(+), 19 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 8986b9b..fe16c96 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -2858,7 +2858,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, + } else if (rsc && (op->rc == PCMK_OCF_OK)) { + char *metadata = unescape_newlines(op->output); + +- metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); ++ controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); + free(metadata); + } + } +diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c +index 8c6f195..91a6a10 100644 +--- a/daemons/controld/controld_metadata.c ++++ b/daemons/controld/controld_metadata.c +@@ -149,13 +149,11 @@ ra_param_from_xml(xmlNode *param_xml) + + p = calloc(1, sizeof(struct ra_param_s)); + if (p == NULL) { +- crm_crit("Could not allocate memory for resource metadata"); + return NULL; + } + + p->rap_name = strdup(param_name); + if (p->rap_name == NULL) { +- crm_crit("Could not allocate memory for resource metadata"); + free(p); + return NULL; + } +@@ -196,10 +194,11 @@ log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version) + } + + struct ra_metadata_s * +-metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, +- const char *metadata_str) ++controld_cache_metadata(GHashTable *mdc, lrmd_rsc_info_t *rsc, ++ const char *metadata_str) + { + char *key = NULL; ++ const char *reason = NULL; + xmlNode *metadata = NULL; + xmlNode *match = NULL; + struct ra_metadata_s *md = NULL; +@@ -210,20 +209,19 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, + + key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); + if (!key) { +- crm_crit("Could not 
allocate memory for resource metadata"); ++ reason = "Invalid resource agent standard or type"; + goto err; + } + + metadata = string2xml(metadata_str); + if (!metadata) { +- crm_err("Metadata for %s:%s:%s is not valid XML", +- rsc->standard, rsc->provider, rsc->type); ++ reason = "Metadata is not valid XML"; + goto err; + } + + md = calloc(1, sizeof(struct ra_metadata_s)); + if (md == NULL) { +- crm_crit("Could not allocate memory for resource metadata"); ++ reason = "Could not allocate memory"; + goto err; + } + +@@ -281,6 +279,7 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, + struct ra_param_s *p = ra_param_from_xml(match); + + if (p == NULL) { ++ reason = "Could not allocate memory"; + goto err; + } + if (pcmk_is_set(p->rap_flags, ra_param_private)) { +@@ -311,6 +310,9 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, + return md; + + err: ++ crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s", ++ rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), ++ (rsc->provider != NULL ? rsc->provider : ""), rsc->type, reason); + free(key); + free_xml(metadata); + metadata_free(md); +@@ -377,13 +379,8 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + return NULL; + } + +- metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, +- metadata_str); ++ metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc, ++ metadata_str); + free(metadata_str); +- if (metadata == NULL) { +- crm_warn("Failed to update metadata for %s (%s%s%s:%s)", +- rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), +- ((rsc->provider == NULL)? 
"" : rsc->provider), rsc->type); +- } + return metadata; + } +diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h +index 7354f94..52d3336 100644 +--- a/daemons/controld/controld_metadata.h ++++ b/daemons/controld/controld_metadata.h +@@ -73,9 +73,9 @@ void metadata_cache_free(GHashTable *mdc); + void metadata_cache_reset(GHashTable *mdc); + void metadata_cache_fini(void); + +-struct ra_metadata_s *metadata_cache_update(GHashTable *mdc, +- lrmd_rsc_info_t *rsc, +- const char *metadata_str); ++struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc, ++ lrmd_rsc_info_t *rsc, ++ const char *metadata_str); + struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state, + lrmd_rsc_info_t *rsc, + uint32_t source); +-- +2.31.1 + +From caeed447d0d8a980d431efd70e5b6f9c91ffac7f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 15 Sep 2022 13:33:36 -0500 +Subject: [PATCH 17/24] Fix: controller: pre-load agent metadata asynchronously + +The controller needs resource agent metadata to record digests with pending and +completed resource actions. + +Previously, metadata was collected synchronously when needed. This caused +several problems, two of which are fixed here for most actions: synchronous +execution blocks the controller from doing anything else (and if the agent's +metadata action tries to contact the controller, that blocks everything until +the action times out), and the metadata action ate into the real action's +timeout. + +Now, if we're likely to need metadata for an action, attempt to get it +asynchronously before executing that action, so the metadata is available in +cache when needed. + +This is not a complete solution, as there are other code paths that might +require metadata and still lead to synchronous execution, but it handles the +most important cases. 
+ +Fixes T554 +--- + daemons/controld/controld_execd.c | 105 +++++++++++++++++++++++---- + daemons/controld/controld_metadata.c | 22 +++--- + 2 files changed, 102 insertions(+), 25 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index fe16c96..c56fdf5 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -670,7 +670,6 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ + struct ra_metadata_s *metadata = NULL; + const char *caller_version = NULL; + lrm_state_t *lrm_state = NULL; +- uint32_t metadata_source = controld_metadata_from_agent; + + if (op == NULL) { + return FALSE; +@@ -703,19 +702,14 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ + return TRUE; + } + +- /* Getting meta-data from cache is OK unless this is a successful start +- * action -- always refresh from the agent for those, in case the +- * resource agent was updated. ++ /* Ideally the metadata is cached, and the agent is just a fallback. + * +- * @TODO Only refresh the meta-data after starts if the agent actually +- * changed (using something like inotify, or a hash or modification time of +- * the agent executable). ++ * @TODO Go through all callers and ensure they get metadata asynchronously ++ * first. 
+ */ +- if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc) +- || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) { +- metadata_source |= controld_metadata_from_cache; +- } +- metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source); ++ metadata = controld_get_rsc_metadata(lrm_state, rsc, ++ controld_metadata_from_agent ++ |controld_metadata_from_cache); + if (metadata == NULL) { + return TRUE; + } +@@ -1673,6 +1667,56 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, + user_name, input, unregister); + } + ++// User data for asynchronous metadata execution ++struct metadata_cb_data { ++ lrmd_rsc_info_t *rsc; // Copy of resource information ++ xmlNode *input_xml; // Copy of FSA input XML ++}; ++ ++static struct metadata_cb_data * ++new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) ++{ ++ struct metadata_cb_data *data = NULL; ++ ++ data = calloc(1, sizeof(struct metadata_cb_data)); ++ CRM_ASSERT(data != NULL); ++ data->input_xml = copy_xml(input_xml); ++ data->rsc = lrmd_copy_rsc_info(rsc); ++ return data; ++} ++ ++static void ++free_metadata_cb_data(struct metadata_cb_data *data) ++{ ++ lrmd_free_rsc_info(data->rsc); ++ free_xml(data->input_xml); ++ free(data); ++} ++ ++/*! 
++ * \internal ++ * \brief Execute an action after metadata has been retrieved ++ * ++ * \param[in] pid Ignored ++ * \param[in] result Result of metadata action ++ * \param[in] user_data Metadata callback data ++ */ ++static void ++metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) ++{ ++ struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; ++ ++ struct ra_metadata_s *md = NULL; ++ lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); ++ ++ if ((lrm_state != NULL) && pcmk__result_ok(result)) { ++ md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, ++ result->action_stdout); ++ } ++ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); ++ free_metadata_cb_data(data); ++} ++ + /* A_LRM_INVOKE */ + void + do_lrm_invoke(long long action, +@@ -1811,9 +1855,40 @@ do_lrm_invoke(long long action, + } else { + struct ra_metadata_s *md = NULL; + +- md = controld_get_rsc_metadata(lrm_state, rsc, +- controld_metadata_from_cache); +- do_lrm_rsc_op(lrm_state, rsc, input->xml, md); ++ /* Getting metadata from cache is OK except for start actions -- ++ * always refresh from the agent for those, in case the resource ++ * agent was updated. ++ * ++ * @TODO Only refresh metadata for starts if the agent actually ++ * changed (using something like inotify, or a hash or modification ++ * time of the agent executable). ++ */ ++ if (strcmp(operation, CRMD_ACTION_START) != 0) { ++ md = controld_get_rsc_metadata(lrm_state, rsc, ++ controld_metadata_from_cache); ++ } ++ ++ if ((md == NULL) && crm_op_needs_metadata(rsc->standard, ++ operation)) { ++ /* Most likely, we'll need the agent metadata to record the ++ * pending operation and the operation result. Get it now rather ++ * than wait until then, so the metadata action doesn't eat into ++ * the real action's timeout. 
++ * ++ * @TODO Metadata is retrieved via direct execution of the ++ * agent, which has a couple of related issues: the executor ++ * should execute agents, not the controller; and metadata for ++ * Pacemaker Remote nodes should be collected on those nodes, ++ * not locally. ++ */ ++ struct metadata_cb_data *data = NULL; ++ ++ data = new_metadata_cb_data(rsc, input->xml); ++ (void) lrmd__metadata_async(rsc, metadata_complete, ++ (void *) data); ++ } else { ++ do_lrm_rsc_op(lrm_state, rsc, input->xml, md); ++ } + } + + lrmd_free_rsc_info(rsc); +diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c +index 91a6a10..a954ebd 100644 +--- a/daemons/controld/controld_metadata.c ++++ b/daemons/controld/controld_metadata.c +@@ -356,17 +356,19 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + return NULL; + } + +- /* For now, we always collect resource agent meta-data via a local, +- * synchronous, direct execution of the agent. This has multiple issues: +- * the executor should execute agents, not the controller; meta-data for +- * Pacemaker Remote nodes should be collected on those nodes, not +- * locally; and the meta-data call shouldn't eat into the timeout of the +- * real action being performed. ++ /* For most actions, metadata was cached asynchronously before action ++ * execution (via metadata_complete()). + * +- * These issues are planned to be addressed by having the scheduler +- * schedule a meta-data cache check at the beginning of each transition. +- * Once that is working, this block will only be a fallback in case the +- * initial collection fails. ++ * However if that failed, and for other actions, retrieve the metadata now ++ * via a local, synchronous, direct execution of the agent. 
++ * ++ * This has multiple issues, which is why this is just a fallback: the ++ * executor should execute agents, not the controller; metadata for ++ * Pacemaker Remote nodes should be collected on those nodes, not locally; ++ * the metadata call shouldn't eat into the timeout of the real action being ++ * performed; and the synchronous call blocks the controller (which also ++ * means that if the metadata action tries to contact the controller, ++ * everything will hang until the timeout). + */ + rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, + rsc->type, &metadata_str, 0); +-- +2.31.1 + +From fddf663d5285740771145e83c41f33c0bfb86dfb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 19 Sep 2022 15:19:06 -0500 +Subject: [PATCH 18/24] Low: libstonithd: return CRM_EX_NOSUCH for bad agent + namespace + +Callers can't rely on a particular exit code scheme at this point, +but it doesn't hurt +--- + lib/fencing/st_client.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 91075bd..d41b066 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -2451,7 +2451,7 @@ stonith__metadata_async(const char *agent, int timeout_sec, + default: + { + pcmk__action_result_t result = { +- .exit_status = CRM_EX_ERROR, ++ .exit_status = CRM_EX_NOSUCH, + .execution_status = PCMK_EXEC_ERROR_HARD, + .exit_reason = crm_strdup_printf("No such agent '%s'", + agent), +-- +2.31.1 + +From 2de926f5b2b5dbf28f994bc35477d59ce46d5ab1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 19 Sep 2022 15:23:43 -0500 +Subject: [PATCH 19/24] Low: liblrmd: consider invalid agent specification a + fatal error + +--- + lib/lrmd/lrmd_client.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 4b16bf0..d691dce 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -2402,7 +2402,8 @@ 
lrmd__metadata_async(lrmd_rsc_info_t *rsc, + CRM_CHECK(callback != NULL, return EINVAL); + + if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { +- pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR, ++ pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, ++ PCMK_EXEC_ERROR_FATAL, + "Invalid resource specification"); + callback(0, &result, user_data); + pcmk__reset_result(&result); +-- +2.31.1 + +From 2d526dae9dbfc6f8658ff96f5f6d58ee09ea879c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 19 Sep 2022 15:25:12 -0500 +Subject: [PATCH 20/24] Low: liblrmd: use resource ID for metadata actions when + available + +--- + lib/lrmd/lrmd_client.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index d691dce..570a2b8 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -2416,11 +2416,11 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc, + callback, user_data); + } + +- action = services__create_resource_action(rsc->type, rsc->standard, +- rsc->provider, rsc->type, +- CRMD_ACTION_METADATA, 0, +- CRMD_METADATA_CALL_TIMEOUT, NULL, +- 0); ++ action = services__create_resource_action((rsc->id != NULL ? 
rsc->id : rsc->type), ++ rsc->standard, rsc->provider, ++ rsc->type, CRMD_ACTION_METADATA, ++ 0, CRMD_METADATA_CALL_TIMEOUT, ++ NULL, 0); + if (action == NULL) { + pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Out of memory"); +-- +2.31.1 + +From 3d632be58dca13293e4ae974da5dfe2838fcdf12 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 19 Sep 2022 15:27:11 -0500 +Subject: [PATCH 21/24] Refactor: controller: executor query can assume local + node + +--- + daemons/controld/controld_execd.c | 6 +++--- + daemons/controld/controld_fsa.h | 4 ++-- + daemons/controld/controld_join_client.c | 2 +- + daemons/controld/controld_join_dc.c | 2 +- + 4 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index c56fdf5..039b194 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -796,16 +796,16 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) + } + + xmlNode * +-controld_query_executor_state(const char *node_name) ++controld_query_executor_state(void) + { + xmlNode *xml_state = NULL; + xmlNode *xml_data = NULL; + xmlNode *rsc_list = NULL; + crm_node_t *peer = NULL; +- lrm_state_t *lrm_state = lrm_state_find(node_name); ++ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname); + + if (!lrm_state) { +- crm_err("Could not find executor state for node %s", node_name); ++ crm_err("Could not find executor state for node %s", fsa_our_uname); + return NULL; + } + +diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h +index 296232f..d137310 100644 +--- a/daemons/controld/controld_fsa.h ++++ b/daemons/controld/controld_fsa.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2021 the Pacemaker project contributors ++ * Copyright 2004-2022 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -518,7 +518,7 @@ extern gboolean ever_had_quorum; + // These should be moved elsewhere + void do_update_cib_nodes(gboolean overwrite, const char *caller); + int crmd_cib_smart_opt(void); +-xmlNode *controld_query_executor_state(const char *node_name); ++xmlNode *controld_query_executor_state(void); + + const char *fsa_input2string(enum crmd_fsa_input input); + const char *fsa_state2string(enum crmd_fsa_state state); +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 6485856..bfec430 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -268,7 +268,7 @@ do_cl_join_finalize_respond(long long action, + update_dc_expected(input->msg); + + /* send our status section to the DC */ +- tmp1 = controld_query_executor_state(fsa_our_uname); ++ tmp1 = controld_query_executor_state(); + if (tmp1 != NULL) { + xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, + CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 9386182..9a8ea3e 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -591,7 +591,7 @@ do_dc_join_ack(long long action, + } + controld_delete_node_state(join_from, section, cib_scope_local); + if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) { +- xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); ++ xmlNode *now_dc_lrmd_state = controld_query_executor_state(); + + if (now_dc_lrmd_state != NULL) { + fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, +-- +2.31.1 + +From d852ec335bd5b518a3f06c7f1b597370094311ae Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 20 Sep 2022 10:18:48 -0500 +Subject: [PATCH 22/24] Log: controller: add messages when getting agent + metadata + +--- + daemons/controld/controld_execd.c | 5 +++++ + daemons/controld/controld_metadata.c | 10 ++++++++++ + 2 
files changed, 15 insertions(+) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 039b194..f02da82 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1884,6 +1884,11 @@ do_lrm_invoke(long long action, + struct metadata_cb_data *data = NULL; + + data = new_metadata_cb_data(rsc, input->xml); ++ crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", ++ rsc->id, rsc->standard, ++ ((rsc->provider == NULL)? "" : ":"), ++ ((rsc->provider == NULL)? "" : rsc->provider), ++ rsc->type); + (void) lrmd__metadata_async(rsc, metadata_complete, + (void *) data); + } else { +diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c +index a954ebd..39b43b0 100644 +--- a/daemons/controld/controld_metadata.c ++++ b/daemons/controld/controld_metadata.c +@@ -348,6 +348,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + free(key); + } + if (metadata != NULL) { ++ crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache", ++ rsc->id, rsc->standard, ++ ((rsc->provider == NULL)? "" : ":"), ++ ((rsc->provider == NULL)? "" : rsc->provider), ++ rsc->type); + return metadata; + } + } +@@ -370,6 +375,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, + * means that if the metadata action tries to contact the controller, + * everything will hang until the timeout). + */ ++ crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously", ++ rsc->id, rsc->standard, ++ ((rsc->provider == NULL)? "" : ":"), ++ ((rsc->provider == NULL)? 
"" : rsc->provider), ++ rsc->type); + rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, + rsc->type, &metadata_str, 0); + if (rc != pcmk_ok) { +-- +2.31.1 + +From 5aec773a20e1ded971a4082358e266353615f196 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 14 Sep 2022 14:36:44 -0500 +Subject: [PATCH 23/24] Test: cts-lab: allow any whitespace in "Recover" + messages + +This seems to have always been multiple spaces, not sure what happened +--- + cts/lab/CTStests.py | 12 ++++++------ + cts/lab/patterns.py | 4 ++-- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py +index 5535177..8b56758 100644 +--- a/cts/lab/CTStests.py ++++ b/cts/lab/CTStests.py +@@ -1,7 +1,7 @@ + """ Test-specific classes for Pacemaker's Cluster Test Suite (CTS) + """ + +-__copyright__ = "Copyright 2000-2021 the Pacemaker project contributors" ++__copyright__ = "Copyright 2000-2022 the Pacemaker project contributors" + __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + + # +@@ -1225,7 +1225,7 @@ class MaintenanceMode(CTSTest): + '''Return list of errors which should be ignored''' + return [ + r"Updating failcount for %s" % self.rid, +- r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid, ++ r"schedulerd.*: Recover\s+%s\s+\(.*\)" % self.rid, + r"Unknown operation: fail", + self.templates["Pat:RscOpOK"] % (self.action, self.rid), + r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval), +@@ -1324,7 +1324,7 @@ class ResourceRecover(CTSTest): + '''Return list of errors which should be ignored''' + return [ + r"Updating failcount for %s" % self.rid, +- r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt), ++ r"schedulerd.*: Recover\s+(%s|%s)\s+\(.*\)" % (self.rid, self.rid_alt), + r"Unknown operation: fail", + self.templates["Pat:RscOpOK"] % (self.action, self.rid), + r"(ERROR|error).*: Action %s_%s_%d .* 
initiated outside of a transition" % (self.rid, self.action, self.interval), +@@ -2559,7 +2559,7 @@ class RemoteLXC(CTSTest): + '''Return list of errors which should be ignored''' + return [ + r"Updating failcount for ping", +- r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)", ++ r"schedulerd.*: Recover\s+(ping|lxc-ms|container)\s+\(.*\)", + # The orphaned lxc-ms resource causes an expected transition error + # that is a result of the scheduler not having knowledge that the + # promotable resource used to be a clone. As a result, it looks like that +@@ -3054,7 +3054,7 @@ class RemoteStonithd(RemoteDriver): + r"Software caused connection abort", + r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor", + r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*", +- r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)", ++ r"schedulerd.*:\s+Recover\s+remote-.*\s+\(.*\)", + r"error: Result of monitor operation for .* on remote-.*: Internal communication failure", + ] + +@@ -3120,7 +3120,7 @@ class RemoteRscFailure(RemoteDriver): + + def errorstoignore(self): + ignore_pats = [ +- r"schedulerd.*: Recover remote-rsc\s*\(.*\)", ++ r"schedulerd.*: Recover\s+remote-rsc\s+\(.*\)", + r"Dummy.*: No process state file found", + ] + +diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py +index 90cac73..6e718f7 100644 +--- a/cts/lab/patterns.py ++++ b/cts/lab/patterns.py +@@ -66,7 +66,7 @@ class BasePatterns(object): + + "Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s", + "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK", +- "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s", ++ "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover\s+%s", + "Pat:Fencing_active" : r"stonith resource .* is active on 2 nodes (attempting recovery)", + "Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error", + +@@ -180,7 +180,7 @@ class crm_corosync(BasePatterns): + 
r"Parameters to .* action changed:", + r"Parameters to .* changed", + r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)", +- r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)", ++ r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)", + r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", + r"Peer is not part of our cluster", + r"We appear to be in an election loop", +-- +2.31.1 + +From 338cf55d19cb4ebebedf092dd0a5969ac2eda295 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 19 Sep 2022 15:55:42 -0500 +Subject: [PATCH 24/24] Test: cts-lab: match parentheses correctly + +--- + cts/lab/patterns.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py +index 6e718f7..856fffb 100644 +--- a/cts/lab/patterns.py ++++ b/cts/lab/patterns.py +@@ -271,6 +271,7 @@ class crm_corosync(BasePatterns): + ] + self.components["pacemaker-based-ignore"] = [ + r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", ++ r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", + # This is overbroad, but we don't have a way to say that only + # certain transition errors are acceptable (if the fencer respawns, + # fence devices may appear multiply active). We have to rely on +@@ -328,7 +329,7 @@ class crm_corosync(BasePatterns): + r"crit:.*Fencing daemon connection failed", + r"error:.*Fencer connection failed \(will retry\)", + r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations", +- r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error", ++ r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", + # This is overbroad, but we don't have a way to say that only + # certain transition errors are acceptable (if the fencer respawns, + # fence devices may appear multiply active). 
We have to rely on +-- +2.31.1 + diff --git a/SOURCES/014-abort-transition.patch b/SOURCES/014-abort-transition.patch new file mode 100644 index 0000000..cd12ccd --- /dev/null +++ b/SOURCES/014-abort-transition.patch @@ -0,0 +1,59 @@ +From 04d1ba5ff20e135c900239f0ebadad42a41b5eba Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 10 Sep 2022 03:39:12 -0700 +Subject: [PATCH] Fix: controller: Resource reordering doesn't cause transition + abort + +The te_update_diff_v2() function ignores all move operations. This is +correct for most CIB sections. However, a move in the resources section +affects placement order and can require resources to change nodes. In +that case, since the diff handler does not cause a transition abort, the +moves will not be initiated until the next natural transition (up to the +value of cluster-recheck-interval). + +This commit modifies te_update_diff_v2() so that it no longer ignores +moves within the resources section. + +This fixes a regression triggered by 41d0a1a and set up by 45e5e82. +However, the underlying bug had already been present. Prior to 41d0a1a, +the CIB replacement notification handler caused a transition abort, when +the resources section was replaced, which hid this bug. 
+ +Closes T549 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_te_callbacks.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c +index 6e0dd216e..87ad861a2 100644 +--- a/daemons/controld/controld_te_callbacks.c ++++ b/daemons/controld/controld_te_callbacks.c +@@ -419,7 +419,13 @@ te_update_diff_v2(xmlNode *diff) + crm_trace("Ignoring %s change for version field", op); + continue; + +- } else if (strcmp(op, "move") == 0) { ++ } else if ((strcmp(op, "move") == 0) ++ && (strstr(xpath, ++ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION ++ "/" XML_CIB_TAG_RESOURCES) == NULL)) { ++ /* We still need to consider moves within the resources section, ++ * since they affect placement order. ++ */ + crm_trace("Ignoring move change at %s", xpath); + continue; + } +@@ -434,7 +440,7 @@ te_update_diff_v2(xmlNode *diff) + match = match->children; + } + +- } else if (strcmp(op, "delete") != 0) { ++ } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) { + crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", + op, xpath); + continue; +-- +2.31.1 + diff --git a/SOURCES/015-one_shot.patch b/SOURCES/015-one_shot.patch new file mode 100644 index 0000000..4896d64 --- /dev/null +++ b/SOURCES/015-one_shot.patch @@ -0,0 +1,3589 @@ +From 23d14e3515d226fee3ec9e0328f001f53597dad2 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 11:23:46 -0700 +Subject: [PATCH 01/22] API: libpacemaker: pcmk_pacemakerd_status() ipc_name + arg is now const + +Signed-off-by: Reid Wahl +--- + include/pacemaker.h | 11 ++++++----- + include/pcmki/pcmki_cluster_queries.h | 3 ++- + lib/pacemaker/pcmk_cluster_queries.c | 17 +++++++++++++++-- + 3 files changed, 23 insertions(+), 8 deletions(-) + +diff --git a/include/pacemaker.h b/include/pacemaker.h +index 17c68e9..a76569a 100644 +--- a/include/pacemaker.h ++++ b/include/pacemaker.h +@@ -107,15 
+107,16 @@ int pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) + void pcmk_free_injections(pcmk_injections_t *injections); + + /*! +- * \brief Get pacemakerd status ++ * \brief Get and output \p pacemakerd status + * +- * \param[in,out] xml The destination for the result, as an XML tree. +- * \param[in] ipc_name IPC name for request +- * \param[in] message_timeout_ms Message timeout ++ * \param[in,out] xml Destination for the result, as an XML tree ++ * \param[in] ipc_name IPC name for request ++ * \param[in] message_timeout_ms Message timeout + * + * \return Standard Pacemaker return code + */ +-int pcmk_pacemakerd_status(xmlNodePtr *xml, char *ipc_name, unsigned int message_timeout_ms); ++int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, ++ unsigned int message_timeout_ms); + + /*! + * \brief Calculate and output resource operation digests +diff --git a/include/pcmki/pcmki_cluster_queries.h b/include/pcmki/pcmki_cluster_queries.h +index 0a4c21c..9aea9a5 100644 +--- a/include/pcmki/pcmki_cluster_queries.h ++++ b/include/pcmki/pcmki_cluster_queries.h +@@ -10,7 +10,8 @@ + + int pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_timeout_ms); + int pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms); +-int pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeout_ms); ++int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, ++ guint message_timeout_ms); + int pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean BASH_EXPORT); + + #endif +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index c30a9b8..cac8ce0 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -358,8 +358,19 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) + return rc; + } + ++/*! 
++ * \internal ++ * \brief Get and output \p pacemakerd status ++ * ++ * \param[in,out] out Output object ++ * \param[in] ipc_name IPC name for request ++ * \param[in] message_timeout_ms Message timeout ++ * ++ * \return Standard Pacemaker return code ++ */ + int +-pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeout_ms) ++pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, ++ guint message_timeout_ms) + { + data_t data = { + .out = out, +@@ -385,8 +396,10 @@ pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeo + return data.rc; + } + ++// Documented in header + int +-pcmk_pacemakerd_status(xmlNodePtr *xml, char *ipc_name, unsigned int message_timeout_ms) ++pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, ++ unsigned int message_timeout_ms) + { + pcmk__output_t *out = NULL; + int rc = pcmk_rc_ok; +-- +2.31.1 + +From b15f4030020a8c0aa1cdb9e72a633adff02944bc Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 12:19:49 -0700 +Subject: [PATCH 02/22] Feature: pacemakerd: New + pcmk__pcmkd_state_enum2friendly() function + +Given an enum pcmk_pacemakerd_state value, this function returns a +user-friendly string representation. This will be used in future +commits. 
+ +Signed-off-by: Reid Wahl +--- + include/crm/common/ipc_internal.h | 3 +++ + lib/common/ipc_pacemakerd.c | 33 +++++++++++++++++++++++++++++++ + tools/crm_mon.c | 14 ++++++------- + 3 files changed, 43 insertions(+), 7 deletions(-) + +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +index 2a0c562..ebde808 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -29,6 +29,7 @@ extern "C" { + + #include // US_AUTH_GETPEEREID + #include ++#include // enum pcmk_pacemakerd_state + #include // mainloop_io_t + + /* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that +@@ -250,6 +251,8 @@ pcmk__ipc_sys_name(const char *ipc_name, const char *fallback) + return ipc_name ? ipc_name : ((crm_system_name ? crm_system_name : fallback)); + } + ++const char *pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c +index 2bec0d1..3777f95 100644 +--- a/lib/common/ipc_pacemakerd.c ++++ b/lib/common/ipc_pacemakerd.c +@@ -62,6 +62,39 @@ pcmk_pacemakerd_api_daemon_state_enum2text( + return "invalid"; + } + ++/*! 
++ * \internal ++ * \brief Return a friendly string representation of a \p pacemakerd state ++ * ++ * \param[in] state \p pacemakerd state ++ * ++ * \return A user-friendly string representation of \p state, or ++ * "Invalid pacemakerd state" ++ */ ++const char * ++pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state) ++{ ++ switch (state) { ++ case pcmk_pacemakerd_state_init: ++ return "Initializing pacemaker"; ++ case pcmk_pacemakerd_state_starting_daemons: ++ return "Pacemaker daemons are starting"; ++ case pcmk_pacemakerd_state_wait_for_ping: ++ return "Waiting for startup trigger from SBD"; ++ case pcmk_pacemakerd_state_running: ++ return "Pacemaker is running"; ++ case pcmk_pacemakerd_state_shutting_down: ++ return "Pacemaker daemons are shutting down"; ++ case pcmk_pacemakerd_state_shutdown_complete: ++ /* Assuming pacemakerd won't process messages while in ++ * shutdown_complete state unless reporting to SBD ++ */ ++ return "Pacemaker daemons are shut down (reporting to SBD)"; ++ default: ++ return "Invalid pacemakerd state"; ++ } ++} ++ + // \return Standard Pacemaker return code + static int + new_data(pcmk_ipc_api_t *api) +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index eaf79bd..e8cb709 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -951,26 +951,26 @@ pacemakerd_status(void) + rc = ENOTCONN; + if ((output_format == mon_output_console) || + (output_format == mon_output_plain)) { ++ ++ const char *state_str = NULL; ++ state_str = pcmk__pcmkd_state_enum2friendly(state); + switch (state) { + case pcmk_pacemakerd_state_running: + rc = pcmk_rc_ok; + break; + case pcmk_pacemakerd_state_starting_daemons: +- out->info(out,"Pacemaker daemons starting ..."); ++ out->info(out, "%s", state_str); + break; + case pcmk_pacemakerd_state_wait_for_ping: +- out->info(out,"Waiting for startup-trigger from SBD ..."); ++ out->info(out, "%s", state_str); + break; + case pcmk_pacemakerd_state_shutting_down: +- out->info(out,"Pacemaker daemons shutting 
down ..."); ++ out->info(out, "%s", state_str); + /* try our luck maybe CIB is still accessible */ + rc = pcmk_rc_ok; + break; + case pcmk_pacemakerd_state_shutdown_complete: +- /* assuming pacemakerd doesn't dispatch any pings after entering +- * that state unless it is waiting for SBD +- */ +- out->info(out,"Pacemaker daemons shut down - reporting to SBD ..."); ++ out->info(out, "%s", state_str); + break; + default: + break; +-- +2.31.1 + +From 7eb4fa59db667f1904b607cde8ed8b9caf7a46ed Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 12:51:40 -0700 +Subject: [PATCH 03/22] Low: libcrmcommon: Check invalid time value in + pacemakerd API reply + +If the XML_ATTR_TSTAMP attribute is present but can't be parsed as an +integer, value_ll gets set to PCMK__PARSE_INT_DEFAULT (-1). This should +never happen, but just in case, we should convert a negative to 0 before +we cast to time_t, an unsigned type. + +Signed-off-by: Reid Wahl +--- + lib/common/ipc_pacemakerd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c +index 3777f95..562308c 100644 +--- a/lib/common/ipc_pacemakerd.c ++++ b/lib/common/ipc_pacemakerd.c +@@ -211,7 +211,7 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + reply_data.data.ping.status = + pcmk__str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS), "ok", + pcmk__str_casei)?pcmk_rc_ok:pcmk_rc_error; +- reply_data.data.ping.last_good = (time_t) value_ll; ++ reply_data.data.ping.last_good = (value_ll < 0)? 
0 : (time_t) value_ll; + reply_data.data.ping.sys_from = crm_element_value(msg_data, + XML_PING_ATTR_SYSFROM); + } else if (pcmk__str_eq(value, CRM_OP_QUIT, pcmk__str_none)) { +-- +2.31.1 + +From 3169eaafce20e2a444c3b96755daf36dd7143242 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 15:01:12 -0700 +Subject: [PATCH 04/22] Low: libpacemaker: Correct default for pinged_buf in + pacemakerd_event_cb + +Default should be NULL so that the last_updated default gets used +correctly in the pacemakerd-health message. + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_cluster_queries.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index cac8ce0..43b2b1f 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -229,7 +229,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + (reply->data.ping.status == pcmk_rc_ok)? + pcmk_pacemakerd_api_daemon_state_enum2text( + reply->data.ping.state):"query failed", +- (reply->data.ping.status == pcmk_rc_ok)?pinged_buf:""); ++ (reply->data.ping.status == pcmk_rc_ok)? pinged_buf : NULL); + data->rc = pcmk_rc_ok; + crm_time_free(crm_when); + free(pinged_buf); +-- +2.31.1 + +From c6141eb0f47fc806a309f99ec52ccb274b134533 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 22:48:40 -0700 +Subject: [PATCH 05/22] Refactor: libpacemaker: Improve return codes in + pcmk__pacemakerd_status + +Use pcmk_rc_ipc_unresponsive if we don't get a response from the API, +and EBADMSG if we get a bad reply or unexpected reply type. 
+ +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_cluster_queries.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index 43b2b1f..9937e16 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -207,6 +207,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + out->err(out, "error: Bad reply from pacemakerd: %s", + crm_exit_str(status)); + event_done(data, pacemakerd_api); ++ data->rc = EBADMSG; + return; + } + +@@ -214,6 +215,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + out->err(out, "error: Unknown reply type %d from pacemakerd", + reply->reply_type); + event_done(data, pacemakerd_api); ++ data->rc = EBADMSG; + return; + } + +@@ -375,7 +377,7 @@ pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, + data_t data = { + .out = out, + .mainloop = NULL, +- .rc = pcmk_rc_ok, ++ .rc = pcmk_rc_ipc_unresponsive, + .message_timer_id = 0, + .message_timeout_ms = message_timeout_ms + }; +-- +2.31.1 + +From df2e449a29fe3460b98403767a16be4d89ef3455 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 15:12:23 -0700 +Subject: [PATCH 06/22] Feature: libpacemaker: pacemakerd-health message + accepts state + +Previously, the pacemakerd-health message accepted only a state string. +This made it difficult to use different state strings for different +output formats. + +Now, the pacemakerd-health message accepts an enum pcmk_pacemakerd_state +value and an optional state string. If the state string is not set, then +the formatter function looks up an appropriate string representation for +the state. If the state string is set, it acts as an explicit override +and is used in place of a lookup. + +Note that this will cause "invalid" to be printed instead of "" +for quiet text outputs, and it will cause "Invalid pacemakerd state" to +be printed instead of "unknown state" for the default output. 
+ +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_cluster_queries.c | 40 +++++++++++--------- + lib/pacemaker/pcmk_output.c | 56 +++++++++++++++++++++------- + 2 files changed, 65 insertions(+), 31 deletions(-) + +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index 9937e16..3e36a12 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -180,9 +180,6 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + pcmk__output_t *out = data->out; + pcmk_pacemakerd_api_reply_t *reply = event_data; + +- crm_time_t *crm_when; +- char *pinged_buf = NULL; +- + switch (event_type) { + case pcmk_ipc_event_disconnect: + if (data->rc == ECONNRESET) { // Unexpected +@@ -220,22 +217,29 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + } + + // Parse desired information from reply +- crm_when = crm_time_new(NULL); +- crm_time_set_timet(crm_when, &reply->data.ping.last_good); +- pinged_buf = crm_time_as_string(crm_when, +- crm_time_log_date | crm_time_log_timeofday | +- crm_time_log_with_timezone); +- +- out->message(out, "pacemakerd-health", +- reply->data.ping.sys_from, +- (reply->data.ping.status == pcmk_rc_ok)? +- pcmk_pacemakerd_api_daemon_state_enum2text( +- reply->data.ping.state):"query failed", +- (reply->data.ping.status == pcmk_rc_ok)? 
pinged_buf : NULL); ++ if (reply->data.ping.status == pcmk_rc_ok) { ++ crm_time_t *when = crm_time_new(NULL); ++ char *when_s = NULL; ++ ++ crm_time_set_timet(when, &reply->data.ping.last_good); ++ when_s = crm_time_as_string(when, ++ crm_time_log_date ++ |crm_time_log_timeofday ++ |crm_time_log_with_timezone); ++ ++ out->message(out, "pacemakerd-health", ++ reply->data.ping.sys_from, reply->data.ping.state, NULL, ++ when_s); ++ ++ crm_time_free(when); ++ free(when_s); ++ ++ } else { ++ out->message(out, "pacemakerd-health", ++ reply->data.ping.sys_from, reply->data.ping.state, ++ "query failed", NULL); ++ } + data->rc = pcmk_rc_ok; +- crm_time_free(crm_when); +- free(pinged_buf); +- + event_done(data, pacemakerd_api); + } + +diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c +index 9a522a3..edd4b82 100644 +--- a/lib/pacemaker/pcmk_output.c ++++ b/lib/pacemaker/pcmk_output.c +@@ -627,36 +627,65 @@ health_xml(pcmk__output_t *out, va_list args) + return pcmk_rc_ok; + } + +-PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "const char *", "const char *") ++PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", ++ "const char *") + static int +-pacemakerd_health_text(pcmk__output_t *out, va_list args) ++pacemakerd_health(pcmk__output_t *out, va_list args) + { + const char *sys_from = va_arg(args, const char *); +- const char *state = va_arg(args, const char *); ++ enum pcmk_pacemakerd_state state = ++ (enum pcmk_pacemakerd_state) va_arg(args, int); ++ const char *state_s = va_arg(args, const char *); + const char *last_updated = va_arg(args, const char *); + ++ if (state_s == NULL) { ++ state_s = pcmk__pcmkd_state_enum2friendly(state); ++ } ++ return out->info(out, "Status of %s: '%s' (last updated %s)", ++ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", ++ state_s, ++ (!pcmk__str_empty(last_updated)) ? 
last_updated : "at unknown time"); ++} ++ ++PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", ++ "const char *") ++static int ++pacemakerd_health_text(pcmk__output_t *out, va_list args) ++{ + if (!out->is_quiet(out)) { +- return out->info(out, "Status of %s: '%s' %s %s", crm_str(sys_from), +- crm_str(state), (!pcmk__str_empty(last_updated))? +- "last updated":"", crm_str(last_updated)); ++ return pacemakerd_health(out, args); + } else { +- pcmk__formatted_printf(out, "%s\n", crm_str(state)); ++ const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); ++ enum pcmk_pacemakerd_state state = ++ (enum pcmk_pacemakerd_state) va_arg(args, int); ++ const char *state_s = va_arg(args, const char *); ++ const char *last_updated G_GNUC_UNUSED = va_arg(args, const char *); ++ ++ if (state_s == NULL) { ++ state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); ++ } ++ pcmk__formatted_printf(out, "%s\n", state_s); + return pcmk_rc_ok; + } +- +- return pcmk_rc_no_output; + } + +-PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "const char *", "const char *") ++PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", ++ "const char *") + static int + pacemakerd_health_xml(pcmk__output_t *out, va_list args) + { + const char *sys_from = va_arg(args, const char *); +- const char *state = va_arg(args, const char *); ++ enum pcmk_pacemakerd_state state = ++ (enum pcmk_pacemakerd_state) va_arg(args, int); ++ const char *state_s = va_arg(args, const char *); + const char *last_updated = va_arg(args, const char *); + ++ if (state_s == NULL) { ++ state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); ++ } ++ + pcmk__output_create_xml_node(out, crm_str(sys_from), +- "state", crm_str(state), ++ "state", state_s, + "last_updated", crm_str(last_updated), + NULL); + return pcmk_rc_ok; +@@ -1899,7 +1928,8 @@ static pcmk__message_entry_t fmt_functions[] = { + { "locations-list", "xml", locations_list_xml }, + { 
"node-action", "default", node_action }, + { "node-action", "xml", node_action_xml }, +- { "pacemakerd-health", "default", pacemakerd_health_text }, ++ { "pacemakerd-health", "default", pacemakerd_health }, ++ { "pacemakerd-health", "text", pacemakerd_health_text }, + { "pacemakerd-health", "xml", pacemakerd_health_xml }, + { "profile", "default", profile_default, }, + { "profile", "xml", profile_xml }, +-- +2.31.1 + +From 9bb521dc8b835641746095fe66b7a2137ce12c20 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 15:53:18 -0700 +Subject: [PATCH 07/22] Feature: libpacemaker: pcmk__pacemakerd_status() can + return pcmkd state + +Signed-off-by: Reid Wahl +--- + include/pcmki/pcmki_cluster_queries.h | 3 ++- + lib/pacemaker/pcmk_cluster_queries.c | 22 +++++++++++++++++----- + tools/crmadmin.c | 3 ++- + 3 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/include/pcmki/pcmki_cluster_queries.h b/include/pcmki/pcmki_cluster_queries.h +index 9aea9a5..702ab1f 100644 +--- a/include/pcmki/pcmki_cluster_queries.h ++++ b/include/pcmki/pcmki_cluster_queries.h +@@ -11,7 +11,8 @@ + int pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_timeout_ms); + int pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms); + int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, +- guint message_timeout_ms); ++ guint message_timeout_ms, ++ enum pcmk_pacemakerd_state *state); + int pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean BASH_EXPORT); + + #endif +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index 3e36a12..5834ef0 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -36,6 +36,7 @@ typedef struct { + int rc; + guint message_timer_id; + guint message_timeout_ms; ++ enum pcmk_pacemakerd_state pcmkd_state; + } data_t; + + static void +@@ -217,6 +218,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t 
*pacemakerd_api, + } + + // Parse desired information from reply ++ data->pcmkd_state = reply->data.ping.state; + if (reply->data.ping.status == pcmk_rc_ok) { + crm_time_t *when = crm_time_new(NULL); + char *when_s = NULL; +@@ -282,7 +284,8 @@ pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_time + .mainloop = NULL, + .rc = pcmk_rc_ok, + .message_timer_id = 0, +- .message_timeout_ms = message_timeout_ms ++ .message_timeout_ms = message_timeout_ms, ++ .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; + pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, controller_status_event_cb); + +@@ -327,7 +330,8 @@ pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms) + .mainloop = NULL, + .rc = pcmk_rc_ok, + .message_timer_id = 0, +- .message_timeout_ms = message_timeout_ms ++ .message_timeout_ms = message_timeout_ms, ++ .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; + pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, designated_controller_event_cb); + +@@ -371,19 +375,23 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) + * \param[in,out] out Output object + * \param[in] ipc_name IPC name for request + * \param[in] message_timeout_ms Message timeout ++ * \param[out] state Where to store the \p pacemakerd state, if ++ * not \p NULL + * + * \return Standard Pacemaker return code + */ + int + pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, +- guint message_timeout_ms) ++ guint message_timeout_ms, ++ enum pcmk_pacemakerd_state *state) + { + data_t data = { + .out = out, + .mainloop = NULL, + .rc = pcmk_rc_ipc_unresponsive, + .message_timer_id = 0, +- .message_timeout_ms = message_timeout_ms ++ .message_timeout_ms = message_timeout_ms, ++ .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; + pcmk_ipc_api_t *pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, pacemakerd_event_cb); + +@@ -399,6 +407,9 @@ pcmk__pacemakerd_status(pcmk__output_t 
*out, const char *ipc_name, + pcmk_free_ipc_api(pacemakerd_api); + } + ++ if (state != NULL) { ++ *state = data.pcmkd_state; ++ } + return data.rc; + } + +@@ -417,7 +428,8 @@ pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, + + pcmk__register_lib_messages(out); + +- rc = pcmk__pacemakerd_status(out, ipc_name, (guint) message_timeout_ms); ++ rc = pcmk__pacemakerd_status(out, ipc_name, (guint) message_timeout_ms, ++ NULL); + pcmk__out_epilogue(out, xml, rc); + return rc; + } +diff --git a/tools/crmadmin.c b/tools/crmadmin.c +index 169289f..f4c2783 100644 +--- a/tools/crmadmin.c ++++ b/tools/crmadmin.c +@@ -238,7 +238,8 @@ main(int argc, char **argv) + rc = pcmk__controller_status(out, options.optarg, options.timeout); + break; + case cmd_pacemakerd_health: +- rc = pcmk__pacemakerd_status(out, options.ipc_name, options.timeout); ++ rc = pcmk__pacemakerd_status(out, options.ipc_name, options.timeout, ++ NULL); + break; + case cmd_list_nodes: + rc = pcmk__list_nodes(out, options.optarg, options.BASH_EXPORT); +-- +2.31.1 + +From 4841c22f9a7cc927e87007c9691e2c239f035a58 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 16:50:06 -0700 +Subject: [PATCH 08/22] Fix: libpacemaker: Memory leak in + pcmk_cluster_queries.c:ipc_connect() + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_cluster_queries.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index 5834ef0..00a809d 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -270,6 +270,7 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb) + pcmk_ipc_name(api, true), + pcmk_rc_str(rc)); + data->rc = rc; ++ pcmk_free_ipc_api(api); + return NULL; + } + +-- +2.31.1 + +From 8e202448c47ad0ddc148b2e0514ef98b4847fa6e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 18:25:35 -0700 +Subject: [PATCH 09/22] Doc: libpe_status: Replace 
old funcname in + pe__build_rsc_list() comment + +build_uname_list -> pe__build_node_name_list() + +Signed-off-by: Reid Wahl +--- + lib/pengine/utils.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index 77111a6..1a4eb3e 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -2551,9 +2551,9 @@ pe__build_rsc_list(pe_working_set_t *data_set, const char *s) { + resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); + } + } else { +- /* The given string was not a valid resource name. It's either +- * a tag or it's a typo or something. See build_uname_list for +- * more detail. ++ /* The given string was not a valid resource name. It's a tag or a ++ * typo or something. See pe__build_node_name_list() for more ++ * detail. + */ + resources = pe__rscs_with_tag(data_set, s); + } +-- +2.31.1 + +From 0c412f49d607a8f60790b13e75d8c7b3a8c6c1d9 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 18:38:06 -0700 +Subject: [PATCH 10/22] Refactor: libpacemaker: Clarify pointer arguments in + pcmk_status.c + +Make only_node and only_rsc arguments const. Add doxygen blocks. Change +"st" argument to "stonith". + +This is not comprehensive. We're updating pcmk__status() because we're +about to add a new argument, and pcmk__output_cluster_status() because +it shares most of its arguments with pcmk__status(). 
+ +Signed-off-by: Reid Wahl +--- + include/pcmki/pcmki_status.h | 18 ++++--- + lib/pacemaker/pcmk_status.c | 93 +++++++++++++++++++++++++++++------- + 2 files changed, 87 insertions(+), 24 deletions(-) + +diff --git a/include/pcmki/pcmki_status.h b/include/pcmki/pcmki_status.h +index 6614fe4..2bbd099 100644 +--- a/include/pcmki/pcmki_status.h ++++ b/include/pcmki/pcmki_status.h +@@ -38,15 +38,19 @@ extern "C" { + */ + int pcmk__output_simple_status(pcmk__output_t *out, pe_working_set_t *data_set); + +-int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib, +- xmlNode *current_cib, enum pcmk__fence_history fence_history, +- uint32_t show, uint32_t show_opts, char *only_node, +- char *only_rsc, char *neg_location_prefix, ++int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, ++ cib_t *cib, xmlNode *current_cib, ++ enum pcmk__fence_history fence_history, ++ uint32_t show, uint32_t show_opts, ++ const char *only_node, const char *only_rsc, ++ const char *neg_location_prefix, + bool simple_output); + +-int pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, +- uint32_t show, uint32_t show_opts, char *only_node, char *only_rsc, +- char *neg_location_prefix, bool simple_output); ++int pcmk__status(pcmk__output_t *out, cib_t *cib, ++ enum pcmk__fence_history fence_history, uint32_t show, ++ uint32_t show_opts, const char *only_node, ++ const char *only_rsc, const char *neg_location_prefix, ++ bool simple_output); + + #ifdef __cplusplus + } +diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c +index 12136ea..1bf0172 100644 +--- a/lib/pacemaker/pcmk_status.c ++++ b/lib/pacemaker/pcmk_status.c +@@ -135,11 +135,38 @@ pacemakerd_status(pcmk__output_t *out) + return rc; + } + ++/*! 
++ * \internal ++ * \brief Output the cluster status given a fencer and CIB connection ++ * ++ * \param[in,out] out Output object ++ * \param[in,out] stonith Fencer connection ++ * \param[in,out] cib CIB connection ++ * \param[in] current_cib Current CIB XML ++ * \param[in] fence_history How much of the fencing history to output ++ * \param[in] show Group of \p pcmk_section_e flags ++ * \param[in] show_opts Group of \p pcmk_show_opt_e flags ++ * \param[in] only_node If a node name or tag, include only the ++ * matching node(s) (if any) in the output. ++ * If \p "*" or \p NULL, include all nodes ++ * in the output. ++ * \param[in] only_rsc If a resource ID or tag, include only the ++ * matching resource(s) (if any) in the ++ * output. If \p "*" or \p NULL, include all ++ * resources in the output. ++ * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID ++ * \param[in] simple_output Whether to use a simple output format. ++ * Note: This is for use by \p crm_mon only ++ * and is planned to be deprecated. 
++ * ++ * \return Standard Pacemaker return code ++ */ + int +-pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib, ++pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, cib_t *cib, + xmlNode *current_cib, enum pcmk__fence_history fence_history, +- uint32_t show, uint32_t show_opts, char *only_node, +- char *only_rsc, char *neg_location_prefix, bool simple_output) ++ uint32_t show, uint32_t show_opts, ++ const char *only_node, const char *only_rsc, ++ const char *neg_location_prefix, bool simple_output) + { + xmlNode *cib_copy = copy_xml(current_cib); + stonith_history_t *stonith_history = NULL; +@@ -159,7 +186,8 @@ pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib, + + /* get the stonith-history if there is evidence we need it */ + if (fence_history != pcmk__fence_history_none) { +- history_rc = pcmk__get_fencing_history(st, &stonith_history, fence_history); ++ history_rc = pcmk__get_fencing_history(stonith, &stonith_history, ++ fence_history); + } + + data_set = pe_new_working_set(); +@@ -235,14 +263,43 @@ pcmk_status(xmlNodePtr *xml) + return rc; + } + ++/*! ++ * \internal ++ * \brief Query and output the cluster status ++ * ++ * The operation is considered a success if we're able to get the \p pacemakerd ++ * state. If possible, we'll also try to connect to the fencer and CIB and ++ * output their respective status information. ++ * ++ * \param[in,out] out Output object ++ * \param[in,out] cib CIB connection ++ * \param[in] fence_history How much of the fencing history to output ++ * \param[in] show Group of \p pcmk_section_e flags ++ * \param[in] show_opts Group of \p pcmk_show_opt_e flags ++ * \param[in] only_node If a node name or tag, include only the ++ * matching node(s) (if any) in the output. ++ * If \p "*" or \p NULL, include all nodes ++ * in the output. ++ * \param[in] only_rsc If a resource ID or tag, include only the ++ * matching resource(s) (if any) in the ++ * output. 
If \p "*" or \p NULL, include all ++ * resources in the output. ++ * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID ++ * \param[in] simple_output Whether to use a simple output format. ++ * Note: This is for use by \p crm_mon only ++ * and is planned to be deprecated. ++ * ++ * \return Standard Pacemaker return code ++ */ + int +-pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, +- uint32_t show, uint32_t show_opts, char *only_node, char *only_rsc, +- char *neg_location_prefix, bool simple_output) ++pcmk__status(pcmk__output_t *out, cib_t *cib, ++ enum pcmk__fence_history fence_history, uint32_t show, ++ uint32_t show_opts, const char *only_node, const char *only_rsc, ++ const char *neg_location_prefix, bool simple_output) + { + xmlNode *current_cib = NULL; + int rc = pcmk_rc_ok; +- stonith_t *st = NULL; ++ stonith_t *stonith = NULL; + + if (cib == NULL) { + return ENOTCONN; +@@ -261,9 +318,9 @@ pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_his + } + + if (fence_history != pcmk__fence_history_none && cib->variant == cib_native) { +- st = fencing_connect(); ++ stonith = fencing_connect(); + +- if (st == NULL) { ++ if (stonith == NULL) { + return ENOTCONN; + } + } +@@ -273,17 +330,19 @@ pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_his + goto done; + } + +- rc = pcmk__output_cluster_status(out, st, cib, current_cib, fence_history, show, show_opts, +- only_node, only_rsc, neg_location_prefix, simple_output); ++ rc = pcmk__output_cluster_status(out, stonith, cib, current_cib, ++ fence_history, show, show_opts, only_node, ++ only_rsc, neg_location_prefix, ++ simple_output); + + done: +- if (st != NULL) { +- if (st->state != stonith_disconnected) { +- st->cmds->remove_notification(st, NULL); +- st->cmds->disconnect(st); ++ if (stonith != NULL) { ++ if (stonith->state != stonith_disconnected) { ++ stonith->cmds->remove_notification(stonith, NULL); ++ 
stonith->cmds->disconnect(stonith); + } + +- stonith_api_delete(st); ++ stonith_api_delete(stonith); + } + + return rc; +-- +2.31.1 + +From 8384af058c47a46cd10a070f90f6dc0bd1b12045 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 22:22:49 -0700 +Subject: [PATCH 11/22] Feature: libpacemaker: HTML formatter for + pacemakerd-health message + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_output.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c +index edd4b82..c088a6a 100644 +--- a/lib/pacemaker/pcmk_output.c ++++ b/lib/pacemaker/pcmk_output.c +@@ -646,6 +646,32 @@ pacemakerd_health(pcmk__output_t *out, va_list args) + state_s, + (!pcmk__str_empty(last_updated)) ? last_updated : "at unknown time"); + } ++ ++PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", ++ "const char *") ++static int ++pacemakerd_health_html(pcmk__output_t *out, va_list args) ++{ ++ const char *sys_from = va_arg(args, const char *); ++ enum pcmk_pacemakerd_state state = ++ (enum pcmk_pacemakerd_state) va_arg(args, int); ++ const char *state_s = va_arg(args, const char *); ++ const char *last_updated = va_arg(args, const char *); ++ char *msg = NULL; ++ ++ if (state_s == NULL) { ++ state_s = pcmk__pcmkd_state_enum2friendly(state); ++ } ++ ++ msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)", ++ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", ++ state_s, ++ (!pcmk__str_empty(last_updated)) ? 
last_updated : "at unknown time"); ++ pcmk__output_create_html_node(out, "li", NULL, NULL, msg); ++ ++ free(msg); ++ return pcmk_rc_ok; ++} + + PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", + "const char *") +@@ -1929,6 +1955,7 @@ static pcmk__message_entry_t fmt_functions[] = { + { "node-action", "default", node_action }, + { "node-action", "xml", node_action_xml }, + { "pacemakerd-health", "default", pacemakerd_health }, ++ { "pacemakerd-health", "html", pacemakerd_health_html }, + { "pacemakerd-health", "text", pacemakerd_health_text }, + { "pacemakerd-health", "xml", pacemakerd_health_xml }, + { "profile", "default", profile_default, }, +-- +2.31.1 + +From ec6a28bf64d23107c81d473c02038c29b17f2917 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 3 Sep 2022 21:40:04 -0700 +Subject: [PATCH 12/22] Low: schemas: Copy some API schemas in preparation for + changes + +Signed-off-by: Reid Wahl +--- + include/crm/common/output_internal.h | 2 +- + xml/api/command-output-2.23.rng | 26 +++ + xml/api/crm_resource-2.23.rng | 288 +++++++++++++++++++++++++++ + xml/api/stonith_admin-2.23.rng | 52 +++++ + 4 files changed, 367 insertions(+), 1 deletion(-) + create mode 100644 xml/api/command-output-2.23.rng + create mode 100644 xml/api/crm_resource-2.23.rng + create mode 100644 xml/api/stonith_admin-2.23.rng + +diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h +index 24f5b2c..1e71e13 100644 +--- a/include/crm/common/output_internal.h ++++ b/include/crm/common/output_internal.h +@@ -28,7 +28,7 @@ extern "C" { + */ + + +-# define PCMK__API_VERSION "2.22" ++# define PCMK__API_VERSION "2.23" + + #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) + # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) +diff --git a/xml/api/command-output-2.23.rng b/xml/api/command-output-2.23.rng +new file mode 100644 +index 0000000..710c134 +--- /dev/null ++++ b/xml/api/command-output-2.23.rng +@@ -0,0 +1,26 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ stdout ++ ++ ++ ++ ++ ++ stderr ++ ++ ++ ++ ++ ++ +diff --git a/xml/api/crm_resource-2.23.rng b/xml/api/crm_resource-2.23.rng +new file mode 100644 +index 0000000..8a46675 +--- /dev/null ++++ b/xml/api/crm_resource-2.23.rng +@@ -0,0 +1,288 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ promoted ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ocf ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ true ++ false ++ ++ ++ ++ true ++ ++ ++ ++ ++ ++ true ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Stopped ++ Started ++ Promoted ++ Unpromoted ++ ++ ++ Master ++ Slave ++ ++ ++ +diff --git a/xml/api/stonith_admin-2.23.rng b/xml/api/stonith_admin-2.23.rng +new file mode 100644 +index 0000000..b55fae9 +--- /dev/null ++++ b/xml/api/stonith_admin-2.23.rng +@@ -0,0 +1,52 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 526a4148ba548a3dfec4394c9d10a8d71d18b81e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sun, 4 Sep 2022 01:15:59 -0700 +Subject: [PATCH 13/22] Fix: schemas: crm_resource 
--validate validation fails + +In case of an error, the output of `crm_resource --validate` fails to +validate (if validate-all does not output XML). This is because if a +<choice> contains two elements with the same name in a RelaxNG schema, +only the first occurrence is honored and the rest are ignored. (This +does not seem to be documented clearly; it's a conclusion based on +experimentation.) + +The solution is to create just one <element> that contains a <choice> +(instead of a <choice> that contains two <element>s). + +Closes RHBZ#2123727 + +Signed-off-by: Reid Wahl +--- + xml/Makefile.am | 2 +- + xml/api/command-output-2.23.rng | 14 +------------- + xml/api/crm_resource-2.23.rng | 10 +++++----- + xml/api/stonith_admin-2.23.rng | 2 +- + xml/api/subprocess-output-2.23.rng | 24 ++++++++++++++++++++++++ + 5 files changed, 32 insertions(+), 20 deletions(-) + create mode 100644 xml/api/subprocess-output-2.23.rng + +diff --git a/xml/Makefile.am b/xml/Makefile.am +index 39f02f5..0a4a8aa 100644 +--- a/xml/Makefile.am ++++ b/xml/Makefile.am +@@ -69,7 +69,7 @@ API_request_base = command-output \ + CIB_cfg_base = options nodes resources constraints fencing acls tags alerts + + # Names of all schemas (including top level and those included by others) +-API_base = $(API_request_base) fence-event failure generic-list item node-attrs node-history nodes resources status ++API_base = $(API_request_base) fence-event failure generic-list item node-attrs node-history nodes resources status subprocess-output + CIB_base = cib $(CIB_cfg_base) status score rule nvset + + # Static schema files and transforms (only CIB has transforms) +diff --git a/xml/api/command-output-2.23.rng b/xml/api/command-output-2.23.rng +index 710c134..4de49bd 100644 +--- a/xml/api/command-output-2.23.rng ++++ b/xml/api/command-output-2.23.rng +@@ -8,19 +8,7 @@ + + + +- +- +- +- stdout +- +- +- +- +- +- stderr +- +- +- ++ + + + +diff --git a/xml/api/crm_resource-2.23.rng b/xml/api/crm_resource-2.23.rng +index 8a46675..f841026 100644 +---
a/xml/api/crm_resource-2.23.rng ++++ b/xml/api/crm_resource-2.23.rng +@@ -229,12 +229,12 @@ + + + +- +- ++ ++ + +- +- +- ++ ++ ++ + + + +diff --git a/xml/api/stonith_admin-2.23.rng b/xml/api/stonith_admin-2.23.rng +index b55fae9..f3fab68 100644 +--- a/xml/api/stonith_admin-2.23.rng ++++ b/xml/api/stonith_admin-2.23.rng +@@ -45,7 +45,7 @@ + + + +- ++ + + + +diff --git a/xml/api/subprocess-output-2.23.rng b/xml/api/subprocess-output-2.23.rng +new file mode 100644 +index 0000000..2f7a8e7 +--- /dev/null ++++ b/xml/api/subprocess-output-2.23.rng +@@ -0,0 +1,24 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ stdout ++ ++ ++ ++ ++ ++ stderr ++ ++ ++ ++ ++ +-- +2.31.1 + +From 60af39cd1582bcf91ebcfc5f9ce2fc98fd14b5b9 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 27 Aug 2022 22:46:38 -0700 +Subject: [PATCH 14/22] Low: schemas: Add schema for crm_error + +This matches the current capabilities of crm_error, though we might want +to change to a oneOrMore choice for name and description later. + +Closes T97 + +Signed-off-by: Reid Wahl +--- + xml/api/crm_error-2.23.rng | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + create mode 100644 xml/api/crm_error-2.23.rng + +diff --git a/xml/api/crm_error-2.23.rng b/xml/api/crm_error-2.23.rng +new file mode 100644 +index 0000000..8ba6e62 +--- /dev/null ++++ b/xml/api/crm_error-2.23.rng +@@ -0,0 +1,24 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 4dbb0e9d79dd36647fbb222bd5c2adae518e541c Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 14 Sep 2022 22:53:49 -0700 +Subject: [PATCH 15/22] Low: schemas: Copy API schemas in preparation for + changes + +Signed-off-by: Reid Wahl +--- + include/crm/common/output_internal.h | 2 +- + xml/api/crm_mon-2.24.rng | 186 +++++++++++++++ + xml/api/crm_resource-2.24.rng | 288 +++++++++++++++++++++++ + xml/api/crm_simulate-2.24.rng | 338 +++++++++++++++++++++++++++ + xml/api/nodes-2.24.rng | 54 +++++ + xml/api/resources-2.24.rng | 109 +++++++++ 
+ 6 files changed, 976 insertions(+), 1 deletion(-) + create mode 100644 xml/api/crm_mon-2.24.rng + create mode 100644 xml/api/crm_resource-2.24.rng + create mode 100644 xml/api/crm_simulate-2.24.rng + create mode 100644 xml/api/nodes-2.24.rng + create mode 100644 xml/api/resources-2.24.rng + +diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h +index 1e71e13..6c6d5a3 100644 +--- a/include/crm/common/output_internal.h ++++ b/include/crm/common/output_internal.h +@@ -28,7 +28,7 @@ extern "C" { + */ + + +-# define PCMK__API_VERSION "2.23" ++# define PCMK__API_VERSION "2.24" + + #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) + # define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS))) +diff --git a/xml/api/crm_mon-2.24.rng b/xml/api/crm_mon-2.24.rng +new file mode 100644 +index 0000000..b52307a +--- /dev/null ++++ b/xml/api/crm_mon-2.24.rng +@@ -0,0 +1,186 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ granted ++ revoked ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xml/api/crm_resource-2.24.rng b/xml/api/crm_resource-2.24.rng +new file mode 100644 +index 0000000..6a3334c +--- /dev/null ++++ b/xml/api/crm_resource-2.24.rng +@@ -0,0 +1,288 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ promoted ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ocf ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ true ++ false ++ ++ ++ ++ true ++ ++ ++ ++ ++ ++ true ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Stopped ++ Started ++ Promoted ++ Unpromoted ++ ++ ++ Master ++ Slave ++ ++ ++ +diff --git a/xml/api/crm_simulate-2.24.rng b/xml/api/crm_simulate-2.24.rng +new file mode 100644 +index 0000000..5be0afa +--- /dev/null ++++ b/xml/api/crm_simulate-2.24.rng +@@ -0,0 +1,338 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xml/api/nodes-2.24.rng b/xml/api/nodes-2.24.rng +new file mode 100644 +index 0000000..9686344 +--- /dev/null ++++ b/xml/api/nodes-2.24.rng +@@ -0,0 +1,54 @@ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ red ++ yellow ++ green ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ unknown ++ member ++ remote ++ ping ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xml/api/resources-2.24.rng b/xml/api/resources-2.24.rng +new file mode 100644 +index 0000000..e279583 +--- /dev/null ++++ b/xml/api/resources-2.24.rng +@@ -0,0 +1,109 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ docker ++ rkt ++ podman ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 1d36b5d50e071ecaa66948066f23043a513871e8 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 14 Sep 2022 22:58:04 -0700 +Subject: [PATCH 16/22] API: schemas: Add locked_to= to resources API schema + +Ref T433 + +Signed-off-by: Reid Wahl +--- + xml/api/resources-2.24.rng | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xml/api/resources-2.24.rng b/xml/api/resources-2.24.rng +index e279583..f8ae6eb 100644 +--- a/xml/api/resources-2.24.rng ++++ b/xml/api/resources-2.24.rng +@@ -94,6 +94,9 @@ + + + ++ ++ ++ + + + +-- +2.31.1 + +From e8caa027408243a6c7edfa966a1a7b0535458b9a Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 21:53:02 -0700 +Subject: [PATCH 17/22] Low: schemas: Copy API schemas in preparation for + changes + +Signed-off-by: Reid Wahl +--- + include/crm/common/output_internal.h | 2 +- + xml/api/crm_mon-2.25.rng | 186 +++++++++++++++++++++++++++ + xml/api/crmadmin-2.25.rng | 68 ++++++++++ + 3 files changed, 255 insertions(+), 1 deletion(-) + create mode 100644 xml/api/crm_mon-2.25.rng + create mode 100644 xml/api/crmadmin-2.25.rng + +diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h +index 6c6d5a3..1974721 100644 +--- a/include/crm/common/output_internal.h ++++ 
b/include/crm/common/output_internal.h +@@ -28,7 +28,7 @@ extern "C" { + */ + + +-# define PCMK__API_VERSION "2.24" ++# define PCMK__API_VERSION "2.25" + + #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) + # define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS))) +diff --git a/xml/api/crm_mon-2.25.rng b/xml/api/crm_mon-2.25.rng +new file mode 100644 +index 0000000..b52307a +--- /dev/null ++++ b/xml/api/crm_mon-2.25.rng +@@ -0,0 +1,186 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ granted ++ revoked ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xml/api/crmadmin-2.25.rng b/xml/api/crmadmin-2.25.rng +new file mode 100644 +index 0000000..34c9ca4 +--- /dev/null ++++ b/xml/api/crmadmin-2.25.rng +@@ -0,0 +1,68 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ unknown ++ member ++ remote ++ ping ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 9e06f1b526e9ceb94cc1709e245537d169ca2952 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 21:55:51 -0700 +Subject: [PATCH 18/22] Low: schemas: Add pacemakerd-health schema in + preparation for fix + +Signed-off-by: Reid Wahl +--- + xml/api/crm_mon-2.25.rng | 3 +++ + xml/api/crmadmin-2.25.rng | 9 +-------- + xml/api/pacemakerd-health-2.25.rng | 20 ++++++++++++++++++++ + 3 files changed, 24 insertions(+), 8 deletions(-) + create mode 100644 xml/api/pacemakerd-health-2.25.rng + +diff --git a/xml/api/crm_mon-2.25.rng 
b/xml/api/crm_mon-2.25.rng +index b52307a..1e501dd 100644 +--- a/xml/api/crm_mon-2.25.rng ++++ b/xml/api/crm_mon-2.25.rng +@@ -7,6 +7,9 @@ + + + ++ ++ ++ + + + +diff --git a/xml/api/crmadmin-2.25.rng b/xml/api/crmadmin-2.25.rng +index 34c9ca4..973f6d4 100644 +--- a/xml/api/crmadmin-2.25.rng ++++ b/xml/api/crmadmin-2.25.rng +@@ -11,7 +11,7 @@ + + + +- ++ + + + +@@ -29,13 +29,6 @@ + + + +- +- +- +- +- +- +- + + + +diff --git a/xml/api/pacemakerd-health-2.25.rng b/xml/api/pacemakerd-health-2.25.rng +new file mode 100644 +index 0000000..2089b25 +--- /dev/null ++++ b/xml/api/pacemakerd-health-2.25.rng +@@ -0,0 +1,20 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 9a320b51e21e4c52a5ac3332d35c0d70fdd1650c Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 22:03:19 -0700 +Subject: [PATCH 19/22] Low: libpacemaker: Fix pacemakerd-health XML output + +We were using F_CRM_SYS_FROM as the name of the XML element, instead of +something static like "pacemakerd". It happens that the value in +F_CRM_SYS_FROM seems to always be CRM_SYSTEM_MCP ("pacemakerd"), so the +element name was effectively deterministic. Nonetheless, the schema +required the element be called "pacemakerd"; there was no allowance for +another system name. That defeats any purpose of flexible element +naming. + +It seems better to call the element "pacemakerd" and make +sys_from a field, if we keep sys_from at all. (Can't use +"pacemakerd-health" for backward compatibility reasons.) + +Additionally, if sys_from or last_updated is NULL, pass them directly to +pcmk__output_create_xml_node(). Those attributes will simply be skipped +if their values are NULL. 
+ +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_output.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c +index c088a6a..153a422 100644 +--- a/lib/pacemaker/pcmk_output.c ++++ b/lib/pacemaker/pcmk_output.c +@@ -710,9 +710,10 @@ pacemakerd_health_xml(pcmk__output_t *out, va_list args) + state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); + } + +- pcmk__output_create_xml_node(out, crm_str(sys_from), ++ pcmk__output_create_xml_node(out, "pacemakerd", ++ "sys_from", sys_from, + "state", state_s, +- "last_updated", crm_str(last_updated), ++ "last_updated", last_updated, + NULL); + return pcmk_rc_ok; + } +-- +2.31.1 + +From bb57ee10fe6eaeaaeafbf8b491b446f7bffb6b22 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 11 Oct 2022 15:14:27 -0700 +Subject: [PATCH 20/22] Refactor: libpacemaker: Default to sync dispatch in + pcmk_cluster_queries + +If message_timeout_ms == 0 for various functions in +pcmk_cluster_queries.c, default to using sync dispatch instead of +starting a mainloop with timeout 30s that behaves basically like sync +dispatch. + +This makes it easier to reason about calling these functions when the +caller may have its own mainloop. + +Signed-off-by: Reid Wahl +--- + include/pacemaker.h | 8 +++- + lib/pacemaker/pcmk_cluster_queries.c | 64 ++++++++++++++++++++++------ + 2 files changed, 57 insertions(+), 15 deletions(-) + +diff --git a/include/pacemaker.h b/include/pacemaker.h +index a76569a..0ca9c29 100644 +--- a/include/pacemaker.h ++++ b/include/pacemaker.h +@@ -111,7 +111,13 @@ void pcmk_free_injections(pcmk_injections_t *injections); + * + * \param[in,out] xml Destination for the result, as an XML tree + * \param[in] ipc_name IPC name for request +- * \param[in] message_timeout_ms Message timeout ++ * \param[in] message_timeout_ms How long to wait for a reply from the ++ * \p pacemakerd API. If 0, ++ * \p pcmk_ipc_dispatch_sync will be used. 
++ * If positive, \p pcmk_ipc_dispatch_main ++ * will be used, and a new mainloop will be ++ * created for this purpose (freed before ++ * return). + * + * \return Standard Pacemaker return code + */ +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index 00a809d..d4361c9 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -246,13 +246,13 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + } + + static pcmk_ipc_api_t * +-ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb) ++ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, ++ enum pcmk_ipc_dispatch dispatch_type) + { + int rc; + pcmk__output_t *out = data->out; + pcmk_ipc_api_t *api = NULL; + +- + rc = pcmk_new_ipc_api(&api, server); + if (api == NULL) { + out->err(out, "error: Could not connect to %s: %s", +@@ -264,7 +264,8 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb) + if (cb != NULL) { + pcmk_register_ipc_callback(api, cb, data); + } +- rc = pcmk_connect_ipc(api, pcmk_ipc_dispatch_main); ++ ++ rc = pcmk_connect_ipc(api, dispatch_type); + if (rc != pcmk_rc_ok) { + out->err(out, "error: Could not connect to %s: %s", + pcmk_ipc_name(api, true), +@@ -288,16 +289,26 @@ pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_time + .message_timeout_ms = message_timeout_ms, + .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; +- pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, controller_status_event_cb); ++ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_main; ++ pcmk_ipc_api_t *controld_api = NULL; ++ ++ if (message_timeout_ms == 0) { ++ dispatch_type = pcmk_ipc_dispatch_sync; ++ } ++ controld_api = ipc_connect(&data, pcmk_ipc_controld, ++ controller_status_event_cb, dispatch_type); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_ping(controld_api, dest_node); + if (rc != 
pcmk_rc_ok) { +- out->err(out, "error: Command failed: %s", pcmk_rc_str(rc)); ++ out->err(out, "error: Could not ping controller API: %s", ++ pcmk_rc_str(rc)); + data.rc = rc; + } + +- start_main_loop(&data); ++ if (dispatch_type == pcmk_ipc_dispatch_main) { ++ start_main_loop(&data); ++ } + + pcmk_free_ipc_api(controld_api); + } +@@ -334,16 +345,26 @@ pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms) + .message_timeout_ms = message_timeout_ms, + .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; +- pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, designated_controller_event_cb); ++ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_main; ++ pcmk_ipc_api_t *controld_api = NULL; ++ ++ if (message_timeout_ms == 0) { ++ dispatch_type = pcmk_ipc_dispatch_sync; ++ } ++ controld_api = ipc_connect(&data, pcmk_ipc_controld, ++ designated_controller_event_cb, dispatch_type); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_ping(controld_api, NULL); + if (rc != pcmk_rc_ok) { +- out->err(out, "error: Command failed: %s", pcmk_rc_str(rc)); ++ out->err(out, "error: Could not ping controller API: %s", ++ pcmk_rc_str(rc)); + data.rc = rc; + } + +- start_main_loop(&data); ++ if (dispatch_type == pcmk_ipc_dispatch_main) { ++ start_main_loop(&data); ++ } + + pcmk_free_ipc_api(controld_api); + } +@@ -375,7 +396,13 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) + * + * \param[in,out] out Output object + * \param[in] ipc_name IPC name for request +- * \param[in] message_timeout_ms Message timeout ++ * \param[in] message_timeout_ms How long to wait for a reply from the ++ * \p pacemakerd API. If 0, ++ * \p pcmk_ipc_dispatch_sync will be used. ++ * If positive, \p pcmk_ipc_dispatch_main ++ * will be used, and a new mainloop will be ++ * created for this purpose (freed before ++ * return). 
+ * \param[out] state Where to store the \p pacemakerd state, if + * not \p NULL + * +@@ -394,17 +421,26 @@ pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, + .message_timeout_ms = message_timeout_ms, + .pcmkd_state = pcmk_pacemakerd_state_invalid, + }; +- pcmk_ipc_api_t *pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, pacemakerd_event_cb); ++ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_main; ++ pcmk_ipc_api_t *pacemakerd_api = NULL; ++ ++ if (message_timeout_ms == 0) { ++ dispatch_type = pcmk_ipc_dispatch_sync; ++ } ++ pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, ++ pacemakerd_event_cb, dispatch_type); + + if (pacemakerd_api != NULL) { + int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); + if (rc != pcmk_rc_ok) { +- out->err(out, "error: Command failed: %s", pcmk_rc_str(rc)); ++ out->err(out, "error: Could not ping launcher API: %s", ++ pcmk_rc_str(rc)); + data.rc = rc; + } + +- start_main_loop(&data); +- ++ if (dispatch_type == pcmk_ipc_dispatch_main) { ++ start_main_loop(&data); ++ } + pcmk_free_ipc_api(pacemakerd_api); + } + +-- +2.31.1 + +From 97cb9452bb918c0b8ad6d1b937bff8f222191580 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Oct 2022 18:10:54 -0700 +Subject: [PATCH 21/22] Fix: tools: crm_mon --one-shot fails while pacemaker is + shutting down + +crm_mon --one-shot checks the pacemakerd state before trying to get a +CIB connection. If pacemakerd is shutting down, it returns ENOTCONN. +This can cause a resource agent that calls crm_mon (for example, +ocf:heartbeat:pgsql) to fail to stop during shutdown. + +This is a regression introduced by commit 3f342e3. +crm_mon.c:pacemakerd_status() returns pcmk_rc_ok if pacemakerd is +shutting down, since 49ebe4c and 46d6edd (fixes for CLBZ#5471). 3f342e3 +refactored crm_mon --one-shot to use library functions. pcmk__status() +now does most of the work, calling pcmk_status.c:pacemakerd_status(). 
+That function returns ENOTCONN if pacemakerd is shutting down. As a +result, we don't try to connect to the CIB during shutdown. + +Here we update pcmk__status() to use pcmk__pacemakerd_status() instead +of a static and mostly redundant pacemakerd_status(). It receives the +pacemakerd state via an output pointer argument. If pacemakerd is +running or shutting down (or if we get an EREMOTEIO rc), we try +connecting to the fencer and CIB. However, as long as we successfully +get the pacemakerd state, we return success from pcmk__status(), since +we did obtain the cluster status. + +A couple of minor notes: +* pcmk__status() now takes a timeout argument that it passes to + pcmk__pacemakerd_status(). timeout == 0 uses pcmk_ipc_dispatch_sync, + matching the old implementation. A positive timeout uses + pcmk_ipc_dispatch_main. +* pcmk_cluster_queries.c:ipc_connect() no longer always prints a "Could + not connect" error for EREMOTEIO. The caller may consider it OK. + +Fixes T579 +Fixes CLBZ#5501 + +Signed-off-by: Reid Wahl +--- + include/pcmki/pcmki_status.h | 2 +- + lib/pacemaker/pcmk_cluster_queries.c | 27 ++++-- + lib/pacemaker/pcmk_status.c | 120 +++++++++------------------ + tools/crm_mon.c | 2 +- + 4 files changed, 61 insertions(+), 90 deletions(-) + +diff --git a/include/pcmki/pcmki_status.h b/include/pcmki/pcmki_status.h +index 2bbd099..0dde21c 100644 +--- a/include/pcmki/pcmki_status.h ++++ b/include/pcmki/pcmki_status.h +@@ -50,7 +50,7 @@ int pcmk__status(pcmk__output_t *out, cib_t *cib, + enum pcmk__fence_history fence_history, uint32_t show, + uint32_t show_opts, const char *only_node, + const char *only_rsc, const char *neg_location_prefix, +- bool simple_output); ++ bool simple_output, guint timeout_ms); + + #ifdef __cplusplus + } +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index d4361c9..220c872 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -247,7 +247,7 @@ 
pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + + static pcmk_ipc_api_t * + ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, +- enum pcmk_ipc_dispatch dispatch_type) ++ enum pcmk_ipc_dispatch dispatch_type, bool eremoteio_ok) + { + int rc; + pcmk__output_t *out = data->out; +@@ -267,9 +267,15 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, + + rc = pcmk_connect_ipc(api, dispatch_type); + if (rc != pcmk_rc_ok) { +- out->err(out, "error: Could not connect to %s: %s", +- pcmk_ipc_name(api, true), +- pcmk_rc_str(rc)); ++ if ((rc == EREMOTEIO) && eremoteio_ok) { ++ /* EREMOTEIO may be expected and acceptable for some callers. ++ * Preserve the return code in case callers need to handle it ++ * specially. ++ */ ++ } else { ++ out->err(out, "error: Could not connect to %s: %s", ++ pcmk_ipc_name(api, true), pcmk_rc_str(rc)); ++ } + data->rc = rc; + pcmk_free_ipc_api(api); + return NULL; +@@ -296,7 +302,8 @@ pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_time + dispatch_type = pcmk_ipc_dispatch_sync; + } + controld_api = ipc_connect(&data, pcmk_ipc_controld, +- controller_status_event_cb, dispatch_type); ++ controller_status_event_cb, dispatch_type, ++ false); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_ping(controld_api, dest_node); +@@ -352,7 +359,8 @@ pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms) + dispatch_type = pcmk_ipc_dispatch_sync; + } + controld_api = ipc_connect(&data, pcmk_ipc_controld, +- designated_controller_event_cb, dispatch_type); ++ designated_controller_event_cb, dispatch_type, ++ false); + + if (controld_api != NULL) { + int rc = pcmk_controld_api_ping(controld_api, NULL); +@@ -407,6 +415,11 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) + * not \p NULL + * + * \return Standard Pacemaker return code ++ * ++ * \note This function returns \p EREMOTEIO if run on a Pacemaker Remote 
node ++ * with \p pacemaker-remoted running, since \p pacemakerd is not proxied ++ * to remote nodes. The fencer and CIB may still be accessible, but ++ * \p state will be \p pcmk_pacemakerd_state_invalid. + */ + int + pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, +@@ -428,7 +441,7 @@ pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, + dispatch_type = pcmk_ipc_dispatch_sync; + } + pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, +- pacemakerd_event_cb, dispatch_type); ++ pacemakerd_event_cb, dispatch_type, true); + + if (pacemakerd_api != NULL) { + int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); +diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c +index 1bf0172..794c9ea 100644 +--- a/lib/pacemaker/pcmk_status.c ++++ b/lib/pacemaker/pcmk_status.c +@@ -70,71 +70,6 @@ fencing_connect(void) + } + } + +-static void +-pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, +- enum pcmk_ipc_event event_type, crm_exit_t status, +- void *event_data, void *user_data) +-{ +- pcmk_pacemakerd_api_reply_t *reply = event_data; +- enum pcmk_pacemakerd_state *state = +- (enum pcmk_pacemakerd_state *) user_data; +- +- /* we are just interested in the latest reply */ +- *state = pcmk_pacemakerd_state_invalid; +- +- if (event_type != pcmk_ipc_event_reply || status != CRM_EX_OK) { +- return; +- } +- +- if (reply->reply_type == pcmk_pacemakerd_reply_ping && +- reply->data.ping.last_good != (time_t) 0 && +- reply->data.ping.status == pcmk_rc_ok) { +- *state = reply->data.ping.state; +- } +-} +- +-static int +-pacemakerd_status(pcmk__output_t *out) +-{ +- int rc = pcmk_rc_ok; +- pcmk_ipc_api_t *pacemakerd_api = NULL; +- enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; +- +- rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); +- if (pacemakerd_api == NULL) { +- out->err(out, "Could not connect to pacemakerd: %s", +- pcmk_rc_str(rc)); +- return rc; +- } +- +- 
pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state); +- +- rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_sync); +- if (rc == EREMOTEIO) { +- return pcmk_rc_ok; +- } else if (rc != pcmk_rc_ok) { +- out->err(out, "Could not connect to pacemakerd: %s", +- pcmk_rc_str(rc)); +- pcmk_free_ipc_api(pacemakerd_api); +- return rc; +- } +- +- rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); +- +- if (rc != pcmk_rc_ok) { +- /* Got some error from pcmk_pacemakerd_api_ping, so return it. */ +- } else if (state == pcmk_pacemakerd_state_running) { +- rc = pcmk_rc_ok; +- } else if (state == pcmk_pacemakerd_state_shutting_down) { +- rc = ENOTCONN; +- } else { +- rc = EAGAIN; +- } +- +- pcmk_free_ipc_api(pacemakerd_api); +- return rc; +-} +- + /*! + * \internal + * \brief Output the cluster status given a fencer and CIB connection +@@ -256,7 +191,7 @@ pcmk_status(xmlNodePtr *xml) + stonith__register_messages(out); + + rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all, +- show_opts, NULL, NULL, NULL, false); ++ show_opts, NULL, NULL, NULL, false, 0); + pcmk__out_epilogue(out, xml, rc); + + cib_delete(cib); +@@ -288,6 +223,13 @@ pcmk_status(xmlNodePtr *xml) + * \param[in] simple_output Whether to use a simple output format. + * Note: This is for use by \p crm_mon only + * and is planned to be deprecated. ++ * \param[in] timeout_ms How long to wait for a reply from the ++ * \p pacemakerd API. If 0, ++ * \p pcmk_ipc_dispatch_sync will be used. ++ * If positive, \p pcmk_ipc_dispatch_main ++ * will be used, and a new mainloop will be ++ * created for this purpose (freed before ++ * return). 
+ * + * \return Standard Pacemaker return code + */ +@@ -295,34 +237,47 @@ int + pcmk__status(pcmk__output_t *out, cib_t *cib, + enum pcmk__fence_history fence_history, uint32_t show, + uint32_t show_opts, const char *only_node, const char *only_rsc, +- const char *neg_location_prefix, bool simple_output) ++ const char *neg_location_prefix, bool simple_output, ++ guint timeout_ms) + { + xmlNode *current_cib = NULL; + int rc = pcmk_rc_ok; + stonith_t *stonith = NULL; ++ enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; + + if (cib == NULL) { + return ENOTCONN; + } + +- if (cib->variant == cib_native) { +- if (cib->state == cib_connected_query || cib->state == cib_connected_command) { +- rc = pcmk_rc_ok; +- } else { +- rc = pacemakerd_status(out); ++ if ((cib->variant == cib_native) ++ && (cib->state != cib_connected_query) ++ && (cib->state != cib_connected_command)) { ++ ++ rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, &state); ++ switch (rc) { ++ case pcmk_rc_ok: ++ switch (state) { ++ case pcmk_pacemakerd_state_running: ++ case pcmk_pacemakerd_state_shutting_down: ++ // CIB may still be available while shutting down ++ break; ++ default: ++ return rc; ++ } ++ break; ++ case EREMOTEIO: ++ /* We'll always get EREMOTEIO if we run this on a Pacemaker ++ * Remote node. The fencer and CIB might be available. 
++ */ ++ rc = pcmk_rc_ok; ++ break; ++ default: ++ return rc; + } + } + +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- + if (fence_history != pcmk__fence_history_none && cib->variant == cib_native) { + stonith = fencing_connect(); +- +- if (stonith == NULL) { +- return ENOTCONN; +- } + } + + rc = cib_connect(out, cib, &current_cib); +@@ -334,6 +289,9 @@ pcmk__status(pcmk__output_t *out, cib_t *cib, + fence_history, show, show_opts, only_node, + only_rsc, neg_location_prefix, + simple_output); ++ if (rc != pcmk_rc_ok) { ++ out->err(out, "Error outputting status info from the fencer or CIB"); ++ } + + done: + if (stonith != NULL) { +@@ -345,7 +303,7 @@ done: + stonith_api_delete(stonith); + } + +- return rc; ++ return pcmk_rc_ok; + } + + /* This is an internal-only function that is planned to be deprecated and removed. +diff --git a/tools/crm_mon.c b/tools/crm_mon.c +index e8cb709..c70c439 100644 +--- a/tools/crm_mon.c ++++ b/tools/crm_mon.c +@@ -1330,7 +1330,7 @@ one_shot(void) + int rc = pcmk__status(out, cib, fence_history, show, show_opts, + options.only_node, options.only_rsc, + options.neg_location_prefix, +- output_format == mon_output_monitor); ++ output_format == mon_output_monitor, 0); + + if (rc == pcmk_rc_ok) { + clean_up(pcmk_rc2exitc(rc)); +-- +2.31.1 + +From 4e63214f61f03d2756f884dd411db07cb22e9de6 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 11 Oct 2022 13:25:45 -0700 +Subject: [PATCH 22/22] Low: libpacemaker: Correct sys_from default in + pacemakerd_health() + +sys_from should be a subsystem ("pacemakerd" is expected), not a node. 
+ +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_output.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c +index 153a422..b61f354 100644 +--- a/lib/pacemaker/pcmk_output.c ++++ b/lib/pacemaker/pcmk_output.c +@@ -642,7 +642,7 @@ pacemakerd_health(pcmk__output_t *out, va_list args) + state_s = pcmk__pcmkd_state_enum2friendly(state); + } + return out->info(out, "Status of %s: '%s' (last updated %s)", +- (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", ++ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown subsystem", + state_s, + (!pcmk__str_empty(last_updated)) ? last_updated : "at unknown time"); + } +@@ -664,7 +664,7 @@ pacemakerd_health_html(pcmk__output_t *out, va_list args) + } + + msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)", +- (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", ++ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown subsystem", + state_s, + (!pcmk__str_empty(last_updated)) ? last_updated : "at unknown time"); + pcmk__output_create_html_node(out, "li", NULL, NULL, msg); +-- +2.31.1 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index bd2e734..eb637a7 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -230,7 +230,7 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist} +Release: %{pcmk_release}%{?dist}.2 License: GPLv2+ and LGPLv2+ Url: https://www.clusterlabs.org/ @@ -258,6 +258,9 @@ Patch009: 009-validate.patch Patch010: 010-regression.patch Patch011: 011-unfencing.patch Patch012: 012-crm_resource.patch +Patch013: 013-rolling-upgrade-monitor.patch +Patch014: 014-abort-transition.patch +Patch015: 015-one_shot.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -858,6 +861,16 @@ exit 0 * Fri Mar 17 2023 MSVSphere Packaging Team - 2.1.4-5 - Rebuilt for MSVSphere 9.1. 
+* Wed Oct 26 2022 Chris Lumens - 2.1.4-5.2 +- Fix regression where crm_mon returns nonzero status at cluster shutdown +- Resolves: rhbz2133911 + +* Tue Oct 18 2022 Chris Lumens - 2.1.4-5.1 +- Fix regression where reordered resources do not get moved +- Execute resource metadata actions asynchronously +- Resolves: rhbz2128035 +- Resolves: rhbz2128036 + * Wed Aug 10 2022 Ken Gaillot - 2.1.4-5 - Fix regression in crm_resource -O - Resolves: rhbz2089353