You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1979 lines
80 KiB
1979 lines
80 KiB
From a35dfe0b76555f30dda4c9d96630866de40322b3 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 14:40:24 -0500
|
|
Subject: [PATCH 01/24] Low: fencing: use a default timeout with metadata and
|
|
validate
|
|
|
|
If the caller did not specify a timeout, use a default in
|
|
stonith_api_operations_t:metadata() and validate(). (Timeout is currently
|
|
ignored past that point, so this has no effect yet.)
|
|
|
|
Also, rename timeout argument for clarity.
|
|
---
|
|
lib/fencing/st_client.c | 23 ++++++++++++++++-------
|
|
1 file changed, 16 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
index 2b0d308..28791ff 100644
|
|
--- a/lib/fencing/st_client.c
|
|
+++ b/lib/fencing/st_client.c
|
|
@@ -504,7 +504,8 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names
|
|
|
|
static int
|
|
stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
|
|
- const char *namespace, char **output, int timeout)
|
|
+ const char *namespace, char **output,
|
|
+ int timeout_sec)
|
|
{
|
|
/* By executing meta-data directly, we can get it from stonith_admin when
|
|
* the cluster is not running, which is important for higher-level tools.
|
|
@@ -512,16 +513,20 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a
|
|
|
|
enum stonith_namespace ns = stonith_get_namespace(agent, namespace);
|
|
|
|
+ if (timeout_sec <= 0) {
|
|
+ timeout_sec = CRMD_METADATA_CALL_TIMEOUT;
|
|
+ }
|
|
+
|
|
crm_trace("Looking up metadata for %s agent %s",
|
|
stonith_namespace2text(ns), agent);
|
|
|
|
switch (ns) {
|
|
case st_namespace_rhcs:
|
|
- return stonith__rhcs_metadata(agent, timeout, output);
|
|
+ return stonith__rhcs_metadata(agent, timeout_sec, output);
|
|
|
|
#if HAVE_STONITH_STONITH_H
|
|
case st_namespace_lha:
|
|
- return stonith__lha_metadata(agent, timeout, output);
|
|
+ return stonith__lha_metadata(agent, timeout_sec, output);
|
|
#endif
|
|
|
|
default:
|
|
@@ -1684,8 +1689,8 @@ stonith_api_delete(stonith_t * stonith)
|
|
static int
|
|
stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
|
|
const char *namespace_s, const char *agent,
|
|
- stonith_key_value_t *params, int timeout, char **output,
|
|
- char **error_output)
|
|
+ stonith_key_value_t *params, int timeout_sec,
|
|
+ char **output, char **error_output)
|
|
{
|
|
/* Validation should be done directly via the agent, so we can get it from
|
|
* stonith_admin when the cluster is not running, which is important for
|
|
@@ -1731,17 +1736,21 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id,
|
|
*error_output = NULL;
|
|
}
|
|
|
|
+ if (timeout_sec <= 0) {
|
|
+ timeout_sec = CRMD_METADATA_CALL_TIMEOUT; // Questionable
|
|
+ }
|
|
+
|
|
switch (stonith_get_namespace(agent, namespace_s)) {
|
|
case st_namespace_rhcs:
|
|
rc = stonith__rhcs_validate(st, call_options, target, agent,
|
|
- params_table, host_arg, timeout,
|
|
+ params_table, host_arg, timeout_sec,
|
|
output, error_output);
|
|
break;
|
|
|
|
#if HAVE_STONITH_STONITH_H
|
|
case st_namespace_lha:
|
|
rc = stonith__lha_validate(st, call_options, target, agent,
|
|
- params_table, timeout, output,
|
|
+ params_table, timeout_sec, output,
|
|
error_output);
|
|
break;
|
|
#endif
|
|
--
|
|
2.31.1
|
|
|
|
From c2a863b7daeb829c0210d87a2f1503c1cf4dc7a5 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 14:00:00 -0500
|
|
Subject: [PATCH 02/24] Doc: fencer: improve
|
|
stonith_api_operations_t:metadata() description
|
|
|
|
---
|
|
include/crm/stonith-ng.h | 15 +++++++++++----
|
|
lib/fencing/st_client.c | 7 ++++---
|
|
2 files changed, 15 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
|
|
index 4fe52ef..a41d411 100644
|
|
--- a/include/crm/stonith-ng.h
|
|
+++ b/include/crm/stonith-ng.h
|
|
@@ -206,14 +206,21 @@ typedef struct stonith_api_operations_s
|
|
stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list);
|
|
|
|
/*!
|
|
- * \brief Get the metadata documentation for a resource.
|
|
+ * \brief Retrieve a fence agent's metadata
|
|
*
|
|
- * \note Value is returned in output. Output must be freed when set.
|
|
+ * \param[in,out] stonith Fencer connection
|
|
+ * \param[in] call_options Group of enum stonith_call_options
|
|
+ * (currently ignored)
|
|
+ * \param[in] agent Fence agent to query
|
|
+ * \param[in] namespace Namespace of fence agent to query (optional)
|
|
+ * \param[out] output Where to store metadata
|
|
+ * \param[in] timeout_sec Error if not complete within this time
|
|
*
|
|
* \return Legacy Pacemaker return code
|
|
+ * \note The caller is responsible for freeing *output using free().
|
|
*/
|
|
- int (*metadata)(stonith_t *st, int options,
|
|
- const char *device, const char *provider, char **output, int timeout);
|
|
+ int (*metadata)(stonith_t *stonith, int call_options, const char *agent,
|
|
+ const char *namespace, char **output, int timeout_sec);
|
|
|
|
/*!
|
|
* \brief Retrieve a list of installed stonith agents
|
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
index 28791ff..6c252bc 100644
|
|
--- a/lib/fencing/st_client.c
|
|
+++ b/lib/fencing/st_client.c
|
|
@@ -502,10 +502,11 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names
|
|
return count;
|
|
}
|
|
|
|
+// See stonith_api_operations_t:metadata() documentation
|
|
static int
|
|
-stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent,
|
|
- const char *namespace, char **output,
|
|
- int timeout_sec)
|
|
+stonith_api_device_metadata(stonith_t *stonith, int call_options,
|
|
+ const char *agent, const char *namespace,
|
|
+ char **output, int timeout_sec)
|
|
{
|
|
/* By executing meta-data directly, we can get it from stonith_admin when
|
|
* the cluster is not running, which is important for higher-level tools.
|
|
--
|
|
2.31.1
|
|
|
|
From 9beff34a0d39425ef470e59e251a8ca7c08e69a0 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 14:16:54 -0500
|
|
Subject: [PATCH 03/24] Doc: fencing: add doxygen block for
|
|
stonith__action_create()
|
|
|
|
... and rename a couple arguments for clarity
|
|
---
|
|
include/crm/fencing/internal.h | 4 ++--
|
|
lib/fencing/st_actions.c | 33 ++++++++++++++++++++++++---------
|
|
2 files changed, 26 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
|
|
index d2b49f8..e2ca85e 100644
|
|
--- a/include/crm/fencing/internal.h
|
|
+++ b/include/crm/fencing/internal.h
|
|
@@ -50,10 +50,10 @@ struct stonith_action_s;
|
|
typedef struct stonith_action_s stonith_action_t;
|
|
|
|
stonith_action_t *stonith_action_create(const char *agent,
|
|
- const char *_action,
|
|
+ const char *action_name,
|
|
const char *victim,
|
|
uint32_t victim_nodeid,
|
|
- int timeout,
|
|
+ int timeout_sec,
|
|
GHashTable * device_args,
|
|
GHashTable * port_map,
|
|
const char * host_arg);
|
|
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
|
|
index b3429f6..d16fa33 100644
|
|
--- a/lib/fencing/st_actions.c
|
|
+++ b/lib/fencing/st_actions.c
|
|
@@ -232,27 +232,42 @@ stonith__action_result(stonith_action_t *action)
|
|
}
|
|
|
|
#define FAILURE_MAX_RETRIES 2
|
|
+
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Create a new fencing action to be executed
|
|
+ *
|
|
+ * \param[in] agent Fence agent to use
|
|
+ * \param[in] action_name Fencing action to be executed
|
|
+ * \param[in] victim Name of target of fencing action (if known)
|
|
+ * \param[in] victim_nodeid Node ID of target of fencing action (if known)
|
|
+ * \param[in] timeout_sec Timeout to be used when executing action
|
|
+ * \param[in] device_args Parameters to pass to fence agent
|
|
+ * \param[in] port_map Mapping of target names to device ports
|
|
+ * \param[in] host_arg Agent parameter used to pass target name
|
|
+ *
|
|
+ * \return Newly created fencing action (asserts on error, never NULL)
|
|
+ */
|
|
stonith_action_t *
|
|
stonith_action_create(const char *agent,
|
|
- const char *_action,
|
|
+ const char *action_name,
|
|
const char *victim,
|
|
uint32_t victim_nodeid,
|
|
- int timeout, GHashTable * device_args,
|
|
+ int timeout_sec, GHashTable * device_args,
|
|
GHashTable * port_map, const char *host_arg)
|
|
{
|
|
- stonith_action_t *action;
|
|
+ stonith_action_t *action = calloc(1, sizeof(stonith_action_t));
|
|
|
|
- action = calloc(1, sizeof(stonith_action_t));
|
|
CRM_ASSERT(action != NULL);
|
|
|
|
- action->args = make_args(agent, _action, victim, victim_nodeid,
|
|
+ action->args = make_args(agent, action_name, victim, victim_nodeid,
|
|
device_args, port_map, host_arg);
|
|
crm_debug("Preparing '%s' action for %s using agent %s",
|
|
- _action, (victim? victim : "no target"), agent);
|
|
+ action_name, (victim? victim : "no target"), agent);
|
|
action->agent = strdup(agent);
|
|
- action->action = strdup(_action);
|
|
+ action->action = strdup(action_name);
|
|
pcmk__str_update(&action->victim, victim);
|
|
- action->timeout = action->remaining_timeout = timeout;
|
|
+ action->timeout = action->remaining_timeout = timeout_sec;
|
|
action->max_retries = FAILURE_MAX_RETRIES;
|
|
|
|
pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
|
|
@@ -262,7 +277,7 @@ stonith_action_create(const char *agent,
|
|
char buffer[512];
|
|
const char *value = NULL;
|
|
|
|
- snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action);
|
|
+ snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name);
|
|
value = g_hash_table_lookup(device_args, buffer);
|
|
|
|
if (value) {
|
|
--
|
|
2.31.1
|
|
|
|
From 3001cb016eefff55c55e709247b0c14c331fb330 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 14:20:24 -0500
|
|
Subject: [PATCH 04/24] Low: fencing: use requested timeout with RHCS metadata
|
|
actions
|
|
|
|
... instead of hardcoded 5 seconds, and rename timeout argument for clarity
|
|
---
|
|
lib/fencing/st_rhcs.c | 35 ++++++++++++++++-------------------
|
|
1 file changed, 16 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
|
|
index dfccff2..5e600d2 100644
|
|
--- a/lib/fencing/st_rhcs.c
|
|
+++ b/lib/fencing/st_rhcs.c
|
|
@@ -112,25 +112,24 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
|
|
}
|
|
|
|
/*!
|
|
- * \brief Execute RHCS-compatible agent's meta-data action
|
|
+ * \brief Execute RHCS-compatible agent's metadata action
|
|
*
|
|
- * \param[in] agent Agent to execute
|
|
- * \param[in] timeout Action timeout
|
|
- * \param[out] metadata Where to store output xmlNode (or NULL to ignore)
|
|
- *
|
|
- * \todo timeout is currently ignored; shouldn't we use it?
|
|
+ * \param[in] agent Agent to execute
|
|
+ * \param[in] timeout_sec Action timeout
|
|
+ * \param[out] metadata Where to store output xmlNode (or NULL to ignore)
|
|
*/
|
|
static int
|
|
-stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
|
|
+stonith__rhcs_get_metadata(const char *agent, int timeout_sec,
|
|
+ xmlNode **metadata)
|
|
{
|
|
xmlNode *xml = NULL;
|
|
xmlNode *actions = NULL;
|
|
xmlXPathObject *xpathObj = NULL;
|
|
- pcmk__action_result_t *result = NULL;
|
|
- stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0,
|
|
- 5, NULL, NULL, NULL);
|
|
+ stonith_action_t *action = stonith_action_create(agent, "metadata", NULL,
|
|
+ 0, timeout_sec, NULL,
|
|
+ NULL, NULL);
|
|
int rc = stonith__execute(action);
|
|
- result = stonith__action_result(action);
|
|
+ pcmk__action_result_t *result = stonith__action_result(action);
|
|
|
|
if (result == NULL) {
|
|
if (rc < 0) {
|
|
@@ -208,21 +207,19 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
|
|
}
|
|
|
|
/*!
|
|
- * \brief Execute RHCS-compatible agent's meta-data action
|
|
- *
|
|
- * \param[in] agent Agent to execute
|
|
- * \param[in] timeout Action timeout
|
|
- * \param[out] output Where to store action output (or NULL to ignore)
|
|
+ * \brief Retrieve metadata for RHCS-compatible fence agent
|
|
*
|
|
- * \todo timeout is currently ignored; shouldn't we use it?
|
|
+ * \param[in] agent Agent to execute
|
|
+ * \param[in] timeout_sec Action timeout
|
|
+ * \param[out] output Where to store action output (or NULL to ignore)
|
|
*/
|
|
int
|
|
-stonith__rhcs_metadata(const char *agent, int timeout, char **output)
|
|
+stonith__rhcs_metadata(const char *agent, int timeout_sec, char **output)
|
|
{
|
|
char *buffer = NULL;
|
|
xmlNode *xml = NULL;
|
|
|
|
- int rc = stonith__rhcs_get_metadata(agent, timeout, &xml);
|
|
+ int rc = stonith__rhcs_get_metadata(agent, timeout_sec, &xml);
|
|
|
|
if (rc != pcmk_ok) {
|
|
free_xml(xml);
|
|
--
|
|
2.31.1
|
|
|
|
From 17dbf449d8b51ea27a89a13f47160a95b0a45149 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 14:32:44 -0500
|
|
Subject: [PATCH 05/24] Refactor: fencing: make stonith_action_t:async bool
|
|
|
|
---
|
|
lib/fencing/st_actions.c | 5 +++--
|
|
1 file changed, 3 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
|
|
index d16fa33..abd0d5a 100644
|
|
--- a/lib/fencing/st_actions.c
|
|
+++ b/lib/fencing/st_actions.c
|
|
@@ -9,6 +9,7 @@
|
|
|
|
#include <crm_internal.h>
|
|
|
|
+#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
@@ -32,7 +33,7 @@ struct stonith_action_s {
|
|
char *victim;
|
|
GHashTable *args;
|
|
int timeout;
|
|
- int async;
|
|
+ bool async;
|
|
void *userdata;
|
|
void (*done_cb) (int pid, const pcmk__action_result_t *result,
|
|
void *user_data);
|
|
@@ -671,7 +672,7 @@ stonith_action_execute_async(stonith_action_t * action,
|
|
action->userdata = userdata;
|
|
action->done_cb = done;
|
|
action->fork_cb = fork_cb;
|
|
- action->async = 1;
|
|
+ action->async = true;
|
|
|
|
return internal_stonith_action_execute(action);
|
|
}
|
|
--
|
|
2.31.1
|
|
|
|
From 9b0f568dddc928104e6d2d54d5138e0c7ca5b537 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 14:59:28 -0500
|
|
Subject: [PATCH 06/24] Refactor: fencing: rename
|
|
stonith_action_execute_async()
|
|
|
|
... to stonith__execute_async(), since it's internal
|
|
---
|
|
daemons/fenced/fenced_commands.c | 4 ++--
|
|
include/crm/fencing/internal.h | 12 +++++-------
|
|
lib/fencing/st_actions.c | 11 +++++------
|
|
3 files changed, 12 insertions(+), 15 deletions(-)
|
|
|
|
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
|
|
index 94aa6b8..41a1936 100644
|
|
--- a/daemons/fenced/fenced_commands.c
|
|
+++ b/daemons/fenced/fenced_commands.c
|
|
@@ -510,8 +510,8 @@ stonith_device_execute(stonith_device_t * device)
|
|
/* for async exec, exec_rc is negative for early error exit
|
|
otherwise handling of success/errors is done via callbacks */
|
|
cmd->activating_on = device;
|
|
- exec_rc = stonith_action_execute_async(action, (void *)cmd,
|
|
- cmd->done_cb, fork_cb);
|
|
+ exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb,
|
|
+ fork_cb);
|
|
if (exec_rc < 0) {
|
|
cmd->activating_on = NULL;
|
|
cmd->done_cb(0, stonith__action_result(action), cmd);
|
|
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
|
|
index e2ca85e..1797d9a 100644
|
|
--- a/include/crm/fencing/internal.h
|
|
+++ b/include/crm/fencing/internal.h
|
|
@@ -64,13 +64,11 @@ void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result);
|
|
void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result);
|
|
xmlNode *stonith__find_xe_with_result(xmlNode *xml);
|
|
|
|
-int
|
|
-stonith_action_execute_async(stonith_action_t * action,
|
|
- void *userdata,
|
|
- void (*done) (int pid,
|
|
- const pcmk__action_result_t *result,
|
|
- void *user_data),
|
|
- void (*fork_cb) (int pid, void *user_data));
|
|
+int stonith__execute_async(stonith_action_t *action, void *userdata,
|
|
+ void (*done) (int pid,
|
|
+ const pcmk__action_result_t *result,
|
|
+ void *user_data),
|
|
+ void (*fork_cb) (int pid, void *user_data));
|
|
|
|
xmlNode *create_level_registration_xml(const char *node, const char *pattern,
|
|
const char *attr, const char *value,
|
|
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
|
|
index abd0d5a..c4e32bd 100644
|
|
--- a/lib/fencing/st_actions.c
|
|
+++ b/lib/fencing/st_actions.c
|
|
@@ -658,12 +658,11 @@ internal_stonith_action_execute(stonith_action_t * action)
|
|
* \return pcmk_ok if ownership of action has been taken, -errno otherwise
|
|
*/
|
|
int
|
|
-stonith_action_execute_async(stonith_action_t * action,
|
|
- void *userdata,
|
|
- void (*done) (int pid,
|
|
- const pcmk__action_result_t *result,
|
|
- void *user_data),
|
|
- void (*fork_cb) (int pid, void *user_data))
|
|
+stonith__execute_async(stonith_action_t * action, void *userdata,
|
|
+ void (*done) (int pid,
|
|
+ const pcmk__action_result_t *result,
|
|
+ void *user_data),
|
|
+ void (*fork_cb) (int pid, void *user_data))
|
|
{
|
|
if (!action) {
|
|
return -EINVAL;
|
|
--
|
|
2.31.1
|
|
|
|
From 1d8fbd12b302b5029a341f269bd00def79e6a0ea Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 16:43:57 -0500
|
|
Subject: [PATCH 07/24] Refactor: fencing: add internal API for getting
|
|
metadata async
|
|
|
|
Nothing uses it yet
|
|
---
|
|
include/crm/fencing/internal.h | 6 +++
|
|
lib/fencing/st_client.c | 80 ++++++++++++++++++++++++++++++++++
|
|
2 files changed, 86 insertions(+)
|
|
|
|
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
|
|
index 1797d9a..513d1c4 100644
|
|
--- a/include/crm/fencing/internal.h
|
|
+++ b/include/crm/fencing/internal.h
|
|
@@ -70,6 +70,12 @@ int stonith__execute_async(stonith_action_t *action, void *userdata,
|
|
void *user_data),
|
|
void (*fork_cb) (int pid, void *user_data));
|
|
|
|
+int stonith__metadata_async(const char *agent, int timeout_sec,
|
|
+ void (*callback)(int pid,
|
|
+ const pcmk__action_result_t *result,
|
|
+ void *user_data),
|
|
+ void *user_data);
|
|
+
|
|
xmlNode *create_level_registration_xml(const char *node, const char *pattern,
|
|
const char *attr, const char *value,
|
|
int level,
|
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
index 6c252bc..91075bd 100644
|
|
--- a/lib/fencing/st_client.c
|
|
+++ b/lib/fencing/st_client.c
|
|
@@ -2386,6 +2386,86 @@ stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name,
|
|
freeXpathObject(xpath);
|
|
}
|
|
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Retrieve fence agent meta-data asynchronously
|
|
+ *
|
|
+ * \param[in] agent Agent to execute
|
|
+ * \param[in] timeout_sec Error if not complete within this time
|
|
+ * \param[in] callback Function to call with result (this will always be
|
|
+ * called, whether by this function directly or later
|
|
+ * via the main loop, and on success the metadata will
|
|
+ * be in its result argument's action_stdout)
|
|
+ * \param[in] user_data User data to pass to callback
|
|
+ *
|
|
+ * \return Standard Pacemaker return code
|
|
+ * \note The caller must use a main loop. This function is not a
|
|
+ * stonith_api_operations_t method because it does not need a stonith_t
|
|
+ * object and does not go through the fencer, but executes the agent
|
|
+ * directly.
|
|
+ */
|
|
+int
|
|
+stonith__metadata_async(const char *agent, int timeout_sec,
|
|
+ void (*callback)(int pid,
|
|
+ const pcmk__action_result_t *result,
|
|
+ void *user_data),
|
|
+ void *user_data)
|
|
+{
|
|
+ switch (stonith_get_namespace(agent, NULL)) {
|
|
+ case st_namespace_rhcs:
|
|
+ {
|
|
+ stonith_action_t *action = NULL;
|
|
+ int rc = pcmk_ok;
|
|
+
|
|
+ action = stonith_action_create(agent, "metadata", NULL, 0,
|
|
+ timeout_sec, NULL, NULL, NULL);
|
|
+
|
|
+ rc = stonith__execute_async(action, user_data, callback, NULL);
|
|
+ if (rc != pcmk_ok) {
|
|
+ callback(0, stonith__action_result(action), user_data);
|
|
+ stonith__destroy_action(action);
|
|
+ }
|
|
+ return pcmk_legacy2rc(rc);
|
|
+ }
|
|
+
|
|
+#if HAVE_STONITH_STONITH_H
|
|
+ case st_namespace_lha:
|
|
+ // LHA metadata is simply synthesized, so simulate async
|
|
+ {
|
|
+ pcmk__action_result_t result = {
|
|
+ .exit_status = CRM_EX_OK,
|
|
+ .execution_status = PCMK_EXEC_DONE,
|
|
+ .exit_reason = NULL,
|
|
+ .action_stdout = NULL,
|
|
+ .action_stderr = NULL,
|
|
+ };
|
|
+
|
|
+ stonith__lha_metadata(agent, timeout_sec,
|
|
+ &result.action_stdout);
|
|
+ callback(0, &result, user_data);
|
|
+ pcmk__reset_result(&result);
|
|
+ return pcmk_rc_ok;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ default:
|
|
+ {
|
|
+ pcmk__action_result_t result = {
|
|
+ .exit_status = CRM_EX_ERROR,
|
|
+ .execution_status = PCMK_EXEC_ERROR_HARD,
|
|
+ .exit_reason = crm_strdup_printf("No such agent '%s'",
|
|
+ agent),
|
|
+ .action_stdout = NULL,
|
|
+ .action_stderr = NULL,
|
|
+ };
|
|
+
|
|
+ callback(0, &result, user_data);
|
|
+ pcmk__reset_result(&result);
|
|
+ return ENOENT;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
/*!
|
|
* \internal
|
|
* \brief Return the exit status from an async action callback
|
|
--
|
|
2.31.1
|
|
|
|
From 1869cc181ef9599bd938fc545d302b2721169755 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 13 Sep 2022 17:33:10 -0500
|
|
Subject: [PATCH 08/24] Refactor: liblrmd: add internal API for getting
|
|
metadata async
|
|
|
|
Nothing uses it yet
|
|
---
|
|
include/crm/lrmd_internal.h | 10 +++-
|
|
lib/lrmd/lrmd_client.c | 115 ++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 123 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h
|
|
index 284c4d6..5cb00d5 100644
|
|
--- a/include/crm/lrmd_internal.h
|
|
+++ b/include/crm/lrmd_internal.h
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2015-2021 the Pacemaker project contributors
|
|
+ * Copyright 2015-2022 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -17,7 +17,7 @@
|
|
#include <crm/common/mainloop.h> // mainloop_io_t, ipc_client_callbacks
|
|
#include <crm/common/output_internal.h> // pcmk__output_t
|
|
#include <crm/common/remote_internal.h> // pcmk__remote_t
|
|
-#include <crm/lrmd.h> // lrmd_t, lrmd_event_data_t
|
|
+#include <crm/lrmd.h> // lrmd_t, lrmd_event_data_t, lrmd_rsc_info_t
|
|
|
|
int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port);
|
|
|
|
@@ -35,6 +35,12 @@ int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list,
|
|
int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id,
|
|
const char *msg_type);
|
|
|
|
+int lrmd__metadata_async(lrmd_rsc_info_t *rsc,
|
|
+ void (*callback)(int pid,
|
|
+ const pcmk__action_result_t *result,
|
|
+ void *user_data),
|
|
+ void *user_data);
|
|
+
|
|
void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc,
|
|
int op_status, const char *exit_reason);
|
|
|
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
|
index 82afd6c..4b16bf0 100644
|
|
--- a/lib/lrmd/lrmd_client.c
|
|
+++ b/lib/lrmd/lrmd_client.c
|
|
@@ -2343,6 +2343,121 @@ lrmd_api_delete(lrmd_t * lrmd)
|
|
free(lrmd);
|
|
}
|
|
|
|
+struct metadata_cb {
|
|
+ void (*callback)(int pid, const pcmk__action_result_t *result,
|
|
+ void *user_data);
|
|
+ void *user_data;
|
|
+};
|
|
+
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Process asynchronous metadata completion
|
|
+ *
|
|
+ * \param[in] action Metadata action that completed
|
|
+ */
|
|
+static void
|
|
+metadata_complete(svc_action_t *action)
|
|
+{
|
|
+ struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data;
|
|
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
|
|
+
|
|
+ pcmk__set_result(&result, action->rc, action->status,
|
|
+ services__exit_reason(action));
|
|
+ pcmk__set_result_output(&result, action->stdout_data, action->stderr_data);
|
|
+
|
|
+ metadata_cb->callback(0, &result, metadata_cb->user_data);
|
|
+ result.action_stdout = NULL; // Prevent free, because action owns it
|
|
+ result.action_stderr = NULL; // Prevent free, because action owns it
|
|
+ pcmk__reset_result(&result);
|
|
+ free(metadata_cb);
|
|
+}
|
|
+
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Retrieve agent metadata asynchronously
|
|
+ *
|
|
+ * \param[in] rsc Resource agent specification
|
|
+ * \param[in] callback Function to call with result (this will always be
|
|
+ * called, whether by this function directly or later via
|
|
+ * the main loop, and on success the metadata will be in
|
|
+ * its result argument's action_stdout)
|
|
+ * \param[in] user_data User data to pass to callback
|
|
+ *
|
|
+ * \return Standard Pacemaker return code
|
|
+ * \note This function is not a lrmd_api_operations_t method because it does not
|
|
+ * need an lrmd_t object and does not go through the executor, but
|
|
+ * executes the agent directly.
|
|
+ */
|
|
+int
|
|
+lrmd__metadata_async(lrmd_rsc_info_t *rsc,
|
|
+ void (*callback)(int pid,
|
|
+ const pcmk__action_result_t *result,
|
|
+ void *user_data),
|
|
+ void *user_data)
|
|
+{
|
|
+ svc_action_t *action = NULL;
|
|
+ struct metadata_cb *metadata_cb = NULL;
|
|
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
|
|
+
|
|
+ CRM_CHECK(callback != NULL, return EINVAL);
|
|
+
|
|
+ if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
|
|
+ pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR,
|
|
+ "Invalid resource specification");
|
|
+ callback(0, &result, user_data);
|
|
+ pcmk__reset_result(&result);
|
|
+ return EINVAL;
|
|
+ }
|
|
+
|
|
+ if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) {
|
|
+ return stonith__metadata_async(rsc->type,
|
|
+ CRMD_METADATA_CALL_TIMEOUT / 1000,
|
|
+ callback, user_data);
|
|
+ }
|
|
+
|
|
+ action = services__create_resource_action(rsc->type, rsc->standard,
|
|
+ rsc->provider, rsc->type,
|
|
+ CRMD_ACTION_METADATA, 0,
|
|
+ CRMD_METADATA_CALL_TIMEOUT, NULL,
|
|
+ 0);
|
|
+ if (action == NULL) {
|
|
+ pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
|
|
+ "Out of memory");
|
|
+ callback(0, &result, user_data);
|
|
+ pcmk__reset_result(&result);
|
|
+ return ENOMEM;
|
|
+ }
|
|
+ if (action->rc != PCMK_OCF_UNKNOWN) {
|
|
+ pcmk__set_result(&result, action->rc, action->status,
|
|
+ services__exit_reason(action));
|
|
+ callback(0, &result, user_data);
|
|
+ pcmk__reset_result(&result);
|
|
+ services_action_free(action);
|
|
+ return EINVAL;
|
|
+ }
|
|
+
|
|
+ action->cb_data = calloc(1, sizeof(struct metadata_cb));
|
|
+ if (action->cb_data == NULL) {
|
|
+ services_action_free(action);
|
|
+ pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
|
|
+ "Out of memory");
|
|
+ callback(0, &result, user_data);
|
|
+ pcmk__reset_result(&result);
|
|
+ return ENOMEM;
|
|
+ }
|
|
+
|
|
+ metadata_cb = (struct metadata_cb *) action->cb_data;
|
|
+ metadata_cb->callback = callback;
|
|
+ metadata_cb->user_data = user_data;
|
|
+ if (!services_action_async(action, metadata_complete)) {
|
|
+ services_action_free(action);
|
|
+ return pcmk_rc_error; // @TODO Derive from action->rc and ->status
|
|
+ }
|
|
+
|
|
+ // The services library has taken responsibility for action
|
|
+ return pcmk_rc_ok;
|
|
+}
|
|
+
|
|
/*!
|
|
* \internal
|
|
* \brief Set the result of an executor event
|
|
--
|
|
2.31.1
|
|
|
|
From de89164053cde8f44ca74a007703e0827ffd67ec Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 14 Sep 2022 16:34:37 -0500
|
|
Subject: [PATCH 09/24] Low: controller: ignore CRM_OP_LRM_REFRESH
|
|
|
|
This was only sent by crm_resource --refresh in versions 1.1.9 and earlier.
|
|
Since the local crm_resource is the same version as the controller, and
|
|
Pacemaker Remote was introduced in 1.1.9, this means that only remote nodes
|
|
running 1.1.9 can possibly send it.
|
|
|
|
It didn't really do anything useful anyway, so just ignore it.
|
|
---
|
|
daemons/controld/controld_execd.c | 33 +++++-----------------------
|
|
daemons/controld/controld_messages.c | 2 +-
|
|
include/crm/crm.h | 2 +-
|
|
lib/pacemaker/pcmk_graph_producer.c | 3 +--
|
|
lib/pengine/common.c | 2 --
|
|
5 files changed, 9 insertions(+), 33 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index fa411a6..719fab0 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -1553,32 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
|
|
lrmd_free_event(op);
|
|
}
|
|
|
|
-static void
|
|
-handle_refresh_op(lrm_state_t *lrm_state, const char *user_name,
|
|
- const char *from_host, const char *from_sys)
|
|
-{
|
|
- int rc = pcmk_ok;
|
|
- xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
|
|
-
|
|
- fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
|
|
- crm_info("Forced a local resource history refresh: call=%d", rc);
|
|
-
|
|
- if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) {
|
|
- xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host,
|
|
- from_sys, CRM_SYSTEM_LRMD,
|
|
- fsa_our_uuid);
|
|
-
|
|
- crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
|
|
-
|
|
- if (relay_message(reply, TRUE) == FALSE) {
|
|
- crm_log_xml_err(reply, "Unable to route reply");
|
|
- }
|
|
- free_xml(reply);
|
|
- }
|
|
-
|
|
- free_xml(fragment);
|
|
-}
|
|
-
|
|
static void
|
|
handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
|
|
{
|
|
@@ -1787,7 +1761,12 @@ do_lrm_invoke(long long action,
|
|
}
|
|
|
|
if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
|
|
- handle_refresh_op(lrm_state, user_name, from_host, from_sys);
|
|
+ /* @COMPAT This can only be sent by crm_resource --refresh on a
|
|
+ * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
|
|
+ * unlikely. It previously would cause the controller to re-write its
|
|
+ * resource history to the CIB. Just ignore it.
|
|
+ */
|
|
+ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
|
|
|
|
} else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) {
|
|
handle_query_op(input->msg, lrm_state);
|
|
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
|
|
index 31d3524..957fc20 100644
|
|
--- a/daemons/controld/controld_messages.c
|
|
+++ b/daemons/controld/controld_messages.c
|
|
@@ -1061,7 +1061,7 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
|
|
return handle_lrm_delete(stored_msg);
|
|
|
|
} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
|
|
- || (strcmp(op, CRM_OP_LRM_REFRESH) == 0)
|
|
+ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
|
|
|| (strcmp(op, CRM_OP_REPROBE) == 0)) {
|
|
|
|
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
|
|
diff --git a/include/crm/crm.h b/include/crm/crm.h
|
|
index 5ec66d2..f2e536e 100644
|
|
--- a/include/crm/crm.h
|
|
+++ b/include/crm/crm.h
|
|
@@ -146,7 +146,7 @@ extern char *crm_system_name;
|
|
# define CRM_OP_REGISTER "register"
|
|
# define CRM_OP_IPC_FWD "ipc_fwd"
|
|
# define CRM_OP_INVOKE_LRM "lrm_invoke"
|
|
-# define CRM_OP_LRM_REFRESH "lrm_refresh" /* Deprecated */
|
|
+# define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
|
|
# define CRM_OP_LRM_QUERY "lrm_query"
|
|
# define CRM_OP_LRM_DELETE "lrm_delete"
|
|
# define CRM_OP_LRM_FAIL "lrm_fail"
|
|
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
|
|
index 4c1b5a6..0077719 100644
|
|
--- a/lib/pacemaker/pcmk_graph_producer.c
|
|
+++ b/lib/pacemaker/pcmk_graph_producer.c
|
|
@@ -446,8 +446,7 @@ create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details,
|
|
|
|
} else if (pcmk__str_any_of(action->task,
|
|
CRM_OP_SHUTDOWN,
|
|
- CRM_OP_CLEAR_FAILCOUNT,
|
|
- CRM_OP_LRM_REFRESH, NULL)) {
|
|
+ CRM_OP_CLEAR_FAILCOUNT, NULL)) {
|
|
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
|
|
|
|
} else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
|
|
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
|
|
index 93ba3fe..7db9d0e 100644
|
|
--- a/lib/pengine/common.c
|
|
+++ b/lib/pengine/common.c
|
|
@@ -384,8 +384,6 @@ text2task(const char *task)
|
|
return no_action;
|
|
} else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
|
|
return no_action;
|
|
- } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
|
|
- return no_action;
|
|
} else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
|
|
return no_action;
|
|
} else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
|
|
--
|
|
2.31.1
|
|
|
|
From 406fbc52ed652915887e78138f8f3c2eeaeabfb6 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 14 Sep 2022 16:46:15 -0500
|
|
Subject: [PATCH 10/24] API: libcrmcommon: deprecate CRM_OP_LRM_QUERY
|
|
|
|
This has been unused since at least Pacemaker 1.0.0, and since we don't support
|
|
rolling upgrades from anything that old, and Pacemaker Remote didn't exist
|
|
then, we can just drop support for it entirely.
|
|
---
|
|
daemons/controld/controld_execd.c | 17 -----------------
|
|
include/crm/crm.h | 1 -
|
|
include/crm/crm_compat.h | 5 ++++-
|
|
3 files changed, 4 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 719fab0..54e6818 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -1553,20 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
|
|
lrmd_free_event(op);
|
|
}
|
|
|
|
-static void
|
|
-handle_query_op(xmlNode *msg, lrm_state_t *lrm_state)
|
|
-{
|
|
- xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
|
|
- xmlNode *reply = create_reply(msg, data);
|
|
-
|
|
- if (relay_message(reply, TRUE) == FALSE) {
|
|
- crm_err("Unable to route reply");
|
|
- crm_log_xml_err(reply, "reply");
|
|
- }
|
|
- free_xml(reply);
|
|
- free_xml(data);
|
|
-}
|
|
-
|
|
static void
|
|
handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
|
|
const char *from_host, const char *user_name,
|
|
@@ -1768,9 +1754,6 @@ do_lrm_invoke(long long action,
|
|
*/
|
|
crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
|
|
|
|
- } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) {
|
|
- handle_query_op(input->msg, lrm_state);
|
|
-
|
|
// @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
|
|
} else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) {
|
|
update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
|
|
diff --git a/include/crm/crm.h b/include/crm/crm.h
|
|
index f2e536e..38915e3 100644
|
|
--- a/include/crm/crm.h
|
|
+++ b/include/crm/crm.h
|
|
@@ -147,7 +147,6 @@ extern char *crm_system_name;
|
|
# define CRM_OP_IPC_FWD "ipc_fwd"
|
|
# define CRM_OP_INVOKE_LRM "lrm_invoke"
|
|
# define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
|
|
-# define CRM_OP_LRM_QUERY "lrm_query"
|
|
# define CRM_OP_LRM_DELETE "lrm_delete"
|
|
# define CRM_OP_LRM_FAIL "lrm_fail"
|
|
# define CRM_OP_PROBED "probe_complete"
|
|
diff --git a/include/crm/crm_compat.h b/include/crm/crm_compat.h
|
|
index 3b35a5e..8a4b368 100644
|
|
--- a/include/crm/crm_compat.h
|
|
+++ b/include/crm/crm_compat.h
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2004-2021 the Pacemaker project contributors
|
|
+ * Copyright 2004-2022 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -31,6 +31,9 @@ extern "C" {
|
|
//! \deprecated This defined constant will be removed in a future release
|
|
#define MAX_IPC_DELAY 120
|
|
|
|
+//! \deprecated This defined constant will be removed in a future release
|
|
+#define CRM_OP_LRM_QUERY "lrm_query"
|
|
+
|
|
//!@{
|
|
//! \deprecated This macro will be removed in a future release
|
|
|
|
--
|
|
2.31.1
|
|
|
|
From 7c3d2f58d387d2ec0d5c5d340f8816f324e816bf Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 14 Sep 2022 16:49:48 -0500
|
|
Subject: [PATCH 11/24] Refactor: controller: drop do_lrm_query_internal()
|
|
|
|
Now that there's only one (short) caller, just move its contents there
|
|
---
|
|
daemons/controld/controld_execd.c | 28 +++++++++++-----------------
|
|
1 file changed, 11 insertions(+), 17 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 54e6818..99c9193 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -811,19 +811,26 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
|
|
return FALSE;
|
|
}
|
|
|
|
-static xmlNode *
|
|
-do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
|
|
+xmlNode *
|
|
+controld_query_executor_state(const char *node_name)
|
|
{
|
|
xmlNode *xml_state = NULL;
|
|
xmlNode *xml_data = NULL;
|
|
xmlNode *rsc_list = NULL;
|
|
crm_node_t *peer = NULL;
|
|
+ lrm_state_t *lrm_state = lrm_state_find(node_name);
|
|
+
|
|
+ if (!lrm_state) {
|
|
+ crm_err("Could not find executor state for node %s", node_name);
|
|
+ return NULL;
|
|
+ }
|
|
|
|
peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
|
|
CRM_CHECK(peer != NULL, return NULL);
|
|
|
|
- xml_state = create_node_state_update(peer, update_flags, NULL,
|
|
- __func__);
|
|
+ xml_state = create_node_state_update(peer,
|
|
+ node_update_cluster|node_update_peer,
|
|
+ NULL, __func__);
|
|
if (xml_state == NULL) {
|
|
return NULL;
|
|
}
|
|
@@ -840,19 +847,6 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
|
|
return xml_state;
|
|
}
|
|
|
|
-xmlNode *
|
|
-controld_query_executor_state(const char *node_name)
|
|
-{
|
|
- lrm_state_t *lrm_state = lrm_state_find(node_name);
|
|
-
|
|
- if (!lrm_state) {
|
|
- crm_err("Could not find executor state for node %s", node_name);
|
|
- return NULL;
|
|
- }
|
|
- return do_lrm_query_internal(lrm_state,
|
|
- node_update_cluster|node_update_peer);
|
|
-}
|
|
-
|
|
/*!
|
|
* \internal
|
|
* \brief Map standard Pacemaker return code to operation status and OCF code
|
|
--
|
|
2.31.1
|
|
|
|
From 5cab259417a06f64a607f99c478459093ed1b5ed Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 14 Sep 2022 15:48:44 -0500
|
|
Subject: [PATCH 12/24] Doc: controller: drop pointless comment
|
|
|
|
It's (likely?) impossible for a live cluster to have been doing rolling
|
|
upgrades since 2006.
|
|
---
|
|
daemons/controld/controld_execd.c | 12 +-----------
|
|
1 file changed, 1 insertion(+), 11 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 99c9193..53b1156 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -678,18 +678,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
|
|
|
|
target_rc = rsc_op_expected_rc(op);
|
|
|
|
- /* there is a small risk in formerly mixed clusters that it will
|
|
- * be sub-optimal.
|
|
- *
|
|
- * however with our upgrade policy, the update we send should
|
|
- * still be completely supported anyway
|
|
- */
|
|
caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
|
|
- CRM_LOG_ASSERT(caller_version != NULL);
|
|
-
|
|
- if(caller_version == NULL) {
|
|
- caller_version = CRM_FEATURE_SET;
|
|
- }
|
|
+ CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET);
|
|
|
|
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
|
|
fsa_our_uname, src);
|
|
--
|
|
2.31.1
|
|
|
|
From b4541d7ecd9551674c4546415751a223ff3013ed Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 15 Sep 2022 11:24:28 -0500
|
|
Subject: [PATCH 13/24] Refactor: controller: move where reload actions get
|
|
remapped
|
|
|
|
... from do_lrm_invoke() to do_lrm_rsc_op(), which will make planned changes
|
|
easier
|
|
---
|
|
daemons/controld/controld_execd.c | 38 ++++++++++++++++---------------
|
|
1 file changed, 20 insertions(+), 18 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 53b1156..c9f0cc7 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -43,7 +43,8 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us
|
|
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
|
|
const char *rsc_id, const char *operation);
|
|
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
- const char *operation, xmlNode *msg);
|
|
+ const char *operation, xmlNode *msg,
|
|
+ struct ra_metadata_s *md);
|
|
|
|
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
|
|
int log_level);
|
|
@@ -1808,26 +1809,12 @@ do_lrm_invoke(long long action,
|
|
do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
|
|
crm_rsc_delete, user_name);
|
|
|
|
- } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
|
|
- CRMD_ACTION_RELOAD_AGENT, NULL)) {
|
|
- /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
|
|
- * will schedule reload-agent actions only. In either case, we need
|
|
- * to map that to whatever the resource agent actually supports.
|
|
- * Default to the OCF 1.1 name.
|
|
- */
|
|
+ } else {
|
|
struct ra_metadata_s *md = NULL;
|
|
- const char *reload_name = CRMD_ACTION_RELOAD_AGENT;
|
|
|
|
md = controld_get_rsc_metadata(lrm_state, rsc,
|
|
controld_metadata_from_cache);
|
|
- if ((md != NULL)
|
|
- && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
|
|
- reload_name = CRMD_ACTION_RELOAD;
|
|
- }
|
|
- do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml);
|
|
-
|
|
- } else {
|
|
- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml);
|
|
+ do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md);
|
|
}
|
|
|
|
lrmd_free_rsc_info(rsc);
|
|
@@ -2176,7 +2163,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
|
|
|
|
static void
|
|
do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
- const char *operation, xmlNode *msg)
|
|
+ const char *operation, xmlNode *msg, struct ra_metadata_s *md)
|
|
{
|
|
int rc;
|
|
int call_id = 0;
|
|
@@ -2198,6 +2185,21 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
}
|
|
}
|
|
|
|
+ if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
|
|
+ CRMD_ACTION_RELOAD_AGENT, NULL)) {
|
|
+ /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
|
|
+ * will schedule reload-agent actions only. In either case, we need
|
|
+ * to map that to whatever the resource agent actually supports.
|
|
+ * Default to the OCF 1.1 name.
|
|
+ */
|
|
+ if ((md != NULL)
|
|
+ && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
|
|
+ operation = CRMD_ACTION_RELOAD;
|
|
+ } else {
|
|
+ operation = CRMD_ACTION_RELOAD_AGENT;
|
|
+ }
|
|
+ }
|
|
+
|
|
op = construct_op(lrm_state, msg, rsc->id, operation);
|
|
CRM_CHECK(op != NULL, return);
|
|
|
|
--
|
|
2.31.1
|
|
|
|
From a4f6e394a61712da750aabffca2b6dd02f0c5ae6 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 15 Sep 2022 15:12:06 -0500
|
|
Subject: [PATCH 14/24] Refactor: controller: drop operation argument to
|
|
do_lrm_rsc_op()
|
|
|
|
It can be derived from the XML argument
|
|
---
|
|
daemons/controld/controld_execd.c | 26 +++++++++++++-------------
|
|
1 file changed, 13 insertions(+), 13 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index c9f0cc7..89a993b 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -43,8 +43,7 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us
|
|
static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
|
|
const char *rsc_id, const char *operation);
|
|
static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
- const char *operation, xmlNode *msg,
|
|
- struct ra_metadata_s *md);
|
|
+ xmlNode *msg, struct ra_metadata_s *md);
|
|
|
|
static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
|
|
int log_level);
|
|
@@ -1814,7 +1813,7 @@ do_lrm_invoke(long long action,
|
|
|
|
md = controld_get_rsc_metadata(lrm_state, rsc,
|
|
controld_metadata_from_cache);
|
|
- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md);
|
|
+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
|
|
}
|
|
|
|
lrmd_free_rsc_info(rsc);
|
|
@@ -2162,8 +2161,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t
|
|
}
|
|
|
|
static void
|
|
-do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
- const char *operation, xmlNode *msg, struct ra_metadata_s *md)
|
|
+do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
|
|
+ struct ra_metadata_s *md)
|
|
{
|
|
int rc;
|
|
int call_id = 0;
|
|
@@ -2172,17 +2171,18 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
lrmd_key_value_t *params = NULL;
|
|
fsa_data_t *msg_data = NULL;
|
|
const char *transition = NULL;
|
|
+ const char *operation = NULL;
|
|
gboolean stop_recurring = FALSE;
|
|
const char *nack_reason = NULL;
|
|
|
|
- CRM_CHECK(rsc != NULL, return);
|
|
- CRM_CHECK(operation != NULL, return);
|
|
+ CRM_CHECK((rsc != NULL) && (msg != NULL), return);
|
|
|
|
- if (msg != NULL) {
|
|
- transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
|
|
- if (transition == NULL) {
|
|
- crm_log_xml_err(msg, "Missing transition number");
|
|
- }
|
|
+ operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
|
|
+ CRM_CHECK(!pcmk__str_empty(operation), return);
|
|
+
|
|
+ transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
|
|
+ if (pcmk__str_empty(transition)) {
|
|
+ crm_log_xml_err(msg, "Missing transition number");
|
|
}
|
|
|
|
if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
|
|
@@ -2241,7 +2241,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
crm_notice("Requesting local execution of %s operation for %s on %s "
|
|
CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
|
|
crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name,
|
|
- transition, rsc->id, operation, op->interval_ms);
|
|
+ (transition != NULL ? transition : ""), rsc->id, operation, op->interval_ms);
|
|
|
|
if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)
|
|
&& pcmk__str_eq(operation, RSC_START, pcmk__str_casei)) {
|
|
--
|
|
2.31.1
|
|
|
|
From 486dbdf023f82a82a02207d8fb7921f8f2ac0588 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 15 Sep 2022 15:40:38 -0500
|
|
Subject: [PATCH 15/24] Low: controller: add failsafe for no executor
|
|
connection
|
|
|
|
... in do_lrm_rsc_op(), to make planned changes easier
|
|
---
|
|
daemons/controld/controld_execd.c | 11 +++++++++++
|
|
1 file changed, 11 insertions(+)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 89a993b..8986b9b 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -2185,6 +2185,17 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
|
|
crm_log_xml_err(msg, "Missing transition number");
|
|
}
|
|
|
|
+ if (lrm_state == NULL) {
|
|
+ // This shouldn't be possible, but provide a failsafe just in case
|
|
+ crm_err("Cannot execute %s of %s: No executor connection "
|
|
+ CRM_XS " transition_key=%s",
|
|
+ operation, rsc->id, (transition != NULL ? transition : ""));
|
|
+ synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
|
|
+ PCMK_OCF_UNKNOWN_ERROR,
|
|
+ "No executor connection");
|
|
+ return;
|
|
+ }
|
|
+
|
|
if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
|
|
CRMD_ACTION_RELOAD_AGENT, NULL)) {
|
|
/* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
|
|
--
|
|
2.31.1
|
|
|
|
From afd53bba7dfb5109d844318dff0f82e4687d9e32 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 15 Sep 2022 12:04:31 -0500
|
|
Subject: [PATCH 16/24] Log: controller: improve messages when metadata cache
|
|
update fails
|
|
|
|
Previously, metadata_cache_update() or ra_param_from_xml() would log an error,
|
|
then controld_get_rsc_metadata() (but not the other caller,
|
|
process_lrm_event()) would log another warning with the agent info.
|
|
|
|
Combine these into a single message always logged by metadata_cache_update(),
|
|
which also has been renamed to controld_cache_metadata().
|
|
---
|
|
daemons/controld/controld_execd.c | 2 +-
|
|
daemons/controld/controld_metadata.c | 27 ++++++++++++---------------
|
|
daemons/controld/controld_metadata.h | 6 +++---
|
|
3 files changed, 16 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 8986b9b..fe16c96 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -2858,7 +2858,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
|
|
} else if (rsc && (op->rc == PCMK_OCF_OK)) {
|
|
char *metadata = unescape_newlines(op->output);
|
|
|
|
- metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
|
|
+ controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
|
|
free(metadata);
|
|
}
|
|
}
|
|
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
|
|
index 8c6f195..91a6a10 100644
|
|
--- a/daemons/controld/controld_metadata.c
|
|
+++ b/daemons/controld/controld_metadata.c
|
|
@@ -149,13 +149,11 @@ ra_param_from_xml(xmlNode *param_xml)
|
|
|
|
p = calloc(1, sizeof(struct ra_param_s));
|
|
if (p == NULL) {
|
|
- crm_crit("Could not allocate memory for resource metadata");
|
|
return NULL;
|
|
}
|
|
|
|
p->rap_name = strdup(param_name);
|
|
if (p->rap_name == NULL) {
|
|
- crm_crit("Could not allocate memory for resource metadata");
|
|
free(p);
|
|
return NULL;
|
|
}
|
|
@@ -196,10 +194,11 @@ log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version)
|
|
}
|
|
|
|
struct ra_metadata_s *
|
|
-metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
|
|
- const char *metadata_str)
|
|
+controld_cache_metadata(GHashTable *mdc, lrmd_rsc_info_t *rsc,
|
|
+ const char *metadata_str)
|
|
{
|
|
char *key = NULL;
|
|
+ const char *reason = NULL;
|
|
xmlNode *metadata = NULL;
|
|
xmlNode *match = NULL;
|
|
struct ra_metadata_s *md = NULL;
|
|
@@ -210,20 +209,19 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
|
|
|
|
key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type);
|
|
if (!key) {
|
|
- crm_crit("Could not allocate memory for resource metadata");
|
|
+ reason = "Invalid resource agent standard or type";
|
|
goto err;
|
|
}
|
|
|
|
metadata = string2xml(metadata_str);
|
|
if (!metadata) {
|
|
- crm_err("Metadata for %s:%s:%s is not valid XML",
|
|
- rsc->standard, rsc->provider, rsc->type);
|
|
+ reason = "Metadata is not valid XML";
|
|
goto err;
|
|
}
|
|
|
|
md = calloc(1, sizeof(struct ra_metadata_s));
|
|
if (md == NULL) {
|
|
- crm_crit("Could not allocate memory for resource metadata");
|
|
+ reason = "Could not allocate memory";
|
|
goto err;
|
|
}
|
|
|
|
@@ -281,6 +279,7 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
|
|
struct ra_param_s *p = ra_param_from_xml(match);
|
|
|
|
if (p == NULL) {
|
|
+ reason = "Could not allocate memory";
|
|
goto err;
|
|
}
|
|
if (pcmk_is_set(p->rap_flags, ra_param_private)) {
|
|
@@ -311,6 +310,9 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc,
|
|
return md;
|
|
|
|
err:
|
|
+ crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s",
|
|
+ rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
|
|
+ (rsc->provider != NULL ? rsc->provider : ""), rsc->type, reason);
|
|
free(key);
|
|
free_xml(metadata);
|
|
metadata_free(md);
|
|
@@ -377,13 +379,8 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
return NULL;
|
|
}
|
|
|
|
- metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
|
|
- metadata_str);
|
|
+ metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc,
|
|
+ metadata_str);
|
|
free(metadata_str);
|
|
- if (metadata == NULL) {
|
|
- crm_warn("Failed to update metadata for %s (%s%s%s:%s)",
|
|
- rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"),
|
|
- ((rsc->provider == NULL)? "" : rsc->provider), rsc->type);
|
|
- }
|
|
return metadata;
|
|
}
|
|
diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h
|
|
index 7354f94..52d3336 100644
|
|
--- a/daemons/controld/controld_metadata.h
|
|
+++ b/daemons/controld/controld_metadata.h
|
|
@@ -73,9 +73,9 @@ void metadata_cache_free(GHashTable *mdc);
|
|
void metadata_cache_reset(GHashTable *mdc);
|
|
void metadata_cache_fini(void);
|
|
|
|
-struct ra_metadata_s *metadata_cache_update(GHashTable *mdc,
|
|
- lrmd_rsc_info_t *rsc,
|
|
- const char *metadata_str);
|
|
+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc,
|
|
+ lrmd_rsc_info_t *rsc,
|
|
+ const char *metadata_str);
|
|
struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state,
|
|
lrmd_rsc_info_t *rsc,
|
|
uint32_t source);
|
|
--
|
|
2.31.1
|
|
|
|
From caeed447d0d8a980d431efd70e5b6f9c91ffac7f Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 15 Sep 2022 13:33:36 -0500
|
|
Subject: [PATCH 17/24] Fix: controller: pre-load agent metadata asynchronously
|
|
|
|
The controller needs resource agent metadata to record digests with pending and
|
|
completed resource actions.
|
|
|
|
Previously, metadata was collected synchronously when needed. This caused
|
|
several problems, two of which are fixed here for most actions: synchronous
|
|
execution blocks the controller from doing anything else (and if the agent's
|
|
metadata action tries to contact the controller, that blocks everything until
|
|
the action times out), and the metadata action ate into the real action's
|
|
timeout.
|
|
|
|
Now, if we're likely to need metadata for an action, attempt to get it
|
|
asynchronously before executing that action, so the metadata is available in
|
|
cache when needed.
|
|
|
|
This is not a complete solution, as there are other code paths that might
|
|
require metadata and still lead to synchronous execution, but it handles the
|
|
most important cases.
|
|
|
|
Fixes T554
|
|
---
|
|
daemons/controld/controld_execd.c | 105 +++++++++++++++++++++++----
|
|
daemons/controld/controld_metadata.c | 22 +++---
|
|
2 files changed, 102 insertions(+), 25 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index fe16c96..c56fdf5 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -670,7 +670,6 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
|
|
struct ra_metadata_s *metadata = NULL;
|
|
const char *caller_version = NULL;
|
|
lrm_state_t *lrm_state = NULL;
|
|
- uint32_t metadata_source = controld_metadata_from_agent;
|
|
|
|
if (op == NULL) {
|
|
return FALSE;
|
|
@@ -703,19 +702,14 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
|
|
return TRUE;
|
|
}
|
|
|
|
- /* Getting meta-data from cache is OK unless this is a successful start
|
|
- * action -- always refresh from the agent for those, in case the
|
|
- * resource agent was updated.
|
|
+ /* Ideally the metadata is cached, and the agent is just a fallback.
|
|
*
|
|
- * @TODO Only refresh the meta-data after starts if the agent actually
|
|
- * changed (using something like inotify, or a hash or modification time of
|
|
- * the agent executable).
|
|
+ * @TODO Go through all callers and ensure they get metadata asynchronously
|
|
+ * first.
|
|
*/
|
|
- if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc)
|
|
- || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) {
|
|
- metadata_source |= controld_metadata_from_cache;
|
|
- }
|
|
- metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source);
|
|
+ metadata = controld_get_rsc_metadata(lrm_state, rsc,
|
|
+ controld_metadata_from_agent
|
|
+ |controld_metadata_from_cache);
|
|
if (metadata == NULL) {
|
|
return TRUE;
|
|
}
|
|
@@ -1673,6 +1667,56 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
|
|
user_name, input, unregister);
|
|
}
|
|
|
|
+// User data for asynchronous metadata execution
|
|
+struct metadata_cb_data {
|
|
+ lrmd_rsc_info_t *rsc; // Copy of resource information
|
|
+ xmlNode *input_xml; // Copy of FSA input XML
|
|
+};
|
|
+
|
|
+static struct metadata_cb_data *
|
|
+new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
|
|
+{
|
|
+ struct metadata_cb_data *data = NULL;
|
|
+
|
|
+ data = calloc(1, sizeof(struct metadata_cb_data));
|
|
+ CRM_ASSERT(data != NULL);
|
|
+ data->input_xml = copy_xml(input_xml);
|
|
+ data->rsc = lrmd_copy_rsc_info(rsc);
|
|
+ return data;
|
|
+}
|
|
+
|
|
+static void
|
|
+free_metadata_cb_data(struct metadata_cb_data *data)
|
|
+{
|
|
+ lrmd_free_rsc_info(data->rsc);
|
|
+ free_xml(data->input_xml);
|
|
+ free(data);
|
|
+}
|
|
+
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Execute an action after metadata has been retrieved
|
|
+ *
|
|
+ * \param[in] pid Ignored
|
|
+ * \param[in] result Result of metadata action
|
|
+ * \param[in] user_data Metadata callback data
|
|
+ */
|
|
+static void
|
|
+metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
|
|
+{
|
|
+ struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
|
|
+
|
|
+ struct ra_metadata_s *md = NULL;
|
|
+ lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
|
|
+
|
|
+ if ((lrm_state != NULL) && pcmk__result_ok(result)) {
|
|
+ md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
|
|
+ result->action_stdout);
|
|
+ }
|
|
+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
|
|
+ free_metadata_cb_data(data);
|
|
+}
|
|
+
|
|
/* A_LRM_INVOKE */
|
|
void
|
|
do_lrm_invoke(long long action,
|
|
@@ -1811,9 +1855,40 @@ do_lrm_invoke(long long action,
|
|
} else {
|
|
struct ra_metadata_s *md = NULL;
|
|
|
|
- md = controld_get_rsc_metadata(lrm_state, rsc,
|
|
- controld_metadata_from_cache);
|
|
- do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
|
|
+ /* Getting metadata from cache is OK except for start actions --
|
|
+ * always refresh from the agent for those, in case the resource
|
|
+ * agent was updated.
|
|
+ *
|
|
+ * @TODO Only refresh metadata for starts if the agent actually
|
|
+ * changed (using something like inotify, or a hash or modification
|
|
+ * time of the agent executable).
|
|
+ */
|
|
+ if (strcmp(operation, CRMD_ACTION_START) != 0) {
|
|
+ md = controld_get_rsc_metadata(lrm_state, rsc,
|
|
+ controld_metadata_from_cache);
|
|
+ }
|
|
+
|
|
+ if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
|
|
+ operation)) {
|
|
+ /* Most likely, we'll need the agent metadata to record the
|
|
+ * pending operation and the operation result. Get it now rather
|
|
+ * than wait until then, so the metadata action doesn't eat into
|
|
+ * the real action's timeout.
|
|
+ *
|
|
+ * @TODO Metadata is retrieved via direct execution of the
|
|
+ * agent, which has a couple of related issues: the executor
|
|
+ * should execute agents, not the controller; and metadata for
|
|
+ * Pacemaker Remote nodes should be collected on those nodes,
|
|
+ * not locally.
|
|
+ */
|
|
+ struct metadata_cb_data *data = NULL;
|
|
+
|
|
+ data = new_metadata_cb_data(rsc, input->xml);
|
|
+ (void) lrmd__metadata_async(rsc, metadata_complete,
|
|
+ (void *) data);
|
|
+ } else {
|
|
+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
|
|
+ }
|
|
}
|
|
|
|
lrmd_free_rsc_info(rsc);
|
|
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
|
|
index 91a6a10..a954ebd 100644
|
|
--- a/daemons/controld/controld_metadata.c
|
|
+++ b/daemons/controld/controld_metadata.c
|
|
@@ -356,17 +356,19 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
return NULL;
|
|
}
|
|
|
|
- /* For now, we always collect resource agent meta-data via a local,
|
|
- * synchronous, direct execution of the agent. This has multiple issues:
|
|
- * the executor should execute agents, not the controller; meta-data for
|
|
- * Pacemaker Remote nodes should be collected on those nodes, not
|
|
- * locally; and the meta-data call shouldn't eat into the timeout of the
|
|
- * real action being performed.
|
|
+ /* For most actions, metadata was cached asynchronously before action
|
|
+ * execution (via metadata_complete()).
|
|
*
|
|
- * These issues are planned to be addressed by having the scheduler
|
|
- * schedule a meta-data cache check at the beginning of each transition.
|
|
- * Once that is working, this block will only be a fallback in case the
|
|
- * initial collection fails.
|
|
+ * However if that failed, and for other actions, retrieve the metadata now
|
|
+ * via a local, synchronous, direct execution of the agent.
|
|
+ *
|
|
+ * This has multiple issues, which is why this is just a fallback: the
|
|
+ * executor should execute agents, not the controller; metadata for
|
|
+ * Pacemaker Remote nodes should be collected on those nodes, not locally;
|
|
+ * the metadata call shouldn't eat into the timeout of the real action being
|
|
+ * performed; and the synchronous call blocks the controller (which also
|
|
+ * means that if the metadata action tries to contact the controller,
|
|
+ * everything will hang until the timeout).
|
|
*/
|
|
rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
|
|
rsc->type, &metadata_str, 0);
|
|
--
|
|
2.31.1
|
|
|
|
From fddf663d5285740771145e83c41f33c0bfb86dfb Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 19 Sep 2022 15:19:06 -0500
|
|
Subject: [PATCH 18/24] Low: libstonithd: return CRM_EX_NOSUCH for bad agent
|
|
namespace
|
|
|
|
Callers can't rely on a particular exit code scheme at this point,
|
|
but it doesn't hurt
|
|
---
|
|
lib/fencing/st_client.c | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
index 91075bd..d41b066 100644
|
|
--- a/lib/fencing/st_client.c
|
|
+++ b/lib/fencing/st_client.c
|
|
@@ -2451,7 +2451,7 @@ stonith__metadata_async(const char *agent, int timeout_sec,
|
|
default:
|
|
{
|
|
pcmk__action_result_t result = {
|
|
- .exit_status = CRM_EX_ERROR,
|
|
+ .exit_status = CRM_EX_NOSUCH,
|
|
.execution_status = PCMK_EXEC_ERROR_HARD,
|
|
.exit_reason = crm_strdup_printf("No such agent '%s'",
|
|
agent),
|
|
--
|
|
2.31.1
|
|
|
|
From 2de926f5b2b5dbf28f994bc35477d59ce46d5ab1 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 19 Sep 2022 15:23:43 -0500
|
|
Subject: [PATCH 19/24] Low: liblrmd: consider invalid agent specification a
|
|
fatal error
|
|
|
|
---
|
|
lib/lrmd/lrmd_client.c | 3 ++-
|
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
|
index 4b16bf0..d691dce 100644
|
|
--- a/lib/lrmd/lrmd_client.c
|
|
+++ b/lib/lrmd/lrmd_client.c
|
|
@@ -2402,7 +2402,8 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc,
|
|
CRM_CHECK(callback != NULL, return EINVAL);
|
|
|
|
if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) {
|
|
- pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR,
|
|
+ pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED,
|
|
+ PCMK_EXEC_ERROR_FATAL,
|
|
"Invalid resource specification");
|
|
callback(0, &result, user_data);
|
|
pcmk__reset_result(&result);
|
|
--
|
|
2.31.1
|
|
|
|
From 2d526dae9dbfc6f8658ff96f5f6d58ee09ea879c Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 19 Sep 2022 15:25:12 -0500
|
|
Subject: [PATCH 20/24] Low: liblrmd: use resource ID for metadata actions when
|
|
available
|
|
|
|
---
|
|
lib/lrmd/lrmd_client.c | 10 +++++-----
|
|
1 file changed, 5 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
|
index d691dce..570a2b8 100644
|
|
--- a/lib/lrmd/lrmd_client.c
|
|
+++ b/lib/lrmd/lrmd_client.c
|
|
@@ -2416,11 +2416,11 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc,
|
|
callback, user_data);
|
|
}
|
|
|
|
- action = services__create_resource_action(rsc->type, rsc->standard,
|
|
- rsc->provider, rsc->type,
|
|
- CRMD_ACTION_METADATA, 0,
|
|
- CRMD_METADATA_CALL_TIMEOUT, NULL,
|
|
- 0);
|
|
+ action = services__create_resource_action((rsc->id != NULL ? rsc->id : rsc->type),
|
|
+ rsc->standard, rsc->provider,
|
|
+ rsc->type, CRMD_ACTION_METADATA,
|
|
+ 0, CRMD_METADATA_CALL_TIMEOUT,
|
|
+ NULL, 0);
|
|
if (action == NULL) {
|
|
pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
|
|
"Out of memory");
|
|
--
|
|
2.31.1
|
|
|
|
From 3d632be58dca13293e4ae974da5dfe2838fcdf12 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 19 Sep 2022 15:27:11 -0500
|
|
Subject: [PATCH 21/24] Refactor: controller: executor query can assume local
|
|
node
|
|
|
|
---
|
|
daemons/controld/controld_execd.c | 6 +++---
|
|
daemons/controld/controld_fsa.h | 4 ++--
|
|
daemons/controld/controld_join_client.c | 2 +-
|
|
daemons/controld/controld_join_dc.c | 2 +-
|
|
4 files changed, 7 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index c56fdf5..039b194 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -796,16 +796,16 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
|
|
}
|
|
|
|
xmlNode *
|
|
-controld_query_executor_state(const char *node_name)
|
|
+controld_query_executor_state(void)
|
|
{
|
|
xmlNode *xml_state = NULL;
|
|
xmlNode *xml_data = NULL;
|
|
xmlNode *rsc_list = NULL;
|
|
crm_node_t *peer = NULL;
|
|
- lrm_state_t *lrm_state = lrm_state_find(node_name);
|
|
+ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname);
|
|
|
|
if (!lrm_state) {
|
|
- crm_err("Could not find executor state for node %s", node_name);
|
|
+ crm_err("Could not find executor state for node %s", fsa_our_uname);
|
|
return NULL;
|
|
}
|
|
|
|
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
|
|
index 296232f..d137310 100644
|
|
--- a/daemons/controld/controld_fsa.h
|
|
+++ b/daemons/controld/controld_fsa.h
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2004-2021 the Pacemaker project contributors
|
|
+ * Copyright 2004-2022 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -518,7 +518,7 @@ extern gboolean ever_had_quorum;
|
|
// These should be moved elsewhere
|
|
void do_update_cib_nodes(gboolean overwrite, const char *caller);
|
|
int crmd_cib_smart_opt(void);
|
|
-xmlNode *controld_query_executor_state(const char *node_name);
|
|
+xmlNode *controld_query_executor_state(void);
|
|
|
|
const char *fsa_input2string(enum crmd_fsa_input input);
|
|
const char *fsa_state2string(enum crmd_fsa_state state);
|
|
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
|
|
index 6485856..bfec430 100644
|
|
--- a/daemons/controld/controld_join_client.c
|
|
+++ b/daemons/controld/controld_join_client.c
|
|
@@ -268,7 +268,7 @@ do_cl_join_finalize_respond(long long action,
|
|
update_dc_expected(input->msg);
|
|
|
|
/* send our status section to the DC */
|
|
- tmp1 = controld_query_executor_state(fsa_our_uname);
|
|
+ tmp1 = controld_query_executor_state();
|
|
if (tmp1 != NULL) {
|
|
xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
|
|
CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
|
|
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
|
|
index 9386182..9a8ea3e 100644
|
|
--- a/daemons/controld/controld_join_dc.c
|
|
+++ b/daemons/controld/controld_join_dc.c
|
|
@@ -591,7 +591,7 @@ do_dc_join_ack(long long action,
|
|
}
|
|
controld_delete_node_state(join_from, section, cib_scope_local);
|
|
if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) {
|
|
- xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname);
|
|
+ xmlNode *now_dc_lrmd_state = controld_query_executor_state();
|
|
|
|
if (now_dc_lrmd_state != NULL) {
|
|
fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state,
|
|
--
|
|
2.31.1
|
|
|
|
From d852ec335bd5b518a3f06c7f1b597370094311ae Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 20 Sep 2022 10:18:48 -0500
|
|
Subject: [PATCH 22/24] Log: controller: add messages when getting agent
|
|
metadata
|
|
|
|
---
|
|
daemons/controld/controld_execd.c | 5 +++++
|
|
daemons/controld/controld_metadata.c | 10 ++++++++++
|
|
2 files changed, 15 insertions(+)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 039b194..f02da82 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -1884,6 +1884,11 @@ do_lrm_invoke(long long action,
|
|
struct metadata_cb_data *data = NULL;
|
|
|
|
data = new_metadata_cb_data(rsc, input->xml);
|
|
+ crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
|
|
+ rsc->id, rsc->standard,
|
|
+ ((rsc->provider == NULL)? "" : ":"),
|
|
+ ((rsc->provider == NULL)? "" : rsc->provider),
|
|
+ rsc->type);
|
|
(void) lrmd__metadata_async(rsc, metadata_complete,
|
|
(void *) data);
|
|
} else {
|
|
diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c
|
|
index a954ebd..39b43b0 100644
|
|
--- a/daemons/controld/controld_metadata.c
|
|
+++ b/daemons/controld/controld_metadata.c
|
|
@@ -348,6 +348,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
free(key);
|
|
}
|
|
if (metadata != NULL) {
|
|
+ crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache",
|
|
+ rsc->id, rsc->standard,
|
|
+ ((rsc->provider == NULL)? "" : ":"),
|
|
+ ((rsc->provider == NULL)? "" : rsc->provider),
|
|
+ rsc->type);
|
|
return metadata;
|
|
}
|
|
}
|
|
@@ -370,6 +375,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
|
|
* means that if the metadata action tries to contact the controller,
|
|
* everything will hang until the timeout).
|
|
*/
|
|
+ crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously",
|
|
+ rsc->id, rsc->standard,
|
|
+ ((rsc->provider == NULL)? "" : ":"),
|
|
+ ((rsc->provider == NULL)? "" : rsc->provider),
|
|
+ rsc->type);
|
|
rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider,
|
|
rsc->type, &metadata_str, 0);
|
|
if (rc != pcmk_ok) {
|
|
--
|
|
2.31.1
|
|
|
|
From 5aec773a20e1ded971a4082358e266353615f196 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 14 Sep 2022 14:36:44 -0500
|
|
Subject: [PATCH 23/24] Test: cts-lab: allow any whitespace in "Recover"
|
|
messages
|
|
|
|
This seems to have always been multiple spaces, not sure what happened
|
|
---
|
|
cts/lab/CTStests.py | 12 ++++++------
|
|
cts/lab/patterns.py | 4 ++--
|
|
2 files changed, 8 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
|
|
index 5535177..8b56758 100644
|
|
--- a/cts/lab/CTStests.py
|
|
+++ b/cts/lab/CTStests.py
|
|
@@ -1,7 +1,7 @@
|
|
""" Test-specific classes for Pacemaker's Cluster Test Suite (CTS)
|
|
"""
|
|
|
|
-__copyright__ = "Copyright 2000-2021 the Pacemaker project contributors"
|
|
+__copyright__ = "Copyright 2000-2022 the Pacemaker project contributors"
|
|
__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
|
|
|
|
#
|
|
@@ -1225,7 +1225,7 @@ class MaintenanceMode(CTSTest):
|
|
'''Return list of errors which should be ignored'''
|
|
return [
|
|
r"Updating failcount for %s" % self.rid,
|
|
- r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid,
|
|
+ r"schedulerd.*: Recover\s+%s\s+\(.*\)" % self.rid,
|
|
r"Unknown operation: fail",
|
|
self.templates["Pat:RscOpOK"] % (self.action, self.rid),
|
|
r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
|
|
@@ -1324,7 +1324,7 @@ class ResourceRecover(CTSTest):
|
|
'''Return list of errors which should be ignored'''
|
|
return [
|
|
r"Updating failcount for %s" % self.rid,
|
|
- r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt),
|
|
+ r"schedulerd.*: Recover\s+(%s|%s)\s+\(.*\)" % (self.rid, self.rid_alt),
|
|
r"Unknown operation: fail",
|
|
self.templates["Pat:RscOpOK"] % (self.action, self.rid),
|
|
r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval),
|
|
@@ -2559,7 +2559,7 @@ class RemoteLXC(CTSTest):
|
|
'''Return list of errors which should be ignored'''
|
|
return [
|
|
r"Updating failcount for ping",
|
|
- r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)",
|
|
+ r"schedulerd.*: Recover\s+(ping|lxc-ms|container)\s+\(.*\)",
|
|
# The orphaned lxc-ms resource causes an expected transition error
|
|
# that is a result of the scheduler not having knowledge that the
|
|
# promotable resource used to be a clone. As a result, it looks like that
|
|
@@ -3054,7 +3054,7 @@ class RemoteStonithd(RemoteDriver):
|
|
r"Software caused connection abort",
|
|
r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
|
|
r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
|
|
- r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
|
|
+ r"schedulerd.*:\s+Recover\s+remote-.*\s+\(.*\)",
|
|
r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
|
|
]
|
|
|
|
@@ -3120,7 +3120,7 @@ class RemoteRscFailure(RemoteDriver):
|
|
|
|
def errorstoignore(self):
|
|
ignore_pats = [
|
|
- r"schedulerd.*: Recover remote-rsc\s*\(.*\)",
|
|
+ r"schedulerd.*: Recover\s+remote-rsc\s+\(.*\)",
|
|
r"Dummy.*: No process state file found",
|
|
]
|
|
|
|
diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
|
|
index 90cac73..6e718f7 100644
|
|
--- a/cts/lab/patterns.py
|
|
+++ b/cts/lab/patterns.py
|
|
@@ -66,7 +66,7 @@ class BasePatterns(object):
|
|
|
|
"Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s",
|
|
"Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK",
|
|
- "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s",
|
|
+ "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover\s+%s",
|
|
"Pat:Fencing_active" : r"stonith resource .* is active on 2 nodes (attempting recovery)",
|
|
"Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error",
|
|
|
|
@@ -180,7 +180,7 @@ class crm_corosync(BasePatterns):
|
|
r"Parameters to .* action changed:",
|
|
r"Parameters to .* changed",
|
|
r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)",
|
|
- r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)",
|
|
+ r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)",
|
|
r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting",
|
|
r"Peer is not part of our cluster",
|
|
r"We appear to be in an election loop",
|
|
--
|
|
2.31.1
|
|
|
|
From 338cf55d19cb4ebebedf092dd0a5969ac2eda295 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 19 Sep 2022 15:55:42 -0500
|
|
Subject: [PATCH 24/24] Test: cts-lab: match parentheses correctly
|
|
|
|
---
|
|
cts/lab/patterns.py | 3 ++-
|
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py
|
|
index 6e718f7..856fffb 100644
|
|
--- a/cts/lab/patterns.py
|
|
+++ b/cts/lab/patterns.py
|
|
@@ -271,6 +271,7 @@ class crm_corosync(BasePatterns):
|
|
]
|
|
self.components["pacemaker-based-ignore"] = [
|
|
r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)",
|
|
+ r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
|
|
# This is overbroad, but we don't have a way to say that only
|
|
# certain transition errors are acceptable (if the fencer respawns,
|
|
# fence devices may appear multiply active). We have to rely on
|
|
@@ -328,7 +329,7 @@ class crm_corosync(BasePatterns):
|
|
r"crit:.*Fencing daemon connection failed",
|
|
r"error:.*Fencer connection failed \(will retry\)",
|
|
r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations",
|
|
- r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error",
|
|
+ r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)",
|
|
# This is overbroad, but we don't have a way to say that only
|
|
# certain transition errors are acceptable (if the fencer respawns,
|
|
# fence devices may appear multiply active). We have to rely on
|
|
--
|
|
2.31.1
|
|
|