parent
356d0e575b
commit
d373233fa3
@ -0,0 +1,26 @@
|
|||||||
|
From ebac530c815a62f7c3a1c24f64e9a530d9753dbe Mon Sep 17 00:00:00 2001
|
||||||
|
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
|
||||||
|
Date: Wed, 19 Jul 2023 18:21:07 +0900
|
||||||
|
Subject: [PATCH] High: tools: The dampen parameter is disabled when setting
|
||||||
|
values with attrd_updater.
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/attrd_updater.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c
|
||||||
|
index b615a3575..4688b9ff6 100644
|
||||||
|
--- a/tools/attrd_updater.c
|
||||||
|
+++ b/tools/attrd_updater.c
|
||||||
|
@@ -445,7 +445,7 @@
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value,
|
||||||
|
- NULL, attr_set, NULL,
|
||||||
|
+ attr_dampen, attr_set, NULL,
|
||||||
|
attr_options | pcmk__node_attr_value);
|
||||||
|
break;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.41.0
|
||||||
|
|
@ -0,0 +1,109 @@
|
|||||||
|
From 3e31da0016795397bfeacb2f3d76ecfe35cc1f67 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 17 Jul 2023 14:52:42 -0500
|
||||||
|
Subject: [PATCH] Fix: libcrmcommon: wait for reply from appropriate controller
|
||||||
|
commands
|
||||||
|
|
||||||
|
ipc_controld.c:reply_expected() wrongly omitted PCMK__CONTROLD_CMD_NODES (which
|
||||||
|
hasn't been a problem because crm_node uses a mainloop instead of sync dispatch
|
||||||
|
for that) and CRM_OP_RM_NODE_CACHE (which can be sent via
|
||||||
|
ipc_client.c:pcmk_ipc_purge_node()).
|
||||||
|
|
||||||
|
Because CRM_OP_RM_NODE_CACHE gets only an ack and no further replies, we now
|
||||||
|
have to be careful not to return true from the controller's dispatch()
|
||||||
|
function, otherwise crm_node -R would wait forever for more data. That means
|
||||||
|
we have to check whether any replies are expected, which means we have to
|
||||||
|
increment expected replies *before* sending a request (in case it's sync).
|
||||||
|
|
||||||
|
Regression introduced in 2.0.5 by ae14fa4a
|
||||||
|
|
||||||
|
Fixes T681
|
||||||
|
---
|
||||||
|
lib/common/ipc_controld.c | 49 ++++++++++++++-------------------------
|
||||||
|
1 file changed, 17 insertions(+), 32 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
|
||||||
|
index 3c3a98964..405fd0518 100644
|
||||||
|
--- a/lib/common/ipc_controld.c
|
||||||
|
+++ b/lib/common/ipc_controld.c
|
||||||
|
@@ -143,18 +143,16 @@
|
||||||
|
static bool
|
||||||
|
reply_expected(pcmk_ipc_api_t *api, xmlNode *request)
|
||||||
|
{
|
||||||
|
- const char *command = crm_element_value(request, F_CRM_TASK);
|
||||||
|
-
|
||||||
|
- if (command == NULL) {
|
||||||
|
- return false;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- // We only need to handle commands that functions in this file can send
|
||||||
|
- return !strcmp(command, CRM_OP_REPROBE)
|
||||||
|
- || !strcmp(command, CRM_OP_NODE_INFO)
|
||||||
|
- || !strcmp(command, CRM_OP_PING)
|
||||||
|
- || !strcmp(command, CRM_OP_LRM_FAIL)
|
||||||
|
- || !strcmp(command, CRM_OP_LRM_DELETE);
|
||||||
|
+ // We only need to handle commands that API functions can send
|
||||||
|
+ return pcmk__str_any_of(crm_element_value(request, F_CRM_TASK),
|
||||||
|
+ PCMK__CONTROLD_CMD_NODES,
|
||||||
|
+ CRM_OP_LRM_DELETE,
|
||||||
|
+ CRM_OP_LRM_FAIL,
|
||||||
|
+ CRM_OP_NODE_INFO,
|
||||||
|
+ CRM_OP_PING,
|
||||||
|
+ CRM_OP_REPROBE,
|
||||||
|
+ CRM_OP_RM_NODE_CACHE,
|
||||||
|
+ NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
@@ -168,22 +166,12 @@
|
||||||
|
pcmk_controld_reply_unknown, NULL, NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
- /* If we got an ACK, return true so the caller knows to expect more responses
|
||||||
|
- * from the IPC server. We do this before decrementing replies_expected because
|
||||||
|
- * ACKs are not going to be included in that value.
|
||||||
|
- *
|
||||||
|
- * Note that we cannot do the same kind of status checking here that we do in
|
||||||
|
- * ipc_pacemakerd.c. The ACK message we receive does not necessarily contain
|
||||||
|
- * a status attribute. That is, we may receive this:
|
||||||
|
- *
|
||||||
|
- * <ack function="crmd_remote_proxy_cb" line="556"/>
|
||||||
|
- *
|
||||||
|
- * Instead of this:
|
||||||
|
- *
|
||||||
|
- * <ack function="dispatch_controller_ipc" line="391" status="112"/>
|
||||||
|
- */
|
||||||
|
if (pcmk__str_eq(crm_element_name(reply), "ack", pcmk__str_none)) {
|
||||||
|
- return true; // More replies needed
|
||||||
|
+ /* ACKs are trivial responses that do not count toward expected replies,
|
||||||
|
+ * and do not have all the fields that validation requires, so skip that
|
||||||
|
+ * processing.
|
||||||
|
+ */
|
||||||
|
+ return private->replies_expected > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (private->replies_expected > 0) {
|
||||||
|
@@ -310,18 +298,15 @@
|
||||||
|
send_controller_request(pcmk_ipc_api_t *api, xmlNode *request,
|
||||||
|
bool reply_is_expected)
|
||||||
|
{
|
||||||
|
- int rc;
|
||||||
|
-
|
||||||
|
if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) {
|
||||||
|
return EINVAL;
|
||||||
|
}
|
||||||
|
- rc = pcmk__send_ipc_request(api, request);
|
||||||
|
- if ((rc == pcmk_rc_ok) && reply_is_expected) {
|
||||||
|
+ if (reply_is_expected) {
|
||||||
|
struct controld_api_private_s *private = api->api_data;
|
||||||
|
|
||||||
|
private->replies_expected++;
|
||||||
|
}
|
||||||
|
- return rc;
|
||||||
|
+ return pcmk__send_ipc_request(api, request);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xmlNode *
|
||||||
|
--
|
||||||
|
2.41.0
|
||||||
|
|
@ -0,0 +1,163 @@
|
|||||||
|
From 63f4bd4d5a324e6eb279340a42c7c36c8902ada7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 2 Aug 2023 15:55:26 -0500
|
||||||
|
Subject: [PATCH 1/4] Fix: controller: don't try to execute agent action at
|
||||||
|
shutdown
|
||||||
|
|
||||||
|
Normally, agent execution is not possible at shutdown. However, when metadata
|
||||||
|
is needed for some action, the agent can be called asynchronously, and when the
|
||||||
|
metadata action returns, the original action is performed. If the metadata is
|
||||||
|
initiated before shutdown, but completes after shutdown has begun, do not try
|
||||||
|
to perform the original action, so we avoid unnecessary error logs.
|
||||||
|
---
|
||||||
|
daemons/controld/controld_execd.c | 4 +++-
|
||||||
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
||||||
|
index 530e4346c8..a90e8d833e 100644
|
||||||
|
--- a/daemons/controld/controld_execd.c
|
||||||
|
+++ b/daemons/controld/controld_execd.c
|
||||||
|
@@ -1728,7 +1728,9 @@
|
||||||
|
md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
|
||||||
|
result->action_stdout);
|
||||||
|
}
|
||||||
|
- do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
|
||||||
|
+ if (!pcmk_is_set(fsa_input_register, R_HA_DISCONNECTED)) {
|
||||||
|
+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
|
||||||
|
+ }
|
||||||
|
free_metadata_cb_data(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
From 247d9534f36f690c1474e36cedaadb3934022a05 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 2 Aug 2023 16:16:31 -0500
|
||||||
|
Subject: [PATCH 2/4] Refactor: controller: de-functionize lrm_state_destroy()
|
||||||
|
|
||||||
|
It was a one-liner called once
|
||||||
|
---
|
||||||
|
daemons/controld/controld_execd_state.c | 8 +-------
|
||||||
|
daemons/controld/controld_lrm.h | 5 -----
|
||||||
|
2 files changed, 1 insertion(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
|
||||||
|
index 8c68bfca08..4a87a9b332 100644
|
||||||
|
--- a/daemons/controld/controld_execd_state.c
|
||||||
|
+++ b/daemons/controld/controld_execd_state.c
|
||||||
|
@@ -131,12 +131,6 @@
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
-void
|
||||||
|
-lrm_state_destroy(const char *node_name)
|
||||||
|
-{
|
||||||
|
- g_hash_table_remove(lrm_state_table, node_name);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static gboolean
|
||||||
|
remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
|
||||||
|
{
|
||||||
|
@@ -764,7 +758,7 @@
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
|
||||||
|
- lrm_state_destroy(rsc_id);
|
||||||
|
+ g_hash_table_remove(lrm_state_table, rsc_id);
|
||||||
|
return pcmk_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h
|
||||||
|
index 25f3db3316..c3113e49c3 100644
|
||||||
|
--- a/daemons/controld/controld_lrm.h
|
||||||
|
+++ b/daemons/controld/controld_lrm.h
|
||||||
|
@@ -114,11 +114,6 @@
|
||||||
|
lrm_state_t *lrm_state_create(const char *node_name);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
- * \brief Destroy executor connection by node name
|
||||||
|
- */
|
||||||
|
-void lrm_state_destroy(const char *node_name);
|
||||||
|
-
|
||||||
|
-/*!
|
||||||
|
* \brief Find lrm_state data by node name
|
||||||
|
*/
|
||||||
|
lrm_state_t *lrm_state_find(const char *node_name);
|
||||||
|
|
||||||
|
From 1b915f1ce38756431f7faa142565e3e07aade194 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 2 Aug 2023 15:58:09 -0500
|
||||||
|
Subject: [PATCH 3/4] Low: controller: guard lrm_state_table usage with NULL
|
||||||
|
check
|
||||||
|
|
||||||
|
It is NULL while draining the mainloop during the shutdown sequence.
|
||||||
|
---
|
||||||
|
daemons/controld/controld_execd_state.c | 7 ++++++-
|
||||||
|
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
|
||||||
|
index 4a87a9b332..b90cc5e635 100644
|
||||||
|
--- a/daemons/controld/controld_execd_state.c
|
||||||
|
+++ b/daemons/controld/controld_execd_state.c
|
||||||
|
@@ -246,7 +246,7 @@
|
||||||
|
lrm_state_t *
|
||||||
|
lrm_state_find(const char *node_name)
|
||||||
|
{
|
||||||
|
- if (!node_name) {
|
||||||
|
+ if ((node_name == NULL) || (lrm_state_table == NULL)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return g_hash_table_lookup(lrm_state_table, node_name);
|
||||||
|
@@ -257,6 +257,8 @@
|
||||||
|
{
|
||||||
|
lrm_state_t *lrm_state;
|
||||||
|
|
||||||
|
+ CRM_CHECK(lrm_state_table != NULL, return NULL);
|
||||||
|
+
|
||||||
|
lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
|
||||||
|
if (!lrm_state) {
|
||||||
|
lrm_state = lrm_state_create(node_name);
|
||||||
|
@@ -268,6 +270,9 @@
|
||||||
|
GList *
|
||||||
|
lrm_state_get_list(void)
|
||||||
|
{
|
||||||
|
+ if (lrm_state_table == NULL) {
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
return g_hash_table_get_values(lrm_state_table);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
From 78581213ed3bf4183b0ec1f391b720d5d91f3f68 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 2 Aug 2023 15:48:36 -0500
|
||||||
|
Subject: [PATCH 4/4] Log: controller: improve messages for resource history
|
||||||
|
updates
|
||||||
|
|
||||||
|
---
|
||||||
|
daemons/controld/controld_execd.c | 11 +++++++++--
|
||||||
|
1 file changed, 9 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
||||||
|
index 22ac42486f..c9dde0b748 100644
|
||||||
|
--- a/daemons/controld/controld_execd.c
|
||||||
|
+++ b/daemons/controld/controld_execd.c
|
||||||
|
@@ -2408,10 +2408,17 @@
|
||||||
|
case pcmk_ok:
|
||||||
|
case -pcmk_err_diff_failed:
|
||||||
|
case -pcmk_err_diff_resync:
|
||||||
|
- crm_trace("Resource update %d complete: rc=%d", call_id, rc);
|
||||||
|
+ crm_trace("Resource history update completed (call=%d rc=%d)",
|
||||||
|
+ call_id, rc);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
- crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
|
||||||
|
+ if (call_id > 0) {
|
||||||
|
+ crm_warn("Resource history update %d failed: %s "
|
||||||
|
+ CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc);
|
||||||
|
+ } else {
|
||||||
|
+ crm_warn("Resource history update failed: %s " CRM_XS " rc=%d",
|
||||||
|
+ pcmk_strerror(rc), rc);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (call_id == last_resource_update) {
|
@ -0,0 +1,45 @@
|
|||||||
|
From f5263c9401c9c38d4e039149deddcc0da0c184ba Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Thu, 3 Aug 2023 12:17:08 -0500
|
||||||
|
Subject: [PATCH] Fix: attrd: avoid race condition when shutting down
|
||||||
|
|
||||||
|
This addresses a race condition that can occur when the DC and the attribute
|
||||||
|
writer are different nodes, and shutting down at the same time. When the DC
|
||||||
|
controller leaves its Corosync process group, the remaining nodes erase its
|
||||||
|
transient node attributes (including "shutdown") from the CIB. However, if the
|
||||||
|
(former) DC's attrd is still up, it can win the attribute writer election
|
||||||
|
called after the original writer leaves. As the election winner, it writes out
|
||||||
|
all its attributes to the CIB, including "shutdown". The next time it rejoins
|
||||||
|
the cluster, it will be immediately shut down.
|
||||||
|
|
||||||
|
Fixes T138
|
||||||
|
---
|
||||||
|
daemons/attrd/attrd_elections.c | 10 +++++++++-
|
||||||
|
1 file changed, 9 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
|
||||||
|
index 3b6b55a0f59..6f4916888a9 100644
|
||||||
|
--- a/daemons/attrd/attrd_elections.c
|
||||||
|
+++ b/daemons/attrd/attrd_elections.c
|
||||||
|
@@ -22,12 +22,20 @@ attrd_election_cb(gpointer user_data)
|
||||||
|
{
|
||||||
|
attrd_declare_winner();
|
||||||
|
|
||||||
|
+ if (attrd_requesting_shutdown() || attrd_shutting_down()) {
|
||||||
|
+ /* This node is shutting down or about to, meaning its attributes will
|
||||||
|
+ * be removed (and may have already been removed from the CIB by a
|
||||||
|
+ * controller). Don't sync or write its attributes in this case.
|
||||||
|
+ */
|
||||||
|
+ return G_SOURCE_REMOVE;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Update the peers after an election */
|
||||||
|
attrd_peer_sync(NULL, NULL);
|
||||||
|
|
||||||
|
/* Update the CIB after an election */
|
||||||
|
attrd_write_attributes(true, false);
|
||||||
|
- return FALSE;
|
||||||
|
+ return G_SOURCE_REMOVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
@ -0,0 +1,210 @@
|
|||||||
|
From 83e547cc64f2586031a007ab58e91fc22cd1a68a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Thu, 24 Aug 2023 12:18:23 -0500
|
||||||
|
Subject: [PATCH] Refactor: attrd: use enum instead of bools for
|
||||||
|
attrd_write_attributes()
|
||||||
|
|
||||||
|
---
|
||||||
|
daemons/attrd/attrd_cib.c | 24 ++++++++++++++++++------
|
||||||
|
daemons/attrd/attrd_corosync.c | 2 +-
|
||||||
|
daemons/attrd/attrd_elections.c | 2 +-
|
||||||
|
daemons/attrd/attrd_ipc.c | 2 +-
|
||||||
|
daemons/attrd/attrd_utils.c | 2 +-
|
||||||
|
daemons/attrd/pacemaker-attrd.h | 8 +++++++-
|
||||||
|
6 files changed, 29 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
|
||||||
|
index 928c0133745..9c787fe1024 100644
|
||||||
|
--- a/daemons/attrd/attrd_cib.c
|
||||||
|
+++ b/daemons/attrd/attrd_cib.c
|
||||||
|
@@ -343,16 +343,23 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay)
|
||||||
|
free_xml(xml_top);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*!
|
||||||
|
+ * \internal
|
||||||
|
+ * \brief Write out attributes
|
||||||
|
+ *
|
||||||
|
+ * \param[in] options Group of enum attrd_write_options
|
||||||
|
+ */
|
||||||
|
void
|
||||||
|
-attrd_write_attributes(bool all, bool ignore_delay)
|
||||||
|
+attrd_write_attributes(uint32_t options)
|
||||||
|
{
|
||||||
|
GHashTableIter iter;
|
||||||
|
attribute_t *a = NULL;
|
||||||
|
|
||||||
|
- crm_debug("Writing out %s attributes", all? "all" : "changed");
|
||||||
|
+ crm_debug("Writing out %s attributes",
|
||||||
|
+ pcmk_is_set(options, attrd_write_all)? "all" : "changed");
|
||||||
|
g_hash_table_iter_init(&iter, attributes);
|
||||||
|
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
|
||||||
|
- if (!all && a->unknown_peer_uuids) {
|
||||||
|
+ if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) {
|
||||||
|
// Try writing this attribute again, in case peer ID was learned
|
||||||
|
a->changed = true;
|
||||||
|
} else if (a->force_write) {
|
||||||
|
@@ -360,9 +367,14 @@ attrd_write_attributes(bool all, bool ignore_delay)
|
||||||
|
a->changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if(all || a->changed) {
|
||||||
|
- /* When forced write flag is set, ignore delay. */
|
||||||
|
- attrd_write_attribute(a, (a->force_write ? true : ignore_delay));
|
||||||
|
+ if (pcmk_is_set(options, attrd_write_all) || a->changed) {
|
||||||
|
+ bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay);
|
||||||
|
+
|
||||||
|
+ if (a->force_write) {
|
||||||
|
+ // Always ignore delay when forced write flag is set
|
||||||
|
+ ignore_delay = true;
|
||||||
|
+ }
|
||||||
|
+ attrd_write_attribute(a, ignore_delay);
|
||||||
|
} else {
|
||||||
|
crm_trace("Skipping unchanged attribute %s", a->id);
|
||||||
|
}
|
||||||
|
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
|
||||||
|
index 1aec35a054e..49631df6e44 100644
|
||||||
|
--- a/daemons/attrd/attrd_corosync.c
|
||||||
|
+++ b/daemons/attrd/attrd_corosync.c
|
||||||
|
@@ -285,7 +285,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
|
||||||
|
|
||||||
|
crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid);
|
||||||
|
if (attrd_election_won()) {
|
||||||
|
- attrd_write_attributes(false, false);
|
||||||
|
+ attrd_write_attributes(attrd_write_changed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
|
||||||
|
index c25a41a4492..01341db18e4 100644
|
||||||
|
--- a/daemons/attrd/attrd_elections.c
|
||||||
|
+++ b/daemons/attrd/attrd_elections.c
|
||||||
|
@@ -34,7 +34,7 @@ attrd_election_cb(gpointer user_data)
|
||||||
|
attrd_peer_sync(NULL, NULL);
|
||||||
|
|
||||||
|
/* Update the CIB after an election */
|
||||||
|
- attrd_write_attributes(true, false);
|
||||||
|
+ attrd_write_attributes(attrd_write_all);
|
||||||
|
return G_SOURCE_REMOVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
|
||||||
|
index 4be789de7f9..05c4a696a19 100644
|
||||||
|
--- a/daemons/attrd/attrd_ipc.c
|
||||||
|
+++ b/daemons/attrd/attrd_ipc.c
|
||||||
|
@@ -232,7 +232,7 @@ attrd_client_refresh(pcmk__request_t *request)
|
||||||
|
crm_info("Updating all attributes");
|
||||||
|
|
||||||
|
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
|
||||||
|
- attrd_write_attributes(true, true);
|
||||||
|
+ attrd_write_attributes(attrd_write_all|attrd_write_no_delay);
|
||||||
|
|
||||||
|
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
|
||||||
|
return NULL;
|
||||||
|
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
|
||||||
|
index c43eac1695a..bfd51368890 100644
|
||||||
|
--- a/daemons/attrd/attrd_utils.c
|
||||||
|
+++ b/daemons/attrd/attrd_utils.c
|
||||||
|
@@ -156,7 +156,7 @@ attrd_cib_replaced_cb(const char *event, xmlNode * msg)
|
||||||
|
if (attrd_election_won()) {
|
||||||
|
if (change_section & (cib_change_section_nodes | cib_change_section_status)) {
|
||||||
|
crm_notice("Updating all attributes after %s event", event);
|
||||||
|
- attrd_write_attributes(true, false);
|
||||||
|
+ attrd_write_attributes(attrd_write_all);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
|
||||||
|
index 41f31d97b3b..2d781d11394 100644
|
||||||
|
--- a/daemons/attrd/pacemaker-attrd.h
|
||||||
|
+++ b/daemons/attrd/pacemaker-attrd.h
|
||||||
|
@@ -176,8 +176,14 @@ void attrd_free_attribute(gpointer data);
|
||||||
|
void attrd_free_attribute_value(gpointer data);
|
||||||
|
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
|
||||||
|
|
||||||
|
+enum attrd_write_options {
|
||||||
|
+ attrd_write_changed = 0,
|
||||||
|
+ attrd_write_all = (1 << 0),
|
||||||
|
+ attrd_write_no_delay = (1 << 1),
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
void attrd_write_attribute(attribute_t *a, bool ignore_delay);
|
||||||
|
-void attrd_write_attributes(bool all, bool ignore_delay);
|
||||||
|
+void attrd_write_attributes(uint32_t options);
|
||||||
|
void attrd_write_or_elect_attribute(attribute_t *a);
|
||||||
|
|
||||||
|
extern int minimum_protocol_version;
|
||||||
|
From 58400e272cfc51f02eec69cdd0ed0d27a30e78a3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Thu, 24 Aug 2023 12:27:53 -0500
|
||||||
|
Subject: [PATCH] Fix: attrd: avoid race condition at writer election
|
||||||
|
|
||||||
|
f5263c94 was not a complete fix. The issue may also occur if a remaining node
|
||||||
|
(not the original DC or writer) wins the attribute writer election after the
|
||||||
|
original DC's controller has exited but before its attribute manager has exited.
|
||||||
|
|
||||||
|
The long-term solution will be to have the attribute manager (instead of the
|
||||||
|
controller) be in control of erasing transient attributes from the CIB when a
|
||||||
|
node leaves. This short-term workaround simply has new attribute writers skip
|
||||||
|
shutdown attributes when writing out all attributes.
|
||||||
|
|
||||||
|
Fixes T138
|
||||||
|
---
|
||||||
|
daemons/attrd/attrd_cib.c | 5 +++++
|
||||||
|
daemons/attrd/attrd_elections.c | 14 ++++++++++++--
|
||||||
|
daemons/attrd/pacemaker-attrd.h | 1 +
|
||||||
|
3 files changed, 18 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
|
||||||
|
index 9c787fe102..2c910b4c64 100644
|
||||||
|
--- a/daemons/attrd/attrd_cib.c
|
||||||
|
+++ b/daemons/attrd/attrd_cib.c
|
||||||
|
@@ -359,6 +359,11 @@ attrd_write_attributes(uint32_t options)
|
||||||
|
pcmk_is_set(options, attrd_write_all)? "all" : "changed");
|
||||||
|
g_hash_table_iter_init(&iter, attributes);
|
||||||
|
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
|
||||||
|
+ if (pcmk_is_set(options, attrd_write_skip_shutdown)
|
||||||
|
+ && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) {
|
||||||
|
// Try writing this attribute again, in case peer ID was learned
|
||||||
|
a->changed = true;
|
||||||
|
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
|
||||||
|
index 01341db18e..a95cd44cbd 100644
|
||||||
|
--- a/daemons/attrd/attrd_elections.c
|
||||||
|
+++ b/daemons/attrd/attrd_elections.c
|
||||||
|
@@ -33,8 +33,18 @@ attrd_election_cb(gpointer user_data)
|
||||||
|
/* Update the peers after an election */
|
||||||
|
attrd_peer_sync(NULL, NULL);
|
||||||
|
|
||||||
|
- /* Update the CIB after an election */
|
||||||
|
- attrd_write_attributes(attrd_write_all);
|
||||||
|
+ /* After winning an election, update the CIB with the values of all
|
||||||
|
+ * attributes as the winner knows them.
|
||||||
|
+ *
|
||||||
|
+ * However, do not write out any "shutdown" attributes. A node that is
|
||||||
|
+ * shutting down will have all its transient attributes removed from the CIB
|
||||||
|
+ * when its controller exits, and from the attribute manager's memory (on
|
||||||
|
+ * remaining nodes) when its attribute manager exits; if an election is won
|
||||||
|
+ * between when those two things happen, we don't want to write the shutdown
|
||||||
|
+ * attribute back out, which would cause the node to immediately shut down
|
||||||
|
+ * the next time it rejoins.
|
||||||
|
+ */
|
||||||
|
+ attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown);
|
||||||
|
return G_SOURCE_REMOVE;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
|
||||||
|
index 2d781d1139..2e35bd7ec5 100644
|
||||||
|
--- a/daemons/attrd/pacemaker-attrd.h
|
||||||
|
+++ b/daemons/attrd/pacemaker-attrd.h
|
||||||
|
@@ -180,6 +180,7 @@ enum attrd_write_options {
|
||||||
|
attrd_write_changed = 0,
|
||||||
|
attrd_write_all = (1 << 0),
|
||||||
|
attrd_write_no_delay = (1 << 1),
|
||||||
|
+ attrd_write_skip_shutdown = (1 << 2),
|
||||||
|
};
|
||||||
|
|
||||||
|
void attrd_write_attribute(attribute_t *a, bool ignore_delay);
|
Loading…
Reference in new issue