import pacemaker-2.1.5-8.el8

c8 imports/c8/pacemaker-2.1.5-8.el8
CentOS Sources 2 years ago committed by MSVSphere Packaging Team
commit 90e6c311af

2
.gitignore vendored

@ -0,0 +1,2 @@
SOURCES/nagios-agents-metadata-105ab8a.tar.gz
SOURCES/pacemaker-a3f4479.tar.gz

@ -0,0 +1,2 @@
ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz
883efa27f94c6a07942f51cf7c8959c5fbb624fe SOURCES/pacemaker-a3f4479.tar.gz

File diff suppressed because it is too large Load Diff

@ -0,0 +1,98 @@
From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 23 Jan 2023 14:25:56 -0600
Subject: [PATCH] High: executor: fix regression in remote node shutdown
This reverts the essential part of d61494347, which was based on misdiagnosing
a remote node shutdown issue. Initially, it was thought that a "TLS server
session ended" log just after a remote node requested shutdown indicated that
the proxy connection coincidentally dropped at that moment. It actually is the
routine stopping of accepting new proxy connections, and exiting when that
happens makes the remote node exit immediately without waiting for the
all-clear from the cluster.
Fixes T361
---
daemons/execd/pacemaker-execd.c | 19 +------------------
daemons/execd/pacemaker-execd.h | 3 +--
daemons/execd/remoted_tls.c | 6 +-----
3 files changed, 3 insertions(+), 25 deletions(-)
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index db12674f13..491808974a 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2022 the Pacemaker project contributors
+ * Copyright 2012-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -305,23 +305,6 @@ lrmd_exit(gpointer data)
return FALSE;
}
-/*!
- * \internal
- * \brief Clean up and exit if shutdown has started
- *
- * \return Doesn't return
- */
-void
-execd_exit_if_shutting_down(void)
-{
-#ifdef PCMK__COMPILE_REMOTE
- if (shutting_down) {
- crm_warn("exit because TLS connection was closed and 'shutting_down' set");
- lrmd_exit(NULL);
- }
-#endif
-}
-
/*!
* \internal
* \brief Request cluster shutdown if appropriate, otherwise exit immediately
diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h
index 6646ae29e3..f78e8dcdde 100644
--- a/daemons/execd/pacemaker-execd.h
+++ b/daemons/execd/pacemaker-execd.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2022 the Pacemaker project contributors
+ * Copyright 2012-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp);
int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id,
xmlNode *request);
void lrmd_drain_alerts(GMainLoop *mloop);
-void execd_exit_if_shutting_down(void);
#endif // PACEMAKER_EXECD__H
diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c
index 6f4b2d0062..c65e3f394d 100644
--- a/daemons/execd/remoted_tls.c
+++ b/daemons/execd/remoted_tls.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2022 the Pacemaker project contributors
+ * Copyright 2012-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -250,10 +250,6 @@ static void
tls_server_dropped(gpointer user_data)
{
crm_notice("TLS server session ended");
- /* If we are in the process of shutting down, then we should actually exit.
- * bz#1804259
- */
- execd_exit_if_shutting_down();
return;
}
--
2.31.1

File diff suppressed because it is too large Load Diff

@ -0,0 +1,107 @@
From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Fri, 3 Feb 2023 12:08:57 -0800
Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one
QE observed a rarely reproducible core dump in the fencer during
Pacemaker shutdown, in which we try to g_source_remove() an op timer
that's already been removed.
free_stonith_remote_op_list()
-> g_hash_table_destroy()
-> g_hash_table_remove_all_nodes()
-> clear_remote_op_timers()
-> g_source_remove()
-> crm_glib_handler()
-> "Source ID 190 was not found when attempting to remove it"
The likely cause is that request_peer_fencing() doesn't set
op->op_timer_one to 0 after calling g_source_remove() on it, so if that
op is still in the stonith_remote_op_list at shutdown with the same
timer, clear_remote_op_timers() tries to remove the source for
op_timer_one again.
There are only five locations that call g_source_remove() on a
remote_fencing_op_t timer.
* Three of them are in clear_remote_op_timers(), which first 0-checks
the timer and then sets it to 0 after g_source_remove().
* One is in remote_op_query_timeout(), which does the same.
* The last is the one we fix here in request_peer_fencing().
I don't know all the conditions of QE's test scenario at this point.
What I do know:
* have-watchdog=true
* stonith-watchdog-timeout=10
* no explicit topology
* fence agent script is missing for the configured fence device
* requested fencing of one node
* cluster shutdown
Fixes RHBZ#2166967
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/fenced/fenced_remote.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index d61b5bd..b7426ff 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
+ op->op_timer_one = 0;
}
if (!((stonith_watchdog_timeout_ms > 0)
--
2.31.1
From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Wed, 15 Feb 2023 22:30:27 -0800
Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total
remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove
the source from the main loop after returning. Currently this function
is used as the callback only when creating op->op_timer_total.
If we don't set op->op_timer_total to 0 before returning from
remote_op_timeout(), then we can get an assertion and core dump from
GLib when the op's timers are being cleared (either during op
finalization or during fencer shutdown). This is because
clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to
remove the source, but the source has already been removed.
Note that we're already (correctly) zeroing op->op_timer_one and
op->query_timeout as appropriate in their respective callback functions.
Fortunately, GLib doesn't care whether the source has already been
removed before we return G_SOURCE_REMOVE from a callback. So it's safe
to call finalize_op() (which removes all the op's timer sources) from
within a callback.
Fixes RHBZ#2166967
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/fenced/fenced_remote.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index b7426ff88..adea3d7d8 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
+ op->op_timer_total = 0;
+
if (op->state == st_done) {
crm_debug("Action '%s' targeting %s for client %s already completed "
CRM_XS " id=%.8s",
--
2.39.0

@ -0,0 +1,151 @@
From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Tue, 14 Feb 2023 14:00:37 -0500
Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in
pcmk__attrd_api_query.
According to the header file, if node is NULL, pcmk__attrd_api_query
should query the value of the given attribute on all cluster nodes.
This is also what the server expects and how attrd_updater is supposed
to work.
However, pcmk__attrd_api_query has no way of letting callers decide
whether they want to query all nodes or whether they want to use the
local node. We were passing NULL for the node name, which it took to
mean it should look up the local node name. This calls
pcmk__node_attr_target, which probes the local cluster node name and returns
that to pcmk__attrd_api_query. If it returns non-NULL, that value will
then be put into the XML IPC call which means the server will only
return the value for that node.
In testing this was usually fine. However, in practice, the methods
pcmk__node_attr_target uses to figure out the local cluster node name
involve checking the OCF_RESKEY_CRM_meta_on_node environment variable
among others.
This variable was never set in testing, but can be set in the real
world. This leads to circumstances where the user did "attrd_updater -QA"
expecting to get the values on all nodes, but instead only got the value
on the local cluster node.
In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never
called if the node was NULL but was called otherwise.
The fix is to modify pcmk__attrd_api_query to take an option for
querying all nodes. If that's present, we'll query all nodes. If it's
not present, we'll look at the given node name - NULL means look it up,
anything else means just that node.
Regression in 2.1.5 introduced by eb20a65577
---
include/crm/common/attrd_internal.h | 6 +++++-
include/crm/common/ipc_attrd_internal.h | 7 +++++--
lib/common/ipc_attrd.c | 12 ++++++++----
tools/attrd_updater.c | 5 +++--
4 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h
index 389be48..7337c38 100644
--- a/include/crm/common/attrd_internal.h
+++ b/include/crm/common/attrd_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2022 the Pacemaker project contributors
+ * Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts {
pcmk__node_attr_perm = (1 << 5),
pcmk__node_attr_sync_local = (1 << 6),
pcmk__node_attr_sync_cluster = (1 << 7),
+ // pcmk__node_attr_utilization is 8, but that has not been backported.
+ // I'm leaving the gap here in case we backport that in the future and
+ // also to avoid problems on mixed-version clusters.
+ pcmk__node_attr_query_all = (1 << 9),
};
#define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \
diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h
index 2c6713f..b1b7584 100644
--- a/include/crm/common/ipc_attrd_internal.h
+++ b/include/crm/common/ipc_attrd_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2022 the Pacemaker project contributors
+ * Copyright 2022-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node);
*
* \param[in,out] api Connection to pacemaker-attrd
* \param[in] node Look up the attribute for this node
- * (or NULL for all nodes)
+ * (or NULL for the local node)
* \param[in] name Attribute name
* \param[in] options Bitmask of pcmk__node_attr_opts
*
+ * \note Passing pcmk__node_attr_query_all will cause the function to query
+ * the value of \p name on all nodes, regardless of the value of \p node.
+ *
* \return Standard Pacemaker return code
*/
int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
index 4606509..dece49b 100644
--- a/lib/common/ipc_attrd.c
+++ b/lib/common/ipc_attrd.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2011-2022 the Pacemaker project contributors
+ * Copyright 2011-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
return EINVAL;
}
- target = pcmk__node_attr_target(node);
+ if (pcmk_is_set(options, pcmk__node_attr_query_all)) {
+ node = NULL;
+ } else {
+ target = pcmk__node_attr_target(node);
- if (target != NULL) {
- node = target;
+ if (target != NULL) {
+ node = target;
+ }
}
request = create_attrd_op(NULL);
diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c
index 3cd766d..cbd341d 100644
--- a/tools/attrd_updater.c
+++ b/tools/attrd_updater.c
@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type,
static int
send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all)
{
+ uint32_t options = pcmk__node_attr_none;
pcmk_ipc_api_t *attrd_api = NULL;
int rc = pcmk_rc_ok;
@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no
/* Decide which node(s) to query */
if (query_all == TRUE) {
- attr_node = NULL;
+ options |= pcmk__node_attr_query_all;
}
- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0);
+ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)",
--
2.31.1

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save