From 90e6c311afbde80567f481a26a486e4bcdf92108 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 16 May 2023 06:07:23 +0000 Subject: [PATCH] import pacemaker-2.1.5-8.el8 --- .gitignore | 2 + .pacemaker.metadata | 2 + SOURCES/001-sync-points.patch | 2429 +++++++++++++++++++++++ SOURCES/002-remote-regression.patch | 98 + SOURCES/003-history-cleanup.patch | 2829 +++++++++++++++++++++++++++ SOURCES/004-g_source_remove.patch | 107 + SOURCES/005-query-null.patch | 151 ++ SPECS/pacemaker.spec | 1607 +++++++++++++++ 8 files changed, 7225 insertions(+) create mode 100644 .gitignore create mode 100644 .pacemaker.metadata create mode 100644 SOURCES/001-sync-points.patch create mode 100644 SOURCES/002-remote-regression.patch create mode 100644 SOURCES/003-history-cleanup.patch create mode 100644 SOURCES/004-g_source_remove.patch create mode 100644 SOURCES/005-query-null.patch create mode 100644 SPECS/pacemaker.spec diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c5fb46 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +SOURCES/nagios-agents-metadata-105ab8a.tar.gz +SOURCES/pacemaker-a3f4479.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata new file mode 100644 index 0000000..7f4f247 --- /dev/null +++ b/.pacemaker.metadata @@ -0,0 +1,2 @@ +ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz +883efa27f94c6a07942f51cf7c8959c5fbb624fe SOURCES/pacemaker-a3f4479.tar.gz diff --git a/SOURCES/001-sync-points.patch b/SOURCES/001-sync-points.patch new file mode 100644 index 0000000..c034c78 --- /dev/null +++ b/SOURCES/001-sync-points.patch @@ -0,0 +1,2429 @@ +From de05f6b52c667155d262ceeb541dc1041d079d71 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 11:36:58 -0400 +Subject: [PATCH 01/26] Refactor: tools: Use a uint32_t for attr_options. 
+ +--- + tools/attrd_updater.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index d90567a..b85a281 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -47,7 +47,7 @@ struct { + gchar *attr_node; + gchar *attr_set; + char *attr_value; +- int attr_options; ++ uint32_t attr_options; + gboolean query_all; + gboolean quiet; + } options = { +-- +2.31.1 + +From c6637520b474d44553ade52c0dbe9e36e873135f Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 21 Oct 2022 14:31:16 -0400 +Subject: [PATCH 02/26] Refactor: libcrmcommon: Make pcmk__xe_match more + broadly useful. + +If attr_v is NULL, simply return the first node with a matching name. +--- + lib/common/xml.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/lib/common/xml.c b/lib/common/xml.c +index 036dd87..ac6f46a 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -510,7 +510,7 @@ find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find) + * \param[in] parent XML element to search + * \param[in] node_name If not NULL, only match children of this type + * \param[in] attr_n If not NULL, only match children with an attribute +- * of this name and a value of \p attr_v ++ * of this name. 
+ * \param[in] attr_v If \p attr_n and this are not NULL, only match children + * with an attribute named \p attr_n and this value + * +@@ -520,14 +520,16 @@ xmlNode * + pcmk__xe_match(const xmlNode *parent, const char *node_name, + const char *attr_n, const char *attr_v) + { +- /* ensure attr_v specified when attr_n is */ +- CRM_CHECK(attr_n == NULL || attr_v != NULL, return NULL); ++ CRM_CHECK(parent != NULL, return NULL); ++ CRM_CHECK(attr_v == NULL || attr_n != NULL, return NULL); + + for (xmlNode *child = pcmk__xml_first_child(parent); child != NULL; + child = pcmk__xml_next(child)) { + if (pcmk__str_eq(node_name, (const char *) (child->name), + pcmk__str_null_matches) +- && ((attr_n == NULL) || attr_matches(child, attr_n, attr_v))) { ++ && ((attr_n == NULL) || ++ (attr_v == NULL && xmlHasProp(child, (pcmkXmlStr) attr_n)) || ++ (attr_v != NULL && attr_matches(child, attr_n, attr_v)))) { + return child; + } + } +-- +2.31.1 + +From dd520579484c6ec091f7fbb550347941302dad0e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 21 Oct 2022 14:32:46 -0400 +Subject: [PATCH 03/26] Tests: libcrmcommon: Add tests for pcmk__xe_match. + +--- + lib/common/tests/xml/Makefile.am | 3 +- + lib/common/tests/xml/pcmk__xe_match_test.c | 105 +++++++++++++++++++++ + 2 files changed, 107 insertions(+), 1 deletion(-) + create mode 100644 lib/common/tests/xml/pcmk__xe_match_test.c + +diff --git a/lib/common/tests/xml/Makefile.am b/lib/common/tests/xml/Makefile.am +index 342ca07..0ccdcc3 100644 +--- a/lib/common/tests/xml/Makefile.am ++++ b/lib/common/tests/xml/Makefile.am +@@ -11,6 +11,7 @@ include $(top_srcdir)/mk/tap.mk + include $(top_srcdir)/mk/unittest.mk + + # Add "_test" to the end of all test program names to simplify .gitignore. 
+-check_PROGRAMS = pcmk__xe_foreach_child_test ++check_PROGRAMS = pcmk__xe_foreach_child_test \ ++ pcmk__xe_match_test + + TESTS = $(check_PROGRAMS) +diff --git a/lib/common/tests/xml/pcmk__xe_match_test.c b/lib/common/tests/xml/pcmk__xe_match_test.c +new file mode 100644 +index 0000000..fd529ba +--- /dev/null ++++ b/lib/common/tests/xml/pcmk__xe_match_test.c +@@ -0,0 +1,105 @@ ++/* ++ * Copyright 2022 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++ ++const char *str1 = ++ "\n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ ""; ++ ++static void ++bad_input(void **state) { ++ xmlNode *xml = string2xml(str1); ++ ++ assert_null(pcmk__xe_match(NULL, NULL, NULL, NULL)); ++ assert_null(pcmk__xe_match(NULL, NULL, NULL, "attrX")); ++ ++ free_xml(xml); ++} ++ ++static void ++not_found(void **state) { ++ xmlNode *xml = string2xml(str1); ++ ++ /* No node with an attrX attribute */ ++ assert_null(pcmk__xe_match(xml, NULL, "attrX", NULL)); ++ /* No nodeX node */ ++ assert_null(pcmk__xe_match(xml, "nodeX", NULL, NULL)); ++ /* No nodeA node with attrX */ ++ assert_null(pcmk__xe_match(xml, "nodeA", "attrX", NULL)); ++ /* No nodeA node with attrA=XYZ */ ++ assert_null(pcmk__xe_match(xml, "nodeA", "attrA", "XYZ")); ++ ++ free_xml(xml); ++} ++ ++static void ++find_attrB(void **state) { ++ xmlNode *xml = string2xml(str1); ++ xmlNode *result = NULL; ++ ++ /* Find the first node with attrB */ ++ result = pcmk__xe_match(xml, NULL, "attrB", NULL); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "3"); ++ ++ /* Find the first nodeB with 
attrB */ ++ result = pcmk__xe_match(xml, "nodeB", "attrB", NULL); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "5"); ++ ++ free_xml(xml); ++} ++ ++static void ++find_attrA_matching(void **state) { ++ xmlNode *xml = string2xml(str1); ++ xmlNode *result = NULL; ++ ++ /* Find attrA=456 */ ++ result = pcmk__xe_match(xml, NULL, "attrA", "456"); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "2"); ++ ++ /* Find a nodeB with attrA=123 */ ++ result = pcmk__xe_match(xml, "nodeB", "attrA", "123"); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "4"); ++ ++ free_xml(xml); ++} ++ ++PCMK__UNIT_TEST(NULL, NULL, ++ cmocka_unit_test(bad_input), ++ cmocka_unit_test(not_found), ++ cmocka_unit_test(find_attrB), ++ cmocka_unit_test(find_attrA_matching)); +-- +2.31.1 + +From 03af8498d8aaf21c509cec9b0ec4b78475da41d7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:22:26 -0400 +Subject: [PATCH 04/26] Feature: libcrmcommon: Add attrd options for specifying + a sync point. 
+ +--- + include/crm/common/attrd_internal.h | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index f7033ad..389be48 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -16,13 +16,15 @@ extern "C" { + + // Options for clients to use with functions below + enum pcmk__node_attr_opts { +- pcmk__node_attr_none = 0, +- pcmk__node_attr_remote = (1 << 0), +- pcmk__node_attr_private = (1 << 1), +- pcmk__node_attr_pattern = (1 << 2), +- pcmk__node_attr_value = (1 << 3), +- pcmk__node_attr_delay = (1 << 4), +- pcmk__node_attr_perm = (1 << 5), ++ pcmk__node_attr_none = 0, ++ pcmk__node_attr_remote = (1 << 0), ++ pcmk__node_attr_private = (1 << 1), ++ pcmk__node_attr_pattern = (1 << 2), ++ pcmk__node_attr_value = (1 << 3), ++ pcmk__node_attr_delay = (1 << 4), ++ pcmk__node_attr_perm = (1 << 5), ++ pcmk__node_attr_sync_local = (1 << 6), ++ pcmk__node_attr_sync_cluster = (1 << 7), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +-- +2.31.1 + +From 5c8825293ee21d3823bdcd01b0df9c7d39739940 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:23:09 -0400 +Subject: [PATCH 05/26] Feature: libcrmcommon: Add sync point to IPC request + XML. + +If one of the pcmk__node_attr_sync_* options is provided, add an +attribute to the request XML. This will later be inspected by the +server to determine when to send the reply to the client. 
+--- + include/crm/common/options_internal.h | 2 ++ + include/crm_internal.h | 1 + + lib/common/ipc_attrd.c | 6 ++++++ + 3 files changed, 9 insertions(+) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index b153c67..f29ba3f 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -145,9 +145,11 @@ bool pcmk__valid_sbd_timeout(const char *value); + #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" + + // Constants for enumerated values for various options ++#define PCMK__VALUE_CLUSTER "cluster" + #define PCMK__VALUE_CUSTOM "custom" + #define PCMK__VALUE_FENCING "fencing" + #define PCMK__VALUE_GREEN "green" ++#define PCMK__VALUE_LOCAL "local" + #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" + #define PCMK__VALUE_NONE "none" + #define PCMK__VALUE_NOTHING "nothing" +diff --git a/include/crm_internal.h b/include/crm_internal.h +index e6e2e96..08193c3 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -71,6 +71,7 @@ + #define PCMK__XA_ATTR_RESOURCE "attr_resource" + #define PCMK__XA_ATTR_SECTION "attr_section" + #define PCMK__XA_ATTR_SET "attr_set" ++#define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point" + #define PCMK__XA_ATTR_USER "attr_user" + #define PCMK__XA_ATTR_UUID "attr_key" + #define PCMK__XA_ATTR_VALUE "attr_value" +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index f6cfbc4..4606509 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -431,6 +431,12 @@ populate_update_op(xmlNode *op, const char *node, const char *name, const char * + pcmk_is_set(options, pcmk__node_attr_remote)); + crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, + pcmk_is_set(options, pcmk__node_attr_private)); ++ ++ if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { ++ crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); ++ } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { ++ crm_xml_add(op, 
PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); ++ } + } + + int +-- +2.31.1 + +From e2b3fee630caf0846ca8bbffcef4d6d2acfd32a5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:26:28 -0400 +Subject: [PATCH 06/26] Feature: tools: Add --wait= parameter to attrd_updater. + +This command line option is used to specify the sync point to use. For +the moment, it has no effect. +--- + tools/attrd_updater.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index b85a281..c4779a6 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -97,6 +97,22 @@ section_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError + return TRUE; + } + ++static gboolean ++wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { ++ if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ return TRUE; ++ } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); ++ return TRUE; ++ } else { ++ g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, ++ "--wait= must be one of 'no', 'local', 'cluster'"); ++ return FALSE; ++ } ++} ++ + #define INDENT " " + + static GOptionEntry required_entries[] = { +@@ -175,6 +191,14 @@ static GOptionEntry addl_entries[] = { + "If this creates a new attribute, never write the attribute to CIB", + NULL }, + ++ { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, ++ "Wait for some event to occur before returning. 
Values are 'no' (wait\n" ++ INDENT "only for the attribute daemon to acknowledge the request) or\n" ++ INDENT "'local' (wait until the change has propagated to where a local\n" ++ INDENT "query will return the request value, or the value set by a\n" ++ INDENT "later request). Default is 'no'.", ++ "UNTIL" }, ++ + { NULL } + }; + +-- +2.31.1 + +From 52d51ab41b2f00e72724ab39835b3db86605a96b Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:40:13 -0400 +Subject: [PATCH 07/26] Feature: daemons: Add functions for checking a request + for a sync point. + +--- + daemons/attrd/Makefile.am | 1 + + daemons/attrd/attrd_sync.c | 38 +++++++++++++++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 3 +++ + 3 files changed, 42 insertions(+) + create mode 100644 daemons/attrd/attrd_sync.c + +diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am +index 1a3d360..6bb81c4 100644 +--- a/daemons/attrd/Makefile.am ++++ b/daemons/attrd/Makefile.am +@@ -32,6 +32,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \ + attrd_elections.c \ + attrd_ipc.c \ + attrd_messages.c \ ++ attrd_sync.c \ + attrd_utils.c \ + pacemaker-attrd.c + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +new file mode 100644 +index 0000000..92759d2 +--- /dev/null ++++ b/daemons/attrd/attrd_sync.c +@@ -0,0 +1,38 @@ ++/* ++ * Copyright 2022 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++ ++#include "pacemaker-attrd.h" ++ ++const char * ++attrd_request_sync_point(xmlNode *xml) ++{ ++ if (xml_has_children(xml)) { ++ xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); ++ ++ if (child) { ++ return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); ++ } else { ++ return NULL; ++ } ++ ++ } else { ++ return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); ++ } ++} ++ ++bool ++attrd_request_has_sync_point(xmlNode *xml) ++{ ++ return attrd_request_sync_point(xml) != NULL; ++} +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 71ce90a..ff850bb 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -182,4 +182,7 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a + void attrd_unregister_handlers(void); + void attrd_handle_request(pcmk__request_t *request); + ++const char *attrd_request_sync_point(xmlNode *xml); ++bool attrd_request_has_sync_point(xmlNode *xml); ++ + #endif /* PACEMAKER_ATTRD__H */ +-- +2.31.1 + +From 2e0509a12ee7d4a612133ee65b75245eea7d271d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:42:04 -0400 +Subject: [PATCH 08/26] Refactor: daemons: Don't ACK update requests that give + a sync point. + +The ACK is the only response from the server for update messages. If +the message specified that it wanted to wait for a sync point, we need +to delay sending that response until the sync point is reached. +Therefore, do not always immediately send the ACK. 
+--- + daemons/attrd/attrd_messages.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index de4a28a..9e8ae40 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -137,12 +137,21 @@ handle_update_request(pcmk__request_t *request) + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; ++ + } else { +- /* Because attrd_client_update can be called recursively, we send the ACK +- * here to ensure that the client only ever receives one. +- */ +- attrd_send_ack(request->ipc_client, request->ipc_id, +- request->flags|crm_ipc_client_response); ++ if (!attrd_request_has_sync_point(request->xml)) { ++ /* If the client doesn't want to wait for a sync point, go ahead and send ++ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate ++ * sync point is reached. ++ * ++ * In the normal case, attrd_client_update can be called recursively which ++ * makes where to send the ACK tricky. Doing it here ensures the client ++ * only ever receives one. ++ */ ++ attrd_send_ack(request->ipc_client, request->ipc_id, ++ request->flags|crm_ipc_client_response); ++ } ++ + return attrd_client_update(request); + } + } +-- +2.31.1 + +From 2a0ff66cdf0085c4c8ab1992ef7e785a4facc8c7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:48:48 -0400 +Subject: [PATCH 09/26] Feature: daemons: Add support for local sync points on + updates. + +In the IPC dispatcher for attrd, add the client to a wait list if its +request specifies a sync point. When the attribute's value is changed +on the local attrd, alert any clients waiting on a local sync point by +then sending the previously delayed ACK. + +Sync points for other requests and the global sync point are not yet +supported. + +Fixes T35. 
+--- + daemons/attrd/attrd_corosync.c | 18 +++++ + daemons/attrd/attrd_messages.c | 12 ++- + daemons/attrd/attrd_sync.c | 137 ++++++++++++++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 7 ++ + 4 files changed, 173 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 539e5bf..4337280 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -568,14 +568,32 @@ void + attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter) + { ++ bool handle_sync_point = false; ++ + if (xml_has_children(xml)) { + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + copy_attrs(xml, child); + attrd_peer_update_one(peer, child, filter); ++ ++ if (attrd_request_has_sync_point(child)) { ++ handle_sync_point = true; ++ } + } + + } else { + attrd_peer_update_one(peer, xml, filter); ++ ++ if (attrd_request_has_sync_point(xml)) { ++ handle_sync_point = true; ++ } ++ } ++ ++ /* If the update XML specified that the client wanted to wait for a sync ++ * point, process that now. ++ */ ++ if (handle_sync_point) { ++ crm_debug("Hit local sync point for attribute update"); ++ attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 9e8ae40..c96700f 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -139,7 +139,17 @@ handle_update_request(pcmk__request_t *request) + return NULL; + + } else { +- if (!attrd_request_has_sync_point(request->xml)) { ++ if (attrd_request_has_sync_point(request->xml)) { ++ /* If this client supplied a sync point it wants to wait for, add it to ++ * the wait list. Clients on this list will not receive an ACK until ++ * their sync point is hit which will result in the client stalled there ++ * until it receives a response. 
++ * ++ * All other clients will receive the expected response as normal. ++ */ ++ attrd_add_client_to_waitlist(request); ++ ++ } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 92759d2..2981bd0 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -14,6 +14,143 @@ + + #include "pacemaker-attrd.h" + ++/* A hash table storing clients that are waiting on a sync point to be reached. ++ * The key is waitlist_client - just a plain int. The obvious key would be ++ * the IPC client's ID, but this is not guaranteed to be unique. A single client ++ * could be waiting on a sync point for multiple attributes at the same time. ++ * ++ * It is not expected that this hash table will ever be especially large. ++ */ ++static GHashTable *waitlist = NULL; ++static int waitlist_client = 0; ++ ++struct waitlist_node { ++ /* What kind of sync point does this node describe? */ ++ enum attrd_sync_point sync_point; ++ ++ /* Information required to construct and send a reply to the client. 
*/ ++ char *client_id; ++ uint32_t ipc_id; ++ uint32_t flags; ++}; ++ ++static void ++next_key(void) ++{ ++ do { ++ waitlist_client++; ++ if (waitlist_client < 0) { ++ waitlist_client = 1; ++ } ++ } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); ++} ++ ++static void ++free_waitlist_node(gpointer data) ++{ ++ struct waitlist_node *wl = (struct waitlist_node *) data; ++ ++ free(wl->client_id); ++ free(wl); ++} ++ ++static const char * ++sync_point_str(enum attrd_sync_point sync_point) ++{ ++ if (sync_point == attrd_sync_point_local) { ++ return PCMK__VALUE_LOCAL; ++ } else if (sync_point == attrd_sync_point_cluster) { ++ return PCMK__VALUE_CLUSTER; ++ } else { ++ return "unknown"; ++ } ++} ++ ++void ++attrd_add_client_to_waitlist(pcmk__request_t *request) ++{ ++ const char *sync_point = attrd_request_sync_point(request->xml); ++ struct waitlist_node *wl = NULL; ++ ++ if (sync_point == NULL) { ++ return; ++ } ++ ++ if (waitlist == NULL) { ++ waitlist = pcmk__intkey_table(free_waitlist_node); ++ } ++ ++ wl = calloc(sizeof(struct waitlist_node), 1); ++ ++ CRM_ASSERT(wl != NULL); ++ ++ wl->client_id = strdup(request->ipc_client->id); ++ ++ CRM_ASSERT(wl->client_id); ++ ++ if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { ++ wl->sync_point = attrd_sync_point_local; ++ } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ wl->sync_point = attrd_sync_point_cluster; ++ } else { ++ free_waitlist_node(wl); ++ return; ++ } ++ ++ wl->ipc_id = request->ipc_id; ++ wl->flags = request->flags; ++ ++ crm_debug("Added client %s to waitlist for %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ ++ next_key(); ++ pcmk__intkey_table_insert(waitlist, waitlist_client, wl); ++ ++ /* And then add the key to the request XML so we can uniquely identify ++ * it when it comes time to issue the ACK. 
++ */ ++ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); ++} ++ ++void ++attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) ++{ ++ int callid; ++ gpointer value; ++ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ crm_warn("Could not get callid from request XML"); ++ return; ++ } ++ ++ value = pcmk__intkey_table_lookup(waitlist, callid); ++ if (value != NULL) { ++ struct waitlist_node *wl = (struct waitlist_node *) value; ++ pcmk__client_t *client = NULL; ++ ++ if (wl->sync_point != sync_point) { ++ return; ++ } ++ ++ crm_debug("Alerting client %s for reached %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ ++ client = pcmk__find_client_by_id(wl->client_id); ++ if (client == NULL) { ++ return; ++ } ++ ++ attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); ++ ++ /* And then remove the client so it doesn't get alerted again. */ ++ pcmk__intkey_table_remove(waitlist, callid); ++ } ++} ++ + const char * + attrd_request_sync_point(xmlNode *xml) + { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index ff850bb..9dd8320 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -182,6 +182,13 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a + void attrd_unregister_handlers(void); + void attrd_handle_request(pcmk__request_t *request); + ++enum attrd_sync_point { ++ attrd_sync_point_local, ++ attrd_sync_point_cluster, ++}; ++ ++void attrd_add_client_to_waitlist(pcmk__request_t *request); ++void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); + +-- +2.31.1 + +From 59caaf1682191a91d6062358b770f8b9457ba3eb Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 
14:56:58 -0400 +Subject: [PATCH 10/26] Feature: daemons: If a client disconnects, remove it + from the waitlist. + +--- + daemons/attrd/attrd_ipc.c | 5 +++++ + daemons/attrd/attrd_sync.c | 21 +++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 1 + + 3 files changed, 27 insertions(+) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 7e4a1c0..8aa39c2 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -438,8 +438,13 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) + crm_trace("Ignoring request to clean up unknown connection %p", c); + } else { + crm_trace("Cleaning up closed client connection %p", c); ++ ++ /* Remove the client from the sync point waitlist if it's present. */ ++ attrd_remove_client_from_waitlist(client); ++ + pcmk__free_client(client); + } ++ + return FALSE; + } + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 2981bd0..7293318 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -112,6 +112,27 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++void ++attrd_remove_client_from_waitlist(pcmk__client_t *client) ++{ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, waitlist); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ struct waitlist_node *wl = (struct waitlist_node *) value; ++ ++ if (wl->client_id == client->id) { ++ g_hash_table_iter_remove(&iter); ++ } ++ } ++} ++ + void + attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 9dd8320..b6ecb75 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -189,6 +189,7 @@ enum attrd_sync_point { + + void attrd_add_client_to_waitlist(pcmk__request_t *request); + void 
attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); ++void attrd_remove_client_from_waitlist(pcmk__client_t *client); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); + +-- +2.31.1 + +From b28042e1d64b48c96dbd9da1e9ee3ff481bbf620 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 11:00:20 -0400 +Subject: [PATCH 11/26] Feature: daemons: Add support for local sync points on + clearing failures. + +attrd_clear_client_failure just calls attrd_client_update underneath, so +that function will handle all the rest of the sync point functionality +for us. +--- + daemons/attrd/attrd_ipc.c | 2 -- + daemons/attrd/attrd_messages.c | 19 +++++++++++++++++++ + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 8aa39c2..2e614e8 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -101,8 +101,6 @@ attrd_client_clear_failure(pcmk__request_t *request) + xmlNode *xml = request->xml; + const char *rsc, *op, *interval_spec; + +- attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); +- + if (minimum_protocol_version >= 2) { + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index c96700f..3ba14a6 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -42,6 +42,25 @@ handle_clear_failure_request(pcmk__request_t *request) + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { ++ if (attrd_request_has_sync_point(request->xml)) { ++ /* If this client supplied a sync point it wants to wait for, add it to ++ * the wait list. Clients on this list will not receive an ACK until ++ * their sync point is hit which will result in the client stalled there ++ * until it receives a response. 
++ * ++ * All other clients will receive the expected response as normal. ++ */ ++ attrd_add_client_to_waitlist(request); ++ ++ } else { ++ /* If the client doesn't want to wait for a sync point, go ahead and send ++ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate ++ * sync point is reached. ++ */ ++ attrd_send_ack(request->ipc_client, request->ipc_id, ++ request->ipc_flags); ++ } ++ + return attrd_client_clear_failure(request); + } + } +-- +2.31.1 + +From 291dc3b91e57f2584bbf88cfbe3a360e0332e814 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 13:17:24 -0400 +Subject: [PATCH 12/26] Refactor: daemons: Free the waitlist on attrd exit. + +--- + daemons/attrd/attrd_sync.c | 11 +++++++++++ + daemons/attrd/attrd_utils.c | 2 ++ + daemons/attrd/pacemaker-attrd.c | 1 + + daemons/attrd/pacemaker-attrd.h | 1 + + 4 files changed, 15 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 7293318..557e49a 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -112,6 +112,17 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++void ++attrd_free_waitlist(void) ++{ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ g_hash_table_destroy(waitlist); ++ waitlist = NULL; ++} ++ + void + attrd_remove_client_from_waitlist(pcmk__client_t *client) + { +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 6a19009..00b879b 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -93,6 +93,8 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGUSR2); + mainloop_destroy_signal(SIGTRAP); + ++ attrd_free_waitlist(); ++ + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible + * only if we get SIGTERM in brief windows at start-up and shutdown. 
+diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c +index 2100db4..1336542 100644 +--- a/daemons/attrd/pacemaker-attrd.c ++++ b/daemons/attrd/pacemaker-attrd.c +@@ -300,6 +300,7 @@ main(int argc, char **argv) + attrd_ipc_fini(); + attrd_lrmd_disconnect(); + attrd_cib_disconnect(); ++ attrd_free_waitlist(); + g_hash_table_destroy(attributes); + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index b6ecb75..537bf85 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -52,6 +52,7 @@ void attrd_run_mainloop(void); + + void attrd_set_requesting_shutdown(void); + void attrd_clear_requesting_shutdown(void); ++void attrd_free_waitlist(void); + bool attrd_requesting_shutdown(void); + bool attrd_shutting_down(void); + void attrd_shutdown(int nsig); +-- +2.31.1 + +From 7715ce617c520e14687a82e11ff794c93cd7f64a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 13:21:16 -0400 +Subject: [PATCH 13/26] Feature: includes: Bump CRM_FEATURE_SET for local sync + points. + +--- + include/crm/crm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/crm/crm.h b/include/crm/crm.h +index 5710e4b..7c5c602 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -66,7 +66,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.16.1" ++# define CRM_FEATURE_SET "3.16.2" + + /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and + * recipient of a CPG message. This imposes an arbitrary limit on cluster node +-- +2.31.1 + +From b9054425a76d03f538cd0b3ae27490b1874eee8a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 28 Oct 2022 14:23:49 -0400 +Subject: [PATCH 14/26] Refactor: daemons: Add comments for previously added + sync point code. 
+ +--- + daemons/attrd/attrd_sync.c | 63 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 63 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 557e49a..e9690b5 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -66,6 +66,20 @@ sync_point_str(enum attrd_sync_point sync_point) + } + } + ++/*! ++ * \internal ++ * \brief Add a client to the attrd waitlist ++ * ++ * Typically, a client receives an ACK for its XML IPC request immediately. However, ++ * some clients want to wait until their request has been processed and taken effect. ++ * This is called a sync point. Any client placed on this waitlist will have its ++ * ACK message delayed until either its requested sync point is hit, or until it ++ * times out. ++ * ++ * The XML IPC request must specify the type of sync point it wants to wait for. ++ * ++ * \param[in,out] request The request describing the client to place on the waitlist. ++ */ + void + attrd_add_client_to_waitlist(pcmk__request_t *request) + { +@@ -112,6 +126,11 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++/*! ++ * \internal ++ * \brief Free all memory associated with the waitlist. This is most typically ++ * used when attrd shuts down. ++ */ + void + attrd_free_waitlist(void) + { +@@ -123,6 +142,13 @@ attrd_free_waitlist(void) + waitlist = NULL; + } + ++/*! ++ * \internal ++ * \brief Unconditionally remove a client from the waitlist, such as when the client ++ * node disconnects from the cluster ++ * ++ * \param[in] client The client to remove ++ */ + void + attrd_remove_client_from_waitlist(pcmk__client_t *client) + { +@@ -144,6 +170,18 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + } + } + ++/*! 
++ * \internal ++ * \brief Send an IPC ACK message to all awaiting clients ++ * ++ * This function will search the waitlist for all clients that are currently awaiting ++ * an ACK indicating their attrd operation is complete. Only those clients with a ++ * matching sync point type and callid from their original XML IPC request will be ++ * ACKed. Once they have received an ACK, they will be removed from the waitlist. ++ * ++ * \param[in] sync_point What kind of sync point have we hit? ++ * \param[in] xml The original XML IPC request. ++ */ + void + attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + { +@@ -183,6 +221,23 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + } + } + ++/*! ++ * \internal ++ * \brief Return the sync point attribute for an IPC request ++ * ++ * This function will check both the top-level element of \p xml for a sync ++ * point attribute, as well as all of its \p op children, if any. The latter ++ * is useful for newer versions of attrd that can put multiple IPC requests ++ * into a single message. ++ * ++ * \param[in] xml An XML IPC request ++ * ++ * \note It is assumed that if one child element has a sync point attribute, ++ * all will have a sync point attribute and they will all be the same ++ * sync point. No other configuration is supported. ++ * ++ * \return The sync point attribute of \p xml, or NULL if none. ++ */ + const char * + attrd_request_sync_point(xmlNode *xml) + { +@@ -200,6 +255,14 @@ attrd_request_sync_point(xmlNode *xml) + } + } + ++/*! ++ * \internal ++ * \brief Does an IPC request contain any sync point attribute? 
++ * ++ * \param[in] xml An XML IPC request ++ * ++ * \return true if there's a sync point attribute, false otherwise ++ */ + bool + attrd_request_has_sync_point(xmlNode *xml) + { +-- +2.31.1 + +From 64219fb7075ee58d29f94f077a3b8f94174bb32a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 12:43:05 -0400 +Subject: [PATCH 15/26] Feature: tools: Add --wait=cluster option to + attrd_updater. + +--- + tools/attrd_updater.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index c4779a6..3cd766d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -106,6 +106,10 @@ wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError ** + pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); + pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); + return TRUE; ++ } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); ++ return TRUE; + } else { + g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, + "--wait= must be one of 'no', 'local', 'cluster'"); +@@ -193,10 +197,12 @@ static GOptionEntry addl_entries[] = { + + { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, + "Wait for some event to occur before returning. Values are 'no' (wait\n" +- INDENT "only for the attribute daemon to acknowledge the request) or\n" ++ INDENT "only for the attribute daemon to acknowledge the request),\n" + INDENT "'local' (wait until the change has propagated to where a local\n" + INDENT "query will return the request value, or the value set by a\n" +- INDENT "later request). 
Default is 'no'.", ++ INDENT "later request), or 'cluster' (wait until the change has propagated\n" ++ INDENT "to where a query anywhere on the cluster will return the requested\n" ++ INDENT "value, or the value set by a later request). Default is 'no'.", + "UNTIL" }, + + { NULL } +-- +2.31.1 + +From 1bc5511fadf6ad670508bd3a2a55129bde16f774 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 16 Sep 2022 14:55:06 -0400 +Subject: [PATCH 16/26] Refactor: daemons: Add a confirm= attribute to attrd + messages. + +This allows informing the originator of a message that the message has +been received and processed. As yet, there is no mechanism for handling +and returning the confirmation, only for requesting it. +--- + daemons/attrd/attrd_corosync.c | 6 +++--- + daemons/attrd/attrd_ipc.c | 26 +++++++++++++++++++++----- + daemons/attrd/attrd_messages.c | 11 +++++++++-- + daemons/attrd/pacemaker-attrd.h | 7 ++++--- + include/crm_internal.h | 1 + + 5 files changed, 38 insertions(+), 13 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 4337280..e86ca07 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -124,7 +124,7 @@ broadcast_local_value(const attribute_t *a) + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + attrd_add_value_xml(sync, a, v, false); +- attrd_send_message(NULL, sync); ++ attrd_send_message(NULL, sync, false); + free_xml(sync); + return v; + } +@@ -387,7 +387,7 @@ broadcast_unseen_local_values(void) + + if (sync != NULL) { + crm_debug("Broadcasting local-only values"); +- attrd_send_message(NULL, sync); ++ attrd_send_message(NULL, sync, false); + free_xml(sync); + } + } +@@ -539,7 +539,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml) + } + + crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); +- attrd_send_message(peer, sync); ++ attrd_send_message(peer, sync, false); + free_xml(sync); + } + +diff --git a/daemons/attrd/attrd_ipc.c 
b/daemons/attrd/attrd_ipc.c +index 2e614e8..0fc5e93 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -105,7 +105,7 @@ attrd_client_clear_failure(pcmk__request_t *request) + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). + */ +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +@@ -184,7 +184,7 @@ attrd_client_peer_remove(pcmk__request_t *request) + if (host) { + crm_info("Client %s is requesting all values for %s be removed", + pcmk__client_name(request->ipc_client), host); +- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + free(host_alloc); + } else { + crm_info("Ignoring request by client %s to remove all peer values without specifying peer", +@@ -314,7 +314,7 @@ attrd_client_update(pcmk__request_t *request) + } + } + +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -358,7 +358,7 @@ attrd_client_update(pcmk__request_t *request) + if (status == 0) { + crm_trace("Matched %s with %s", attr, regex); + crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + } + } + +@@ -388,7 +388,23 @@ attrd_client_update(pcmk__request_t *request) + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? " (writer)" : "")); + +- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ ++ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ /* The client is waiting on the cluster-wide sync point. 
In this case, ++ * the response ACK is not sent until this attrd broadcasts the update ++ * and receives its own confirmation back from all peers. ++ */ ++ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ ++ ++ } else { ++ /* The client is either waiting on the local sync point or was not ++ * waiting on any sync point at all. For the local sync point, the ++ * response ACK is sent in attrd_peer_update. For clients not ++ * waiting on any sync point, the response ACK is sent in ++ * handle_update_request immediately before this function was called. ++ */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ ++ } ++ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 3ba14a6..78df0d0 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -279,16 +279,23 @@ attrd_broadcast_protocol(void) + crm_debug("Broadcasting attrd protocol version %s for node %s", + ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); + +- attrd_send_message(NULL, attrd_op); /* ends up at attrd_peer_message() */ ++ attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ + + free_xml(attrd_op); + } + + gboolean +-attrd_send_message(crm_node_t * node, xmlNode * data) ++attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + { + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); ++ ++ /* Request a confirmation from the destination peer node (which could ++ * be all if node is NULL) that the message has been received and ++ * acted upon. 
++ */ ++ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); + } +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 537bf85..25f7c8a 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -39,10 +39,11 @@ + * PCMK__ATTRD_CMD_UPDATE_DELAY + * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE + * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes +- * 4 2.2.0 Multiple attributes can be updated in a single IPC ++ * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message ++ * 5 2.1.5 Peers can request confirmation of a sent message + */ +-#define ATTRD_PROTOCOL_VERSION "4" ++#define ATTRD_PROTOCOL_VERSION "5" + + #define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) +@@ -162,7 +163,7 @@ xmlNode *attrd_client_clear_failure(pcmk__request_t *request); + xmlNode *attrd_client_update(pcmk__request_t *request); + xmlNode *attrd_client_refresh(pcmk__request_t *request); + xmlNode *attrd_client_query(pcmk__request_t *request); +-gboolean attrd_send_message(crm_node_t * node, xmlNode * data); ++gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); + + xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write); +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 08193c3..63a1726 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -79,6 +79,7 @@ + #define PCMK__XA_ATTR_WRITER "attr_writer" + #define PCMK__XA_CONFIG_ERRORS "config-errors" + #define PCMK__XA_CONFIG_WARNINGS "config-warnings" ++#define PCMK__XA_CONFIRM "confirm" + #define PCMK__XA_GRAPH_ERRORS "graph-errors" + #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" + #define PCMK__XA_MODE "mode" +-- +2.31.1 + +From 
6f389038fc0b11f6291c022c99f188666c65f530 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 14:44:42 -0400 +Subject: [PATCH 17/26] Feature: daemons: Respond to received attrd + confirmation requests. + +On the receiving peer side, if the XML request contains confirm="true", +construct a confirmation message after handling the request completes +and send it back to the originating peer. + +On the originating peer side, add a skeleton handler for confirmation +messages. This does nothing at the moment except log it. +--- + daemons/attrd/attrd_corosync.c | 38 ++++++++++++++++++++++++++++++++++ + daemons/attrd/attrd_messages.c | 13 ++++++++++++ + include/crm_internal.h | 1 + + 3 files changed, 52 insertions(+) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index e86ca07..1245d9c 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -25,6 +25,19 @@ + + extern crm_exit_t attrd_exit_status; + ++static xmlNode * ++attrd_confirmation(int callid) ++{ ++ xmlNode *node = create_xml_node(NULL, __func__); ++ ++ crm_xml_add(node, F_TYPE, T_ATTRD); ++ crm_xml_add(node, F_ORIG, get_local_node_name()); ++ crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); ++ crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); ++ ++ return node; ++} ++ + static void + attrd_peer_message(crm_node_t *peer, xmlNode *xml) + { +@@ -57,6 +70,31 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) + CRM_CHECK(request.op != NULL, return); + + attrd_handle_request(&request); ++ ++ /* Having finished handling the request, check to see if the originating ++ * peer requested confirmation. If so, send that confirmation back now. ++ */ ++ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { ++ int callid = 0; ++ xmlNode *reply = NULL; ++ ++ /* Add the confirmation ID for the message we are confirming to the ++ * response so the originating peer knows what they're a confirmation ++ * for. 
++ */ ++ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); ++ reply = attrd_confirmation(callid); ++ ++ /* And then send the confirmation back to the originating peer. This ++ * ends up right back in this same function (attrd_peer_message) on the ++ * peer where it will have to do something with a PCMK__XA_CONFIRM type ++ * message. ++ */ ++ crm_debug("Sending %s a confirmation", peer->uname); ++ attrd_send_message(peer, reply, false); ++ free_xml(reply); ++ } ++ + pcmk__reset_request(&request); + } + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 78df0d0..9c792b2 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -65,6 +65,18 @@ handle_clear_failure_request(pcmk__request_t *request) + } + } + ++static xmlNode * ++handle_confirm_request(pcmk__request_t *request) ++{ ++ if (request->peer != NULL) { ++ crm_debug("Received confirmation from %s", request->peer); ++ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); ++ return NULL; ++ } else { ++ return handle_unknown_request(request); ++ } ++} ++ + static xmlNode * + handle_flush_request(pcmk__request_t *request) + { +@@ -190,6 +202,7 @@ attrd_register_handlers(void) + { + pcmk__server_command_t handlers[] = { + { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, ++ { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, + { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, + { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, + { PCMK__ATTRD_CMD_QUERY, handle_query_request }, +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 63a1726..f60e7b4 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -108,6 +108,7 @@ + #define PCMK__ATTRD_CMD_SYNC "sync" + #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" + #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" ++#define PCMK__ATTRD_CMD_CONFIRM "confirm" + + #define PCMK__CONTROLD_CMD_NODES "list-nodes" + +-- +2.31.1 + +From 
dfb730e9ced9dc75886fda9452c584860573fe30 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 15:58:00 -0400 +Subject: [PATCH 18/26] Feature: daemons: Keep track of #attrd-protocol from + each peer. + +This information can be used in the future when dealing with +cluster-wide sync points to know which peers we are waiting on a reply +from. +--- + daemons/attrd/attrd_corosync.c | 3 +- + daemons/attrd/attrd_utils.c | 60 ++++++++++++++++++++++++++++++--- + daemons/attrd/pacemaker-attrd.h | 4 ++- + 3 files changed, 60 insertions(+), 7 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1245d9c..6f88ab6 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -268,6 +268,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + // Remove votes from cluster nodes that leave, in case election in progress + if (gone && !is_remote) { + attrd_remove_voter(peer); ++ attrd_remove_peer_protocol_ver(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +@@ -395,7 +396,7 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + * version, check to see if it's a new minimum version. + */ + if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { +- attrd_update_minimum_protocol_ver(value); ++ attrd_update_minimum_protocol_ver(peer->uname, value); + } + } + +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 00b879b..421faed 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -29,6 +29,11 @@ static bool requesting_shutdown = false; + static bool shutting_down = false; + static GMainLoop *mloop = NULL; + ++/* A hash table storing information on the protocol version of each peer attrd. ++ * The key is the peer's uname, and the value is the protocol version number. ++ */ ++GHashTable *peer_protocol_vers = NULL; ++ + /*! 
+ * \internal + * \brief Set requesting_shutdown state +@@ -94,6 +99,10 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); ++ if (peer_protocol_vers != NULL) { ++ g_hash_table_destroy(peer_protocol_vers); ++ peer_protocol_vers = NULL; ++ } + + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible +@@ -273,16 +282,57 @@ attrd_free_attribute(gpointer data) + } + } + ++/*! ++ * \internal ++ * \brief When a peer node leaves the cluster, stop tracking its protocol version. ++ * ++ * \param[in] host The peer node's uname to be removed ++ */ ++void ++attrd_remove_peer_protocol_ver(const char *host) ++{ ++ if (peer_protocol_vers != NULL) { ++ g_hash_table_remove(peer_protocol_vers, host); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief When a peer node broadcasts a message with its protocol version, keep ++ * track of that information. ++ * ++ * We keep track of each peer's protocol version so we know which peers to ++ * expect confirmation messages from when handling cluster-wide sync points. ++ * We additionally keep track of the lowest protocol version supported by all ++ * peers so we know when we can send IPC messages containing more than one ++ * request. 
++ * ++ * \param[in] host The peer node's uname to be tracked ++ * \param[in] value The peer node's protocol version ++ */ + void +-attrd_update_minimum_protocol_ver(const char *value) ++attrd_update_minimum_protocol_ver(const char *host, const char *value) + { + int ver; + ++ if (peer_protocol_vers == NULL) { ++ peer_protocol_vers = pcmk__strkey_table(free, NULL); ++ } ++ + pcmk__scan_min_int(value, &ver, 0); + +- if (ver > 0 && (minimum_protocol_version == -1 || ver < minimum_protocol_version)) { +- minimum_protocol_version = ver; +- crm_trace("Set minimum attrd protocol version to %d", +- minimum_protocol_version); ++ if (ver > 0) { ++ char *host_name = strdup(host); ++ ++ /* Record the peer attrd's protocol version. */ ++ CRM_ASSERT(host_name != NULL); ++ g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); ++ ++ /* If the protocol version is a new minimum, record it as such. */ ++ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { ++ minimum_protocol_version = ver; ++ crm_trace("Set minimum attrd protocol version to %d", ++ minimum_protocol_version); ++ } + } + } +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 25f7c8a..302ef63 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -145,6 +145,7 @@ typedef struct attribute_value_s { + + extern crm_cluster_t *attrd_cluster; + extern GHashTable *attributes; ++extern GHashTable *peer_protocol_vers; + + #define CIB_OP_TIMEOUT_S 120 + +@@ -177,7 +178,8 @@ void attrd_write_attributes(bool all, bool ignore_delay); + void attrd_write_or_elect_attribute(attribute_t *a); + + extern int minimum_protocol_version; +-void attrd_update_minimum_protocol_ver(const char *value); ++void attrd_remove_peer_protocol_ver(const char *host); ++void attrd_update_minimum_protocol_ver(const char *host, const char *value); + + mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); + +-- +2.31.1 + 
+From 945f0fe51d3bf69c2cb1258b394f2f11b8996525 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 27 Oct 2022 14:42:59 -0400 +Subject: [PATCH 19/26] Feature: daemons: Handle cluster-wide sync points in + attrd. + +When an attrd receives an IPC request to update some value, record the +protocol versions of all peer attrds. Additionally register a function +that will be called when all confirmations are received. + +The originating IPC cilent (attrd_updater for instance) will sit there +waiting for an ACK until its timeout is hit. + +As each confirmation message comes back to attrd, mark it off the list +of peers we are waiting on. When no more peers are expected, call the +previously registered function. + +For attribute updates, this function just sends an ack back to +attrd_updater. + +Fixes T35 +--- + daemons/attrd/attrd_corosync.c | 1 + + daemons/attrd/attrd_ipc.c | 4 + + daemons/attrd/attrd_messages.c | 10 ++ + daemons/attrd/attrd_sync.c | 260 +++++++++++++++++++++++++++++++- + daemons/attrd/attrd_utils.c | 2 + + daemons/attrd/pacemaker-attrd.h | 8 + + 6 files changed, 281 insertions(+), 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 6f88ab6..37701aa 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -269,6 +269,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + if (gone && !is_remote) { + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); ++ attrd_do_not_expect_from_peer(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 0fc5e93..c70aa1b 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -393,6 +393,7 @@ attrd_client_update(pcmk__request_t *request) + * the response ACK is not sent until this attrd broadcasts the update + * and receives its 
own confirmation back from all peers. + */ ++ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); + attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ + + } else { +@@ -456,6 +457,9 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) + /* Remove the client from the sync point waitlist if it's present. */ + attrd_remove_client_from_waitlist(client); + ++ /* And no longer wait for confirmations from any peers. */ ++ attrd_do_not_wait_for_client(client); ++ + pcmk__free_client(client); + } + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 9c792b2..f7b9c7c 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -69,7 +69,17 @@ static xmlNode * + handle_confirm_request(pcmk__request_t *request) + { + if (request->peer != NULL) { ++ int callid; ++ + crm_debug("Received confirmation from %s", request->peer); ++ ++ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, ++ "Could not get callid from XML"); ++ } else { ++ attrd_handle_confirmation(callid, request->peer); ++ } ++ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index e9690b5..d3d7108 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -34,6 +34,51 @@ struct waitlist_node { + uint32_t flags; + }; + ++/* A hash table storing information on in-progress IPC requests that are awaiting ++ * confirmations. These requests are currently being processed by peer attrds and ++ * we are waiting to receive confirmation messages from each peer indicating that ++ * processing is complete. ++ * ++ * Multiple requests could be waiting on confirmations at the same time. ++ * ++ * The key is the unique callid for the IPC request, and the value is a ++ * confirmation_action struct. 
++ */ ++static GHashTable *expected_confirmations = NULL; ++ ++/*! ++ * \internal ++ * \brief A structure describing a single IPC request that is awaiting confirmations ++ */ ++struct confirmation_action { ++ /*! ++ * \brief A list of peer attrds that we are waiting to receive confirmation ++ * messages from ++ * ++ * This list is dynamic - as confirmations arrive from peer attrds, they will ++ * be removed from this list. When the list is empty, all peers have processed ++ * the request and the associated confirmation action will be taken. ++ */ ++ GList *respondents; ++ ++ /*! ++ * \brief A function to run when all confirmations have been received ++ */ ++ attrd_confirmation_action_fn fn; ++ ++ /*! ++ * \brief Information required to construct and send a reply to the client ++ */ ++ char *client_id; ++ uint32_t ipc_id; ++ uint32_t flags; ++ ++ /*! ++ * \brief The XML request containing the callid associated with this action ++ */ ++ void *xml; ++}; ++ + static void + next_key(void) + { +@@ -114,12 +159,13 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + wl->ipc_id = request->ipc_id; + wl->flags = request->flags; + +- crm_debug("Added client %s to waitlist for %s sync point", +- wl->client_id, sync_point_str(wl->sync_point)); +- + next_key(); + pcmk__intkey_table_insert(waitlist, waitlist_client, wl); + ++ crm_trace("Added client %s to waitlist for %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); ++ + /* And then add the key to the request XML so we can uniquely identify + * it when it comes time to issue the ACK. 
+ */ +@@ -166,6 +212,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + + if (wl->client_id == client->id) { + g_hash_table_iter_remove(&iter); ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } + } +@@ -206,7 +253,7 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + return; + } + +- crm_debug("Alerting client %s for reached %s sync point", ++ crm_trace("Alerting client %s for reached %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + + client = pcmk__find_client_by_id(wl->client_id); +@@ -218,9 +265,28 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + + /* And then remove the client so it doesn't get alerted again. */ + pcmk__intkey_table_remove(waitlist, callid); ++ ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } + ++/*! ++ * \internal ++ * \brief Action to take when a cluster sync point is hit for a ++ * PCMK__ATTRD_CMD_UPDATE* message. ++ * ++ * \param[in] xml The request that should be passed along to ++ * attrd_ack_waitlist_clients. This should be the original ++ * IPC request containing the callid for this update message. ++ */ ++int ++attrd_cluster_sync_point_update(xmlNode *xml) ++{ ++ crm_trace("Hit cluster sync point for attribute update"); ++ attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); ++ return pcmk_rc_ok; ++} ++ + /*! + * \internal + * \brief Return the sync point attribute for an IPC request +@@ -268,3 +334,189 @@ attrd_request_has_sync_point(xmlNode *xml) + { + return attrd_request_sync_point(xml) != NULL; + } ++ ++static void ++free_action(gpointer data) ++{ ++ struct confirmation_action *action = (struct confirmation_action *) data; ++ g_list_free_full(action->respondents, free); ++ free_xml(action->xml); ++ free(action->client_id); ++ free(action); ++} ++ ++/*! 
++ * \internal ++ * \brief When a peer disconnects from the cluster, no longer wait for its confirmation ++ * for any IPC action. If this peer is the last one being waited on, this will ++ * trigger the confirmation action. ++ * ++ * \param[in] host The disconnecting peer attrd's uname ++ */ ++void ++attrd_do_not_expect_from_peer(const char *host) ++{ ++ GList *keys = g_hash_table_get_keys(expected_confirmations); ++ ++ crm_trace("Removing peer %s from expected confirmations", host); ++ ++ for (GList *node = keys; node != NULL; node = node->next) { ++ int callid = *(int *) node->data; ++ attrd_handle_confirmation(callid, host); ++ } ++ ++ g_list_free(keys); ++} ++ ++/*! ++ * \internal ++ * \brief When a client disconnects from the cluster, no longer wait on confirmations ++ * for it. Because the peer attrds may still be processing the original IPC ++ * message, they may still send us confirmations. However, we will take no ++ * action on them. ++ * ++ * \param[in] client The disconnecting client ++ */ ++void ++attrd_do_not_wait_for_client(pcmk__client_t *client) ++{ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, expected_confirmations); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ struct confirmation_action *action = (struct confirmation_action *) value; ++ ++ if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { ++ crm_trace("Removing client %s from expected confirmations", client->id); ++ g_hash_table_iter_remove(&iter); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ break; ++ } ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Register some action to be taken when IPC request confirmations are ++ * received ++ * ++ * When this function is called, a list of all peer attrds that support confirming ++ * requests is generated. 
As confirmations from these peer attrds are received, ++ * they are removed from this list. When the list is empty, the registered action ++ * will be called. ++ * ++ * \note This function should always be called before attrd_send_message is called ++ * to broadcast to the peers to ensure that we know what replies we are ++ * waiting on. Otherwise, it is possible the peer could finish and confirm ++ * before we know to expect it. ++ * ++ * \param[in] request The request that is awaiting confirmations ++ * \param[in] fn A function to be run after all confirmations are received ++ */ ++void ++attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) ++{ ++ struct confirmation_action *action = NULL; ++ GHashTableIter iter; ++ gpointer host, ver; ++ GList *respondents = NULL; ++ int callid; ++ ++ if (expected_confirmations == NULL) { ++ expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); ++ } ++ ++ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ crm_err("Could not get callid from xml"); ++ return; ++ } ++ ++ if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { ++ crm_err("Already waiting on confirmations for call id %d", callid); ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, peer_protocol_vers); ++ while (g_hash_table_iter_next(&iter, &host, &ver)) { ++ if (GPOINTER_TO_INT(ver) >= 5) { ++ char *s = strdup((char *) host); ++ ++ CRM_ASSERT(s != NULL); ++ respondents = g_list_prepend(respondents, s); ++ } ++ } ++ ++ action = calloc(1, sizeof(struct confirmation_action)); ++ CRM_ASSERT(action != NULL); ++ ++ action->respondents = respondents; ++ action->fn = fn; ++ action->xml = copy_xml(request->xml); ++ ++ action->client_id = strdup(request->ipc_client->id); ++ CRM_ASSERT(action->client_id != NULL); ++ ++ action->ipc_id = request->ipc_id; ++ action->flags = request->flags; ++ ++ pcmk__intkey_table_insert(expected_confirmations, callid, action); ++ crm_trace("Callid %d 
now waiting on %d confirmations", callid, g_list_length(respondents)); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++} ++ ++void ++attrd_free_confirmations(void) ++{ ++ if (expected_confirmations != NULL) { ++ g_hash_table_destroy(expected_confirmations); ++ expected_confirmations = NULL; ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Process a confirmation message from a peer attrd ++ * ++ * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is ++ * received from a peer attrd. If this is the last confirmation we are waiting ++ * on for a given operation, the registered action will be called. ++ * ++ * \param[in] callid The unique callid for the XML IPC request ++ * \param[in] host The confirming peer attrd's uname ++ */ ++void ++attrd_handle_confirmation(int callid, const char *host) ++{ ++ struct confirmation_action *action = NULL; ++ GList *node = NULL; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ action = pcmk__intkey_table_lookup(expected_confirmations, callid); ++ if (action == NULL) { ++ return; ++ } ++ ++ node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); ++ ++ if (node == NULL) { ++ return; ++ } ++ ++ action->respondents = g_list_remove(action->respondents, node->data); ++ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); ++ ++ if (action->respondents == NULL) { ++ action->fn(action->xml); ++ pcmk__intkey_table_remove(expected_confirmations, callid); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ } ++} +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 421faed..f3a2059 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -99,6 +99,8 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); ++ attrd_free_confirmations(); ++ + if 
(peer_protocol_vers != NULL) { + g_hash_table_destroy(peer_protocol_vers); + peer_protocol_vers = NULL; +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 302ef63..bcc329d 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -191,8 +191,16 @@ enum attrd_sync_point { + attrd_sync_point_cluster, + }; + ++typedef int (*attrd_confirmation_action_fn)(xmlNode *); ++ + void attrd_add_client_to_waitlist(pcmk__request_t *request); + void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); ++int attrd_cluster_sync_point_update(xmlNode *xml); ++void attrd_do_not_expect_from_peer(const char *host); ++void attrd_do_not_wait_for_client(pcmk__client_t *client); ++void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); ++void attrd_free_confirmations(void); ++void attrd_handle_confirmation(int callid, const char *host); + void attrd_remove_client_from_waitlist(pcmk__client_t *client); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); +-- +2.31.1 + +From 07a032a7eb2f03dce18a7c94c56b8c837dedda15 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 28 Oct 2022 14:54:15 -0400 +Subject: [PATCH 20/26] Refactor: daemons: Add some attrd version checking + macros. + +These are just to make it a little more obvious what is actually being +asked in the code, instead of having magic numbers sprinkled around. +--- + daemons/attrd/attrd_ipc.c | 2 +- + daemons/attrd/attrd_sync.c | 2 +- + daemons/attrd/pacemaker-attrd.h | 3 +++ + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index c70aa1b..16bfff4 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -294,7 +294,7 @@ attrd_client_update(pcmk__request_t *request) + * two ways we can handle that. 
+ */ + if (xml_has_children(xml)) { +- if (minimum_protocol_version >= 4) { ++ if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { + /* First, if all peers support a certain protocol version, we can + * just broadcast the big message and they'll handle it. However, + * we also need to apply all the transformations in this function +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index d3d7108..e48f82e 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -444,7 +444,7 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f + + g_hash_table_iter_init(&iter, peer_protocol_vers); + while (g_hash_table_iter_next(&iter, &host, &ver)) { +- if (GPOINTER_TO_INT(ver) >= 5) { ++ if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { + char *s = strdup((char *) host); + + CRM_ASSERT(s != NULL); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index bcc329d..83d7c6b 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -45,6 +45,9 @@ + */ + #define ATTRD_PROTOCOL_VERSION "5" + ++#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) ++#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) ++ + #define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) + +-- +2.31.1 + +From 811361b96c6f26a1f5eccc54b6e8bf6e6fd003be Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 31 Oct 2022 12:53:22 -0400 +Subject: [PATCH 21/26] Low: attrd: Fix removing clients from the waitlist when + they disconnect. + +The client ID is a string, so it must be compared like a string. 
+--- + daemons/attrd/attrd_sync.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index e48f82e..c9b4784 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -210,7 +210,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct waitlist_node *wl = (struct waitlist_node *) value; + +- if (wl->client_id == client->id) { ++ if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { + g_hash_table_iter_remove(&iter); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } +-- +2.31.1 + +From 4e933ad14456af85c60701410c3b23b4eab03f86 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 1 Nov 2022 12:35:12 -0400 +Subject: [PATCH 22/26] Feature: daemons: Handle an attrd client timing out. + +If the update confirmations do not come back in time, use a main loop +timer to remove the client from the table. +--- + daemons/attrd/attrd_sync.c | 49 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 49 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index c9b4784..9d07796 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -61,6 +61,12 @@ struct confirmation_action { + */ + GList *respondents; + ++ /*! ++ * \brief A timer that will be used to remove the client should it time out ++ * before receiving all confirmations ++ */ ++ mainloop_timer_t *timer; ++ + /*! 
+ * \brief A function to run when all confirmations have been received + */ +@@ -340,11 +346,51 @@ free_action(gpointer data) + { + struct confirmation_action *action = (struct confirmation_action *) data; + g_list_free_full(action->respondents, free); ++ mainloop_timer_del(action->timer); + free_xml(action->xml); + free(action->client_id); + free(action); + } + ++/* Remove an IPC request from the expected_confirmations table if the peer attrds ++ * don't respond before the timeout is hit. We set the timeout to 15s. The exact ++ * number isn't critical - we just want to make sure that the table eventually gets ++ * cleared of things that didn't complete. ++ */ ++static gboolean ++confirmation_timeout_cb(gpointer data) ++{ ++ struct confirmation_action *action = (struct confirmation_action *) data; ++ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (expected_confirmations == NULL) { ++ return G_SOURCE_REMOVE; ++ } ++ ++ g_hash_table_iter_init(&iter, expected_confirmations); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ if (value == action) { ++ pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); ++ if (client == NULL) { ++ return G_SOURCE_REMOVE; ++ } ++ ++ crm_trace("Timed out waiting for confirmations for client %s", client->id); ++ pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, ++ "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); ++ ++ g_hash_table_iter_remove(&iter); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ break; ++ } ++ } ++ ++ return G_SOURCE_REMOVE; ++} ++ + /*! 
+ * \internal + * \brief When a peer disconnects from the cluster, no longer wait for its confirmation +@@ -465,6 +511,9 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f + action->ipc_id = request->ipc_id; + action->flags = request->flags; + ++ action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); ++ mainloop_timer_start(action->timer); ++ + pcmk__intkey_table_insert(expected_confirmations, callid, action); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); +-- +2.31.1 + +From 101896383cbe0103c98078e46540c076af08f040 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 2 Nov 2022 14:40:30 -0400 +Subject: [PATCH 23/26] Refactor: Demote a sync point related message to trace. + +--- + daemons/attrd/attrd_corosync.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 37701aa..5cbed7e 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -633,7 +633,7 @@ attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + * point, process that now. + */ + if (handle_sync_point) { +- crm_debug("Hit local sync point for attribute update"); ++ crm_trace("Hit local sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } + } +-- +2.31.1 + +From acd13246d4c2bef7982ca103e34896efcad22348 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 3 Nov 2022 10:29:20 -0400 +Subject: [PATCH 24/26] Low: daemons: Avoid infinite confirm loops in attrd. + +On the sending side, do not add confirm="yes" to a message with +op="confirm". On the receiving side, do not confirm a message with +op="confirm" even if confirm="yes" is set. 
+--- + daemons/attrd/attrd_corosync.c | 3 ++- + daemons/attrd/attrd_messages.c | 6 +++++- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 5cbed7e..88c1ecc 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -74,7 +74,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) + /* Having finished handling the request, check to see if the originating + * peer requested confirmation. If so, send that confirmation back now. + */ +- if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { ++ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && ++ !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + int callid = 0; + xmlNode *reply = NULL; + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index f7b9c7c..184176a 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -310,6 +310,8 @@ attrd_broadcast_protocol(void) + gboolean + attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + { ++ const char *op = crm_element_value(data, PCMK__XA_TASK); ++ + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + +@@ -317,7 +319,9 @@ attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + * be all if node is NULL) that the message has been received and + * acted upon. + */ +- pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { ++ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ } + + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); +-- +2.31.1 + +From 115e6c3a0d8db4df3eccf6da1c344168799f890d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 15 Nov 2022 09:35:28 -0500 +Subject: [PATCH 25/26] Fix: daemons: Check for NULL in + attrd_do_not_expect_from_peer. 
+ +--- + daemons/attrd/attrd_sync.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 9d07796..6936771 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -402,7 +402,13 @@ confirmation_timeout_cb(gpointer data) + void + attrd_do_not_expect_from_peer(const char *host) + { +- GList *keys = g_hash_table_get_keys(expected_confirmations); ++ GList *keys = NULL; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ keys = g_hash_table_get_keys(expected_confirmations); + + crm_trace("Removing peer %s from expected confirmations", host); + +-- +2.31.1 + +From 05da14f97ccd4f63f53801acc107ad661e5fd0c8 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 16 Nov 2022 17:37:44 -0500 +Subject: [PATCH 26/26] Low: daemons: Support cluster-wide sync points for + multi IPC messages. + +Supporting cluster-wide sync points means attrd_expect_confirmations +needs to be called, and then attrd_send_message needs "true" as a third +argument. This indicates attrd wants confirmations back from all its +peers when they have applied the update. + +We're already doing this at the end of attrd_client_update for +single-update IPC messages, and handling it for multi-update messages is +a simple matter of breaking that code out into a function and making +sure it's called. + +Note that this leaves two other spots where sync points still need to be +dealt with: + +* An update message that uses a regex. See + https://projects.clusterlabs.org/T600 for details. + +* A multi-update IPC message in a cluster where that is not supported. + See https://projects.clusterlabs.org/T601 for details. 
+--- + daemons/attrd/attrd_ipc.c | 43 ++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 19 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 16bfff4..8c5660d 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -283,6 +283,28 @@ handle_value_expansion(const char **value, xmlNode *xml, const char *op, + return pcmk_rc_ok; + } + ++static void ++send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) ++{ ++ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ /* The client is waiting on the cluster-wide sync point. In this case, ++ * the response ACK is not sent until this attrd broadcasts the update ++ * and receives its own confirmation back from all peers. ++ */ ++ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); ++ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ ++ ++ } else { ++ /* The client is either waiting on the local sync point or was not ++ * waiting on any sync point at all. For the local sync point, the ++ * response ACK is sent in attrd_peer_update. For clients not ++ * waiting on any sync point, the response ACK is sent in ++ * handle_update_request immediately before this function was called. ++ */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ ++ } ++} ++ + xmlNode * + attrd_client_update(pcmk__request_t *request) + { +@@ -314,7 +336,7 @@ attrd_client_update(pcmk__request_t *request) + } + } + +- attrd_send_message(NULL, xml, false); ++ send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -388,24 +410,7 @@ attrd_client_update(pcmk__request_t *request) + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? 
" (writer)" : "")); + +- if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { +- /* The client is waiting on the cluster-wide sync point. In this case, +- * the response ACK is not sent until this attrd broadcasts the update +- * and receives its own confirmation back from all peers. +- */ +- attrd_expect_confirmations(request, attrd_cluster_sync_point_update); +- attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ +- +- } else { +- /* The client is either waiting on the local sync point or was not +- * waiting on any sync point at all. For the local sync point, the +- * response ACK is sent in attrd_peer_update. For clients not +- * waiting on any sync point, the response ACK is sent in +- * handle_update_request immediately before this function was called. +- */ +- attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ +- } +- ++ send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +-- +2.31.1 + diff --git a/SOURCES/002-remote-regression.patch b/SOURCES/002-remote-regression.patch new file mode 100644 index 0000000..0f0bea8 --- /dev/null +++ b/SOURCES/002-remote-regression.patch @@ -0,0 +1,98 @@ +From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 23 Jan 2023 14:25:56 -0600 +Subject: [PATCH] High: executor: fix regression in remote node shutdown + +This reverts the essential part of d61494347, which was based on misdiagnosing +a remote node shutdown issue. Initially, it was thought that a "TLS server +session ended" log just after a remote node requested shutdown indicated that +the proxy connection coincidentally dropped at that moment. It actually is the +routine stopping of accepting new proxy connections, and existing when that +happens makes the remote node exit immediately without waiting for the +all-clear from the cluster. 
+ +Fixes T361 +--- + daemons/execd/pacemaker-execd.c | 19 +------------------ + daemons/execd/pacemaker-execd.h | 3 +-- + daemons/execd/remoted_tls.c | 6 +----- + 3 files changed, 3 insertions(+), 25 deletions(-) + +diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c +index db12674f13..491808974a 100644 +--- a/daemons/execd/pacemaker-execd.c ++++ b/daemons/execd/pacemaker-execd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -305,23 +305,6 @@ lrmd_exit(gpointer data) + return FALSE; + } + +-/*! +- * \internal +- * \brief Clean up and exit if shutdown has started +- * +- * \return Doesn't return +- */ +-void +-execd_exit_if_shutting_down(void) +-{ +-#ifdef PCMK__COMPILE_REMOTE +- if (shutting_down) { +- crm_warn("exit because TLS connection was closed and 'shutting_down' set"); +- lrmd_exit(NULL); +- } +-#endif +-} +- + /*! + * \internal + * \brief Request cluster shutdown if appropriate, otherwise exit immediately +diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h +index 6646ae29e3..f78e8dcdde 100644 +--- a/daemons/execd/pacemaker-execd.h ++++ b/daemons/execd/pacemaker-execd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp); + int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, + xmlNode *request); + void lrmd_drain_alerts(GMainLoop *mloop); +-void execd_exit_if_shutting_down(void); + + #endif // PACEMAKER_EXECD__H +diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c +index 6f4b2d0062..c65e3f394d 100644 +--- a/daemons/execd/remoted_tls.c ++++ b/daemons/execd/remoted_tls.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -250,10 +250,6 @@ static void + tls_server_dropped(gpointer user_data) + { + crm_notice("TLS server session ended"); +- /* If we are in the process of shutting down, then we should actually exit. +- * bz#1804259 +- */ +- execd_exit_if_shutting_down(); + return; + } + +-- +2.31.1 + diff --git a/SOURCES/003-history-cleanup.patch b/SOURCES/003-history-cleanup.patch new file mode 100644 index 0000000..87a3e27 --- /dev/null +++ b/SOURCES/003-history-cleanup.patch @@ -0,0 +1,2829 @@ +From e953591a9796edebd4796c344df0eddcbc7a2dff Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 30 Jan 2023 16:34:32 -0600 +Subject: [PATCH 01/14] Refactor: scheduler: drop unneeded arguments from + process_rsc_state() + +migrate_op has been unused since at least 2011 +--- + lib/pengine/unpack.c | 36 +++++++++++++++--------------------- + 1 file changed, 15 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 5fcba3b..9524def 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -1963,8 +1963,7 @@ process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t + + static void + process_rsc_state(pe_resource_t * rsc, pe_node_t * node, +- enum action_fail_response on_fail, +- xmlNode * migrate_op, pe_working_set_t * data_set) ++ 
enum action_fail_response on_fail) + { + pe_node_t *tmpnode = NULL; + char *reason = NULL; +@@ -2016,7 +2015,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + should_fence = TRUE; + +- } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { ++ } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + if (pe__is_remote_node(node) && node->details->remote_rsc + && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { + +@@ -2039,7 +2038,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + if (reason == NULL) { + reason = crm_strdup_printf("%s is thought to be active there", rsc->id); + } +- pe_fence_node(data_set, node, reason, FALSE); ++ pe_fence_node(rsc->cluster, node, reason, FALSE); + } + free(reason); + } +@@ -2069,7 +2068,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * but also mark the node as unclean + */ + reason = crm_strdup_printf("%s failed there", rsc->id); +- pe_fence_node(data_set, node, reason, FALSE); ++ pe_fence_node(rsc->cluster, node, reason, FALSE); + free(reason); + break; + +@@ -2090,7 +2089,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* make sure it comes up somewhere else + * or not at all + */ +- resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); ++ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", ++ rsc->cluster); + break; + + case action_fail_stop: +@@ -2112,8 +2112,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * container is running yet, so remember it and add a stop + * action for it later. 
+ */ +- data_set->stop_needed = g_list_prepend(data_set->stop_needed, +- rsc->container); ++ rsc->cluster->stop_needed = ++ g_list_prepend(rsc->cluster->stop_needed, rsc->container); + } else if (rsc->container) { + stop_action(rsc->container, node, FALSE); + } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { +@@ -2123,10 +2123,10 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + + case action_fail_reset_remote: + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +- if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { ++ if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + tmpnode = NULL; + if (rsc->is_remote_node) { +- tmpnode = pe_find_node(data_set->nodes, rsc->id); ++ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); + } + if (tmpnode && + pe__is_remote_node(tmpnode) && +@@ -2135,7 +2135,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* The remote connection resource failed in a way that + * should result in fencing the remote node. + */ +- pe_fence_node(data_set, tmpnode, ++ pe_fence_node(rsc->cluster, tmpnode, + "remote connection is unrecoverable", FALSE); + } + } +@@ -2158,7 +2158,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * result in a fencing operation regardless if we're going to attempt to + * reconnect to the remote-node in this transition or not. 
*/ + if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { +- tmpnode = pe_find_node(data_set->nodes, rsc->id); ++ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); + if (tmpnode && tmpnode->details->unclean) { + tmpnode->details->unseen = FALSE; + } +@@ -2177,7 +2177,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + } + } + +- native_add_running(rsc, node, data_set, (save_on_fail != action_fail_ignore)); ++ native_add_running(rsc, node, rsc->cluster, ++ (save_on_fail != action_fail_ignore)); + switch (on_fail) { + case action_fail_ignore: + break; +@@ -2376,14 +2377,12 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + int start_index = -1; + enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; + +- const char *task = NULL; + const char *rsc_id = ID(lrm_resource); + + pe_resource_t *rsc = NULL; + GList *op_list = NULL; + GList *sorted_op_list = NULL; + +- xmlNode *migrate_op = NULL; + xmlNode *rsc_op = NULL; + xmlNode *last_failure = NULL; + +@@ -2437,11 +2436,6 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { + xmlNode *rsc_op = (xmlNode *) gIter->data; + +- task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); +- if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { +- migrate_op = rsc_op; +- } +- + unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); + } + +@@ -2452,7 +2446,7 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + /* no need to free the contents */ + g_list_free(sorted_op_list); + +- process_rsc_state(rsc, node, on_fail, migrate_op, data_set); ++ process_rsc_state(rsc, node, on_fail); + + if (get_target_role(rsc, &req_role)) { + if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { +-- +2.31.1 + +From 6f4e34cccc4864961d2020a2dd547450ac53a44e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 16:30:20 -0600 +Subject: [PATCH 02/14] Log: scheduler: improve 
trace logs when unpacking + resource history + +--- + lib/pengine/unpack.c | 112 +++++++++++++++++++++++++++---------------- + 1 file changed, 71 insertions(+), 41 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 9524def..b7b2873 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -3363,6 +3363,24 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + pe__set_resource_flags(rsc, pe_rsc_block); + } + ++/*! ++ * \internal ++ * \brief Update an integer value and why ++ * ++ * \param[in,out] i Pointer to integer to update ++ * \param[in,out] why Where to store reason for update ++ * \param[in] value New value ++ * \param[in,out] reason Description of why value was changed ++ */ ++static inline void ++remap_because(int *i, const char **why, int value, const char *reason) ++{ ++ if (*i != value) { ++ *i = value; ++ *why = reason; ++ } ++} ++ + /*! + * \internal + * \brief Remap informational monitor results and operation status +@@ -3393,29 +3411,34 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + static void + remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + pe_working_set_t *data_set, enum action_fail_response *on_fail, +- int target_rc, int *rc, int *status) { ++ int target_rc, int *rc, int *status) ++{ + bool is_probe = false; ++ int orig_exit_status = *rc; ++ int orig_exec_status = *status; ++ const char *why = NULL; + const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + const char *key = get_op_key(xml_op); + const char *exit_reason = crm_element_value(xml_op, + XML_LRM_ATTR_EXIT_REASON); + + if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) { +- int remapped_rc = pcmk__effective_rc(*rc); +- +- if (*rc != remapped_rc) { +- crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc); ++ // Remap degraded results to their usual counterparts ++ *rc = pcmk__effective_rc(*rc); ++ if (*rc != orig_exit_status) { ++ why = 
"degraded monitor result"; + if (!node->details->shutdown || node->details->online) { + record_failed_op(xml_op, node, rsc, data_set); + } +- +- *rc = remapped_rc; + } + } + + if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { +- *status = PCMK_EXEC_DONE; +- *rc = PCMK_OCF_NOT_RUNNING; ++ if ((*status != PCMK_EXEC_DONE) || (*rc != PCMK_OCF_NOT_RUNNING)) { ++ *status = PCMK_EXEC_DONE; ++ *rc = PCMK_OCF_NOT_RUNNING; ++ why = "irrelevant probe result"; ++ } + } + + /* If the executor reported an operation status of anything but done or +@@ -3423,22 +3446,19 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + * it should be treated as a failure or not, because we know the expected + * result. + */ +- if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) { +- return; ++ switch (*status) { ++ case PCMK_EXEC_DONE: ++ case PCMK_EXEC_ERROR: ++ break; ++ default: ++ goto remap_done; + } + +- CRM_ASSERT(rsc); +- CRM_CHECK(task != NULL, +- *status = PCMK_EXEC_ERROR; return); +- +- *status = PCMK_EXEC_DONE; +- + if (exit_reason == NULL) { + exit_reason = ""; + } + + is_probe = pcmk_xe_is_probe(xml_op); +- + if (is_probe) { + task = "probe"; + } +@@ -3452,12 +3472,15 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + * those versions or processing of saved CIB files from those versions, + * so we do not need to care much about this case. 
+ */ +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, "obsolete history format"); + crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", + key, pe__node_name(node)); + +- } else if (target_rc != *rc) { +- *status = PCMK_EXEC_ERROR; ++ } else if (*rc == target_rc) { ++ remap_because(status, &why, PCMK_EXEC_DONE, "expected result"); ++ ++ } else { ++ remap_because(status, &why, PCMK_EXEC_ERROR, "unexpected result"); + pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", + key, pe__node_name(node), + target_rc, services_ocf_exitcode_str(target_rc), +@@ -3468,7 +3491,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + switch (*rc) { + case PCMK_OCF_OK: + if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why,PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, pe__node_name(node), + last_change_str(xml_op)); +@@ -3479,7 +3502,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + if (is_probe || (target_rc == *rc) + || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why, PCMK_EXEC_DONE, "exit status"); + rsc->role = RSC_ROLE_STOPPED; + + /* clear any previous failure actions */ +@@ -3490,7 +3513,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + + case PCMK_OCF_RUNNING_PROMOTED: + if (is_probe && (*rc != target_rc)) { +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, + "Probe found %s active and promoted on %s at %s", + rsc->id, pe__node_name(node), +@@ -3502,11 +3525,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + case PCMK_OCF_DEGRADED_PROMOTED: + case PCMK_OCF_FAILED_PROMOTED: + rsc->role = RSC_ROLE_PROMOTED; +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, "exit 
status"); + break; + + case PCMK_OCF_NOT_CONFIGURED: +- *status = PCMK_EXEC_ERROR_FATAL; ++ remap_because(status, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); + break; + + case PCMK_OCF_UNIMPLEMENT_FEATURE: +@@ -3517,9 +3540,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + + if (interval_ms == 0) { + check_recoverable(rsc, node, task, *rc, xml_op); +- *status = PCMK_EXEC_ERROR_HARD; ++ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, ++ "exit status"); + } else { +- *status = PCMK_EXEC_NOT_SUPPORTED; ++ remap_because(status, &why, PCMK_EXEC_NOT_SUPPORTED, ++ "exit status"); + } + } + break; +@@ -3528,7 +3553,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + case PCMK_OCF_INVALID_PARAM: + case PCMK_OCF_INSUFFICIENT_PRIV: + check_recoverable(rsc, node, task, *rc, xml_op); +- *status = PCMK_EXEC_ERROR_HARD; ++ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, "exit status"); + break; + + default: +@@ -3537,13 +3562,21 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + "on %s at %s as failure", + *rc, task, rsc->id, pe__node_name(node), + last_change_str(xml_op)); +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, ++ "unknown exit status"); + } + break; + } + +- pe_rsc_trace(rsc, "Remapped %s status to '%s'", +- key, pcmk_exec_status_str(*status)); ++remap_done: ++ if (why != NULL) { ++ pe_rsc_trace(rsc, ++ "Remapped %s result from [%s: %s] to [%s: %s] " ++ "because of %s", ++ key, pcmk_exec_status_str(orig_exec_status), ++ crm_exit_str(orig_exit_status), ++ pcmk_exec_status_str(*status), crm_exit_str(*rc), why); ++ } + } + + // return TRUE if start or monitor last failure but parameters changed +@@ -3947,9 +3980,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + parent = uber_parent(rsc); + } + +- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", +- task_key, task, task_id, status, rc, 
pe__node_name(node), +- role2text(rsc->role)); ++ pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", ++ ID(xml_op), task, task_id, pe__node_name(node), ++ pcmk_exec_status_str(status), crm_exit_str(rc)); + + if (node->details->unclean) { + pe_rsc_trace(rsc, +@@ -4077,9 +4110,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + goto done; + + case PCMK_EXEC_DONE: +- pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", +- task, rsc->id, pe__node_name(node), +- last_change_str(xml_op), ID(xml_op)); + update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); + goto done; + +@@ -4175,9 +4205,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + } + + done: +- pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", +- rsc->id, task, role2text(rsc->role), +- role2text(rsc->next_role)); ++ pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", ++ rsc->id, pe__node_name(node), ID(xml_op), ++ role2text(rsc->role), role2text(rsc->next_role)); + } + + static void +-- +2.31.1 + +From 5a1d2a3ba58fa73225433dab40cee0a6e0ef9bda Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 12:08:55 -0600 +Subject: [PATCH 03/14] Low: scheduler: improve migration history validation + +Instead of a simple CRM_CHECK(), functionize parsing the source and target node +names from a migration action's resource history entry. This reduces +duplication and allows us to log more helpful errors. + +Also, CRM_CHECK() tries to dump core for debugging, and that's not helpful for +corrupted CIB entries. 
+--- + lib/pengine/unpack.c | 87 ++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 75 insertions(+), 12 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index b7b2873..cd1b038 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2786,6 +2786,60 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, + || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, + data_set); + } ++ ++/*! ++ * \internal ++ * \brief Parse migration source and target node names from history entry ++ * ++ * \param[in] entry Resource history entry for a migration action ++ * \param[in] source_node If not NULL, source must match this node ++ * \param[in] target_node If not NULL, target must match this node ++ * \param[out] source_name Where to store migration source node name ++ * \param[out] target_name Where to store migration target node name ++ * ++ * \return Standard Pacemaker return code ++ */ ++static int ++get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, ++ const pe_node_t *target_node, ++ const char **source_name, const char **target_name) ++{ ++ const char *id = ID(entry); ++ ++ if (id == NULL) { ++ crm_err("Ignoring resource history entry without ID"); ++ return pcmk_rc_unpack_error; ++ } ++ ++ *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); ++ *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); ++ if ((*source_name == NULL) || (*target_name == NULL)) { ++ crm_err("Ignoring resource history entry %s without " ++ XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, ++ id); ++ return pcmk_rc_unpack_error; ++ } ++ ++ if ((source_node != NULL) ++ && !pcmk__str_eq(*source_name, source_node->details->uname, ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ crm_err("Ignoring resource history entry %s because " ++ XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", ++ id, pcmk__s(*source_name, ""), pe__node_name(source_node)); ++ 
return pcmk_rc_unpack_error; ++ } ++ ++ if ((target_node != NULL) ++ && !pcmk__str_eq(*target_name, target_node->details->uname, ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ crm_err("Ignoring resource history entry %s because " ++ XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", ++ id, pcmk__s(*target_name, ""), pe__node_name(target_node)); ++ return pcmk_rc_unpack_error; ++ } ++ ++ return pcmk_rc_ok; ++} + + static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +@@ -2834,13 +2888,16 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_node_t *target_node = NULL; + pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + bool source_newer_op = false; + bool target_newer_state = false; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, node, NULL, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If there's any newer non-monitor operation on the source, this migrate_to + * potentially no longer matters for the source. 
+@@ -2949,11 +3006,14 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) + { + xmlNode *target_migrate_from = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, node, NULL, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If a migration failed, we have to assume the resource is active. Clones + * are not allowed to migrate, so role can't be promoted. +@@ -3001,11 +3061,14 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, + xmlNode *xml_op, pe_working_set_t *data_set) + { + xmlNode *source_migrate_to = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, NULL, node, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If a migration failed, we have to assume the resource is active. Clones + * are not allowed to migrate, so role can't be promoted. +-- +2.31.1 + +From 5139e5369769e733b05bc28940d3dccb4f7fca95 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 14:30:16 -0600 +Subject: [PATCH 04/14] Refactor: scheduler: functionize adding a dangling + migration + +... 
for code isolation and readability +--- + lib/pengine/unpack.c | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index cd1b038..fa7c2cc 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2841,6 +2841,28 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + return pcmk_rc_ok; + } + ++/* ++ * \internal ++ * \brief Add a migration source to a resource's list of dangling migrations ++ * ++ * If the migrate_to and migrate_from actions in a live migration both ++ * succeeded, but there is no stop on the source, the migration is considered ++ * "dangling." Add the source to the resource's dangling migration list, which ++ * will be used to schedule a stop on the source without affecting the target. ++ * ++ * \param[in,out] rsc Resource involved in migration ++ * \param[in] node Migration source ++ */ ++static void ++add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) ++{ ++ pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", ++ rsc->id, pe__node_name(node)); ++ rsc->role = RSC_ROLE_STOPPED; ++ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, ++ (gpointer) node); ++} ++ + static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) +@@ -2941,14 +2963,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + if (migrate_from && from_rc == PCMK_OCF_OK + && (from_status == PCMK_EXEC_DONE)) { +- /* The migrate_to and migrate_from both succeeded, so mark the migration +- * as "dangling". This will be used to schedule a stop action on the +- * source without affecting the target. 
+- */ +- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), +- source); +- rsc->role = RSC_ROLE_STOPPED; +- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); ++ add_dangling_migration(rsc, node); + + } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed + /* If the resource has newer state on the target, this migrate_to no +-- +2.31.1 + +From da71c04463d31338dd5da54d1d48b53e413716dc Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 16:57:55 -0600 +Subject: [PATCH 05/14] Refactor: scheduler: check for dangling migration + before setting role + +Previously, unpack_migrate_to_success() set rsc->role = RSC_ROLE_STARTED +then checked for dangling migration, which would reset it to RSC_ROLE_STOPPED. + +For clarity, do the dangling migration check first. +--- + lib/pengine/unpack.c | 47 ++++++++++++++++++++++++-------------------- + 1 file changed, 26 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index fa7c2cc..b858b59 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2905,8 +2905,8 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * migration is considered to be "dangling". Schedule a stop on the source + * in this case. + */ +- int from_rc = 0; +- int from_status = 0; ++ int from_rc = PCMK_OCF_OK; ++ int from_status = PCMK_EXEC_PENDING; + pe_node_t *target_node = NULL; + pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; +@@ -2930,12 +2930,17 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + // Check whether there was a migrate_from action on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, data_set); +- +- /* Even if there's a newer non-monitor operation on the source, we still +- * need to check how this migrate_to might matter for the target. 
+- */ +- if (source_newer_op && migrate_from) { +- return; ++ if (migrate_from != NULL) { ++ if (source_newer_op) { ++ /* There's a newer non-monitor operation on the source and a ++ * migrate_from on the target, so this migrate_to is irrelevant to ++ * the resource's state. ++ */ ++ return; ++ } ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, ++ &from_status); + } + + /* If the resource has newer state on the target after the migration +@@ -2948,24 +2953,24 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + return; + } + +- // Clones are not allowed to migrate, so role can't be promoted ++ /* Check for dangling migration (migrate_from succeeded but stop not done). ++ * We know there's no stop because we already returned if the target has a ++ * migrate_from and the source has any newer non-monitor operation. ++ */ ++ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { ++ add_dangling_migration(rsc, node); ++ return; ++ } ++ ++ /* Without newer state, this migrate_to implies the resource is active. ++ * (Clones are not allowed to migrate, so role can't be promoted.) 
++ */ + rsc->role = RSC_ROLE_STARTED; + + target_node = pe_find_node(data_set->nodes, target); + source_node = pe_find_node(data_set->nodes, source); + +- if (migrate_from) { +- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); +- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); +- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", +- ID(migrate_from), target, from_status, from_rc); +- } +- +- if (migrate_from && from_rc == PCMK_OCF_OK +- && (from_status == PCMK_EXEC_DONE)) { +- add_dangling_migration(rsc, node); +- +- } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed ++ if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target + /* If the resource has newer state on the target, this migrate_to no + * longer matters for the target. + */ +-- +2.31.1 + +From d98a2687d68747b0598554939dea05c420456a12 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:05:50 -0600 +Subject: [PATCH 06/14] Refactor: scheduler: avoid duplication of + active-on-target check + +--- + lib/pengine/unpack.c | 24 ++++++------------------ + 1 file changed, 6 insertions(+), 18 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index b858b59..8cfc0ef 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2914,6 +2914,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + const char *target = NULL; + bool source_newer_op = false; + bool target_newer_state = false; ++ bool active_on_target = false; + + // Get source and target node names from XML + if (get_migration_node_names(xml_op, node, NULL, &source, +@@ -2969,23 +2970,14 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + target_node = pe_find_node(data_set->nodes, target); + source_node = pe_find_node(data_set->nodes, source); ++ active_on_target = !target_newer_state && (target_node != NULL) ++ && target_node->details->online; + + if 
(from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target +- /* If the resource has newer state on the target, this migrate_to no +- * longer matters for the target. +- */ +- if (!target_newer_state +- && target_node && target_node->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, +- target_node->details->online); ++ if (active_on_target) { + native_add_running(rsc, target_node, data_set, TRUE); +- + } else { +- /* With the earlier bail logic, migrate_from != NULL here implies +- * source_newer_op is false, meaning this migrate_to still matters +- * for the source. +- * Consider it failed here - forces a restart, prevents migration +- */ ++ // Mark resource as failed, require recovery, and prevent migration + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } +@@ -2994,11 +2986,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + /* If the resource has newer state on the target, this migrate_to no + * longer matters for the target. + */ +- if (!target_newer_state +- && target_node && target_node->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, +- target_node->details->online); +- ++ if (active_on_target) { + native_add_running(rsc, target_node, data_set, FALSE); + if (source_node && source_node->details->online) { + /* This is a partial migration: the migrate_to completed +-- +2.31.1 + +From ae145309e3fdb26608e99f6d1fe1a7859d98efd0 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:07:58 -0600 +Subject: [PATCH 07/14] Refactor: scheduler: improve unpacking of successful + migrate_to + +Improve log messages, comments, and formatting, and avoid doing things until +needed, to improve efficiency of early returns. 
+--- + lib/pengine/unpack.c | 109 +++++++++++++++++++------------------------ + 1 file changed, 48 insertions(+), 61 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8cfc0ef..224b7b5 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2867,48 +2867,40 @@ static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) + { +- /* A successful migration sequence is: +- * migrate_to on source node +- * migrate_from on target node +- * stop on source node ++ /* A complete migration sequence is: ++ * 1. migrate_to on source node (which succeeded if we get to this function) ++ * 2. migrate_from on target node ++ * 3. stop on source node + * +- * But there could be scenarios like (It's easier to produce with cluster +- * property batch-limit=1): +- * +- * - rscA is live-migrating from node1 to node2. +- * +- * - Before migrate_to on node1 returns, put node2 into standby. +- * +- * - Transition aborts upon return of successful migrate_to on node1. New +- * transition is going to stop the rscA on both nodes and start it on +- * node1. ++ * If no migrate_from has happened, the migration is considered to be ++ * "partial". If the migrate_from succeeded but no stop has happened, the ++ * migration is considered to be "dangling". + * +- * - While it is stopping on node1, run something that is going to make +- * the transition abort again like: +- * crm_resource --resource rscA --ban --node node2 ++ * If a successful migrate_to and stop have happened on the source node, we ++ * still need to check for a partial migration, due to scenarios (easier to ++ * produce with batch-limit=1) like: + * +- * - Transition aborts upon return of stop on node1. ++ * - A resource is migrating from node1 to node2, and a migrate_to is ++ * initiated for it on node1. + * +- * Now although there's a stop on node1, it's still a partial migration and +- * rscA is still potentially active on node2. 
++ * - node2 goes into standby mode while the migrate_to is pending, which ++ * aborts the transition. + * +- * So even if a migrate_to is followed by a stop, we still need to check +- * whether there's a corresponding migrate_from or any newer operation on +- * the target. ++ * - Upon completion of the migrate_to, a new transition schedules a stop ++ * on both nodes and a start on node1. + * +- * If no migrate_from has happened, the migration is considered to be +- * "partial". If the migrate_from failed, make sure the resource gets +- * stopped on both source and target (if up). ++ * - If the new transition is aborted for any reason while the resource is ++ * stopping on node1, the transition after that stop completes will see ++ * the migrate_from and stop on the source, but it's still a partial ++ * migration, and the resource must be stopped on node2 because it is ++ * potentially active there due to the migrate_to. + * +- * If the migrate_to and migrate_from both succeeded (which also implies the +- * resource is no longer running on the source), but there is no stop, the +- * migration is considered to be "dangling". Schedule a stop on the source +- * in this case. ++ * We also need to take into account that either node's history may be ++ * cleared at any point in the migration process. + */ + int from_rc = PCMK_OCF_OK; + int from_status = PCMK_EXEC_PENDING; + pe_node_t *target_node = NULL; +- pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; + const char *source = NULL; + const char *target = NULL; +@@ -2922,13 +2914,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + return; + } + +- /* If there's any newer non-monitor operation on the source, this migrate_to +- * potentially no longer matters for the source. 
+- */ ++ // Check for newer state on the source + source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, + data_set); + +- // Check whether there was a migrate_from action on the target ++ // Check for a migrate_from action from this source on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, data_set); + if (migrate_from != NULL) { +@@ -2944,12 +2934,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + &from_status); + } + +- /* If the resource has newer state on the target after the migration +- * events, this migrate_to no longer matters for the target. ++ /* If the resource has newer state on both the source and target after the ++ * migration events, this migrate_to is irrelevant to the resource's state. + */ + target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, + migrate_from, data_set); +- + if (source_newer_op && target_newer_state) { + return; + } +@@ -2969,7 +2958,6 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + rsc->role = RSC_ROLE_STARTED; + + target_node = pe_find_node(data_set->nodes, target); +- source_node = pe_find_node(data_set->nodes, source); + active_on_target = !target_newer_state && (target_node != NULL) + && target_node->details->online; + +@@ -2981,31 +2969,30 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } ++ return; ++ } + +- } else { // Pending, or complete but erased +- /* If the resource has newer state on the target, this migrate_to no +- * longer matters for the target. 
+- */ +- if (active_on_target) { +- native_add_running(rsc, target_node, data_set, FALSE); +- if (source_node && source_node->details->online) { +- /* This is a partial migration: the migrate_to completed +- * successfully on the source, but the migrate_from has not +- * completed. Remember the source and target; if the newly +- * chosen target remains the same when we schedule actions +- * later, we may continue with the migration. +- */ +- rsc->partial_migration_target = target_node; +- rsc->partial_migration_source = source_node; +- } +- } else if (!source_newer_op) { +- /* This migrate_to matters for the source only if it's the last +- * non-monitor operation here. +- * Consider it failed here - forces a restart, prevents migration ++ // The migrate_from is pending, complete but erased, or to be scheduled ++ ++ if (active_on_target) { ++ pe_node_t *source_node = pe_find_node(data_set->nodes, source); ++ ++ native_add_running(rsc, target_node, data_set, FALSE); ++ if ((source_node != NULL) && source_node->details->online) { ++ /* This is a partial migration: the migrate_to completed ++ * successfully on the source, but the migrate_from has not ++ * completed. Remember the source and target; if the newly ++ * chosen target remains the same when we schedule actions ++ * later, we may continue with the migration. 
+ */ +- pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +- pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); ++ rsc->partial_migration_target = target_node; ++ rsc->partial_migration_source = source_node; + } ++ ++ } else if (!source_newer_op) { ++ // Mark resource as failed, require recovery, and prevent migration ++ pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); ++ pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } + } + +-- +2.31.1 + +From 7d63ed8d52f64d2523367cff36bf77bd85296bd9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:14:57 -0600 +Subject: [PATCH 08/14] Refactor: scheduler: drop redundant argument from + unpack_migrate_to_success() + +--- + lib/pengine/unpack.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 224b7b5..6222115 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2864,8 +2864,7 @@ add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) + } + + static void +-unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +- pe_working_set_t *data_set) ++unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) + { + /* A complete migration sequence is: + * 1. 
migrate_to on source node (which succeeded if we get to this function) +@@ -2916,11 +2915,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + // Check for newer state on the source + source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, +- data_set); ++ rsc->cluster); + + // Check for a migrate_from action from this source on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, +- source, -1, data_set); ++ source, -1, rsc->cluster); + if (migrate_from != NULL) { + if (source_newer_op) { + /* There's a newer non-monitor operation on the source and a +@@ -2938,7 +2937,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * migration events, this migrate_to is irrelevant to the resource's state. + */ + target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, +- migrate_from, data_set); ++ migrate_from, rsc->cluster); + if (source_newer_op && target_newer_state) { + return; + } +@@ -2957,13 +2956,13 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + */ + rsc->role = RSC_ROLE_STARTED; + +- target_node = pe_find_node(data_set->nodes, target); ++ target_node = pe_find_node(rsc->cluster->nodes, target); + active_on_target = !target_newer_state && (target_node != NULL) + && target_node->details->online; + + if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target + if (active_on_target) { +- native_add_running(rsc, target_node, data_set, TRUE); ++ native_add_running(rsc, target_node, rsc->cluster, TRUE); + } else { + // Mark resource as failed, require recovery, and prevent migration + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +@@ -2975,9 +2974,9 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + // The migrate_from is pending, complete but erased, or to be scheduled + + if (active_on_target) { +- pe_node_t *source_node = pe_find_node(data_set->nodes, 
source); ++ pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + +- native_add_running(rsc, target_node, data_set, FALSE); ++ native_add_running(rsc, target_node, rsc->cluster, FALSE); + if ((source_node != NULL) && source_node->details->online) { + /* This is a partial migration: the migrate_to completed + * successfully on the source, but the migrate_from has not +@@ -3946,7 +3945,7 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c + clear_past_failure = TRUE; + + } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { +- unpack_migrate_to_success(rsc, node, xml_op, data_set); ++ unpack_migrate_to_success(rsc, node, xml_op); + + } else if (rsc->role < RSC_ROLE_STARTED) { + pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node)); +-- +2.31.1 + +From 3be487f87bf5e26277379148922525fd98d29681 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 09:13:30 -0600 +Subject: [PATCH 09/14] Doc: scheduler: clarify comments about unpacking + migration history + +per review +--- + lib/pengine/unpack.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 6222115..ec2cf26 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2791,9 +2791,9 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, + * \internal + * \brief Parse migration source and target node names from history entry + * +- * \param[in] entry Resource history entry for a migration action +- * \param[in] source_node If not NULL, source must match this node +- * \param[in] target_node If not NULL, target must match this node ++ * \param[in] entry Resource history entry for a migration action ++ * \param[in] source_node If not NULL, source must match this node ++ * \param[in] target_node If not NULL, target must match this node + * \param[out] source_name Where to store migration source node name + * 
\param[out] target_name Where to store migration target node name + * +@@ -2825,7 +2825,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + pcmk__str_casei|pcmk__str_null_matches)) { + crm_err("Ignoring resource history entry %s because " + XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", +- id, pcmk__s(*source_name, ""), pe__node_name(source_node)); ++ id, *source_name, pe__node_name(source_node)); + return pcmk_rc_unpack_error; + } + +@@ -2834,7 +2834,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + pcmk__str_casei|pcmk__str_null_matches)) { + crm_err("Ignoring resource history entry %s because " + XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", +- id, pcmk__s(*target_name, ""), pe__node_name(target_node)); ++ id, *target_name, pe__node_name(target_node)); + return pcmk_rc_unpack_error; + } + +@@ -2890,7 +2890,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) + * + * - If the new transition is aborted for any reason while the resource is + * stopping on node1, the transition after that stop completes will see +- * the migrate_from and stop on the source, but it's still a partial ++ * the migrate_to and stop on the source, but it's still a partial + * migration, and the resource must be stopped on node2 because it is + * potentially active there due to the migrate_to. 
+ * +@@ -3425,9 +3425,9 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + * \brief Update an integer value and why + * + * \param[in,out] i Pointer to integer to update +- * \param[in,out] why Where to store reason for update ++ * \param[out] why Where to store reason for update + * \param[in] value New value +- * \param[in,out] reason Description of why value was changed ++ * \param[in] reason Description of why value was changed + */ + static inline void + remap_because(int *i, const char **why, int value, const char *reason) +@@ -3456,7 +3456,7 @@ remap_because(int *i, const char **why, int value, const char *reason) + * \param[in] data_set Current cluster working set + * \param[in,out] on_fail What should be done about the result + * \param[in] target_rc Expected return code of operation +- * \param[in,out] rc Actual return code of operation ++ * \param[in,out] rc Actual return code of operation (treated as OCF) + * \param[in,out] status Operation execution status + * + * \note If the result is remapped and the node is not shutting down or failed, +@@ -3548,7 +3548,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + switch (*rc) { + case PCMK_OCF_OK: + if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { +- remap_because(status, &why,PCMK_EXEC_DONE, "probe"); ++ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, pe__node_name(node), + last_change_str(xml_op)); +-- +2.31.1 + +From 3ef6c84a7b0dd434731e72d91f2724bdb52e292e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 09:42:01 -0600 +Subject: [PATCH 10/14] Refactor: scheduler: improve xpath efficiency when + unpacking + +Using "//" means that every child must be searched recursively. If we know the +exact path, we should explicitly specify it. 
+--- + lib/pengine/unpack.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index ec2cf26..8aead58 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2571,6 +2571,13 @@ set_node_score(gpointer key, gpointer value, gpointer user_data) + node->weight = *score; + } + ++#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ ++ "/" XML_CIB_TAG_STATE ++#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ ++ "/" XML_LRM_TAG_RESOURCES \ ++ "/" XML_LRM_TAG_RESOURCE ++#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP ++ + static xmlNode * + find_lrm_op(const char *resource, const char *op, const char *node, const char *source, + int target_rc, pe_working_set_t *data_set) +@@ -2583,10 +2590,9 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * + + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, +- "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='", node, "']" +- "//" XML_LRM_TAG_RESOURCE +- "[@" XML_ATTR_ID "='", resource, "']" +- "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" ++ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", + NULL); + + /* Need to check against transition_magic too? 
*/ +@@ -2631,10 +2637,8 @@ find_lrm_resource(const char *rsc_id, const char *node_name, + + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, +- "//" XML_CIB_TAG_STATE +- "[@" XML_ATTR_UNAME "='", node_name, "']" +- "//" XML_LRM_TAG_RESOURCE +- "[@" XML_ATTR_ID "='", rsc_id, "']", ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", + NULL); + + xml = get_xpath_object((const char *) xpath->str, data_set->input, +-- +2.31.1 + +From 1869f99bc8eeedb976f96f0f1cc3d4dd86735504 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 10:25:53 -0600 +Subject: [PATCH 11/14] Low: scheduler: unknown_on_node() should ignore pending + actions + +Previously, unknown_on_node() looked for any lrm_rsc_op at all to decide +whether a resource is known on a node. However if the only action is pending, +the resource is not yet known. + +Also drop a redundant argument and add a doxygen block. (The rsc argument is +not const due to a getDocPtr() call in the chain, as well as libxml2 calls that +are likely const in practice but aren't marked as such.) +--- + lib/pengine/unpack.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8aead58..14dc202 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2648,19 +2648,32 @@ find_lrm_resource(const char *rsc_id, const char *node_name, + return xml; + } + ++/*! 
++ * \internal ++ * \brief Check whether a resource has no completed action history on a node ++ * ++ * \param[in,out] rsc Resource to check ++ * \param[in] node_name Node to check ++ * ++ * \return true if \p rsc_id is unknown on \p node_name, otherwise false ++ */ + static bool +-unknown_on_node(const char *rsc_id, const char *node_name, +- pe_working_set_t *data_set) ++unknown_on_node(pe_resource_t *rsc, const char *node_name) + { +- xmlNode *lrm_resource = NULL; +- +- lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); ++ bool result = false; ++ xmlXPathObjectPtr search; ++ GString *xpath = g_string_sized_new(256); + +- /* If the resource has no lrm_rsc_op history on the node, that means its +- * state is unknown there. +- */ +- return (lrm_resource == NULL +- || first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP) == NULL); ++ pcmk__g_strcat(xpath, ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" ++ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", ++ NULL); ++ search = xpath_search(rsc->cluster->input, (const char *) xpath->str); ++ result = (numXpathResults(search) == 0); ++ freeXpathObject(search); ++ g_string_free(xpath, TRUE); ++ return result; + } + + /*! +@@ -3027,7 +3040,7 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * Don't just consider it running there. We will get back here anyway in + * case the probe detects it's running there. + */ +- !unknown_on_node(rsc->id, target, data_set) ++ !unknown_on_node(rsc, target) + /* If the resource has newer state on the target after the migration + * events, this migrate_to no longer matters for the target. + */ +@@ -3082,7 +3095,7 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, + * Don't just consider it running there. We will get back here anyway in + * case the probe detects it's running there. 
+ */ +- !unknown_on_node(rsc->id, source, data_set) ++ !unknown_on_node(rsc, source) + /* If the resource has newer state on the source after the migration + * events, this migrate_from no longer matters for the source. + */ +-- +2.31.1 + +From 22fbab8e0d449d2accb231dfcec94294ded27f4e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 12:11:19 -0600 +Subject: [PATCH 12/14] Test: scheduler: add regression test for migration + intermediary + +As of this commit, the cluster wrongly restarts the migrated resource +--- + cts/cts-scheduler.in | 3 + + .../dot/migration-intermediary-cleaned.dot | 46 ++ + .../exp/migration-intermediary-cleaned.exp | 316 +++++++++++ + .../migration-intermediary-cleaned.scores | 201 +++++++ + .../migration-intermediary-cleaned.summary | 94 ++++ + .../xml/migration-intermediary-cleaned.xml | 513 ++++++++++++++++++ + 6 files changed, 1173 insertions(+) + create mode 100644 cts/scheduler/dot/migration-intermediary-cleaned.dot + create mode 100644 cts/scheduler/exp/migration-intermediary-cleaned.exp + create mode 100644 cts/scheduler/scores/migration-intermediary-cleaned.scores + create mode 100644 cts/scheduler/summary/migration-intermediary-cleaned.summary + create mode 100644 cts/scheduler/xml/migration-intermediary-cleaned.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index feb5dc8..9899c36 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -387,6 +387,9 @@ TESTS = [ + [ "probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins" ], + [ "probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed" ], + [ "partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration" ], ++ [ "migration-intermediary-cleaned", ++ "Probe live-migration intermediary with no history" ++ ], + [ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ], + ], + [ +diff --git 
a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot +new file mode 100644 +index 0000000..09568d0 +--- /dev/null ++++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot +@@ -0,0 +1,46 @@ ++ digraph "g" { ++"Connectivity_running_0" [ style=bold color="green" fontcolor="orange"] ++"Connectivity_start_0" -> "Connectivity_running_0" [ style = bold] ++"Connectivity_start_0" -> "ping-1_start_0 rhel8-2" [ style = bold] ++"Connectivity_start_0" [ style=bold color="green" fontcolor="orange"] ++"FencingFail_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] ++"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] ++"ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_monitor_60000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_start_0 rhel8-2" -> "Connectivity_running_0" [ style = bold] ++"ping-1_start_0 rhel8-2" -> "ping-1_monitor_60000 rhel8-2" [ style = 
bold] ++"ping-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"r192.168.122.207_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"r192.168.122.208_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_monitor_0 rhel8-2" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_start_0 rhel8-2" -> "rsc_rhel8-1_monitor_5000 rhel8-2" [ style = bold] ++"rsc_rhel8-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_stop_0 rhel8-3" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-1_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_monitor_0 rhel8-2" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-2_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_start_0 rhel8-2" -> "rsc_rhel8-2_monitor_5000 rhel8-2" [ style = bold] ++"rsc_rhel8-2_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_stop_0 rhel8-4" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-2_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-3_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-4_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-5_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"stateful-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp +new file mode 100644 +index 0000000..28fa776 +--- /dev/null ++++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp +@@ -0,0 +1,316 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores +new file mode 100644 +index 0000000..b3b8dff +--- /dev/null ++++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores +@@ -0,0 +1,201 @@ ++ ++pcmk__clone_allocate: Connectivity allocation score on rhel8-1: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-2: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-3: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-4: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-3: 1 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-2: 0 
++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-4: 1 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-5: 1 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-3: 11 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:1 allocation score on 
rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-4: 6 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-5: 6 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-3: 10 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-4: 5 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-5: 5 ++pcmk__group_assign: group-1 allocation score on rhel8-1: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-2: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-3: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-4: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-5: 0 ++pcmk__group_assign: petulant allocation score on rhel8-1: 0 ++pcmk__group_assign: petulant allocation score on rhel8-2: 0 ++pcmk__group_assign: petulant allocation score on rhel8-3: 0 ++pcmk__group_assign: petulant allocation score on rhel8-4: 0 ++pcmk__group_assign: petulant allocation score on rhel8-5: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-1: 0 ++pcmk__group_assign: 
r192.168.122.207 allocation score on rhel8-2: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-3: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-4: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-5: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-1: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-2: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-3: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-4: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-1: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-2: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-3: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-1: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-2: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-3: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-4: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-5: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-1: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-2: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-3: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-4: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-5: 0 ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-3: 0 ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-5: -INFINITY 
++pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-5: 0 ++pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 ++pcmk__primitive_assign: petulant allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-3: 1 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-4: 1 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-5: 1 ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on 
rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-3: 11 ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-1: 100 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-2: 100 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-3 
allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-3: 100 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-4: 100 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-5: 100 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-3: 11 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-4: 6 ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-3: -INFINITY 
++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-5: 6 ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-5: -INFINITY ++stateful-1:0 promotion score on rhel8-3: 10 ++stateful-1:1 promotion score on rhel8-4: 5 ++stateful-1:2 promotion score on rhel8-5: 5 ++stateful-1:3 promotion score on none: 0 ++stateful-1:4 promotion score on none: 0 +diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary +new file mode 100644 +index 0000000..5de1355 +--- /dev/null ++++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary +@@ -0,0 +1,94 @@ ++Using the original execution date of: 2023-01-19 21:05:59Z ++Current cluster status: ++ * Node List: ++ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * OFFLINE: [ rhel8-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-3 ++ * FencingPass (stonith:fence_dummy): Started rhel8-4 ++ * FencingFail (stonith:fence_dummy): Started rhel8-5 ++ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-4 
(ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 ++ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] ++ * Clone Set: Connectivity [ping-1]: ++ * Started: [ rhel8-3 rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Clone Set: promotable-1 [stateful-1] (promotable): ++ * Promoted: [ rhel8-3 ] ++ * Unpromoted: [ rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Resource Group: group-1: ++ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 ++ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 ++ ++Transition Summary: ++ * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) ++ * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) ++ * Restart migrator ( rhel8-5 ) ++ * Start ping-1:3 ( rhel8-2 ) ++ ++Executing Cluster Transition: ++ * Resource action: Fencing monitor on rhel8-2 ++ * Resource action: FencingPass monitor on rhel8-2 ++ * Resource action: FencingFail monitor on rhel8-2 ++ * Resource action: rsc_rhel8-1 stop on rhel8-3 ++ * Resource action: rsc_rhel8-1 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-2 stop on rhel8-4 ++ * Resource action: rsc_rhel8-2 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-3 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-4 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-5 monitor on rhel8-2 ++ * Resource action: migrator stop on rhel8-2 ++ * Resource action: migrator stop on rhel8-5 ++ * Resource action: migrator monitor on rhel8-2 ++ * Resource action: ping-1 monitor on rhel8-2 ++ * Pseudo action: Connectivity_start_0 ++ * Resource action: stateful-1 monitor on rhel8-2 ++ * Resource action: r192.168.122.207 monitor on rhel8-2 ++ * Resource action: petulant monitor on rhel8-2 ++ * Resource action: r192.168.122.208 monitor on rhel8-2 ++ * Resource action: lsb-dummy monitor on rhel8-2 ++ * Resource action: rsc_rhel8-1 start on rhel8-2 ++ * 
Resource action: rsc_rhel8-2 start on rhel8-2 ++ * Resource action: migrator start on rhel8-5 ++ * Resource action: migrator monitor=10000 on rhel8-5 ++ * Resource action: ping-1 start on rhel8-2 ++ * Pseudo action: Connectivity_running_0 ++ * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 ++ * Resource action: rsc_rhel8-2 monitor=5000 on rhel8-2 ++ * Resource action: ping-1 monitor=60000 on rhel8-2 ++Using the original execution date of: 2023-01-19 21:05:59Z ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * OFFLINE: [ rhel8-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-3 ++ * FencingPass (stonith:fence_dummy): Started rhel8-4 ++ * FencingFail (stonith:fence_dummy): Started rhel8-5 ++ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-2 ++ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-2 ++ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 ++ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] ++ * Clone Set: Connectivity [ping-1]: ++ * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 ] ++ * Clone Set: promotable-1 [stateful-1] (promotable): ++ * Promoted: [ rhel8-3 ] ++ * Unpromoted: [ rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Resource Group: group-1: ++ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 ++ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 +diff --git a/cts/scheduler/xml/migration-intermediary-cleaned.xml b/cts/scheduler/xml/migration-intermediary-cleaned.xml +new file mode 100644 +index 0000000..bec7888 +--- /dev/null ++++ b/cts/scheduler/xml/migration-intermediary-cleaned.xml +@@ -0,0 +1,513 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 1f9fadbb06baded3fc393cfe30a0cb620aca0829 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 17:12:13 -0600 +Subject: [PATCH 13/14] Fix: scheduler: handle cleaned migrate_from history + correctly + +Fixes T623 +--- + lib/pengine/unpack.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 14dc202..9c99183 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2990,6 +2990,15 @@ unpack_migrate_to_success(pe_resource_t 
*rsc, pe_node_t *node, xmlNode *xml_op) + + // The migrate_from is pending, complete but erased, or to be scheduled + ++ /* If there is no history at all for the resource on an online target, then ++ * it was likely cleaned. Just return, and we'll schedule a probe. Once we ++ * have the probe result, it will be reflected in target_newer_state. ++ */ ++ if ((target_node != NULL) && target_node->details->online ++ && unknown_on_node(rsc, target)) { ++ return; ++ } ++ + if (active_on_target) { + pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + +-- +2.31.1 + +From d9d1bf19e8522ea29c87f0c39b05828947bc5b0f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 15:48:01 -0600 +Subject: [PATCH 14/14] Test: scheduler: update expected output for migration + fix + +--- + .../dot/migration-intermediary-cleaned.dot | 8 -- + .../exp/migration-intermediary-cleaned.exp | 88 ++++--------------- + .../migration-intermediary-cleaned.scores | 2 +- + .../migration-intermediary-cleaned.summary | 9 +- + 4 files changed, 22 insertions(+), 85 deletions(-) + +diff --git a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot +index 09568d0..f6eabba 100644 +--- a/cts/scheduler/dot/migration-intermediary-cleaned.dot ++++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot +@@ -7,15 +7,7 @@ + "FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] + "migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] +-"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] +-"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] 
+-"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] +-"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] +-"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] + "petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] + "ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp +index 28fa776..8b9bb39 100644 +--- a/cts/scheduler/exp/migration-intermediary-cleaned.exp ++++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp +@@ -148,91 +148,41 @@ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- + + + +- ++ + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + + + +@@ -241,24 +191,24 @@ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + +@@ -268,7 +218,7 @@ + + + +- ++ + + + +@@ -277,7 +227,7 @@ + + + +- ++ + + + +@@ -286,7 +236,7 @@ + + + +- ++ + + + +@@ -295,7 +245,7 @@ + + + +- ++ + + + +@@ -304,7 +254,7 @@ + + + +- ++ + + + +diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores +index b3b8dff..09f05d1 100644 +--- a/cts/scheduler/scores/migration-intermediary-cleaned.scores ++++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores +@@ -103,7 +103,7 @@ pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 +-pcmk__primitive_assign: migrator allocation 
score on rhel8-5: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-5: 1 + pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 +diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary +index 5de1355..dd127a8 100644 +--- a/cts/scheduler/summary/migration-intermediary-cleaned.summary ++++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary +@@ -13,7 +13,7 @@ Current cluster status: + * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 + * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 + * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 +- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] ++ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 + * Clone Set: Connectivity [ping-1]: + * Started: [ rhel8-3 rhel8-4 rhel8-5 ] + * Stopped: [ rhel8-1 rhel8-2 ] +@@ -30,7 +30,6 @@ Current cluster status: + Transition Summary: + * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) + * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) +- * Restart migrator ( rhel8-5 ) + * Start ping-1:3 ( rhel8-2 ) + + Executing Cluster Transition: +@@ -44,8 +43,6 @@ Executing Cluster Transition: + * Resource action: rsc_rhel8-3 monitor on rhel8-2 + * Resource action: rsc_rhel8-4 monitor on rhel8-2 + * Resource action: rsc_rhel8-5 monitor on rhel8-2 +- * Resource action: migrator stop on rhel8-2 +- * Resource action: migrator stop on rhel8-5 + * Resource action: migrator monitor on rhel8-2 + * Resource action: ping-1 monitor on rhel8-2 + * Pseudo action: Connectivity_start_0 +@@ -56,8 +53,6 @@ Executing Cluster Transition: + * Resource action: lsb-dummy monitor on rhel8-2 + * Resource action: rsc_rhel8-1 start on rhel8-2 + * Resource action: rsc_rhel8-2 start on rhel8-2 +- * Resource action: migrator start on rhel8-5 +- * Resource 
action: migrator monitor=10000 on rhel8-5 + * Resource action: ping-1 start on rhel8-2 + * Pseudo action: Connectivity_running_0 + * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 +@@ -79,7 +74,7 @@ Revised Cluster Status: + * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 + * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 + * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 +- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] ++ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 + * Clone Set: Connectivity [ping-1]: + * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] + * Stopped: [ rhel8-1 ] +-- +2.31.1 + diff --git a/SOURCES/004-g_source_remove.patch b/SOURCES/004-g_source_remove.patch new file mode 100644 index 0000000..2af0f47 --- /dev/null +++ b/SOURCES/004-g_source_remove.patch @@ -0,0 +1,107 @@ +From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 3 Feb 2023 12:08:57 -0800 +Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one + +QE observed a rarely reproducible core dump in the fencer during +Pacemaker shutdown, in which we try to g_source_remove() an op timer +that's already been removed. + +free_stonith_remote_op_list() +-> g_hash_table_destroy() +-> g_hash_table_remove_all_nodes() +-> clear_remote_op_timers() +-> g_source_remove() +-> crm_glib_handler() +-> "Source ID 190 was not found when attempting to remove it" + +The likely cause is that request_peer_fencing() doesn't set +op->op_timer_one to 0 after calling g_source_remove() on it, so if that +op is still in the stonith_remote_op_list at shutdown with the same +timer, clear_remote_op_timers() tries to remove the source for +op_timer_one again. + +There are only five locations that call g_source_remove() on a +remote_fencing_op_t timer. +* Three of them are in clear_remote_op_timers(), which first 0-checks + the timer and then sets it to 0 after g_source_remove(). 
+* One is in remote_op_query_timeout(), which does the same. +* The last is the one we fix here in request_peer_fencing(). + +I don't know all the conditions of QE's test scenario at this point. +What I do know: +* have-watchdog=true +* stonith-watchdog-timeout=10 +* no explicit topology +* fence agent script is missing for the configured fence device +* requested fencing of one node +* cluster shutdown + +Fixes RHBZ2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index d61b5bd..b7426ff 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->state = st_exec; + if (op->op_timer_one) { + g_source_remove(op->op_timer_one); ++ op->op_timer_one = 0; + } + + if (!((stonith_watchdog_timeout_ms > 0) +-- +2.31.1 + +From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 15 Feb 2023 22:30:27 -0800 +Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total + +remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove +the source from the main loop after returning. Currently this function +is used as the callback only when creating op->op_timer_total. + +If we don't set op->op_timer_total to 0 before returning from +remote_op_timeout(), then we can get an assertion and core dump from +GLib when the op's timers are being cleared (either during op +finalization or during fencer shutdown). This is because +clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to +remove the source, but the source has already been removed. + +Note that we're already (correctly) zeroing op->op_timer_one and +op->query_timeout as appropriate in their respective callback functions. 
+ +Fortunately, GLib doesn't care whether the source has already been +removed before we return G_SOURCE_REMOVE from a callback. So it's safe +to call finalize_op() (which removes all the op's timer sources) from +within a callback. + +Fixes RHBZ#2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index b7426ff88..adea3d7d8 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata) + { + remote_fencing_op_t *op = userdata; + ++ op->op_timer_total = 0; ++ + if (op->state == st_done) { + crm_debug("Action '%s' targeting %s for client %s already completed " + CRM_XS " id=%.8s", +-- +2.39.0 diff --git a/SOURCES/005-query-null.patch b/SOURCES/005-query-null.patch new file mode 100644 index 0000000..194cd33 --- /dev/null +++ b/SOURCES/005-query-null.patch @@ -0,0 +1,151 @@ +From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 14 Feb 2023 14:00:37 -0500 +Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in + pcmk__attrd_api_query. + +According to the header file, if node is NULL, pcmk__attrd_api_query +should query the value of the given attribute on all cluster nodes. +This is also what the server expects and how attrd_updater is supposed +to work. + +However, pcmk__attrd_api_query has no way of letting callers decide +whether they want to query all nodes or whether they want to use the +local node. We were passing NULL for the node name, which it took to +mean it should look up the local node name. This calls +pcmk__node_attr_target, which probes the local cluster name and returns +that to pcmk__attrd_api_query. If it returns non-NULL, that value will +then be put into the XML IPC call which means the server will only +return the value for that node. + +In testing this was usually fine. 
However, in pratice, the methods +pcmk__node_attr_target uses to figure out the local cluster node name +involves checking the OCF_RESKEY_CRM_meta_on_node environment variable +among others. + +This variable was never set in testing, but can be set in the real +world. This leads to circumstances where the user did "attrd_updater -QA" +expecting to get the values on all nodes, but instead only got the value +on the local cluster node. + +In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never +called if the node was NULL but was called otherwise. + +The fix is to modify pcmk__attrd_api_query to take an option for +querying all nodes. If that's present, we'll query all nodes. If it's +not present, we'll look at the given node name - NULL means look it up, +anything else means just that node. + +Regression in 2.1.5 introduced by eb20a65577 +--- + include/crm/common/attrd_internal.h | 6 +++++- + include/crm/common/ipc_attrd_internal.h | 7 +++++-- + lib/common/ipc_attrd.c | 12 ++++++++---- + tools/attrd_updater.c | 5 +++-- + 4 files changed, 21 insertions(+), 9 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index 389be48..7337c38 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2022 the Pacemaker project contributors ++ * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts { + pcmk__node_attr_perm = (1 << 5), + pcmk__node_attr_sync_local = (1 << 6), + pcmk__node_attr_sync_cluster = (1 << 7), ++ // pcmk__node_attr_utilization is 8, but that has not been backported. ++ // I'm leaving the gap here in case we backport that in the future and ++ // also to avoid problems on mixed-version clusters. 
++ pcmk__node_attr_query_all = (1 << 9), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h +index 2c6713f..b1b7584 100644 +--- a/include/crm/common/ipc_attrd_internal.h ++++ b/include/crm/common/ipc_attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2022 the Pacemaker project contributors ++ * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); + * + * \param[in,out] api Connection to pacemaker-attrd + * \param[in] node Look up the attribute for this node +- * (or NULL for all nodes) ++ * (or NULL for the local node) + * \param[in] name Attribute name + * \param[in] options Bitmask of pcmk__node_attr_opts + * ++ * \note Passing pcmk__node_attr_query_all will cause the function to query ++ * the value of \p name on all nodes, regardless of the value of \p node. ++ * + * \return Standard Pacemaker return code + */ + int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 4606509..dece49b 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2011-2022 the Pacemaker project contributors ++ * Copyright 2011-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + return EINVAL; + } + +- target = pcmk__node_attr_target(node); ++ if (pcmk_is_set(options, pcmk__node_attr_query_all)) { ++ node = NULL; ++ } else { ++ target = pcmk__node_attr_target(node); + +- if (target != NULL) { +- node = target; ++ if (target != NULL) { ++ node = target; ++ } + } + + request = create_attrd_op(NULL); +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index 3cd766d..cbd341d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, + static int + send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) + { ++ uint32_t options = pcmk__node_attr_none; + pcmk_ipc_api_t *attrd_api = NULL; + int rc = pcmk_rc_ok; + +@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no + + /* Decide which node(s) to query */ + if (query_all == TRUE) { +- attr_node = NULL; ++ options |= pcmk__node_attr_query_all; + } + +- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0); ++ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); + + if (rc != pcmk_rc_ok) { + g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", +-- +2.31.1 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec new file mode 100644 index 0000000..59a80f7 --- /dev/null +++ b/SPECS/pacemaker.spec @@ -0,0 +1,1607 @@ +# User-configurable globals and defines to control package behavior +# (these should not test {with X} values, which are declared later) + +## User and group to use for nonprivileged services +%global uname hacluster +%global gname haclient + +## Where to install Pacemaker documentation +%if 0%{?suse_version} > 0 +%global pcmk_docdir %{_docdir}/%{name}-%{version} +%else +%if 0%{?rhel} > 7 +%global pcmk_docdir 
%{_docdir}/%{name}-doc +%else +%global pcmk_docdir %{_docdir}/%{name} +%endif +%endif + +## GitHub entity that distributes source (for ease of using a fork) +%global github_owner ClusterLabs + +## Where bug reports should be submitted +## Leave bug_url undefined to use ClusterLabs default, others define it here +%if 0%{?rhel} +%global bug_url https://bugzilla.redhat.com/ +%else +%if 0%{?fedora} +%global bug_url https://bugz.fedoraproject.org/%{name} +%endif +%endif + +## What to use as the OCF resource agent root directory +%global ocf_root %{_prefix}/lib/ocf + +## Upstream pacemaker version, and its package version (specversion +## can be incremented to build packages reliably considered "newer" +## than previously built packages with the same pcmkversion) +%global pcmkversion 2.1.5 +%global specversion 8 + +## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build +%global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1 + +## Since git v2.11, the extent of abbreviation is autoscaled by default +## (used to be constant of 7), so we need to convey it for non-tags, too. 
+%if (0%{?fedora} >= 26) || (0%{?rhel} >= 9) +%global commit_abbrev 9 +%else +%global commit_abbrev 7 +%endif + +## Nagios source control identifiers +%global nagios_name nagios-agents-metadata +%global nagios_hash 105ab8a + + +# Define conditionals so that "rpmbuild --with " and +# "rpmbuild --without " can enable and disable specific features + +## Add option to enable support for stonith/external fencing agents +%bcond_with stonithd + +## Add option for whether to support storing sensitive information outside CIB +%bcond_without cibsecrets + +## Add option to enable Native Language Support (experimental) +%bcond_with nls + +## Add option to create binaries suitable for use with profiling tools +%bcond_with profiling + +## Allow deprecated option to skip (or enable, on RHEL) documentation +%if 0%{?rhel} +%bcond_with doc +%else +%bcond_without doc +%endif + +## Add option to default to start-up synchronization with SBD. +## +## If enabled, SBD *MUST* be built to default similarly, otherwise data +## corruption could occur. Building both Pacemaker and SBD to default +## to synchronization improves safety, without requiring higher-level tools +## to be aware of the setting or requiring users to modify configurations +## after upgrading to versions that support synchronization. +%bcond_without sbd_sync + +## Add option to prefix package version with "0." 
+## (so later "official" packages will be considered updates) +%bcond_with pre_release + +## Add option to ship Upstart job files +%bcond_with upstart_job + +## Add option to turn off hardening of libraries and daemon executables +%bcond_without hardening + +## Add option to enable (or disable, on RHEL 8) links for legacy daemon names +%if 0%{?rhel} && 0%{?rhel} <= 8 +%bcond_without legacy_links +%else +%bcond_with legacy_links +%endif + +# Define globals for convenient use later + +## Workaround to use parentheses in other globals +%global lparen ( +%global rparen ) + +## Whether this is a tagged release (final or release candidate) +%define tag_release %(c=%{commit}; case ${c} in Pacemaker-*%{rparen} echo 1 ;; + *%{rparen} echo 0 ;; esac) + +## Portion of export/dist tarball name after "pacemaker-", and release version +%if 0%{tag_release} +%define archive_version %(c=%{commit}; echo ${c:10}) +%define archive_github_url %{commit}#/%{name}-%{archive_version}.tar.gz +%else +%if "%{commit}" == "DIST" +%define archive_version %{pcmkversion} +%define archive_github_url %{archive_version}#/%{name}-%{pcmkversion}.tar.gz +%else +%define archive_version %(c=%{commit}; echo ${c:0:%{commit_abbrev}}) +%define archive_github_url %{archive_version}#/%{name}-%{archive_version}.tar.gz +%endif +%endif +### Always use a simple release number +%define pcmk_release %{specversion} + +## Whether this platform defaults to using systemd as an init system +## (needs to be evaluated prior to BuildRequires being enumerated and +## installed as it's intended to conditionally select some of these, and +## for that there are only few indicators with varying reliability: +## - presence of systemd-defined macros (when building in a full-fledged +## environment, which is not the case with ordinary mock-based builds) +## - systemd-aware rpm as manifested with the presence of particular +## macro (rpm itself will trivially always be present when building) +## - existence of /usr/lib/os-release 
file, which is something heavily +## propagated by systemd project +## - when not good enough, there's always a possibility to check +## particular distro-specific macros (incl. version comparison) +%define systemd_native (%{?_unitdir:1}%{!?_unitdir:0}%{nil \ + } || %{?__transaction_systemd_inhibit:1}%{!?__transaction_systemd_inhibit:0}%{nil \ + } || %(test -f /usr/lib/os-release; test $? -ne 0; echo $?)) + +# Even though we pass @SYSTEM here, Pacemaker is still an exception to the +# crypto policies because it adds "+ANON-DH" for CIB remote commands and +# "+DHE-PSK:+PSK" for Pacemaker Remote connections. This is currently +# required for the respective functionality. +%if 0%{?fedora} > 20 || 0%{?rhel} > 7 +## Base GnuTLS cipher priorities (presumably only the initial, required keyword) +## overridable with "rpmbuild --define 'pcmk_gnutls_priorities PRIORITY-SPEC'" +%define gnutls_priorities %{?pcmk_gnutls_priorities}%{!?pcmk_gnutls_priorities:@SYSTEM} +%endif + +%if 0%{?fedora} > 22 || 0%{?rhel} > 7 +%global supports_recommends 1 +%endif + +## Different distros name certain packages differently +## (note: corosync libraries also differ, but all provide corosync-devel) +%if 0%{?suse_version} > 0 +%global pkgname_bzip2_devel libbz2-devel +%global pkgname_docbook_xsl docbook-xsl-stylesheets +%global pkgname_gettext gettext-tools +%global pkgname_gnutls_devel libgnutls-devel +%global pkgname_shadow_utils shadow +%global pkgname_procps procps +%global pkgname_glue_libs libglue +%global pkgname_pcmk_libs lib%{name}3 +%global hacluster_id 90 +%else +%global pkgname_libtool_devel libtool-ltdl-devel +%global pkgname_libtool_devel_arch libtool-ltdl-devel%{?_isa} +%global pkgname_bzip2_devel bzip2-devel +%global pkgname_docbook_xsl docbook-style-xsl +%global pkgname_gettext gettext-devel +%global pkgname_gnutls_devel gnutls-devel +%global pkgname_shadow_utils shadow-utils +%global pkgname_procps procps-ng +%global pkgname_glue_libs cluster-glue-libs +%global 
pkgname_pcmk_libs %{name}-libs +%global hacluster_id 189 +%endif + +## Distro-specific configuration choices + +### Use 2.0-style output when other distro packages don't support current output +%if 0%{?fedora} || ( 0%{?rhel} && 0%{?rhel} <= 8 ) +%global compat20 --enable-compat-2.0 +%endif + +### Default concurrent-fencing to true when distro prefers that +%if 0%{?rhel} >= 7 +%global concurrent_fencing --with-concurrent-fencing-default=true +%endif + +### Default resource-stickiness to 1 when distro prefers that +%if 0%{?fedora} >= 35 || 0%{?rhel} >= 9 +%global resource_stickiness --with-resource-stickiness-default=1 +%endif + + +# Python-related definitions + +## Turn off auto-compilation of Python files outside Python specific paths, +## so there's no risk that unexpected "__python" macro gets picked to do the +## RPM-native byte-compiling there (only "{_datadir}/pacemaker/tests" affected) +## -- distro-dependent tricks or automake's fallback to be applied there +%if %{defined _python_bytecompile_extra} +%global _python_bytecompile_extra 0 +%else +### the statement effectively means no RPM-native byte-compiling will occur at +### all, so distro-dependent tricks for Python-specific packages to be applied +%global __os_install_post %(echo '%{__os_install_post}' | { + sed -e 's!/usr/lib[^[:space:]]*/brp-python-bytecompile[[:space:]].*$!!g'; }) +%endif + +## Prefer Python 3 definitions explicitly, in case 2 is also available +%if %{defined __python3} +%global python_name python3 +%global python_path %{__python3} +%define python_site %{?python3_sitelib}%{!?python3_sitelib:%( + %{python_path} -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} +%else +%if %{defined python_version} +%global python_name python%(echo %{python_version} | cut -d'.' 
-f1) +%define python_path %{?__python}%{!?__python:/usr/bin/%{python_name}} +%else +%global python_name python +%global python_path %{?__python}%{!?__python:/usr/bin/python%{?python_pkgversion}} +%endif +%define python_site %{?python_sitelib}%{!?python_sitelib:%( + %{python_name} -c 'from distutils.sysconfig import get_python_lib as gpl; print(gpl(1))' 2>/dev/null)} +%endif + + +# Keep sane profiling data if requested +%if %{with profiling} + +## Disable -debuginfo package and stripping binaries/libraries +%define debug_package %{nil} + +%endif + + +Name: pacemaker +Summary: Scalable High-Availability cluster resource manager +Version: %{pcmkversion} +Release: %{pcmk_release}%{?dist} +%if %{defined _unitdir} +License: GPLv2+ and LGPLv2+ +%else +# initscript is Revised BSD +License: GPLv2+ and LGPLv2+ and BSD +%endif +Url: https://www.clusterlabs.org/ + +# Example: https://codeload.github.com/ClusterLabs/pacemaker/tar.gz/e91769e +# will download pacemaker-e91769e.tar.gz +# +# The ending part starting with '#' is ignored by github but necessary for +# rpmbuild to know what the tar archive name is. (The downloaded file will be +# named correctly only for commit IDs, not tagged releases.) +# +# You can use "spectool -s 0 pacemaker.spec" (rpmdevtools) to show final URL. 
+Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{archive_github_url} +Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz + +# upstream commits +Patch001: 001-sync-points.patch +Patch002: 002-remote-regression.patch +Patch003: 003-history-cleanup.patch +Patch004: 004-g_source_remove.patch +Patch005: 005-query-null.patch + +# downstream-only commits +#Patch1xx: 1xx-xxxx.patch + +Requires: resource-agents +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} +Requires: %{name}-cli = %{version}-%{release} +%if !%{defined _unitdir} +Requires: %{pkgname_procps} +Requires: psmisc +%endif +%{?systemd_requires} + +%if %{defined centos} +ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 %{arm} +%else +%if 0%{?rhel} +ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 +%endif +%endif + +Requires: %{python_path} +BuildRequires: %{python_name}-devel + +# Pacemaker requires a minimum libqb functionality +Requires: libqb >= 0.17.0 +BuildRequires: libqb-devel >= 0.17.0 + +# Required basic build tools +BuildRequires: autoconf +BuildRequires: automake +BuildRequires: coreutils +BuildRequires: findutils +BuildRequires: gcc +BuildRequires: grep +BuildRequires: libtool +%if %{defined pkgname_libtool_devel} +BuildRequires: %{?pkgname_libtool_devel} +%endif +BuildRequires: make +BuildRequires: pkgconfig +BuildRequires: sed + +# Required for core functionality +BuildRequires: pkgconfig(glib-2.0) >= 2.42 +BuildRequires: libxml2-devel +BuildRequires: libxslt-devel +BuildRequires: libuuid-devel +BuildRequires: %{pkgname_bzip2_devel} + +# Enables optional functionality +BuildRequires: pkgconfig(dbus-1) +BuildRequires: %{pkgname_docbook_xsl} +BuildRequires: %{pkgname_gnutls_devel} +BuildRequires: help2man +BuildRequires: ncurses-devel +BuildRequires: pam-devel +BuildRequires: %{pkgname_gettext} >= 0.18 + +# Required for "make check" +BuildRequires: libcmocka-devel + +%if %{systemd_native} 
+BuildRequires: pkgconfig(systemd) +%endif + +# RH patches are created by git, so we need git to apply them +BuildRequires: git + +# The RHEL 8.5+ build root has corosync_cfg_trackstart() available, so +# Pacemaker's configure script will build support for it. Add a hard dependency +# to ensure users have compatible Corosync libraries if they upgrade Pacemaker. +Requires: corosync >= 3.1.1 +BuildRequires: corosync-devel >= 3.1.1 + +%if %{with stonithd} +BuildRequires: %{pkgname_glue_libs}-devel +%endif + +%if %{with doc} +BuildRequires: asciidoc +BuildRequires: inkscape +BuildRequires: %{python_name}-sphinx +%endif + +# Booth requires this +Provides: pacemaker-ticket-support = 2.0 + +Provides: pcmk-cluster-manager = %{version}-%{release} +Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} + +# Bundled bits +## Pacemaker uses the crypto/md5-buffer module from gnulib +%if 0%{?fedora} || 0%{?rhel} +Provides: bundled(gnulib) = 20200404 +%endif + +%description +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +It supports more than 16 node clusters with significant capabilities +for managing resources and dependencies. + +It will run scripts at initialization, when machines go up or down, +when related resources fail and can be configured to periodically check +resource health. + +Available rpmbuild rebuild options: + --with(out) : cibsecrets hardening nls pre_release profiling + stonithd + +%package cli +License: GPLv2+ and LGPLv2+ +Summary: Command line tools for controlling Pacemaker clusters +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +# For crm_report +Requires: tar +Requires: bzip2 +Requires: perl-TimeDate +Requires: %{pkgname_procps} +Requires: psmisc +Requires(post):coreutils + +%description cli +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. 
+ +The %{name}-cli package contains command line tools that can be used +to query and control the cluster from machines that may, or may not, +be part of the cluster. + +%package -n %{pkgname_pcmk_libs} +License: GPLv2+ and LGPLv2+ +Summary: Core Pacemaker libraries +Requires(pre): %{pkgname_shadow_utils} +Requires: %{name}-schemas = %{version}-%{release} +# sbd 1.4.0+ supports the libpe_status API for pe_working_set_t +# sbd 1.4.2+ supports startup/shutdown handshake via pacemakerd-api +# and handshake defaults to enabled in this spec +Conflicts: sbd < 1.4.2 + +%description -n %{pkgname_pcmk_libs} +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{pkgname_pcmk_libs} package contains shared libraries needed for cluster +nodes and those just running the CLI tools. + +%package cluster-libs +License: GPLv2+ and LGPLv2+ +Summary: Cluster Libraries used by Pacemaker +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} + +%description cluster-libs +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{name}-cluster-libs package contains cluster-aware shared +libraries needed for nodes that will form part of the cluster nodes. + +%package remote +%if %{defined _unitdir} +License: GPLv2+ and LGPLv2+ +%else +# initscript is Revised BSD +License: GPLv2+ and LGPLv2+ and BSD +%endif +Summary: Pacemaker remote executor daemon for non-cluster nodes +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +Requires: %{name}-cli = %{version}-%{release} +Requires: resource-agents +%if !%{defined _unitdir} +Requires: %{pkgname_procps} +%endif +# -remote can be fully independent of systemd +%{?systemd_ordering}%{!?systemd_ordering:%{?systemd_requires}} +Provides: pcmk-cluster-manager = %{version}-%{release} +Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} + +%description remote +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. 
+ +The %{name}-remote package contains the Pacemaker Remote daemon +which is capable of extending pacemaker functionality to remote +nodes not running the full corosync/cluster stack. + +%package -n %{pkgname_pcmk_libs}-devel +License: GPLv2+ and LGPLv2+ +Summary: Pacemaker development package +Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} +Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} +Requires: %{pkgname_bzip2_devel}%{?_isa} +Requires: corosync-devel >= 2.0.0 +Requires: glib2-devel%{?_isa} +Requires: libqb-devel%{?_isa} +%if %{defined pkgname_libtool_devel_arch} +Requires: %{?pkgname_libtool_devel_arch} +%endif +Requires: libuuid-devel%{?_isa} +Requires: libxml2-devel%{?_isa} +Requires: libxslt-devel%{?_isa} + +%description -n %{pkgname_pcmk_libs}-devel +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{pkgname_pcmk_libs}-devel package contains headers and shared libraries +for developing tools for Pacemaker. + +%package cts +License: GPLv2+ and LGPLv2+ +Summary: Test framework for cluster-related technologies like Pacemaker +Requires: %{python_path} +Requires: %{pkgname_pcmk_libs} = %{version}-%{release} +Requires: %{name}-cli = %{version}-%{release} +Requires: %{pkgname_procps} +Requires: psmisc +Requires: %{python_name}-psutil +BuildArch: noarch + +# systemd Python bindings are a separate package in some distros +%if %{defined systemd_requires} +%if 0%{?fedora} > 22 || 0%{?rhel} > 7 +Requires: %{python_name}-systemd +%endif +%endif + +%description cts +Test framework for cluster-related technologies like Pacemaker + +%package doc +License: CC-BY-SA-4.0 +Summary: Documentation for Pacemaker +BuildArch: noarch + +%description doc +Documentation for Pacemaker. + +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. 
+
+%package schemas
+License: GPLv2
+Summary: Schemas and upgrade stylesheets for Pacemaker
+BuildArch: noarch
+
+%description schemas
+Schemas and upgrade stylesheets for Pacemaker
+
+Pacemaker is an advanced, scalable High-Availability cluster resource
+manager.
+
+%package nagios-plugins-metadata
+License: GPLv3
+Summary: Pacemaker Nagios Metadata
+# NOTE below are the plugins this metadata uses.
+# These packages are not requirements because RHEL does not ship these plugins.
+# This metadata provides third-party support for nagios. Users may install the
+# plugins via third-party rpm packages, or source. If RHEL ships the plugins in
+# the future, we should consider enabling the following required fields.
+#Requires: nagios-plugins-http
+#Requires: nagios-plugins-ldap
+#Requires: nagios-plugins-mysql
+#Requires: nagios-plugins-pgsql
+#Requires: nagios-plugins-tcp
+Requires: pcmk-cluster-manager
+BuildArch: noarch
+
+%description nagios-plugins-metadata
+The metadata files required for Pacemaker to execute the nagios plugins
+that monitor resources. 
+ +%prep +%autosetup -a 1 -n %{name}-%{archive_version} -S git_am -p 1 + +%build + +export systemdsystemunitdir=%{?_unitdir}%{!?_unitdir:no} + +%if %{with hardening} +# prefer distro-provided hardening flags in case they are defined +# through _hardening_{c,ld}flags macros, configure script will +# use its own defaults otherwise; if such hardenings are completely +# undesired, rpmbuild using "--without hardening" +# (or "--define '_without_hardening 1'") +export CFLAGS_HARDENED_EXE="%{?_hardening_cflags}" +export CFLAGS_HARDENED_LIB="%{?_hardening_cflags}" +export LDFLAGS_HARDENED_EXE="%{?_hardening_ldflags}" +export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" +%endif + +./autogen.sh + +%{configure} \ + PYTHON=%{python_path} \ + %{!?with_hardening: --disable-hardening} \ + %{?with_legacy_links: --enable-legacy-links} \ + %{?with_profiling: --with-profiling} \ + %{?with_cibsecrets: --with-cibsecrets} \ + %{?with_nls: --enable-nls} \ + %{?with_sbd_sync: --with-sbd-sync-default="true"} \ + %{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \ + %{?bug_url: --with-bug-url=%{bug_url}} \ + %{?ocf_root: --with-ocfdir=%{ocf_root}} \ + %{?concurrent_fencing} \ + %{?resource_stickiness} \ + %{?compat20} \ + --disable-static \ + --with-initdir=%{_initrddir} \ + --with-runstatedir=%{_rundir} \ + --localstatedir=%{_var} \ + --with-nagios \ + --with-nagios-metadata-dir=%{_datadir}/pacemaker/nagios/plugins-metadata/ \ + --with-nagios-plugin-dir=%{_libdir}/nagios/plugins/ \ + --with-version=%{version}-%{release} + +%if 0%{?suse_version} >= 1200 +# Fedora handles rpath removal automagically +sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool +sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool +%endif + +make %{_smp_mflags} V=1 + +%check +make %{_smp_mflags} check +{ cts/cts-scheduler --run load-stopped-loop \ + && cts/cts-cli \ + && touch .CHECKED +} 2>&1 | sed 's/[fF]ail/faiil/g' # prevent false positives in 
rpmlint +[ -f .CHECKED ] && rm -f -- .CHECKED +exit $? # TODO remove when rpm<4.14 compatibility irrelevant + +%install +# skip automake-native Python byte-compilation, since RPM-native one (possibly +# distro-confined to Python-specific directories, which is currently the only +# relevant place, anyway) assures proper intrinsic alignment with wider system +# (such as with py_byte_compile macro, which is concurrent Fedora/EL specific) +make install \ + DESTDIR=%{buildroot} V=1 docdir=%{pcmk_docdir} \ + %{?_python_bytecompile_extra:%{?py_byte_compile:am__py_compile=true}} + +%if %{with upstart_job} +mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/init +install -m 644 pacemakerd/pacemaker.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.conf +install -m 644 pacemakerd/pacemaker.combined.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/pacemaker.combined.conf +install -m 644 tools/crm_mon.upstart ${RPM_BUILD_ROOT}%{_sysconfdir}/init/crm_mon.conf +%endif + +mkdir -p %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata +for file in $(find %{nagios_name}-%{nagios_hash}/metadata -type f); do + install -m 644 $file %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata +done + +%if %{defined _unitdir} +mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/lib/rpm-state/%{name} +%endif + +%if %{with nls} +%find_lang %{name} +%endif + +# Don't package libtool archives +find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f + +# Do not package these either on RHEL +rm -f %{buildroot}/%{_sbindir}/fence_legacy +rm -f %{buildroot}/%{_mandir}/man8/fence_legacy.* +find %{buildroot} -name '*o2cb*' -type f -print0 | xargs -0 rm -f + +# For now, don't package the servicelog-related binaries built only for +# ppc64le when certain dependencies are installed. If they get more exercise by +# advanced users, we can reconsider. 
+rm -f %{buildroot}/%{_sbindir}/notifyServicelogEvent +rm -f %{buildroot}/%{_sbindir}/ipmiservicelogd + +# Byte-compile Python sources where suitable and the distro procedures known +%if %{defined py_byte_compile} +%{py_byte_compile %{python_path} %{buildroot}%{_datadir}/pacemaker/tests} +%if !%{defined _python_bytecompile_extra} +%{py_byte_compile %{python_path} %{buildroot}%{python_site}/cts} +%endif +%endif + +%post +%if %{defined _unitdir} +%systemd_post pacemaker.service +%else +/sbin/chkconfig --add pacemaker || : +%endif + +%preun +%if %{defined _unitdir} +%systemd_preun pacemaker.service +%else +/sbin/service pacemaker stop >/dev/null 2>&1 || : +if [ "$1" -eq 0 ]; then + # Package removal, not upgrade + /sbin/chkconfig --del pacemaker || : +fi +%endif + +%postun +%if %{defined _unitdir} +%systemd_postun_with_restart pacemaker.service +%endif + +%pre remote +%if %{defined _unitdir} +# Stop the service before anything is touched, and remember to restart +# it as one of the last actions (compared to using systemd_postun_with_restart, +# this avoids suicide when sbd is in use) +systemctl --quiet is-active pacemaker_remote +if [ $? 
-eq 0 ] ; then + mkdir -p %{_localstatedir}/lib/rpm-state/%{name} + touch %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote + systemctl stop pacemaker_remote >/dev/null 2>&1 +else + rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote +fi +%endif + +%post remote +%if %{defined _unitdir} +%systemd_post pacemaker_remote.service +%else +/sbin/chkconfig --add pacemaker_remote || : +%endif + +%preun remote +%if %{defined _unitdir} +%systemd_preun pacemaker_remote.service +%else +/sbin/service pacemaker_remote stop >/dev/null 2>&1 || : +if [ "$1" -eq 0 ]; then + # Package removal, not upgrade + /sbin/chkconfig --del pacemaker_remote || : +fi +%endif + +%postun remote +%if %{defined _unitdir} +# This next line is a no-op, because we stopped the service earlier, but +# we leave it here because it allows us to revert to the standard behavior +# in the future if desired +%systemd_postun_with_restart pacemaker_remote.service +# Explicitly take care of removing the flag-file(s) upon final removal +if [ "$1" -eq 0 ] ; then + rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote +fi +%endif + +%posttrans remote +%if %{defined _unitdir} +if [ -e %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote ] ; then + systemctl start pacemaker_remote >/dev/null 2>&1 + rm -f %{_localstatedir}/lib/rpm-state/%{name}/restart_pacemaker_remote +fi +%endif + +%post cli +%if %{defined _unitdir} +%systemd_post crm_mon.service +%endif +if [ "$1" -eq 2 ]; then + # Package upgrade, not initial install: + # Move any pre-2.0 logs to new location to ensure they get rotated + { mv -fbS.rpmsave %{_var}/log/pacemaker.log* %{_var}/log/pacemaker \ + || mv -f %{_var}/log/pacemaker.log* %{_var}/log/pacemaker + } >/dev/null 2>/dev/null || : +fi + +%preun cli +%if %{defined _unitdir} +%systemd_preun crm_mon.service +%endif + +%postun cli +%if %{defined _unitdir} +%systemd_postun_with_restart crm_mon.service +%endif + +%pre -n %{pkgname_pcmk_libs} 
+getent group %{gname} >/dev/null || groupadd -r %{gname} -g %{hacluster_id} +getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u %{hacluster_id} -s /sbin/nologin -c "cluster user" %{uname} +exit 0 + +%if %{defined ldconfig_scriptlets} +%ldconfig_scriptlets -n %{pkgname_pcmk_libs} +%ldconfig_scriptlets cluster-libs +%else +%post -n %{pkgname_pcmk_libs} -p /sbin/ldconfig +%postun -n %{pkgname_pcmk_libs} -p /sbin/ldconfig + +%post cluster-libs -p /sbin/ldconfig +%postun cluster-libs -p /sbin/ldconfig +%endif + +%files +########################################################### +%config(noreplace) %{_sysconfdir}/sysconfig/pacemaker +%{_sbindir}/pacemakerd + +%if %{defined _unitdir} +%{_unitdir}/pacemaker.service +%else +%{_initrddir}/pacemaker +%endif + +%exclude %{_libexecdir}/pacemaker/cts-log-watcher +%exclude %{_libexecdir}/pacemaker/cts-support +%exclude %{_sbindir}/pacemaker-remoted +%exclude %{_sbindir}/pacemaker_remoted +%exclude %{_datadir}/pacemaker/nagios +%{_libexecdir}/pacemaker/* + +%{_sbindir}/crm_master +%{_sbindir}/fence_watchdog + +%doc %{_mandir}/man7/pacemaker-controld.* +%doc %{_mandir}/man7/pacemaker-schedulerd.* +%doc %{_mandir}/man7/pacemaker-fenced.* +%doc %{_mandir}/man7/ocf_pacemaker_controld.* +%doc %{_mandir}/man7/ocf_pacemaker_remote.* +%doc %{_mandir}/man8/crm_master.* +%doc %{_mandir}/man8/fence_watchdog.* +%doc %{_mandir}/man8/pacemakerd.* + +%doc %{_datadir}/pacemaker/alerts + +%license licenses/GPLv2 +%doc COPYING +%doc ChangeLog + +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cib +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/pengine +%{ocf_root}/resource.d/pacemaker/controld +%{ocf_root}/resource.d/pacemaker/remote + +%if %{with upstart_job} +%config(noreplace) %{_sysconfdir}/init/pacemaker.conf +%config(noreplace) %{_sysconfdir}/init/pacemaker.combined.conf +%endif + +%files cli +%dir %attr (750, root, %{gname}) %{_sysconfdir}/pacemaker +%config(noreplace) %{_sysconfdir}/logrotate.d/pacemaker 
+%config(noreplace) %{_sysconfdir}/sysconfig/crm_mon + +%if %{defined _unitdir} +%{_unitdir}/crm_mon.service +%endif + +%if %{with upstart_job} +%config(noreplace) %{_sysconfdir}/init/crm_mon.conf +%endif + +%{_sbindir}/attrd_updater +%{_sbindir}/cibadmin +%if %{with cibsecrets} +%{_sbindir}/cibsecret +%endif +%{_sbindir}/crm_attribute +%{_sbindir}/crm_diff +%{_sbindir}/crm_error +%{_sbindir}/crm_failcount +%{_sbindir}/crm_mon +%{_sbindir}/crm_node +%{_sbindir}/crm_resource +%{_sbindir}/crm_rule +%{_sbindir}/crm_standby +%{_sbindir}/crm_verify +%{_sbindir}/crmadmin +%{_sbindir}/iso8601 +%{_sbindir}/crm_shadow +%{_sbindir}/crm_simulate +%{_sbindir}/crm_report +%{_sbindir}/crm_ticket +%{_sbindir}/stonith_admin +# "dirname" is owned by -schemas, which is a prerequisite +%{_datadir}/pacemaker/report.collector +%{_datadir}/pacemaker/report.common +# XXX "dirname" is not owned by any prerequisite +%{_datadir}/snmp/mibs/PCMK-MIB.txt + +%exclude %{ocf_root}/resource.d/pacemaker/controld +%exclude %{ocf_root}/resource.d/pacemaker/remote + +%dir %{ocf_root} +%dir %{ocf_root}/resource.d +%{ocf_root}/resource.d/pacemaker + +%doc %{_mandir}/man7/* +%exclude %{_mandir}/man7/pacemaker-controld.* +%exclude %{_mandir}/man7/pacemaker-schedulerd.* +%exclude %{_mandir}/man7/pacemaker-fenced.* +%exclude %{_mandir}/man7/ocf_pacemaker_controld.* +%exclude %{_mandir}/man7/ocf_pacemaker_remote.* +%doc %{_mandir}/man8/* +%exclude %{_mandir}/man8/crm_master.* +%exclude %{_mandir}/man8/fence_watchdog.* +%exclude %{_mandir}/man8/pacemakerd.* +%exclude %{_mandir}/man8/pacemaker-remoted.* + +%license licenses/GPLv2 +%doc COPYING +%doc ChangeLog + +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/blackbox +%dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cores +%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker +%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles + +%files -n 
%{pkgname_pcmk_libs} %{?with_nls:-f %{name}.lang}
+%{_libdir}/libcib.so.*
+%{_libdir}/liblrmd.so.*
+%{_libdir}/libcrmservice.so.*
+%{_libdir}/libcrmcommon.so.*
+%{_libdir}/libpe_status.so.*
+%{_libdir}/libpe_rules.so.*
+%{_libdir}/libpacemaker.so.*
+%{_libdir}/libstonithd.so.*
+%license licenses/LGPLv2.1
+%doc COPYING
+%doc ChangeLog
+
+%files cluster-libs
+%{_libdir}/libcrmcluster.so.*
+%license licenses/LGPLv2.1
+%doc COPYING
+%doc ChangeLog
+
+%files remote
+%config(noreplace) %{_sysconfdir}/sysconfig/pacemaker
+%if %{defined _unitdir}
+# state directory is shared between the subpackages
+# let rpm take care of removing it once it isn't
+# referenced anymore and empty
+%ghost %dir %{_localstatedir}/lib/rpm-state/%{name}
+%{_unitdir}/pacemaker_remote.service
+%else
+%{_initrddir}/pacemaker_remote
+%endif
+
+%{_sbindir}/pacemaker-remoted
+%{_sbindir}/pacemaker_remoted
+%{_mandir}/man8/pacemaker-remoted.*
+%license licenses/GPLv2
+%doc COPYING
+%doc ChangeLog
+
+%files doc
+%doc %{pcmk_docdir}
+%license licenses/CC-BY-SA-4.0
+
+%files cts
+%{python_site}/cts
+%{_datadir}/pacemaker/tests
+
+%{_libexecdir}/pacemaker/cts-log-watcher
+%{_libexecdir}/pacemaker/cts-support
+
+%license licenses/GPLv2
+%doc COPYING
+%doc ChangeLog
+
+%files -n %{pkgname_pcmk_libs}-devel
+%{_includedir}/pacemaker
+%{_libdir}/*.so
+%{_libdir}/pkgconfig/*.pc
+%license licenses/LGPLv2.1
+%doc COPYING
+%doc ChangeLog
+
+%files schemas
+%license licenses/GPLv2
+%dir %{_datadir}/pacemaker
+%{_datadir}/pacemaker/*.rng
+%{_datadir}/pacemaker/*.xsl
+%{_datadir}/pacemaker/api
+%{_datadir}/pacemaker/base
+%{_datadir}/pkgconfig/pacemaker-schemas.pc
+
+%files nagios-plugins-metadata
+%dir %{_datadir}/pacemaker/nagios/plugins-metadata
+%attr(0644,root,root) %{_datadir}/pacemaker/nagios/plugins-metadata/*
+%license %{nagios_name}-%{nagios_hash}/COPYING
+
+%changelog
+* Wed Feb 22 2023 Chris Lumens - 2.1.5-8
+- Rebuild with new release due to build system problems
+- Related: rhbz2168249
+- Related: 
rhbz2168675 + +* Tue Feb 21 2023 Chris Lumens - 2.1.5-7 +- Additional fixes for SIGABRT during pacemaker-fenced shutdown +- Backport fix for attrd_updater -QA not displaying all nodes +- Related: rhbz2168249 +- Resolves: rhbz2168675 + +* Wed Feb 8 2023 Chris Lumens - 2.1.5-6 +- Backport fix for migration history cleanup causing resource recovery +- Backport fix for SIGABRT during pacemaker-fenced shutdown +- Resolves: rhbz2166388 +- Resolves: rhbz2168249 + +* Tue Jan 24 2023 Ken Gaillot - 2.1.5-5 +- Backport fix for remote node shutdown regression +- Resolves: rhbz2163567 + +* Fri Dec 9 2022 Chris Lumens - 2.1.5-4 +- Rebase pacemaker on upstream 2.1.5 final release +- Add support for sync points to attribute daemon +- Resolves: rhbz1463033 +- Resolves: rhbz1866578 +- Resolves: rhbz2122352 + +* Tue Dec 6 2022 Chris Lumens - 2.1.5-3 +- Fix errors found by covscan +- Related: rhbz2122352 + +* Wed Nov 23 2022 Chris Lumens - 2.1.5-2 +- Rebase on upstream 2.1.5-rc3 release +- Resolves: rhbz1626546 +- Related: rhbz2122352 + +* Tue Nov 22 2022 Chris Lumens - 2.1.5-1 +- Rebase on upstream 2.1.5-rc2 release +- Resolves: rhbz1822125 +- Resolves: rhbz2095662 +- Resolves: rhbz2121852 +- Resolves: rhbz2122806 +- Resolves: rhbz2133497 +- Resolves: rhbz2142681 + +* Wed Aug 10 2022 Ken Gaillot - 2.1.4-5 +- Fix regression in crm_resource -O +- Resolves: rhbz2118337 + +* Wed Jul 20 2022 Ken Gaillot - 2.1.4-4 +- Ensure all nodes are re-unfenced after device configuration change +- crm_resource --why now checks node health status +- Resolves: rhbz1872483 +- Resolves: rhbz2065818 + +* Wed Jun 29 2022 Ken Gaillot - 2.1.4-3 +- Add support for ACL groups +- Resolves: rhbz1724310 + +* Tue Jun 28 2022 Ken Gaillot - 2.1.4-2 +- Restore crm_attribute query behavior when attribute does not exist +- Resolves: rhbz2072107 + +* Wed Jun 15 2022 Ken Gaillot - 2.1.4-1 +- Fencer should not ignore CIB updates when stonith is disabled +- Rebase pacemaker on upstream 2.1.4 final release +- Fix typo in 
ocf:pacemaker:HealthSMART meta-data +- Resolves: rhbz2055935 +- Resolves: rhbz2072107 +- Resolves: rhbz2094855 + +* Wed Jun 1 2022 Ken Gaillot - 2.1.3-2 +- crm_attribute works on remote node command line when hostname differs +- Rebase pacemaker on upstream 2.1.3 final release +- Resolves: rhbz1384172 +- Resolves: rhbz2072107 + +* Wed May 18 2022 Ken Gaillot - 2.1.3-1 +- crm_resource --restart fails to restart clone instances except instance 0 +- Add new multiple-active option for "stop unexpected instances" +- Unable to show metadata for "service" agents with "@" and "." in the name +- Resource ocf:pacemaker:attribute does not comply with the OCF 1.1 standard +- Allow resource meta-attribute to exempt resource from node health restrictions +- Show node health states in crm_mon +- Rebase pacemaker on upstream 2.1.3-rc2 release +- crm_mon API result does not validate against schema if fence event has exit-reason +- Resolves: rhbz1930578 +- Resolves: rhbz2036815 +- Resolves: rhbz2045096 +- Resolves: rhbz2049722 +- Resolves: rhbz2059638 +- Resolves: rhbz2065812 +- Resolves: rhbz2072107 +- Resolves: rhbz2086230 + +* Wed Jan 26 2022 Ken Gaillot - 2.1.2-4 +- Fix regression in down event detection that affects remote nodes +- Resolves: rhbz2046446 + +* Fri Jan 21 2022 Ken Gaillot - 2.1.2-3 +- Improve display of failed actions +- Handle certain probe failures as stopped instead of failed +- Update pcmk_delay_base description in option meta-data +- Avoid crash when using clone notifications +- Retry Corosync shutdown tracking if first attempt fails +- Resolves: rhbz1470834 +- Resolves: rhbz1506372 +- Resolves: rhbz2027370 +- Resolves: rhbz2039675 +- Resolves: rhbz2042550 + +* Thu Dec 16 2021 Ken Gaillot - 2.1.2-2 +- Correctly get metadata for systemd agent names that end in '@' +- Use correct OCF 1.1 syntax in ocf:pacemaker:Stateful meta-data +- Fix regression in displayed times in crm_mon's fence history +- Resolves: rhbz2003151 +- Resolves: rhbz2027370 +- Resolves: 
rhbz2032027 + +* Tue Nov 30 2021 Ken Gaillot - 2.1.2-1 +- Allow per-host fence delays for a single fence device +- Use OCF 1.1 enum type in cluster option metadata for better validation +- crm-resource --force-* now works with LSB resources +- Allow spaces in pcmk_host_map +- ACL group names are no longer restricted to a unique XML id +- Rebase on upstream 2.1.2 +- Ensure upgrades get compatible Corosync libraries +- Resolves: rhbz1082146 +- Resolves: rhbz1281463 +- Resolves: rhbz1346014 +- Resolves: rhbz1376538 +- Resolves: rhbz1384420 +- Resolves: rhbz2011973 +- Resolves: rhbz2027006 + +* Fri Aug 20 2021 Ken Gaillot - 2.1.0-8 +- Fix XML issue in fence_watchdog meta-data +- Resolves: rhbz1443666 + +* Thu Aug 12 2021 Ken Gaillot - 2.1.0-7 +- Fix minor issue with crm_resource error message change +- Resolves: rhbz1447918 + +* Tue Aug 10 2021 Ken Gaillot - 2.1.0-6 +- Fix watchdog agent version information +- Ensure transient attributes are cleared when multiple nodes are lost +- Resolves: rhbz1443666 +- Resolves: rhbz1986998 + +* Fri Aug 06 2021 Ken Gaillot - 2.1.0-5 +- Allow configuring specific nodes to use watchdog-only sbd for fencing +- Resolves: rhbz1443666 + +* Fri Jul 30 2021 Ken Gaillot - 2.1.0-4 +- Show better error messages in crm_resource with invalid resource types +- Avoid selecting wrong device when dynamic-list fencing is used with host map +- Do not schedule probes of unmanaged resources on pending nodes +- Fix argument handling regressions in crm_attribute and wrappers +- Resolves: rhbz1447918 +- Resolves: rhbz1978010 +- Resolves: rhbz1982453 +- Resolves: rhbz1984120 + +* Tue Jun 22 2021 Ken Gaillot - 2.1.0-3 +- crm_resource now supports XML output from resource agent actions +- Correct output for crm_simulate --show-failcounts +- Avoid remote node unfencing loop +- Resolves: rhbz1644628 +- Resolves: rhbz1686426 +- Resolves: rhbz1961857 + +* Wed Jun 9 2021 Ken Gaillot - 2.1.0-2 +- Rebase on upstream 2.1.0 final release +- Correct schema for 
crm_resource XML output +- Resolves: rhbz1935464 +- Resolves: rhbz1967087 + +* Thu May 20 2021 Ken Gaillot - 2.1.0-1 +- Add crm_simulate --show-attrs and --show-failcounts options +- Retry getting fence agent meta-data after initial failure +- Add debug option for more verbose ocf:pacemaker:ping logs +- Rebase on upstream 2.1.0-rc2 release +- Support OCF Resource Agent API 1.1 standard +- Fix crm_mon regression that could cause certain agents to fail at shutdown +- Allow setting OCF check level for crm_resource --validate and --force-check +- Resolves: rhbz1686426 +- Resolves: rhbz1797579 +- Resolves: rhbz1843177 +- Resolves: rhbz1935464 +- Resolves: rhbz1936696 +- Resolves: rhbz1948620 +- Resolves: rhbz1955792 + +* Mon Feb 15 2021 Ken Gaillot - 2.0.5-8 +- Route cancellations through correct node when remote connection is moving +- Resolves: rhbz1928762 + +* Fri Feb 12 2021 Ken Gaillot - 2.0.5-7 +- Do not introduce regression in crm_resource --locate +- Resolves: rhbz1925681 + +* Wed Feb 3 2021 Ken Gaillot - 2.0.5-6 +- crm_mon --daemonize should reconnect if cluster restarts +- crm_mon should show more informative messages when cluster is starting +- crm_mon should show rest of status if fencing history is unavailable +- cibsecret now works on remote nodes (as long as name can be reached via ssh) +- Stop remote nodes correctly when connection history is later than node history +- Resolves: rhbz1466875 +- Resolves: rhbz1872490 +- Resolves: rhbz1880426 +- Resolves: rhbz1881537 +- Resolves: rhbz1898457 + +* Thu Jan 14 2021 Ken Gaillot - 2.0.5-5 +- Allow non-critical resources that stop rather than make another resource move +- Support crm_resource --digests option for showing operation digests +- Clean-up of all resources should work from remote nodes +- Resolves: rhbz1371576 +- Resolves: rhbz1872376 +- Resolves: rhbz1907726 + +* Wed Dec 2 2020 Klaus Wenninger - 2.0.5-4 +- Rebase on upstream 2.0.5 release +- Make waiting to be pinged by sbd via pacemakerd-api the 
default +- Resolves: rhbz1885645 +- Resolves: rhbz1873138 + +* Wed Nov 18 2020 Ken Gaillot - 2.0.5-3 +- Rebase on upstream 2.0.5-rc3 release +- Resolves: rhbz1885645 + +* Wed Oct 28 2020 Ken Gaillot - 2.0.5-2 +- Rebase on upstream 2.0.5-rc2 release +- Prevent ACL bypass (CVE-2020-25654) +- Resolves: rhbz1885645 +- Resolves: rhbz1889582 + +* Tue Oct 20 2020 Ken Gaillot - 2.0.5-1 +- crm_mon --resource option to filter output by resource +- Avoid filling /dev/shm after frequent corosync errors +- Allow configurable permissions for log files +- Ensure ACL write permission always supersedes read +- Use fence device monitor timeout for starts and probes +- Allow type="integer" in rule expressions +- Avoid error messages when running crm_node inside bundles +- Avoid situation where promotion is not scheduled until next transition +- crm_mon should show more clearly when an entire group is disabled +- Rebase on upstream 2.0.5-rc1 release +- Resolves: rhbz1300597 +- Resolves: rhbz1614166 +- Resolves: rhbz1647136 +- Resolves: rhbz1833173 +- Resolves: rhbz1856015 +- Resolves: rhbz1866573 +- Resolves: rhbz1874391 +- Resolves: rhbz1835717 +- Resolves: rhbz1748139 +- Resolves: rhbz1885645 + +* Thu Aug 20 2020 Ken Gaillot - 2.0.4-6 +- Fix cibsecret bug when node name is different from hostname +- Resolves: rhbz1870873 + +* Fri Jul 24 2020 Ken Gaillot - 2.0.4-5 +- Synchronize start-up and shutdown with SBD +- Resolves: rhbz1718324 + +* Wed Jul 22 2020 Ken Gaillot - 2.0.4-4 +- Allow crm_node -l/-p options to work from Pacemaker Remote nodes +- Correct action timeout value listed in log message +- Fix regression in crm_mon --daemonize with HTML output +- Resolves: rhbz1796824 +- Resolves: rhbz1856035 +- Resolves: rhbz1857728 + +* Thu Jun 25 2020 Ken Gaillot - 2.0.4-3 +- Allow resource and operation defaults per resource or operation type +- Rebase on upstream 2.0.4 final release +- Support on-fail="demote" and no-quorum-policy="demote" options +- Remove incorrect comment from 
sysconfig file +- Resolves: rhbz1628701 +- Resolves: rhbz1828488 +- Resolves: rhbz1837747 +- Resolves: rhbz1848789 + +* Wed Jun 10 2020 Ken Gaillot - 2.0.4-2 +- Improve cibsecret help and clean up code per static analysis +- Resolves: rhbz1793860 + +* Mon Jun 8 2020 Ken Gaillot - 2.0.4-1 +- Clear leaving node's attributes if there is no DC +- Add crm_mon --node option to limit display to particular node or tagged nodes +- Add crm_mon --include/--exclude options to select what sections are shown +- priority-fencing-delay option bases delay on where resources are active +- Pending DC fencing gets 'stuck' in status display +- crm_rule can now check rule expiration when "years" is specified +- crm_mon now formats error messages better +- Support for CIB secrets is enabled +- Rebase on latest upstream Pacemaker release +- Fix regression introduced in 8.2 so crm_node -n works on remote nodes +- Avoid infinite loop when topology is removed while unfencing is in progress +- Resolves: rhbz1300604 +- Resolves: rhbz1363907 +- Resolves: rhbz1784601 +- Resolves: rhbz1787751 +- Resolves: rhbz1790591 +- Resolves: rhbz1793653 +- Resolves: rhbz1793860 +- Resolves: rhbz1828488 +- Resolves: rhbz1830535 +- Resolves: rhbz1831775 + +* Mon Jan 27 2020 Ken Gaillot - 2.0.3-5 +- Clear leaving node's attributes if there is no DC +- Resolves: rhbz1791841 + +* Thu Jan 16 2020 Ken Gaillot - 2.0.3-4 +- Implement shutdown-lock feature +- Resolves: rhbz1712584 + +* Wed Nov 27 2019 Ken Gaillot - 2.0.3-3 +- Rebase on Pacemaker-2.0.3 final release +- Resolves: rhbz1752538 + +* Wed Nov 13 2019 Ken Gaillot - 2.0.3-2 +- Rebase on Pacemaker-2.0.3-rc3 +- Resolves: rhbz1752538 + +* Thu Oct 31 2019 Ken Gaillot - 2.0.3-1 +- Rebase on Pacemaker-2.0.3-rc2 +- Parse crm_mon --fence-history option correctly +- Put timeout on controller waiting for scheduler response +- Offer Pacemaker Remote option for bind address +- Calculate cluster recheck interval dynamically +- Clarify crm_resource help text +- Reduce 
system calls after forking a child process +- Resolves: rhbz1699978 +- Resolves: rhbz1725236 +- Resolves: rhbz1743377 +- Resolves: rhbz1747553 +- Resolves: rhbz1748805 +- Resolves: rhbz1752538 +- Resolves: rhbz1762025 + +* Mon Aug 26 2019 Ken Gaillot - 2.0.2-3 +- Make pacemaker-cli require tar and bzip2 +- Resolves: rhbz#1741580 + +* Fri Jun 21 2019 Klaus Wenninger - 2.0.2-2 +- Synchronize fence-history on fenced-restart +- Cleanup leftover pending-fence-actions when fenced is restarted +- Improve fencing of remote-nodes +- Resolves: rhbz#1708380 +- Resolves: rhbz#1708378 +- Resolves: rhbz#1721198 +- Resolves: rhbz#1695737 + +* Thu Jun 6 2019 Ken Gaillot - 2.0.2-1 +- Add stonith_admin option to display XML output +- Add new crm_rule tool to check date/time rules +- List any constraints cleared by crm_resource --clear +- crm_resource --validate can now get resource parameters from command line +- Rebase on upstream version 2.0.2 +- Default concurrent-fencing to true +- Resolves: rhbz#1555939 +- Resolves: rhbz#1572116 +- Resolves: rhbz#1631752 +- Resolves: rhbz#1637020 +- Resolves: rhbz#1695737 +- Resolves: rhbz#1715426 + +* Wed May 15 2019 Ken Gaillot - 2.0.1-5 +- Add gating tests for CI +- Restore correct behavior when live migration is interrupted +- Improve clients' authentication of IPC servers (CVE-2018-16877) +- Fix use-after-free with potential information disclosure (CVE-2019-3885) +- Improve pacemakerd authentication of running subdaemons (CVE-2018-16878) +- Resolves: rhbz#1682116 +- Resolves: rhbz#1684306 +- Resolves: rhbz#1694558 +- Resolves: rhbz#1694560 +- Resolves: rhbz#1694908 + +* Tue Jan 29 2019 Ken Gaillot - 2.0.1-4 +- Remove duplicate fence history state listing in crm_mon XML output +- Resolves: rhbz#1667191 + +* Thu Jan 10 2019 Ken Gaillot - 2.0.1-3 +- Fix bundle recovery regression in 2.0.1-2 +- Resolves: rhbz#1660592 + +* Fri Dec 14 2018 Ken Gaillot - 2.0.1-2 +- Move pacemaker-doc installed files to /usr/share/doc/pacemaker-doc + to avoid 
conflict with RHEL 8 location of pacemaker subpackage docs +- Resolves: rhbz#1543494 + +* Thu Dec 13 2018 Ken Gaillot - 2.0.1-1 +- Rebase on upstream commit 0eb799156489376e13fb79dca47ea9160e9d4595 (Pacemaker-2.0.1-rc1) +- Follow upstream change of splitting XML schemas into separate package +- Resolves: rhbz#1543494 + +* Fri Nov 16 2018 Ken Gaillot - 2.0.0-11 +- Rebase on upstream commit efbf81b65931423b34c91cde7204a2d0a71e77e6 +- Resolves: rhbz#1543494 + +* Fri Sep 28 2018 Ken Gaillot - 2.0.0-10 +- Rebase on upstream commit b67d8d0de9794e59719608d9b156b4a3c6556344 +- Update spec for Python macro changes +- Resolves: rhbz#1543494 +- Resolves: rhbz#1633612 + +* Mon Sep 17 2018 Ken Gaillot - 2.0.0-9 +- Rebase on upstream commit c4330b46bf1c3dcd3e367b436efb3bbf82ef51cd +- Support podman as bundle container launcher +- Ignore fence history in crm_mon when using CIB_file +- Resolves: rhbz#1543494 +- Resolves: rhbz#1607898 +- Resolves: rhbz#1625231 + +* Thu Aug 30 2018 Ken Gaillot - 2.0.0-8 +- Rebase on upstream commit dd6fd26f77945b9bb100d5a3134f149b27601552 +- Fixes (unreleased) API regression +- Resolves: rhbz#1543494 +- Resolves: rhbz#1622969 + +* Mon Aug 13 2018 Ken Gaillot - 2.0.0-7 +- Include upstream main branch commits through 975347d4 +- Resolves: rhbz#1543494 +- Resolves: rhbz#1602650 +- Resolves: rhbz#1608369 + +* Mon Jul 30 2018 Florian Weimer - 2.0.0-6 +- Rebuild with fixed binutils + +* Mon Jul 9 2018 Ken Gaillot - 2.0.0-5 +- Rebase to upstream version 2.0.0 final +- Resolves: rhbz#1543494 + +* Wed Jun 6 2018 Ken Gaillot - 2.0.0-4 +- Rebase to upstream version 2.0.0-rc5 +- Resolves: rhbz#1543494 + +* Mon Apr 30 2018 Ken Gaillot - 2.0.0-2 +- Rebase to upstream version 2.0.0-rc3 +- Resolves: rhbz#1543494 + +* Tue Apr 17 2018 Ken Gaillot - 2.0.0-1 +- Rebase to upstream version 2.0.0-rc2 with later fixes +- Resolves: rhbz#1543494 + +* Tue Apr 17 2018 Josh Boyer - 1.1.17-3 +- Stop hard requiring nagios-plugins + +* Wed Oct 18 2017 Jan Pokorný - 1.1.17-2 +- 
Rebuilt to fix libqb vs. ld.bfd/binutils-2.29 incompatibility making + some CLI executables unusable under some circumstances (rhbz#1503843) + +* Thu Aug 03 2017 Fedora Release Engineering - 1.1.17-1.2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild + +* Thu Jul 27 2017 Fedora Release Engineering - 1.1.17-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Mass_Rebuild + +* Fri Jul 07 2017 Jan Pokorný - 1.1.17-1 +- Update for new upstream tarball: Pacemaker-1.1.17, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17 + +* Thu Jun 22 2017 Jan Pokorný - 1.1.17-0.1.rc4 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc4, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc4 +- Add an imposed lower bound for glib2 BuildRequires + +* Thu Jun 01 2017 Jan Pokorný - 1.1.17-0.1.rc3 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc3, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc3 + +* Wed May 24 2017 Jan Pokorný - 1.1.17-0.1.rc2 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc2, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc2 + +* Tue May 09 2017 Jan Pokorný - 1.1.17-0.1.rc1 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.17-rc1, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.17-rc1 + +* Mon Feb 06 2017 Jan Pokorný - 1.1.16-2.a39ea6491.git +- Update for (slightly stabilized) snapshot beyond Pacemaker-1.1.16 + (commit a39ea6491), including: + . prevent FTBFS with new GCC 7 (a7476dd96) +- Adapt spec file more akin to upstream version including: + . 
better pre-release vs. tags logic (4581d4366) + +* Fri Dec 02 2016 Jan Pokorný - 1.1.16-1 +- Update for new upstream tarball: Pacemaker-1.1.16, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.16 +- Adapt spec file more akin to upstream version including: + . clarify licensing, especially for -doc (f01f734) + . fix pacemaker-remote upgrade (779e0e3) + . require python >= 2.6 (31ef7f0) + . older libqb is sufficient (based on 30fe1ce) + . remove openssl-devel and libselinux-devel as BRs (2e05c17) + . make systemd BR pkgconfig-driven (6285924) + . defines instead of some globals + error suppression (625d427) +- Rectify -nagios-plugins-metadata declared license and install + also respective license text + +* Thu Nov 03 2016 Jan Pokorný - 1.1.15-3 +- Apply fix for CVE-2016-7035 (improper IPC guarding) + +* Tue Jul 19 2016 Fedora Release Engineering - 1.1.15-2.1 +- https://fedoraproject.org/wiki/Changes/Automatic_Provides_for_Python_RPM_Packages + +* Thu Jul 07 2016 Jan Pokorný - 1.1.15-2 +- Stop building with -fstack-protector-all using the upstream patches + overhauling toolchain hardening (Fedora natively uses + -fstack-protector-strong so this effectively relaxed stack protection + is the only effect as hardened flags are already used by default: + https://fedoraproject.org/wiki/Changes/Harden_All_Packages) + +* Wed Jun 22 2016 Jan Pokorný - 1.1.15-1 +- Update for new upstream tarball: Pacemaker-1.1.15, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15 +- Adapt spec file more akin to upstream version: + . move xml schema files + PCMK-MIB.txt (81ef956), logrotate configuration + file (ce576cf; drop it from -remote package as well), attrd_updater + (aff80ae), the normal resource agents (1fc7287), and common directories + under /var/lib/pacemaker (3492794) from main package under -cli + . 
simplify docdir build parameter passing and drop as of now + redundant chmod invocations (e91769e) + +* Fri May 27 2016 Jan Pokorný - 1.1.15-0.1.rc3 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.15-rc3, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15-rc3 +- Drop fence_pcmk (incl. man page) from the package (no use where no CMAN) +- Drop license macro emulation for cases when not supported natively + (several recent Fedora releases do not need that) + +* Mon May 16 2016 Jan Pokorný - 1.1.15-0.1.rc2 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.15-rc2, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15-rc2 + +* Tue Apr 26 2016 Jan Pokorný - 1.1.15-0.1.rc1 +- Update for new upstream tarball for release candidate: Pacemaker-1.1.15-rc1, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.15-rc1 +- Adapt spec file more akin to upstream version (also to reflect recent + changes like ability to built explicitly without Publican-based docs) + +* Thu Mar 31 2016 Jan Pokorný - 1.1.14-2.5a6cdd1.git +- Update for currently stabilized snapshot beyond Pacemaker-1.1.14 + (commit 5a6cdd1), but restore old-style notifications to the state at + Pacemaker-1.1.14 point release (disabled) +- Definitely get rid of Corosync v1 (Flatiron) hypothetical support +- Remove some of the spec file cruft, not required for years + (BuildRoot, AutoReqProv, "clean" scriptlet, etc.) 
and adapt the file + per https://github.com/ClusterLabs/pacemaker/pull/965 + +* Thu Feb 04 2016 Fedora Release Engineering - 1.1.14-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild + +* Mon Jan 18 2016 Jan Pokorný - 1.1.14-1 +- Update for new upstream tarball: Pacemaker-1.1.14, + for full details, see included ChangeLog file or + https://github.com/ClusterLabs/pacemaker/releases/tag/Pacemaker-1.1.14 +- Disable Fedora crypto policies conformance patch for now (rhbz#1179335) +- Better align specfile with the upstream version (also fix issue with + crm_mon sysconfig file not being installed) +- Further specfile modifications: + - drop unused gcc-c++ and repeatedly mentioned pkgconfig packages + from BuildRequires + - refer to python_sitearch macro first, if defined + - tolerate license macro not being defined (e.g., for EPEL rebuilds) +- Prevent console mode not available in crm_mon due to curses library test + fragility of configure script in hardened build environment (rhbz#1297985) + +* Tue Oct 20 2015 Jan Pokorný - 1.1.13-4 +- Adapt to follow Fedora crypto policies (rhbz#1179335) + +* Wed Oct 14 2015 Jan Pokorný - 1.1.13-3 +- Update to Pacemaker-1.1.13 post-release + patches (sync) +- Add nagios-plugins-metadata subpackage enabling support of selected + Nagios plugins as resources recognized by Pacemaker +- Several specfile improvements: drop irrelevant stuff, rehash the + included/excluded files + dependencies, add check scriptlet, + reflect current packaging practice, do minor cleanups + (mostly adopted from another spec) + +* Thu Aug 20 2015 Andrew Beekhof - 1.1.13-2 +- Update for new upstream tarball: Pacemaker-1.1.13 +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/main/ChangeLog for full details + +* Thu Jun 18 2015 Fedora Release Engineering - 1.1.12-2.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + +* Wed Nov 05 2014 Andrew Beekhof - 1.1.12-2 +- Address incorrect use of the dbus 
API for interacting with systemd + +* Tue Oct 28 2014 Andrew Beekhof - 1.1.12-1 +- Update for new upstream tarball: Pacemaker-1.1.12+ (a9c8177) +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/main/ChangeLog for full details + +* Sun Aug 17 2014 Fedora Release Engineering - 1.1.11-1.2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild + +* Fri Jun 06 2014 Fedora Release Engineering - 1.1.11-1.1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + +* Tue Feb 18 2014 Andrew Beekhof - 1.1.11-1 +- Update for new upstream tarball: Pacemaker-1.1.11 (9d39a6b) +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/main/ChangeLog for full details + +* Thu Jun 20 2013 Andrew Beekhof - 1.1.9-3 +- Update to upstream 7d8acec +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/main/ChangeLog for full details + + + Feature: Turn off auto-respawning of systemd services when the cluster starts them + + Fix: crmd: Ensure operations for cleaned up resources don't block recovery + + Fix: logging: If SIGTRAP is sent before tracing is turned on, turn it on instead of crashing + +* Mon Jun 17 2013 Andrew Beekhof - 1.1.9-2 +- Update for new upstream tarball: 781a388 +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/main/ChangeLog for full details + +* Wed May 12 2010 Andrew Beekhof - 1.1.2-1 +- Update the tarball from the upstream 1.1.2 release +- See included ChangeLog file or https://raw.github.com/ClusterLabs/pacemaker/main/ChangeLog for full details + +* Tue Jul 14 2009 Andrew Beekhof - 1.0.4-1 +- Initial checkin