From d93c02c7fbc36c36975df8b23e25ed5de4716794 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 9 May 2023 05:16:40 +0000 Subject: [PATCH] import pacemaker-2.1.5-7.el9 --- .gitignore | 2 +- .pacemaker.metadata | 2 +- SOURCES/001-stonith-enabled.patch | 127 - SOURCES/001-sync-points.patch | 2429 ++++++++++++++ SOURCES/002-acl_group.patch | 425 --- SOURCES/002-remote-regression.patch | 98 + SOURCES/003-history-cleanup.patch | 2829 ++++++++++++++++ SOURCES/003-regression.patch | 88 - SOURCES/004-g_source_remove.patch | 107 + SOURCES/004-schema.patch | 624 ---- SOURCES/005-query-null.patch | 151 + SOURCES/005-schema.patch | 46 - SOURCES/006-crm_resource.patch | 1686 ---------- SOURCES/007-stonith_admin.patch | 108 - SOURCES/008-metadata.patch | 34 - SOURCES/009-validate.patch | 94 - SOURCES/010-regression.patch | 47 - SOURCES/011-unfencing.patch | 178 - SOURCES/012-crm_resource.patch | 38 - SOURCES/013-rolling-upgrade-monitor.patch | 1978 ------------ SOURCES/014-abort-transition.patch | 59 - SOURCES/015-one_shot.patch | 3589 --------------------- SPECS/pacemaker.spec | 88 +- 23 files changed, 5672 insertions(+), 9155 deletions(-) delete mode 100644 SOURCES/001-stonith-enabled.patch create mode 100644 SOURCES/001-sync-points.patch delete mode 100644 SOURCES/002-acl_group.patch create mode 100644 SOURCES/002-remote-regression.patch create mode 100644 SOURCES/003-history-cleanup.patch delete mode 100644 SOURCES/003-regression.patch create mode 100644 SOURCES/004-g_source_remove.patch delete mode 100644 SOURCES/004-schema.patch create mode 100644 SOURCES/005-query-null.patch delete mode 100644 SOURCES/005-schema.patch delete mode 100644 SOURCES/006-crm_resource.patch delete mode 100644 SOURCES/007-stonith_admin.patch delete mode 100644 SOURCES/008-metadata.patch delete mode 100644 SOURCES/009-validate.patch delete mode 100644 SOURCES/010-regression.patch delete mode 100644 SOURCES/011-unfencing.patch delete mode 100644 SOURCES/012-crm_resource.patch delete mode 
100644 SOURCES/013-rolling-upgrade-monitor.patch delete mode 100644 SOURCES/014-abort-transition.patch delete mode 100644 SOURCES/015-one_shot.patch diff --git a/.gitignore b/.gitignore index 66cdb0e..e31915e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -SOURCES/pacemaker-dc6eb4362.tar.gz +SOURCES/pacemaker-a3f44794f.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index 315149b..f5f737d 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,2 @@ 2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -24ccc9f234896595a1f7a8baec22652620fd609f SOURCES/pacemaker-dc6eb4362.tar.gz +b16198db5f86857ba8bc0ebd04fd386da360478a SOURCES/pacemaker-a3f44794f.tar.gz diff --git a/SOURCES/001-stonith-enabled.patch b/SOURCES/001-stonith-enabled.patch deleted file mode 100644 index ebeb650..0000000 --- a/SOURCES/001-stonith-enabled.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 243139b2ec0f6b17877a4e7f651fc3f70f76b11a Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Fri, 6 May 2022 15:23:43 +0100 -Subject: [PATCH 1/2] fenced: Don't ignore CIB updates if stonith-enabled=false - -Fixes: T378 ---- - daemons/fenced/pacemaker-fenced.c | 23 +++-------------------- - 1 file changed, 3 insertions(+), 20 deletions(-) - -diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c -index caab7de83..dadd187b6 100644 ---- a/daemons/fenced/pacemaker-fenced.c -+++ b/daemons/fenced/pacemaker-fenced.c -@@ -1136,11 +1136,8 @@ static void - update_cib_cache_cb(const char *event, xmlNode * msg) - { - int rc = pcmk_ok; -- xmlNode *stonith_enabled_xml = NULL; -- static gboolean stonith_enabled_saved = TRUE; - long timeout_ms_saved = stonith_watchdog_timeout_ms; - gboolean need_full_refresh = FALSE; -- bool value = false; - - if(!have_cib_devices) { - crm_trace("Skipping updates until we get a 
full dump"); -@@ -1191,32 +1188,18 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - return; - } - CRM_ASSERT(local_cib != NULL); -- stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ -+ need_full_refresh = TRUE; - } - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - -- stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", -- local_cib, LOG_NEVER); -- if (pcmk__xe_get_bool_attr(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE, &value) == pcmk_rc_ok && !value) { -- crm_trace("Ignoring CIB updates while fencing is disabled"); -- stonith_enabled_saved = FALSE; -- -- } else if (stonith_enabled_saved == FALSE) { -- crm_info("Updating fencing device and topology lists " -- "now that fencing is enabled"); -- stonith_enabled_saved = TRUE; -- need_full_refresh = TRUE; -- -- } else { -- if (timeout_ms_saved != stonith_watchdog_timeout_ms) { -+ if (timeout_ms_saved != stonith_watchdog_timeout_ms) { - need_full_refresh = TRUE; -- } else { -+ } else { - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - watchdog_device_update(); -- } - } - - if (need_full_refresh) { --- -2.31.1 - - -From c600ef49022e7473acbe121fae50a0c1aa2d7c03 Mon Sep 17 00:00:00 2001 -From: Christine Caulfield -Date: Thu, 9 Jun 2022 11:08:43 +0100 -Subject: [PATCH 2/2] Also don't check for stonith-disabled in - update_stonith_watchdog_timeout_ms - ---- - daemons/fenced/pacemaker-fenced.c | 34 +++++++++++-------------------- - 1 file changed, 12 insertions(+), 22 deletions(-) - -diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c -index dadd187b6..ec42d5bc2 100644 ---- a/daemons/fenced/pacemaker-fenced.c -+++ b/daemons/fenced/pacemaker-fenced.c -@@ -643,31 +643,21 @@ watchdog_device_update(void) - static void - update_stonith_watchdog_timeout_ms(xmlNode *cib) - { -- xmlNode *stonith_enabled_xml = NULL; -- bool stonith_enabled = false; -- int rc = pcmk_rc_ok; - 
long timeout_ms = 0; -+ xmlNode *stonith_watchdog_xml = NULL; -+ const char *value = NULL; - -- stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", -- cib, LOG_NEVER); -- rc = pcmk__xe_get_bool_attr(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE, &stonith_enabled); -- -- if (rc != pcmk_rc_ok || stonith_enabled) { -- xmlNode *stonith_watchdog_xml = NULL; -- const char *value = NULL; -- -- stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", -- cib, LOG_NEVER); -- if (stonith_watchdog_xml) { -- value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); -- } -- if (value) { -- timeout_ms = crm_get_msec(value); -- } -+ stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", -+ cib, LOG_NEVER); -+ if (stonith_watchdog_xml) { -+ value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); -+ } -+ if (value) { -+ timeout_ms = crm_get_msec(value); -+ } - -- if (timeout_ms < 0) { -- timeout_ms = pcmk__auto_watchdog_timeout(); -- } -+ if (timeout_ms < 0) { -+ timeout_ms = pcmk__auto_watchdog_timeout(); - } - - stonith_watchdog_timeout_ms = timeout_ms; --- -2.31.1 - diff --git a/SOURCES/001-sync-points.patch b/SOURCES/001-sync-points.patch new file mode 100644 index 0000000..c034c78 --- /dev/null +++ b/SOURCES/001-sync-points.patch @@ -0,0 +1,2429 @@ +From de05f6b52c667155d262ceeb541dc1041d079d71 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 11:36:58 -0400 +Subject: [PATCH 01/26] Refactor: tools: Use a uint32_t for attr_options. 
+ +--- + tools/attrd_updater.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index d90567a..b85a281 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -47,7 +47,7 @@ struct { + gchar *attr_node; + gchar *attr_set; + char *attr_value; +- int attr_options; ++ uint32_t attr_options; + gboolean query_all; + gboolean quiet; + } options = { +-- +2.31.1 + +From c6637520b474d44553ade52c0dbe9e36e873135f Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 21 Oct 2022 14:31:16 -0400 +Subject: [PATCH 02/26] Refactor: libcrmcommon: Make pcmk__xe_match more + broadly useful. + +If attr_v is NULL, simply return the first node with a matching name. +--- + lib/common/xml.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/lib/common/xml.c b/lib/common/xml.c +index 036dd87..ac6f46a 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -510,7 +510,7 @@ find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find) + * \param[in] parent XML element to search + * \param[in] node_name If not NULL, only match children of this type + * \param[in] attr_n If not NULL, only match children with an attribute +- * of this name and a value of \p attr_v ++ * of this name. 
+ * \param[in] attr_v If \p attr_n and this are not NULL, only match children + * with an attribute named \p attr_n and this value + * +@@ -520,14 +520,16 @@ xmlNode * + pcmk__xe_match(const xmlNode *parent, const char *node_name, + const char *attr_n, const char *attr_v) + { +- /* ensure attr_v specified when attr_n is */ +- CRM_CHECK(attr_n == NULL || attr_v != NULL, return NULL); ++ CRM_CHECK(parent != NULL, return NULL); ++ CRM_CHECK(attr_v == NULL || attr_n != NULL, return NULL); + + for (xmlNode *child = pcmk__xml_first_child(parent); child != NULL; + child = pcmk__xml_next(child)) { + if (pcmk__str_eq(node_name, (const char *) (child->name), + pcmk__str_null_matches) +- && ((attr_n == NULL) || attr_matches(child, attr_n, attr_v))) { ++ && ((attr_n == NULL) || ++ (attr_v == NULL && xmlHasProp(child, (pcmkXmlStr) attr_n)) || ++ (attr_v != NULL && attr_matches(child, attr_n, attr_v)))) { + return child; + } + } +-- +2.31.1 + +From dd520579484c6ec091f7fbb550347941302dad0e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 21 Oct 2022 14:32:46 -0400 +Subject: [PATCH 03/26] Tests: libcrmcommon: Add tests for pcmk__xe_match. + +--- + lib/common/tests/xml/Makefile.am | 3 +- + lib/common/tests/xml/pcmk__xe_match_test.c | 105 +++++++++++++++++++++ + 2 files changed, 107 insertions(+), 1 deletion(-) + create mode 100644 lib/common/tests/xml/pcmk__xe_match_test.c + +diff --git a/lib/common/tests/xml/Makefile.am b/lib/common/tests/xml/Makefile.am +index 342ca07..0ccdcc3 100644 +--- a/lib/common/tests/xml/Makefile.am ++++ b/lib/common/tests/xml/Makefile.am +@@ -11,6 +11,7 @@ include $(top_srcdir)/mk/tap.mk + include $(top_srcdir)/mk/unittest.mk + + # Add "_test" to the end of all test program names to simplify .gitignore. 
+-check_PROGRAMS = pcmk__xe_foreach_child_test ++check_PROGRAMS = pcmk__xe_foreach_child_test \ ++ pcmk__xe_match_test + + TESTS = $(check_PROGRAMS) +diff --git a/lib/common/tests/xml/pcmk__xe_match_test.c b/lib/common/tests/xml/pcmk__xe_match_test.c +new file mode 100644 +index 0000000..fd529ba +--- /dev/null ++++ b/lib/common/tests/xml/pcmk__xe_match_test.c +@@ -0,0 +1,105 @@ ++/* ++ * Copyright 2022 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++ ++const char *str1 = ++ "\n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ " \n" ++ " \n" ++ " content\n" ++ " \n" ++ ""; ++ ++static void ++bad_input(void **state) { ++ xmlNode *xml = string2xml(str1); ++ ++ assert_null(pcmk__xe_match(NULL, NULL, NULL, NULL)); ++ assert_null(pcmk__xe_match(NULL, NULL, NULL, "attrX")); ++ ++ free_xml(xml); ++} ++ ++static void ++not_found(void **state) { ++ xmlNode *xml = string2xml(str1); ++ ++ /* No node with an attrX attribute */ ++ assert_null(pcmk__xe_match(xml, NULL, "attrX", NULL)); ++ /* No nodeX node */ ++ assert_null(pcmk__xe_match(xml, "nodeX", NULL, NULL)); ++ /* No nodeA node with attrX */ ++ assert_null(pcmk__xe_match(xml, "nodeA", "attrX", NULL)); ++ /* No nodeA node with attrA=XYZ */ ++ assert_null(pcmk__xe_match(xml, "nodeA", "attrA", "XYZ")); ++ ++ free_xml(xml); ++} ++ ++static void ++find_attrB(void **state) { ++ xmlNode *xml = string2xml(str1); ++ xmlNode *result = NULL; ++ ++ /* Find the first node with attrB */ ++ result = pcmk__xe_match(xml, NULL, "attrB", NULL); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "3"); ++ ++ /* Find the first nodeB with 
attrB */ ++ result = pcmk__xe_match(xml, "nodeB", "attrB", NULL); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "5"); ++ ++ free_xml(xml); ++} ++ ++static void ++find_attrA_matching(void **state) { ++ xmlNode *xml = string2xml(str1); ++ xmlNode *result = NULL; ++ ++ /* Find attrA=456 */ ++ result = pcmk__xe_match(xml, NULL, "attrA", "456"); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "2"); ++ ++ /* Find a nodeB with attrA=123 */ ++ result = pcmk__xe_match(xml, "nodeB", "attrA", "123"); ++ assert_non_null(result); ++ assert_string_equal(crm_element_value(result, "id"), "4"); ++ ++ free_xml(xml); ++} ++ ++PCMK__UNIT_TEST(NULL, NULL, ++ cmocka_unit_test(bad_input), ++ cmocka_unit_test(not_found), ++ cmocka_unit_test(find_attrB), ++ cmocka_unit_test(find_attrA_matching)); +-- +2.31.1 + +From 03af8498d8aaf21c509cec9b0ec4b78475da41d7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:22:26 -0400 +Subject: [PATCH 04/26] Feature: libcrmcommon: Add attrd options for specifying + a sync point. 
+ +--- + include/crm/common/attrd_internal.h | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index f7033ad..389be48 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -16,13 +16,15 @@ extern "C" { + + // Options for clients to use with functions below + enum pcmk__node_attr_opts { +- pcmk__node_attr_none = 0, +- pcmk__node_attr_remote = (1 << 0), +- pcmk__node_attr_private = (1 << 1), +- pcmk__node_attr_pattern = (1 << 2), +- pcmk__node_attr_value = (1 << 3), +- pcmk__node_attr_delay = (1 << 4), +- pcmk__node_attr_perm = (1 << 5), ++ pcmk__node_attr_none = 0, ++ pcmk__node_attr_remote = (1 << 0), ++ pcmk__node_attr_private = (1 << 1), ++ pcmk__node_attr_pattern = (1 << 2), ++ pcmk__node_attr_value = (1 << 3), ++ pcmk__node_attr_delay = (1 << 4), ++ pcmk__node_attr_perm = (1 << 5), ++ pcmk__node_attr_sync_local = (1 << 6), ++ pcmk__node_attr_sync_cluster = (1 << 7), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +-- +2.31.1 + +From 5c8825293ee21d3823bdcd01b0df9c7d39739940 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:23:09 -0400 +Subject: [PATCH 05/26] Feature: libcrmcommon: Add sync point to IPC request + XML. + +If one of the pcmk__node_attr_sync_* options is provided, add an +attribute to the request XML. This will later be inspected by the +server to determine when to send the reply to the client. 
+--- + include/crm/common/options_internal.h | 2 ++ + include/crm_internal.h | 1 + + lib/common/ipc_attrd.c | 6 ++++++ + 3 files changed, 9 insertions(+) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index b153c67..f29ba3f 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -145,9 +145,11 @@ bool pcmk__valid_sbd_timeout(const char *value); + #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" + + // Constants for enumerated values for various options ++#define PCMK__VALUE_CLUSTER "cluster" + #define PCMK__VALUE_CUSTOM "custom" + #define PCMK__VALUE_FENCING "fencing" + #define PCMK__VALUE_GREEN "green" ++#define PCMK__VALUE_LOCAL "local" + #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" + #define PCMK__VALUE_NONE "none" + #define PCMK__VALUE_NOTHING "nothing" +diff --git a/include/crm_internal.h b/include/crm_internal.h +index e6e2e96..08193c3 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -71,6 +71,7 @@ + #define PCMK__XA_ATTR_RESOURCE "attr_resource" + #define PCMK__XA_ATTR_SECTION "attr_section" + #define PCMK__XA_ATTR_SET "attr_set" ++#define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point" + #define PCMK__XA_ATTR_USER "attr_user" + #define PCMK__XA_ATTR_UUID "attr_key" + #define PCMK__XA_ATTR_VALUE "attr_value" +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index f6cfbc4..4606509 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -431,6 +431,12 @@ populate_update_op(xmlNode *op, const char *node, const char *name, const char * + pcmk_is_set(options, pcmk__node_attr_remote)); + crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, + pcmk_is_set(options, pcmk__node_attr_private)); ++ ++ if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { ++ crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); ++ } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { ++ crm_xml_add(op, 
PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); ++ } + } + + int +-- +2.31.1 + +From e2b3fee630caf0846ca8bbffcef4d6d2acfd32a5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Sep 2022 12:26:28 -0400 +Subject: [PATCH 06/26] Feature: tools: Add --wait= parameter to attrd_updater. + +This command line option is used to specify the sync point to use. For +the moment, it has no effect. +--- + tools/attrd_updater.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index b85a281..c4779a6 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -97,6 +97,22 @@ section_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError + return TRUE; + } + ++static gboolean ++wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { ++ if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ return TRUE; ++ } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); ++ return TRUE; ++ } else { ++ g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, ++ "--wait= must be one of 'no', 'local', 'cluster'"); ++ return FALSE; ++ } ++} ++ + #define INDENT " " + + static GOptionEntry required_entries[] = { +@@ -175,6 +191,14 @@ static GOptionEntry addl_entries[] = { + "If this creates a new attribute, never write the attribute to CIB", + NULL }, + ++ { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, ++ "Wait for some event to occur before returning. 
Values are 'no' (wait\n" ++ INDENT "only for the attribute daemon to acknowledge the request) or\n" ++ INDENT "'local' (wait until the change has propagated to where a local\n" ++ INDENT "query will return the request value, or the value set by a\n" ++ INDENT "later request). Default is 'no'.", ++ "UNTIL" }, ++ + { NULL } + }; + +-- +2.31.1 + +From 52d51ab41b2f00e72724ab39835b3db86605a96b Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:40:13 -0400 +Subject: [PATCH 07/26] Feature: daemons: Add functions for checking a request + for a sync point. + +--- + daemons/attrd/Makefile.am | 1 + + daemons/attrd/attrd_sync.c | 38 +++++++++++++++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 3 +++ + 3 files changed, 42 insertions(+) + create mode 100644 daemons/attrd/attrd_sync.c + +diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am +index 1a3d360..6bb81c4 100644 +--- a/daemons/attrd/Makefile.am ++++ b/daemons/attrd/Makefile.am +@@ -32,6 +32,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \ + attrd_elections.c \ + attrd_ipc.c \ + attrd_messages.c \ ++ attrd_sync.c \ + attrd_utils.c \ + pacemaker-attrd.c + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +new file mode 100644 +index 0000000..92759d2 +--- /dev/null ++++ b/daemons/attrd/attrd_sync.c +@@ -0,0 +1,38 @@ ++/* ++ * Copyright 2022 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++ ++#include "pacemaker-attrd.h" ++ ++const char * ++attrd_request_sync_point(xmlNode *xml) ++{ ++ if (xml_has_children(xml)) { ++ xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); ++ ++ if (child) { ++ return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); ++ } else { ++ return NULL; ++ } ++ ++ } else { ++ return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); ++ } ++} ++ ++bool ++attrd_request_has_sync_point(xmlNode *xml) ++{ ++ return attrd_request_sync_point(xml) != NULL; ++} +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 71ce90a..ff850bb 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -182,4 +182,7 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a + void attrd_unregister_handlers(void); + void attrd_handle_request(pcmk__request_t *request); + ++const char *attrd_request_sync_point(xmlNode *xml); ++bool attrd_request_has_sync_point(xmlNode *xml); ++ + #endif /* PACEMAKER_ATTRD__H */ +-- +2.31.1 + +From 2e0509a12ee7d4a612133ee65b75245eea7d271d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:42:04 -0400 +Subject: [PATCH 08/26] Refactor: daemons: Don't ACK update requests that give + a sync point. + +The ACK is the only response from the server for update messages. If +the message specified that it wanted to wait for a sync point, we need +to delay sending that response until the sync point is reached. +Therefore, do not always immediately send the ACK. 
+--- + daemons/attrd/attrd_messages.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index de4a28a..9e8ae40 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -137,12 +137,21 @@ handle_update_request(pcmk__request_t *request) + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; ++ + } else { +- /* Because attrd_client_update can be called recursively, we send the ACK +- * here to ensure that the client only ever receives one. +- */ +- attrd_send_ack(request->ipc_client, request->ipc_id, +- request->flags|crm_ipc_client_response); ++ if (!attrd_request_has_sync_point(request->xml)) { ++ /* If the client doesn't want to wait for a sync point, go ahead and send ++ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate ++ * sync point is reached. ++ * ++ * In the normal case, attrd_client_update can be called recursively which ++ * makes where to send the ACK tricky. Doing it here ensures the client ++ * only ever receives one. ++ */ ++ attrd_send_ack(request->ipc_client, request->ipc_id, ++ request->flags|crm_ipc_client_response); ++ } ++ + return attrd_client_update(request); + } + } +-- +2.31.1 + +From 2a0ff66cdf0085c4c8ab1992ef7e785a4facc8c7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 14:48:48 -0400 +Subject: [PATCH 09/26] Feature: daemons: Add support for local sync points on + updates. + +In the IPC dispatcher for attrd, add the client to a wait list if its +request specifies a sync point. When the attribute's value is changed +on the local attrd, alert any clients waiting on a local sync point by +then sending the previously delayed ACK. + +Sync points for other requests and the global sync point are not yet +supported. + +Fixes T35. 
+--- + daemons/attrd/attrd_corosync.c | 18 +++++ + daemons/attrd/attrd_messages.c | 12 ++- + daemons/attrd/attrd_sync.c | 137 ++++++++++++++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 7 ++ + 4 files changed, 173 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 539e5bf..4337280 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -568,14 +568,32 @@ void + attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter) + { ++ bool handle_sync_point = false; ++ + if (xml_has_children(xml)) { + for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; + child = crm_next_same_xml(child)) { + copy_attrs(xml, child); + attrd_peer_update_one(peer, child, filter); ++ ++ if (attrd_request_has_sync_point(child)) { ++ handle_sync_point = true; ++ } + } + + } else { + attrd_peer_update_one(peer, xml, filter); ++ ++ if (attrd_request_has_sync_point(xml)) { ++ handle_sync_point = true; ++ } ++ } ++ ++ /* If the update XML specified that the client wanted to wait for a sync ++ * point, process that now. ++ */ ++ if (handle_sync_point) { ++ crm_debug("Hit local sync point for attribute update"); ++ attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 9e8ae40..c96700f 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -139,7 +139,17 @@ handle_update_request(pcmk__request_t *request) + return NULL; + + } else { +- if (!attrd_request_has_sync_point(request->xml)) { ++ if (attrd_request_has_sync_point(request->xml)) { ++ /* If this client supplied a sync point it wants to wait for, add it to ++ * the wait list. Clients on this list will not receive an ACK until ++ * their sync point is hit which will result in the client stalled there ++ * until it receives a response. 
++ * ++ * All other clients will receive the expected response as normal. ++ */ ++ attrd_add_client_to_waitlist(request); ++ ++ } else { + /* If the client doesn't want to wait for a sync point, go ahead and send + * the ACK immediately. Otherwise, we'll send the ACK when the appropriate + * sync point is reached. +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 92759d2..2981bd0 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -14,6 +14,143 @@ + + #include "pacemaker-attrd.h" + ++/* A hash table storing clients that are waiting on a sync point to be reached. ++ * The key is waitlist_client - just a plain int. The obvious key would be ++ * the IPC client's ID, but this is not guaranteed to be unique. A single client ++ * could be waiting on a sync point for multiple attributes at the same time. ++ * ++ * It is not expected that this hash table will ever be especially large. ++ */ ++static GHashTable *waitlist = NULL; ++static int waitlist_client = 0; ++ ++struct waitlist_node { ++ /* What kind of sync point does this node describe? */ ++ enum attrd_sync_point sync_point; ++ ++ /* Information required to construct and send a reply to the client. 
*/ ++ char *client_id; ++ uint32_t ipc_id; ++ uint32_t flags; ++}; ++ ++static void ++next_key(void) ++{ ++ do { ++ waitlist_client++; ++ if (waitlist_client < 0) { ++ waitlist_client = 1; ++ } ++ } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); ++} ++ ++static void ++free_waitlist_node(gpointer data) ++{ ++ struct waitlist_node *wl = (struct waitlist_node *) data; ++ ++ free(wl->client_id); ++ free(wl); ++} ++ ++static const char * ++sync_point_str(enum attrd_sync_point sync_point) ++{ ++ if (sync_point == attrd_sync_point_local) { ++ return PCMK__VALUE_LOCAL; ++ } else if (sync_point == attrd_sync_point_cluster) { ++ return PCMK__VALUE_CLUSTER; ++ } else { ++ return "unknown"; ++ } ++} ++ ++void ++attrd_add_client_to_waitlist(pcmk__request_t *request) ++{ ++ const char *sync_point = attrd_request_sync_point(request->xml); ++ struct waitlist_node *wl = NULL; ++ ++ if (sync_point == NULL) { ++ return; ++ } ++ ++ if (waitlist == NULL) { ++ waitlist = pcmk__intkey_table(free_waitlist_node); ++ } ++ ++ wl = calloc(sizeof(struct waitlist_node), 1); ++ ++ CRM_ASSERT(wl != NULL); ++ ++ wl->client_id = strdup(request->ipc_client->id); ++ ++ CRM_ASSERT(wl->client_id); ++ ++ if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { ++ wl->sync_point = attrd_sync_point_local; ++ } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ wl->sync_point = attrd_sync_point_cluster; ++ } else { ++ free_waitlist_node(wl); ++ return; ++ } ++ ++ wl->ipc_id = request->ipc_id; ++ wl->flags = request->flags; ++ ++ crm_debug("Added client %s to waitlist for %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ ++ next_key(); ++ pcmk__intkey_table_insert(waitlist, waitlist_client, wl); ++ ++ /* And then add the key to the request XML so we can uniquely identify ++ * it when it comes time to issue the ACK. 
++ */ ++ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); ++} ++ ++void ++attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) ++{ ++ int callid; ++ gpointer value; ++ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ crm_warn("Could not get callid from request XML"); ++ return; ++ } ++ ++ value = pcmk__intkey_table_lookup(waitlist, callid); ++ if (value != NULL) { ++ struct waitlist_node *wl = (struct waitlist_node *) value; ++ pcmk__client_t *client = NULL; ++ ++ if (wl->sync_point != sync_point) { ++ return; ++ } ++ ++ crm_debug("Alerting client %s for reached %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ ++ client = pcmk__find_client_by_id(wl->client_id); ++ if (client == NULL) { ++ return; ++ } ++ ++ attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); ++ ++ /* And then remove the client so it doesn't get alerted again. */ ++ pcmk__intkey_table_remove(waitlist, callid); ++ } ++} ++ + const char * + attrd_request_sync_point(xmlNode *xml) + { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index ff850bb..9dd8320 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -182,6 +182,13 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a + void attrd_unregister_handlers(void); + void attrd_handle_request(pcmk__request_t *request); + ++enum attrd_sync_point { ++ attrd_sync_point_local, ++ attrd_sync_point_cluster, ++}; ++ ++void attrd_add_client_to_waitlist(pcmk__request_t *request); ++void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); + +-- +2.31.1 + +From 59caaf1682191a91d6062358b770f8b9457ba3eb Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 20 Oct 2022 
14:56:58 -0400 +Subject: [PATCH 10/26] Feature: daemons: If a client disconnects, remove it + from the waitlist. + +--- + daemons/attrd/attrd_ipc.c | 5 +++++ + daemons/attrd/attrd_sync.c | 21 +++++++++++++++++++++ + daemons/attrd/pacemaker-attrd.h | 1 + + 3 files changed, 27 insertions(+) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 7e4a1c0..8aa39c2 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -438,8 +438,13 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) + crm_trace("Ignoring request to clean up unknown connection %p", c); + } else { + crm_trace("Cleaning up closed client connection %p", c); ++ ++ /* Remove the client from the sync point waitlist if it's present. */ ++ attrd_remove_client_from_waitlist(client); ++ + pcmk__free_client(client); + } ++ + return FALSE; + } + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 2981bd0..7293318 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -112,6 +112,27 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++void ++attrd_remove_client_from_waitlist(pcmk__client_t *client) ++{ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, waitlist); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ struct waitlist_node *wl = (struct waitlist_node *) value; ++ ++ if (wl->client_id == client->id) { ++ g_hash_table_iter_remove(&iter); ++ } ++ } ++} ++ + void + attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 9dd8320..b6ecb75 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -189,6 +189,7 @@ enum attrd_sync_point { + + void attrd_add_client_to_waitlist(pcmk__request_t *request); + void 
attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); ++void attrd_remove_client_from_waitlist(pcmk__client_t *client); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); + +-- +2.31.1 + +From b28042e1d64b48c96dbd9da1e9ee3ff481bbf620 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 11:00:20 -0400 +Subject: [PATCH 11/26] Feature: daemons: Add support for local sync points on + clearing failures. + +attrd_clear_client_failure just calls attrd_client_update underneath, so +that function will handle all the rest of the sync point functionality +for us. +--- + daemons/attrd/attrd_ipc.c | 2 -- + daemons/attrd/attrd_messages.c | 19 +++++++++++++++++++ + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 8aa39c2..2e614e8 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -101,8 +101,6 @@ attrd_client_clear_failure(pcmk__request_t *request) + xmlNode *xml = request->xml; + const char *rsc, *op, *interval_spec; + +- attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); +- + if (minimum_protocol_version >= 2) { + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index c96700f..3ba14a6 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -42,6 +42,25 @@ handle_clear_failure_request(pcmk__request_t *request) + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { ++ if (attrd_request_has_sync_point(request->xml)) { ++ /* If this client supplied a sync point it wants to wait for, add it to ++ * the wait list. Clients on this list will not receive an ACK until ++ * their sync point is hit which will result in the client stalled there ++ * until it receives a response. 
++ * ++ * All other clients will receive the expected response as normal. ++ */ ++ attrd_add_client_to_waitlist(request); ++ ++ } else { ++ /* If the client doesn't want to wait for a sync point, go ahead and send ++ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate ++ * sync point is reached. ++ */ ++ attrd_send_ack(request->ipc_client, request->ipc_id, ++ request->ipc_flags); ++ } ++ + return attrd_client_clear_failure(request); + } + } +-- +2.31.1 + +From 291dc3b91e57f2584bbf88cfbe3a360e0332e814 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 13:17:24 -0400 +Subject: [PATCH 12/26] Refactor: daemons: Free the waitlist on attrd exit. + +--- + daemons/attrd/attrd_sync.c | 11 +++++++++++ + daemons/attrd/attrd_utils.c | 2 ++ + daemons/attrd/pacemaker-attrd.c | 1 + + daemons/attrd/pacemaker-attrd.h | 1 + + 4 files changed, 15 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 7293318..557e49a 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -112,6 +112,17 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++void ++attrd_free_waitlist(void) ++{ ++ if (waitlist == NULL) { ++ return; ++ } ++ ++ g_hash_table_destroy(waitlist); ++ waitlist = NULL; ++} ++ + void + attrd_remove_client_from_waitlist(pcmk__client_t *client) + { +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 6a19009..00b879b 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -93,6 +93,8 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGUSR2); + mainloop_destroy_signal(SIGTRAP); + ++ attrd_free_waitlist(); ++ + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible + * only if we get SIGTERM in brief windows at start-up and shutdown. 
+diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c +index 2100db4..1336542 100644 +--- a/daemons/attrd/pacemaker-attrd.c ++++ b/daemons/attrd/pacemaker-attrd.c +@@ -300,6 +300,7 @@ main(int argc, char **argv) + attrd_ipc_fini(); + attrd_lrmd_disconnect(); + attrd_cib_disconnect(); ++ attrd_free_waitlist(); + g_hash_table_destroy(attributes); + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index b6ecb75..537bf85 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -52,6 +52,7 @@ void attrd_run_mainloop(void); + + void attrd_set_requesting_shutdown(void); + void attrd_clear_requesting_shutdown(void); ++void attrd_free_waitlist(void); + bool attrd_requesting_shutdown(void); + bool attrd_shutting_down(void); + void attrd_shutdown(int nsig); +-- +2.31.1 + +From 7715ce617c520e14687a82e11ff794c93cd7f64a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 10 Oct 2022 13:21:16 -0400 +Subject: [PATCH 13/26] Feature: includes: Bump CRM_FEATURE_SET for local sync + points. + +--- + include/crm/crm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/crm/crm.h b/include/crm/crm.h +index 5710e4b..7c5c602 100644 +--- a/include/crm/crm.h ++++ b/include/crm/crm.h +@@ -66,7 +66,7 @@ extern "C" { + * >=3.0.13: Fail counts include operation name and interval + * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED + */ +-# define CRM_FEATURE_SET "3.16.1" ++# define CRM_FEATURE_SET "3.16.2" + + /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and + * recipient of a CPG message. This imposes an arbitrary limit on cluster node +-- +2.31.1 + +From b9054425a76d03f538cd0b3ae27490b1874eee8a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 28 Oct 2022 14:23:49 -0400 +Subject: [PATCH 14/26] Refactor: daemons: Add comments for previously added + sync point code. 
+ +--- + daemons/attrd/attrd_sync.c | 63 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 63 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 557e49a..e9690b5 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -66,6 +66,20 @@ sync_point_str(enum attrd_sync_point sync_point) + } + } + ++/*! ++ * \internal ++ * \brief Add a client to the attrd waitlist ++ * ++ * Typically, a client receives an ACK for its XML IPC request immediately. However, ++ * some clients want to wait until their request has been processed and taken effect. ++ * This is called a sync point. Any client placed on this waitlist will have its ++ * ACK message delayed until either its requested sync point is hit, or until it ++ * times out. ++ * ++ * The XML IPC request must specify the type of sync point it wants to wait for. ++ * ++ * \param[in,out] request The request describing the client to place on the waitlist. ++ */ + void + attrd_add_client_to_waitlist(pcmk__request_t *request) + { +@@ -112,6 +126,11 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); + } + ++/*! ++ * \internal ++ * \brief Free all memory associated with the waitlist. This is most typically ++ * used when attrd shuts down. ++ */ + void + attrd_free_waitlist(void) + { +@@ -123,6 +142,13 @@ attrd_free_waitlist(void) + waitlist = NULL; + } + ++/*! ++ * \internal ++ * \brief Unconditionally remove a client from the waitlist, such as when the client ++ * node disconnects from the cluster ++ * ++ * \param[in] client The client to remove ++ */ + void + attrd_remove_client_from_waitlist(pcmk__client_t *client) + { +@@ -144,6 +170,18 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + } + } + ++/*! 
++ * \internal ++ * \brief Send an IPC ACK message to all awaiting clients ++ * ++ * This function will search the waitlist for all clients that are currently awaiting ++ * an ACK indicating their attrd operation is complete. Only those clients with a ++ * matching sync point type and callid from their original XML IPC request will be ++ * ACKed. Once they have received an ACK, they will be removed from the waitlist. ++ * ++ * \param[in] sync_point What kind of sync point have we hit? ++ * \param[in] xml The original XML IPC request. ++ */ + void + attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + { +@@ -183,6 +221,23 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + } + } + ++/*! ++ * \internal ++ * \brief Return the sync point attribute for an IPC request ++ * ++ * This function will check both the top-level element of \p xml for a sync ++ * point attribute, as well as all of its \p op children, if any. The latter ++ * is useful for newer versions of attrd that can put multiple IPC requests ++ * into a single message. ++ * ++ * \param[in] xml An XML IPC request ++ * ++ * \note It is assumed that if one child element has a sync point attribute, ++ * all will have a sync point attribute and they will all be the same ++ * sync point. No other configuration is supported. ++ * ++ * \return The sync point attribute of \p xml, or NULL if none. ++ */ + const char * + attrd_request_sync_point(xmlNode *xml) + { +@@ -200,6 +255,14 @@ attrd_request_sync_point(xmlNode *xml) + } + } + ++/*! ++ * \internal ++ * \brief Does an IPC request contain any sync point attribute? 
++ * ++ * \param[in] xml An XML IPC request ++ * ++ * \return true if there's a sync point attribute, false otherwise ++ */ + bool + attrd_request_has_sync_point(xmlNode *xml) + { +-- +2.31.1 + +From 64219fb7075ee58d29f94f077a3b8f94174bb32a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 12:43:05 -0400 +Subject: [PATCH 15/26] Feature: tools: Add --wait=cluster option to + attrd_updater. + +--- + tools/attrd_updater.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index c4779a6..3cd766d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -106,6 +106,10 @@ wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError ** + pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); + pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); + return TRUE; ++ } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); ++ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); ++ return TRUE; + } else { + g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, + "--wait= must be one of 'no', 'local', 'cluster'"); +@@ -193,10 +197,12 @@ static GOptionEntry addl_entries[] = { + + { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, + "Wait for some event to occur before returning. Values are 'no' (wait\n" +- INDENT "only for the attribute daemon to acknowledge the request) or\n" ++ INDENT "only for the attribute daemon to acknowledge the request),\n" + INDENT "'local' (wait until the change has propagated to where a local\n" + INDENT "query will return the request value, or the value set by a\n" +- INDENT "later request). 
Default is 'no'.", ++ INDENT "later request), or 'cluster' (wait until the change has propagated\n" ++ INDENT "to where a query anywhere on the cluster will return the requested\n" ++ INDENT "value, or the value set by a later request). Default is 'no'.", + "UNTIL" }, + + { NULL } +-- +2.31.1 + +From 1bc5511fadf6ad670508bd3a2a55129bde16f774 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 16 Sep 2022 14:55:06 -0400 +Subject: [PATCH 16/26] Refactor: daemons: Add a confirm= attribute to attrd + messages. + +This allows informing the originator of a message that the message has +been received and processed. As yet, there is no mechanism for handling +and returning the confirmation, only for requesting it. +--- + daemons/attrd/attrd_corosync.c | 6 +++--- + daemons/attrd/attrd_ipc.c | 26 +++++++++++++++++++++----- + daemons/attrd/attrd_messages.c | 11 +++++++++-- + daemons/attrd/pacemaker-attrd.h | 7 ++++--- + include/crm_internal.h | 1 + + 5 files changed, 38 insertions(+), 13 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 4337280..e86ca07 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -124,7 +124,7 @@ broadcast_local_value(const attribute_t *a) + + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); + attrd_add_value_xml(sync, a, v, false); +- attrd_send_message(NULL, sync); ++ attrd_send_message(NULL, sync, false); + free_xml(sync); + return v; + } +@@ -387,7 +387,7 @@ broadcast_unseen_local_values(void) + + if (sync != NULL) { + crm_debug("Broadcasting local-only values"); +- attrd_send_message(NULL, sync); ++ attrd_send_message(NULL, sync, false); + free_xml(sync); + } + } +@@ -539,7 +539,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml) + } + + crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); +- attrd_send_message(peer, sync); ++ attrd_send_message(peer, sync, false); + free_xml(sync); + } + +diff --git a/daemons/attrd/attrd_ipc.c 
b/daemons/attrd/attrd_ipc.c +index 2e614e8..0fc5e93 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -105,7 +105,7 @@ attrd_client_clear_failure(pcmk__request_t *request) + /* Propagate to all peers (including ourselves). + * This ends up at attrd_peer_message(). + */ +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +@@ -184,7 +184,7 @@ attrd_client_peer_remove(pcmk__request_t *request) + if (host) { + crm_info("Client %s is requesting all values for %s be removed", + pcmk__client_name(request->ipc_client), host); +- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ + free(host_alloc); + } else { + crm_info("Ignoring request by client %s to remove all peer values without specifying peer", +@@ -314,7 +314,7 @@ attrd_client_update(pcmk__request_t *request) + } + } + +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -358,7 +358,7 @@ attrd_client_update(pcmk__request_t *request) + if (status == 0) { + crm_trace("Matched %s with %s", attr, regex); + crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); +- attrd_send_message(NULL, xml); ++ attrd_send_message(NULL, xml, false); + } + } + +@@ -388,7 +388,23 @@ attrd_client_update(pcmk__request_t *request) + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? " (writer)" : "")); + +- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ ++ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ /* The client is waiting on the cluster-wide sync point. 
In this case, ++ * the response ACK is not sent until this attrd broadcasts the update ++ * and receives its own confirmation back from all peers. ++ */ ++ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ ++ ++ } else { ++ /* The client is either waiting on the local sync point or was not ++ * waiting on any sync point at all. For the local sync point, the ++ * response ACK is sent in attrd_peer_update. For clients not ++ * waiting on any sync point, the response ACK is sent in ++ * handle_update_request immediately before this function was called. ++ */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ ++ } ++ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 3ba14a6..78df0d0 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -279,16 +279,23 @@ attrd_broadcast_protocol(void) + crm_debug("Broadcasting attrd protocol version %s for node %s", + ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); + +- attrd_send_message(NULL, attrd_op); /* ends up at attrd_peer_message() */ ++ attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ + + free_xml(attrd_op); + } + + gboolean +-attrd_send_message(crm_node_t * node, xmlNode * data) ++attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + { + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); ++ ++ /* Request a confirmation from the destination peer node (which could ++ * be all if node is NULL) that the message has been received and ++ * acted upon. 
++ */ ++ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); + } +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 537bf85..25f7c8a 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -39,10 +39,11 @@ + * PCMK__ATTRD_CMD_UPDATE_DELAY + * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE + * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes +- * 4 2.2.0 Multiple attributes can be updated in a single IPC ++ * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message ++ * 5 2.1.5 Peers can request confirmation of a sent message + */ +-#define ATTRD_PROTOCOL_VERSION "4" ++#define ATTRD_PROTOCOL_VERSION "5" + + #define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) +@@ -162,7 +163,7 @@ xmlNode *attrd_client_clear_failure(pcmk__request_t *request); + xmlNode *attrd_client_update(pcmk__request_t *request); + xmlNode *attrd_client_refresh(pcmk__request_t *request); + xmlNode *attrd_client_query(pcmk__request_t *request); +-gboolean attrd_send_message(crm_node_t * node, xmlNode * data); ++gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); + + xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write); +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 08193c3..63a1726 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -79,6 +79,7 @@ + #define PCMK__XA_ATTR_WRITER "attr_writer" + #define PCMK__XA_CONFIG_ERRORS "config-errors" + #define PCMK__XA_CONFIG_WARNINGS "config-warnings" ++#define PCMK__XA_CONFIRM "confirm" + #define PCMK__XA_GRAPH_ERRORS "graph-errors" + #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" + #define PCMK__XA_MODE "mode" +-- +2.31.1 + +From 
6f389038fc0b11f6291c022c99f188666c65f530 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 14:44:42 -0400 +Subject: [PATCH 17/26] Feature: daemons: Respond to received attrd + confirmation requests. + +On the receiving peer side, if the XML request contains confirm="true", +construct a confirmation message after handling the request completes +and send it back to the originating peer. + +On the originating peer side, add a skeleton handler for confirmation +messages. This does nothing at the moment except log it. +--- + daemons/attrd/attrd_corosync.c | 38 ++++++++++++++++++++++++++++++++++ + daemons/attrd/attrd_messages.c | 13 ++++++++++++ + include/crm_internal.h | 1 + + 3 files changed, 52 insertions(+) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index e86ca07..1245d9c 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -25,6 +25,19 @@ + + extern crm_exit_t attrd_exit_status; + ++static xmlNode * ++attrd_confirmation(int callid) ++{ ++ xmlNode *node = create_xml_node(NULL, __func__); ++ ++ crm_xml_add(node, F_TYPE, T_ATTRD); ++ crm_xml_add(node, F_ORIG, get_local_node_name()); ++ crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); ++ crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); ++ ++ return node; ++} ++ + static void + attrd_peer_message(crm_node_t *peer, xmlNode *xml) + { +@@ -57,6 +70,31 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) + CRM_CHECK(request.op != NULL, return); + + attrd_handle_request(&request); ++ ++ /* Having finished handling the request, check to see if the originating ++ * peer requested confirmation. If so, send that confirmation back now. ++ */ ++ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { ++ int callid = 0; ++ xmlNode *reply = NULL; ++ ++ /* Add the confirmation ID for the message we are confirming to the ++ * response so the originating peer knows what they're a confirmation ++ * for. 
++ */ ++ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); ++ reply = attrd_confirmation(callid); ++ ++ /* And then send the confirmation back to the originating peer. This ++ * ends up right back in this same function (attrd_peer_message) on the ++ * peer where it will have to do something with a PCMK__XA_CONFIRM type ++ * message. ++ */ ++ crm_debug("Sending %s a confirmation", peer->uname); ++ attrd_send_message(peer, reply, false); ++ free_xml(reply); ++ } ++ + pcmk__reset_request(&request); + } + } +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 78df0d0..9c792b2 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -65,6 +65,18 @@ handle_clear_failure_request(pcmk__request_t *request) + } + } + ++static xmlNode * ++handle_confirm_request(pcmk__request_t *request) ++{ ++ if (request->peer != NULL) { ++ crm_debug("Received confirmation from %s", request->peer); ++ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); ++ return NULL; ++ } else { ++ return handle_unknown_request(request); ++ } ++} ++ + static xmlNode * + handle_flush_request(pcmk__request_t *request) + { +@@ -190,6 +202,7 @@ attrd_register_handlers(void) + { + pcmk__server_command_t handlers[] = { + { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, ++ { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, + { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, + { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, + { PCMK__ATTRD_CMD_QUERY, handle_query_request }, +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 63a1726..f60e7b4 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -108,6 +108,7 @@ + #define PCMK__ATTRD_CMD_SYNC "sync" + #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" + #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" ++#define PCMK__ATTRD_CMD_CONFIRM "confirm" + + #define PCMK__CONTROLD_CMD_NODES "list-nodes" + +-- +2.31.1 + +From 
dfb730e9ced9dc75886fda9452c584860573fe30 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 26 Oct 2022 15:58:00 -0400 +Subject: [PATCH 18/26] Feature: daemons: Keep track of #attrd-protocol from + each peer. + +This information can be used in the future when dealing with +cluster-wide sync points to know which peers we are waiting on a reply +from. +--- + daemons/attrd/attrd_corosync.c | 3 +- + daemons/attrd/attrd_utils.c | 60 ++++++++++++++++++++++++++++++--- + daemons/attrd/pacemaker-attrd.h | 4 ++- + 3 files changed, 60 insertions(+), 7 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1245d9c..6f88ab6 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -268,6 +268,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + // Remove votes from cluster nodes that leave, in case election in progress + if (gone && !is_remote) { + attrd_remove_voter(peer); ++ attrd_remove_peer_protocol_ver(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +@@ -395,7 +396,7 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + * version, check to see if it's a new minimum version. + */ + if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { +- attrd_update_minimum_protocol_ver(value); ++ attrd_update_minimum_protocol_ver(peer->uname, value); + } + } + +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 00b879b..421faed 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -29,6 +29,11 @@ static bool requesting_shutdown = false; + static bool shutting_down = false; + static GMainLoop *mloop = NULL; + ++/* A hash table storing information on the protocol version of each peer attrd. ++ * The key is the peer's uname, and the value is the protocol version number. ++ */ ++GHashTable *peer_protocol_vers = NULL; ++ + /*! 
+ * \internal + * \brief Set requesting_shutdown state +@@ -94,6 +99,10 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); ++ if (peer_protocol_vers != NULL) { ++ g_hash_table_destroy(peer_protocol_vers); ++ peer_protocol_vers = NULL; ++ } + + if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { + /* If there's no main loop active, just exit. This should be possible +@@ -273,16 +282,57 @@ attrd_free_attribute(gpointer data) + } + } + ++/*! ++ * \internal ++ * \brief When a peer node leaves the cluster, stop tracking its protocol version. ++ * ++ * \param[in] host The peer node's uname to be removed ++ */ ++void ++attrd_remove_peer_protocol_ver(const char *host) ++{ ++ if (peer_protocol_vers != NULL) { ++ g_hash_table_remove(peer_protocol_vers, host); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief When a peer node broadcasts a message with its protocol version, keep ++ * track of that information. ++ * ++ * We keep track of each peer's protocol version so we know which peers to ++ * expect confirmation messages from when handling cluster-wide sync points. ++ * We additionally keep track of the lowest protocol version supported by all ++ * peers so we know when we can send IPC messages containing more than one ++ * request. 
++ * ++ * \param[in] host The peer node's uname to be tracked ++ * \param[in] value The peer node's protocol version ++ */ + void +-attrd_update_minimum_protocol_ver(const char *value) ++attrd_update_minimum_protocol_ver(const char *host, const char *value) + { + int ver; + ++ if (peer_protocol_vers == NULL) { ++ peer_protocol_vers = pcmk__strkey_table(free, NULL); ++ } ++ + pcmk__scan_min_int(value, &ver, 0); + +- if (ver > 0 && (minimum_protocol_version == -1 || ver < minimum_protocol_version)) { +- minimum_protocol_version = ver; +- crm_trace("Set minimum attrd protocol version to %d", +- minimum_protocol_version); ++ if (ver > 0) { ++ char *host_name = strdup(host); ++ ++ /* Record the peer attrd's protocol version. */ ++ CRM_ASSERT(host_name != NULL); ++ g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); ++ ++ /* If the protocol version is a new minimum, record it as such. */ ++ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { ++ minimum_protocol_version = ver; ++ crm_trace("Set minimum attrd protocol version to %d", ++ minimum_protocol_version); ++ } + } + } +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 25f7c8a..302ef63 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -145,6 +145,7 @@ typedef struct attribute_value_s { + + extern crm_cluster_t *attrd_cluster; + extern GHashTable *attributes; ++extern GHashTable *peer_protocol_vers; + + #define CIB_OP_TIMEOUT_S 120 + +@@ -177,7 +178,8 @@ void attrd_write_attributes(bool all, bool ignore_delay); + void attrd_write_or_elect_attribute(attribute_t *a); + + extern int minimum_protocol_version; +-void attrd_update_minimum_protocol_ver(const char *value); ++void attrd_remove_peer_protocol_ver(const char *host); ++void attrd_update_minimum_protocol_ver(const char *host, const char *value); + + mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); + +-- +2.31.1 + 
+From 945f0fe51d3bf69c2cb1258b394f2f11b8996525 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 27 Oct 2022 14:42:59 -0400 +Subject: [PATCH 19/26] Feature: daemons: Handle cluster-wide sync points in + attrd. + +When an attrd receives an IPC request to update some value, record the +protocol versions of all peer attrds. Additionally register a function +that will be called when all confirmations are received. + +The originating IPC cilent (attrd_updater for instance) will sit there +waiting for an ACK until its timeout is hit. + +As each confirmation message comes back to attrd, mark it off the list +of peers we are waiting on. When no more peers are expected, call the +previously registered function. + +For attribute updates, this function just sends an ack back to +attrd_updater. + +Fixes T35 +--- + daemons/attrd/attrd_corosync.c | 1 + + daemons/attrd/attrd_ipc.c | 4 + + daemons/attrd/attrd_messages.c | 10 ++ + daemons/attrd/attrd_sync.c | 260 +++++++++++++++++++++++++++++++- + daemons/attrd/attrd_utils.c | 2 + + daemons/attrd/pacemaker-attrd.h | 8 + + 6 files changed, 281 insertions(+), 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 6f88ab6..37701aa 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -269,6 +269,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + if (gone && !is_remote) { + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); ++ attrd_do_not_expect_from_peer(peer->uname); + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 0fc5e93..c70aa1b 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -393,6 +393,7 @@ attrd_client_update(pcmk__request_t *request) + * the response ACK is not sent until this attrd broadcasts the update + * and receives its 
own confirmation back from all peers. + */ ++ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); + attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ + + } else { +@@ -456,6 +457,9 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) + /* Remove the client from the sync point waitlist if it's present. */ + attrd_remove_client_from_waitlist(client); + ++ /* And no longer wait for confirmations from any peers. */ ++ attrd_do_not_wait_for_client(client); ++ + pcmk__free_client(client); + } + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 9c792b2..f7b9c7c 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -69,7 +69,17 @@ static xmlNode * + handle_confirm_request(pcmk__request_t *request) + { + if (request->peer != NULL) { ++ int callid; ++ + crm_debug("Received confirmation from %s", request->peer); ++ ++ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, ++ "Could not get callid from XML"); ++ } else { ++ attrd_handle_confirmation(callid, request->peer); ++ } ++ + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index e9690b5..d3d7108 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -34,6 +34,51 @@ struct waitlist_node { + uint32_t flags; + }; + ++/* A hash table storing information on in-progress IPC requests that are awaiting ++ * confirmations. These requests are currently being processed by peer attrds and ++ * we are waiting to receive confirmation messages from each peer indicating that ++ * processing is complete. ++ * ++ * Multiple requests could be waiting on confirmations at the same time. ++ * ++ * The key is the unique callid for the IPC request, and the value is a ++ * confirmation_action struct. 
++ */ ++static GHashTable *expected_confirmations = NULL; ++ ++/*! ++ * \internal ++ * \brief A structure describing a single IPC request that is awaiting confirmations ++ */ ++struct confirmation_action { ++ /*! ++ * \brief A list of peer attrds that we are waiting to receive confirmation ++ * messages from ++ * ++ * This list is dynamic - as confirmations arrive from peer attrds, they will ++ * be removed from this list. When the list is empty, all peers have processed ++ * the request and the associated confirmation action will be taken. ++ */ ++ GList *respondents; ++ ++ /*! ++ * \brief A function to run when all confirmations have been received ++ */ ++ attrd_confirmation_action_fn fn; ++ ++ /*! ++ * \brief Information required to construct and send a reply to the client ++ */ ++ char *client_id; ++ uint32_t ipc_id; ++ uint32_t flags; ++ ++ /*! ++ * \brief The XML request containing the callid associated with this action ++ */ ++ void *xml; ++}; ++ + static void + next_key(void) + { +@@ -114,12 +159,13 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) + wl->ipc_id = request->ipc_id; + wl->flags = request->flags; + +- crm_debug("Added client %s to waitlist for %s sync point", +- wl->client_id, sync_point_str(wl->sync_point)); +- + next_key(); + pcmk__intkey_table_insert(waitlist, waitlist_client, wl); + ++ crm_trace("Added client %s to waitlist for %s sync point", ++ wl->client_id, sync_point_str(wl->sync_point)); ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); ++ + /* And then add the key to the request XML so we can uniquely identify + * it when it comes time to issue the ACK. 
+ */ +@@ -166,6 +212,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + + if (wl->client_id == client->id) { + g_hash_table_iter_remove(&iter); ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } + } +@@ -206,7 +253,7 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + return; + } + +- crm_debug("Alerting client %s for reached %s sync point", ++ crm_trace("Alerting client %s for reached %s sync point", + wl->client_id, sync_point_str(wl->sync_point)); + + client = pcmk__find_client_by_id(wl->client_id); +@@ -218,9 +265,28 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) + + /* And then remove the client so it doesn't get alerted again. */ + pcmk__intkey_table_remove(waitlist, callid); ++ ++ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } + } + ++/*! ++ * \internal ++ * \brief Action to take when a cluster sync point is hit for a ++ * PCMK__ATTRD_CMD_UPDATE* message. ++ * ++ * \param[in] xml The request that should be passed along to ++ * attrd_ack_waitlist_clients. This should be the original ++ * IPC request containing the callid for this update message. ++ */ ++int ++attrd_cluster_sync_point_update(xmlNode *xml) ++{ ++ crm_trace("Hit cluster sync point for attribute update"); ++ attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); ++ return pcmk_rc_ok; ++} ++ + /*! + * \internal + * \brief Return the sync point attribute for an IPC request +@@ -268,3 +334,189 @@ attrd_request_has_sync_point(xmlNode *xml) + { + return attrd_request_sync_point(xml) != NULL; + } ++ ++static void ++free_action(gpointer data) ++{ ++ struct confirmation_action *action = (struct confirmation_action *) data; ++ g_list_free_full(action->respondents, free); ++ free_xml(action->xml); ++ free(action->client_id); ++ free(action); ++} ++ ++/*! 
++ * \internal ++ * \brief When a peer disconnects from the cluster, no longer wait for its confirmation ++ * for any IPC action. If this peer is the last one being waited on, this will ++ * trigger the confirmation action. ++ * ++ * \param[in] host The disconnecting peer attrd's uname ++ */ ++void ++attrd_do_not_expect_from_peer(const char *host) ++{ ++ GList *keys = g_hash_table_get_keys(expected_confirmations); ++ ++ crm_trace("Removing peer %s from expected confirmations", host); ++ ++ for (GList *node = keys; node != NULL; node = node->next) { ++ int callid = *(int *) node->data; ++ attrd_handle_confirmation(callid, host); ++ } ++ ++ g_list_free(keys); ++} ++ ++/*! ++ * \internal ++ * \brief When a client disconnects from the cluster, no longer wait on confirmations ++ * for it. Because the peer attrds may still be processing the original IPC ++ * message, they may still send us confirmations. However, we will take no ++ * action on them. ++ * ++ * \param[in] client The disconnecting client ++ */ ++void ++attrd_do_not_wait_for_client(pcmk__client_t *client) ++{ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, expected_confirmations); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ struct confirmation_action *action = (struct confirmation_action *) value; ++ ++ if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { ++ crm_trace("Removing client %s from expected confirmations", client->id); ++ g_hash_table_iter_remove(&iter); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ break; ++ } ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Register some action to be taken when IPC request confirmations are ++ * received ++ * ++ * When this function is called, a list of all peer attrds that support confirming ++ * requests is generated. 
As confirmations from these peer attrds are received, ++ * they are removed from this list. When the list is empty, the registered action ++ * will be called. ++ * ++ * \note This function should always be called before attrd_send_message is called ++ * to broadcast to the peers to ensure that we know what replies we are ++ * waiting on. Otherwise, it is possible the peer could finish and confirm ++ * before we know to expect it. ++ * ++ * \param[in] request The request that is awaiting confirmations ++ * \param[in] fn A function to be run after all confirmations are received ++ */ ++void ++attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) ++{ ++ struct confirmation_action *action = NULL; ++ GHashTableIter iter; ++ gpointer host, ver; ++ GList *respondents = NULL; ++ int callid; ++ ++ if (expected_confirmations == NULL) { ++ expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); ++ } ++ ++ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { ++ crm_err("Could not get callid from xml"); ++ return; ++ } ++ ++ if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { ++ crm_err("Already waiting on confirmations for call id %d", callid); ++ return; ++ } ++ ++ g_hash_table_iter_init(&iter, peer_protocol_vers); ++ while (g_hash_table_iter_next(&iter, &host, &ver)) { ++ if (GPOINTER_TO_INT(ver) >= 5) { ++ char *s = strdup((char *) host); ++ ++ CRM_ASSERT(s != NULL); ++ respondents = g_list_prepend(respondents, s); ++ } ++ } ++ ++ action = calloc(1, sizeof(struct confirmation_action)); ++ CRM_ASSERT(action != NULL); ++ ++ action->respondents = respondents; ++ action->fn = fn; ++ action->xml = copy_xml(request->xml); ++ ++ action->client_id = strdup(request->ipc_client->id); ++ CRM_ASSERT(action->client_id != NULL); ++ ++ action->ipc_id = request->ipc_id; ++ action->flags = request->flags; ++ ++ pcmk__intkey_table_insert(expected_confirmations, callid, action); ++ crm_trace("Callid %d 
now waiting on %d confirmations", callid, g_list_length(respondents)); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++} ++ ++void ++attrd_free_confirmations(void) ++{ ++ if (expected_confirmations != NULL) { ++ g_hash_table_destroy(expected_confirmations); ++ expected_confirmations = NULL; ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Process a confirmation message from a peer attrd ++ * ++ * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is ++ * received from a peer attrd. If this is the last confirmation we are waiting ++ * on for a given operation, the registered action will be called. ++ * ++ * \param[in] callid The unique callid for the XML IPC request ++ * \param[in] host The confirming peer attrd's uname ++ */ ++void ++attrd_handle_confirmation(int callid, const char *host) ++{ ++ struct confirmation_action *action = NULL; ++ GList *node = NULL; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ action = pcmk__intkey_table_lookup(expected_confirmations, callid); ++ if (action == NULL) { ++ return; ++ } ++ ++ node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); ++ ++ if (node == NULL) { ++ return; ++ } ++ ++ action->respondents = g_list_remove(action->respondents, node->data); ++ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); ++ ++ if (action->respondents == NULL) { ++ action->fn(action->xml); ++ pcmk__intkey_table_remove(expected_confirmations, callid); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ } ++} +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index 421faed..f3a2059 100644 +--- a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -99,6 +99,8 @@ attrd_shutdown(int nsig) + mainloop_destroy_signal(SIGTRAP); + + attrd_free_waitlist(); ++ attrd_free_confirmations(); ++ + if 
(peer_protocol_vers != NULL) { + g_hash_table_destroy(peer_protocol_vers); + peer_protocol_vers = NULL; +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 302ef63..bcc329d 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -191,8 +191,16 @@ enum attrd_sync_point { + attrd_sync_point_cluster, + }; + ++typedef int (*attrd_confirmation_action_fn)(xmlNode *); ++ + void attrd_add_client_to_waitlist(pcmk__request_t *request); + void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); ++int attrd_cluster_sync_point_update(xmlNode *xml); ++void attrd_do_not_expect_from_peer(const char *host); ++void attrd_do_not_wait_for_client(pcmk__client_t *client); ++void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); ++void attrd_free_confirmations(void); ++void attrd_handle_confirmation(int callid, const char *host); + void attrd_remove_client_from_waitlist(pcmk__client_t *client); + const char *attrd_request_sync_point(xmlNode *xml); + bool attrd_request_has_sync_point(xmlNode *xml); +-- +2.31.1 + +From 07a032a7eb2f03dce18a7c94c56b8c837dedda15 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 28 Oct 2022 14:54:15 -0400 +Subject: [PATCH 20/26] Refactor: daemons: Add some attrd version checking + macros. + +These are just to make it a little more obvious what is actually being +asked in the code, instead of having magic numbers sprinkled around. +--- + daemons/attrd/attrd_ipc.c | 2 +- + daemons/attrd/attrd_sync.c | 2 +- + daemons/attrd/pacemaker-attrd.h | 3 +++ + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index c70aa1b..16bfff4 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -294,7 +294,7 @@ attrd_client_update(pcmk__request_t *request) + * two ways we can handle that. 
+ */ + if (xml_has_children(xml)) { +- if (minimum_protocol_version >= 4) { ++ if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { + /* First, if all peers support a certain protocol version, we can + * just broadcast the big message and they'll handle it. However, + * we also need to apply all the transformations in this function +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index d3d7108..e48f82e 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -444,7 +444,7 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f + + g_hash_table_iter_init(&iter, peer_protocol_vers); + while (g_hash_table_iter_next(&iter, &host, &ver)) { +- if (GPOINTER_TO_INT(ver) >= 5) { ++ if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { + char *s = strdup((char *) host); + + CRM_ASSERT(s != NULL); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index bcc329d..83d7c6b 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -45,6 +45,9 @@ + */ + #define ATTRD_PROTOCOL_VERSION "5" + ++#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) ++#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) ++ + #define attrd_send_ack(client, id, flags) \ + pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) + +-- +2.31.1 + +From 811361b96c6f26a1f5eccc54b6e8bf6e6fd003be Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 31 Oct 2022 12:53:22 -0400 +Subject: [PATCH 21/26] Low: attrd: Fix removing clients from the waitlist when + they disconnect. + +The client ID is a string, so it must be compared like a string. 
+--- + daemons/attrd/attrd_sync.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index e48f82e..c9b4784 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -210,7 +210,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) + while (g_hash_table_iter_next(&iter, NULL, &value)) { + struct waitlist_node *wl = (struct waitlist_node *) value; + +- if (wl->client_id == client->id) { ++ if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { + g_hash_table_iter_remove(&iter); + crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); + } +-- +2.31.1 + +From 4e933ad14456af85c60701410c3b23b4eab03f86 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 1 Nov 2022 12:35:12 -0400 +Subject: [PATCH 22/26] Feature: daemons: Handle an attrd client timing out. + +If the update confirmations do not come back in time, use a main loop +timer to remove the client from the table. +--- + daemons/attrd/attrd_sync.c | 49 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 49 insertions(+) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index c9b4784..9d07796 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -61,6 +61,12 @@ struct confirmation_action { + */ + GList *respondents; + ++ /*! ++ * \brief A timer that will be used to remove the client should it time out ++ * before receiving all confirmations ++ */ ++ mainloop_timer_t *timer; ++ + /*! 
+ * \brief A function to run when all confirmations have been received + */ +@@ -340,11 +346,51 @@ free_action(gpointer data) + { + struct confirmation_action *action = (struct confirmation_action *) data; + g_list_free_full(action->respondents, free); ++ mainloop_timer_del(action->timer); + free_xml(action->xml); + free(action->client_id); + free(action); + } + ++/* Remove an IPC request from the expected_confirmations table if the peer attrds ++ * don't respond before the timeout is hit. We set the timeout to 15s. The exact ++ * number isn't critical - we just want to make sure that the table eventually gets ++ * cleared of things that didn't complete. ++ */ ++static gboolean ++confirmation_timeout_cb(gpointer data) ++{ ++ struct confirmation_action *action = (struct confirmation_action *) data; ++ ++ GHashTableIter iter; ++ gpointer value; ++ ++ if (expected_confirmations == NULL) { ++ return G_SOURCE_REMOVE; ++ } ++ ++ g_hash_table_iter_init(&iter, expected_confirmations); ++ ++ while (g_hash_table_iter_next(&iter, NULL, &value)) { ++ if (value == action) { ++ pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); ++ if (client == NULL) { ++ return G_SOURCE_REMOVE; ++ } ++ ++ crm_trace("Timed out waiting for confirmations for client %s", client->id); ++ pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, ++ "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); ++ ++ g_hash_table_iter_remove(&iter); ++ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); ++ break; ++ } ++ } ++ ++ return G_SOURCE_REMOVE; ++} ++ + /*! 
+ * \internal + * \brief When a peer disconnects from the cluster, no longer wait for its confirmation +@@ -465,6 +511,9 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f + action->ipc_id = request->ipc_id; + action->flags = request->flags; + ++ action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); ++ mainloop_timer_start(action->timer); ++ + pcmk__intkey_table_insert(expected_confirmations, callid, action); + crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); + crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); +-- +2.31.1 + +From 101896383cbe0103c98078e46540c076af08f040 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 2 Nov 2022 14:40:30 -0400 +Subject: [PATCH 23/26] Refactor: Demote a sync point related message to trace. + +--- + daemons/attrd/attrd_corosync.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 37701aa..5cbed7e 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -633,7 +633,7 @@ attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + * point, process that now. + */ + if (handle_sync_point) { +- crm_debug("Hit local sync point for attribute update"); ++ crm_trace("Hit local sync point for attribute update"); + attrd_ack_waitlist_clients(attrd_sync_point_local, xml); + } + } +-- +2.31.1 + +From acd13246d4c2bef7982ca103e34896efcad22348 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 3 Nov 2022 10:29:20 -0400 +Subject: [PATCH 24/26] Low: daemons: Avoid infinite confirm loops in attrd. + +On the sending side, do not add confirm="yes" to a message with +op="confirm". On the receiving side, do not confirm a message with +op="confirm" even if confirm="yes" is set. 
+--- + daemons/attrd/attrd_corosync.c | 3 ++- + daemons/attrd/attrd_messages.c | 6 +++++- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 5cbed7e..88c1ecc 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -74,7 +74,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) + /* Having finished handling the request, check to see if the originating + * peer requested confirmation. If so, send that confirmation back now. + */ +- if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { ++ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && ++ !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { + int callid = 0; + xmlNode *reply = NULL; + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index f7b9c7c..184176a 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -310,6 +310,8 @@ attrd_broadcast_protocol(void) + gboolean + attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + { ++ const char *op = crm_element_value(data, PCMK__XA_TASK); ++ + crm_xml_add(data, F_TYPE, T_ATTRD); + crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); + +@@ -317,7 +319,9 @@ attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) + * be all if node is NULL) that the message has been received and + * acted upon. + */ +- pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { ++ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); ++ } + + attrd_xml_add_writer(data); + return send_cluster_message(node, crm_msg_attrd, data, TRUE); +-- +2.31.1 + +From 115e6c3a0d8db4df3eccf6da1c344168799f890d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 15 Nov 2022 09:35:28 -0500 +Subject: [PATCH 25/26] Fix: daemons: Check for NULL in + attrd_do_not_expect_from_peer. 
+ +--- + daemons/attrd/attrd_sync.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c +index 9d07796..6936771 100644 +--- a/daemons/attrd/attrd_sync.c ++++ b/daemons/attrd/attrd_sync.c +@@ -402,7 +402,13 @@ confirmation_timeout_cb(gpointer data) + void + attrd_do_not_expect_from_peer(const char *host) + { +- GList *keys = g_hash_table_get_keys(expected_confirmations); ++ GList *keys = NULL; ++ ++ if (expected_confirmations == NULL) { ++ return; ++ } ++ ++ keys = g_hash_table_get_keys(expected_confirmations); + + crm_trace("Removing peer %s from expected confirmations", host); + +-- +2.31.1 + +From 05da14f97ccd4f63f53801acc107ad661e5fd0c8 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 16 Nov 2022 17:37:44 -0500 +Subject: [PATCH 26/26] Low: daemons: Support cluster-wide sync points for + multi IPC messages. + +Supporting cluster-wide sync points means attrd_expect_confirmations +needs to be called, and then attrd_send_message needs "true" as a third +argument. This indicates attrd wants confirmations back from all its +peers when they have applied the update. + +We're already doing this at the end of attrd_client_update for +single-update IPC messages, and handling it for multi-update messages is +a simple matter of breaking that code out into a function and making +sure it's called. + +Note that this leaves two other spots where sync points still need to be +dealt with: + +* An update message that uses a regex. See + https://projects.clusterlabs.org/T600 for details. + +* A multi-update IPC message in a cluster where that is not supported. + See https://projects.clusterlabs.org/T601 for details. 
+--- + daemons/attrd/attrd_ipc.c | 43 ++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 19 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 16bfff4..8c5660d 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -283,6 +283,28 @@ handle_value_expansion(const char **value, xmlNode *xml, const char *op, + return pcmk_rc_ok; + } + ++static void ++send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) ++{ ++ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { ++ /* The client is waiting on the cluster-wide sync point. In this case, ++ * the response ACK is not sent until this attrd broadcasts the update ++ * and receives its own confirmation back from all peers. ++ */ ++ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); ++ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ ++ ++ } else { ++ /* The client is either waiting on the local sync point or was not ++ * waiting on any sync point at all. For the local sync point, the ++ * response ACK is sent in attrd_peer_update. For clients not ++ * waiting on any sync point, the response ACK is sent in ++ * handle_update_request immediately before this function was called. ++ */ ++ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ ++ } ++} ++ + xmlNode * + attrd_client_update(pcmk__request_t *request) + { +@@ -314,7 +336,7 @@ attrd_client_update(pcmk__request_t *request) + } + } + +- attrd_send_message(NULL, xml, false); ++ send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -388,24 +410,7 @@ attrd_client_update(pcmk__request_t *request) + crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), + value, (attrd_election_won()? 
" (writer)" : "")); + +- if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { +- /* The client is waiting on the cluster-wide sync point. In this case, +- * the response ACK is not sent until this attrd broadcasts the update +- * and receives its own confirmation back from all peers. +- */ +- attrd_expect_confirmations(request, attrd_cluster_sync_point_update); +- attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ +- +- } else { +- /* The client is either waiting on the local sync point or was not +- * waiting on any sync point at all. For the local sync point, the +- * response ACK is sent in attrd_peer_update. For clients not +- * waiting on any sync point, the response ACK is sent in +- * handle_update_request immediately before this function was called. +- */ +- attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ +- } +- ++ send_update_msg_to_cluster(request, xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } +-- +2.31.1 + diff --git a/SOURCES/002-acl_group.patch b/SOURCES/002-acl_group.patch deleted file mode 100644 index 3114887..0000000 --- a/SOURCES/002-acl_group.patch +++ /dev/null @@ -1,425 +0,0 @@ -From 80c64be80f2bffdcf5d2432e1e59d633fd68d516 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:02:32 -0400 -Subject: [PATCH 1/4] Add pcmk__is_user_in_group() - ---- - lib/common/crmcommon_private.h | 3 +++ - lib/common/utils.c | 33 +++++++++++++++++++++++++++++++++ - 2 files changed, 36 insertions(+) - -diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h -index 6b7be9c68..c2fcb0adf 100644 ---- a/lib/common/crmcommon_private.h -+++ b/lib/common/crmcommon_private.h -@@ -96,6 +96,9 @@ void pcmk__free_acls(GList *acls); - G_GNUC_INTERNAL - void pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user); - -+G_GNUC_INTERNAL -+bool pcmk__is_user_in_group(const char *user, const char 
*group); -+ - G_GNUC_INTERNAL - void pcmk__apply_acl(xmlNode *xml); - -diff --git a/lib/common/utils.c b/lib/common/utils.c -index 2dfbef278..f23583acb 100644 ---- a/lib/common/utils.c -+++ b/lib/common/utils.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - - #include - -@@ -53,6 +54,38 @@ gboolean crm_config_error = FALSE; - gboolean crm_config_warning = FALSE; - char *crm_system_name = NULL; - -+bool -+pcmk__is_user_in_group(const char *user, const char *group) -+{ -+ struct group *grent; -+ char **gr_mem; -+ -+ if (user == NULL || group == NULL) { -+ return false; -+ } -+ -+ setgrent(); -+ while ((grent = getgrent()) != NULL) { -+ if (grent->gr_mem == NULL) { -+ continue; -+ } -+ -+ if(strcmp(group, grent->gr_name) != 0) { -+ continue; -+ } -+ -+ gr_mem = grent->gr_mem; -+ while (*gr_mem != NULL) { -+ if (!strcmp(user, *gr_mem++)) { -+ endgrent(); -+ return true; -+ } -+ } -+ } -+ endgrent(); -+ return false; -+} -+ - int - crm_user_lookup(const char *name, uid_t * uid, gid_t * gid) - { --- -2.31.1 - - -From 5fbe5c310de00390fb36d866823a7745ba4812e3 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:04:57 -0400 -Subject: [PATCH 2/4] Add unit test for pcmk__is_user_in_group() - ---- - lib/common/Makefile.am | 2 +- - lib/common/mock.c | 31 +++++-- - lib/common/mock_private.h | 11 +++ - lib/common/tests/acl/Makefile.am | 11 ++- - .../tests/acl/pcmk__is_user_in_group_test.c | 92 +++++++++++++++++++ - 5 files changed, 137 insertions(+), 10 deletions(-) - create mode 100644 lib/common/tests/acl/pcmk__is_user_in_group_test.c - -diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am -index d7aae53bf..04d56dc3c 100644 ---- a/lib/common/Makefile.am -+++ b/lib/common/Makefile.am -@@ -94,7 +94,7 @@ libcrmcommon_la_SOURCES += watchdog.c - libcrmcommon_la_SOURCES += xml.c - libcrmcommon_la_SOURCES += xpath.c - --WRAPPED = calloc getenv getpwnam_r uname -+WRAPPED = calloc getenv getpwnam_r uname setgrent getgrent endgrent - 
WRAPPED_FLAGS = $(foreach fn,$(WRAPPED),-Wl,--wrap=$(fn)) - - libcrmcommon_test_la_SOURCES = $(libcrmcommon_la_SOURCES) -diff --git a/lib/common/mock.c b/lib/common/mock.c -index 55812ddbc..fa9431e6d 100644 ---- a/lib/common/mock.c -+++ b/lib/common/mock.c -@@ -11,6 +11,7 @@ - #include - #include - #include -+#include - - #include "mock_private.h" - -@@ -18,13 +19,13 @@ - * libcrmcommon_test.a, not into libcrmcommon.so. It is used to support - * constructing mock versions of library functions for unit testing. - * -- * Each unit test will only ever want to use a mocked version of one or two -- * library functions. However, we need to mark all the mocked functions as -- * wrapped (with -Wl,--wrap= in the LDFLAGS) in libcrmcommon_test.a so that -- * all those unit tests can share the same special test library. The unit -- * test then defines its own wrapped function. Because a unit test won't -- * define every single wrapped function, there will be undefined references -- * at link time. -+ * Each unit test will only ever want to use a mocked version of a few -+ * library functions (i.e. not all of them). However, we need to mark all -+ * the mocked functions as wrapped (with -Wl,--wrap= in the LDFLAGS) in -+ * libcrmcommon_test.a so that all those unit tests can share the same -+ * special test library. The unit test then defines its own wrapped -+ * function. Because a unit test won't define every single wrapped -+ * function, there will be undefined references at link time. - * - * This file takes care of those undefined references. 
It defines a - * wrapped version of every function that simply calls the real libc -@@ -74,3 +75,19 @@ int __attribute__((weak)) - __wrap_uname(struct utsname *buf) { - return __real_uname(buf); - } -+ -+void __attribute__((weak)) -+__wrap_setgrent(void) { -+ __real_setgrent(); -+} -+ -+struct group * __attribute__((weak)) -+__wrap_getgrent(void) { -+ return __real_getgrent(); -+} -+ -+void __attribute__((weak)) -+__wrap_endgrent(void) { -+ __real_endgrent(); -+} -+ -diff --git a/lib/common/mock_private.h b/lib/common/mock_private.h -index 3df7c9839..0c1134cc3 100644 ---- a/lib/common/mock_private.h -+++ b/lib/common/mock_private.h -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - - /* This header is for the sole use of libcrmcommon_test. */ - -@@ -31,4 +32,14 @@ int __wrap_getpwnam_r(const char *name, struct passwd *pwd, - int __real_uname(struct utsname *buf); - int __wrap_uname(struct utsname *buf); - -+void __real_setgrent(void); -+void __wrap_setgrent(void); -+ -+struct group *__real_getgrent(void); -+struct group *__wrap_getgrent(void); -+ -+void __real_endgrent(void); -+void __wrap_endgrent(void); -+ -+ - #endif // MOCK_PRIVATE__H -diff --git a/lib/common/tests/acl/Makefile.am b/lib/common/tests/acl/Makefile.am -index 679c9cb8e..a73fc354c 100644 ---- a/lib/common/tests/acl/Makefile.am -+++ b/lib/common/tests/acl/Makefile.am -@@ -1,19 +1,26 @@ - # --# Copyright 2021 the Pacemaker project contributors -+# Copyright 2021-2022 the Pacemaker project contributors - # - # The version control history for this file may have further details. - # - # This source code is licensed under the GNU General Public License version 2 - # or later (GPLv2+) WITHOUT ANY WARRANTY. 
- # --AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include -+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/lib/common - LDADD = $(top_builddir)/lib/common/libcrmcommon.la -lcmocka - -+pcmk__is_user_in_group_test_LDADD = $(top_builddir)/lib/common/libcrmcommon_test.la -lcmocka -+pcmk__is_user_in_group_test_LDFLAGS = \ -+ -Wl,--wrap=setgrent \ -+ -Wl,--wrap=getgrent \ -+ -Wl,--wrap=endgrent -+ - include $(top_srcdir)/mk/tap.mk - - # Add "_test" to the end of all test program names to simplify .gitignore. - - check_PROGRAMS = \ -+ pcmk__is_user_in_group_test \ - pcmk_acl_required_test \ - xml_acl_denied_test \ - xml_acl_enabled_test -diff --git a/lib/common/tests/acl/pcmk__is_user_in_group_test.c b/lib/common/tests/acl/pcmk__is_user_in_group_test.c -new file mode 100644 -index 000000000..67b8c2c7c ---- /dev/null -+++ b/lib/common/tests/acl/pcmk__is_user_in_group_test.c -@@ -0,0 +1,92 @@ -+/* -+ * Copyright 2020-2022 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. -+ * -+ * This source code is licensed under the GNU Lesser General Public License -+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. -+ */ -+ -+#include -+#include -+#include "../../crmcommon_private.h" -+ -+#include "mock_private.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+// THe index of the group that is going to be returned next from "get group entry" (getgrent) -+static int group_idx = 0; -+ -+// Data used for testing -+static const char* grp0_members[] = { -+ "user0", "user1", NULL -+}; -+ -+static const char* grp1_members[] = { -+ "user1", NULL -+}; -+ -+static const char* grp2_members[] = { -+ "user2", "user1", NULL -+}; -+ -+// an array of "groups" (a struct from grp.h), the members of the groups are initalized here to some testing data. -+// Casting away the consts to make the compiler happy and simplify initialization. 
-+// We never actually change these variables during the test! -+// string literal = const char* (cannot be changed b/c ? ) vs. char* (its getting casted to this) -+static const int NUM_GROUPS = 3; -+static struct group groups[] = { -+ {(char*)"grp0", (char*)"", 0, (char**)grp0_members}, -+ {(char*)"grp1", (char*)"", 1, (char**)grp1_members}, -+ {(char*)"grp2", (char*)"", 2, (char**)grp2_members}, -+}; -+ -+// This function resets the group_idx to 0. -+void -+__wrap_setgrent(void) { -+ group_idx = 0; -+} -+ -+// This function returns the next group entry in the list of groups, or -+// NULL if there aren't any left. -+// group_idx is a global variable which keeps track of where you are in the list -+struct group * -+__wrap_getgrent(void) { -+ if(group_idx >= NUM_GROUPS) return NULL; -+ return &groups[group_idx++]; -+} -+ -+void -+__wrap_endgrent(void) { -+} -+ -+static void -+is_pcmk__is_user_in_group(void **state) -+{ -+ // null user -+ assert_false(pcmk__is_user_in_group(NULL, "grp0")); -+ // null group -+ assert_false(pcmk__is_user_in_group("user0", NULL)); -+ // nonexistent group -+ assert_false(pcmk__is_user_in_group("user0", "nonexistent_group")); -+ // user is in group -+ assert_true(pcmk__is_user_in_group("user0", "grp0")); -+ // user is not in group -+ assert_false(pcmk__is_user_in_group("user2", "grp0")); -+} -+ -+int -+main(int argc, char **argv) -+{ -+ const struct CMUnitTest tests[] = { -+ cmocka_unit_test(is_pcmk__is_user_in_group) -+ }; -+ -+ cmocka_set_message_output(CM_OUTPUT_TAP); -+ return cmocka_run_group_tests(tests, NULL, NULL); -+} --- -2.31.1 - - -From 1bb7fda60f5b8547d7457f20543b7e50089cf06b Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:17:36 -0400 -Subject: [PATCH 3/4] Add ACL group support - -closes T61 ---- - lib/common/acl.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/lib/common/acl.c b/lib/common/acl.c -index f68069bbd..d7f8469b1 100644 ---- a/lib/common/acl.c -+++ b/lib/common/acl.c -@@ -320,6 
+320,13 @@ pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user) - crm_debug("Unpacking ACLs for user '%s'", id); - p->acls = parse_acl_entry(acls, child, p->acls); - } -+ } else if (!strcmp(tag, XML_ACL_TAG_GROUP)) { -+ const char *id = crm_element_value(child, XML_ATTR_ID); -+ -+ if (id && pcmk__is_user_in_group(user,id)) { -+ crm_debug("Unpacking ACLs for group '%s'", id); -+ p->acls = parse_acl_entry(acls, child, p->acls); -+ } - } - } - } --- -2.31.1 - - -From f4efd55d9424d34908ba3e2bcffe16c00b2cf660 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Mon, 13 Jun 2022 09:20:36 -0400 -Subject: [PATCH 4/4] Allow acl_target and acl_group elements to take a 'name' - attribute to use a name different from 'id' - -closes T60 ---- - include/crm/msg_xml.h | 1 + - lib/common/acl.c | 21 +++++++++++++++++---- - 2 files changed, 18 insertions(+), 4 deletions(-) - -diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h -index b36dcf060..6470520b1 100644 ---- a/include/crm/msg_xml.h -+++ b/include/crm/msg_xml.h -@@ -133,6 +133,7 @@ extern "C" { - # define XML_ATTR_VERSION "version" - # define XML_ATTR_DESC "description" - # define XML_ATTR_ID "id" -+# define XML_ATTR_NAME "name" - # define XML_ATTR_IDREF "id-ref" - # define XML_ATTR_ID_LONG "long-id" - # define XML_ATTR_TYPE "type" -diff --git a/lib/common/acl.c b/lib/common/acl.c -index d7f8469b1..b9f7472ee 100644 ---- a/lib/common/acl.c -+++ b/lib/common/acl.c -@@ -278,8 +278,13 @@ pcmk__apply_acl(xmlNode *xml) - - /*! 
- * \internal -- * \brief Unpack ACLs for a given user -- * -+ * \brief Unpack ACLs for a given user into the -+ * metadata of the target XML tree -+ * -+ * Taking the description of ACLs from the source XML tree and -+ * marking up the target XML tree with access information for the -+ * given user by tacking it onto the relevant nodes -+ * - * \param[in] source XML with ACL definitions - * \param[in,out] target XML that ACLs will be applied to - * \param[in] user Username whose ACLs need to be unpacked -@@ -314,14 +319,22 @@ pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user) - - if (!strcmp(tag, XML_ACL_TAG_USER) - || !strcmp(tag, XML_ACL_TAG_USERv1)) { -- const char *id = crm_element_value(child, XML_ATTR_ID); -+ const char *id = crm_element_value(child, XML_ATTR_NAME); -+ -+ if (id == NULL) { -+ id = crm_element_value(child, XML_ATTR_ID); -+ } - - if (id && strcmp(id, user) == 0) { - crm_debug("Unpacking ACLs for user '%s'", id); - p->acls = parse_acl_entry(acls, child, p->acls); - } - } else if (!strcmp(tag, XML_ACL_TAG_GROUP)) { -- const char *id = crm_element_value(child, XML_ATTR_ID); -+ const char *id = crm_element_value(child, XML_ATTR_NAME); -+ -+ if (id == NULL) { -+ id = crm_element_value(child, XML_ATTR_ID); -+ } - - if (id && pcmk__is_user_in_group(user,id)) { - crm_debug("Unpacking ACLs for group '%s'", id); --- -2.31.1 - diff --git a/SOURCES/002-remote-regression.patch b/SOURCES/002-remote-regression.patch new file mode 100644 index 0000000..0f0bea8 --- /dev/null +++ b/SOURCES/002-remote-regression.patch @@ -0,0 +1,98 @@ +From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 23 Jan 2023 14:25:56 -0600 +Subject: [PATCH] High: executor: fix regression in remote node shutdown + +This reverts the essential part of d61494347, which was based on misdiagnosing +a remote node shutdown issue. 
Initially, it was thought that a "TLS server +session ended" log just after a remote node requested shutdown indicated that +the proxy connection coincidentally dropped at that moment. It actually is the +routine stopping of accepting new proxy connections, and exiting when that +happens makes the remote node exit immediately without waiting for the +all-clear from the cluster. + +Fixes T361 +--- + daemons/execd/pacemaker-execd.c | 19 +------------------ + daemons/execd/pacemaker-execd.h | 3 +-- + daemons/execd/remoted_tls.c | 6 +----- + 3 files changed, 3 insertions(+), 25 deletions(-) + +diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c +index db12674f13..491808974a 100644 +--- a/daemons/execd/pacemaker-execd.c ++++ b/daemons/execd/pacemaker-execd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -305,23 +305,6 @@ lrmd_exit(gpointer data) + return FALSE; + } + +-/*! +- * \internal +- * \brief Clean up and exit if shutdown has started +- * +- * \return Doesn't return +- */ +-void +-execd_exit_if_shutting_down(void) +-{ +-#ifdef PCMK__COMPILE_REMOTE +- if (shutting_down) { +- crm_warn("exit because TLS connection was closed and 'shutting_down' set"); +- lrmd_exit(NULL); +- } +-#endif +-} +- + /*! + * \internal + * \brief Request cluster shutdown if appropriate, otherwise exit immediately +diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h +index 6646ae29e3..f78e8dcdde 100644 +--- a/daemons/execd/pacemaker-execd.h ++++ b/daemons/execd/pacemaker-execd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details.
+ * +@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp); + int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, + xmlNode *request); + void lrmd_drain_alerts(GMainLoop *mloop); +-void execd_exit_if_shutting_down(void); + + #endif // PACEMAKER_EXECD__H +diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c +index 6f4b2d0062..c65e3f394d 100644 +--- a/daemons/execd/remoted_tls.c ++++ b/daemons/execd/remoted_tls.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2012-2022 the Pacemaker project contributors ++ * Copyright 2012-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -250,10 +250,6 @@ static void + tls_server_dropped(gpointer user_data) + { + crm_notice("TLS server session ended"); +- /* If we are in the process of shutting down, then we should actually exit. +- * bz#1804259 +- */ +- execd_exit_if_shutting_down(); + return; + } + +-- +2.31.1 + diff --git a/SOURCES/003-history-cleanup.patch b/SOURCES/003-history-cleanup.patch new file mode 100644 index 0000000..87a3e27 --- /dev/null +++ b/SOURCES/003-history-cleanup.patch @@ -0,0 +1,2829 @@ +From e953591a9796edebd4796c344df0eddcbc7a2dff Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 30 Jan 2023 16:34:32 -0600 +Subject: [PATCH 01/14] Refactor: scheduler: drop unneeded arguments from + process_rsc_state() + +migrate_op has been unused since at least 2011 +--- + lib/pengine/unpack.c | 36 +++++++++++++++--------------------- + 1 file changed, 15 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 5fcba3b..9524def 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -1963,8 +1963,7 @@ process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t + + static void + process_rsc_state(pe_resource_t * rsc, pe_node_t * node, +- enum action_fail_response on_fail, +- xmlNode * migrate_op, pe_working_set_t * data_set) ++ 
enum action_fail_response on_fail) + { + pe_node_t *tmpnode = NULL; + char *reason = NULL; +@@ -2016,7 +2015,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + should_fence = TRUE; + +- } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { ++ } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + if (pe__is_remote_node(node) && node->details->remote_rsc + && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { + +@@ -2039,7 +2038,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + if (reason == NULL) { + reason = crm_strdup_printf("%s is thought to be active there", rsc->id); + } +- pe_fence_node(data_set, node, reason, FALSE); ++ pe_fence_node(rsc->cluster, node, reason, FALSE); + } + free(reason); + } +@@ -2069,7 +2068,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * but also mark the node as unclean + */ + reason = crm_strdup_printf("%s failed there", rsc->id); +- pe_fence_node(data_set, node, reason, FALSE); ++ pe_fence_node(rsc->cluster, node, reason, FALSE); + free(reason); + break; + +@@ -2090,7 +2089,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* make sure it comes up somewhere else + * or not at all + */ +- resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); ++ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", ++ rsc->cluster); + break; + + case action_fail_stop: +@@ -2112,8 +2112,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * container is running yet, so remember it and add a stop + * action for it later. 
+ */ +- data_set->stop_needed = g_list_prepend(data_set->stop_needed, +- rsc->container); ++ rsc->cluster->stop_needed = ++ g_list_prepend(rsc->cluster->stop_needed, rsc->container); + } else if (rsc->container) { + stop_action(rsc->container, node, FALSE); + } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { +@@ -2123,10 +2123,10 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + + case action_fail_reset_remote: + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +- if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { ++ if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { + tmpnode = NULL; + if (rsc->is_remote_node) { +- tmpnode = pe_find_node(data_set->nodes, rsc->id); ++ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); + } + if (tmpnode && + pe__is_remote_node(tmpnode) && +@@ -2135,7 +2135,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + /* The remote connection resource failed in a way that + * should result in fencing the remote node. + */ +- pe_fence_node(data_set, tmpnode, ++ pe_fence_node(rsc->cluster, tmpnode, + "remote connection is unrecoverable", FALSE); + } + } +@@ -2158,7 +2158,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + * result in a fencing operation regardless if we're going to attempt to + * reconnect to the remote-node in this transition or not. 
*/ + if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { +- tmpnode = pe_find_node(data_set->nodes, rsc->id); ++ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); + if (tmpnode && tmpnode->details->unclean) { + tmpnode->details->unseen = FALSE; + } +@@ -2177,7 +2177,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, + } + } + +- native_add_running(rsc, node, data_set, (save_on_fail != action_fail_ignore)); ++ native_add_running(rsc, node, rsc->cluster, ++ (save_on_fail != action_fail_ignore)); + switch (on_fail) { + case action_fail_ignore: + break; +@@ -2376,14 +2377,12 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + int start_index = -1; + enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; + +- const char *task = NULL; + const char *rsc_id = ID(lrm_resource); + + pe_resource_t *rsc = NULL; + GList *op_list = NULL; + GList *sorted_op_list = NULL; + +- xmlNode *migrate_op = NULL; + xmlNode *rsc_op = NULL; + xmlNode *last_failure = NULL; + +@@ -2437,11 +2436,6 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { + xmlNode *rsc_op = (xmlNode *) gIter->data; + +- task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); +- if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { +- migrate_op = rsc_op; +- } +- + unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); + } + +@@ -2452,7 +2446,7 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, + /* no need to free the contents */ + g_list_free(sorted_op_list); + +- process_rsc_state(rsc, node, on_fail, migrate_op, data_set); ++ process_rsc_state(rsc, node, on_fail); + + if (get_target_role(rsc, &req_role)) { + if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { +-- +2.31.1 + +From 6f4e34cccc4864961d2020a2dd547450ac53a44e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 16:30:20 -0600 +Subject: [PATCH 02/14] Log: scheduler: improve 
trace logs when unpacking + resource history + +--- + lib/pengine/unpack.c | 112 +++++++++++++++++++++++++++---------------- + 1 file changed, 71 insertions(+), 41 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 9524def..b7b2873 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -3363,6 +3363,24 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + pe__set_resource_flags(rsc, pe_rsc_block); + } + ++/*! ++ * \internal ++ * \brief Update an integer value and why ++ * ++ * \param[in,out] i Pointer to integer to update ++ * \param[in,out] why Where to store reason for update ++ * \param[in] value New value ++ * \param[in,out] reason Description of why value was changed ++ */ ++static inline void ++remap_because(int *i, const char **why, int value, const char *reason) ++{ ++ if (*i != value) { ++ *i = value; ++ *why = reason; ++ } ++} ++ + /*! + * \internal + * \brief Remap informational monitor results and operation status +@@ -3393,29 +3411,34 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + static void + remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + pe_working_set_t *data_set, enum action_fail_response *on_fail, +- int target_rc, int *rc, int *status) { ++ int target_rc, int *rc, int *status) ++{ + bool is_probe = false; ++ int orig_exit_status = *rc; ++ int orig_exec_status = *status; ++ const char *why = NULL; + const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); + const char *key = get_op_key(xml_op); + const char *exit_reason = crm_element_value(xml_op, + XML_LRM_ATTR_EXIT_REASON); + + if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) { +- int remapped_rc = pcmk__effective_rc(*rc); +- +- if (*rc != remapped_rc) { +- crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc); ++ // Remap degraded results to their usual counterparts ++ *rc = pcmk__effective_rc(*rc); ++ if (*rc != orig_exit_status) { ++ why = 
"degraded monitor result"; + if (!node->details->shutdown || node->details->online) { + record_failed_op(xml_op, node, rsc, data_set); + } +- +- *rc = remapped_rc; + } + } + + if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { +- *status = PCMK_EXEC_DONE; +- *rc = PCMK_OCF_NOT_RUNNING; ++ if ((*status != PCMK_EXEC_DONE) || (*rc != PCMK_OCF_NOT_RUNNING)) { ++ *status = PCMK_EXEC_DONE; ++ *rc = PCMK_OCF_NOT_RUNNING; ++ why = "irrelevant probe result"; ++ } + } + + /* If the executor reported an operation status of anything but done or +@@ -3423,22 +3446,19 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + * it should be treated as a failure or not, because we know the expected + * result. + */ +- if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) { +- return; ++ switch (*status) { ++ case PCMK_EXEC_DONE: ++ case PCMK_EXEC_ERROR: ++ break; ++ default: ++ goto remap_done; + } + +- CRM_ASSERT(rsc); +- CRM_CHECK(task != NULL, +- *status = PCMK_EXEC_ERROR; return); +- +- *status = PCMK_EXEC_DONE; +- + if (exit_reason == NULL) { + exit_reason = ""; + } + + is_probe = pcmk_xe_is_probe(xml_op); +- + if (is_probe) { + task = "probe"; + } +@@ -3452,12 +3472,15 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + * those versions or processing of saved CIB files from those versions, + * so we do not need to care much about this case. 
+ */ +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, "obsolete history format"); + crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", + key, pe__node_name(node)); + +- } else if (target_rc != *rc) { +- *status = PCMK_EXEC_ERROR; ++ } else if (*rc == target_rc) { ++ remap_because(status, &why, PCMK_EXEC_DONE, "expected result"); ++ ++ } else { ++ remap_because(status, &why, PCMK_EXEC_ERROR, "unexpected result"); + pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", + key, pe__node_name(node), + target_rc, services_ocf_exitcode_str(target_rc), +@@ -3468,7 +3491,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + switch (*rc) { + case PCMK_OCF_OK: + if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why,PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, pe__node_name(node), + last_change_str(xml_op)); +@@ -3479,7 +3502,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + if (is_probe || (target_rc == *rc) + || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { + +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why, PCMK_EXEC_DONE, "exit status"); + rsc->role = RSC_ROLE_STOPPED; + + /* clear any previous failure actions */ +@@ -3490,7 +3513,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + + case PCMK_OCF_RUNNING_PROMOTED: + if (is_probe && (*rc != target_rc)) { +- *status = PCMK_EXEC_DONE; ++ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, + "Probe found %s active and promoted on %s at %s", + rsc->id, pe__node_name(node), +@@ -3502,11 +3525,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + case PCMK_OCF_DEGRADED_PROMOTED: + case PCMK_OCF_FAILED_PROMOTED: + rsc->role = RSC_ROLE_PROMOTED; +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, "exit 
status"); + break; + + case PCMK_OCF_NOT_CONFIGURED: +- *status = PCMK_EXEC_ERROR_FATAL; ++ remap_because(status, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); + break; + + case PCMK_OCF_UNIMPLEMENT_FEATURE: +@@ -3517,9 +3540,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + + if (interval_ms == 0) { + check_recoverable(rsc, node, task, *rc, xml_op); +- *status = PCMK_EXEC_ERROR_HARD; ++ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, ++ "exit status"); + } else { +- *status = PCMK_EXEC_NOT_SUPPORTED; ++ remap_because(status, &why, PCMK_EXEC_NOT_SUPPORTED, ++ "exit status"); + } + } + break; +@@ -3528,7 +3553,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + case PCMK_OCF_INVALID_PARAM: + case PCMK_OCF_INSUFFICIENT_PRIV: + check_recoverable(rsc, node, task, *rc, xml_op); +- *status = PCMK_EXEC_ERROR_HARD; ++ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, "exit status"); + break; + + default: +@@ -3537,13 +3562,21 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + "on %s at %s as failure", + *rc, task, rsc->id, pe__node_name(node), + last_change_str(xml_op)); +- *status = PCMK_EXEC_ERROR; ++ remap_because(status, &why, PCMK_EXEC_ERROR, ++ "unknown exit status"); + } + break; + } + +- pe_rsc_trace(rsc, "Remapped %s status to '%s'", +- key, pcmk_exec_status_str(*status)); ++remap_done: ++ if (why != NULL) { ++ pe_rsc_trace(rsc, ++ "Remapped %s result from [%s: %s] to [%s: %s] " ++ "because of %s", ++ key, pcmk_exec_status_str(orig_exec_status), ++ crm_exit_str(orig_exit_status), ++ pcmk_exec_status_str(*status), crm_exit_str(*rc), why); ++ } + } + + // return TRUE if start or monitor last failure but parameters changed +@@ -3947,9 +3980,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + parent = uber_parent(rsc); + } + +- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", +- task_key, task, task_id, status, rc, 
pe__node_name(node), +- role2text(rsc->role)); ++ pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", ++ ID(xml_op), task, task_id, pe__node_name(node), ++ pcmk_exec_status_str(status), crm_exit_str(rc)); + + if (node->details->unclean) { + pe_rsc_trace(rsc, +@@ -4077,9 +4110,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + goto done; + + case PCMK_EXEC_DONE: +- pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", +- task, rsc->id, pe__node_name(node), +- last_change_str(xml_op), ID(xml_op)); + update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); + goto done; + +@@ -4175,9 +4205,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + } + + done: +- pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", +- rsc->id, task, role2text(rsc->role), +- role2text(rsc->next_role)); ++ pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", ++ rsc->id, pe__node_name(node), ID(xml_op), ++ role2text(rsc->role), role2text(rsc->next_role)); + } + + static void +-- +2.31.1 + +From 5a1d2a3ba58fa73225433dab40cee0a6e0ef9bda Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 12:08:55 -0600 +Subject: [PATCH 03/14] Low: scheduler: improve migration history validation + +Instead of a simple CRM_CHECK(), functionize parsing the source and target node +names from a migration action's resource history entry. This reduces +duplication and allows us to log more helpful errors. + +Also, CRM_CHECK() tries to dump core for debugging, and that's not helpful for +corrupted CIB entries. 
+--- + lib/pengine/unpack.c | 87 ++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 75 insertions(+), 12 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index b7b2873..cd1b038 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2786,6 +2786,60 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, + || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, + data_set); + } ++ ++/*! ++ * \internal ++ * \brief Parse migration source and target node names from history entry ++ * ++ * \param[in] entry Resource history entry for a migration action ++ * \param[in] source_node If not NULL, source must match this node ++ * \param[in] target_node If not NULL, target must match this node ++ * \param[out] source_name Where to store migration source node name ++ * \param[out] target_name Where to store migration target node name ++ * ++ * \return Standard Pacemaker return code ++ */ ++static int ++get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, ++ const pe_node_t *target_node, ++ const char **source_name, const char **target_name) ++{ ++ const char *id = ID(entry); ++ ++ if (id == NULL) { ++ crm_err("Ignoring resource history entry without ID"); ++ return pcmk_rc_unpack_error; ++ } ++ ++ *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); ++ *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); ++ if ((*source_name == NULL) || (*target_name == NULL)) { ++ crm_err("Ignoring resource history entry %s without " ++ XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, ++ id); ++ return pcmk_rc_unpack_error; ++ } ++ ++ if ((source_node != NULL) ++ && !pcmk__str_eq(*source_name, source_node->details->uname, ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ crm_err("Ignoring resource history entry %s because " ++ XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", ++ id, pcmk__s(*source_name, ""), pe__node_name(source_node)); ++ 
return pcmk_rc_unpack_error; ++ } ++ ++ if ((target_node != NULL) ++ && !pcmk__str_eq(*target_name, target_node->details->uname, ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ crm_err("Ignoring resource history entry %s because " ++ XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", ++ id, pcmk__s(*target_name, ""), pe__node_name(target_node)); ++ return pcmk_rc_unpack_error; ++ } ++ ++ return pcmk_rc_ok; ++} + + static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +@@ -2834,13 +2888,16 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_node_t *target_node = NULL; + pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + bool source_newer_op = false; + bool target_newer_state = false; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, node, NULL, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If there's any newer non-monitor operation on the source, this migrate_to + * potentially no longer matters for the source. 
+@@ -2949,11 +3006,14 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) + { + xmlNode *target_migrate_from = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, node, NULL, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If a migration failed, we have to assume the resource is active. Clones + * are not allowed to migrate, so role can't be promoted. +@@ -3001,11 +3061,14 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, + xmlNode *xml_op, pe_working_set_t *data_set) + { + xmlNode *source_migrate_to = NULL; +- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); +- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); ++ const char *source = NULL; ++ const char *target = NULL; + +- // Sanity check +- CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); ++ // Get source and target node names from XML ++ if (get_migration_node_names(xml_op, NULL, node, &source, ++ &target) != pcmk_rc_ok) { ++ return; ++ } + + /* If a migration failed, we have to assume the resource is active. Clones + * are not allowed to migrate, so role can't be promoted. +-- +2.31.1 + +From 5139e5369769e733b05bc28940d3dccb4f7fca95 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 14:30:16 -0600 +Subject: [PATCH 04/14] Refactor: scheduler: functionize adding a dangling + migration + +... 
for code isolation and readability +--- + lib/pengine/unpack.c | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index cd1b038..fa7c2cc 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2841,6 +2841,28 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + return pcmk_rc_ok; + } + ++/* ++ * \internal ++ * \brief Add a migration source to a resource's list of dangling migrations ++ * ++ * If the migrate_to and migrate_from actions in a live migration both ++ * succeeded, but there is no stop on the source, the migration is considered ++ * "dangling." Add the source to the resource's dangling migration list, which ++ * will be used to schedule a stop on the source without affecting the target. ++ * ++ * \param[in,out] rsc Resource involved in migration ++ * \param[in] node Migration source ++ */ ++static void ++add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) ++{ ++ pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", ++ rsc->id, pe__node_name(node)); ++ rsc->role = RSC_ROLE_STOPPED; ++ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, ++ (gpointer) node); ++} ++ + static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) +@@ -2941,14 +2963,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + if (migrate_from && from_rc == PCMK_OCF_OK + && (from_status == PCMK_EXEC_DONE)) { +- /* The migrate_to and migrate_from both succeeded, so mark the migration +- * as "dangling". This will be used to schedule a stop action on the +- * source without affecting the target. 
+- */ +- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), +- source); +- rsc->role = RSC_ROLE_STOPPED; +- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); ++ add_dangling_migration(rsc, node); + + } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed + /* If the resource has newer state on the target, this migrate_to no +-- +2.31.1 + +From da71c04463d31338dd5da54d1d48b53e413716dc Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 16:57:55 -0600 +Subject: [PATCH 05/14] Refactor: scheduler: check for dangling migration + before setting role + +Previously, unpack_migrate_to_success() set rsc->role = RSC_ROLE_STARTED +then checked for dangling migration, which would reset it to RSC_ROLE_STOPPED. + +For clarity, do the dangling migration check first. +--- + lib/pengine/unpack.c | 47 ++++++++++++++++++++++++-------------------- + 1 file changed, 26 insertions(+), 21 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index fa7c2cc..b858b59 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2905,8 +2905,8 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * migration is considered to be "dangling". Schedule a stop on the source + * in this case. + */ +- int from_rc = 0; +- int from_status = 0; ++ int from_rc = PCMK_OCF_OK; ++ int from_status = PCMK_EXEC_PENDING; + pe_node_t *target_node = NULL; + pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; +@@ -2930,12 +2930,17 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + // Check whether there was a migrate_from action on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, data_set); +- +- /* Even if there's a newer non-monitor operation on the source, we still +- * need to check how this migrate_to might matter for the target. 
+- */ +- if (source_newer_op && migrate_from) { +- return; ++ if (migrate_from != NULL) { ++ if (source_newer_op) { ++ /* There's a newer non-monitor operation on the source and a ++ * migrate_from on the target, so this migrate_to is irrelevant to ++ * the resource's state. ++ */ ++ return; ++ } ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); ++ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, ++ &from_status); + } + + /* If the resource has newer state on the target after the migration +@@ -2948,24 +2953,24 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + return; + } + +- // Clones are not allowed to migrate, so role can't be promoted ++ /* Check for dangling migration (migrate_from succeeded but stop not done). ++ * We know there's no stop because we already returned if the target has a ++ * migrate_from and the source has any newer non-monitor operation. ++ */ ++ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { ++ add_dangling_migration(rsc, node); ++ return; ++ } ++ ++ /* Without newer state, this migrate_to implies the resource is active. ++ * (Clones are not allowed to migrate, so role can't be promoted.) 
++ */ + rsc->role = RSC_ROLE_STARTED; + + target_node = pe_find_node(data_set->nodes, target); + source_node = pe_find_node(data_set->nodes, source); + +- if (migrate_from) { +- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); +- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); +- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", +- ID(migrate_from), target, from_status, from_rc); +- } +- +- if (migrate_from && from_rc == PCMK_OCF_OK +- && (from_status == PCMK_EXEC_DONE)) { +- add_dangling_migration(rsc, node); +- +- } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed ++ if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target + /* If the resource has newer state on the target, this migrate_to no + * longer matters for the target. + */ +-- +2.31.1 + +From d98a2687d68747b0598554939dea05c420456a12 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:05:50 -0600 +Subject: [PATCH 06/14] Refactor: scheduler: avoid duplication of + active-on-target check + +--- + lib/pengine/unpack.c | 24 ++++++------------------ + 1 file changed, 6 insertions(+), 18 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index b858b59..8cfc0ef 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2914,6 +2914,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + const char *target = NULL; + bool source_newer_op = false; + bool target_newer_state = false; ++ bool active_on_target = false; + + // Get source and target node names from XML + if (get_migration_node_names(xml_op, node, NULL, &source, +@@ -2969,23 +2970,14 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + target_node = pe_find_node(data_set->nodes, target); + source_node = pe_find_node(data_set->nodes, source); ++ active_on_target = !target_newer_state && (target_node != NULL) ++ && target_node->details->online; + + if 
(from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target +- /* If the resource has newer state on the target, this migrate_to no +- * longer matters for the target. +- */ +- if (!target_newer_state +- && target_node && target_node->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, +- target_node->details->online); ++ if (active_on_target) { + native_add_running(rsc, target_node, data_set, TRUE); +- + } else { +- /* With the earlier bail logic, migrate_from != NULL here implies +- * source_newer_op is false, meaning this migrate_to still matters +- * for the source. +- * Consider it failed here - forces a restart, prevents migration +- */ ++ // Mark resource as failed, require recovery, and prevent migration + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } +@@ -2994,11 +2986,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + /* If the resource has newer state on the target, this migrate_to no + * longer matters for the target. + */ +- if (!target_newer_state +- && target_node && target_node->details->online) { +- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, +- target_node->details->online); +- ++ if (active_on_target) { + native_add_running(rsc, target_node, data_set, FALSE); + if (source_node && source_node->details->online) { + /* This is a partial migration: the migrate_to completed +-- +2.31.1 + +From ae145309e3fdb26608e99f6d1fe1a7859d98efd0 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:07:58 -0600 +Subject: [PATCH 07/14] Refactor: scheduler: improve unpacking of successful + migrate_to + +Improve log messages, comments, and formatting, and avoid doing things until +needed, to improve efficiency of early returns. 
+--- + lib/pengine/unpack.c | 109 +++++++++++++++++++------------------------ + 1 file changed, 48 insertions(+), 61 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8cfc0ef..224b7b5 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2867,48 +2867,40 @@ static void + unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe_working_set_t *data_set) + { +- /* A successful migration sequence is: +- * migrate_to on source node +- * migrate_from on target node +- * stop on source node ++ /* A complete migration sequence is: ++ * 1. migrate_to on source node (which succeeded if we get to this function) ++ * 2. migrate_from on target node ++ * 3. stop on source node + * +- * But there could be scenarios like (It's easier to produce with cluster +- * property batch-limit=1): +- * +- * - rscA is live-migrating from node1 to node2. +- * +- * - Before migrate_to on node1 returns, put node2 into standby. +- * +- * - Transition aborts upon return of successful migrate_to on node1. New +- * transition is going to stop the rscA on both nodes and start it on +- * node1. ++ * If no migrate_from has happened, the migration is considered to be ++ * "partial". If the migrate_from succeeded but no stop has happened, the ++ * migration is considered to be "dangling". + * +- * - While it is stopping on node1, run something that is going to make +- * the transition abort again like: +- * crm_resource --resource rscA --ban --node node2 ++ * If a successful migrate_to and stop have happened on the source node, we ++ * still need to check for a partial migration, due to scenarios (easier to ++ * produce with batch-limit=1) like: + * +- * - Transition aborts upon return of stop on node1. ++ * - A resource is migrating from node1 to node2, and a migrate_to is ++ * initiated for it on node1. + * +- * Now although there's a stop on node1, it's still a partial migration and +- * rscA is still potentially active on node2. 
++ * - node2 goes into standby mode while the migrate_to is pending, which ++ * aborts the transition. + * +- * So even if a migrate_to is followed by a stop, we still need to check +- * whether there's a corresponding migrate_from or any newer operation on +- * the target. ++ * - Upon completion of the migrate_to, a new transition schedules a stop ++ * on both nodes and a start on node1. + * +- * If no migrate_from has happened, the migration is considered to be +- * "partial". If the migrate_from failed, make sure the resource gets +- * stopped on both source and target (if up). ++ * - If the new transition is aborted for any reason while the resource is ++ * stopping on node1, the transition after that stop completes will see ++ * the migrate_from and stop on the source, but it's still a partial ++ * migration, and the resource must be stopped on node2 because it is ++ * potentially active there due to the migrate_to. + * +- * If the migrate_to and migrate_from both succeeded (which also implies the +- * resource is no longer running on the source), but there is no stop, the +- * migration is considered to be "dangling". Schedule a stop on the source +- * in this case. ++ * We also need to take into account that either node's history may be ++ * cleared at any point in the migration process. + */ + int from_rc = PCMK_OCF_OK; + int from_status = PCMK_EXEC_PENDING; + pe_node_t *target_node = NULL; +- pe_node_t *source_node = NULL; + xmlNode *migrate_from = NULL; + const char *source = NULL; + const char *target = NULL; +@@ -2922,13 +2914,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + return; + } + +- /* If there's any newer non-monitor operation on the source, this migrate_to +- * potentially no longer matters for the source. 
+- */ ++ // Check for newer state on the source + source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, + data_set); + +- // Check whether there was a migrate_from action on the target ++ // Check for a migrate_from action from this source on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, + source, -1, data_set); + if (migrate_from != NULL) { +@@ -2944,12 +2934,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + &from_status); + } + +- /* If the resource has newer state on the target after the migration +- * events, this migrate_to no longer matters for the target. ++ /* If the resource has newer state on both the source and target after the ++ * migration events, this migrate_to is irrelevant to the resource's state. + */ + target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, + migrate_from, data_set); +- + if (source_newer_op && target_newer_state) { + return; + } +@@ -2969,7 +2958,6 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + rsc->role = RSC_ROLE_STARTED; + + target_node = pe_find_node(data_set->nodes, target); +- source_node = pe_find_node(data_set->nodes, source); + active_on_target = !target_newer_state && (target_node != NULL) + && target_node->details->online; + +@@ -2981,31 +2969,30 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); + pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } ++ return; ++ } + +- } else { // Pending, or complete but erased +- /* If the resource has newer state on the target, this migrate_to no +- * longer matters for the target. 
+- */ +- if (active_on_target) { +- native_add_running(rsc, target_node, data_set, FALSE); +- if (source_node && source_node->details->online) { +- /* This is a partial migration: the migrate_to completed +- * successfully on the source, but the migrate_from has not +- * completed. Remember the source and target; if the newly +- * chosen target remains the same when we schedule actions +- * later, we may continue with the migration. +- */ +- rsc->partial_migration_target = target_node; +- rsc->partial_migration_source = source_node; +- } +- } else if (!source_newer_op) { +- /* This migrate_to matters for the source only if it's the last +- * non-monitor operation here. +- * Consider it failed here - forces a restart, prevents migration ++ // The migrate_from is pending, complete but erased, or to be scheduled ++ ++ if (active_on_target) { ++ pe_node_t *source_node = pe_find_node(data_set->nodes, source); ++ ++ native_add_running(rsc, target_node, data_set, FALSE); ++ if ((source_node != NULL) && source_node->details->online) { ++ /* This is a partial migration: the migrate_to completed ++ * successfully on the source, but the migrate_from has not ++ * completed. Remember the source and target; if the newly ++ * chosen target remains the same when we schedule actions ++ * later, we may continue with the migration. 
+ */ +- pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +- pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); ++ rsc->partial_migration_target = target_node; ++ rsc->partial_migration_source = source_node; + } ++ ++ } else if (!source_newer_op) { ++ // Mark resource as failed, require recovery, and prevent migration ++ pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); ++ pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); + } + } + +-- +2.31.1 + +From 7d63ed8d52f64d2523367cff36bf77bd85296bd9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 17:14:57 -0600 +Subject: [PATCH 08/14] Refactor: scheduler: drop redundant argument from + unpack_migrate_to_success() + +--- + lib/pengine/unpack.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 224b7b5..6222115 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2864,8 +2864,7 @@ add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) + } + + static void +-unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, +- pe_working_set_t *data_set) ++unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) + { + /* A complete migration sequence is: + * 1. 
migrate_to on source node (which succeeded if we get to this function) +@@ -2916,11 +2915,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + + // Check for newer state on the source + source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, +- data_set); ++ rsc->cluster); + + // Check for a migrate_from action from this source on the target + migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, +- source, -1, data_set); ++ source, -1, rsc->cluster); + if (migrate_from != NULL) { + if (source_newer_op) { + /* There's a newer non-monitor operation on the source and a +@@ -2938,7 +2937,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * migration events, this migrate_to is irrelevant to the resource's state. + */ + target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, +- migrate_from, data_set); ++ migrate_from, rsc->cluster); + if (source_newer_op && target_newer_state) { + return; + } +@@ -2957,13 +2956,13 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + */ + rsc->role = RSC_ROLE_STARTED; + +- target_node = pe_find_node(data_set->nodes, target); ++ target_node = pe_find_node(rsc->cluster->nodes, target); + active_on_target = !target_newer_state && (target_node != NULL) + && target_node->details->online; + + if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target + if (active_on_target) { +- native_add_running(rsc, target_node, data_set, TRUE); ++ native_add_running(rsc, target_node, rsc->cluster, TRUE); + } else { + // Mark resource as failed, require recovery, and prevent migration + pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); +@@ -2975,9 +2974,9 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + // The migrate_from is pending, complete but erased, or to be scheduled + + if (active_on_target) { +- pe_node_t *source_node = pe_find_node(data_set->nodes, 
source); ++ pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + +- native_add_running(rsc, target_node, data_set, FALSE); ++ native_add_running(rsc, target_node, rsc->cluster, FALSE); + if ((source_node != NULL) && source_node->details->online) { + /* This is a partial migration: the migrate_to completed + * successfully on the source, but the migrate_from has not +@@ -3946,7 +3945,7 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c + clear_past_failure = TRUE; + + } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { +- unpack_migrate_to_success(rsc, node, xml_op, data_set); ++ unpack_migrate_to_success(rsc, node, xml_op); + + } else if (rsc->role < RSC_ROLE_STARTED) { + pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node)); +-- +2.31.1 + +From 3be487f87bf5e26277379148922525fd98d29681 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 09:13:30 -0600 +Subject: [PATCH 09/14] Doc: scheduler: clarify comments about unpacking + migration history + +per review +--- + lib/pengine/unpack.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 6222115..ec2cf26 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2791,9 +2791,9 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, + * \internal + * \brief Parse migration source and target node names from history entry + * +- * \param[in] entry Resource history entry for a migration action +- * \param[in] source_node If not NULL, source must match this node +- * \param[in] target_node If not NULL, target must match this node ++ * \param[in] entry Resource history entry for a migration action ++ * \param[in] source_node If not NULL, source must match this node ++ * \param[in] target_node If not NULL, target must match this node + * \param[out] source_name Where to store migration source node name + * 
\param[out] target_name Where to store migration target node name + * +@@ -2825,7 +2825,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + pcmk__str_casei|pcmk__str_null_matches)) { + crm_err("Ignoring resource history entry %s because " + XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", +- id, pcmk__s(*source_name, ""), pe__node_name(source_node)); ++ id, *source_name, pe__node_name(source_node)); + return pcmk_rc_unpack_error; + } + +@@ -2834,7 +2834,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, + pcmk__str_casei|pcmk__str_null_matches)) { + crm_err("Ignoring resource history entry %s because " + XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", +- id, pcmk__s(*target_name, ""), pe__node_name(target_node)); ++ id, *target_name, pe__node_name(target_node)); + return pcmk_rc_unpack_error; + } + +@@ -2890,7 +2890,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) + * + * - If the new transition is aborted for any reason while the resource is + * stopping on node1, the transition after that stop completes will see +- * the migrate_from and stop on the source, but it's still a partial ++ * the migrate_to and stop on the source, but it's still a partial + * migration, and the resource must be stopped on node2 because it is + * potentially active there due to the migrate_to. 
+ * +@@ -3425,9 +3425,9 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, + * \brief Update an integer value and why + * + * \param[in,out] i Pointer to integer to update +- * \param[in,out] why Where to store reason for update ++ * \param[out] why Where to store reason for update + * \param[in] value New value +- * \param[in,out] reason Description of why value was changed ++ * \param[in] reason Description of why value was changed + */ + static inline void + remap_because(int *i, const char **why, int value, const char *reason) +@@ -3456,7 +3456,7 @@ remap_because(int *i, const char **why, int value, const char *reason) + * \param[in] data_set Current cluster working set + * \param[in,out] on_fail What should be done about the result + * \param[in] target_rc Expected return code of operation +- * \param[in,out] rc Actual return code of operation ++ * \param[in,out] rc Actual return code of operation (treated as OCF) + * \param[in,out] status Operation execution status + * + * \note If the result is remapped and the node is not shutting down or failed, +@@ -3548,7 +3548,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, + switch (*rc) { + case PCMK_OCF_OK: + if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { +- remap_because(status, &why,PCMK_EXEC_DONE, "probe"); ++ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); + pe_rsc_info(rsc, "Probe found %s active on %s at %s", + rsc->id, pe__node_name(node), + last_change_str(xml_op)); +-- +2.31.1 + +From 3ef6c84a7b0dd434731e72d91f2724bdb52e292e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 09:42:01 -0600 +Subject: [PATCH 10/14] Refactor: scheduler: improve xpath efficiency when + unpacking + +Using "//" means that every child must be searched recursively. If we know the +exact path, we should explicitly specify it. 
+--- + lib/pengine/unpack.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index ec2cf26..8aead58 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2571,6 +2571,13 @@ set_node_score(gpointer key, gpointer value, gpointer user_data) + node->weight = *score; + } + ++#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ ++ "/" XML_CIB_TAG_STATE ++#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ ++ "/" XML_LRM_TAG_RESOURCES \ ++ "/" XML_LRM_TAG_RESOURCE ++#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP ++ + static xmlNode * + find_lrm_op(const char *resource, const char *op, const char *node, const char *source, + int target_rc, pe_working_set_t *data_set) +@@ -2583,10 +2590,9 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * + + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, +- "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='", node, "']" +- "//" XML_LRM_TAG_RESOURCE +- "[@" XML_ATTR_ID "='", resource, "']" +- "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" ++ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", + NULL); + + /* Need to check against transition_magic too? 
*/ +@@ -2631,10 +2637,8 @@ find_lrm_resource(const char *rsc_id, const char *node_name, + + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, +- "//" XML_CIB_TAG_STATE +- "[@" XML_ATTR_UNAME "='", node_name, "']" +- "//" XML_LRM_TAG_RESOURCE +- "[@" XML_ATTR_ID "='", rsc_id, "']", ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", + NULL); + + xml = get_xpath_object((const char *) xpath->str, data_set->input, +-- +2.31.1 + +From 1869f99bc8eeedb976f96f0f1cc3d4dd86735504 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 10:25:53 -0600 +Subject: [PATCH 11/14] Low: scheduler: unknown_on_node() should ignore pending + actions + +Previously, unknown_on_node() looked for any lrm_rsc_op at all to decide +whether a resource is known on a node. However if the only action is pending, +the resource is not yet known. + +Also drop a redundant argument and add a doxygen block. (The rsc argument is +not const due to a getDocPtr() call in the chain, as well as libxml2 calls that +are likely const in practice but aren't marked as such.) +--- + lib/pengine/unpack.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 8aead58..14dc202 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2648,19 +2648,32 @@ find_lrm_resource(const char *rsc_id, const char *node_name, + return xml; + } + ++/*! 
++ * \internal ++ * \brief Check whether a resource has no completed action history on a node ++ * ++ * \param[in,out] rsc Resource to check ++ * \param[in] node_name Node to check ++ * ++ * \return true if \p rsc_id is unknown on \p node_name, otherwise false ++ */ + static bool +-unknown_on_node(const char *rsc_id, const char *node_name, +- pe_working_set_t *data_set) ++unknown_on_node(pe_resource_t *rsc, const char *node_name) + { +- xmlNode *lrm_resource = NULL; +- +- lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); ++ bool result = false; ++ xmlXPathObjectPtr search; ++ GString *xpath = g_string_sized_new(256); + +- /* If the resource has no lrm_rsc_op history on the node, that means its +- * state is unknown there. +- */ +- return (lrm_resource == NULL +- || first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP) == NULL); ++ pcmk__g_strcat(xpath, ++ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" ++ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" ++ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", ++ NULL); ++ search = xpath_search(rsc->cluster->input, (const char *) xpath->str); ++ result = (numXpathResults(search) == 0); ++ freeXpathObject(search); ++ g_string_free(xpath, TRUE); ++ return result; + } + + /*! +@@ -3027,7 +3040,7 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, + * Don't just consider it running there. We will get back here anyway in + * case the probe detects it's running there. + */ +- !unknown_on_node(rsc->id, target, data_set) ++ !unknown_on_node(rsc, target) + /* If the resource has newer state on the target after the migration + * events, this migrate_to no longer matters for the target. + */ +@@ -3082,7 +3095,7 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, + * Don't just consider it running there. We will get back here anyway in + * case the probe detects it's running there. 
+ */ +- !unknown_on_node(rsc->id, source, data_set) ++ !unknown_on_node(rsc, source) + /* If the resource has newer state on the source after the migration + * events, this migrate_from no longer matters for the source. + */ +-- +2.31.1 + +From 22fbab8e0d449d2accb231dfcec94294ded27f4e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 31 Jan 2023 12:11:19 -0600 +Subject: [PATCH 12/14] Test: scheduler: add regression test for migration + intermediary + +As of this commit, the cluster wrongly restarts the migrated resource +--- + cts/cts-scheduler.in | 3 + + .../dot/migration-intermediary-cleaned.dot | 46 ++ + .../exp/migration-intermediary-cleaned.exp | 316 +++++++++++ + .../migration-intermediary-cleaned.scores | 201 +++++++ + .../migration-intermediary-cleaned.summary | 94 ++++ + .../xml/migration-intermediary-cleaned.xml | 513 ++++++++++++++++++ + 6 files changed, 1173 insertions(+) + create mode 100644 cts/scheduler/dot/migration-intermediary-cleaned.dot + create mode 100644 cts/scheduler/exp/migration-intermediary-cleaned.exp + create mode 100644 cts/scheduler/scores/migration-intermediary-cleaned.scores + create mode 100644 cts/scheduler/summary/migration-intermediary-cleaned.summary + create mode 100644 cts/scheduler/xml/migration-intermediary-cleaned.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index feb5dc8..9899c36 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -387,6 +387,9 @@ TESTS = [ + [ "probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins" ], + [ "probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed" ], + [ "partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration" ], ++ [ "migration-intermediary-cleaned", ++ "Probe live-migration intermediary with no history" ++ ], + [ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ], + ], + [ +diff --git 
a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot +new file mode 100644 +index 0000000..09568d0 +--- /dev/null ++++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot +@@ -0,0 +1,46 @@ ++ digraph "g" { ++"Connectivity_running_0" [ style=bold color="green" fontcolor="orange"] ++"Connectivity_start_0" -> "Connectivity_running_0" [ style = bold] ++"Connectivity_start_0" -> "ping-1_start_0 rhel8-2" [ style = bold] ++"Connectivity_start_0" [ style=bold color="green" fontcolor="orange"] ++"FencingFail_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] ++"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] ++"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] ++"petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] ++"ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_monitor_60000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"ping-1_start_0 rhel8-2" -> "Connectivity_running_0" [ style = bold] ++"ping-1_start_0 rhel8-2" -> "ping-1_monitor_60000 rhel8-2" [ style = 
bold] ++"ping-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"r192.168.122.207_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"r192.168.122.208_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_monitor_0 rhel8-2" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_start_0 rhel8-2" -> "rsc_rhel8-1_monitor_5000 rhel8-2" [ style = bold] ++"rsc_rhel8-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-1_stop_0 rhel8-3" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-1_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_monitor_0 rhel8-2" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-2_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_start_0 rhel8-2" -> "rsc_rhel8-2_monitor_5000 rhel8-2" [ style = bold] ++"rsc_rhel8-2_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-2_stop_0 rhel8-4" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] ++"rsc_rhel8-2_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-3_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-4_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"rsc_rhel8-5_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++"stateful-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp +new file mode 100644 +index 0000000..28fa776 +--- /dev/null ++++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp +@@ -0,0 +1,316 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores +new file mode 100644 +index 0000000..b3b8dff +--- /dev/null ++++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores +@@ -0,0 +1,201 @@ ++ ++pcmk__clone_allocate: Connectivity allocation score on rhel8-1: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-2: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-3: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-4: 0 ++pcmk__clone_allocate: Connectivity allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-3: 1 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:0 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-2: 0 
++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-4: 1 ++pcmk__clone_allocate: ping-1:1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:2 allocation score on rhel8-5: 1 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:3 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-1: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-2: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: ping-1:4 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: promotable-1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-3: 11 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:1 allocation score on 
rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-4: 6 ++pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-5: 6 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-3: 0 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-4: 0 ++pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-5: 0 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-3: 10 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-4: 5 ++pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-5: 5 ++pcmk__group_assign: group-1 allocation score on rhel8-1: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-2: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-3: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-4: 0 ++pcmk__group_assign: group-1 allocation score on rhel8-5: 0 ++pcmk__group_assign: petulant allocation score on rhel8-1: 0 ++pcmk__group_assign: petulant allocation score on rhel8-2: 0 ++pcmk__group_assign: petulant allocation score on rhel8-3: 0 ++pcmk__group_assign: petulant allocation score on rhel8-4: 0 ++pcmk__group_assign: petulant allocation score on rhel8-5: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-1: 0 ++pcmk__group_assign: 
r192.168.122.207 allocation score on rhel8-2: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-3: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-4: 0 ++pcmk__group_assign: r192.168.122.207 allocation score on rhel8-5: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-1: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-2: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-3: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-4: 0 ++pcmk__group_assign: r192.168.122.208 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-1: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-2: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-3: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-1: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-2: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-3: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-4: 0 ++pcmk__primitive_assign: FencingFail allocation score on rhel8-5: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-1: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-2: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-3: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-4: 0 ++pcmk__primitive_assign: FencingPass allocation score on rhel8-5: 0 ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-3: 0 ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: lsb-dummy allocation score on rhel8-5: -INFINITY 
++pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-5: 0 ++pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 ++pcmk__primitive_assign: petulant allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: petulant allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-3: 1 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: ping-1:0 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-4: 1 ++pcmk__primitive_assign: ping-1:1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:2 allocation score on rhel8-5: 1 ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on 
rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:3 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: ping-1:4 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-3: 11 ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-1: 100 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-2: 100 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-3 
allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-3: 100 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-4: 100 ++pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-1: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-2: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-3: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-5: 100 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-3: 11 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-4: 0 ++pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-4: 6 ++pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-5: 0 ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-3: -INFINITY 
++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-5: 6 ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-5: -INFINITY ++stateful-1:0 promotion score on rhel8-3: 10 ++stateful-1:1 promotion score on rhel8-4: 5 ++stateful-1:2 promotion score on rhel8-5: 5 ++stateful-1:3 promotion score on none: 0 ++stateful-1:4 promotion score on none: 0 +diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary +new file mode 100644 +index 0000000..5de1355 +--- /dev/null ++++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary +@@ -0,0 +1,94 @@ ++Using the original execution date of: 2023-01-19 21:05:59Z ++Current cluster status: ++ * Node List: ++ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * OFFLINE: [ rhel8-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-3 ++ * FencingPass (stonith:fence_dummy): Started rhel8-4 ++ * FencingFail (stonith:fence_dummy): Started rhel8-5 ++ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-4 
(ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 ++ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] ++ * Clone Set: Connectivity [ping-1]: ++ * Started: [ rhel8-3 rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Clone Set: promotable-1 [stateful-1] (promotable): ++ * Promoted: [ rhel8-3 ] ++ * Unpromoted: [ rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Resource Group: group-1: ++ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 ++ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 ++ ++Transition Summary: ++ * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) ++ * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) ++ * Restart migrator ( rhel8-5 ) ++ * Start ping-1:3 ( rhel8-2 ) ++ ++Executing Cluster Transition: ++ * Resource action: Fencing monitor on rhel8-2 ++ * Resource action: FencingPass monitor on rhel8-2 ++ * Resource action: FencingFail monitor on rhel8-2 ++ * Resource action: rsc_rhel8-1 stop on rhel8-3 ++ * Resource action: rsc_rhel8-1 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-2 stop on rhel8-4 ++ * Resource action: rsc_rhel8-2 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-3 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-4 monitor on rhel8-2 ++ * Resource action: rsc_rhel8-5 monitor on rhel8-2 ++ * Resource action: migrator stop on rhel8-2 ++ * Resource action: migrator stop on rhel8-5 ++ * Resource action: migrator monitor on rhel8-2 ++ * Resource action: ping-1 monitor on rhel8-2 ++ * Pseudo action: Connectivity_start_0 ++ * Resource action: stateful-1 monitor on rhel8-2 ++ * Resource action: r192.168.122.207 monitor on rhel8-2 ++ * Resource action: petulant monitor on rhel8-2 ++ * Resource action: r192.168.122.208 monitor on rhel8-2 ++ * Resource action: lsb-dummy monitor on rhel8-2 ++ * Resource action: rsc_rhel8-1 start on rhel8-2 ++ * 
Resource action: rsc_rhel8-2 start on rhel8-2 ++ * Resource action: migrator start on rhel8-5 ++ * Resource action: migrator monitor=10000 on rhel8-5 ++ * Resource action: ping-1 start on rhel8-2 ++ * Pseudo action: Connectivity_running_0 ++ * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 ++ * Resource action: rsc_rhel8-2 monitor=5000 on rhel8-2 ++ * Resource action: ping-1 monitor=60000 on rhel8-2 ++Using the original execution date of: 2023-01-19 21:05:59Z ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * OFFLINE: [ rhel8-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-3 ++ * FencingPass (stonith:fence_dummy): Started rhel8-4 ++ * FencingFail (stonith:fence_dummy): Started rhel8-5 ++ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-2 ++ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-2 ++ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 ++ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 ++ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] ++ * Clone Set: Connectivity [ping-1]: ++ * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 ] ++ * Clone Set: promotable-1 [stateful-1] (promotable): ++ * Promoted: [ rhel8-3 ] ++ * Unpromoted: [ rhel8-4 rhel8-5 ] ++ * Stopped: [ rhel8-1 rhel8-2 ] ++ * Resource Group: group-1: ++ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 ++ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 ++ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 +diff --git a/cts/scheduler/xml/migration-intermediary-cleaned.xml b/cts/scheduler/xml/migration-intermediary-cleaned.xml +new file mode 100644 +index 0000000..bec7888 +--- /dev/null ++++ b/cts/scheduler/xml/migration-intermediary-cleaned.xml +@@ -0,0 +1,513 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.31.1 + +From 1f9fadbb06baded3fc393cfe30a0cb620aca0829 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 1 Feb 2023 17:12:13 -0600 +Subject: [PATCH 13/14] Fix: scheduler: handle cleaned migrate_from history + correctly + +Fixes T623 +--- + lib/pengine/unpack.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 14dc202..9c99183 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -2990,6 +2990,15 @@ unpack_migrate_to_success(pe_resource_t 
*rsc, pe_node_t *node, xmlNode *xml_op) + + // The migrate_from is pending, complete but erased, or to be scheduled + ++ /* If there is no history at all for the resource on an online target, then ++ * it was likely cleaned. Just return, and we'll schedule a probe. Once we ++ * have the probe result, it will be reflected in target_newer_state. ++ */ ++ if ((target_node != NULL) && target_node->details->online ++ && unknown_on_node(rsc, target)) { ++ return; ++ } ++ + if (active_on_target) { + pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); + +-- +2.31.1 + +From d9d1bf19e8522ea29c87f0c39b05828947bc5b0f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 2 Feb 2023 15:48:01 -0600 +Subject: [PATCH 14/14] Test: scheduler: update expected output for migration + fix + +--- + .../dot/migration-intermediary-cleaned.dot | 8 -- + .../exp/migration-intermediary-cleaned.exp | 88 ++++--------------- + .../migration-intermediary-cleaned.scores | 2 +- + .../migration-intermediary-cleaned.summary | 9 +- + 4 files changed, 22 insertions(+), 85 deletions(-) + +diff --git a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot +index 09568d0..f6eabba 100644 +--- a/cts/scheduler/dot/migration-intermediary-cleaned.dot ++++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot +@@ -7,15 +7,7 @@ + "FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] + "migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] +-"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] +-"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] 
+-"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] +-"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +-"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] +-"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] + "petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] + "ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] + "ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] +diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp +index 28fa776..8b9bb39 100644 +--- a/cts/scheduler/exp/migration-intermediary-cleaned.exp ++++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp +@@ -148,91 +148,41 @@ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- + + + +- ++ + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + + + +@@ -241,24 +191,24 @@ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + +@@ -268,7 +218,7 @@ + + + +- ++ + + + +@@ -277,7 +227,7 @@ + + + +- ++ + + + +@@ -286,7 +236,7 @@ + + + +- ++ + + + +@@ -295,7 +245,7 @@ + + + +- ++ + + + +@@ -304,7 +254,7 @@ + + + +- ++ + + + +diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores +index b3b8dff..09f05d1 100644 +--- a/cts/scheduler/scores/migration-intermediary-cleaned.scores ++++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores +@@ -103,7 +103,7 @@ pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 + pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 +-pcmk__primitive_assign: migrator allocation 
score on rhel8-5: 0 ++pcmk__primitive_assign: migrator allocation score on rhel8-5: 1 + pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 +diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary +index 5de1355..dd127a8 100644 +--- a/cts/scheduler/summary/migration-intermediary-cleaned.summary ++++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary +@@ -13,7 +13,7 @@ Current cluster status: + * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 + * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 + * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 +- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] ++ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 + * Clone Set: Connectivity [ping-1]: + * Started: [ rhel8-3 rhel8-4 rhel8-5 ] + * Stopped: [ rhel8-1 rhel8-2 ] +@@ -30,7 +30,6 @@ Current cluster status: + Transition Summary: + * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) + * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) +- * Restart migrator ( rhel8-5 ) + * Start ping-1:3 ( rhel8-2 ) + + Executing Cluster Transition: +@@ -44,8 +43,6 @@ Executing Cluster Transition: + * Resource action: rsc_rhel8-3 monitor on rhel8-2 + * Resource action: rsc_rhel8-4 monitor on rhel8-2 + * Resource action: rsc_rhel8-5 monitor on rhel8-2 +- * Resource action: migrator stop on rhel8-2 +- * Resource action: migrator stop on rhel8-5 + * Resource action: migrator monitor on rhel8-2 + * Resource action: ping-1 monitor on rhel8-2 + * Pseudo action: Connectivity_start_0 +@@ -56,8 +53,6 @@ Executing Cluster Transition: + * Resource action: lsb-dummy monitor on rhel8-2 + * Resource action: rsc_rhel8-1 start on rhel8-2 + * Resource action: rsc_rhel8-2 start on rhel8-2 +- * Resource action: migrator start on rhel8-5 +- * Resource 
action: migrator monitor=10000 on rhel8-5 + * Resource action: ping-1 start on rhel8-2 + * Pseudo action: Connectivity_running_0 + * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 +@@ -79,7 +74,7 @@ Revised Cluster Status: + * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 + * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 + * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 +- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] ++ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 + * Clone Set: Connectivity [ping-1]: + * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] + * Stopped: [ rhel8-1 ] +-- +2.31.1 + diff --git a/SOURCES/003-regression.patch b/SOURCES/003-regression.patch deleted file mode 100644 index 0185c2d..0000000 --- a/SOURCES/003-regression.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 9853f4d05a376062d60f2e4c90938e587992237b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 27 Jun 2022 12:06:24 -0400 -Subject: [PATCH 1/2] Fix: tools: Don't output "(null)" in crm_attribute's - quiet mode. - -If the attribute queried for has no value, simply do not output -anything. - -Regression in 2.1.3 introduced by 8c03553bbf - -Fixes T502 -See: rhbz#2099331 ---- - tools/crm_attribute.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c -index 0bd9dee81..b1463f906 100644 ---- a/tools/crm_attribute.c -+++ b/tools/crm_attribute.c -@@ -56,7 +56,9 @@ attribute_text(pcmk__output_t *out, va_list args) - char *host G_GNUC_UNUSED = va_arg(args, char *); - - if (out->quiet) { -- pcmk__formatted_printf(out, "%s\n", value); -+ if (value != NULL) { -+ pcmk__formatted_printf(out, "%s\n", value); -+ } - } else { - out->info(out, "%s%s %s%s %s%s value=%s", - scope ? "scope=" : "", scope ? 
scope : "", --- -2.31.1 - - -From 16d00a9b3ef27afd09f5c046ea1be50fc664ed84 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 27 Jun 2022 12:18:06 -0400 -Subject: [PATCH 2/2] Test: cts: Add a test for querying an attribute that does - not exist. - ---- - cts/cli/regression.tools.exp | 4 ++++ - cts/cts-cli.in | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp -index 0d1cfa2ab..464472d42 100644 ---- a/cts/cli/regression.tools.exp -+++ b/cts/cli/regression.tools.exp -@@ -24,6 +24,10 @@ A new shadow instance was created. To begin using it paste the following into y - - =#=#=#= End test: Validate CIB - OK (0) =#=#=#= - * Passed: cibadmin - Validate CIB -+=#=#=#= Begin test: Query the value of an attribute that does not exist =#=#=#= -+crm_attribute: Error performing operation: No such device or address -+=#=#=#= End test: Query the value of an attribute that does not exist - No such object (105) =#=#=#= -+* Passed: crm_attribute - Query the value of an attribute that does not exist - =#=#=#= Begin test: Configure something before erasing =#=#=#= - =#=#=#= Current cib after: Configure something before erasing =#=#=#= - -diff --git a/cts/cts-cli.in b/cts/cts-cli.in -index 8565c485a..b895d36ec 100755 ---- a/cts/cts-cli.in -+++ b/cts/cts-cli.in -@@ -511,6 +511,10 @@ function test_tools() { - cmd="cibadmin -Q" - test_assert $CRM_EX_OK - -+ desc="Query the value of an attribute that does not exist" -+ cmd="crm_attribute -n ABCD --query --quiet" -+ test_assert $CRM_EX_NOSUCH 0 -+ - desc="Configure something before erasing" - cmd="crm_attribute -n cluster-delay -v 60s" - test_assert $CRM_EX_OK -@@ -1980,6 +1984,7 @@ for t in $tests; do - -e 's/ end=\"[0-9][-+: 0-9]*Z*\"/ end=\"\"/' \ - -e 's/ start=\"[0-9][-+: 0-9]*Z*\"/ start=\"\"/' \ - -e 's/^Error checking rule: Device not configured/Error checking rule: No such device or address/' \ -+ -e 's/Error performing operation: Device not configured/Error 
performing operation: No such device or address/' \ - -e 's/\(Injecting attribute last-failure-ping#monitor_10000=\)[0-9]*/\1/' \ - -e 's/^lt-//' \ - -e 's/ocf::/ocf:/' \ --- -2.31.1 - diff --git a/SOURCES/004-g_source_remove.patch b/SOURCES/004-g_source_remove.patch new file mode 100644 index 0000000..2af0f47 --- /dev/null +++ b/SOURCES/004-g_source_remove.patch @@ -0,0 +1,107 @@ +From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 3 Feb 2023 12:08:57 -0800 +Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one + +QE observed a rarely reproducible core dump in the fencer during +Pacemaker shutdown, in which we try to g_source_remove() an op timer +that's already been removed. + +free_stonith_remote_op_list() +-> g_hash_table_destroy() +-> g_hash_table_remove_all_nodes() +-> clear_remote_op_timers() +-> g_source_remove() +-> crm_glib_handler() +-> "Source ID 190 was not found when attempting to remove it" + +The likely cause is that request_peer_fencing() doesn't set +op->op_timer_one to 0 after calling g_source_remove() on it, so if that +op is still in the stonith_remote_op_list at shutdown with the same +timer, clear_remote_op_timers() tries to remove the source for +op_timer_one again. + +There are only five locations that call g_source_remove() on a +remote_fencing_op_t timer. +* Three of them are in clear_remote_op_timers(), which first 0-checks + the timer and then sets it to 0 after g_source_remove(). +* One is in remote_op_query_timeout(), which does the same. +* The last is the one we fix here in request_peer_fencing(). + +I don't know all the conditions of QE's test scenario at this point. 
+What I do know: +* have-watchdog=true +* stonith-watchdog-timeout=10 +* no explicit topology +* fence agent script is missing for the configured fence device +* requested fencing of one node +* cluster shutdown + +Fixes RHBZ2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index d61b5bd..b7426ff 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->state = st_exec; + if (op->op_timer_one) { + g_source_remove(op->op_timer_one); ++ op->op_timer_one = 0; + } + + if (!((stonith_watchdog_timeout_ms > 0) +-- +2.31.1 + +From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 15 Feb 2023 22:30:27 -0800 +Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total + +remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove +the source from the main loop after returning. Currently this function +is used as the callback only when creating op->op_timer_total. + +If we don't set op->op_timer_total to 0 before returning from +remote_op_timeout(), then we can get an assertion and core dump from +GLib when the op's timers are being cleared (either during op +finalization or during fencer shutdown). This is because +clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to +remove the source, but the source has already been removed. + +Note that we're already (correctly) zeroing op->op_timer_one and +op->query_timeout as appropriate in their respective callback functions. + +Fortunately, GLib doesn't care whether the source has already been +removed before we return G_SOURCE_REMOVE from a callback. So it's safe +to call finalize_op() (which removes all the op's timer sources) from +within a callback. 
+ +Fixes RHBZ#2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index b7426ff88..adea3d7d8 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata) + { + remote_fencing_op_t *op = userdata; + ++ op->op_timer_total = 0; ++ + if (op->state == st_done) { + crm_debug("Action '%s' targeting %s for client %s already completed " + CRM_XS " id=%.8s", +-- +2.39.0 diff --git a/SOURCES/004-schema.patch b/SOURCES/004-schema.patch deleted file mode 100644 index 2632a9d..0000000 --- a/SOURCES/004-schema.patch +++ /dev/null @@ -1,624 +0,0 @@ -From e8f96dec79bb33c11d39c9037ac623f18a67b539 Mon Sep 17 00:00:00 2001 -From: Petr Pavlu -Date: Tue, 24 May 2022 18:08:57 +0200 -Subject: [PATCH] Low: schemas: copy API schemas in preparation for changes - -Copy crm_mon, crm_simulate and nodes API schemas in preparation for -changes and bump the external reference version in crm_mon and -crm_simulate to point to the new nodes schema. ---- - include/crm/common/output_internal.h | 2 +- - xml/api/crm_mon-2.21.rng | 183 +++++++++++++++ - xml/api/crm_simulate-2.21.rng | 338 +++++++++++++++++++++++++++ - xml/api/nodes-2.21.rng | 51 ++++ - 4 files changed, 573 insertions(+), 1 deletion(-) - create mode 100644 xml/api/crm_mon-2.21.rng - create mode 100644 xml/api/crm_simulate-2.21.rng - create mode 100644 xml/api/nodes-2.21.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 577fd5247..74ee833c1 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.20" -+# define PCMK__API_VERSION "2.21" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) -diff --git a/xml/api/crm_mon-2.21.rng b/xml/api/crm_mon-2.21.rng -new file mode 100644 -index 000000000..37036d665 ---- /dev/null -+++ b/xml/api/crm_mon-2.21.rng -@@ -0,0 +1,183 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ granted -+ revoked -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/crm_simulate-2.21.rng b/xml/api/crm_simulate-2.21.rng -new file mode 100644 -index 000000000..75a9b399b ---- /dev/null -+++ b/xml/api/crm_simulate-2.21.rng -@@ -0,0 +1,338 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git 
a/xml/api/nodes-2.21.rng b/xml/api/nodes-2.21.rng -new file mode 100644 -index 000000000..df4c77f37 ---- /dev/null -+++ b/xml/api/nodes-2.21.rng -@@ -0,0 +1,51 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ red -+ yellow -+ green -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ unknown -+ member -+ remote -+ ping -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - diff --git a/SOURCES/005-query-null.patch b/SOURCES/005-query-null.patch new file mode 100644 index 0000000..194cd33 --- /dev/null +++ b/SOURCES/005-query-null.patch @@ -0,0 +1,151 @@ +From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 14 Feb 2023 14:00:37 -0500 +Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in + pcmk__attrd_api_query. + +According to the header file, if node is NULL, pcmk__attrd_api_query +should query the value of the given attribute on all cluster nodes. +This is also what the server expects and how attrd_updater is supposed +to work. + +However, pcmk__attrd_api_query has no way of letting callers decide +whether they want to query all nodes or whether they want to use the +local node. We were passing NULL for the node name, which it took to +mean it should look up the local node name. This calls +pcmk__node_attr_target, which probes the local cluster name and returns +that to pcmk__attrd_api_query. If it returns non-NULL, that value will +then be put into the XML IPC call which means the server will only +return the value for that node. + +In testing this was usually fine. However, in practice, the methods +pcmk__node_attr_target uses to figure out the local cluster node name +involve checking the OCF_RESKEY_CRM_meta_on_node environment variable +among others. + +This variable was never set in testing, but can be set in the real +world. This leads to circumstances where the user did "attrd_updater -QA" +expecting to get the values on all nodes, but instead only got the value +on the local cluster node.
+ +In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never +called if the node was NULL but was called otherwise. + +The fix is to modify pcmk__attrd_api_query to take an option for +querying all nodes. If that's present, we'll query all nodes. If it's +not present, we'll look at the given node name - NULL means look it up, +anything else means just that node. + +Regression in 2.1.5 introduced by eb20a65577 +--- + include/crm/common/attrd_internal.h | 6 +++++- + include/crm/common/ipc_attrd_internal.h | 7 +++++-- + lib/common/ipc_attrd.c | 12 ++++++++---- + tools/attrd_updater.c | 5 +++-- + 4 files changed, 21 insertions(+), 9 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index 389be48..7337c38 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2022 the Pacemaker project contributors ++ * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts { + pcmk__node_attr_perm = (1 << 5), + pcmk__node_attr_sync_local = (1 << 6), + pcmk__node_attr_sync_cluster = (1 << 7), ++ // pcmk__node_attr_utilization is 8, but that has not been backported. ++ // I'm leaving the gap here in case we backport that in the future and ++ // also to avoid problems on mixed-version clusters. 
++ pcmk__node_attr_query_all = (1 << 9), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h +index 2c6713f..b1b7584 100644 +--- a/include/crm/common/ipc_attrd_internal.h ++++ b/include/crm/common/ipc_attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2022 the Pacemaker project contributors ++ * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); + * + * \param[in,out] api Connection to pacemaker-attrd + * \param[in] node Look up the attribute for this node +- * (or NULL for all nodes) ++ * (or NULL for the local node) + * \param[in] name Attribute name + * \param[in] options Bitmask of pcmk__node_attr_opts + * ++ * \note Passing pcmk__node_attr_query_all will cause the function to query ++ * the value of \p name on all nodes, regardless of the value of \p node. ++ * + * \return Standard Pacemaker return code + */ + int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 4606509..dece49b 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2011-2022 the Pacemaker project contributors ++ * Copyright 2011-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + return EINVAL; + } + +- target = pcmk__node_attr_target(node); ++ if (pcmk_is_set(options, pcmk__node_attr_query_all)) { ++ node = NULL; ++ } else { ++ target = pcmk__node_attr_target(node); + +- if (target != NULL) { +- node = target; ++ if (target != NULL) { ++ node = target; ++ } + } + + request = create_attrd_op(NULL); +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index 3cd766d..cbd341d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, + static int + send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) + { ++ uint32_t options = pcmk__node_attr_none; + pcmk_ipc_api_t *attrd_api = NULL; + int rc = pcmk_rc_ok; + +@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no + + /* Decide which node(s) to query */ + if (query_all == TRUE) { +- attr_node = NULL; ++ options |= pcmk__node_attr_query_all; + } + +- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0); ++ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); + + if (rc != pcmk_rc_ok) { + g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", +-- +2.31.1 + diff --git a/SOURCES/005-schema.patch b/SOURCES/005-schema.patch deleted file mode 100644 index 57f6309..0000000 --- a/SOURCES/005-schema.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 5b6280ac1a213e176aee6d61945b3283ea060a88 Mon Sep 17 00:00:00 2001 -From: Petr Pavlu -Date: Tue, 24 May 2022 18:02:31 +0200 -Subject: [PATCH] Feature: tools: report CRM feature set of nodes by crm_mon - -Enable crm_mon to report when CRM feature set is not consistent among -online nodes and output a version of each node if --show-detail is -specified. 
---- - xml/api/crm_mon-2.21.rng | 3 + - xml/api/nodes-2.21.rng | 3 + - 9 files changed, 508 insertions(+), 125 deletions(-) - create mode 100644 cts/cli/crm_mon-feature_set.xml - create mode 100644 cts/cli/regression.feature_set.exp - -diff --git a/xml/api/crm_mon-2.21.rng b/xml/api/crm_mon-2.21.rng -index 37036d665..e99bcc3d7 100644 ---- a/xml/api/crm_mon-2.21.rng -+++ b/xml/api/crm_mon-2.21.rng -@@ -54,6 +54,9 @@ - - - -+ -+ -+ - - - -diff --git a/xml/api/nodes-2.21.rng b/xml/api/nodes-2.21.rng -index df4c77f37..7e236ba63 100644 ---- a/xml/api/nodes-2.21.rng -+++ b/xml/api/nodes-2.21.rng -@@ -25,6 +25,9 @@ - - - -+ -+ -+ - - - --- -2.31.1 - diff --git a/SOURCES/006-crm_resource.patch b/SOURCES/006-crm_resource.patch deleted file mode 100644 index 577264b..0000000 --- a/SOURCES/006-crm_resource.patch +++ /dev/null @@ -1,1686 +0,0 @@ -From a467f0953c61bd56a9b34a98c71855d3cfbf6ba4 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 16:26:30 -0500 -Subject: [PATCH 01/14] Refactor: tools: use a flag to indicate locked - resources in crm_resource - -... to make the handling consistent with other checks. This also allows some of -the code to be simplified. 
---- - tools/crm_resource.h | 13 +++++++++---- - tools/crm_resource_print.c | 21 ++++++++------------- - tools/crm_resource_runtime.c | 7 +++---- - 3 files changed, 20 insertions(+), 21 deletions(-) - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index 71a978893..b5fdd1bb5 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -8,6 +8,10 @@ - */ - - #include -+ -+#include -+#include -+ - #include - - #include -@@ -31,13 +35,14 @@ typedef struct node_info_s { - enum resource_check_flags { - rsc_remain_stopped = (1 << 0), - rsc_unpromotable = (1 << 1), -- rsc_unmanaged = (1 << 2) -+ rsc_unmanaged = (1 << 2), -+ rsc_locked = (1 << 3), - }; - - typedef struct resource_checks_s { -- pe_resource_t *rsc; -- unsigned int flags; -- const char *lock_node; -+ pe_resource_t *rsc; // Resource being checked -+ uint32_t flags; // Group of enum resource_check_flags -+ const char *lock_node; // Node that resource is shutdown-locked to, if any - } resource_checks_t; - - resource_checks_t *cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed); -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 5abf3df0c..f63fc952d 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -450,14 +450,13 @@ resource_check_list_default(pcmk__output_t *out, va_list args) { - resource_checks_t *checks = va_arg(args, resource_checks_t *); - - pe_resource_t *parent = uber_parent(checks->rsc); -- int rc = pcmk_rc_no_output; -- bool printed = false; - -- if (checks->flags != 0 || checks->lock_node != NULL) { -- printed = true; -- out->begin_list(out, NULL, NULL, "Resource Checks"); -+ if (checks->flags == 0) { -+ return pcmk_rc_no_output; - } - -+ out->begin_list(out, NULL, NULL, "Resource Checks"); -+ - if (pcmk_is_set(checks->flags, rsc_remain_stopped)) { - out->list_item(out, "check", "Configuration specifies '%s' should remain stopped", - parent->id); -@@ -473,17 +472,13 @@ resource_check_list_default(pcmk__output_t 
*out, va_list args) { - parent->id); - } - -- if (checks->lock_node) { -+ if (pcmk_is_set(checks->flags, rsc_locked)) { - out->list_item(out, "check", "'%s' is locked to node %s due to shutdown", - parent->id, checks->lock_node); - } - -- if (printed) { -- out->end_list(out); -- rc = pcmk_rc_ok; -- } -- -- return rc; -+ out->end_list(out); -+ return pcmk_rc_ok; - } - - PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *") -@@ -509,7 +504,7 @@ resource_check_list_xml(pcmk__output_t *out, va_list args) { - pcmk__xe_set_bool_attr(node, "unmanaged", true); - } - -- if (checks->lock_node) { -+ if (pcmk_is_set(checks->flags, rsc_locked)) { - crm_xml_add(node, "locked-to", checks->lock_node); - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 9e7e1fe74..b5bccadaf 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -36,7 +36,8 @@ cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed) - rc->flags |= rsc_unmanaged; - } - -- if (rsc->lock_node) { -+ if (rsc->lock_node != NULL) { -+ rc->flags |= rsc_locked; - rc->lock_node = rsc->lock_node->details->uname; - } - -@@ -914,9 +915,7 @@ cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) - - checks = cli_check_resource(rsc, role_s, managed); - -- if (checks->flags != 0 || checks->lock_node != NULL) { -- rc = out->message(out, "resource-check-list", checks); -- } -+ rc = out->message(out, "resource-check-list", checks); - - free(role_s); - free(managed); --- -2.31.1 - - -From 7f8f94d0a1086e592e39f3a1a812b1a65225c09b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 16:48:03 -0500 -Subject: [PATCH 02/14] Refactor: tools: functionize individual resource checks - in crm_resource - -... rather than have one check-everything function, to make the code simpler -and more readable. 
---- - tools/crm_resource_runtime.c | 101 ++++++++++++++++++++--------------- - 1 file changed, 57 insertions(+), 44 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index b5bccadaf..d47f959f5 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -15,36 +15,6 @@ - #include - #include - --resource_checks_t * --cli_check_resource(pe_resource_t *rsc, char *role_s, char *managed) --{ -- pe_resource_t *parent = uber_parent(rsc); -- resource_checks_t *rc = calloc(1, sizeof(resource_checks_t)); -- -- if (role_s) { -- enum rsc_role_e role = text2role(role_s); -- -- if (role == RSC_ROLE_STOPPED) { -- rc->flags |= rsc_remain_stopped; -- } else if (pcmk_is_set(parent->flags, pe_rsc_promotable) && -- (role == RSC_ROLE_UNPROMOTED)) { -- rc->flags |= rsc_unpromotable; -- } -- } -- -- if (managed && !crm_is_true(managed)) { -- rc->flags |= rsc_unmanaged; -- } -- -- if (rsc->lock_node != NULL) { -- rc->flags |= rsc_locked; -- rc->lock_node = rsc->lock_node->details->uname; -- } -- -- rc->rsc = rsc; -- return rc; --} -- - static GList * - build_node_info_list(pe_resource_t *rsc) - { -@@ -898,29 +868,72 @@ cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, - return rc; - } - --int --cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) -+static void -+check_role(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) - { - char *role_s = NULL; -- char *managed = NULL; -- pe_resource_t *parent = uber_parent(rsc); -- int rc = pcmk_rc_no_output; -- resource_checks_t *checks = NULL; -- -- find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -- NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); -+ pe_resource_t *parent = uber_parent(checks->rsc); - - find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, - NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); -+ if (role_s == NULL) { -+ return; -+ } - -- checks = cli_check_resource(rsc, 
role_s, managed); -+ switch (text2role(role_s)) { -+ case RSC_ROLE_STOPPED: -+ checks->flags |= rsc_remain_stopped; -+ break; - -- rc = out->message(out, "resource-check-list", checks); -+ case RSC_ROLE_UNPROMOTED: -+ if (pcmk_is_set(parent->flags, pe_rsc_promotable)) { -+ checks->flags |= rsc_unpromotable; -+ } -+ break; - -+ default: -+ break; -+ } - free(role_s); -- free(managed); -- free(checks); -- return rc; -+} -+ -+static void -+check_managed(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) -+{ -+ char *managed_s = NULL; -+ pe_resource_t *parent = uber_parent(checks->rsc); -+ -+ find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -+ NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed_s); -+ if (managed_s == NULL) { -+ return; -+ } -+ -+ if (!crm_is_true(managed_s)) { -+ checks->flags |= rsc_unmanaged; -+ } -+ free(managed_s); -+} -+ -+static void -+check_locked(resource_checks_t *checks) -+{ -+ if (checks->rsc->lock_node != NULL) { -+ checks->flags |= rsc_locked; -+ checks->lock_node = checks->rsc->lock_node->details->uname; -+ } -+} -+ -+int -+cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) -+{ -+ resource_checks_t checks = { .rsc = rsc }; -+ -+ check_role(out, cib_conn, &checks); -+ check_managed(out, cib_conn, &checks); -+ check_locked(&checks); -+ -+ return out->message(out, "resource-check-list", &checks); - } - - // \return Standard Pacemaker return code --- -2.31.1 - - -From 32414475281d909cd808f723a41d88a5e0d2b254 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 17:11:07 -0500 -Subject: [PATCH 03/14] Fix: tools: crm_resource target-role check should use - meta-attribute table - -Previously, check_role() searched the CIB for the uber-parent's target-role -attribute. That could give incorrect results if target-role was set on a -different resource in the ancestry chain (e.g. 
the resource itself for a group -member, or the group for a cloned group), or if there were multiple target-role -settings (e.g. using rules). - -Now, target-role is checked in rsc->meta, which should be fully evaluated for -inheritance and rules. ---- - tools/crm_resource_runtime.c | 15 ++++++--------- - 1 file changed, 6 insertions(+), 9 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index d47f959f5..e9d05cb77 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -869,24 +869,22 @@ cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, - } - - static void --check_role(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) -+check_role(resource_checks_t *checks) - { -- char *role_s = NULL; -- pe_resource_t *parent = uber_parent(checks->rsc); -+ const char *role_s = g_hash_table_lookup(checks->rsc->meta, -+ XML_RSC_ATTR_TARGET_ROLE); - -- find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -- NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); - if (role_s == NULL) { - return; - } -- - switch (text2role(role_s)) { - case RSC_ROLE_STOPPED: - checks->flags |= rsc_remain_stopped; - break; - - case RSC_ROLE_UNPROMOTED: -- if (pcmk_is_set(parent->flags, pe_rsc_promotable)) { -+ if (pcmk_is_set(uber_parent(checks->rsc)->flags, -+ pe_rsc_promotable)) { - checks->flags |= rsc_unpromotable; - } - break; -@@ -894,7 +892,6 @@ check_role(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) - default: - break; - } -- free(role_s); - } - - static void -@@ -929,7 +926,7 @@ cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) - { - resource_checks_t checks = { .rsc = rsc }; - -- check_role(out, cib_conn, &checks); -+ check_role(&checks); - check_managed(out, cib_conn, &checks); - check_locked(&checks); - --- -2.31.1 - - -From 0fd133680f7b2c25a946cf3fb25f4ee9ffeeaf93 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 
17:15:43 -0500 -Subject: [PATCH 04/14] Fix: tools: crm_resource is-managed check should use - meta-attribute table - -Previously, check_managed() searched the CIB for the uber-parent's is-managed -attribute. That could give incorrect results if is-managed was set on a -different resource in the ancestry chain (e.g. the resource itself for a group -member, or the group for a cloned group), or if there were multiple is-managed -settings (e.g. using rules). - -Now, is-managed is checked in rsc->meta, which should be fully evaluated for -inheritance and rules. ---- - tools/crm_resource_runtime.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index e9d05cb77..4f62b4b2e 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -895,21 +895,14 @@ check_role(resource_checks_t *checks) - } - - static void --check_managed(pcmk__output_t *out, cib_t *cib_conn, resource_checks_t *checks) -+check_managed(resource_checks_t *checks) - { -- char *managed_s = NULL; -- pe_resource_t *parent = uber_parent(checks->rsc); -+ const char *managed_s = g_hash_table_lookup(checks->rsc->meta, -+ XML_RSC_ATTR_MANAGED); - -- find_resource_attr(out, cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, -- NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed_s); -- if (managed_s == NULL) { -- return; -- } -- -- if (!crm_is_true(managed_s)) { -+ if ((managed_s != NULL) && !crm_is_true(managed_s)) { - checks->flags |= rsc_unmanaged; - } -- free(managed_s); - } - - static void -@@ -927,7 +920,7 @@ cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) - resource_checks_t checks = { .rsc = rsc }; - - check_role(&checks); -- check_managed(out, cib_conn, &checks); -+ check_managed(&checks); - check_locked(&checks); - - return out->message(out, "resource-check-list", &checks); --- -2.31.1 - - -From e9523c1b238492c8cf8b453ba6710f13bf81cd28 Mon Sep 17 00:00:00 2001 
-From: Ken Gaillot -Date: Tue, 5 Apr 2022 17:18:44 -0500 -Subject: [PATCH 05/14] Refactor: tools: drop unused argument from - cli_resource_check() - ---- - tools/crm_resource.c | 4 ++-- - tools/crm_resource.h | 2 +- - tools/crm_resource_print.c | 24 ++++++++++++------------ - tools/crm_resource_runtime.c | 2 +- - 4 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 883563df9..bf5326b40 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1019,7 +1019,7 @@ cleanup(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - if (rc == pcmk_rc_ok) { -@@ -1326,7 +1326,7 @@ refresh_resource(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - if (rc == pcmk_rc_ok) { -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index b5fdd1bb5..bcff2b5f6 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -68,7 +68,7 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, - bool active, pe_working_set_t * data_set); - - /* runtime */ --int cli_resource_check(pcmk__output_t *out, cib_t * cib, pe_resource_t *rsc); -+int cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc); - int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, - const char *rsc_id, pe_working_set_t *data_set); - GList *cli_resource_search(pe_resource_t *rsc, const char *requested_name, -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index f63fc952d..f025cbddd 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -587,7 +587,7 @@ PCMK__OUTPUT_ARGS("resource-reasons-list", 
"cib_t *", "GList *", "pe_resource_t - static int - resource_reasons_list_default(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn = va_arg(args, cib_t *); -+ cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); -@@ -610,7 +610,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - out->list_item(out, "reason", "Resource %s is running", rsc->id); - } - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - g_list_free(hosts); - hosts = NULL; - } -@@ -624,7 +624,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->id, host_uname); - } - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* host_uname = node->details->uname; -@@ -637,14 +637,14 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is running on host %s", - rsc->id, host_uname); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is assigned to host %s but not running", - rsc->id, host_uname); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - } - - g_list_free(allResources); -@@ -657,7 +657,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->fns->location(rsc, &hosts, TRUE); - out->list_item(out, "reason", "Resource %s is %srunning", - rsc->id, (hosts? 
"" : "not ")); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - g_list_free(hosts); - } - -@@ -670,7 +670,7 @@ PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t - static int - resource_reasons_list_xml(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn = va_arg(args, cib_t *); -+ cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); -@@ -695,7 +695,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "running", pcmk__btoa(hosts != NULL), - NULL); - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - pcmk__output_xml_pop_parent(out); - g_list_free(hosts); - hosts = NULL; -@@ -708,7 +708,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - crm_xml_add(xml_node, "running_on", host_uname); - } - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* host_uname = node->details->uname; -@@ -728,7 +728,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - pcmk__output_xml_pop_parent(out); - } - -@@ -741,7 +741,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - pcmk__output_xml_pop_parent(out); - } - -@@ -755,7 +755,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - - rsc->fns->location(rsc, &hosts, TRUE); - crm_xml_add(xml_node, "running", pcmk__btoa(hosts != NULL)); -- cli_resource_check(out, cib_conn, rsc); -+ cli_resource_check(out, rsc); - g_list_free(hosts); - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 
4f62b4b2e..47653a060 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -915,7 +915,7 @@ check_locked(resource_checks_t *checks) - } - - int --cli_resource_check(pcmk__output_t *out, cib_t * cib_conn, pe_resource_t *rsc) -+cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc) - { - resource_checks_t checks = { .rsc = rsc }; - --- -2.31.1 - - -From b1a1a07f3e44bc74575eab325277ea8c1f3391b2 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 5 Apr 2022 17:20:06 -0500 -Subject: [PATCH 06/14] Refactor: tools: drop unused argument from - resource-reasons-list message - ---- - tools/crm_resource.c | 2 +- - tools/crm_resource_print.c | 6 ++---- - 2 files changed, 3 insertions(+), 5 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index bf5326b40..7f656a20d 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1941,7 +1941,7 @@ main(int argc, char **argv) - if ((options.host_uname != NULL) && (node == NULL)) { - rc = pcmk_rc_node_unknown; - } else { -- rc = out->message(out, "resource-reasons-list", cib_conn, -+ rc = out->message(out, "resource-reasons-list", - data_set->resources, rsc, node); - } - break; -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index f025cbddd..580f9c71a 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -582,12 +582,11 @@ resource_search_list_xml(pcmk__output_t *out, va_list args) - return pcmk_rc_ok; - } - --PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t *", -+PCMK__OUTPUT_ARGS("resource-reasons-list", "GList *", "pe_resource_t *", - "pe_node_t *") - static int - resource_reasons_list_default(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); -@@ -665,12 +664,11 @@ 
resource_reasons_list_default(pcmk__output_t *out, va_list args) - return pcmk_rc_ok; - } - --PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t *", -+PCMK__OUTPUT_ARGS("resource-reasons-list", "GList *", "pe_resource_t *", - "pe_node_t *") - static int - resource_reasons_list_xml(pcmk__output_t *out, va_list args) - { -- cib_t *cib_conn G_GNUC_UNUSED = va_arg(args, cib_t *); - GList *resources = va_arg(args, GList *); - pe_resource_t *rsc = va_arg(args, pe_resource_t *); - pe_node_t *node = va_arg(args, pe_node_t *); --- -2.31.1 - - -From 973eb2694b334b4e9e6967f6c7ceaebec10693db Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 10:08:37 -0500 -Subject: [PATCH 07/14] Refactor: tools: pass node to cli_resource_check() - -The node is not used yet ---- - tools/crm_resource.c | 12 ++++++------ - tools/crm_resource.h | 3 ++- - tools/crm_resource_print.c | 20 ++++++++++---------- - tools/crm_resource_runtime.c | 2 +- - 4 files changed, 19 insertions(+), 18 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 7f656a20d..756a06268 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1004,7 +1004,7 @@ ban_or_move(pcmk__output_t *out, pe_resource_t *rsc, const char *move_lifetime) - } - - static void --cleanup(pcmk__output_t *out, pe_resource_t *rsc) -+cleanup(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { - int rc = pcmk_rc_ok; - -@@ -1019,7 +1019,7 @@ cleanup(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - if (rc == pcmk_rc_ok) { -@@ -1311,7 +1311,7 @@ refresh(pcmk__output_t *out) - } - - static void --refresh_resource(pcmk__output_t *out, pe_resource_t *rsc) -+refresh_resource(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { - int rc = pcmk_rc_ok; - -@@ -1326,7 +1326,7 @@ 
refresh_resource(pcmk__output_t *out, pe_resource_t *rsc) - - if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { - // Show any reasons why resource might stay stopped -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - if (rc == pcmk_rc_ok) { -@@ -2075,7 +2075,7 @@ main(int argc, char **argv) - start_mainloop(controld_api); - } - } else { -- cleanup(out, rsc); -+ cleanup(out, rsc, node); - } - break; - -@@ -2083,7 +2083,7 @@ main(int argc, char **argv) - if (rsc == NULL) { - rc = refresh(out); - } else { -- refresh_resource(out, rsc); -+ refresh_resource(out, rsc, node); - } - break; - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index bcff2b5f6..f7e44476d 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -68,7 +68,8 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, - bool active, pe_working_set_t * data_set); - - /* runtime */ --int cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc); -+int cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, -+ pe_node_t *node); - int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, - const char *rsc_id, pe_working_set_t *data_set); - GList *cli_resource_search(pe_resource_t *rsc, const char *requested_name, -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 580f9c71a..087819601 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -609,7 +609,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - out->list_item(out, "reason", "Resource %s is running", rsc->id); - } - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - g_list_free(hosts); - hosts = NULL; - } -@@ -623,7 +623,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->id, host_uname); - } - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* 
host_uname = node->details->uname; -@@ -636,14 +636,14 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is running on host %s", - rsc->id, host_uname); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { - pe_resource_t *rsc = (pe_resource_t *) lpc->data; - out->list_item(out, "reason", "Resource %s is assigned to host %s but not running", - rsc->id, host_uname); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - } - - g_list_free(allResources); -@@ -656,7 +656,7 @@ resource_reasons_list_default(pcmk__output_t *out, va_list args) - rsc->fns->location(rsc, &hosts, TRUE); - out->list_item(out, "reason", "Resource %s is %srunning", - rsc->id, (hosts? "" : "not ")); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - g_list_free(hosts); - } - -@@ -693,7 +693,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "running", pcmk__btoa(hosts != NULL), - NULL); - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - pcmk__output_xml_pop_parent(out); - g_list_free(hosts); - hosts = NULL; -@@ -706,7 +706,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - crm_xml_add(xml_node, "running_on", host_uname); - } - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - - } else if ((rsc == NULL) && (host_uname != NULL)) { - const char* host_uname = node->details->uname; -@@ -726,7 +726,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, node); - pcmk__output_xml_pop_parent(out); - } - -@@ -739,7 +739,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - "host", host_uname, - NULL); - -- cli_resource_check(out, rsc); -+ cli_resource_check(out, 
rsc, node); - pcmk__output_xml_pop_parent(out); - } - -@@ -753,7 +753,7 @@ resource_reasons_list_xml(pcmk__output_t *out, va_list args) - - rsc->fns->location(rsc, &hosts, TRUE); - crm_xml_add(xml_node, "running", pcmk__btoa(hosts != NULL)); -- cli_resource_check(out, rsc); -+ cli_resource_check(out, rsc, NULL); - g_list_free(hosts); - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 47653a060..68e899c45 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -915,7 +915,7 @@ check_locked(resource_checks_t *checks) - } - - int --cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc) -+cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { - resource_checks_t checks = { .rsc = rsc }; - --- -2.31.1 - - -From c3bfde0536f2eb51c81bf34fa957c38dc88f9cc3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 09:49:03 -0500 -Subject: [PATCH 08/14] Feature: tools: crm_resource --why now checks node - health status - -Closes T65 ---- - tools/crm_resource.h | 1 + - tools/crm_resource_print.c | 13 +++++++++ - tools/crm_resource_runtime.c | 56 ++++++++++++++++++++++++++++++++++++ - 3 files changed, 70 insertions(+) - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index f7e44476d..ae4b02a98 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -37,6 +37,7 @@ enum resource_check_flags { - rsc_unpromotable = (1 << 1), - rsc_unmanaged = (1 << 2), - rsc_locked = (1 << 3), -+ rsc_node_health = (1 << 4), - }; - - typedef struct resource_checks_s { -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 087819601..27fd76aaf 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -477,6 +477,15 @@ resource_check_list_default(pcmk__output_t *out, va_list args) { - parent->id, checks->lock_node); - } - -+ if (pcmk_is_set(checks->flags, rsc_node_health)) { -+ out->list_item(out, "check", -+ "'%s' cannot run on 
unhealthy nodes due to " -+ PCMK__OPT_NODE_HEALTH_STRATEGY "='%s'", -+ parent->id, -+ pe_pref(checks->rsc->cluster->config_hash, -+ PCMK__OPT_NODE_HEALTH_STRATEGY)); -+ } -+ - out->end_list(out); - return pcmk_rc_ok; - } -@@ -508,6 +517,10 @@ resource_check_list_xml(pcmk__output_t *out, va_list args) { - crm_xml_add(node, "locked-to", checks->lock_node); - } - -+ if (pcmk_is_set(checks->flags, rsc_node_health)) { -+ pcmk__xe_set_bool_attr(node, "unhealthy", true); -+ } -+ - return pcmk_rc_ok; - } - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 68e899c45..2aa3efe38 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -914,6 +914,61 @@ check_locked(resource_checks_t *checks) - } - } - -+static bool -+node_is_unhealthy(pe_node_t *node) -+{ -+ switch (pe__health_strategy(node->details->data_set)) { -+ case pcmk__health_strategy_none: -+ break; -+ -+ case pcmk__health_strategy_no_red: -+ if (pe__node_health(node) < 0) { -+ return true; -+ } -+ break; -+ -+ case pcmk__health_strategy_only_green: -+ if (pe__node_health(node) <= 0) { -+ return true; -+ } -+ break; -+ -+ case pcmk__health_strategy_progressive: -+ case pcmk__health_strategy_custom: -+ /* @TODO These are finite scores, possibly with rules, and possibly -+ * combining with other scores, so attributing these as a cause is -+ * nontrivial. 
-+ */ -+ break; -+ } -+ return false; -+} -+ -+static void -+check_node_health(resource_checks_t *checks, pe_node_t *node) -+{ -+ if (node == NULL) { -+ GHashTableIter iter; -+ bool allowed = false; -+ bool all_nodes_unhealthy = true; -+ -+ g_hash_table_iter_init(&iter, checks->rsc->allowed_nodes); -+ while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { -+ allowed = true; -+ if (!node_is_unhealthy(node)) { -+ all_nodes_unhealthy = false; -+ break; -+ } -+ } -+ if (allowed && all_nodes_unhealthy) { -+ checks->flags |= rsc_node_health; -+ } -+ -+ } else if (node_is_unhealthy(node)) { -+ checks->flags |= rsc_node_health; -+ } -+} -+ - int - cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - { -@@ -922,6 +977,7 @@ cli_resource_check(pcmk__output_t *out, pe_resource_t *rsc, pe_node_t *node) - check_role(&checks); - check_managed(&checks); - check_locked(&checks); -+ check_node_health(&checks, node); - - return out->message(out, "resource-check-list", &checks); - } --- -2.31.1 - - -From 48730fd51a22e109514764a039e5c89fd204ad4c Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 10:41:48 -0500 -Subject: [PATCH 09/14] Low: schemas: copy crm_resource API schema in - preparation for changes - ---- - include/crm/common/output_internal.h | 2 +- - xml/api/crm_resource-2.22.rng | 303 +++++++++++++++++++++++++++ - 2 files changed, 304 insertions(+), 1 deletion(-) - create mode 100644 xml/api/crm_resource-2.22.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index ca16227fe..bdcae8ad6 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.21" -+# define PCMK__API_VERSION "2.22" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -new file mode 100644 -index 000000000..cd74da0d8 ---- /dev/null -+++ b/xml/api/crm_resource-2.22.rng -@@ -0,0 +1,303 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ promoted -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ocf -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ true -+ false -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ Stopped -+ Started -+ Promoted -+ Unpromoted -+ -+ -+ Master -+ Slave -+ -+ -+ --- -2.31.1 - - -From 75a885d9da92c84038e3abf732c11cf3fb6a79a7 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:33:50 -0500 -Subject: [PATCH 10/14] Fix: tools: correct crm_resource --why schema to match - actual output - -If both a resource and node name are specified, "running_on" is optional ---- - xml/api/crm_resource-2.22.rng | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -index cd74da0d8..e89d850da 100644 ---- a/xml/api/crm_resource-2.22.rng -+++ b/xml/api/crm_resource-2.22.rng -@@ -126,7 +126,9 @@ - - - -- -+ -+ -+ - - - --- -2.31.1 - - -From 5e4f993859dd68a3f88cb0648ace7b3837316288 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:20:03 -0500 
-Subject: [PATCH 11/14] Low: schemas: simplify crm_resource --why schema - ---- - xml/api/crm_resource-2.22.rng | 64 ++++++++++++----------------------- - 1 file changed, 22 insertions(+), 42 deletions(-) - -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -index e89d850da..2d2ba839f 100644 ---- a/xml/api/crm_resource-2.22.rng -+++ b/xml/api/crm_resource-2.22.rng -@@ -102,56 +102,36 @@ - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- - -+ - - - -- -- -- - -- -- -- -- -- -- -- -- -- -- -- -- -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -- -- -- -- -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - --- -2.31.1 - - -From 79bdbbde27ad340c2054089aaecf5e0b49296e59 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:28:11 -0500 -Subject: [PATCH 12/14] Test: cts-cli: use validated XML output for - crm_resource --why test - ---- - cts/cli/regression.tools.exp | 8 ++++++-- - cts/cts-cli.in | 4 ++-- - 2 files changed, 8 insertions(+), 4 deletions(-) - -diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp -index 0d1cfa2ab..4237a3ec5 100644 ---- a/cts/cli/regression.tools.exp -+++ b/cts/cli/regression.tools.exp -@@ -888,8 +888,12 @@ Deleted 'dummy' option: id=dummy-meta_attributes-is-managed name=is-managed - =#=#=#= End test: Create another resource meta attribute - OK (0) =#=#=#= - * Passed: crm_resource - Create another resource meta attribute - =#=#=#= Begin test: Show why a resource is not running =#=#=#= --Resource dummy is not running --Configuration specifies 'dummy' should remain stopped -+ -+ -+ -+ -+ -+ - =#=#=#= End test: Show why a resource is not running - OK (0) =#=#=#= - * Passed: crm_resource - Show why a resource is not running - =#=#=#= Begin test: Remove another resource meta attribute =#=#=#= -diff --git a/cts/cts-cli.in b/cts/cts-cli.in -index 8565c485a..289ac966f 100755 ---- a/cts/cts-cli.in -+++ b/cts/cts-cli.in -@@ -657,8 +657,8 @@ function test_tools() { - test_assert_validate 
$CRM_EX_OK 0 - - desc="Show why a resource is not running" -- cmd="crm_resource -Y -r dummy" -- test_assert $CRM_EX_OK 0 -+ cmd="crm_resource -Y -r dummy --output-as=xml" -+ test_assert_validate $CRM_EX_OK 0 - - desc="Remove another resource meta attribute" - cmd="crm_resource -r dummy --meta -d target-role --output-as=xml" --- -2.31.1 - - -From 929d1b40e82f186e7e31e380db2620e7e23968f1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 10:43:22 -0500 -Subject: [PATCH 13/14] Low: schemas: update crm_resource --why schema for new - health check - ---- - xml/api/crm_resource-2.22.rng | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/xml/api/crm_resource-2.22.rng b/xml/api/crm_resource-2.22.rng -index 2d2ba839f..8a4667559 100644 ---- a/xml/api/crm_resource-2.22.rng -+++ b/xml/api/crm_resource-2.22.rng -@@ -157,6 +157,9 @@ - - - -+ -+ true -+ - - - --- -2.31.1 - - -From 6630e55abc7b26be294ab6d42f12cdb7e2c69b55 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 23 Jun 2022 11:07:20 -0500 -Subject: [PATCH 14/14] Test: cts-cli: add tests for checking resource status - on unhealthy node - ---- - cts/cli/regression.tools.exp | 112 ++++++++++++++++++++++++++++++++++- - cts/cts-cli.in | 12 ++++ - 2 files changed, 122 insertions(+), 2 deletions(-) - -diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp -index 4237a3ec5..89ae4e97d 100644 ---- a/cts/cli/regression.tools.exp -+++ b/cts/cli/regression.tools.exp -@@ -3406,13 +3406,14 @@ Removing constraint: cli-prefer-dummy - - =#=#=#= End test: Clear all implicit constraints for dummy - OK (0) =#=#=#= - * Passed: crm_resource - Clear all implicit constraints for dummy --=#=#=#= Begin test: Delete a resource =#=#=#= --=#=#=#= Current cib after: Delete a resource =#=#=#= -+=#=#=#= Begin test: Set a node health strategy =#=#=#= -+=#=#=#= Current cib after: Set a node health strategy =#=#=#= - - - - - -+ - - - -@@ -3427,6 +3428,113 @@ Removing constraint: cli-prefer-dummy - - - -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Set a node health strategy - OK (0) =#=#=#= -+* Passed: crm_attribute - Set a node health strategy -+=#=#=#= Begin test: Set a node health attribute =#=#=#= -+=#=#=#= Current cib after: Set a node health attribute =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Set a node health attribute - OK (0) =#=#=#= -+* Passed: crm_attribute - Set a node health attribute -+=#=#=#= Begin test: Show why a resource is not running on an unhealthy node =#=#=#= -+ -+ -+ -+ -+ -+ -+=#=#=#= End test: Show why a resource is not running on an unhealthy node - OK (0) =#=#=#= -+* Passed: crm_resource - Show why a resource is not running on an unhealthy node -+=#=#=#= Begin test: Delete a resource =#=#=#= -+=#=#=#= Current cib after: Delete a resource =#=#=#= -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -diff --git a/cts/cts-cli.in b/cts/cts-cli.in -index 289ac966f..990d37cf7 100755 ---- a/cts/cts-cli.in -+++ b/cts/cts-cli.in -@@ -883,6 +883,18 @@ function test_tools() { - cmd="crm_resource -r dummy -U" - test_assert $CRM_EX_OK - -+ desc="Set a node health strategy" -+ cmd="crm_attribute -n node-health-strategy -v migrate-on-red" -+ test_assert $CRM_EX_OK -+ -+ desc="Set a node health attribute" -+ cmd="crm_attribute -N node3 -n '#health-cts-cli' -v red" -+ test_assert $CRM_EX_OK -+ -+ desc="Show why a resource is not running on an unhealthy node" -+ cmd="crm_resource -N node3 -Y -r dummy --output-as=xml" -+ test_assert_validate $CRM_EX_OK 0 -+ - desc="Delete a resource" - cmd="crm_resource -D -r dummy -t primitive" - test_assert $CRM_EX_OK --- -2.31.1 - diff --git a/SOURCES/007-stonith_admin.patch b/SOURCES/007-stonith_admin.patch deleted file mode 100644 index bddba16..0000000 --- a/SOURCES/007-stonith_admin.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 
d6294dd28b6d95ad3844824996717f9959d97ac6 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Thu, 30 Jun 2022 11:07:32 -0700 -Subject: [PATCH 1/2] Fix: Use correct boolean in stonith__validate_agent_xml - -This fixes a regression introduced by 91a2b2e that flips the boolean -values for "valid" in the XML output. - -Resolves: RHBZ#2102292 (partial) - -Signed-off-by: Reid Wahl ---- - lib/fencing/st_output.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c -index e0ff848c2..eb10ad0c5 100644 ---- a/lib/fencing/st_output.c -+++ b/lib/fencing/st_output.c -@@ -528,10 +528,9 @@ validate_agent_xml(pcmk__output_t *out, va_list args) { - char *error_output = va_arg(args, char *); - int rc = va_arg(args, int); - -- xmlNodePtr node = pcmk__output_create_xml_node(out, "validate", -- "agent", agent, -- "valid", pcmk__btoa(rc), -- NULL); -+ xmlNodePtr node = pcmk__output_create_xml_node( -+ out, "validate", "agent", agent, "valid", pcmk__btoa(rc == pcmk_ok), -+ NULL); - - if (device != NULL) { - crm_xml_add(node, "device", device); --- -2.31.1 - - -From 81e83683e69b4f147f40f5353f8e68032758a104 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Wed, 29 Jun 2022 18:15:33 -0700 -Subject: [PATCH 2/2] Fix: Use failed action result in rhcs_validate and - _get_metadata - -If an action failed but has a non-NULL result, get the rc and other -attributes from that result. - -This fixes a regression introduced by b441925, in which failure XML -output now contains a CRM_EX_CONNECTED rc instead of the correct one and -does not contain stdout/stderr. That commit caused -services__execute_file() to return a proper rc instead of TRUE. A -non-pcmk_ok bubbled up the call chain causing -internal_stonith_action_execute() to return -ECONNABORTED. Then -rhcs_validate() and _get_metadata() would use this rc instead of the one -attached to the result. 
- -Resolves: RHBZ#2102292 - -Signed-off-by: Reid Wahl ---- - lib/fencing/st_rhcs.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c -index 39485013e..029c97eea 100644 ---- a/lib/fencing/st_rhcs.c -+++ b/lib/fencing/st_rhcs.c -@@ -130,16 +130,15 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) - stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, - 5, NULL, NULL, NULL); - int rc = stonith__execute(action); -+ result = stonith__action_result(action); - -- if (rc < 0) { -+ if (rc < 0 && result == NULL) { - crm_warn("Could not execute metadata action for %s: %s " - CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); - stonith__destroy_action(action); - return rc; - } - -- result = stonith__action_result(action); -- - if (result->execution_status != PCMK_EXEC_DONE) { - crm_warn("Could not execute metadata action for %s: %s", - agent, pcmk_exec_status_str(result->execution_status)); -@@ -262,6 +261,7 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, - int remaining_timeout = timeout; - xmlNode *metadata = NULL; - stonith_action_t *action = NULL; -+ pcmk__action_result_t *result = NULL; - - if (host_arg == NULL) { - time_t start_time = time(NULL); -@@ -298,9 +298,9 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, - NULL, host_arg); - - rc = stonith__execute(action); -- if (rc == pcmk_ok) { -- pcmk__action_result_t *result = stonith__action_result(action); -+ result = stonith__action_result(action); - -+ if (result != NULL) { - rc = pcmk_rc2legacy(stonith__result2rc(result)); - - // Take ownership of output so stonith__destroy_action() doesn't free it --- -2.31.1 - diff --git a/SOURCES/008-metadata.patch b/SOURCES/008-metadata.patch deleted file mode 100644 index 5dc9e27..0000000 --- a/SOURCES/008-metadata.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 
e4d9c795dfe2d6737c777a265292864da98dae8f Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Thu, 30 Jun 2022 14:40:31 -0700 -Subject: [PATCH] Low: Always null-check result in stonith__rhcs_get_metadata - -Null-check result even if rc == 0. - -Signed-off-by: Reid Wahl ---- - lib/fencing/st_rhcs.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c -index 029c97eea..dfccff2cb 100644 ---- a/lib/fencing/st_rhcs.c -+++ b/lib/fencing/st_rhcs.c -@@ -132,9 +132,11 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) - int rc = stonith__execute(action); - result = stonith__action_result(action); - -- if (rc < 0 && result == NULL) { -- crm_warn("Could not execute metadata action for %s: %s " -- CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); -+ if (result == NULL) { -+ if (rc < 0) { -+ crm_warn("Could not execute metadata action for %s: %s " -+ CRM_XS " rc=%d", agent, pcmk_strerror(rc), rc); -+ } - stonith__destroy_action(action); - return rc; - } --- -2.31.1 - diff --git a/SOURCES/009-validate.patch b/SOURCES/009-validate.patch deleted file mode 100644 index a5d01f5..0000000 --- a/SOURCES/009-validate.patch +++ /dev/null @@ -1,94 +0,0 @@ -From d00a6abde7e6a41f8bc6085c875cb8072aff499b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 30 Jun 2022 09:25:05 -0400 -Subject: [PATCH 1/2] Fix: libstonithd: Add the "Agent not found..." message to - formatted output. 
- ---- - lib/fencing/st_client.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 137642af7..971bbe9a5 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -1763,9 +1763,14 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, - default: - rc = -EINVAL; - errno = EINVAL; -- crm_perror(LOG_ERR, -- "Agent %s not found or does not support validation", -- agent); -+ -+ if (error_output) { -+ *error_output = crm_strdup_printf("Agent %s not found or does not support validation", -+ agent); -+ } else { -+ crm_err("Agent %s not found or does not support validation", agent); -+ } -+ - break; - } - g_hash_table_destroy(params_table); --- -2.31.1 - - -From f3a5fc961c30556b975011773e4cebf323bec38e Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 1 Jul 2022 10:38:45 -0400 -Subject: [PATCH 2/2] Refactor: libstonithd: Split apart error conditions when - validating. - -The "not found" and "can't validate" cases were previously jumbled -together. Now, return ENOENT if the agent is not found and EOPNOTSUPP -if it can't validate. The only caller appears to be handling both cases -correctly already, so no changes are needed there. 
---- - lib/fencing/st_client.c | 21 +++++++++++++++++---- - 1 file changed, 17 insertions(+), 4 deletions(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 971bbe9a5..192334812 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -1760,19 +1760,32 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, - break; - #endif - -+ case st_namespace_invalid: -+ errno = ENOENT; -+ rc = -errno; -+ -+ if (error_output) { -+ *error_output = crm_strdup_printf("Agent %s not found", agent); -+ } else { -+ crm_err("Agent %s not found", agent); -+ } -+ -+ break; -+ - default: -- rc = -EINVAL; -- errno = EINVAL; -+ errno = EOPNOTSUPP; -+ rc = -errno; - - if (error_output) { -- *error_output = crm_strdup_printf("Agent %s not found or does not support validation", -+ *error_output = crm_strdup_printf("Agent %s does not support validation", - agent); - } else { -- crm_err("Agent %s not found or does not support validation", agent); -+ crm_err("Agent %s does not support validation", agent); - } - - break; - } -+ - g_hash_table_destroy(params_table); - return rc; - } --- -2.31.1 - diff --git a/SOURCES/010-regression.patch b/SOURCES/010-regression.patch deleted file mode 100644 index e40ff0e..0000000 --- a/SOURCES/010-regression.patch +++ /dev/null @@ -1,47 +0,0 @@ -From e5f80059c7f1c0ad3264dc2a2a61e64cded0fe0f Mon Sep 17 00:00:00 2001 -From: Hideo Yamauchi -Date: Tue, 12 Jul 2022 14:45:55 +0900 -Subject: [PATCH] High: scheduler: Resolves an issue where STONITH devices - cannot be registered. 
- ---- - lib/pacemaker/pcmk_sched_allocate.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c -index 85df6ace8..a7fe9c8d6 100644 ---- a/lib/pacemaker/pcmk_sched_allocate.c -+++ b/lib/pacemaker/pcmk_sched_allocate.c -@@ -724,12 +724,18 @@ log_unrunnable_actions(pe_working_set_t *data_set) - static void - unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) - { -+ const char* localhost_save = NULL; -+ - if (pcmk_is_set(data_set->flags, pe_flag_have_status)) { - crm_trace("Reusing previously calculated cluster status"); - pe__set_working_set_flags(data_set, flags); - return; - } - -+ if (data_set->localhost) { -+ localhost_save = data_set->localhost; -+ } -+ - CRM_ASSERT(cib != NULL); - crm_trace("Calculating cluster status"); - -@@ -740,6 +746,10 @@ unpack_cib(xmlNode *cib, unsigned long long flags, pe_working_set_t *data_set) - */ - set_working_set_defaults(data_set); - -+ if (localhost_save) { -+ data_set->localhost = localhost_save; -+ } -+ - pe__set_working_set_flags(data_set, flags); - data_set->input = cib; - cluster_status(data_set); // Sets pe_flag_have_status --- -2.31.1 - diff --git a/SOURCES/011-unfencing.patch b/SOURCES/011-unfencing.patch deleted file mode 100644 index 01255df..0000000 --- a/SOURCES/011-unfencing.patch +++ /dev/null @@ -1,178 +0,0 @@ -From b1094468ab0f7c6d2f5b457b721f3a852a9cae2c Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 14 Jul 2022 13:09:51 +0200 -Subject: [PATCH 1/2] Fix: do unfencing equally for cluster-nodes & remotes - -Fixes T28 ---- - lib/pengine/utils.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 0c2eb3c16..83f76cccf 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -1201,12 +1201,8 @@ pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, - add_hash_param(stonith_op->meta, 
XML_LRM_ATTR_TARGET_UUID, node->details->id); - add_hash_param(stonith_op->meta, "stonith_action", op); - -- if (pe__is_guest_or_remote_node(node) -- && pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { -- /* Extra work to detect device changes on remotes -- * -- * We may do this for all nodes in the future, but for now -- * the pcmk__check_action_config() based stuff works fine. -+ if (pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { -+ /* Extra work to detect device changes - */ - long max = 1024; - long digests_all_offset = 0; --- -2.31.1 - - -From f5db6e2c94273623a49f36f1bdb6c39315c53cab Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 14 Jul 2022 11:29:05 +0200 -Subject: [PATCH 2/2] Test: cts-scheduler: update expected output for changes - in unfencing - ---- - cts/scheduler/exp/start-then-stop-with-unfence.exp | 2 +- - cts/scheduler/exp/unfence-definition.exp | 6 +++--- - cts/scheduler/exp/unfence-device.exp | 6 +++--- - cts/scheduler/exp/unfence-parameters.exp | 6 +++--- - cts/scheduler/exp/unfence-startup.exp | 4 ++-- - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/cts/scheduler/exp/start-then-stop-with-unfence.exp b/cts/scheduler/exp/start-then-stop-with-unfence.exp -index b1868586f..69cfb63de 100644 ---- a/cts/scheduler/exp/start-then-stop-with-unfence.exp -+++ b/cts/scheduler/exp/start-then-stop-with-unfence.exp -@@ -151,7 +151,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-definition.exp b/cts/scheduler/exp/unfence-definition.exp -index 840a8d212..6a098ed3c 100644 ---- a/cts/scheduler/exp/unfence-definition.exp -+++ b/cts/scheduler/exp/unfence-definition.exp -@@ -373,7 +373,7 @@ - - - -- -+ - - - -@@ -384,7 +384,7 @@ - - - -- -+ - - - -@@ -392,7 +392,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-device.exp b/cts/scheduler/exp/unfence-device.exp -index a39fc758f..452351d98 100644 ---- a/cts/scheduler/exp/unfence-device.exp -+++ b/cts/scheduler/exp/unfence-device.exp -@@ 
-76,7 +76,7 @@ - - - -- -+ - - - -@@ -84,7 +84,7 @@ - - - -- -+ - - - -@@ -92,7 +92,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-parameters.exp b/cts/scheduler/exp/unfence-parameters.exp -index 3e70cb8e9..268bf008e 100644 ---- a/cts/scheduler/exp/unfence-parameters.exp -+++ b/cts/scheduler/exp/unfence-parameters.exp -@@ -357,7 +357,7 @@ - - - -- -+ - - - -@@ -368,7 +368,7 @@ - - - -- -+ - - - -@@ -376,7 +376,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/unfence-startup.exp b/cts/scheduler/exp/unfence-startup.exp -index 6745bff4b..f2d38e80c 100644 ---- a/cts/scheduler/exp/unfence-startup.exp -+++ b/cts/scheduler/exp/unfence-startup.exp -@@ -173,7 +173,7 @@ - - - -- -+ - - - -@@ -184,7 +184,7 @@ - - - -- -+ - - - --- -2.31.1 - diff --git a/SOURCES/012-crm_resource.patch b/SOURCES/012-crm_resource.patch deleted file mode 100644 index a087b3f..0000000 --- a/SOURCES/012-crm_resource.patch +++ /dev/null @@ -1,38 +0,0 @@ -From fe9150bc4b740b3748fec34fe668df4f8c0d0e25 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 9 Aug 2022 15:38:03 -0500 -Subject: [PATCH] Fix: tools: correct minimum execution status shown by - crm_resource -O - -regression introduced in 2.1.0 by 5ef28b946 - -Fixes T533 ---- - lib/pengine/pe_output.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c -index 5d716fe6cb..dbb49637c9 100644 ---- a/lib/pengine/pe_output.c -+++ b/lib/pengine/pe_output.c -@@ -1878,7 +1878,7 @@ node_and_op(pcmk__output_t *out, va_list args) { - time_t last_change = 0; - - pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), -- &status, 0); -+ &status, PCMK_EXEC_UNKNOWN); - - rsc = pe_find_resource(data_set->resources, op_rsc); - -@@ -1932,7 +1932,7 @@ node_and_op_xml(pcmk__output_t *out, va_list args) { - xmlNode *node = NULL; - - pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), -- &status, 0); -+ &status, PCMK_EXEC_UNKNOWN); - node = 
pcmk__output_create_xml_node(out, "operation", - "op", op_key ? op_key : ID(xml_op), - "node", crm_element_value(xml_op, XML_ATTR_UNAME), --- -2.31.1 - diff --git a/SOURCES/013-rolling-upgrade-monitor.patch b/SOURCES/013-rolling-upgrade-monitor.patch deleted file mode 100644 index ab67986..0000000 --- a/SOURCES/013-rolling-upgrade-monitor.patch +++ /dev/null @@ -1,1978 +0,0 @@ -From a35dfe0b76555f30dda4c9d96630866de40322b3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 14:40:24 -0500 -Subject: [PATCH 01/24] Low: fencing: use a default timeout with metadata and - validate - -If the caller did not specify a timeout, use a default in -stonith_api_operations_t:metadata() and validate(). (Timeout is currently -ignored past that point, so this has no effect yet.) - -Also, rename timeout argument for clarity. ---- - lib/fencing/st_client.c | 23 ++++++++++++++++------- - 1 file changed, 16 insertions(+), 7 deletions(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 2b0d308..28791ff 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -504,7 +504,8 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names - - static int - stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, -- const char *namespace, char **output, int timeout) -+ const char *namespace, char **output, -+ int timeout_sec) - { - /* By executing meta-data directly, we can get it from stonith_admin when - * the cluster is not running, which is important for higher-level tools. 
-@@ -512,16 +513,20 @@ stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *a - - enum stonith_namespace ns = stonith_get_namespace(agent, namespace); - -+ if (timeout_sec <= 0) { -+ timeout_sec = CRMD_METADATA_CALL_TIMEOUT; -+ } -+ - crm_trace("Looking up metadata for %s agent %s", - stonith_namespace2text(ns), agent); - - switch (ns) { - case st_namespace_rhcs: -- return stonith__rhcs_metadata(agent, timeout, output); -+ return stonith__rhcs_metadata(agent, timeout_sec, output); - - #if HAVE_STONITH_STONITH_H - case st_namespace_lha: -- return stonith__lha_metadata(agent, timeout, output); -+ return stonith__lha_metadata(agent, timeout_sec, output); - #endif - - default: -@@ -1684,8 +1689,8 @@ stonith_api_delete(stonith_t * stonith) - static int - stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, - const char *namespace_s, const char *agent, -- stonith_key_value_t *params, int timeout, char **output, -- char **error_output) -+ stonith_key_value_t *params, int timeout_sec, -+ char **output, char **error_output) - { - /* Validation should be done directly via the agent, so we can get it from - * stonith_admin when the cluster is not running, which is important for -@@ -1731,17 +1736,21 @@ stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, - *error_output = NULL; - } - -+ if (timeout_sec <= 0) { -+ timeout_sec = CRMD_METADATA_CALL_TIMEOUT; // Questionable -+ } -+ - switch (stonith_get_namespace(agent, namespace_s)) { - case st_namespace_rhcs: - rc = stonith__rhcs_validate(st, call_options, target, agent, -- params_table, host_arg, timeout, -+ params_table, host_arg, timeout_sec, - output, error_output); - break; - - #if HAVE_STONITH_STONITH_H - case st_namespace_lha: - rc = stonith__lha_validate(st, call_options, target, agent, -- params_table, timeout, output, -+ params_table, timeout_sec, output, - error_output); - break; - #endif --- -2.31.1 - -From c2a863b7daeb829c0210d87a2f1503c1cf4dc7a5 
Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 14:00:00 -0500 -Subject: [PATCH 02/24] Doc: fencer: improve - stonith_api_operations_t:metadata() description - ---- - include/crm/stonith-ng.h | 15 +++++++++++---- - lib/fencing/st_client.c | 7 ++++--- - 2 files changed, 15 insertions(+), 7 deletions(-) - -diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h -index 4fe52ef..a41d411 100644 ---- a/include/crm/stonith-ng.h -+++ b/include/crm/stonith-ng.h -@@ -206,14 +206,21 @@ typedef struct stonith_api_operations_s - stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list); - - /*! -- * \brief Get the metadata documentation for a resource. -+ * \brief Retrieve a fence agent's metadata - * -- * \note Value is returned in output. Output must be freed when set. -+ * \param[in,out] stonith Fencer connection -+ * \param[in] call_options Group of enum stonith_call_options -+ * (currently ignored) -+ * \param[in] agent Fence agent to query -+ * \param[in] namespace Namespace of fence agent to query (optional) -+ * \param[out] output Where to store metadata -+ * \param[in] timeout_sec Error if not complete within this time - * - * \return Legacy Pacemaker return code -+ * \note The caller is responsible for freeing *output using free(). - */ -- int (*metadata)(stonith_t *st, int options, -- const char *device, const char *provider, char **output, int timeout); -+ int (*metadata)(stonith_t *stonith, int call_options, const char *agent, -+ const char *namespace, char **output, int timeout_sec); - - /*! 
- * \brief Retrieve a list of installed stonith agents -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 28791ff..6c252bc 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -502,10 +502,11 @@ stonith_api_device_list(stonith_t * stonith, int call_options, const char *names - return count; - } - -+// See stonith_api_operations_t:metadata() documentation - static int --stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, -- const char *namespace, char **output, -- int timeout_sec) -+stonith_api_device_metadata(stonith_t *stonith, int call_options, -+ const char *agent, const char *namespace, -+ char **output, int timeout_sec) - { - /* By executing meta-data directly, we can get it from stonith_admin when - * the cluster is not running, which is important for higher-level tools. --- -2.31.1 - -From 9beff34a0d39425ef470e59e251a8ca7c08e69a0 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 14:16:54 -0500 -Subject: [PATCH 03/24] Doc: fencing: add doxygen block for - stonith__action_create() - -... 
and rename a couple arguments for clarity ---- - include/crm/fencing/internal.h | 4 ++-- - lib/fencing/st_actions.c | 33 ++++++++++++++++++++++++--------- - 2 files changed, 26 insertions(+), 11 deletions(-) - -diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h -index d2b49f8..e2ca85e 100644 ---- a/include/crm/fencing/internal.h -+++ b/include/crm/fencing/internal.h -@@ -50,10 +50,10 @@ struct stonith_action_s; - typedef struct stonith_action_s stonith_action_t; - - stonith_action_t *stonith_action_create(const char *agent, -- const char *_action, -+ const char *action_name, - const char *victim, - uint32_t victim_nodeid, -- int timeout, -+ int timeout_sec, - GHashTable * device_args, - GHashTable * port_map, - const char * host_arg); -diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c -index b3429f6..d16fa33 100644 ---- a/lib/fencing/st_actions.c -+++ b/lib/fencing/st_actions.c -@@ -232,27 +232,42 @@ stonith__action_result(stonith_action_t *action) - } - - #define FAILURE_MAX_RETRIES 2 -+ -+/*! 
-+ * \internal -+ * \brief Create a new fencing action to be executed -+ * -+ * \param[in] agent Fence agent to use -+ * \param[in] action_name Fencing action to be executed -+ * \param[in] victim Name of target of fencing action (if known) -+ * \param[in] victim_nodeid Node ID of target of fencing action (if known) -+ * \param[in] timeout_sec Timeout to be used when executing action -+ * \param[in] device_args Parameters to pass to fence agent -+ * \param[in] port_map Mapping of target names to device ports -+ * \param[in] host_arg Agent parameter used to pass target name -+ * -+ * \return Newly created fencing action (asserts on error, never NULL) -+ */ - stonith_action_t * - stonith_action_create(const char *agent, -- const char *_action, -+ const char *action_name, - const char *victim, - uint32_t victim_nodeid, -- int timeout, GHashTable * device_args, -+ int timeout_sec, GHashTable * device_args, - GHashTable * port_map, const char *host_arg) - { -- stonith_action_t *action; -+ stonith_action_t *action = calloc(1, sizeof(stonith_action_t)); - -- action = calloc(1, sizeof(stonith_action_t)); - CRM_ASSERT(action != NULL); - -- action->args = make_args(agent, _action, victim, victim_nodeid, -+ action->args = make_args(agent, action_name, victim, victim_nodeid, - device_args, port_map, host_arg); - crm_debug("Preparing '%s' action for %s using agent %s", -- _action, (victim? victim : "no target"), agent); -+ action_name, (victim? 
victim : "no target"), agent); - action->agent = strdup(agent); -- action->action = strdup(_action); -+ action->action = strdup(action_name); - pcmk__str_update(&action->victim, victim); -- action->timeout = action->remaining_timeout = timeout; -+ action->timeout = action->remaining_timeout = timeout_sec; - action->max_retries = FAILURE_MAX_RETRIES; - - pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, -@@ -262,7 +277,7 @@ stonith_action_create(const char *agent, - char buffer[512]; - const char *value = NULL; - -- snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); -+ snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", action_name); - value = g_hash_table_lookup(device_args, buffer); - - if (value) { --- -2.31.1 - -From 3001cb016eefff55c55e709247b0c14c331fb330 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 14:20:24 -0500 -Subject: [PATCH 04/24] Low: fencing: use requested timeout with RHCS metadata - actions - -... instead of hardcoded 5 seconds, and rename timeout argument for clarity ---- - lib/fencing/st_rhcs.c | 35 ++++++++++++++++------------------- - 1 file changed, 16 insertions(+), 19 deletions(-) - -diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c -index dfccff2..5e600d2 100644 ---- a/lib/fencing/st_rhcs.c -+++ b/lib/fencing/st_rhcs.c -@@ -112,25 +112,24 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter) - } - - /*! -- * \brief Execute RHCS-compatible agent's meta-data action -+ * \brief Execute RHCS-compatible agent's metadata action - * -- * \param[in] agent Agent to execute -- * \param[in] timeout Action timeout -- * \param[out] metadata Where to store output xmlNode (or NULL to ignore) -- * -- * \todo timeout is currently ignored; shouldn't we use it? 
-+ * \param[in] agent Agent to execute -+ * \param[in] timeout_sec Action timeout -+ * \param[out] metadata Where to store output xmlNode (or NULL to ignore) - */ - static int --stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) -+stonith__rhcs_get_metadata(const char *agent, int timeout_sec, -+ xmlNode **metadata) - { - xmlNode *xml = NULL; - xmlNode *actions = NULL; - xmlXPathObject *xpathObj = NULL; -- pcmk__action_result_t *result = NULL; -- stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, -- 5, NULL, NULL, NULL); -+ stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, -+ 0, timeout_sec, NULL, -+ NULL, NULL); - int rc = stonith__execute(action); -- result = stonith__action_result(action); -+ pcmk__action_result_t *result = stonith__action_result(action); - - if (result == NULL) { - if (rc < 0) { -@@ -208,21 +207,19 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) - } - - /*! -- * \brief Execute RHCS-compatible agent's meta-data action -- * -- * \param[in] agent Agent to execute -- * \param[in] timeout Action timeout -- * \param[out] output Where to store action output (or NULL to ignore) -+ * \brief Retrieve metadata for RHCS-compatible fence agent - * -- * \todo timeout is currently ignored; shouldn't we use it? 
-+ * \param[in] agent Agent to execute -+ * \param[in] timeout_sec Action timeout -+ * \param[out] output Where to store action output (or NULL to ignore) - */ - int --stonith__rhcs_metadata(const char *agent, int timeout, char **output) -+stonith__rhcs_metadata(const char *agent, int timeout_sec, char **output) - { - char *buffer = NULL; - xmlNode *xml = NULL; - -- int rc = stonith__rhcs_get_metadata(agent, timeout, &xml); -+ int rc = stonith__rhcs_get_metadata(agent, timeout_sec, &xml); - - if (rc != pcmk_ok) { - free_xml(xml); --- -2.31.1 - -From 17dbf449d8b51ea27a89a13f47160a95b0a45149 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 14:32:44 -0500 -Subject: [PATCH 05/24] Refactor: fencing: make stonith_action_t:async bool - ---- - lib/fencing/st_actions.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c -index d16fa33..abd0d5a 100644 ---- a/lib/fencing/st_actions.c -+++ b/lib/fencing/st_actions.c -@@ -9,6 +9,7 @@ - - #include - -+#include - #include - #include - #include -@@ -32,7 +33,7 @@ struct stonith_action_s { - char *victim; - GHashTable *args; - int timeout; -- int async; -+ bool async; - void *userdata; - void (*done_cb) (int pid, const pcmk__action_result_t *result, - void *user_data); -@@ -671,7 +672,7 @@ stonith_action_execute_async(stonith_action_t * action, - action->userdata = userdata; - action->done_cb = done; - action->fork_cb = fork_cb; -- action->async = 1; -+ action->async = true; - - return internal_stonith_action_execute(action); - } --- -2.31.1 - -From 9b0f568dddc928104e6d2d54d5138e0c7ca5b537 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 14:59:28 -0500 -Subject: [PATCH 06/24] Refactor: fencing: rename - stonith_action_execute_async() - -... 
to stonith__execute_async(), since it's internal ---- - daemons/fenced/fenced_commands.c | 4 ++-- - include/crm/fencing/internal.h | 12 +++++------- - lib/fencing/st_actions.c | 11 +++++------ - 3 files changed, 12 insertions(+), 15 deletions(-) - -diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c -index 94aa6b8..41a1936 100644 ---- a/daemons/fenced/fenced_commands.c -+++ b/daemons/fenced/fenced_commands.c -@@ -510,8 +510,8 @@ stonith_device_execute(stonith_device_t * device) - /* for async exec, exec_rc is negative for early error exit - otherwise handling of success/errors is done via callbacks */ - cmd->activating_on = device; -- exec_rc = stonith_action_execute_async(action, (void *)cmd, -- cmd->done_cb, fork_cb); -+ exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, -+ fork_cb); - if (exec_rc < 0) { - cmd->activating_on = NULL; - cmd->done_cb(0, stonith__action_result(action), cmd); -diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h -index e2ca85e..1797d9a 100644 ---- a/include/crm/fencing/internal.h -+++ b/include/crm/fencing/internal.h -@@ -64,13 +64,11 @@ void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result); - void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result); - xmlNode *stonith__find_xe_with_result(xmlNode *xml); - --int --stonith_action_execute_async(stonith_action_t * action, -- void *userdata, -- void (*done) (int pid, -- const pcmk__action_result_t *result, -- void *user_data), -- void (*fork_cb) (int pid, void *user_data)); -+int stonith__execute_async(stonith_action_t *action, void *userdata, -+ void (*done) (int pid, -+ const pcmk__action_result_t *result, -+ void *user_data), -+ void (*fork_cb) (int pid, void *user_data)); - - xmlNode *create_level_registration_xml(const char *node, const char *pattern, - const char *attr, const char *value, -diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c -index 
abd0d5a..c4e32bd 100644 ---- a/lib/fencing/st_actions.c -+++ b/lib/fencing/st_actions.c -@@ -658,12 +658,11 @@ internal_stonith_action_execute(stonith_action_t * action) - * \return pcmk_ok if ownership of action has been taken, -errno otherwise - */ - int --stonith_action_execute_async(stonith_action_t * action, -- void *userdata, -- void (*done) (int pid, -- const pcmk__action_result_t *result, -- void *user_data), -- void (*fork_cb) (int pid, void *user_data)) -+stonith__execute_async(stonith_action_t * action, void *userdata, -+ void (*done) (int pid, -+ const pcmk__action_result_t *result, -+ void *user_data), -+ void (*fork_cb) (int pid, void *user_data)) - { - if (!action) { - return -EINVAL; --- -2.31.1 - -From 1d8fbd12b302b5029a341f269bd00def79e6a0ea Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 16:43:57 -0500 -Subject: [PATCH 07/24] Refactor: fencing: add internal API for getting - metadata async - -Nothing uses it yet ---- - include/crm/fencing/internal.h | 6 +++ - lib/fencing/st_client.c | 80 ++++++++++++++++++++++++++++++++++ - 2 files changed, 86 insertions(+) - -diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h -index 1797d9a..513d1c4 100644 ---- a/include/crm/fencing/internal.h -+++ b/include/crm/fencing/internal.h -@@ -70,6 +70,12 @@ int stonith__execute_async(stonith_action_t *action, void *userdata, - void *user_data), - void (*fork_cb) (int pid, void *user_data)); - -+int stonith__metadata_async(const char *agent, int timeout_sec, -+ void (*callback)(int pid, -+ const pcmk__action_result_t *result, -+ void *user_data), -+ void *user_data); -+ - xmlNode *create_level_registration_xml(const char *node, const char *pattern, - const char *attr, const char *value, - int level, -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 6c252bc..91075bd 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -2386,6 +2386,86 @@ stonith__device_parameter_flags(uint32_t 
*device_flags, const char *device_name, - freeXpathObject(xpath); - } - -+/*! -+ * \internal -+ * \brief Retrieve fence agent meta-data asynchronously -+ * -+ * \param[in] agent Agent to execute -+ * \param[in] timeout_sec Error if not complete within this time -+ * \param[in] callback Function to call with result (this will always be -+ * called, whether by this function directly or later -+ * via the main loop, and on success the metadata will -+ * be in its result argument's action_stdout) -+ * \param[in] user_data User data to pass to callback -+ * -+ * \return Standard Pacemaker return code -+ * \note The caller must use a main loop. This function is not a -+ * stonith_api_operations_t method because it does not need a stonith_t -+ * object and does not go through the fencer, but executes the agent -+ * directly. -+ */ -+int -+stonith__metadata_async(const char *agent, int timeout_sec, -+ void (*callback)(int pid, -+ const pcmk__action_result_t *result, -+ void *user_data), -+ void *user_data) -+{ -+ switch (stonith_get_namespace(agent, NULL)) { -+ case st_namespace_rhcs: -+ { -+ stonith_action_t *action = NULL; -+ int rc = pcmk_ok; -+ -+ action = stonith_action_create(agent, "metadata", NULL, 0, -+ timeout_sec, NULL, NULL, NULL); -+ -+ rc = stonith__execute_async(action, user_data, callback, NULL); -+ if (rc != pcmk_ok) { -+ callback(0, stonith__action_result(action), user_data); -+ stonith__destroy_action(action); -+ } -+ return pcmk_legacy2rc(rc); -+ } -+ -+#if HAVE_STONITH_STONITH_H -+ case st_namespace_lha: -+ // LHA metadata is simply synthesized, so simulate async -+ { -+ pcmk__action_result_t result = { -+ .exit_status = CRM_EX_OK, -+ .execution_status = PCMK_EXEC_DONE, -+ .exit_reason = NULL, -+ .action_stdout = NULL, -+ .action_stderr = NULL, -+ }; -+ -+ stonith__lha_metadata(agent, timeout_sec, -+ &result.action_stdout); -+ callback(0, &result, user_data); -+ pcmk__reset_result(&result); -+ return pcmk_rc_ok; -+ } -+#endif -+ -+ default: -+ { -+ 
pcmk__action_result_t result = { -+ .exit_status = CRM_EX_ERROR, -+ .execution_status = PCMK_EXEC_ERROR_HARD, -+ .exit_reason = crm_strdup_printf("No such agent '%s'", -+ agent), -+ .action_stdout = NULL, -+ .action_stderr = NULL, -+ }; -+ -+ callback(0, &result, user_data); -+ pcmk__reset_result(&result); -+ return ENOENT; -+ } -+ } -+} -+ - /*! - * \internal - * \brief Return the exit status from an async action callback --- -2.31.1 - -From 1869cc181ef9599bd938fc545d302b2721169755 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 13 Sep 2022 17:33:10 -0500 -Subject: [PATCH 08/24] Refactor: liblrmd: add internal API for getting - metadata async - -Nothing uses it yet ---- - include/crm/lrmd_internal.h | 10 +++- - lib/lrmd/lrmd_client.c | 115 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 123 insertions(+), 2 deletions(-) - -diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h -index 284c4d6..5cb00d5 100644 ---- a/include/crm/lrmd_internal.h -+++ b/include/crm/lrmd_internal.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2015-2021 the Pacemaker project contributors -+ * Copyright 2015-2022 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -17,7 +17,7 @@ - #include // mainloop_io_t, ipc_client_callbacks - #include // pcmk__output_t - #include // pcmk__remote_t --#include // lrmd_t, lrmd_event_data_t -+#include // lrmd_t, lrmd_event_data_t, lrmd_rsc_info_t - - int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port); - -@@ -35,6 +35,12 @@ int lrmd_send_resource_alert(lrmd_t *lrmd, GList *alert_list, - int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, - const char *msg_type); - -+int lrmd__metadata_async(lrmd_rsc_info_t *rsc, -+ void (*callback)(int pid, -+ const pcmk__action_result_t *result, -+ void *user_data), -+ void *user_data); -+ - void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, - int op_status, const char *exit_reason); - -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index 82afd6c..4b16bf0 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -2343,6 +2343,121 @@ lrmd_api_delete(lrmd_t * lrmd) - free(lrmd); - } - -+struct metadata_cb { -+ void (*callback)(int pid, const pcmk__action_result_t *result, -+ void *user_data); -+ void *user_data; -+}; -+ -+/*! -+ * \internal -+ * \brief Process asynchronous metadata completion -+ * -+ * \param[in] action Metadata action that completed -+ */ -+static void -+metadata_complete(svc_action_t *action) -+{ -+ struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data; -+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; -+ -+ pcmk__set_result(&result, action->rc, action->status, -+ services__exit_reason(action)); -+ pcmk__set_result_output(&result, action->stdout_data, action->stderr_data); -+ -+ metadata_cb->callback(0, &result, metadata_cb->user_data); -+ result.action_stdout = NULL; // Prevent free, because action owns it -+ result.action_stderr = NULL; // Prevent free, because action owns it -+ pcmk__reset_result(&result); -+ free(metadata_cb); -+} -+ -+/*! 
-+ * \internal -+ * \brief Retrieve agent metadata asynchronously -+ * -+ * \param[in] rsc Resource agent specification -+ * \param[in] callback Function to call with result (this will always be -+ * called, whether by this function directly or later via -+ * the main loop, and on success the metadata will be in -+ * its result argument's action_stdout) -+ * \param[in] user_data User data to pass to callback -+ * -+ * \return Standard Pacemaker return code -+ * \note This function is not a lrmd_api_operations_t method because it does not -+ * need an lrmd_t object and does not go through the executor, but -+ * executes the agent directly. -+ */ -+int -+lrmd__metadata_async(lrmd_rsc_info_t *rsc, -+ void (*callback)(int pid, -+ const pcmk__action_result_t *result, -+ void *user_data), -+ void *user_data) -+{ -+ svc_action_t *action = NULL; -+ struct metadata_cb *metadata_cb = NULL; -+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; -+ -+ CRM_CHECK(callback != NULL, return EINVAL); -+ -+ if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { -+ pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR, -+ "Invalid resource specification"); -+ callback(0, &result, user_data); -+ pcmk__reset_result(&result); -+ return EINVAL; -+ } -+ -+ if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) { -+ return stonith__metadata_async(rsc->type, -+ CRMD_METADATA_CALL_TIMEOUT / 1000, -+ callback, user_data); -+ } -+ -+ action = services__create_resource_action(rsc->type, rsc->standard, -+ rsc->provider, rsc->type, -+ CRMD_ACTION_METADATA, 0, -+ CRMD_METADATA_CALL_TIMEOUT, NULL, -+ 0); -+ if (action == NULL) { -+ pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, -+ "Out of memory"); -+ callback(0, &result, user_data); -+ pcmk__reset_result(&result); -+ return ENOMEM; -+ } -+ if (action->rc != PCMK_OCF_UNKNOWN) { -+ pcmk__set_result(&result, action->rc, action->status, -+ services__exit_reason(action)); -+ callback(0, &result, 
user_data); -+ pcmk__reset_result(&result); -+ services_action_free(action); -+ return EINVAL; -+ } -+ -+ action->cb_data = calloc(1, sizeof(struct metadata_cb)); -+ if (action->cb_data == NULL) { -+ services_action_free(action); -+ pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, -+ "Out of memory"); -+ callback(0, &result, user_data); -+ pcmk__reset_result(&result); -+ return ENOMEM; -+ } -+ -+ metadata_cb = (struct metadata_cb *) action->cb_data; -+ metadata_cb->callback = callback; -+ metadata_cb->user_data = user_data; -+ if (!services_action_async(action, metadata_complete)) { -+ services_action_free(action); -+ return pcmk_rc_error; // @TODO Derive from action->rc and ->status -+ } -+ -+ // The services library has taken responsibility for action -+ return pcmk_rc_ok; -+} -+ - /*! - * \internal - * \brief Set the result of an executor event --- -2.31.1 - -From de89164053cde8f44ca74a007703e0827ffd67ec Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Sep 2022 16:34:37 -0500 -Subject: [PATCH 09/24] Low: controller: ignore CRM_OP_LRM_REFRESH - -This was only sent by crm_resource --refresh in versions 1.1.9 and earlier. -Since the local crm_resource is the same version as the controller, and -Pacemaker Remote was introduced in 1.1.9, this means that only remote nodes -running 1.1.9 can possibly send it. - -It didn't really do anything useful anyway, so just ignore it. 
---- - daemons/controld/controld_execd.c | 33 +++++----------------------- - daemons/controld/controld_messages.c | 2 +- - include/crm/crm.h | 2 +- - lib/pacemaker/pcmk_graph_producer.c | 3 +-- - lib/pengine/common.c | 2 -- - 5 files changed, 9 insertions(+), 33 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index fa411a6..719fab0 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1553,32 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, - lrmd_free_event(op); - } - --static void --handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, -- const char *from_host, const char *from_sys) --{ -- int rc = pcmk_ok; -- xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); -- -- fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); -- crm_info("Forced a local resource history refresh: call=%d", rc); -- -- if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) { -- xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, -- from_sys, CRM_SYSTEM_LRMD, -- fsa_our_uuid); -- -- crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); -- -- if (relay_message(reply, TRUE) == FALSE) { -- crm_log_xml_err(reply, "Unable to route reply"); -- } -- free_xml(reply); -- } -- -- free_xml(fragment); --} -- - static void - handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) - { -@@ -1787,7 +1761,12 @@ do_lrm_invoke(long long action, - } - - if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { -- handle_refresh_op(lrm_state, user_name, from_host, from_sys); -+ /* @COMPAT This can only be sent by crm_resource --refresh on a -+ * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely -+ * unlikely. It previously would cause the controller to re-write its -+ * resource history to the CIB. Just ignore it. 
-+ */ -+ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); - - } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) { - handle_query_op(input->msg, lrm_state); -diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c -index 31d3524..957fc20 100644 ---- a/daemons/controld/controld_messages.c -+++ b/daemons/controld/controld_messages.c -@@ -1061,7 +1061,7 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) - return handle_lrm_delete(stored_msg); - - } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) -- || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) -+ || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT - || (strcmp(op, CRM_OP_REPROBE) == 0)) { - - crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); -diff --git a/include/crm/crm.h b/include/crm/crm.h -index 5ec66d2..f2e536e 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -146,7 +146,7 @@ extern char *crm_system_name; - # define CRM_OP_REGISTER "register" - # define CRM_OP_IPC_FWD "ipc_fwd" - # define CRM_OP_INVOKE_LRM "lrm_invoke" --# define CRM_OP_LRM_REFRESH "lrm_refresh" /* Deprecated */ -+# define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10 - # define CRM_OP_LRM_QUERY "lrm_query" - # define CRM_OP_LRM_DELETE "lrm_delete" - # define CRM_OP_LRM_FAIL "lrm_fail" -diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c -index 4c1b5a6..0077719 100644 ---- a/lib/pacemaker/pcmk_graph_producer.c -+++ b/lib/pacemaker/pcmk_graph_producer.c -@@ -446,8 +446,7 @@ create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details, - - } else if (pcmk__str_any_of(action->task, - CRM_OP_SHUTDOWN, -- CRM_OP_CLEAR_FAILCOUNT, -- CRM_OP_LRM_REFRESH, NULL)) { -+ CRM_OP_CLEAR_FAILCOUNT, NULL)) { - action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT); - - } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) { -diff --git a/lib/pengine/common.c 
b/lib/pengine/common.c -index 93ba3fe..7db9d0e 100644 ---- a/lib/pengine/common.c -+++ b/lib/pengine/common.c -@@ -384,8 +384,6 @@ text2task(const char *task) - return no_action; - } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) { - return no_action; -- } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { -- return no_action; - } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { - return no_action; - } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { --- -2.31.1 - -From 406fbc52ed652915887e78138f8f3c2eeaeabfb6 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Sep 2022 16:46:15 -0500 -Subject: [PATCH 10/24] API: libcrmcommon: deprecate CRM_OP_LRM_QUERY - -This has been unused since at least Pacemaker 1.0.0, and since we don't support -rolling upgrades from anything that old, and Pacemaker Remote didn't exist -then, we can just drop support for it entirely. ---- - daemons/controld/controld_execd.c | 17 ----------------- - include/crm/crm.h | 1 - - include/crm/crm_compat.h | 5 ++++- - 3 files changed, 4 insertions(+), 19 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 719fab0..54e6818 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1553,20 +1553,6 @@ fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, - lrmd_free_event(op); - } - --static void --handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) --{ -- xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); -- xmlNode *reply = create_reply(msg, data); -- -- if (relay_message(reply, TRUE) == FALSE) { -- crm_err("Unable to route reply"); -- crm_log_xml_err(reply, "reply"); -- } -- free_xml(reply); -- free_xml(data); --} -- - static void - handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, - const char *from_host, const char *user_name, -@@ -1768,9 +1754,6 @@ do_lrm_invoke(long 
long action, - */ - crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); - -- } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) { -- handle_query_op(input->msg, lrm_state); -- - // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op - } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) { - update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, -diff --git a/include/crm/crm.h b/include/crm/crm.h -index f2e536e..38915e3 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -147,7 +147,6 @@ extern char *crm_system_name; - # define CRM_OP_IPC_FWD "ipc_fwd" - # define CRM_OP_INVOKE_LRM "lrm_invoke" - # define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10 --# define CRM_OP_LRM_QUERY "lrm_query" - # define CRM_OP_LRM_DELETE "lrm_delete" - # define CRM_OP_LRM_FAIL "lrm_fail" - # define CRM_OP_PROBED "probe_complete" -diff --git a/include/crm/crm_compat.h b/include/crm/crm_compat.h -index 3b35a5e..8a4b368 100644 ---- a/include/crm/crm_compat.h -+++ b/include/crm/crm_compat.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2021 the Pacemaker project contributors -+ * Copyright 2004-2022 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -31,6 +31,9 @@ extern "C" { - //! \deprecated This defined constant will be removed in a future release - #define MAX_IPC_DELAY 120 - -+//! \deprecated This defined constant will be removed in a future release -+#define CRM_OP_LRM_QUERY "lrm_query" -+ - //!@{ - //! 
\deprecated This macro will be removed in a future release - --- -2.31.1 - -From 7c3d2f58d387d2ec0d5c5d340f8816f324e816bf Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Sep 2022 16:49:48 -0500 -Subject: [PATCH 11/24] Refactor: controller: drop do_lrm_query_internal() - -Now that there's only one (short) caller, just move its contents there ---- - daemons/controld/controld_execd.c | 28 +++++++++++----------------- - 1 file changed, 11 insertions(+), 17 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 54e6818..99c9193 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -811,19 +811,26 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) - return FALSE; - } - --static xmlNode * --do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) -+xmlNode * -+controld_query_executor_state(const char *node_name) - { - xmlNode *xml_state = NULL; - xmlNode *xml_data = NULL; - xmlNode *rsc_list = NULL; - crm_node_t *peer = NULL; -+ lrm_state_t *lrm_state = lrm_state_find(node_name); -+ -+ if (!lrm_state) { -+ crm_err("Could not find executor state for node %s", node_name); -+ return NULL; -+ } - - peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); - CRM_CHECK(peer != NULL, return NULL); - -- xml_state = create_node_state_update(peer, update_flags, NULL, -- __func__); -+ xml_state = create_node_state_update(peer, -+ node_update_cluster|node_update_peer, -+ NULL, __func__); - if (xml_state == NULL) { - return NULL; - } -@@ -840,19 +847,6 @@ do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) - return xml_state; - } - --xmlNode * --controld_query_executor_state(const char *node_name) --{ -- lrm_state_t *lrm_state = lrm_state_find(node_name); -- -- if (!lrm_state) { -- crm_err("Could not find executor state for node %s", node_name); -- return NULL; -- } -- return do_lrm_query_internal(lrm_state, -- 
node_update_cluster|node_update_peer); --} -- - /*! - * \internal - * \brief Map standard Pacemaker return code to operation status and OCF code --- -2.31.1 - -From 5cab259417a06f64a607f99c478459093ed1b5ed Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Sep 2022 15:48:44 -0500 -Subject: [PATCH 12/24] Doc: controller: drop pointless comment - -It's (likely?) impossible for a live cluster to have been doing rolling -upgrades since 2006. ---- - daemons/controld/controld_execd.c | 12 +----------- - 1 file changed, 1 insertion(+), 11 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 99c9193..53b1156 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -678,18 +678,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ - - target_rc = rsc_op_expected_rc(op); - -- /* there is a small risk in formerly mixed clusters that it will -- * be sub-optimal. -- * -- * however with our upgrade policy, the update we send should -- * still be completely supported anyway -- */ - caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); -- CRM_LOG_ASSERT(caller_version != NULL); -- -- if(caller_version == NULL) { -- caller_version = CRM_FEATURE_SET; -- } -+ CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); - - xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, - fsa_our_uname, src); --- -2.31.1 - -From b4541d7ecd9551674c4546415751a223ff3013ed Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 15 Sep 2022 11:24:28 -0500 -Subject: [PATCH 13/24] Refactor: controller: move where reload actions get - remapped - -... 
from do_lrm_invoke() to do_lrm_rsc_op(), which will make planned changes -easier ---- - daemons/controld/controld_execd.c | 38 ++++++++++++++++--------------- - 1 file changed, 20 insertions(+), 18 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 53b1156..c9f0cc7 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -43,7 +43,8 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us - static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, - const char *rsc_id, const char *operation); - static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, -- const char *operation, xmlNode *msg); -+ const char *operation, xmlNode *msg, -+ struct ra_metadata_s *md); - - static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - int log_level); -@@ -1808,26 +1809,12 @@ do_lrm_invoke(long long action, - do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, - crm_rsc_delete, user_name); - -- } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, -- CRMD_ACTION_RELOAD_AGENT, NULL)) { -- /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs -- * will schedule reload-agent actions only. In either case, we need -- * to map that to whatever the resource agent actually supports. -- * Default to the OCF 1.1 name. 
-- */ -+ } else { - struct ra_metadata_s *md = NULL; -- const char *reload_name = CRMD_ACTION_RELOAD_AGENT; - - md = controld_get_rsc_metadata(lrm_state, rsc, - controld_metadata_from_cache); -- if ((md != NULL) -- && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { -- reload_name = CRMD_ACTION_RELOAD; -- } -- do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml); -- -- } else { -- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml); -+ do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md); - } - - lrmd_free_rsc_info(rsc); -@@ -2176,7 +2163,7 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t - - static void - do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, -- const char *operation, xmlNode *msg) -+ const char *operation, xmlNode *msg, struct ra_metadata_s *md) - { - int rc; - int call_id = 0; -@@ -2198,6 +2185,21 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - } - } - -+ if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, -+ CRMD_ACTION_RELOAD_AGENT, NULL)) { -+ /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs -+ * will schedule reload-agent actions only. In either case, we need -+ * to map that to whatever the resource agent actually supports. -+ * Default to the OCF 1.1 name. 
-+ */ -+ if ((md != NULL) -+ && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { -+ operation = CRMD_ACTION_RELOAD; -+ } else { -+ operation = CRMD_ACTION_RELOAD_AGENT; -+ } -+ } -+ - op = construct_op(lrm_state, msg, rsc->id, operation); - CRM_CHECK(op != NULL, return); - --- -2.31.1 - -From a4f6e394a61712da750aabffca2b6dd02f0c5ae6 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 15 Sep 2022 15:12:06 -0500 -Subject: [PATCH 14/24] Refactor: controller: drop operation argument to - do_lrm_rsc_op() - -It can be derived from the XML argument ---- - daemons/controld/controld_execd.c | 26 +++++++++++++------------- - 1 file changed, 13 insertions(+), 13 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index c9f0cc7..89a993b 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -43,8 +43,7 @@ static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer us - static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, - const char *rsc_id, const char *operation); - static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, -- const char *operation, xmlNode *msg, -- struct ra_metadata_s *md); -+ xmlNode *msg, struct ra_metadata_s *md); - - static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, - int log_level); -@@ -1814,7 +1813,7 @@ do_lrm_invoke(long long action, - - md = controld_get_rsc_metadata(lrm_state, rsc, - controld_metadata_from_cache); -- do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, md); -+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md); - } - - lrmd_free_rsc_info(rsc); -@@ -2162,8 +2161,8 @@ record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t - } - - static void --do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, -- const char *operation, xmlNode *msg, struct ra_metadata_s *md) -+do_lrm_rsc_op(lrm_state_t *lrm_state, 
lrmd_rsc_info_t *rsc, xmlNode *msg, -+ struct ra_metadata_s *md) - { - int rc; - int call_id = 0; -@@ -2172,17 +2171,18 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - lrmd_key_value_t *params = NULL; - fsa_data_t *msg_data = NULL; - const char *transition = NULL; -+ const char *operation = NULL; - gboolean stop_recurring = FALSE; - const char *nack_reason = NULL; - -- CRM_CHECK(rsc != NULL, return); -- CRM_CHECK(operation != NULL, return); -+ CRM_CHECK((rsc != NULL) && (msg != NULL), return); - -- if (msg != NULL) { -- transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); -- if (transition == NULL) { -- crm_log_xml_err(msg, "Missing transition number"); -- } -+ operation = crm_element_value(msg, XML_LRM_ATTR_TASK); -+ CRM_CHECK(!pcmk__str_empty(operation), return); -+ -+ transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); -+ if (pcmk__str_empty(transition)) { -+ crm_log_xml_err(msg, "Missing transition number"); - } - - if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, -@@ -2241,7 +2241,7 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - crm_notice("Requesting local execution of %s operation for %s on %s " - CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, - crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, -- transition, rsc->id, operation, op->interval_ms); -+ (transition != NULL ? transition : ""), rsc->id, operation, op->interval_ms); - - if (pcmk_is_set(fsa_input_register, R_SHUTDOWN) - && pcmk__str_eq(operation, RSC_START, pcmk__str_casei)) { --- -2.31.1 - -From 486dbdf023f82a82a02207d8fb7921f8f2ac0588 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 15 Sep 2022 15:40:38 -0500 -Subject: [PATCH 15/24] Low: controller: add failsafe for no executor - connection - -... 
in do_lrm_rsc_op(), to make planned changes easier ---- - daemons/controld/controld_execd.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 89a993b..8986b9b 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -2185,6 +2185,17 @@ do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, - crm_log_xml_err(msg, "Missing transition number"); - } - -+ if (lrm_state == NULL) { -+ // This shouldn't be possible, but provide a failsafe just in case -+ crm_err("Cannot execute %s of %s: No executor connection " -+ CRM_XS " transition_key=%s", -+ operation, rsc->id, (transition != NULL ? transition : "")); -+ synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, -+ PCMK_OCF_UNKNOWN_ERROR, -+ "No executor connection"); -+ return; -+ } -+ - if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, - CRMD_ACTION_RELOAD_AGENT, NULL)) { - /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs --- -2.31.1 - -From afd53bba7dfb5109d844318dff0f82e4687d9e32 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 15 Sep 2022 12:04:31 -0500 -Subject: [PATCH 16/24] Log: controller: improve messages when metadata cache - update fails - -Previously, metadata_cache_update() or ra_param_from_xml() would log an error, -then controld_get_rsc_metadata() (but not the other caller, -process_lrm_event()) would log another warning with the agent info. - -Combine these into a single message always logged by metadata_cache_update(), -which also has been renamed to controld_cache_metadata(). 
---- - daemons/controld/controld_execd.c | 2 +- - daemons/controld/controld_metadata.c | 27 ++++++++++++--------------- - daemons/controld/controld_metadata.h | 6 +++--- - 3 files changed, 16 insertions(+), 19 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 8986b9b..fe16c96 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -2858,7 +2858,7 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, - } else if (rsc && (op->rc == PCMK_OCF_OK)) { - char *metadata = unescape_newlines(op->output); - -- metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); -+ controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); - free(metadata); - } - } -diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c -index 8c6f195..91a6a10 100644 ---- a/daemons/controld/controld_metadata.c -+++ b/daemons/controld/controld_metadata.c -@@ -149,13 +149,11 @@ ra_param_from_xml(xmlNode *param_xml) - - p = calloc(1, sizeof(struct ra_param_s)); - if (p == NULL) { -- crm_crit("Could not allocate memory for resource metadata"); - return NULL; - } - - p->rap_name = strdup(param_name); - if (p->rap_name == NULL) { -- crm_crit("Could not allocate memory for resource metadata"); - free(p); - return NULL; - } -@@ -196,10 +194,11 @@ log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version) - } - - struct ra_metadata_s * --metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, -- const char *metadata_str) -+controld_cache_metadata(GHashTable *mdc, lrmd_rsc_info_t *rsc, -+ const char *metadata_str) - { - char *key = NULL; -+ const char *reason = NULL; - xmlNode *metadata = NULL; - xmlNode *match = NULL; - struct ra_metadata_s *md = NULL; -@@ -210,20 +209,19 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, - - key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); - if (!key) { -- crm_crit("Could not 
allocate memory for resource metadata"); -+ reason = "Invalid resource agent standard or type"; - goto err; - } - - metadata = string2xml(metadata_str); - if (!metadata) { -- crm_err("Metadata for %s:%s:%s is not valid XML", -- rsc->standard, rsc->provider, rsc->type); -+ reason = "Metadata is not valid XML"; - goto err; - } - - md = calloc(1, sizeof(struct ra_metadata_s)); - if (md == NULL) { -- crm_crit("Could not allocate memory for resource metadata"); -+ reason = "Could not allocate memory"; - goto err; - } - -@@ -281,6 +279,7 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, - struct ra_param_s *p = ra_param_from_xml(match); - - if (p == NULL) { -+ reason = "Could not allocate memory"; - goto err; - } - if (pcmk_is_set(p->rap_flags, ra_param_private)) { -@@ -311,6 +310,9 @@ metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, - return md; - - err: -+ crm_warn("Unable to update metadata for %s (%s%s%s:%s): %s", -+ rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), -+ (rsc->provider != NULL ? rsc->provider : ""), rsc->type, reason); - free(key); - free_xml(metadata); - metadata_free(md); -@@ -377,13 +379,8 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - return NULL; - } - -- metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, -- metadata_str); -+ metadata = controld_cache_metadata(lrm_state->metadata_cache, rsc, -+ metadata_str); - free(metadata_str); -- if (metadata == NULL) { -- crm_warn("Failed to update metadata for %s (%s%s%s:%s)", -- rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), -- ((rsc->provider == NULL)? 
"" : rsc->provider), rsc->type); -- } - return metadata; - } -diff --git a/daemons/controld/controld_metadata.h b/daemons/controld/controld_metadata.h -index 7354f94..52d3336 100644 ---- a/daemons/controld/controld_metadata.h -+++ b/daemons/controld/controld_metadata.h -@@ -73,9 +73,9 @@ void metadata_cache_free(GHashTable *mdc); - void metadata_cache_reset(GHashTable *mdc); - void metadata_cache_fini(void); - --struct ra_metadata_s *metadata_cache_update(GHashTable *mdc, -- lrmd_rsc_info_t *rsc, -- const char *metadata_str); -+struct ra_metadata_s *controld_cache_metadata(GHashTable *mdc, -+ lrmd_rsc_info_t *rsc, -+ const char *metadata_str); - struct ra_metadata_s *controld_get_rsc_metadata(lrm_state_t *lrm_state, - lrmd_rsc_info_t *rsc, - uint32_t source); --- -2.31.1 - -From caeed447d0d8a980d431efd70e5b6f9c91ffac7f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 15 Sep 2022 13:33:36 -0500 -Subject: [PATCH 17/24] Fix: controller: pre-load agent metadata asynchronously - -The controller needs resource agent metadata to record digests with pending and -completed resource actions. - -Previously, metadata was collected synchronously when needed. This caused -several problems, two of which are fixed here for most actions: synchronous -execution blocks the controller from doing anything else (and if the agent's -metadata action tries to contact the controller, that blocks everything until -the action times out), and the metadata action ate into the real action's -timeout. - -Now, if we're likely to need metadata for an action, attempt to get it -asynchronously before executing that action, so the metadata is available in -cache when needed. - -This is not a complete solution, as there are other code paths that might -require metadata and still lead to synchronous execution, but it handles the -most important cases. 
- -Fixes T554 ---- - daemons/controld/controld_execd.c | 105 +++++++++++++++++++++++---- - daemons/controld/controld_metadata.c | 22 +++--- - 2 files changed, 102 insertions(+), 25 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index fe16c96..c56fdf5 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -670,7 +670,6 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ - struct ra_metadata_s *metadata = NULL; - const char *caller_version = NULL; - lrm_state_t *lrm_state = NULL; -- uint32_t metadata_source = controld_metadata_from_agent; - - if (op == NULL) { - return FALSE; -@@ -703,19 +702,14 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_ - return TRUE; - } - -- /* Getting meta-data from cache is OK unless this is a successful start -- * action -- always refresh from the agent for those, in case the -- * resource agent was updated. -+ /* Ideally the metadata is cached, and the agent is just a fallback. - * -- * @TODO Only refresh the meta-data after starts if the agent actually -- * changed (using something like inotify, or a hash or modification time of -- * the agent executable). -+ * @TODO Go through all callers and ensure they get metadata asynchronously -+ * first. 
- */ -- if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc) -- || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) { -- metadata_source |= controld_metadata_from_cache; -- } -- metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source); -+ metadata = controld_get_rsc_metadata(lrm_state, rsc, -+ controld_metadata_from_agent -+ |controld_metadata_from_cache); - if (metadata == NULL) { - return TRUE; - } -@@ -1673,6 +1667,56 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, - user_name, input, unregister); - } - -+// User data for asynchronous metadata execution -+struct metadata_cb_data { -+ lrmd_rsc_info_t *rsc; // Copy of resource information -+ xmlNode *input_xml; // Copy of FSA input XML -+}; -+ -+static struct metadata_cb_data * -+new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) -+{ -+ struct metadata_cb_data *data = NULL; -+ -+ data = calloc(1, sizeof(struct metadata_cb_data)); -+ CRM_ASSERT(data != NULL); -+ data->input_xml = copy_xml(input_xml); -+ data->rsc = lrmd_copy_rsc_info(rsc); -+ return data; -+} -+ -+static void -+free_metadata_cb_data(struct metadata_cb_data *data) -+{ -+ lrmd_free_rsc_info(data->rsc); -+ free_xml(data->input_xml); -+ free(data); -+} -+ -+/*! 
-+ * \internal -+ * \brief Execute an action after metadata has been retrieved -+ * -+ * \param[in] pid Ignored -+ * \param[in] result Result of metadata action -+ * \param[in] user_data Metadata callback data -+ */ -+static void -+metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) -+{ -+ struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; -+ -+ struct ra_metadata_s *md = NULL; -+ lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); -+ -+ if ((lrm_state != NULL) && pcmk__result_ok(result)) { -+ md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, -+ result->action_stdout); -+ } -+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); -+ free_metadata_cb_data(data); -+} -+ - /* A_LRM_INVOKE */ - void - do_lrm_invoke(long long action, -@@ -1811,9 +1855,40 @@ do_lrm_invoke(long long action, - } else { - struct ra_metadata_s *md = NULL; - -- md = controld_get_rsc_metadata(lrm_state, rsc, -- controld_metadata_from_cache); -- do_lrm_rsc_op(lrm_state, rsc, input->xml, md); -+ /* Getting metadata from cache is OK except for start actions -- -+ * always refresh from the agent for those, in case the resource -+ * agent was updated. -+ * -+ * @TODO Only refresh metadata for starts if the agent actually -+ * changed (using something like inotify, or a hash or modification -+ * time of the agent executable). -+ */ -+ if (strcmp(operation, CRMD_ACTION_START) != 0) { -+ md = controld_get_rsc_metadata(lrm_state, rsc, -+ controld_metadata_from_cache); -+ } -+ -+ if ((md == NULL) && crm_op_needs_metadata(rsc->standard, -+ operation)) { -+ /* Most likely, we'll need the agent metadata to record the -+ * pending operation and the operation result. Get it now rather -+ * than wait until then, so the metadata action doesn't eat into -+ * the real action's timeout. 
-+ * -+ * @TODO Metadata is retrieved via direct execution of the -+ * agent, which has a couple of related issues: the executor -+ * should execute agents, not the controller; and metadata for -+ * Pacemaker Remote nodes should be collected on those nodes, -+ * not locally. -+ */ -+ struct metadata_cb_data *data = NULL; -+ -+ data = new_metadata_cb_data(rsc, input->xml); -+ (void) lrmd__metadata_async(rsc, metadata_complete, -+ (void *) data); -+ } else { -+ do_lrm_rsc_op(lrm_state, rsc, input->xml, md); -+ } - } - - lrmd_free_rsc_info(rsc); -diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c -index 91a6a10..a954ebd 100644 ---- a/daemons/controld/controld_metadata.c -+++ b/daemons/controld/controld_metadata.c -@@ -356,17 +356,19 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - return NULL; - } - -- /* For now, we always collect resource agent meta-data via a local, -- * synchronous, direct execution of the agent. This has multiple issues: -- * the executor should execute agents, not the controller; meta-data for -- * Pacemaker Remote nodes should be collected on those nodes, not -- * locally; and the meta-data call shouldn't eat into the timeout of the -- * real action being performed. -+ /* For most actions, metadata was cached asynchronously before action -+ * execution (via metadata_complete()). - * -- * These issues are planned to be addressed by having the scheduler -- * schedule a meta-data cache check at the beginning of each transition. -- * Once that is working, this block will only be a fallback in case the -- * initial collection fails. -+ * However if that failed, and for other actions, retrieve the metadata now -+ * via a local, synchronous, direct execution of the agent. 
-+ * -+ * This has multiple issues, which is why this is just a fallback: the -+ * executor should execute agents, not the controller; metadata for -+ * Pacemaker Remote nodes should be collected on those nodes, not locally; -+ * the metadata call shouldn't eat into the timeout of the real action being -+ * performed; and the synchronous call blocks the controller (which also -+ * means that if the metadata action tries to contact the controller, -+ * everything will hang until the timeout). - */ - rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, - rsc->type, &metadata_str, 0); --- -2.31.1 - -From fddf663d5285740771145e83c41f33c0bfb86dfb Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 19 Sep 2022 15:19:06 -0500 -Subject: [PATCH 18/24] Low: libstonithd: return CRM_EX_NOSUCH for bad agent - namespace - -Callers can't rely on a particular exit code scheme at this point, -but it doesn't hurt ---- - lib/fencing/st_client.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 91075bd..d41b066 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -2451,7 +2451,7 @@ stonith__metadata_async(const char *agent, int timeout_sec, - default: - { - pcmk__action_result_t result = { -- .exit_status = CRM_EX_ERROR, -+ .exit_status = CRM_EX_NOSUCH, - .execution_status = PCMK_EXEC_ERROR_HARD, - .exit_reason = crm_strdup_printf("No such agent '%s'", - agent), --- -2.31.1 - -From 2de926f5b2b5dbf28f994bc35477d59ce46d5ab1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 19 Sep 2022 15:23:43 -0500 -Subject: [PATCH 19/24] Low: liblrmd: consider invalid agent specification a - fatal error - ---- - lib/lrmd/lrmd_client.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index 4b16bf0..d691dce 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -2402,7 +2402,8 @@ 
lrmd__metadata_async(lrmd_rsc_info_t *rsc, - CRM_CHECK(callback != NULL, return EINVAL); - - if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { -- pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR, -+ pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, -+ PCMK_EXEC_ERROR_FATAL, - "Invalid resource specification"); - callback(0, &result, user_data); - pcmk__reset_result(&result); --- -2.31.1 - -From 2d526dae9dbfc6f8658ff96f5f6d58ee09ea879c Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 19 Sep 2022 15:25:12 -0500 -Subject: [PATCH 20/24] Low: liblrmd: use resource ID for metadata actions when - available - ---- - lib/lrmd/lrmd_client.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index d691dce..570a2b8 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -2416,11 +2416,11 @@ lrmd__metadata_async(lrmd_rsc_info_t *rsc, - callback, user_data); - } - -- action = services__create_resource_action(rsc->type, rsc->standard, -- rsc->provider, rsc->type, -- CRMD_ACTION_METADATA, 0, -- CRMD_METADATA_CALL_TIMEOUT, NULL, -- 0); -+ action = services__create_resource_action((rsc->id != NULL ? 
rsc->id : rsc->type), -+ rsc->standard, rsc->provider, -+ rsc->type, CRMD_ACTION_METADATA, -+ 0, CRMD_METADATA_CALL_TIMEOUT, -+ NULL, 0); - if (action == NULL) { - pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, - "Out of memory"); --- -2.31.1 - -From 3d632be58dca13293e4ae974da5dfe2838fcdf12 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 19 Sep 2022 15:27:11 -0500 -Subject: [PATCH 21/24] Refactor: controller: executor query can assume local - node - ---- - daemons/controld/controld_execd.c | 6 +++--- - daemons/controld/controld_fsa.h | 4 ++-- - daemons/controld/controld_join_client.c | 2 +- - daemons/controld/controld_join_dc.c | 2 +- - 4 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index c56fdf5..039b194 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -796,16 +796,16 @@ build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) - } - - xmlNode * --controld_query_executor_state(const char *node_name) -+controld_query_executor_state(void) - { - xmlNode *xml_state = NULL; - xmlNode *xml_data = NULL; - xmlNode *rsc_list = NULL; - crm_node_t *peer = NULL; -- lrm_state_t *lrm_state = lrm_state_find(node_name); -+ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname); - - if (!lrm_state) { -- crm_err("Could not find executor state for node %s", node_name); -+ crm_err("Could not find executor state for node %s", fsa_our_uname); - return NULL; - } - -diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h -index 296232f..d137310 100644 ---- a/daemons/controld/controld_fsa.h -+++ b/daemons/controld/controld_fsa.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2021 the Pacemaker project contributors -+ * Copyright 2004-2022 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -518,7 +518,7 @@ extern gboolean ever_had_quorum; - // These should be moved elsewhere - void do_update_cib_nodes(gboolean overwrite, const char *caller); - int crmd_cib_smart_opt(void); --xmlNode *controld_query_executor_state(const char *node_name); -+xmlNode *controld_query_executor_state(void); - - const char *fsa_input2string(enum crmd_fsa_input input); - const char *fsa_state2string(enum crmd_fsa_state state); -diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c -index 6485856..bfec430 100644 ---- a/daemons/controld/controld_join_client.c -+++ b/daemons/controld/controld_join_client.c -@@ -268,7 +268,7 @@ do_cl_join_finalize_respond(long long action, - update_dc_expected(input->msg); - - /* send our status section to the DC */ -- tmp1 = controld_query_executor_state(fsa_our_uname); -+ tmp1 = controld_query_executor_state(); - if (tmp1 != NULL) { - xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, - CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); -diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c -index 9386182..9a8ea3e 100644 ---- a/daemons/controld/controld_join_dc.c -+++ b/daemons/controld/controld_join_dc.c -@@ -591,7 +591,7 @@ do_dc_join_ack(long long action, - } - controld_delete_node_state(join_from, section, cib_scope_local); - if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) { -- xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); -+ xmlNode *now_dc_lrmd_state = controld_query_executor_state(); - - if (now_dc_lrmd_state != NULL) { - fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, --- -2.31.1 - -From d852ec335bd5b518a3f06c7f1b597370094311ae Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 20 Sep 2022 10:18:48 -0500 -Subject: [PATCH 22/24] Log: controller: add messages when getting agent - metadata - ---- - daemons/controld/controld_execd.c | 5 +++++ - daemons/controld/controld_metadata.c | 10 ++++++++++ - 2 
files changed, 15 insertions(+) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index 039b194..f02da82 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1884,6 +1884,11 @@ do_lrm_invoke(long long action, - struct metadata_cb_data *data = NULL; - - data = new_metadata_cb_data(rsc, input->xml); -+ crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", -+ rsc->id, rsc->standard, -+ ((rsc->provider == NULL)? "" : ":"), -+ ((rsc->provider == NULL)? "" : rsc->provider), -+ rsc->type); - (void) lrmd__metadata_async(rsc, metadata_complete, - (void *) data); - } else { -diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c -index a954ebd..39b43b0 100644 ---- a/daemons/controld/controld_metadata.c -+++ b/daemons/controld/controld_metadata.c -@@ -348,6 +348,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - free(key); - } - if (metadata != NULL) { -+ crm_debug("Retrieved metadata for %s (%s%s%s:%s) from cache", -+ rsc->id, rsc->standard, -+ ((rsc->provider == NULL)? "" : ":"), -+ ((rsc->provider == NULL)? "" : rsc->provider), -+ rsc->type); - return metadata; - } - } -@@ -370,6 +375,11 @@ controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, - * means that if the metadata action tries to contact the controller, - * everything will hang until the timeout). - */ -+ crm_debug("Retrieving metadata for %s (%s%s%s:%s) synchronously", -+ rsc->id, rsc->standard, -+ ((rsc->provider == NULL)? "" : ":"), -+ ((rsc->provider == NULL)? 
"" : rsc->provider), -+ rsc->type); - rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, - rsc->type, &metadata_str, 0); - if (rc != pcmk_ok) { --- -2.31.1 - -From 5aec773a20e1ded971a4082358e266353615f196 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Sep 2022 14:36:44 -0500 -Subject: [PATCH 23/24] Test: cts-lab: allow any whitespace in "Recover" - messages - -This seems to have always been multiple spaces, not sure what happened ---- - cts/lab/CTStests.py | 12 ++++++------ - cts/lab/patterns.py | 4 ++-- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py -index 5535177..8b56758 100644 ---- a/cts/lab/CTStests.py -+++ b/cts/lab/CTStests.py -@@ -1,7 +1,7 @@ - """ Test-specific classes for Pacemaker's Cluster Test Suite (CTS) - """ - --__copyright__ = "Copyright 2000-2021 the Pacemaker project contributors" -+__copyright__ = "Copyright 2000-2022 the Pacemaker project contributors" - __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" - - # -@@ -1225,7 +1225,7 @@ class MaintenanceMode(CTSTest): - '''Return list of errors which should be ignored''' - return [ - r"Updating failcount for %s" % self.rid, -- r"schedulerd.*: Recover %s\s*\(.*\)" % self.rid, -+ r"schedulerd.*: Recover\s+%s\s+\(.*\)" % self.rid, - r"Unknown operation: fail", - self.templates["Pat:RscOpOK"] % (self.action, self.rid), - r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval), -@@ -1324,7 +1324,7 @@ class ResourceRecover(CTSTest): - '''Return list of errors which should be ignored''' - return [ - r"Updating failcount for %s" % self.rid, -- r"schedulerd.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt), -+ r"schedulerd.*: Recover\s+(%s|%s)\s+\(.*\)" % (self.rid, self.rid_alt), - r"Unknown operation: fail", - self.templates["Pat:RscOpOK"] % (self.action, self.rid), - r"(ERROR|error).*: Action %s_%s_%d .* 
initiated outside of a transition" % (self.rid, self.action, self.interval), -@@ -2559,7 +2559,7 @@ class RemoteLXC(CTSTest): - '''Return list of errors which should be ignored''' - return [ - r"Updating failcount for ping", -- r"schedulerd.*: Recover (ping|lxc-ms|container)\s*\(.*\)", -+ r"schedulerd.*: Recover\s+(ping|lxc-ms|container)\s+\(.*\)", - # The orphaned lxc-ms resource causes an expected transition error - # that is a result of the scheduler not having knowledge that the - # promotable resource used to be a clone. As a result, it looks like that -@@ -3054,7 +3054,7 @@ class RemoteStonithd(RemoteDriver): - r"Software caused connection abort", - r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor", - r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*", -- r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)", -+ r"schedulerd.*:\s+Recover\s+remote-.*\s+\(.*\)", - r"error: Result of monitor operation for .* on remote-.*: Internal communication failure", - ] - -@@ -3120,7 +3120,7 @@ class RemoteRscFailure(RemoteDriver): - - def errorstoignore(self): - ignore_pats = [ -- r"schedulerd.*: Recover remote-rsc\s*\(.*\)", -+ r"schedulerd.*: Recover\s+remote-rsc\s+\(.*\)", - r"Dummy.*: No process state file found", - ] - -diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py -index 90cac73..6e718f7 100644 ---- a/cts/lab/patterns.py -+++ b/cts/lab/patterns.py -@@ -66,7 +66,7 @@ class BasePatterns(object): - - "Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s", - "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK", -- "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover %s", -+ "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover\s+%s", - "Pat:Fencing_active" : r"stonith resource .* is active on 2 nodes (attempting recovery)", - "Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error", - -@@ -180,7 +180,7 @@ class crm_corosync(BasePatterns): - 
r"Parameters to .* action changed:", - r"Parameters to .* changed", - r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)", -- r"pacemaker-schedulerd.*Recover .*\(.* -\> .*\)", -+ r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)", - r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", - r"Peer is not part of our cluster", - r"We appear to be in an election loop", --- -2.31.1 - -From 338cf55d19cb4ebebedf092dd0a5969ac2eda295 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 19 Sep 2022 15:55:42 -0500 -Subject: [PATCH 24/24] Test: cts-lab: match parentheses correctly - ---- - cts/lab/patterns.py | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py -index 6e718f7..856fffb 100644 ---- a/cts/lab/patterns.py -+++ b/cts/lab/patterns.py -@@ -271,6 +271,7 @@ class crm_corosync(BasePatterns): - ] - self.components["pacemaker-based-ignore"] = [ - r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", -+ r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", - # This is overbroad, but we don't have a way to say that only - # certain transition errors are acceptable (if the fencer respawns, - # fence devices may appear multiply active). We have to rely on -@@ -328,7 +329,7 @@ class crm_corosync(BasePatterns): - r"crit:.*Fencing daemon connection failed", - r"error:.*Fencer connection failed \(will retry\)", - r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations", -- r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error", -+ r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", - # This is overbroad, but we don't have a way to say that only - # certain transition errors are acceptable (if the fencer respawns, - # fence devices may appear multiply active). 
We have to rely on --- -2.31.1 - diff --git a/SOURCES/014-abort-transition.patch b/SOURCES/014-abort-transition.patch deleted file mode 100644 index cd12ccd..0000000 --- a/SOURCES/014-abort-transition.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 04d1ba5ff20e135c900239f0ebadad42a41b5eba Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Sat, 10 Sep 2022 03:39:12 -0700 -Subject: [PATCH] Fix: controller: Resource reordering doesn't cause transition - abort - -The te_update_diff_v2() function ignores all move operations. This is -correct for most CIB sections. However, a move in the resources section -affects placement order and can require resources to change nodes. In -that case, since the diff handler does not cause a transition abort, the -moves will not be initiated until the next natural transition (up to the -value of cluster-recheck-interval). - -This commit modifies te_update_diff_v2() so that it no longer ignores -moves within the resources section. - -This fixes a regression triggered by 41d0a1a and set up by 45e5e82. -However, the underlying bug had already been present. Prior to 41d0a1a, -the CIB replacement notification handler caused a transition abort, when -the resources section was replaced, which hid this bug. 
- -Closes T549 - -Signed-off-by: Reid Wahl ---- - daemons/controld/controld_te_callbacks.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c -index 6e0dd216e..87ad861a2 100644 ---- a/daemons/controld/controld_te_callbacks.c -+++ b/daemons/controld/controld_te_callbacks.c -@@ -419,7 +419,13 @@ te_update_diff_v2(xmlNode *diff) - crm_trace("Ignoring %s change for version field", op); - continue; - -- } else if (strcmp(op, "move") == 0) { -+ } else if ((strcmp(op, "move") == 0) -+ && (strstr(xpath, -+ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION -+ "/" XML_CIB_TAG_RESOURCES) == NULL)) { -+ /* We still need to consider moves within the resources section, -+ * since they affect placement order. -+ */ - crm_trace("Ignoring move change at %s", xpath); - continue; - } -@@ -434,7 +440,7 @@ te_update_diff_v2(xmlNode *diff) - match = match->children; - } - -- } else if (strcmp(op, "delete") != 0) { -+ } else if (!pcmk__str_any_of(op, "delete", "move", NULL)) { - crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", - op, xpath); - continue; --- -2.31.1 - diff --git a/SOURCES/015-one_shot.patch b/SOURCES/015-one_shot.patch deleted file mode 100644 index 4896d64..0000000 --- a/SOURCES/015-one_shot.patch +++ /dev/null @@ -1,3589 +0,0 @@ -From 23d14e3515d226fee3ec9e0328f001f53597dad2 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 11:23:46 -0700 -Subject: [PATCH 01/22] API: libpacemaker: pcmk_pacemakerd_status() ipc_name - arg is now const - -Signed-off-by: Reid Wahl ---- - include/pacemaker.h | 11 ++++++----- - include/pcmki/pcmki_cluster_queries.h | 3 ++- - lib/pacemaker/pcmk_cluster_queries.c | 17 +++++++++++++++-- - 3 files changed, 23 insertions(+), 8 deletions(-) - -diff --git a/include/pacemaker.h b/include/pacemaker.h -index 17c68e9..a76569a 100644 ---- a/include/pacemaker.h -+++ b/include/pacemaker.h -@@ -107,15 
+107,16 @@ int pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) - void pcmk_free_injections(pcmk_injections_t *injections); - - /*! -- * \brief Get pacemakerd status -+ * \brief Get and output \p pacemakerd status - * -- * \param[in,out] xml The destination for the result, as an XML tree. -- * \param[in] ipc_name IPC name for request -- * \param[in] message_timeout_ms Message timeout -+ * \param[in,out] xml Destination for the result, as an XML tree -+ * \param[in] ipc_name IPC name for request -+ * \param[in] message_timeout_ms Message timeout - * - * \return Standard Pacemaker return code - */ --int pcmk_pacemakerd_status(xmlNodePtr *xml, char *ipc_name, unsigned int message_timeout_ms); -+int pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, -+ unsigned int message_timeout_ms); - - /*! - * \brief Calculate and output resource operation digests -diff --git a/include/pcmki/pcmki_cluster_queries.h b/include/pcmki/pcmki_cluster_queries.h -index 0a4c21c..9aea9a5 100644 ---- a/include/pcmki/pcmki_cluster_queries.h -+++ b/include/pcmki/pcmki_cluster_queries.h -@@ -10,7 +10,8 @@ - - int pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_timeout_ms); - int pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms); --int pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeout_ms); -+int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, -+ guint message_timeout_ms); - int pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean BASH_EXPORT); - - #endif -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index c30a9b8..cac8ce0 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -358,8 +358,19 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) - return rc; - } - -+/*! 
-+ * \internal -+ * \brief Get and output \p pacemakerd status -+ * -+ * \param[in,out] out Output object -+ * \param[in] ipc_name IPC name for request -+ * \param[in] message_timeout_ms Message timeout -+ * -+ * \return Standard Pacemaker return code -+ */ - int --pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeout_ms) -+pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, -+ guint message_timeout_ms) - { - data_t data = { - .out = out, -@@ -385,8 +396,10 @@ pcmk__pacemakerd_status(pcmk__output_t *out, char *ipc_name, guint message_timeo - return data.rc; - } - -+// Documented in header - int --pcmk_pacemakerd_status(xmlNodePtr *xml, char *ipc_name, unsigned int message_timeout_ms) -+pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, -+ unsigned int message_timeout_ms) - { - pcmk__output_t *out = NULL; - int rc = pcmk_rc_ok; --- -2.31.1 - -From b15f4030020a8c0aa1cdb9e72a633adff02944bc Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 12:19:49 -0700 -Subject: [PATCH 02/22] Feature: pacemakerd: New - pcmk__pcmkd_state_enum2friendly() function - -Given an enum pcmk_pacemakerd_state value, this function returns a -user-friendly string representation. This will be used in future -commits. 
- -Signed-off-by: Reid Wahl ---- - include/crm/common/ipc_internal.h | 3 +++ - lib/common/ipc_pacemakerd.c | 33 +++++++++++++++++++++++++++++++ - tools/crm_mon.c | 14 ++++++------- - 3 files changed, 43 insertions(+), 7 deletions(-) - -diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h -index 2a0c562..ebde808 100644 ---- a/include/crm/common/ipc_internal.h -+++ b/include/crm/common/ipc_internal.h -@@ -29,6 +29,7 @@ extern "C" { - - #include // US_AUTH_GETPEEREID - #include -+#include // enum pcmk_pacemakerd_state - #include // mainloop_io_t - - /* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that -@@ -250,6 +251,8 @@ pcmk__ipc_sys_name(const char *ipc_name, const char *fallback) - return ipc_name ? ipc_name : ((crm_system_name ? crm_system_name : fallback)); - } - -+const char *pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state); -+ - #ifdef __cplusplus - } - #endif -diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c -index 2bec0d1..3777f95 100644 ---- a/lib/common/ipc_pacemakerd.c -+++ b/lib/common/ipc_pacemakerd.c -@@ -62,6 +62,39 @@ pcmk_pacemakerd_api_daemon_state_enum2text( - return "invalid"; - } - -+/*! 
-+ * \internal -+ * \brief Return a friendly string representation of a \p pacemakerd state -+ * -+ * \param[in] state \p pacemakerd state -+ * -+ * \return A user-friendly string representation of \p state, or -+ * "Invalid pacemakerd state" -+ */ -+const char * -+pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state) -+{ -+ switch (state) { -+ case pcmk_pacemakerd_state_init: -+ return "Initializing pacemaker"; -+ case pcmk_pacemakerd_state_starting_daemons: -+ return "Pacemaker daemons are starting"; -+ case pcmk_pacemakerd_state_wait_for_ping: -+ return "Waiting for startup trigger from SBD"; -+ case pcmk_pacemakerd_state_running: -+ return "Pacemaker is running"; -+ case pcmk_pacemakerd_state_shutting_down: -+ return "Pacemaker daemons are shutting down"; -+ case pcmk_pacemakerd_state_shutdown_complete: -+ /* Assuming pacemakerd won't process messages while in -+ * shutdown_complete state unless reporting to SBD -+ */ -+ return "Pacemaker daemons are shut down (reporting to SBD)"; -+ default: -+ return "Invalid pacemakerd state"; -+ } -+} -+ - // \return Standard Pacemaker return code - static int - new_data(pcmk_ipc_api_t *api) -diff --git a/tools/crm_mon.c b/tools/crm_mon.c -index eaf79bd..e8cb709 100644 ---- a/tools/crm_mon.c -+++ b/tools/crm_mon.c -@@ -951,26 +951,26 @@ pacemakerd_status(void) - rc = ENOTCONN; - if ((output_format == mon_output_console) || - (output_format == mon_output_plain)) { -+ -+ const char *state_str = NULL; -+ state_str = pcmk__pcmkd_state_enum2friendly(state); - switch (state) { - case pcmk_pacemakerd_state_running: - rc = pcmk_rc_ok; - break; - case pcmk_pacemakerd_state_starting_daemons: -- out->info(out,"Pacemaker daemons starting ..."); -+ out->info(out, "%s", state_str); - break; - case pcmk_pacemakerd_state_wait_for_ping: -- out->info(out,"Waiting for startup-trigger from SBD ..."); -+ out->info(out, "%s", state_str); - break; - case pcmk_pacemakerd_state_shutting_down: -- out->info(out,"Pacemaker daemons shutting 
down ..."); -+ out->info(out, "%s", state_str); - /* try our luck maybe CIB is still accessible */ - rc = pcmk_rc_ok; - break; - case pcmk_pacemakerd_state_shutdown_complete: -- /* assuming pacemakerd doesn't dispatch any pings after entering -- * that state unless it is waiting for SBD -- */ -- out->info(out,"Pacemaker daemons shut down - reporting to SBD ..."); -+ out->info(out, "%s", state_str); - break; - default: - break; --- -2.31.1 - -From 7eb4fa59db667f1904b607cde8ed8b9caf7a46ed Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 12:51:40 -0700 -Subject: [PATCH 03/22] Low: libcrmcommon: Check invalid time value in - pacemakerd API reply - -If the XML_ATTR_TSTAMP attribute is present but can't be parsed as an -integer, value_ll gets set to PCMK__PARSE_INT_DEFAULT (-1). This should -never happen, but just in case, we should convert a negative to 0 before -we cast to time_t, an unsigned type. - -Signed-off-by: Reid Wahl ---- - lib/common/ipc_pacemakerd.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c -index 3777f95..562308c 100644 ---- a/lib/common/ipc_pacemakerd.c -+++ b/lib/common/ipc_pacemakerd.c -@@ -211,7 +211,7 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) - reply_data.data.ping.status = - pcmk__str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS), "ok", - pcmk__str_casei)?pcmk_rc_ok:pcmk_rc_error; -- reply_data.data.ping.last_good = (time_t) value_ll; -+ reply_data.data.ping.last_good = (value_ll < 0)? 
0 : (time_t) value_ll; - reply_data.data.ping.sys_from = crm_element_value(msg_data, - XML_PING_ATTR_SYSFROM); - } else if (pcmk__str_eq(value, CRM_OP_QUIT, pcmk__str_none)) { --- -2.31.1 - -From 3169eaafce20e2a444c3b96755daf36dd7143242 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 15:01:12 -0700 -Subject: [PATCH 04/22] Low: libpacemaker: Correct default for pinged_buf in - pacemakerd_event_cb - -Default should be NULL so that the last_updated default gets used -correctly in the pacemakerd-health message. - -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_cluster_queries.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index cac8ce0..43b2b1f 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -229,7 +229,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - (reply->data.ping.status == pcmk_rc_ok)? - pcmk_pacemakerd_api_daemon_state_enum2text( - reply->data.ping.state):"query failed", -- (reply->data.ping.status == pcmk_rc_ok)?pinged_buf:""); -+ (reply->data.ping.status == pcmk_rc_ok)? pinged_buf : NULL); - data->rc = pcmk_rc_ok; - crm_time_free(crm_when); - free(pinged_buf); --- -2.31.1 - -From c6141eb0f47fc806a309f99ec52ccb274b134533 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 22:48:40 -0700 -Subject: [PATCH 05/22] Refactor: libpacemaker: Improve return codes in - pcmk__pacemakerd_status - -Use pcmk_rc_ipc_unresponsive if we don't get a response from the API, -and EBADMSG if we get an bad reply or unexpected reply type. 
- -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_cluster_queries.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index 43b2b1f..9937e16 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -207,6 +207,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - out->err(out, "error: Bad reply from pacemakerd: %s", - crm_exit_str(status)); - event_done(data, pacemakerd_api); -+ data->rc = EBADMSG; - return; - } - -@@ -214,6 +215,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - out->err(out, "error: Unknown reply type %d from pacemakerd", - reply->reply_type); - event_done(data, pacemakerd_api); -+ data->rc = EBADMSG; - return; - } - -@@ -375,7 +377,7 @@ pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, - data_t data = { - .out = out, - .mainloop = NULL, -- .rc = pcmk_rc_ok, -+ .rc = pcmk_rc_ipc_unresponsive, - .message_timer_id = 0, - .message_timeout_ms = message_timeout_ms - }; --- -2.31.1 - -From df2e449a29fe3460b98403767a16be4d89ef3455 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 15:12:23 -0700 -Subject: [PATCH 06/22] Feature: libpacemaker: pacemakerd-health message - accepts state - -Previously, the pacemakerd-health message accepted only a state string. -This made it difficult to use different state strings for different -output formats. - -Now, the pacemakerd-health message accepts an enum pcmk_pacemakerd_state -value and an optional state string. If the state string is not set, then -the formatter function looks up an appropriate string representation for -the state. If the state string is set, it acts as an explicit override -and is used in place of a lookup. - -Note that this will cause "invalid" to be printed instead of "" -for quiet text outputs, and it will cause "Invalid pacemakerd state" to -be printed instead of "unknown state" for the default output. 
- -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_cluster_queries.c | 40 +++++++++++--------- - lib/pacemaker/pcmk_output.c | 56 +++++++++++++++++++++------- - 2 files changed, 65 insertions(+), 31 deletions(-) - -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index 9937e16..3e36a12 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -180,9 +180,6 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - pcmk__output_t *out = data->out; - pcmk_pacemakerd_api_reply_t *reply = event_data; - -- crm_time_t *crm_when; -- char *pinged_buf = NULL; -- - switch (event_type) { - case pcmk_ipc_event_disconnect: - if (data->rc == ECONNRESET) { // Unexpected -@@ -220,22 +217,29 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - } - - // Parse desired information from reply -- crm_when = crm_time_new(NULL); -- crm_time_set_timet(crm_when, &reply->data.ping.last_good); -- pinged_buf = crm_time_as_string(crm_when, -- crm_time_log_date | crm_time_log_timeofday | -- crm_time_log_with_timezone); -- -- out->message(out, "pacemakerd-health", -- reply->data.ping.sys_from, -- (reply->data.ping.status == pcmk_rc_ok)? -- pcmk_pacemakerd_api_daemon_state_enum2text( -- reply->data.ping.state):"query failed", -- (reply->data.ping.status == pcmk_rc_ok)? 
pinged_buf : NULL); -+ if (reply->data.ping.status == pcmk_rc_ok) { -+ crm_time_t *when = crm_time_new(NULL); -+ char *when_s = NULL; -+ -+ crm_time_set_timet(when, &reply->data.ping.last_good); -+ when_s = crm_time_as_string(when, -+ crm_time_log_date -+ |crm_time_log_timeofday -+ |crm_time_log_with_timezone); -+ -+ out->message(out, "pacemakerd-health", -+ reply->data.ping.sys_from, reply->data.ping.state, NULL, -+ when_s); -+ -+ crm_time_free(when); -+ free(when_s); -+ -+ } else { -+ out->message(out, "pacemakerd-health", -+ reply->data.ping.sys_from, reply->data.ping.state, -+ "query failed", NULL); -+ } - data->rc = pcmk_rc_ok; -- crm_time_free(crm_when); -- free(pinged_buf); -- - event_done(data, pacemakerd_api); - } - -diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c -index 9a522a3..edd4b82 100644 ---- a/lib/pacemaker/pcmk_output.c -+++ b/lib/pacemaker/pcmk_output.c -@@ -627,36 +627,65 @@ health_xml(pcmk__output_t *out, va_list args) - return pcmk_rc_ok; - } - --PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "const char *", "const char *") -+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", -+ "const char *") - static int --pacemakerd_health_text(pcmk__output_t *out, va_list args) -+pacemakerd_health(pcmk__output_t *out, va_list args) - { - const char *sys_from = va_arg(args, const char *); -- const char *state = va_arg(args, const char *); -+ enum pcmk_pacemakerd_state state = -+ (enum pcmk_pacemakerd_state) va_arg(args, int); -+ const char *state_s = va_arg(args, const char *); - const char *last_updated = va_arg(args, const char *); - -+ if (state_s == NULL) { -+ state_s = pcmk__pcmkd_state_enum2friendly(state); -+ } -+ return out->info(out, "Status of %s: '%s' (last updated %s)", -+ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", -+ state_s, -+ (!pcmk__str_empty(last_updated)) ? 
last_updated : "at unknown time"); -+} -+ -+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", -+ "const char *") -+static int -+pacemakerd_health_text(pcmk__output_t *out, va_list args) -+{ - if (!out->is_quiet(out)) { -- return out->info(out, "Status of %s: '%s' %s %s", crm_str(sys_from), -- crm_str(state), (!pcmk__str_empty(last_updated))? -- "last updated":"", crm_str(last_updated)); -+ return pacemakerd_health(out, args); - } else { -- pcmk__formatted_printf(out, "%s\n", crm_str(state)); -+ const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); -+ enum pcmk_pacemakerd_state state = -+ (enum pcmk_pacemakerd_state) va_arg(args, int); -+ const char *state_s = va_arg(args, const char *); -+ const char *last_updated G_GNUC_UNUSED = va_arg(args, const char *); -+ -+ if (state_s == NULL) { -+ state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); -+ } -+ pcmk__formatted_printf(out, "%s\n", state_s); - return pcmk_rc_ok; - } -- -- return pcmk_rc_no_output; - } - --PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "const char *", "const char *") -+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", -+ "const char *") - static int - pacemakerd_health_xml(pcmk__output_t *out, va_list args) - { - const char *sys_from = va_arg(args, const char *); -- const char *state = va_arg(args, const char *); -+ enum pcmk_pacemakerd_state state = -+ (enum pcmk_pacemakerd_state) va_arg(args, int); -+ const char *state_s = va_arg(args, const char *); - const char *last_updated = va_arg(args, const char *); - -+ if (state_s == NULL) { -+ state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); -+ } -+ - pcmk__output_create_xml_node(out, crm_str(sys_from), -- "state", crm_str(state), -+ "state", state_s, - "last_updated", crm_str(last_updated), - NULL); - return pcmk_rc_ok; -@@ -1899,7 +1928,8 @@ static pcmk__message_entry_t fmt_functions[] = { - { "locations-list", "xml", locations_list_xml }, - { 
"node-action", "default", node_action }, - { "node-action", "xml", node_action_xml }, -- { "pacemakerd-health", "default", pacemakerd_health_text }, -+ { "pacemakerd-health", "default", pacemakerd_health }, -+ { "pacemakerd-health", "text", pacemakerd_health_text }, - { "pacemakerd-health", "xml", pacemakerd_health_xml }, - { "profile", "default", profile_default, }, - { "profile", "xml", profile_xml }, --- -2.31.1 - -From 9bb521dc8b835641746095fe66b7a2137ce12c20 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 15:53:18 -0700 -Subject: [PATCH 07/22] Feature: libpacemaker: pcmk__pacemakerd_status() can - return pcmkd state - -Signed-off-by: Reid Wahl ---- - include/pcmki/pcmki_cluster_queries.h | 3 ++- - lib/pacemaker/pcmk_cluster_queries.c | 22 +++++++++++++++++----- - tools/crmadmin.c | 3 ++- - 3 files changed, 21 insertions(+), 7 deletions(-) - -diff --git a/include/pcmki/pcmki_cluster_queries.h b/include/pcmki/pcmki_cluster_queries.h -index 9aea9a5..702ab1f 100644 ---- a/include/pcmki/pcmki_cluster_queries.h -+++ b/include/pcmki/pcmki_cluster_queries.h -@@ -11,7 +11,8 @@ - int pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_timeout_ms); - int pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms); - int pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, -- guint message_timeout_ms); -+ guint message_timeout_ms, -+ enum pcmk_pacemakerd_state *state); - int pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean BASH_EXPORT); - - #endif -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index 3e36a12..5834ef0 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -36,6 +36,7 @@ typedef struct { - int rc; - guint message_timer_id; - guint message_timeout_ms; -+ enum pcmk_pacemakerd_state pcmkd_state; - } data_t; - - static void -@@ -217,6 +218,7 @@ pacemakerd_event_cb(pcmk_ipc_api_t 
*pacemakerd_api, - } - - // Parse desired information from reply -+ data->pcmkd_state = reply->data.ping.state; - if (reply->data.ping.status == pcmk_rc_ok) { - crm_time_t *when = crm_time_new(NULL); - char *when_s = NULL; -@@ -282,7 +284,8 @@ pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_time - .mainloop = NULL, - .rc = pcmk_rc_ok, - .message_timer_id = 0, -- .message_timeout_ms = message_timeout_ms -+ .message_timeout_ms = message_timeout_ms, -+ .pcmkd_state = pcmk_pacemakerd_state_invalid, - }; - pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, controller_status_event_cb); - -@@ -327,7 +330,8 @@ pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms) - .mainloop = NULL, - .rc = pcmk_rc_ok, - .message_timer_id = 0, -- .message_timeout_ms = message_timeout_ms -+ .message_timeout_ms = message_timeout_ms, -+ .pcmkd_state = pcmk_pacemakerd_state_invalid, - }; - pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, designated_controller_event_cb); - -@@ -371,19 +375,23 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) - * \param[in,out] out Output object - * \param[in] ipc_name IPC name for request - * \param[in] message_timeout_ms Message timeout -+ * \param[out] state Where to store the \p pacemakerd state, if -+ * not \p NULL - * - * \return Standard Pacemaker return code - */ - int - pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, -- guint message_timeout_ms) -+ guint message_timeout_ms, -+ enum pcmk_pacemakerd_state *state) - { - data_t data = { - .out = out, - .mainloop = NULL, - .rc = pcmk_rc_ipc_unresponsive, - .message_timer_id = 0, -- .message_timeout_ms = message_timeout_ms -+ .message_timeout_ms = message_timeout_ms, -+ .pcmkd_state = pcmk_pacemakerd_state_invalid, - }; - pcmk_ipc_api_t *pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, pacemakerd_event_cb); - -@@ -399,6 +407,9 @@ pcmk__pacemakerd_status(pcmk__output_t 
*out, const char *ipc_name, - pcmk_free_ipc_api(pacemakerd_api); - } - -+ if (state != NULL) { -+ *state = data.pcmkd_state; -+ } - return data.rc; - } - -@@ -417,7 +428,8 @@ pcmk_pacemakerd_status(xmlNodePtr *xml, const char *ipc_name, - - pcmk__register_lib_messages(out); - -- rc = pcmk__pacemakerd_status(out, ipc_name, (guint) message_timeout_ms); -+ rc = pcmk__pacemakerd_status(out, ipc_name, (guint) message_timeout_ms, -+ NULL); - pcmk__out_epilogue(out, xml, rc); - return rc; - } -diff --git a/tools/crmadmin.c b/tools/crmadmin.c -index 169289f..f4c2783 100644 ---- a/tools/crmadmin.c -+++ b/tools/crmadmin.c -@@ -238,7 +238,8 @@ main(int argc, char **argv) - rc = pcmk__controller_status(out, options.optarg, options.timeout); - break; - case cmd_pacemakerd_health: -- rc = pcmk__pacemakerd_status(out, options.ipc_name, options.timeout); -+ rc = pcmk__pacemakerd_status(out, options.ipc_name, options.timeout, -+ NULL); - break; - case cmd_list_nodes: - rc = pcmk__list_nodes(out, options.optarg, options.BASH_EXPORT); --- -2.31.1 - -From 4841c22f9a7cc927e87007c9691e2c239f035a58 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 16:50:06 -0700 -Subject: [PATCH 08/22] Fix: libpacemaker: Memory leak in - pcmk_cluster_queries.c:ipc_connect() - -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_cluster_queries.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index 5834ef0..00a809d 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -270,6 +270,7 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb) - pcmk_ipc_name(api, true), - pcmk_rc_str(rc)); - data->rc = rc; -+ pcmk_free_ipc_api(api); - return NULL; - } - --- -2.31.1 - -From 8e202448c47ad0ddc148b2e0514ef98b4847fa6e Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 18:25:35 -0700 -Subject: [PATCH 09/22] Doc: libpe_status: Replace 
old funcname in - pe__build_rsc_list() comment - -build_uname_list -> pe__build_node_name_list() - -Signed-off-by: Reid Wahl ---- - lib/pengine/utils.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 77111a6..1a4eb3e 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -2551,9 +2551,9 @@ pe__build_rsc_list(pe_working_set_t *data_set, const char *s) { - resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); - } - } else { -- /* The given string was not a valid resource name. It's either -- * a tag or it's a typo or something. See build_uname_list for -- * more detail. -+ /* The given string was not a valid resource name. It's a tag or a -+ * typo or something. See pe__build_node_name_list() for more -+ * detail. - */ - resources = pe__rscs_with_tag(data_set, s); - } --- -2.31.1 - -From 0c412f49d607a8f60790b13e75d8c7b3a8c6c1d9 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 18:38:06 -0700 -Subject: [PATCH 10/22] Refactor: libpacemaker: Clarify pointer arguments in - pcmk_status.c - -Make only_node and only_rsc arguments const. Add doxygen blocks. Change -"st" argument to "stonith". - -This is not comprehensive. We're updating pcmk__status() because we're -about to add a new argument, and pcmk__output_cluster_status() because -it shares most of its arguments with pcmk__status(). 
- -Signed-off-by: Reid Wahl ---- - include/pcmki/pcmki_status.h | 18 ++++--- - lib/pacemaker/pcmk_status.c | 93 +++++++++++++++++++++++++++++------- - 2 files changed, 87 insertions(+), 24 deletions(-) - -diff --git a/include/pcmki/pcmki_status.h b/include/pcmki/pcmki_status.h -index 6614fe4..2bbd099 100644 ---- a/include/pcmki/pcmki_status.h -+++ b/include/pcmki/pcmki_status.h -@@ -38,15 +38,19 @@ extern "C" { - */ - int pcmk__output_simple_status(pcmk__output_t *out, pe_working_set_t *data_set); - --int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib, -- xmlNode *current_cib, enum pcmk__fence_history fence_history, -- uint32_t show, uint32_t show_opts, char *only_node, -- char *only_rsc, char *neg_location_prefix, -+int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, -+ cib_t *cib, xmlNode *current_cib, -+ enum pcmk__fence_history fence_history, -+ uint32_t show, uint32_t show_opts, -+ const char *only_node, const char *only_rsc, -+ const char *neg_location_prefix, - bool simple_output); - --int pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, -- uint32_t show, uint32_t show_opts, char *only_node, char *only_rsc, -- char *neg_location_prefix, bool simple_output); -+int pcmk__status(pcmk__output_t *out, cib_t *cib, -+ enum pcmk__fence_history fence_history, uint32_t show, -+ uint32_t show_opts, const char *only_node, -+ const char *only_rsc, const char *neg_location_prefix, -+ bool simple_output); - - #ifdef __cplusplus - } -diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c -index 12136ea..1bf0172 100644 ---- a/lib/pacemaker/pcmk_status.c -+++ b/lib/pacemaker/pcmk_status.c -@@ -135,11 +135,38 @@ pacemakerd_status(pcmk__output_t *out) - return rc; - } - -+/*! 
-+ * \internal -+ * \brief Output the cluster status given a fencer and CIB connection -+ * -+ * \param[in,out] out Output object -+ * \param[in,out] stonith Fencer connection -+ * \param[in,out] cib CIB connection -+ * \param[in] current_cib Current CIB XML -+ * \param[in] fence_history How much of the fencing history to output -+ * \param[in] show Group of \p pcmk_section_e flags -+ * \param[in] show_opts Group of \p pcmk_show_opt_e flags -+ * \param[in] only_node If a node name or tag, include only the -+ * matching node(s) (if any) in the output. -+ * If \p "*" or \p NULL, include all nodes -+ * in the output. -+ * \param[in] only_rsc If a resource ID or tag, include only the -+ * matching resource(s) (if any) in the -+ * output. If \p "*" or \p NULL, include all -+ * resources in the output. -+ * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID -+ * \param[in] simple_output Whether to use a simple output format. -+ * Note: This is for use by \p crm_mon only -+ * and is planned to be deprecated. 
-+ * -+ * \return Standard Pacemaker return code -+ */ - int --pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib, -+pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, cib_t *cib, - xmlNode *current_cib, enum pcmk__fence_history fence_history, -- uint32_t show, uint32_t show_opts, char *only_node, -- char *only_rsc, char *neg_location_prefix, bool simple_output) -+ uint32_t show, uint32_t show_opts, -+ const char *only_node, const char *only_rsc, -+ const char *neg_location_prefix, bool simple_output) - { - xmlNode *cib_copy = copy_xml(current_cib); - stonith_history_t *stonith_history = NULL; -@@ -159,7 +186,8 @@ pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *st, cib_t *cib, - - /* get the stonith-history if there is evidence we need it */ - if (fence_history != pcmk__fence_history_none) { -- history_rc = pcmk__get_fencing_history(st, &stonith_history, fence_history); -+ history_rc = pcmk__get_fencing_history(stonith, &stonith_history, -+ fence_history); - } - - data_set = pe_new_working_set(); -@@ -235,14 +263,43 @@ pcmk_status(xmlNodePtr *xml) - return rc; - } - -+/*! -+ * \internal -+ * \brief Query and output the cluster status -+ * -+ * The operation is considered a success if we're able to get the \p pacemakerd -+ * state. If possible, we'll also try to connect to the fencer and CIB and -+ * output their respective status information. -+ * -+ * \param[in,out] out Output object -+ * \param[in,out] cib CIB connection -+ * \param[in] fence_history How much of the fencing history to output -+ * \param[in] show Group of \p pcmk_section_e flags -+ * \param[in] show_opts Group of \p pcmk_show_opt_e flags -+ * \param[in] only_node If a node name or tag, include only the -+ * matching node(s) (if any) in the output. -+ * If \p "*" or \p NULL, include all nodes -+ * in the output. -+ * \param[in] only_rsc If a resource ID or tag, include only the -+ * matching resource(s) (if any) in the -+ * output. 
If \p "*" or \p NULL, include all -+ * resources in the output. -+ * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID -+ * \param[in] simple_output Whether to use a simple output format. -+ * Note: This is for use by \p crm_mon only -+ * and is planned to be deprecated. -+ * -+ * \return Standard Pacemaker return code -+ */ - int --pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, -- uint32_t show, uint32_t show_opts, char *only_node, char *only_rsc, -- char *neg_location_prefix, bool simple_output) -+pcmk__status(pcmk__output_t *out, cib_t *cib, -+ enum pcmk__fence_history fence_history, uint32_t show, -+ uint32_t show_opts, const char *only_node, const char *only_rsc, -+ const char *neg_location_prefix, bool simple_output) - { - xmlNode *current_cib = NULL; - int rc = pcmk_rc_ok; -- stonith_t *st = NULL; -+ stonith_t *stonith = NULL; - - if (cib == NULL) { - return ENOTCONN; -@@ -261,9 +318,9 @@ pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_his - } - - if (fence_history != pcmk__fence_history_none && cib->variant == cib_native) { -- st = fencing_connect(); -+ stonith = fencing_connect(); - -- if (st == NULL) { -+ if (stonith == NULL) { - return ENOTCONN; - } - } -@@ -273,17 +330,19 @@ pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_his - goto done; - } - -- rc = pcmk__output_cluster_status(out, st, cib, current_cib, fence_history, show, show_opts, -- only_node, only_rsc, neg_location_prefix, simple_output); -+ rc = pcmk__output_cluster_status(out, stonith, cib, current_cib, -+ fence_history, show, show_opts, only_node, -+ only_rsc, neg_location_prefix, -+ simple_output); - - done: -- if (st != NULL) { -- if (st->state != stonith_disconnected) { -- st->cmds->remove_notification(st, NULL); -- st->cmds->disconnect(st); -+ if (stonith != NULL) { -+ if (stonith->state != stonith_disconnected) { -+ stonith->cmds->remove_notification(stonith, NULL); -+ 
stonith->cmds->disconnect(stonith); - } - -- stonith_api_delete(st); -+ stonith_api_delete(stonith); - } - - return rc; --- -2.31.1 - -From 8384af058c47a46cd10a070f90f6dc0bd1b12045 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 22:22:49 -0700 -Subject: [PATCH 11/22] Feature: libpacemaker: HTML formatter for - pacemakerd-health message - -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_output.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c -index edd4b82..c088a6a 100644 ---- a/lib/pacemaker/pcmk_output.c -+++ b/lib/pacemaker/pcmk_output.c -@@ -646,6 +646,32 @@ pacemakerd_health(pcmk__output_t *out, va_list args) - state_s, - (!pcmk__str_empty(last_updated)) ? last_updated : "at unknown time"); - } -+ -+PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", -+ "const char *") -+static int -+pacemakerd_health_html(pcmk__output_t *out, va_list args) -+{ -+ const char *sys_from = va_arg(args, const char *); -+ enum pcmk_pacemakerd_state state = -+ (enum pcmk_pacemakerd_state) va_arg(args, int); -+ const char *state_s = va_arg(args, const char *); -+ const char *last_updated = va_arg(args, const char *); -+ char *msg = NULL; -+ -+ if (state_s == NULL) { -+ state_s = pcmk__pcmkd_state_enum2friendly(state); -+ } -+ -+ msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)", -+ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", -+ state_s, -+ (!pcmk__str_empty(last_updated)) ? 
last_updated : "at unknown time"); -+ pcmk__output_create_html_node(out, "li", NULL, NULL, msg); -+ -+ free(msg); -+ return pcmk_rc_ok; -+} - - PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "int", "const char *", - "const char *") -@@ -1929,6 +1955,7 @@ static pcmk__message_entry_t fmt_functions[] = { - { "node-action", "default", node_action }, - { "node-action", "xml", node_action_xml }, - { "pacemakerd-health", "default", pacemakerd_health }, -+ { "pacemakerd-health", "html", pacemakerd_health_html }, - { "pacemakerd-health", "text", pacemakerd_health_text }, - { "pacemakerd-health", "xml", pacemakerd_health_xml }, - { "profile", "default", profile_default, }, --- -2.31.1 - -From ec6a28bf64d23107c81d473c02038c29b17f2917 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Sat, 3 Sep 2022 21:40:04 -0700 -Subject: [PATCH 12/22] Low: schemas: Copy some API schemas in preparation for - changes - -Signed-off-by: Reid Wahl ---- - include/crm/common/output_internal.h | 2 +- - xml/api/command-output-2.23.rng | 26 +++ - xml/api/crm_resource-2.23.rng | 288 +++++++++++++++++++++++++++ - xml/api/stonith_admin-2.23.rng | 52 +++++ - 4 files changed, 367 insertions(+), 1 deletion(-) - create mode 100644 xml/api/command-output-2.23.rng - create mode 100644 xml/api/crm_resource-2.23.rng - create mode 100644 xml/api/stonith_admin-2.23.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 24f5b2c..1e71e13 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.22" -+# define PCMK__API_VERSION "2.23" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) -diff --git a/xml/api/command-output-2.23.rng b/xml/api/command-output-2.23.rng -new file mode 100644 -index 0000000..710c134 ---- /dev/null -+++ b/xml/api/command-output-2.23.rng -@@ -0,0 +1,26 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ stdout -+ -+ -+ -+ -+ -+ stderr -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/crm_resource-2.23.rng b/xml/api/crm_resource-2.23.rng -new file mode 100644 -index 0000000..8a46675 ---- /dev/null -+++ b/xml/api/crm_resource-2.23.rng -@@ -0,0 +1,288 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ promoted -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ocf -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ true -+ false -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ Stopped -+ Started -+ Promoted -+ Unpromoted -+ -+ -+ Master -+ Slave -+ -+ -+ -diff --git a/xml/api/stonith_admin-2.23.rng b/xml/api/stonith_admin-2.23.rng -new file mode 100644 -index 0000000..b55fae9 ---- /dev/null -+++ b/xml/api/stonith_admin-2.23.rng -@@ -0,0 +1,52 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 526a4148ba548a3dfec4394c9d10a8d71d18b81e Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Sun, 4 Sep 2022 01:15:59 -0700 -Subject: [PATCH 13/22] Fix: schemas: crm_resource 
--validate validation fails - -In case of an error, the output of `crm_resource --validate` fails to -validate (if validate-all does not output XML). This is because if a - contains two elements with the same name in a RelaxNG schema, -only the first occurrence is honored and the rest are ignored. (This -does not seem to be documented clearly; it's a conclusion based on -experimentation.) - -The solution is to create just one that contains a -(instead of a that contains two s). - -Closes RHBZ#2123727 - -Signed-off-by: Reid Wahl ---- - xml/Makefile.am | 2 +- - xml/api/command-output-2.23.rng | 14 +------------- - xml/api/crm_resource-2.23.rng | 10 +++++----- - xml/api/stonith_admin-2.23.rng | 2 +- - xml/api/subprocess-output-2.23.rng | 24 ++++++++++++++++++++++++ - 5 files changed, 32 insertions(+), 20 deletions(-) - create mode 100644 xml/api/subprocess-output-2.23.rng - -diff --git a/xml/Makefile.am b/xml/Makefile.am -index 39f02f5..0a4a8aa 100644 ---- a/xml/Makefile.am -+++ b/xml/Makefile.am -@@ -69,7 +69,7 @@ API_request_base = command-output \ - CIB_cfg_base = options nodes resources constraints fencing acls tags alerts - - # Names of all schemas (including top level and those included by others) --API_base = $(API_request_base) fence-event failure generic-list item node-attrs node-history nodes resources status -+API_base = $(API_request_base) fence-event failure generic-list item node-attrs node-history nodes resources status subprocess-output - CIB_base = cib $(CIB_cfg_base) status score rule nvset - - # Static schema files and transforms (only CIB has transforms) -diff --git a/xml/api/command-output-2.23.rng b/xml/api/command-output-2.23.rng -index 710c134..4de49bd 100644 ---- a/xml/api/command-output-2.23.rng -+++ b/xml/api/command-output-2.23.rng -@@ -8,19 +8,7 @@ - - - -- -- -- -- stdout -- -- -- -- -- -- stderr -- -- -- -+ - - - -diff --git a/xml/api/crm_resource-2.23.rng b/xml/api/crm_resource-2.23.rng -index 8a46675..f841026 100644 ---- 
a/xml/api/crm_resource-2.23.rng -+++ b/xml/api/crm_resource-2.23.rng -@@ -229,12 +229,12 @@ - - - -- -- -+ -+ - -- -- -- -+ -+ -+ - - - -diff --git a/xml/api/stonith_admin-2.23.rng b/xml/api/stonith_admin-2.23.rng -index b55fae9..f3fab68 100644 ---- a/xml/api/stonith_admin-2.23.rng -+++ b/xml/api/stonith_admin-2.23.rng -@@ -45,7 +45,7 @@ - - - -- -+ - - - -diff --git a/xml/api/subprocess-output-2.23.rng b/xml/api/subprocess-output-2.23.rng -new file mode 100644 -index 0000000..2f7a8e7 ---- /dev/null -+++ b/xml/api/subprocess-output-2.23.rng -@@ -0,0 +1,24 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ stdout -+ -+ -+ -+ -+ -+ stderr -+ -+ -+ -+ -+ --- -2.31.1 - -From 60af39cd1582bcf91ebcfc5f9ce2fc98fd14b5b9 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Sat, 27 Aug 2022 22:46:38 -0700 -Subject: [PATCH 14/22] Low: schemas: Add schema for crm_error - -This matches the current capabilities of crm_error, though we might want -to change to a oneOrMore choice for name and description later. - -Closes T97 - -Signed-off-by: Reid Wahl ---- - xml/api/crm_error-2.23.rng | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - create mode 100644 xml/api/crm_error-2.23.rng - -diff --git a/xml/api/crm_error-2.23.rng b/xml/api/crm_error-2.23.rng -new file mode 100644 -index 0000000..8ba6e62 ---- /dev/null -+++ b/xml/api/crm_error-2.23.rng -@@ -0,0 +1,24 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 4dbb0e9d79dd36647fbb222bd5c2adae518e541c Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Wed, 14 Sep 2022 22:53:49 -0700 -Subject: [PATCH 15/22] Low: schemas: Copy API schemas in preparation for - changes - -Signed-off-by: Reid Wahl ---- - include/crm/common/output_internal.h | 2 +- - xml/api/crm_mon-2.24.rng | 186 +++++++++++++++ - xml/api/crm_resource-2.24.rng | 288 +++++++++++++++++++++++ - xml/api/crm_simulate-2.24.rng | 338 +++++++++++++++++++++++++++ - xml/api/nodes-2.24.rng | 54 +++++ - xml/api/resources-2.24.rng | 109 +++++++++ 
- 6 files changed, 976 insertions(+), 1 deletion(-) - create mode 100644 xml/api/crm_mon-2.24.rng - create mode 100644 xml/api/crm_resource-2.24.rng - create mode 100644 xml/api/crm_simulate-2.24.rng - create mode 100644 xml/api/nodes-2.24.rng - create mode 100644 xml/api/resources-2.24.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 1e71e13..6c6d5a3 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.23" -+# define PCMK__API_VERSION "2.24" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS))) -diff --git a/xml/api/crm_mon-2.24.rng b/xml/api/crm_mon-2.24.rng -new file mode 100644 -index 0000000..b52307a ---- /dev/null -+++ b/xml/api/crm_mon-2.24.rng -@@ -0,0 +1,186 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ granted -+ revoked -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/crm_resource-2.24.rng b/xml/api/crm_resource-2.24.rng -new file mode 100644 -index 0000000..6a3334c ---- /dev/null -+++ b/xml/api/crm_resource-2.24.rng -@@ -0,0 +1,288 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ promoted -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ocf -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ true -+ false -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ Stopped -+ Started -+ Promoted -+ Unpromoted -+ -+ -+ Master -+ Slave -+ -+ -+ -diff --git a/xml/api/crm_simulate-2.24.rng b/xml/api/crm_simulate-2.24.rng -new file mode 100644 -index 0000000..5be0afa ---- /dev/null -+++ b/xml/api/crm_simulate-2.24.rng -@@ -0,0 +1,338 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/nodes-2.24.rng b/xml/api/nodes-2.24.rng -new file mode 100644 -index 0000000..9686344 ---- /dev/null -+++ b/xml/api/nodes-2.24.rng -@@ -0,0 +1,54 @@ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ red -+ yellow -+ green -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ unknown -+ member -+ remote -+ ping -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/resources-2.24.rng b/xml/api/resources-2.24.rng -new file mode 100644 -index 0000000..e279583 ---- /dev/null -+++ b/xml/api/resources-2.24.rng -@@ -0,0 +1,109 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ docker -+ rkt -+ podman -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 1d36b5d50e071ecaa66948066f23043a513871e8 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Wed, 14 Sep 2022 22:58:04 -0700 -Subject: [PATCH 16/22] API: schemas: Add locked_to= to resources API schema - -Ref T433 - -Signed-off-by: Reid Wahl ---- - xml/api/resources-2.24.rng | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/xml/api/resources-2.24.rng b/xml/api/resources-2.24.rng -index e279583..f8ae6eb 100644 ---- a/xml/api/resources-2.24.rng -+++ b/xml/api/resources-2.24.rng -@@ -94,6 +94,9 @@ - - - -+ -+ -+ - - - --- -2.31.1 - -From e8caa027408243a6c7edfa966a1a7b0535458b9a Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 21:53:02 -0700 -Subject: [PATCH 17/22] Low: schemas: Copy API schemas in preparation for - changes - -Signed-off-by: Reid Wahl ---- - include/crm/common/output_internal.h | 2 +- - xml/api/crm_mon-2.25.rng | 186 +++++++++++++++++++++++++++ - xml/api/crmadmin-2.25.rng | 68 ++++++++++ - 3 files changed, 255 insertions(+), 1 deletion(-) - create mode 100644 xml/api/crm_mon-2.25.rng - create mode 100644 xml/api/crmadmin-2.25.rng - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 6c6d5a3..1974721 100644 ---- a/include/crm/common/output_internal.h -+++ 
b/include/crm/common/output_internal.h -@@ -28,7 +28,7 @@ extern "C" { - */ - - --# define PCMK__API_VERSION "2.24" -+# define PCMK__API_VERSION "2.25" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS))) -diff --git a/xml/api/crm_mon-2.25.rng b/xml/api/crm_mon-2.25.rng -new file mode 100644 -index 0000000..b52307a ---- /dev/null -+++ b/xml/api/crm_mon-2.25.rng -@@ -0,0 +1,186 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ granted -+ revoked -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/xml/api/crmadmin-2.25.rng b/xml/api/crmadmin-2.25.rng -new file mode 100644 -index 0000000..34c9ca4 ---- /dev/null -+++ b/xml/api/crmadmin-2.25.rng -@@ -0,0 +1,68 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ unknown -+ member -+ remote -+ ping -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 9e06f1b526e9ceb94cc1709e245537d169ca2952 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 21:55:51 -0700 -Subject: [PATCH 18/22] Low: schemas: Add pacemakerd-health schema in - preparation for fix - -Signed-off-by: Reid Wahl ---- - xml/api/crm_mon-2.25.rng | 3 +++ - xml/api/crmadmin-2.25.rng | 9 +-------- - xml/api/pacemakerd-health-2.25.rng | 20 ++++++++++++++++++++ - 3 files changed, 24 insertions(+), 8 deletions(-) - create mode 100644 xml/api/pacemakerd-health-2.25.rng - -diff --git a/xml/api/crm_mon-2.25.rng 
b/xml/api/crm_mon-2.25.rng -index b52307a..1e501dd 100644 ---- a/xml/api/crm_mon-2.25.rng -+++ b/xml/api/crm_mon-2.25.rng -@@ -7,6 +7,9 @@ - - - -+ -+ -+ - - - -diff --git a/xml/api/crmadmin-2.25.rng b/xml/api/crmadmin-2.25.rng -index 34c9ca4..973f6d4 100644 ---- a/xml/api/crmadmin-2.25.rng -+++ b/xml/api/crmadmin-2.25.rng -@@ -11,7 +11,7 @@ - - - -- -+ - - - -@@ -29,13 +29,6 @@ - - - -- -- -- -- -- -- -- - - - -diff --git a/xml/api/pacemakerd-health-2.25.rng b/xml/api/pacemakerd-health-2.25.rng -new file mode 100644 -index 0000000..2089b25 ---- /dev/null -+++ b/xml/api/pacemakerd-health-2.25.rng -@@ -0,0 +1,20 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 9a320b51e21e4c52a5ac3332d35c0d70fdd1650c Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 22:03:19 -0700 -Subject: [PATCH 19/22] Low: libpacemaker: Fix pacemakerd-health XML output - -We were using F_CRM_SYS_FROM as the name of the XML element, instead of -something static like "pacemakerd". It happens that the value in -F_CRM_SYS_FROM seems to always be CRM_SYSTEM_MCP ("pacemakerd"), so the -element name was effectively deterministic. Nonetheless, the schema -required the element be called "pacemakerd"; there was no allowance for -another system name. That defeats any purpose of flexible element -naming. - -It seems better to call the element "pacemakerd" and make -sys_from a field, if we keep sys_from at all. (Can't use -"pacemakerd-health" for backward compatibility reasons.) - -Additionally, if sys_from or last_updated is NULL, pass them directly to -pcmk__output_create_xml_node(). Those attributes will simply be skipped -if their values are NULL. 
- -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_output.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c -index c088a6a..153a422 100644 ---- a/lib/pacemaker/pcmk_output.c -+++ b/lib/pacemaker/pcmk_output.c -@@ -710,9 +710,10 @@ pacemakerd_health_xml(pcmk__output_t *out, va_list args) - state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); - } - -- pcmk__output_create_xml_node(out, crm_str(sys_from), -+ pcmk__output_create_xml_node(out, "pacemakerd", -+ "sys_from", sys_from, - "state", state_s, -- "last_updated", crm_str(last_updated), -+ "last_updated", last_updated, - NULL); - return pcmk_rc_ok; - } --- -2.31.1 - -From bb57ee10fe6eaeaaeafbf8b491b446f7bffb6b22 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Tue, 11 Oct 2022 15:14:27 -0700 -Subject: [PATCH 20/22] Refactor: libpacemaker: Default to sync dispatch in - pcmk_cluster_queries - -If message_timeout_ms == 0 for various functions in -pcmk_cluster_queries.c, default to using sync dispatch instead of -starting a mainloop with timeout 30s that behaves basically like sync -dispatch. - -This makes it easier to reason about calling these functions when the -caller may have its own mainloop. - -Signed-off-by: Reid Wahl ---- - include/pacemaker.h | 8 +++- - lib/pacemaker/pcmk_cluster_queries.c | 64 ++++++++++++++++++++++------ - 2 files changed, 57 insertions(+), 15 deletions(-) - -diff --git a/include/pacemaker.h b/include/pacemaker.h -index a76569a..0ca9c29 100644 ---- a/include/pacemaker.h -+++ b/include/pacemaker.h -@@ -111,7 +111,13 @@ void pcmk_free_injections(pcmk_injections_t *injections); - * - * \param[in,out] xml Destination for the result, as an XML tree - * \param[in] ipc_name IPC name for request -- * \param[in] message_timeout_ms Message timeout -+ * \param[in] message_timeout_ms How long to wait for a reply from the -+ * \p pacemakerd API. If 0, -+ * \p pcmk_ipc_dispatch_sync will be used. 
-+ * If positive, \p pcmk_ipc_dispatch_main -+ * will be used, and a new mainloop will be -+ * created for this purpose (freed before -+ * return). - * - * \return Standard Pacemaker return code - */ -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index 00a809d..d4361c9 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -246,13 +246,13 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - } - - static pcmk_ipc_api_t * --ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb) -+ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, -+ enum pcmk_ipc_dispatch dispatch_type) - { - int rc; - pcmk__output_t *out = data->out; - pcmk_ipc_api_t *api = NULL; - -- - rc = pcmk_new_ipc_api(&api, server); - if (api == NULL) { - out->err(out, "error: Could not connect to %s: %s", -@@ -264,7 +264,8 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb) - if (cb != NULL) { - pcmk_register_ipc_callback(api, cb, data); - } -- rc = pcmk_connect_ipc(api, pcmk_ipc_dispatch_main); -+ -+ rc = pcmk_connect_ipc(api, dispatch_type); - if (rc != pcmk_rc_ok) { - out->err(out, "error: Could not connect to %s: %s", - pcmk_ipc_name(api, true), -@@ -288,16 +289,26 @@ pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_time - .message_timeout_ms = message_timeout_ms, - .pcmkd_state = pcmk_pacemakerd_state_invalid, - }; -- pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, controller_status_event_cb); -+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_main; -+ pcmk_ipc_api_t *controld_api = NULL; -+ -+ if (message_timeout_ms == 0) { -+ dispatch_type = pcmk_ipc_dispatch_sync; -+ } -+ controld_api = ipc_connect(&data, pcmk_ipc_controld, -+ controller_status_event_cb, dispatch_type); - - if (controld_api != NULL) { - int rc = pcmk_controld_api_ping(controld_api, dest_node); - if (rc != 
pcmk_rc_ok) { -- out->err(out, "error: Command failed: %s", pcmk_rc_str(rc)); -+ out->err(out, "error: Could not ping controller API: %s", -+ pcmk_rc_str(rc)); - data.rc = rc; - } - -- start_main_loop(&data); -+ if (dispatch_type == pcmk_ipc_dispatch_main) { -+ start_main_loop(&data); -+ } - - pcmk_free_ipc_api(controld_api); - } -@@ -334,16 +345,26 @@ pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms) - .message_timeout_ms = message_timeout_ms, - .pcmkd_state = pcmk_pacemakerd_state_invalid, - }; -- pcmk_ipc_api_t *controld_api = ipc_connect(&data, pcmk_ipc_controld, designated_controller_event_cb); -+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_main; -+ pcmk_ipc_api_t *controld_api = NULL; -+ -+ if (message_timeout_ms == 0) { -+ dispatch_type = pcmk_ipc_dispatch_sync; -+ } -+ controld_api = ipc_connect(&data, pcmk_ipc_controld, -+ designated_controller_event_cb, dispatch_type); - - if (controld_api != NULL) { - int rc = pcmk_controld_api_ping(controld_api, NULL); - if (rc != pcmk_rc_ok) { -- out->err(out, "error: Command failed: %s", pcmk_rc_str(rc)); -+ out->err(out, "error: Could not ping controller API: %s", -+ pcmk_rc_str(rc)); - data.rc = rc; - } - -- start_main_loop(&data); -+ if (dispatch_type == pcmk_ipc_dispatch_main) { -+ start_main_loop(&data); -+ } - - pcmk_free_ipc_api(controld_api); - } -@@ -375,7 +396,13 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) - * - * \param[in,out] out Output object - * \param[in] ipc_name IPC name for request -- * \param[in] message_timeout_ms Message timeout -+ * \param[in] message_timeout_ms How long to wait for a reply from the -+ * \p pacemakerd API. If 0, -+ * \p pcmk_ipc_dispatch_sync will be used. -+ * If positive, \p pcmk_ipc_dispatch_main -+ * will be used, and a new mainloop will be -+ * created for this purpose (freed before -+ * return). 
- * \param[out] state Where to store the \p pacemakerd state, if - * not \p NULL - * -@@ -394,17 +421,26 @@ pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, - .message_timeout_ms = message_timeout_ms, - .pcmkd_state = pcmk_pacemakerd_state_invalid, - }; -- pcmk_ipc_api_t *pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, pacemakerd_event_cb); -+ enum pcmk_ipc_dispatch dispatch_type = pcmk_ipc_dispatch_main; -+ pcmk_ipc_api_t *pacemakerd_api = NULL; -+ -+ if (message_timeout_ms == 0) { -+ dispatch_type = pcmk_ipc_dispatch_sync; -+ } -+ pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, -+ pacemakerd_event_cb, dispatch_type); - - if (pacemakerd_api != NULL) { - int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); - if (rc != pcmk_rc_ok) { -- out->err(out, "error: Command failed: %s", pcmk_rc_str(rc)); -+ out->err(out, "error: Could not ping launcher API: %s", -+ pcmk_rc_str(rc)); - data.rc = rc; - } - -- start_main_loop(&data); -- -+ if (dispatch_type == pcmk_ipc_dispatch_main) { -+ start_main_loop(&data); -+ } - pcmk_free_ipc_api(pacemakerd_api); - } - --- -2.31.1 - -From 97cb9452bb918c0b8ad6d1b937bff8f222191580 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Mon, 10 Oct 2022 18:10:54 -0700 -Subject: [PATCH 21/22] Fix: tools: crm_mon --one-shot fails while pacemaker is - shutting down - -crm_mon --one-shot checks the pacemakerd state before trying to get a -CIB connection. If pacemakerd is shutting down, it returns ENOTCONN. -This can cause a resource agent that calls crm_mon (for example, -ocf:heartbeat:pgsql) to fail to stop during shutdown. - -This is a regression introduced by commit 3f342e3. -crm_mon.c:pacemakerd_status() returns pcmk_rc_ok if pacemakerd is -shutting down, since 49ebe4c and 46d6edd (fixes for CLBZ#5471). 3f342e3 -refactored crm_mon --one-shot to use library functions. pcmk__status() -now does most of the work, calling pcmk_status.c:pacemakerd_status(). 
-That function returns ENOTCONN if pacemakerd is shutting down. As a -result, we don't try to connect to the CIB during shutdown. - -Here we update pcmk__status() to use pcmk__pacemakerd_status() instead -of a static and mostly redundant pacemakerd_status(). It receives the -pacemakerd state via an output pointer argument. If pacemakerd is -running or shutting down (or if we get an EREMOTEIO rc), we try -connecting to the fencer and CIB. However, as long as we successfully -get the pacemakerd state, we return success from pcmk__status(), since -we did obtain the cluster status. - -A couple of minor notes: -* pcmk__status() now takes a timeout argument that it passes to - pcmk__pacemakerd_status(). timeout == 0 uses pcmk_ipc_dispatch_sync, - matching the old implementation. A positive timeout uses - pcmk_ipc_dispatch_main. -* pcmk_cluster_queries.c:ipc_connect() no longer always prints a "Could - not connect" error for EREMOTEIO. The caller may consider it OK. - -Fixes T579 -Fixes CLBZ#5501 - -Signed-off-by: Reid Wahl ---- - include/pcmki/pcmki_status.h | 2 +- - lib/pacemaker/pcmk_cluster_queries.c | 27 ++++-- - lib/pacemaker/pcmk_status.c | 120 +++++++++------------------ - tools/crm_mon.c | 2 +- - 4 files changed, 61 insertions(+), 90 deletions(-) - -diff --git a/include/pcmki/pcmki_status.h b/include/pcmki/pcmki_status.h -index 2bbd099..0dde21c 100644 ---- a/include/pcmki/pcmki_status.h -+++ b/include/pcmki/pcmki_status.h -@@ -50,7 +50,7 @@ int pcmk__status(pcmk__output_t *out, cib_t *cib, - enum pcmk__fence_history fence_history, uint32_t show, - uint32_t show_opts, const char *only_node, - const char *only_rsc, const char *neg_location_prefix, -- bool simple_output); -+ bool simple_output, guint timeout_ms); - - #ifdef __cplusplus - } -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index d4361c9..220c872 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -247,7 +247,7 @@ 
pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - - static pcmk_ipc_api_t * - ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, -- enum pcmk_ipc_dispatch dispatch_type) -+ enum pcmk_ipc_dispatch dispatch_type, bool eremoteio_ok) - { - int rc; - pcmk__output_t *out = data->out; -@@ -267,9 +267,15 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, - - rc = pcmk_connect_ipc(api, dispatch_type); - if (rc != pcmk_rc_ok) { -- out->err(out, "error: Could not connect to %s: %s", -- pcmk_ipc_name(api, true), -- pcmk_rc_str(rc)); -+ if ((rc == EREMOTEIO) && eremoteio_ok) { -+ /* EREMOTEIO may be expected and acceptable for some callers. -+ * Preserve the return code in case callers need to handle it -+ * specially. -+ */ -+ } else { -+ out->err(out, "error: Could not connect to %s: %s", -+ pcmk_ipc_name(api, true), pcmk_rc_str(rc)); -+ } - data->rc = rc; - pcmk_free_ipc_api(api); - return NULL; -@@ -296,7 +302,8 @@ pcmk__controller_status(pcmk__output_t *out, char *dest_node, guint message_time - dispatch_type = pcmk_ipc_dispatch_sync; - } - controld_api = ipc_connect(&data, pcmk_ipc_controld, -- controller_status_event_cb, dispatch_type); -+ controller_status_event_cb, dispatch_type, -+ false); - - if (controld_api != NULL) { - int rc = pcmk_controld_api_ping(controld_api, dest_node); -@@ -352,7 +359,8 @@ pcmk__designated_controller(pcmk__output_t *out, guint message_timeout_ms) - dispatch_type = pcmk_ipc_dispatch_sync; - } - controld_api = ipc_connect(&data, pcmk_ipc_controld, -- designated_controller_event_cb, dispatch_type); -+ designated_controller_event_cb, dispatch_type, -+ false); - - if (controld_api != NULL) { - int rc = pcmk_controld_api_ping(controld_api, NULL); -@@ -407,6 +415,11 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms) - * not \p NULL - * - * \return Standard Pacemaker return code -+ * -+ * \note This function returns \p EREMOTEIO if run on a Pacemaker Remote 
node -+ * with \p pacemaker-remoted running, since \p pacemakerd is not proxied -+ * to remote nodes. The fencer and CIB may still be accessible, but -+ * \p state will be \p pcmk_pacemakerd_state_invalid. - */ - int - pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, -@@ -428,7 +441,7 @@ pcmk__pacemakerd_status(pcmk__output_t *out, const char *ipc_name, - dispatch_type = pcmk_ipc_dispatch_sync; - } - pacemakerd_api = ipc_connect(&data, pcmk_ipc_pacemakerd, -- pacemakerd_event_cb, dispatch_type); -+ pacemakerd_event_cb, dispatch_type, true); - - if (pacemakerd_api != NULL) { - int rc = pcmk_pacemakerd_api_ping(pacemakerd_api, ipc_name); -diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c -index 1bf0172..794c9ea 100644 ---- a/lib/pacemaker/pcmk_status.c -+++ b/lib/pacemaker/pcmk_status.c -@@ -70,71 +70,6 @@ fencing_connect(void) - } - } - --static void --pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, -- enum pcmk_ipc_event event_type, crm_exit_t status, -- void *event_data, void *user_data) --{ -- pcmk_pacemakerd_api_reply_t *reply = event_data; -- enum pcmk_pacemakerd_state *state = -- (enum pcmk_pacemakerd_state *) user_data; -- -- /* we are just interested in the latest reply */ -- *state = pcmk_pacemakerd_state_invalid; -- -- if (event_type != pcmk_ipc_event_reply || status != CRM_EX_OK) { -- return; -- } -- -- if (reply->reply_type == pcmk_pacemakerd_reply_ping && -- reply->data.ping.last_good != (time_t) 0 && -- reply->data.ping.status == pcmk_rc_ok) { -- *state = reply->data.ping.state; -- } --} -- --static int --pacemakerd_status(pcmk__output_t *out) --{ -- int rc = pcmk_rc_ok; -- pcmk_ipc_api_t *pacemakerd_api = NULL; -- enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; -- -- rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); -- if (pacemakerd_api == NULL) { -- out->err(out, "Could not connect to pacemakerd: %s", -- pcmk_rc_str(rc)); -- return rc; -- } -- -- 
pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state); -- -- rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_sync); -- if (rc == EREMOTEIO) { -- return pcmk_rc_ok; -- } else if (rc != pcmk_rc_ok) { -- out->err(out, "Could not connect to pacemakerd: %s", -- pcmk_rc_str(rc)); -- pcmk_free_ipc_api(pacemakerd_api); -- return rc; -- } -- -- rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); -- -- if (rc != pcmk_rc_ok) { -- /* Got some error from pcmk_pacemakerd_api_ping, so return it. */ -- } else if (state == pcmk_pacemakerd_state_running) { -- rc = pcmk_rc_ok; -- } else if (state == pcmk_pacemakerd_state_shutting_down) { -- rc = ENOTCONN; -- } else { -- rc = EAGAIN; -- } -- -- pcmk_free_ipc_api(pacemakerd_api); -- return rc; --} -- - /*! - * \internal - * \brief Output the cluster status given a fencer and CIB connection -@@ -256,7 +191,7 @@ pcmk_status(xmlNodePtr *xml) - stonith__register_messages(out); - - rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all, -- show_opts, NULL, NULL, NULL, false); -+ show_opts, NULL, NULL, NULL, false, 0); - pcmk__out_epilogue(out, xml, rc); - - cib_delete(cib); -@@ -288,6 +223,13 @@ pcmk_status(xmlNodePtr *xml) - * \param[in] simple_output Whether to use a simple output format. - * Note: This is for use by \p crm_mon only - * and is planned to be deprecated. -+ * \param[in] timeout_ms How long to wait for a reply from the -+ * \p pacemakerd API. If 0, -+ * \p pcmk_ipc_dispatch_sync will be used. -+ * If positive, \p pcmk_ipc_dispatch_main -+ * will be used, and a new mainloop will be -+ * created for this purpose (freed before -+ * return). 
- * - * \return Standard Pacemaker return code - */ -@@ -295,34 +237,47 @@ int - pcmk__status(pcmk__output_t *out, cib_t *cib, - enum pcmk__fence_history fence_history, uint32_t show, - uint32_t show_opts, const char *only_node, const char *only_rsc, -- const char *neg_location_prefix, bool simple_output) -+ const char *neg_location_prefix, bool simple_output, -+ guint timeout_ms) - { - xmlNode *current_cib = NULL; - int rc = pcmk_rc_ok; - stonith_t *stonith = NULL; -+ enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; - - if (cib == NULL) { - return ENOTCONN; - } - -- if (cib->variant == cib_native) { -- if (cib->state == cib_connected_query || cib->state == cib_connected_command) { -- rc = pcmk_rc_ok; -- } else { -- rc = pacemakerd_status(out); -+ if ((cib->variant == cib_native) -+ && (cib->state != cib_connected_query) -+ && (cib->state != cib_connected_command)) { -+ -+ rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, &state); -+ switch (rc) { -+ case pcmk_rc_ok: -+ switch (state) { -+ case pcmk_pacemakerd_state_running: -+ case pcmk_pacemakerd_state_shutting_down: -+ // CIB may still be available while shutting down -+ break; -+ default: -+ return rc; -+ } -+ break; -+ case EREMOTEIO: -+ /* We'll always get EREMOTEIO if we run this on a Pacemaker -+ * Remote node. The fencer and CIB might be available. 
-+ */ -+ rc = pcmk_rc_ok; -+ break; -+ default: -+ return rc; - } - } - -- if (rc != pcmk_rc_ok) { -- return rc; -- } -- - if (fence_history != pcmk__fence_history_none && cib->variant == cib_native) { - stonith = fencing_connect(); -- -- if (stonith == NULL) { -- return ENOTCONN; -- } - } - - rc = cib_connect(out, cib, ¤t_cib); -@@ -334,6 +289,9 @@ pcmk__status(pcmk__output_t *out, cib_t *cib, - fence_history, show, show_opts, only_node, - only_rsc, neg_location_prefix, - simple_output); -+ if (rc != pcmk_rc_ok) { -+ out->err(out, "Error outputting status info from the fencer or CIB"); -+ } - - done: - if (stonith != NULL) { -@@ -345,7 +303,7 @@ done: - stonith_api_delete(stonith); - } - -- return rc; -+ return pcmk_rc_ok; - } - - /* This is an internal-only function that is planned to be deprecated and removed. -diff --git a/tools/crm_mon.c b/tools/crm_mon.c -index e8cb709..c70c439 100644 ---- a/tools/crm_mon.c -+++ b/tools/crm_mon.c -@@ -1330,7 +1330,7 @@ one_shot(void) - int rc = pcmk__status(out, cib, fence_history, show, show_opts, - options.only_node, options.only_rsc, - options.neg_location_prefix, -- output_format == mon_output_monitor); -+ output_format == mon_output_monitor, 0); - - if (rc == pcmk_rc_ok) { - clean_up(pcmk_rc2exitc(rc)); --- -2.31.1 - -From 4e63214f61f03d2756f884dd411db07cb22e9de6 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Tue, 11 Oct 2022 13:25:45 -0700 -Subject: [PATCH 22/22] Low: libpacemaker: Correct sys_from default in - pacemakerd_health() - -sys_from should be a subsystem ("pacemakerd" is expected), not a node. 
- -Signed-off-by: Reid Wahl ---- - lib/pacemaker/pcmk_output.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c -index 153a422..b61f354 100644 ---- a/lib/pacemaker/pcmk_output.c -+++ b/lib/pacemaker/pcmk_output.c -@@ -642,7 +642,7 @@ pacemakerd_health(pcmk__output_t *out, va_list args) - state_s = pcmk__pcmkd_state_enum2friendly(state); - } - return out->info(out, "Status of %s: '%s' (last updated %s)", -- (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", -+ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown subsystem", - state_s, - (!pcmk__str_empty(last_updated)) ? last_updated : "at unknown time"); - } -@@ -664,7 +664,7 @@ pacemakerd_health_html(pcmk__output_t *out, va_list args) - } - - msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)", -- (!pcmk__str_empty(sys_from)) ? sys_from : "unknown node", -+ (!pcmk__str_empty(sys_from)) ? sys_from : "unknown subsystem", - state_s, - (!pcmk__str_empty(last_updated)) ? last_updated : "at unknown time"); - pcmk__output_create_html_node(out, "li", NULL, NULL, msg); --- -2.31.1 - diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index d39cd67..2be7c1f 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -35,11 +35,11 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 2.1.4 -%global specversion 5 +%global pcmkversion 2.1.5 +%global specversion 7 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build -%global commit dc6eb4362e67c1497a413434eba097063bf1ef83 +%global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. 
@@ -65,9 +65,6 @@ ## Add option to create binaries suitable for use with profiling tools %bcond_with profiling -## Add option to create binaries with coverage analysis -%bcond_with coverage - ## Allow deprecated option to skip (or enable, on RHEL) documentation %if 0%{?rhel} %bcond_with doc @@ -124,9 +121,14 @@ %define archive_version %(c=%{commit}; echo ${c:10}) %define archive_github_url %{commit}#/%{name}-%{archive_version}.tar.gz %else +%if "%{commit}" == "DIST" +%define archive_version %{pcmkversion} +%define archive_github_url %{archive_version}#/%{name}-%{pcmkversion}.tar.gz +%else %define archive_version %(c=%{commit}; echo ${c:0:%{commit_abbrev}}) %define archive_github_url %{archive_version}#/%{name}-%{archive_version}.tar.gz %endif +%endif ### Always use a simple release number %define pcmk_release %{specversion} @@ -230,7 +232,7 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist}.2 +Release: %{pcmk_release}%{?dist} License: GPLv2+ and LGPLv2+ Url: https://www.clusterlabs.org/ @@ -246,21 +248,11 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url} # upstream commits -Patch001: 001-stonith-enabled.patch -Patch002: 002-acl_group.patch -Patch003: 003-regression.patch -Patch004: 004-schema.patch -Patch005: 005-schema.patch -Patch006: 006-crm_resource.patch -Patch007: 007-stonith_admin.patch -Patch008: 008-metadata.patch -Patch009: 009-validate.patch -Patch010: 010-regression.patch -Patch011: 011-unfencing.patch -Patch012: 012-crm_resource.patch -Patch013: 013-rolling-upgrade-monitor.patch -Patch014: 014-abort-transition.patch -Patch015: 015-one_shot.patch +Patch001: 001-sync-points.patch +Patch002: 002-remote-regression.patch +Patch003: 003-history-cleanup.patch +Patch004: 004-g_source_remove.patch +Patch005: 005-query-null.patch Requires: 
resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -339,6 +331,9 @@ BuildRequires: inkscape BuildRequires: %{python_name}-sphinx %endif +# Booth requires this +Provides: pacemaker-ticket-support = 2.0 + Provides: pcmk-cluster-manager = %{version}-%{release} Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} @@ -360,7 +355,8 @@ when related resources fail and can be configured to periodically check resource health. Available rpmbuild rebuild options: - --with(out) : cibsecrets doc hardening nls pre_release profiling stonithd + --with(out) : cibsecrets hardening nls pre_release profiling + stonithd %package cli License: GPLv2+ and LGPLv2+ @@ -464,6 +460,7 @@ Requires: %{pkgname_pcmk_libs} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} Requires: %{pkgname_procps} Requires: psmisc +Requires: %{python_name}-psutil BuildArch: noarch # systemd Python bindings are a separate package in some distros @@ -858,15 +855,42 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog -* Wed Oct 26 2022 Chris Lumens - 2.1.4-5.2 -- Fix regression where crm_mon returns nonzero status at cluster shutdown -- Resolves: rhbz2133911 - -* Tue Oct 18 2022 Chris Lumens - 2.1.4-5.1 -- Fix regression where reordered resources do not get moved -- Execute resource metadata actions asynchronously -- Resolves: rhbz2128035 -- Resolves: rhbz2128036 +* Wed Feb 22 2023 Chris Lumens - 2.1.5-7 +- Additional fixes for SIGABRT during pacemaker-fenced shutdown +- Backport fix for attrd_updater -QA not displaying all nodes +- Related: rhbz2166967 +- Resolves: rhbz2169829 + +* Thu Feb 9 2023 Chris Lumens - 2.1.5-6 +- Backport fix for migration history cleanup causing resource recovery +- Backport fix for SIGABRT during pacemaker-fenced shutdown +- Resolves: rhbz2166393 +- Resolves: rhbz2166967 + +* Tue Jan 24 2023 Ken Gaillot - 2.1.5-5 +- Backport fix for remote node shutdown regression +- Resolves: rhbz2163450 + +* Mon Dec 12 2022 
Chris Lumens - 2.1.5-4 +- Rebase pacemaker on upstream 2.1.5 final release +- Add support for sync points to attribute daemon +- Resolves: rhbz2122353 + +* Tue Dec 06 2022 Chris Lumens - 2.1.5-3 +- Fix errors found by covscan +- Related: rhbz2122353 + +* Wed Nov 23 2022 Chris Lumens - 2.1.5-2 +- Rebase on upstream 2.1.5-rc3 release +- Related: rhbz2122353 + +* Tue Nov 15 2022 Chris Lumens - 2.1.5-1 +- Rebase on upstream 2.1.5-rc2 release +- Resolves: rhbz2123727 +- Resolves: rhbz2125337 +- Resolves: rhbz2125344 +- Resolves: rhbz2133546 +- Resolves: rhbz2142683 * Wed Aug 10 2022 Ken Gaillot - 2.1.4-5 - Fix regression in crm_resource -O