Compare commits

...

No commits in common. 'c9' and 'i10cs' have entirely different histories.
c9 ... i10cs

2
.gitignore vendored

@ -1 +1 @@
SOURCES/ClusterLabs-resource-agents-fd0720f7.tar.gz
SOURCES/ClusterLabs-resource-agents-56e76b01.tar.gz

@ -1 +1 @@
3b517ecdbe2103df77813050e5c998e102c5de7e SOURCES/ClusterLabs-resource-agents-fd0720f7.tar.gz
34d516ee1b647c20665302efcb8282cd535076b0 SOURCES/ClusterLabs-resource-agents-56e76b01.tar.gz

@ -1,75 +0,0 @@
From b806487ca758fce838c988767556007ecf66a6e3 Mon Sep 17 00:00:00 2001
From: Roger Zhou <zzhou@suse.com>
Date: Mon, 10 Apr 2023 18:08:56 +0800
Subject: [PATCH] exportfs: make the "fsid=" parameter optional
Based on feedback [1] from the kernel developer @neilbrown regarding the
NFS clustering use case, it has been determined that the fsid= parameter
is now considered optional and safe to omit.
[1] https://bugzilla.suse.com/show_bug.cgi?id=1201271#c49
"""
Since some time in 2007 NFS has used the UUID of a filesystem as the
primary identifier for that filesystem, rather than using the device
number. So from that time there should have been reduced need for the
"fsid=" option. Probably there are some filesystems that this didn't
work for. btrfs has been problematic at time, particularly when subvols
are exported. But for quite some years this has all "just worked" at
least for the major filesystems (ext4 xfs btrfs). [...] I would suggest
getting rid of the use of fsid= altogether. [...] I'm confident that it
was no longer an issue in SLE-12 and similarly not in SLE-15.
"""
---
heartbeat/exportfs | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index 2307a9e67b..435a19646b 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -82,7 +82,7 @@ The directory or directories to export.
<content type="string" />
</parameter>
-<parameter name="fsid" unique="0" required="1">
+<parameter name="fsid" unique="0" required="0">
<longdesc lang="en">
The fsid option to pass to exportfs. This can be a unique positive
integer, a UUID (assuredly sans comma characters), or the special string
@@ -185,6 +185,8 @@ exportfs_methods() {
reset_fsid() {
CURRENT_FSID=$OCF_RESKEY_fsid
+ [ -z "$CURRENT_FSID" ] && CURRENT_FSID=`echo "$OCF_RESKEY_options" | sed -n 's/.*fsid=\([^,]*\).*/\1/p'`
+ echo $CURRENT_FSID
}
bump_fsid() {
CURRENT_FSID=$((CURRENT_FSID+1))
@@ -322,7 +324,7 @@ export_one() {
if echo "$opts" | grep fsid >/dev/null; then
#replace fsid in options list
opts=`echo "$opts" | sed "s,fsid=[^,]*,fsid=$(get_fsid),g"`
- else
+ elif [ -n "$OCF_RESKEY_fsid" ]; then
#tack the fsid option onto our options list.
opts="${opts}${sep}fsid=$(get_fsid)"
fi
@@ -448,8 +450,8 @@ exportfs_validate_all ()
ocf_exit_reason "$OCF_RESKEY_fsid cannot contain a comma"
return $OCF_ERR_CONFIGURED
fi
- if [ $NUMDIRS -gt 1 ] &&
- ! ocf_is_decimal "$OCF_RESKEY_fsid"; then
+ if [ $NUMDIRS -gt 1 ] && [ -n "$(reset_fsid)" ] &&
+ ! ocf_is_decimal "$(reset_fsid)"; then
ocf_exit_reason "use integer fsid when exporting multiple directories"
return $OCF_ERR_CONFIGURED
fi
@@ -485,6 +487,6 @@ done
OCF_RESKEY_directory="${directories%% }"
NUMDIRS=`echo "$OCF_RESKEY_directory" | wc -w`
-OCF_REQUIRED_PARAMS="directory fsid clientspec"
+OCF_REQUIRED_PARAMS="directory clientspec"
OCF_REQUIRED_BINARIES="exportfs"
ocf_rarun $*

@ -1,43 +0,0 @@
From 1d1481aa6d848efab4d398ad6e74d80b5b32549f Mon Sep 17 00:00:00 2001
From: Valentin Vidic <vvidic@debian.org>
Date: Wed, 1 Nov 2023 18:25:45 +0100
Subject: [PATCH] exportfs: remove test for "fsid=" parameter
fsid parameter is now considered optional.
---
tools/ocft/exportfs | 5 -----
tools/ocft/exportfs-multidir | 5 -----
2 files changed, 10 deletions(-)
diff --git a/tools/ocft/exportfs b/tools/ocft/exportfs
index 285a4b8ea0..1ec3d4c364 100644
--- a/tools/ocft/exportfs
+++ b/tools/ocft/exportfs
@@ -28,11 +28,6 @@ CASE "check base env"
Include prepare
AgentRun start OCF_SUCCESS
-CASE "check base env: no 'OCF_RESKEY_fsid'"
- Include prepare
- Env OCF_RESKEY_fsid=
- AgentRun start OCF_ERR_CONFIGURED
-
CASE "check base env: invalid 'OCF_RESKEY_directory'"
Include prepare
Env OCF_RESKEY_directory=/no_such
diff --git a/tools/ocft/exportfs-multidir b/tools/ocft/exportfs-multidir
index 00e41f0859..ac6d5c7f6a 100644
--- a/tools/ocft/exportfs-multidir
+++ b/tools/ocft/exportfs-multidir
@@ -28,11 +28,6 @@ CASE "check base env"
Include prepare
AgentRun start OCF_SUCCESS
-CASE "check base env: no 'OCF_RESKEY_fsid'"
- Include prepare
- Env OCF_RESKEY_fsid=
- AgentRun start OCF_ERR_CONFIGURED
-
CASE "check base env: invalid 'OCF_RESKEY_directory'"
Include prepare
Env OCF_RESKEY_directory=/no_such

@ -1,45 +0,0 @@
From e4f84ae185b6943d1ff461d53c7f1b5295783086 Mon Sep 17 00:00:00 2001
From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
Date: Wed, 1 Nov 2023 19:35:21 +0100
Subject: [PATCH] findif.sh: fix loopback handling
tools/ocft/IPaddr2 fails the loopback test because of the missing
table local parameter:
$ ip -o -f inet route list match 127.0.0.3 scope host
$ ip -o -f inet route list match 127.0.0.3 table local scope host
local 127.0.0.0/8 dev lo proto kernel src 127.0.0.1
Also rename the function because it is called only in for the special
loopback address case.
---
heartbeat/findif.sh | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 5f1c19ec3..7c766e6e0 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -29,10 +29,10 @@ prefixcheck() {
fi
return 0
}
-getnetworkinfo()
+getloopbackinfo()
{
local line netinfo
- ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table:=main}" scope host | (while read line;
+ ip -o -f inet route list match $OCF_RESKEY_ip table local scope host | (while read line;
do
netinfo=`echo $line | awk '{print $2}'`
case $netinfo in
@@ -222,7 +222,7 @@ findif()
if [ $# = 0 ] ; then
case $OCF_RESKEY_ip in
127.*)
- set -- `getnetworkinfo`
+ set -- `getloopbackinfo`
shift;;
esac
fi

@ -1,20 +0,0 @@
--- a/heartbeat/findif.sh 2024-02-08 11:31:53.414257686 +0100
+++ b/heartbeat/findif.sh 2023-11-02 10:20:12.150853167 +0100
@@ -210,14 +210,14 @@
fi
findif_check_params $family || return $?
- if [ -n "$netmask" ] ; then
+ if [ -n "$netmask" ]; then
match=$match/$netmask
fi
if [ -n "$nic" ] ; then
# NIC supports more than two.
- set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
else
- set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
fi
if [ $# = 0 ] ; then
case $OCF_RESKEY_ip in

@ -1,343 +0,0 @@
From fc0657b936f6a58f741e33f851b22f82bc68bffa Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 6 Feb 2024 13:28:12 +0100
Subject: [PATCH 1/2] ocf-shellfuncs: add curl_retry()
---
heartbeat/ocf-shellfuncs.in | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index c5edb6f57..a69a9743d 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -672,6 +672,40 @@ EOF
systemctl daemon-reload
}
+# usage: curl_retry RETRIES SLEEP ARGS URL
+#
+# Use --show-error in ARGS to log HTTP error code
+#
+# returns:
+# 0 success
+# exit:
+# 1 fail
+curl_retry()
+{
+ local retries=$1 sleep=$2 opts=$3 url=$4
+ local tries=$(($retries + 1))
+ local args="--fail $opts $url"
+ local result rc
+
+ for try in $(seq $tries); do
+ ocf_log debug "curl $args try $try of $tries"
+ result=$(echo "$args" | xargs curl 2>&1)
+ rc=$?
+
+ ocf_log debug "result: $result"
+ [ $rc -eq 0 ] && break
+ sleep $sleep
+ done
+
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "curl $args failed $tries tries"
+ exit $OCF_ERR_GENERIC
+ fi
+
+ echo "$result"
+ return $rc
+}
+
# usage: crm_mon_no_validation args...
# run crm_mon without any cib schema validation
# This is useful when an agent runs in a bundle to avoid potential
From 80d330557319bdae9e45aad1279e435fc481d4e7 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 6 Feb 2024 13:28:25 +0100
Subject: [PATCH 2/2] AWS agents: use curl_retry()
---
heartbeat/aws-vpc-move-ip | 35 ++++++++++++++++++++++++++---------
heartbeat/aws-vpc-route53.in | 27 +++++++++++++++++++++++++--
heartbeat/awseip | 36 +++++++++++++++++++++++++++++++-----
heartbeat/awsvip | 32 ++++++++++++++++++++++++++++----
4 files changed, 110 insertions(+), 20 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 54806f6ea..6115e5ba8 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -47,6 +47,8 @@ OCF_RESKEY_interface_default="eth0"
OCF_RESKEY_iflabel_default=""
OCF_RESKEY_monapi_default="false"
OCF_RESKEY_lookup_type_default="InstanceId"
+OCF_RESKEY_curl_retries_default="3"
+OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
@@ -60,6 +62,8 @@ OCF_RESKEY_lookup_type_default="InstanceId"
: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
: ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}}
: ${OCF_RESKEY_lookup_type=${OCF_RESKEY_lookup_type_default}}
+: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
+: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
#######################################################################
@@ -194,6 +198,22 @@ Name of resource type to lookup in route table.
<content type="string" default="${OCF_RESKEY_lookup_type_default}" />
</parameter>
+<parameter name="curl_retries" unique="0">
+<longdesc lang="en">
+curl retries before failing
+</longdesc>
+<shortdesc lang="en">curl retries</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_retries_default}" />
+</parameter>
+
+<parameter name="curl_sleep" unique="0">
+<longdesc lang="en">
+curl sleep between tries
+</longdesc>
+<shortdesc lang="en">curl sleep</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_sleep_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -250,8 +270,10 @@ ec2ip_validate() {
fi
fi
- TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
- EC2_INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN")
+ TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+ EC2_INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
if [ -z "${EC2_INSTANCE_ID}" ]; then
ocf_exit_reason "Instance ID not found. Is this a EC2 instance?"
@@ -365,14 +387,9 @@ ec2ip_get_instance_eni() {
fi
ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
- cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id -H \"X-aws-ec2-metadata-token: $TOKEN\""
- ocf_log debug "executing command: $cmd"
+ cmd="curl_retry \"$OCF_RESKEY_curl_retries\" \"$OCF_RESKEY_curl_sleep\" \"--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'\" \"http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id\""
EC2_NETWORK_INTERFACE_ID="$(eval $cmd)"
- rc=$?
- if [ $rc != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
- fi
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
ocf_log debug "network interface id associated MAC address ${MAC_ADDR}: ${EC2_NETWORK_INTERFACE_ID}"
echo $EC2_NETWORK_INTERFACE_ID
}
diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
index 18ab157e8..eba2ed95c 100644
--- a/heartbeat/aws-vpc-route53.in
+++ b/heartbeat/aws-vpc-route53.in
@@ -53,6 +53,8 @@ OCF_RESKEY_hostedzoneid_default=""
OCF_RESKEY_fullname_default=""
OCF_RESKEY_ip_default="local"
OCF_RESKEY_ttl_default=10
+OCF_RESKEY_curl_retries_default="3"
+OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
@@ -62,6 +64,8 @@ OCF_RESKEY_ttl_default=10
: ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}}
: ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}}
: ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}}
+: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
+: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
usage() {
cat <<-EOT
@@ -185,6 +189,22 @@ Time to live for Route53 ARECORD
<shortdesc lang="en">ARECORD TTL</shortdesc>
<content type="string" default="${OCF_RESKEY_ttl_default}" />
</parameter>
+
+<parameter name="curl_retries" unique="0">
+<longdesc lang="en">
+curl retries before failing
+</longdesc>
+<shortdesc lang="en">curl retries</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_retries_default}" />
+</parameter>
+
+<parameter name="curl_sleep" unique="0">
+<longdesc lang="en">
+curl sleep between tries
+</longdesc>
+<shortdesc lang="en">curl sleep</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_sleep_default}" />
+</parameter>
</parameters>
<actions>
@@ -357,8 +377,11 @@ r53_monitor() {
_get_ip() {
case $OCF_RESKEY_ip in
local|public)
- TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
- IPADDRESS=$(curl -s http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4 -H "X-aws-ec2-metadata-token: $TOKEN");;
+ TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+ IPADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4")
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+ ;;
*.*.*.*)
IPADDRESS="${OCF_RESKEY_ip}";;
esac
diff --git a/heartbeat/awseip b/heartbeat/awseip
index 49b0ca615..ffb6223a1 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -49,12 +49,16 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
+OCF_RESKEY_curl_retries_default="3"
+OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
+: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
+: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
meta_data() {
cat <<END
@@ -141,6 +145,22 @@ a short delay between API calls, to avoid sending API too quick
<content type="integer" default="${OCF_RESKEY_api_delay_default}" />
</parameter>
+<parameter name="curl_retries" unique="0">
+<longdesc lang="en">
+curl retries before failing
+</longdesc>
+<shortdesc lang="en">curl retries</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_retries_default}" />
+</parameter>
+
+<parameter name="curl_sleep" unique="0">
+<longdesc lang="en">
+curl sleep between tries
+</longdesc>
+<shortdesc lang="en">curl sleep</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_sleep_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -171,14 +191,18 @@ awseip_start() {
awseip_monitor && return $OCF_SUCCESS
if [ -n "${PRIVATE_IP_ADDRESS}" ]; then
- NETWORK_INTERFACES_MACS=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -H "X-aws-ec2-metadata-token: $TOKEN")
+ NETWORK_INTERFACES_MACS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "-s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/network/interfaces/macs/")
for MAC in ${NETWORK_INTERFACES_MACS}; do
- curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/local-ipv4s -H "X-aws-ec2-metadata-token: $TOKEN" |
+ curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "-s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC%/*}/local-ipv4s" |
grep -q "^${PRIVATE_IP_ADDRESS}$"
if [ $? -eq 0 ]; then
- NETWORK_ID=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/interface-id -H "X-aws-ec2-metadata-token: $TOKEN")
+ NETWORK_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "-s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC%/*}/interface-id")
fi
done
+ if [ -z "$NETWORK_ID" ]; then
+ ocf_exit_reason "Could not find network interface for private_ip_address: $PRIVATE_IP_ADDRESS"
+ exit $OCF_ERR_GENERIC
+ fi
$AWSCLI_CMD ec2 associate-address \
--network-interface-id ${NETWORK_ID} \
--allocation-id ${ALLOCATION_ID} \
@@ -282,8 +306,10 @@ fi
ELASTIC_IP="${OCF_RESKEY_elastic_ip}"
ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
-TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
-INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN")
+TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
case $__OCF_ACTION in
start)
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index bdb4d68dd..f2b238a0f 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -48,12 +48,16 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
+OCF_RESKEY_curl_retries_default="3"
+OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
+: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
+: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
meta_data() {
cat <<END
@@ -124,6 +128,22 @@ a short delay between API calls, to avoid sending API too quick
<content type="integer" default="${OCF_RESKEY_api_delay_default}" />
</parameter>
+<parameter name="curl_retries" unique="0">
+<longdesc lang="en">
+curl retries before failing
+</longdesc>
+<shortdesc lang="en">curl retries</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_retries_default}" />
+</parameter>
+
+<parameter name="curl_sleep" unique="0">
+<longdesc lang="en">
+curl sleep between tries
+</longdesc>
+<shortdesc lang="en">curl sleep</shortdesc>
+<content type="integer" default="${OCF_RESKEY_curl_sleep_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -246,10 +266,14 @@ if [ -n "${OCF_RESKEY_region}" ]; then
AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
fi
SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
-TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
-INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN")
-MAC_ADDRESS=$(curl -s http://169.254.169.254/latest/meta-data/mac -H "X-aws-ec2-metadata-token: $TOKEN")
-NETWORK_ID=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDRESS}/interface-id -H "X-aws-ec2-metadata-token: $TOKEN")
+TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+MAC_ADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/mac")
+[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+NETWORK_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDRESS}/interface-id")
+[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
case $__OCF_ACTION in
start)

@ -1,555 +0,0 @@
From f45f76600a7e02c860566db7d1350dc3b09449c2 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 6 Nov 2023 15:49:44 +0100
Subject: [PATCH] aws-vpc-move-ip/aws-vpc-route53/awseip/awsvip: add auth_type
parameter and AWS Policy based authentication type
---
heartbeat/aws-vpc-move-ip | 43 +++++++++++++++++++----
heartbeat/aws-vpc-route53.in | 47 ++++++++++++++++++++-----
heartbeat/awseip | 68 +++++++++++++++++++++++++++---------
heartbeat/awsvip | 60 ++++++++++++++++++++++++-------
4 files changed, 173 insertions(+), 45 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index dee040300f..54806f6eaa 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -36,6 +36,7 @@
# Defaults
OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_ip_default=""
@@ -48,6 +49,7 @@ OCF_RESKEY_monapi_default="false"
OCF_RESKEY_lookup_type_default="InstanceId"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
+: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}
@@ -58,8 +60,6 @@ OCF_RESKEY_lookup_type_default="InstanceId"
: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
: ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}}
: ${OCF_RESKEY_lookup_type=${OCF_RESKEY_lookup_type_default}}
-
-[ -n "$OCF_RESKEY_region" ] && region_opt="--region $OCF_RESKEY_region"
#######################################################################
@@ -83,6 +83,10 @@ cat <<END
<longdesc lang="en">
Resource Agent to move IP addresses within a VPC of the Amazon Webservices EC2
by changing an entry in an specific routing table
+
+Credentials needs to be setup by running "aws configure", or by using AWS Policies.
+
+See https://aws.amazon.com/cli/ for more information about awscli.
</longdesc>
<shortdesc lang="en">Move IP within a VPC of the AWS EC2</shortdesc>
@@ -95,6 +99,15 @@ Path to command line tools for AWS
<content type="string" default="${OCF_RESKEY_awscli_default}" />
</parameter>
+<parameter name="auth_type">
+<longdesc lang="en">
+Authentication type "key" for AccessKey and SecretAccessKey set via "aws configure",
+or "role" to use AWS Policies.
+</longdesc>
+<shortdesc lang="en">Authentication type</shortdesc>
+<content type="string" default="${OCF_RESKEY_auth_type_default}" />
+</parameter>
+
<parameter name="profile">
<longdesc lang="en">
Valid AWS CLI profile name (see ~/.aws/config and 'aws configure')
@@ -198,7 +211,7 @@ END
execute_cmd_as_role(){
cmd=$1
role=$2
- output="$($OCF_RESKEY_awscli sts assume-role --role-arn $role --role-session-name AWSCLI-RouteTableUpdate --profile $OCF_RESKEY_profile $region_opt --output=text)"
+ output="$($AWSCLI_CMD sts assume-role --role-arn $role --role-session-name AWSCLI-RouteTableUpdate --output=text)"
export AWS_ACCESS_KEY_ID="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $5}')"
export AWS_SECRET_ACCESS_KEY="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $7}')"
export AWS_SESSION_TOKEN="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $8}')"
@@ -220,11 +233,11 @@ ec2ip_set_address_param_compat(){
}
ec2ip_validate() {
- for cmd in $OCF_RESKEY_awscli ip curl; do
+ for cmd in "$OCF_RESKEY_awscli" ip curl; do
check_binary "$cmd"
done
- if [ -z "$OCF_RESKEY_profile" ]; then
+ if [ "x${OCF_RESKEY_auth_type}" = "xkey" ] && [ -z "$OCF_RESKEY_profile" ]; then
ocf_exit_reason "profile parameter not set"
return $OCF_ERR_CONFIGURED
fi
@@ -262,7 +275,7 @@ ec2ip_monitor() {
for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do
ocf_log info "monitor: check routing table (API call) - $rtb"
if [ -z "${OCF_RESKEY_routing_table_role}" ]; then
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile $region_opt --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].$OCF_RESKEY_lookup_type"
+ cmd="$AWSCLI_CMD --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].$OCF_RESKEY_lookup_type"
ocf_log debug "executing command: $cmd"
ROUTE_TO_INSTANCE="$($cmd)"
else
@@ -368,7 +381,7 @@ ec2ip_get_and_configure() {
EC2_NETWORK_INTERFACE_ID="$(ec2ip_get_instance_eni)"
for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do
if [ -z "${OCF_RESKEY_routing_table_role}" ]; then
- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile $region_opt --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID"
+ cmd="$AWSCLI_CMD --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID"
ocf_log debug "executing command: $cmd"
$cmd
else
@@ -475,6 +488,22 @@ if ! ocf_is_root; then
exit $OCF_ERR_PERM
fi
+AWSCLI_CMD="${OCF_RESKEY_awscli}"
+if [ "x${OCF_RESKEY_auth_type}" = "xkey" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --profile ${OCF_RESKEY_profile}"
+elif [ "x${OCF_RESKEY_auth_type}" = "xrole" ]; then
+ if [ -z "${OCF_RESKEY_region}" ]; then
+ ocf_exit_reason "region needs to be set when using role-based authentication"
+ exit $OCF_ERR_CONFIGURED
+ fi
+else
+ ocf_exit_reason "Incorrect auth_type: ${OCF_RESKEY_auth_type}"
+ exit $OCF_ERR_CONFIGURED
+fi
+if [ -n "${OCF_RESKEY_region}" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
+fi
+
ec2ip_set_address_param_compat
ec2ip_validate
diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
index 22cbb35833..18ab157e8a 100644
--- a/heartbeat/aws-vpc-route53.in
+++ b/heartbeat/aws-vpc-route53.in
@@ -46,24 +46,22 @@
# Defaults
OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
+OCF_RESKEY_region_default=""
OCF_RESKEY_hostedzoneid_default=""
OCF_RESKEY_fullname_default=""
OCF_RESKEY_ip_default="local"
OCF_RESKEY_ttl_default=10
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
+: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
+: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_hostedzoneid:=${OCF_RESKEY_hostedzoneid_default}}
: ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}}
: ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}}
: ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}}
-#######################################################################
-
-
-AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10"
-#######################################################################
-
usage() {
cat <<-EOT
@@ -123,6 +121,15 @@ Path to command line tools for AWS
<content type="string" default="${OCF_RESKEY_awscli_default}" />
</parameter>
+<parameter name="auth_type">
+<longdesc lang="en">
+Authentication type "key" for AccessKey and SecretAccessKey set via "aws configure",
+or "role" to use AWS Policies.
+</longdesc>
+<shortdesc lang="en">Authentication type</shortdesc>
+<content type="string" default="${OCF_RESKEY_auth_type_default}" />
+</parameter>
+
<parameter name="profile">
<longdesc lang="en">
The name of the AWS CLI profile of the root account. This
@@ -196,7 +203,7 @@ r53_validate() {
# Check for required binaries
ocf_log debug "Checking for required binaries"
- for command in curl dig; do
+ for command in "${OCF_RESKEY_awscli}" curl dig; do
check_binary "$command"
done
@@ -216,7 +223,10 @@ r53_validate() {
esac
# profile
- [[ -z "$OCF_RESKEY_profile" ]] && ocf_log error "AWS CLI profile not set $OCF_RESKEY_profile!" && exit $OCF_ERR_CONFIGURED
+ if [ "x${OCF_RESKEY_auth_type}" = "xkey" ] && [ -z "$OCF_RESKEY_profile" ]; then
+ ocf_exit_reason "profile parameter not set"
+ return $OCF_ERR_CONFIGURED
+ fi
# TTL
[[ -z "$OCF_RESKEY_ttl" ]] && ocf_log error "TTL not set $OCF_RESKEY_ttl!" && exit $OCF_ERR_CONFIGURED
@@ -417,7 +427,6 @@ _update_record() {
}
###############################################################################
-
case $__OCF_ACTION in
usage|help)
usage
@@ -427,6 +436,26 @@ case $__OCF_ACTION in
metadata
exit $OCF_SUCCESS
;;
+esac
+
+AWSCLI_CMD="${OCF_RESKEY_awscli}"
+if [ "x${OCF_RESKEY_auth_type}" = "xkey" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --profile ${OCF_RESKEY_profile}"
+elif [ "x${OCF_RESKEY_auth_type}" = "xrole" ]; then
+ if [ -z "${OCF_RESKEY_region}" ]; then
+ ocf_exit_reason "region needs to be set when using role-based authentication"
+ exit $OCF_ERR_CONFIGURED
+ fi
+else
+ ocf_exit_reason "Incorrect auth_type: ${OCF_RESKEY_auth_type}"
+ exit $OCF_ERR_CONFIGURED
+fi
+if [ -n "${OCF_RESKEY_region}" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
+fi
+AWSCLI_CMD="$AWSCLI_CMD --cli-connect-timeout 10"
+
+case $__OCF_ACTION in
start)
r53_validate || exit $?
r53_start
diff --git a/heartbeat/awseip b/heartbeat/awseip
index dc48460c85..49b0ca6155 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -23,7 +23,8 @@
#
# Prerequisites:
#
-# - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.)
+# - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.) or
+# (AWSRole) Setup up relevant AWS Policies to allow agent related functions to be executed.
# - a reserved secondary private IP address for EC2 instances high availability
# - IAM user role with the following permissions:
# * DescribeInstances
@@ -44,11 +45,15 @@
# Defaults
#
OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
+OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
+: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
+: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
meta_data() {
@@ -63,7 +68,7 @@ Resource Agent for Amazon AWS Elastic IP Addresses.
It manages AWS Elastic IP Addresses with awscli.
-Credentials needs to be setup by running "aws configure".
+Credentials needs to be setup by running "aws configure", or by using AWS Policies.
See https://aws.amazon.com/cli/ for more information about awscli.
</longdesc>
@@ -79,6 +84,15 @@ command line tools for aws services
<content type="string" default="${OCF_RESKEY_awscli_default}" />
</parameter>
+<parameter name="auth_type">
+<longdesc lang="en">
+Authentication type "key" for AccessKey and SecretAccessKey set via "aws configure",
+or "role" to use AWS Policies.
+</longdesc>
+<shortdesc lang="en">Authentication type</shortdesc>
+<content type="string" default="${OCF_RESKEY_auth_type_default}" />
+</parameter>
+
<parameter name="profile">
<longdesc lang="en">
Valid AWS CLI profile name (see ~/.aws/config and 'aws configure')
@@ -111,6 +125,14 @@ predefined private ip address for ec2 instance
<content type="string" default="" />
</parameter>
+<parameter name="region" required="0">
+<longdesc lang="en">
+Region for AWS resource (required for role-based authentication)
+</longdesc>
+<shortdesc lang="en">Region</shortdesc>
+<content type="string" default="${OCF_RESKEY_region_default}" />
+</parameter>
+
<parameter name="api_delay" unique="0">
<longdesc lang="en">
a short delay between API calls, to avoid sending API too quick
@@ -157,13 +179,13 @@ awseip_start() {
NETWORK_ID=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/interface-id -H "X-aws-ec2-metadata-token: $TOKEN")
fi
done
- $AWSCLI --profile $OCF_RESKEY_profile ec2 associate-address \
+ $AWSCLI_CMD ec2 associate-address \
--network-interface-id ${NETWORK_ID} \
--allocation-id ${ALLOCATION_ID} \
--private-ip-address ${PRIVATE_IP_ADDRESS}
RET=$?
else
- $AWSCLI --profile $OCF_RESKEY_profile ec2 associate-address \
+ $AWSCLI_CMD ec2 associate-address \
--instance-id ${INSTANCE_ID} \
--allocation-id ${ALLOCATION_ID}
RET=$?
@@ -183,7 +205,7 @@ awseip_start() {
awseip_stop() {
awseip_monitor || return $OCF_SUCCESS
- ASSOCIATION_ID=$($AWSCLI --profile $OCF_RESKEY_profile --output json ec2 describe-addresses \
+ ASSOCIATION_ID=$($AWSCLI_CMD --output json ec2 describe-addresses \
--allocation-id ${ALLOCATION_ID} | grep -m 1 "AssociationId" | awk -F'"' '{print$4}')
if [ -z "${ASSOCIATION_ID}" ]; then
@@ -191,9 +213,7 @@ awseip_stop() {
return $OCF_NOT_RUNNING
fi
- $AWSCLI --profile ${OCF_RESKEY_profile} \
- ec2 disassociate-address \
- --association-id ${ASSOCIATION_ID}
+ $AWSCLI_CMD ec2 disassociate-address --association-id ${ASSOCIATION_ID}
RET=$?
# delay to avoid sending request too fast
@@ -208,7 +228,7 @@ awseip_stop() {
}
awseip_monitor() {
- $AWSCLI --profile $OCF_RESKEY_profile ec2 describe-instances --instance-id "${INSTANCE_ID}" | grep -q "${ELASTIC_IP}"
+ $AWSCLI_CMD ec2 describe-instances --instance-id "${INSTANCE_ID}" | grep -q "${ELASTIC_IP}"
RET=$?
if [ $RET -ne 0 ]; then
@@ -218,9 +238,9 @@ awseip_monitor() {
}
awseip_validate() {
- check_binary ${AWSCLI}
+ check_binary "${OCF_RESKEY_awscli}"
- if [ -z "$OCF_RESKEY_profile" ]; then
+ if [ "x${OCF_RESKEY_auth_type}" = "xkey" ] && [ -z "$OCF_RESKEY_profile" ]; then
ocf_exit_reason "profile parameter not set"
return $OCF_ERR_CONFIGURED
fi
@@ -238,9 +258,27 @@ case $__OCF_ACTION in
meta_data
exit $OCF_SUCCESS
;;
-esac
+ usage|help)
+ awseip_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
-AWSCLI="${OCF_RESKEY_awscli}"
+AWSCLI_CMD="${OCF_RESKEY_awscli}"
+if [ "x${OCF_RESKEY_auth_type}" = "xkey" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --profile ${OCF_RESKEY_profile}"
+elif [ "x${OCF_RESKEY_auth_type}" = "xrole" ]; then
+ if [ -z "${OCF_RESKEY_region}" ]; then
+ ocf_exit_reason "region needs to be set when using role-based authentication"
+ exit $OCF_ERR_CONFIGURED
+ fi
+else
+ ocf_exit_reason "Incorrect auth_type: ${OCF_RESKEY_auth_type}"
+ exit $OCF_ERR_CONFIGURED
+fi
+if [ -n "${OCF_RESKEY_region}" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
+fi
ELASTIC_IP="${OCF_RESKEY_elastic_ip}"
ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
@@ -272,10 +310,6 @@ case $__OCF_ACTION in
validate|validate-all)
awseip_validate
;;
- usage|help)
- awseip_usage
- exit $OCF_SUCCESS
- ;;
*)
awseip_usage
exit $OCF_ERR_UNIMPLEMENTED
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index 037278e296..bdb4d68dd0 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -23,7 +23,8 @@
#
# Prerequisites:
#
-# - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.)
+# - preconfigured AWS CLI running environment (AccessKey, SecretAccessKey, etc.) or
+# (AWSRole) Setup up relevant AWS Policies to allow agent related functions to be executed.
# - a reserved secondary private IP address for EC2 instances high availablity
# - IAM user role with the following permissions:
# * DescribeInstances
@@ -43,11 +44,15 @@
# Defaults
#
OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
+OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
+: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
+: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
meta_data() {
@@ -62,7 +67,7 @@ Resource Agent for Amazon AWS Secondary Private IP Addresses.
It manages AWS Secondary Private IP Addresses with awscli.
-Credentials needs to be setup by running "aws configure".
+Credentials needs to be setup by running "aws configure", or by using AWS Policies.
See https://aws.amazon.com/cli/ for more information about awscli.
</longdesc>
@@ -78,6 +83,15 @@ command line tools for aws services
<content type="string" default="${OCF_RESKEY_awscli_default}" />
</parameter>
+<parameter name="auth_type">
+<longdesc lang="en">
+Authentication type "key" for AccessKey and SecretAccessKey set via "aws configure",
+or "role" to use AWS Policies.
+</longdesc>
+<shortdesc lang="en">Authentication type</shortdesc>
+<content type="string" default="${OCF_RESKEY_auth_type_default}" />
+</parameter>
+
<parameter name="profile">
<longdesc lang="en">
Valid AWS CLI profile name (see ~/.aws/config and 'aws configure')
@@ -94,6 +108,14 @@ reserved secondary private ip for ec2 instance
<content type="string" default="" />
</parameter>
+<parameter name="region" required="0">
+<longdesc lang="en">
+Region for AWS resource (required for role-based authentication)
+</longdesc>
+<shortdesc lang="en">Region</shortdesc>
+<content type="string" default="${OCF_RESKEY_region_default}" />
+</parameter>
+
<parameter name="api_delay" unique="0">
<longdesc lang="en">
a short delay between API calls, to avoid sending API too quick
@@ -131,7 +153,7 @@ END
awsvip_start() {
awsvip_monitor && return $OCF_SUCCESS
- $AWSCLI --profile $OCF_RESKEY_profile ec2 assign-private-ip-addresses \
+ $AWSCLI_CMD ec2 assign-private-ip-addresses \
--network-interface-id ${NETWORK_ID} \
--private-ip-addresses ${SECONDARY_PRIVATE_IP} \
--allow-reassignment
@@ -151,7 +173,7 @@ awsvip_start() {
awsvip_stop() {
awsvip_monitor || return $OCF_SUCCESS
- $AWSCLI --profile $OCF_RESKEY_profile ec2 unassign-private-ip-addresses \
+ $AWSCLI_CMD ec2 unassign-private-ip-addresses \
--network-interface-id ${NETWORK_ID} \
--private-ip-addresses ${SECONDARY_PRIVATE_IP}
RET=$?
@@ -168,7 +190,7 @@ awsvip_stop() {
}
awsvip_monitor() {
- $AWSCLI --profile ${OCF_RESKEY_profile} ec2 describe-instances \
+ $AWSCLI_CMD ec2 describe-instances \
--instance-id "${INSTANCE_ID}" \
--query 'Reservations[].Instances[].NetworkInterfaces[].PrivateIpAddresses[].PrivateIpAddress[]' \
--output text | \
@@ -182,9 +204,9 @@ awsvip_monitor() {
}
awsvip_validate() {
- check_binary ${AWSCLI}
+ check_binary "${OCF_RESKEY_awscli}"
- if [ -z "$OCF_RESKEY_profile" ]; then
+ if [ "x${OCF_RESKEY_auth_type}" = "xkey" ] && [ -z "$OCF_RESKEY_profile" ]; then
ocf_exit_reason "profile parameter not set"
return $OCF_ERR_CONFIGURED
fi
@@ -202,9 +224,27 @@ case $__OCF_ACTION in
meta_data
exit $OCF_SUCCESS
;;
+ usage|help)
+ awsvip_usage
+ exit $OCF_SUCCESS
+ ;;
esac
-AWSCLI="${OCF_RESKEY_awscli}"
+AWSCLI_CMD="${OCF_RESKEY_awscli}"
+if [ "x${OCF_RESKEY_auth_type}" = "xkey" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --profile ${OCF_RESKEY_profile}"
+elif [ "x${OCF_RESKEY_auth_type}" = "xrole" ]; then
+ if [ -z "${OCF_RESKEY_region}" ]; then
+ ocf_exit_reason "region needs to be set when using role-based authentication"
+ exit $OCF_ERR_CONFIGURED
+ fi
+else
+ ocf_exit_reason "Incorrect auth_type: ${OCF_RESKEY_auth_type}"
+ exit $OCF_ERR_CONFIGURED
+fi
+if [ -n "${OCF_RESKEY_region}" ]; then
+ AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
+fi
SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN")
@@ -236,10 +276,6 @@ case $__OCF_ACTION in
validate|validate-all)
awsvip_validate
;;
- usage|help)
- awsvip_usage
- exit $OCF_SUCCESS
- ;;
*)
awsvip_usage
exit $OCF_ERR_UNIMPLEMENTED

@ -1,71 +0,0 @@
From 54fa7a59c36697cd8df5b619fff0b50af00df76e Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 20 Nov 2023 16:35:52 +0100
Subject: [PATCH 1/2] storage_mon: fix file handler out of scope leak and
uninitialized values
---
tools/storage_mon.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 1aae29e58..cc415e97f 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -382,9 +382,11 @@ static int write_pid_file(const char *pidfile)
syslog(LOG_ERR, "Failed to write '%s' to %s: %s", pid, pidfile, strerror(errno));
goto done;
}
- close(fd);
rc = 0;
done:
+ if (fd != -1) {
+ close(fd);
+ }
if (pid != NULL) {
free(pid);
}
@@ -663,6 +665,7 @@ storage_mon_client(void)
snprintf(request.message, SMON_MAX_MSGSIZE, "%s", SMON_GET_RESULT_COMMAND);
request.hdr.id = 0;
request.hdr.size = sizeof(struct storage_mon_check_value_req);
+ response.hdr.id = 0;
rc = qb_ipcc_send(conn, &request, request.hdr.size);
if (rc < 0) {
syslog(LOG_ERR, "qb_ipcc_send error : %d\n", rc);
@@ -683,7 +686,11 @@ storage_mon_client(void)
/* greater than 0 : monitoring error. */
/* -1 : communication system error. */
/* -2 : Not all checks completed for first device in daemon mode. */
- rc = atoi(response.message);
+ if (strnlen(response.message, 1)) {
+ rc = atoi(response.message);
+ } else {
+ rc = -1;
+ }
syslog(LOG_DEBUG, "daemon response[%d]: %s \n", response.hdr.id, response.message);
From b23ba4eaefb500199c4845751f4c5545c81f42f1 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 20 Nov 2023 16:37:37 +0100
Subject: [PATCH 2/2] findif: also check that netmaskbits != EOS
---
tools/findif.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/findif.c b/tools/findif.c
index a25395fec..ab108a3c4 100644
--- a/tools/findif.c
+++ b/tools/findif.c
@@ -669,7 +669,7 @@ main(int argc, char ** argv) {
}
}
- if (netmaskbits) {
+ if (netmaskbits != NULL && *netmaskbits != EOS) {
best_netmask = netmask;
}else if (best_netmask == 0L) {
/*

@ -1,23 +0,0 @@
From cb968378959b8aa334e98daf62a1b08ef6525fb4 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 22 Nov 2023 10:32:31 +0100
Subject: [PATCH] storage_mon: use memset() to fix "uninitialized value"
covscan error, as qb_ipcc_recv() will always set a message (according to
honzaf)
---
tools/storage_mon.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index cc415e97f..a9227ef90 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -655,6 +655,7 @@ storage_mon_client(void)
int32_t rc;
+ memset(&response, 0, sizeof(response));
snprintf(ipcs_name, SMON_MAX_IPCSNAME, "storage_mon_%s", attrname);
conn = qb_ipcc_connect(ipcs_name, 0);
if (conn == NULL) {

@ -1,46 +0,0 @@
From 65a066cf9066390db65c4875e21c4c391793b9ae Mon Sep 17 00:00:00 2001
From: Arslan Ahmad <arslan.ahmad97@googlemail.com>
Date: Tue, 16 Jan 2024 09:11:17 +0530
Subject: [PATCH] Avoid false positive for VG activation
When lvm.conf file has `volume_list` parameter configured and the
cluster is managing the shared storage using `system_id_source`,
then the activation of the LV fails to happen. However it is
reported as a success.
The fixes will avoid starting of `LVM-activate` resource when
the cluster is configured with both `system_id_source` and
`volume_list`.
Signed-off-by: Arslan Ahmad <arslan.ahmad97@googlemail.com>
---
heartbeat/LVM-activate | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index f6f24a3b5..3858ed8dc 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -448,6 +448,10 @@ systemid_check()
{
# system_id_source is set in lvm.conf
source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
+
+ # Is volume_list set in lvm.conf
+ vol_list=$(lvmconfig 'activation/volume_list' 2>/dev/null | cut -d"=" -f2)
+
if [ "$source" = "" ] || [ "$source" = "none" ]; then
ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
exit $OCF_ERR_ARGS
@@ -458,6 +462,11 @@ systemid_check()
exit $OCF_ERR_ARGS
fi
+ if [ -n "$source" ] && [ -n "$vol_list" ]; then
+ ocf_exit_reason "Both system_id_source & volume_list cannot be defined!"
+ exit $OCF_ERR_ARGS
+ fi
+
return $OCF_SUCCESS
}

@ -1,40 +0,0 @@
From 264e38e02cb4c04877e412bac254e42c7f6b2e1c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 20 Feb 2024 12:34:42 +0100
Subject: [PATCH] Filesystem: fail when leading or trailing whitespace is
present in device or directory parameters
---
heartbeat/Filesystem | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index e1378f781..f88e3b552 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -995,6 +995,12 @@ if [ -n "${OCF_RESKEY_force_unmount}" ]; then
fi
DEVICE="$OCF_RESKEY_device"
+case "$DEVICE" in
+ [[:space:]]*|*[[:space:]])
+ ocf_exit_reason "device parameter does not accept leading or trailing whitespace characters"
+ exit $OCF_ERR_CONFIGURED
+ ;;
+esac
FSTYPE=$OCF_RESKEY_fstype
if [ ! -z "$OCF_RESKEY_options" ]; then
options="-o $OCF_RESKEY_options"
@@ -1032,6 +1038,12 @@ if [ -z "$OCF_RESKEY_directory" ]; then
else
MOUNTPOINT="$(echo "$OCF_RESKEY_directory" | sed 's/\/*$//')"
: ${MOUNTPOINT:=/}
+ case "$MOUNTPOINT" in
+ [[:space:]]*|*[[:space:]])
+ ocf_exit_reason "directory parameter does not accept leading or trailing whitespace characters"
+ exit $OCF_ERR_CONFIGURED
+ ;;
+ esac
if [ -e "$MOUNTPOINT" ] ; then
CANONICALIZED_MOUNTPOINT="$(readlink -f "$MOUNTPOINT")"
if [ $? -ne 0 ]; then

@ -1,30 +0,0 @@
From 1317efc72af6b72d9fb37aea18dc16129c146148 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 25 Jun 2024 13:33:19 +0200
Subject: [PATCH] Filesystem: return success during stop-action when leading or
trailing whitespace is present in device or directory parameters
---
heartbeat/Filesystem | 2 ++
1 file changed, 2 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 8e0127531..3eb520e0c 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1037,6 +1037,7 @@ fi
DEVICE="$OCF_RESKEY_device"
case "$DEVICE" in
[[:space:]]*|*[[:space:]])
+ [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
ocf_exit_reason "device parameter does not accept leading or trailing whitespace characters"
exit $OCF_ERR_CONFIGURED
;;
@@ -1080,6 +1081,7 @@ else
: ${MOUNTPOINT:=/}
case "$MOUNTPOINT" in
[[:space:]]*|*[[:space:]])
+ [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
ocf_exit_reason "directory parameter does not accept leading or trailing whitespace characters"
exit $OCF_ERR_CONFIGURED
;;

@ -1,75 +0,0 @@
From 4357f0dbb8668ac4090cd7070c2ea195e5683326 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Wed, 24 Jan 2024 13:27:26 +0100
Subject: [PATCH] galera: allow joiner to report non-Primary during initial IST
It seems that with recent galera versions, when a galera node
joins a cluster, there is a small time window where the node is
connected to the primary component of the galera cluster, but it
might still be preparing its IST. During this time, it can report
itself as being 'not ready' and in 'non-primary' state.
Update the galera resource agent to allow the node to be in
non-primary state, but only if running a "promote" operation. Any
network partition during the promotion will be caught by the
promote timeout.
In reworking the promotion code, we move the check for primary
partition into the "galera_monitor" function. The check works
as before for regular "monitor" or "probe" operations.
Related-Bug: rhbz#2255414
---
heartbeat/galera.in | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/heartbeat/galera.in b/heartbeat/galera.in
index 6aed3e4b6d..b518595cb0 100755
--- a/heartbeat/galera.in
+++ b/heartbeat/galera.in
@@ -822,6 +822,11 @@ galera_promote()
return $rc
fi
+ # At this point, the mysql pidfile is created on disk and the
+ # mysql server is reacheable via its UNIX socket. If we are a
+ # joiner, SST transfers (rsync) have finished, but an IST may
+ # still be requested or ongoing
+
galera_monitor
rc=$?
if [ $rc != $OCF_SUCCESS -a $rc != $OCF_RUNNING_MASTER ]; then
@@ -835,12 +840,6 @@ galera_promote()
return $OCF_ERR_GENERIC
fi
- is_primary
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Failure. Master instance started, but is not in Primary mode."
- return $OCF_ERR_GENERIC
- fi
-
if ocf_is_true $bootstrap; then
promote_everyone
clear_bootstrap_node
@@ -991,8 +990,18 @@ galera_monitor()
fi
rc=$OCF_RUNNING_MASTER
else
- ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
- rc=$OCF_ERR_GENERIC
+ # It seems that with recent galera (26.4+), a joiner that is
+ # connected to a Primary component and is preparing its IST
+ # request might still temporarily report its state as
+ # Non-Primary. Do not fail in this case as the promote
+ # operation will loop until the IST finishes or the promote
+ # times out.
+ if [ "$__OCF_ACTION" = "promote" ] && ! ocf_is_true $(is_bootstrap); then
+ ocf_log info "local node <${NODENAME}> is receiving a State Transfer."
+ else
+ ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
+ rc=$OCF_ERR_GENERIC
+ fi
fi
return $rc

@ -1,25 +0,0 @@
From f717b4a3aa83c9124e62716f421b99e314d00233 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 12 Apr 2024 12:23:21 +0200
Subject: [PATCH] findif.sh: fix corner cases
---
heartbeat/findif.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 7c766e6e0..13484f827 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -215,9 +215,9 @@ findif()
fi
if [ -n "$nic" ] ; then
# NIC supports more than two.
- set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
else
- set -- $(ip -o -f $family route list match $match $scope | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
fi
if [ $# = 0 ] ; then
case $OCF_RESKEY_ip in

@ -1,365 +0,0 @@
From 12d73d53026d219be67c0d5353010ba08ab49e98 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 28 May 2024 09:45:55 +0200
Subject: [PATCH 1/3] findif.sh: add metric for IPv6 support and fail when
matching more than 1 route
---
heartbeat/findif.sh | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 13484f827..ca5d1a5c1 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -196,10 +196,13 @@ findif()
{
local match="$OCF_RESKEY_ip"
local family
+ local proto
local scope
local nic="$OCF_RESKEY_nic"
local netmask="$OCF_RESKEY_cidr_netmask"
local brdcast="$OCF_RESKEY_broadcast"
+ local metric
+ local routematch
echo $match | grep -qs ":"
if [ $? = 0 ] ; then
@@ -215,10 +218,19 @@ findif()
fi
if [ -n "$nic" ] ; then
# NIC supports more than two.
- set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
+ routematch=$(ip -o -f $family route list match $match $proto $scope | grep "dev $nic " | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
else
- set -- $(ip -o -f $family route list match $match $scope | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
+ routematch=$(ip -o -f $family route list match $match $proto $scope | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
fi
+ if [ "$family" = "inet6" ]; then
+ routematch=$(echo "$routematch" | grep -v "^default")
+ fi
+
+ if [ $(echo "$routematch" | wc -l) -gt 1 ]; then
+ ocf_exit_reason "More than 1 routes match $match. Unable to decide which route to use."
+ return $OCF_ERR_GENERIC
+ fi
+ set -- $routematch
if [ $# = 0 ] ; then
case $OCF_RESKEY_ip in
127.*)
@@ -255,6 +267,7 @@ findif()
return $OCF_ERR_GENERIC
fi
fi
- echo "$nic netmask $netmask broadcast $brdcast"
+ metric=$(echo "$@" | sed "s/.*metric[[:blank:]]\([^ ]\+\).*/\1/")
+ echo "$nic netmask $netmask broadcast $brdcast metric $metric"
return $OCF_SUCCESS
}
From 488c096d63fe0f7e15938e65483ba20628080198 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 28 May 2024 09:47:11 +0200
Subject: [PATCH 2/3] IPaddr2: use metric for IPv6
---
heartbeat/IPaddr2 | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 5f30b8f98..091bea418 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -561,10 +561,11 @@ ip_init() {
if
[ $rc -eq 0 ]
then
- NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //'`
+ NICINFO=`echo "$NICINFO" | sed -e 's/netmask\ //;s/broadcast\ //;s/metric\ //'`
NIC=`echo "$NICINFO" | cut -d" " -f1`
NETMASK=`echo "$NICINFO" | cut -d" " -f2`
BRDCAST=`echo "$NICINFO" | cut -d" " -f3`
+ METRIC=`echo "$NICINFO" | cut -d" " -f4`
else
# findif couldn't find the interface
if ocf_is_probe; then
@@ -659,13 +660,14 @@ delete_interface () {
# Add an interface
#
add_interface () {
- local cmd msg extra_opts ipaddr netmask broadcast iface label
+ local cmd msg extra_opts ipaddr netmask broadcast iface label metric
ipaddr="$1"
netmask="$2"
broadcast="$3"
iface="$4"
label="$5"
+ metric="$6"
if [ "$FAMILY" = "inet" ] && ocf_is_true $OCF_RESKEY_run_arping &&
check_binary arping; then
@@ -688,6 +690,9 @@ add_interface () {
fi
extra_opts=""
+ if [ "$FAMILY" = "inet6" ]; then
+ extra_opts="$extra_opts metric $metric"
+ fi
if [ "$FAMILY" = "inet6" ] && ocf_is_true "${OCF_RESKEY_nodad}"; then
extra_opts="$extra_opts nodad"
fi
@@ -1083,7 +1088,7 @@ ip_start() {
done
fi
- add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL
+ add_interface "$OCF_RESKEY_ip" "$NETMASK" "${BRDCAST:-none}" "$NIC" "$IFLABEL" "$METRIC"
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
From d1c4d1969381d3e35cfaaaaae522e5687a9ed88a Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 28 May 2024 09:47:56 +0200
Subject: [PATCH 3/3] IPsrcaddr: add IPv6 support
---
heartbeat/IPsrcaddr | 116 ++++++++++++++++++++++++++++++++------------
1 file changed, 85 insertions(+), 31 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index c732ce8df..1c87d5b7f 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -60,6 +60,7 @@ OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_destination_default="0.0.0.0/0"
OCF_RESKEY_proto_default=""
OCF_RESKEY_metric_default=""
+OCF_RESKEY_pref_default=""
OCF_RESKEY_table_default=""
: ${OCF_RESKEY_ipaddress=${OCF_RESKEY_ipaddress_default}}
@@ -67,6 +68,7 @@ OCF_RESKEY_table_default=""
: ${OCF_RESKEY_destination=${OCF_RESKEY_destination_default}}
: ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}}
: ${OCF_RESKEY_metric=${OCF_RESKEY_metric_default}}
+: ${OCF_RESKEY_pref=${OCF_RESKEY_pref_default}}
: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}}
#######################################################################
@@ -75,10 +77,13 @@ OCF_RESKEY_table_default=""
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
- CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination"
-CMDCHANGE="$IP2UTIL route change to "
+echo "$OCF_RESKEY_ipaddress" | grep -q ":" && FAMILY="inet6" || FAMILY="inet"
+[ "$FAMILY" = "inet6" ] && [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && OCF_RESKEY_destination="::/0"
-if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then
+ CMDSHOW="$IP2UTIL -f $FAMILY route show $TABLE to exact $OCF_RESKEY_destination"
+CMDCHANGE="$IP2UTIL -f $FAMILY route change to "
+
+if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] && [ "$OCF_RESKEY_destination" != "::/0" ]; then
CMDSHOW="$CMDSHOW src $OCF_RESKEY_ipaddress"
fi
@@ -153,6 +158,14 @@ Metric. Only needed if incorrect metric value is used.
<content type="string" default="${OCF_RESKEY_metric_default}" />
</parameter>
+<parameter name="pref">
+<longdesc lang="en">
+IPv6 route preference (low, medium or high). Only needed if incorrect pref value is used.
+</longdesc>
+<shortdesc lang="en">IPv6 route preference.</shortdesc>
+<content type="string" default="${OCF_RESKEY_pref_default}" />
+</parameter>
+
<parameter name="table">
<longdesc lang="en">
Table to modify and use for interface lookup. E.g. "local".
@@ -196,12 +209,21 @@ errorexit() {
# where the src clause "src Y.Y.Y.Y" may or may not be present
WS="[[:blank:]]"
-OCTET="[0-9]\{1,3\}"
-IPADDR="\($OCTET\.\)\{3\}$OCTET"
+case "$FAMILY" in
+ inet)
+ GROUP="[0-9]\{1,3\}"
+ IPADDR="\($GROUP\.\)\{3\}$GROUP"
+ ;;
+ inet6)
+ GROUP="[0-9a-f]\{0,4\}"
+ IPADDR="\($GROUP\:\)\{0,\}$GROUP"
+ ;;
+esac
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
-MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
-METRICCLAUSE=".*\(metric$WS[^ ]\+\)"
+MATCHROUTE="\(.*${WS}\)proto [^ ]\+\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
+METRICCLAUSE=".*\(metric$WS[^ ]\+\).*"
PROTOCLAUSE=".*\(proto$WS[^ ]\+\).*"
+PREFCLAUSE=".*\(pref$WS[^ ]\+\).*"
FINDIF=findif
# findif needs that to be set
@@ -216,17 +238,17 @@ srca_read() {
errorexit "more than 1 matching route exists"
# But there might still be no matching route
- [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && [ -z "$ROUTE" ] && \
+ ([ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] || [ "$OCF_RESKEY_destination" = "::/0" ]) && [ -z "$ROUTE" ] && \
! ocf_is_probe && [ "$__OCF_ACTION" != stop ] && errorexit "no matching route exists"
# Sed out the source ip address if it exists
- SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\3/p"`
+ SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\4/p"`
# and what remains after stripping out the source ip address clause
- ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\5/"`
+ ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\2\6/"`
# using "src <ip>" only returns output if there's a match
- if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ]; then
+ if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] && [ "$OCF_RESKEY_destination" != "::/0" ]; then
[ -z "$ROUTE" ] && return 1 || return 0
fi
@@ -249,12 +271,15 @@ srca_start() {
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC' failed"
+ # NetworkManager manages routes with proto static/kernel
+ [ -z "$OCF_RESKEY_proto" ] && echo "$PROTO" | grep -q "proto \(kernel\|static\)" && PROTO="proto keepalived"
- if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
- $CMDCHANGE $ROUTE_WO_SRC src $1 || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC $PREF || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC $PREF' failed"
+
+ if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] || [ "$OCF_RESKEY_destination" = "::/0" ]; then
+ $CMDCHANGE $ROUTE_WO_SRC $PROTO src $1 || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC $PROTO src $1' failed"
fi
rc=$?
fi
@@ -290,14 +315,15 @@ srca_stop() {
fi
PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
- OPTS="proto kernel scope $SCOPE src $PRIMARY_IP"
+ OPTS="proto kernel scope $SCOPE"
+ [ "$FAMILY" = "inet" ] && OPTS="$OPTS src $PRIMARY_IP"
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC $PREF || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC $PREF' failed"
- if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
- $CMDCHANGE $ROUTE_WO_SRC src $PRIMARY_IP || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $PRIMARY_IP' failed"
+ if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] || [ "$OCF_RESKEY_destination" = "::/0" ]; then
+ $CMDCHANGE $ROUTE_WO_SRC proto static || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC proto static' failed"
fi
return $?
@@ -330,7 +356,7 @@ CheckIP() {
case $ip in
*[!0-9.]*) #got invalid char
false;;
- .*|*.) #begin or end by ".", which is invalid
+ .*|*.) #begin or end with ".", which is invalid
false;;
*..*) #consecutive ".", which is invalid
false;;
@@ -356,6 +382,18 @@ CheckIP() {
return $? # This return is unnecessary, this comment too :)
}
+CheckIP6() {
+ ip="$1"
+ case $ip in
+ *[!0-9a-f:]*) #got invalid char
+ false;;
+ *:::*) # more than 2 consecutive ":", which is invalid
+ false;;
+ *::*::*) # more than 1 "::", which is invalid
+ false;;
+ esac
+}
+
#
# Find out which interface or alias serves the given IP address
# The argument is an IP address, and its output
@@ -396,8 +434,7 @@ find_interface_solaris() {
# is an (aliased) interface name (e.g., "eth0" and "eth0:0").
#
find_interface_generic() {
-
- local iface=`$IP2UTIL -o -f inet addr show | grep "\ $BASEIP" \
+ local iface=`$IP2UTIL -o -f $FAMILY addr show | grep "\ $BASEIP" \
| cut -d ' ' -f2 | grep -v '^ipsec[0-9][0-9]*$'`
if [ -z "$iface" ]; then
return $OCF_ERR_GENERIC
@@ -502,7 +539,9 @@ srca_validate_all() {
# The IP address should be in good shape
if CheckIP "$ipaddress"; then
- :
+ :
+ elif CheckIP6 "$ipaddress"; then
+ :
else
ocf_exit_reason "Invalid IP address [$ipaddress]"
return $OCF_ERR_CONFIGURED
@@ -570,21 +609,36 @@ rc=$?
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
-LISTROUTE=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress`
+case "$FAMILY" in
+ inet)
+ LISTCMD="$IP2UTIL -f $FAMILY route list dev $INTERFACE scope link $PROTO match $ipaddress"
+ ;;
+ inet6)
+ LISTCMD="$IP2UTIL -f $FAMILY route list dev $INTERFACE $PROTO match $ipaddress"
+ ;;
+esac
+LISTROUTE=`$LISTCMD`
+
[ -z "$PROTO" ] && PROTO=`echo $LISTROUTE | sed -n "s/$PROTOCLAUSE/\1/p"`
if [ -n "$OCF_RESKEY_metric" ]; then
METRIC="metric $OCF_RESKEY_metric"
-elif [ -z "$TABLE" ] || [ "${TABLE#table }" = "main" ]; then
+elif [ -z "$TABLE" ] || [ "${TABLE#table }" = "main" ] || [ "$FAMILY" = "inet6" ]; then
METRIC=`echo $LISTROUTE | sed -n "s/$METRICCLAUSE/\1/p"`
else
METRIC=""
fi
-if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+if [ "$FAMILY" = "inet6" ]; then
+ if [ -z "$OCF_RESKEY_pref" ]; then
+ PREF=`echo $LISTROUTE | sed -n "s/$PREFCLAUSE/\1/p"`
+ else
+ PREF="pref $OCF_RESKEY_pref"
+ fi
+fi
+if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] || [ "$OCF_RESKEY_destination" = "::/0" ] ;then
NETWORK=`echo $LISTROUTE | grep -m 1 -o '^[^ ]*'`
if [ -z "$NETWORK" ]; then
- err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
- err_str="$err_str match $ipaddress' failed to find a matching route"
+ err_str="command '$LISTCMD' failed to find a matching route"
if [ "$__OCF_ACTION" = "start" ]; then
ocf_exit_reason "$err_str"

@ -1,22 +0,0 @@
From 4075aff88776e2811ebc83b735b2a70bcf46247f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 24 Jun 2024 09:45:29 +0200
Subject: [PATCH] IPaddr2: only set metric value for IPv6 when detected
---
heartbeat/IPaddr2 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 091bea418..3bc5abec1 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -690,7 +690,7 @@ add_interface () {
fi
extra_opts=""
- if [ "$FAMILY" = "inet6" ]; then
+ if [ "$FAMILY" = "inet6" ] && [ -n "$metric" ]; then
extra_opts="$extra_opts metric $metric"
fi
if [ "$FAMILY" = "inet6" ] && ocf_is_true "${OCF_RESKEY_nodad}"; then

@ -1,25 +0,0 @@
From f561e272e9b7fe94ba598b70c6d2f44d034446ed Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 14 Aug 2024 12:05:54 +0200
Subject: [PATCH] findif.sh: ignore unreachable, blackhole, and prohibit routes
---
heartbeat/findif.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index ca5d1a5c1..7b817f75c 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -218,9 +218,9 @@ findif()
fi
if [ -n "$nic" ] ; then
# NIC supports more than two.
- routematch=$(ip -o -f $family route list match $match $proto $scope | grep "dev $nic " | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
+ routematch=$(ip -o -f $family route list match $match $proto $scope | grep -v "^\(unreachable\|prohibit\|blackhole\)" | grep "dev $nic " | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
else
- routematch=$(ip -o -f $family route list match $match $proto $scope | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
+ routematch=$(ip -o -f $family route list match $match $proto $scope | grep -v "^\(unreachable\|prohibit\|blackhole\)" | sed -e 's,^\([0-9.]\+\) ,\1/32 ,;s,^\([0-9a-f:]\+\) ,\1/128 ,' | sort -t/ -k2,2nr)
fi
if [ "$family" = "inet6" ]; then
routematch=$(echo "$routematch" | grep -v "^default")

@ -1,36 +0,0 @@
From f23ae9c1e9ff9a44a053c7c2378975ac5b807478 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 29 Aug 2024 16:24:02 +0200
Subject: [PATCH] IPsrcaddr: specify dev for default route, as e.g. fe80::
routes can be present on multiple interfaces
---
heartbeat/IPsrcaddr | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 1c87d5b7f..58d89a280 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -278,8 +278,8 @@ srca_start() {
errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC $PREF' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] || [ "$OCF_RESKEY_destination" = "::/0" ]; then
- $CMDCHANGE $ROUTE_WO_SRC $PROTO src $1 || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC $PROTO src $1' failed"
+ $CMDCHANGE $ROUTE_WO_SRC dev $INTERFACE $PROTO src $1 || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC dev $INTERFACE $PROTO src $1' failed"
fi
rc=$?
fi
@@ -322,8 +322,8 @@ srca_stop() {
errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC $PREF' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] || [ "$OCF_RESKEY_destination" = "::/0" ]; then
- $CMDCHANGE $ROUTE_WO_SRC proto static || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC proto static' failed"
+ $CMDCHANGE $ROUTE_WO_SRC dev $INTERFACE proto static || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC dev $INTERFACE proto static' failed"
fi
return $?

@ -1,23 +0,0 @@
From a9c4aeb971e9f4963345d0e215b729def62dd27c Mon Sep 17 00:00:00 2001
From: pepadelic <162310096+pepadelic@users.noreply.github.com>
Date: Mon, 15 Apr 2024 13:52:54 +0200
Subject: [PATCH] Update db2: fix OCF_SUCESS name in db2_notify
fix OCF_SUCESS to OCF_SUCCESS in db2_notify
---
heartbeat/db2 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/db2 b/heartbeat/db2
index 95447ab6cb..1cd66f15af 100755
--- a/heartbeat/db2
+++ b/heartbeat/db2
@@ -848,7 +848,7 @@ db2_notify() {
# only interested in pre-start
[ $OCF_RESKEY_CRM_meta_notify_type = pre \
- -a $OCF_RESKEY_CRM_meta_notify_operation = start ] || return $OCF_SUCESS
+ -a $OCF_RESKEY_CRM_meta_notify_operation = start ] || return $OCF_SUCCESS
# gets FIRST_ACTIVE_LOG
db2_get_cfg $dblist || return $?

@ -1,110 +0,0 @@
From 66a5308d2e8f61093716a076f4386416dc18045c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 22 Apr 2024 11:26:09 +0200
Subject: [PATCH] Filesystem: fail when incorrect device mounted on mountpoint,
and dont unmount the mountpoint in this case, or if mountpoint set to "/"
---
heartbeat/Filesystem | 71 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 58 insertions(+), 13 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index e1378f781..cec71f1a6 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -582,10 +582,16 @@ Filesystem_start()
fi
# See if the device is already mounted.
- if Filesystem_status >/dev/null 2>&1 ; then
- ocf_log info "Filesystem $MOUNTPOINT is already mounted."
- return $OCF_SUCCESS
- fi
+ Filesystem_status
+ case "$?" in
+ $OCF_SUCCESS)
+ ocf_log info "Filesystem $MOUNTPOINT is already mounted."
+ return $OCF_SUCCESS
+ ;;
+ $OCF_ERR_CONFIGURED)
+ return $OCF_ERR_CONFIGURED
+ ;;
+ esac
fstype_supported || exit $OCF_ERR_INSTALLED
@@ -801,10 +807,42 @@ Filesystem_stop()
#
Filesystem_status()
{
- match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}"
- if list_mounts | grep "$match_string" >/dev/null 2>&1; then
- rc=$OCF_SUCCESS
- msg="$MOUNTPOINT is mounted (running)"
+ local match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}"
+ local mounted_device=$(list_mounts | grep "$match_string" | awk '{print $1}')
+
+ if [ -n "$mounted_device" ]; then
+ if [ "X$blockdevice" = "Xyes" ]; then
+ if [ -e "$DEVICE" ] ; then
+ local canonicalized_device="$(readlink -f "$DEVICE")"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Could not canonicalize $DEVICE because readlink failed"
+ exit $OCF_ERR_GENERIC
+ fi
+ else
+ local canonicalized_device="$DEVICE"
+ fi
+ if [ -e "$mounted_device" ] ; then
+ local canonicalized_mounted_device="$(readlink -f "$mounted_device")"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Could not canonicalize $mounted_device because readlink failed"
+ exit $OCF_ERR_GENERIC
+ fi
+ else
+ local canonicalized_mounted_device="$mounted_device"
+ fi
+ if [ "$canonicalized_device" != "$canonicalized_mounted_device" ]; then
+ if ocf_is_probe || [ "$__OCF_ACTION" = "stop" ]; then
+ ocf_log debug "Another device ($mounted_device) is already mounted on $MOUNTPOINT"
+ rc=$OCF_NOT_RUNNING
+ else
+ ocf_exit_reason "Another device ($mounted_device) is already mounted on $MOUNTPOINT"
+ rc=$OCF_ERR_CONFIGURED
+ fi
+ fi
+ else
+ rc=$OCF_SUCCESS
+ msg="$MOUNTPOINT is mounted (running)"
+ fi
else
rc=$OCF_NOT_RUNNING
msg="$MOUNTPOINT is unmounted (stopped)"
@@ -1041,9 +1079,18 @@ else
else
CANONICALIZED_MOUNTPOINT="$MOUNTPOINT"
fi
- # At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
- # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
- # kill the whole system. Is that a good idea?
+
+ if echo "$CANONICALIZED_MOUNTPOINT" | grep -q "^\s*/\s*$"; then
+ if ocf_is_probe; then
+ ocf_log debug "/ cannot be managed in a cluster"
+ exit $OCF_NOT_RUNNING
+ elif [ "$__OCF_ACTION" = "start" ] || [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "status" ]; then
+ ocf_exit_reason "/ cannot be managed in a cluster"
+ exit $OCF_ERR_CONFIGURED
+ elif [ "$__OCF_ACTION" = "stop" ]; then
+ exit $OCF_SUCCESS
+ fi
+ fi
fi
# Check to make sure the utilites are found
@@ -1124,5 +1171,3 @@ case $OP in
;;
esac
exit $?
-
-

@ -1,22 +0,0 @@
From 4b09b3e467a7f8076bbf20f5b027efecf16303e7 Mon Sep 17 00:00:00 2001
From: Gianluca Piccolo <gianluca.piccolo@wuerth-phoenix.com>
Date: Thu, 6 Jun 2024 17:34:41 +0200
Subject: [PATCH] Fix #1944
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index a445349b9..59b6c1b51 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -664,7 +664,7 @@ get_pids()
if [ "X${HOSTOS}" = "XOpenBSD" ];then
fstat | grep $dir | awk '{print $3}'
else
- $FUSER -m $dir 2>/dev/null
+ $FUSER -Mm $dir 2>/dev/null
fi
elif [ "$FORCE_UNMOUNT" = "safe" ]; then
procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}')

@ -1,26 +0,0 @@
From c9ba6ac66ee27a70c69e1156f17aa6beac277bc5 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 7 Jun 2024 14:23:28 +0200
Subject: [PATCH] Filesystem: use fuser -c on FreeBSD, as -m and -M are used
for other functionality
---
heartbeat/Filesystem | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 59b6c1b51..88fe2e2eb 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -661,8 +661,10 @@ get_pids()
fi
if ocf_is_true "$FORCE_UNMOUNT"; then
- if [ "X${HOSTOS}" = "XOpenBSD" ];then
+ if [ "X${HOSTOS}" = "XOpenBSD" ]; then
fstat | grep $dir | awk '{print $3}'
+ elif [ "X${HOSTOS}" = "XFreeBSD" ]; then
+ $FUSER -c $dir 2>/dev/null
else
$FUSER -Mm $dir 2>/dev/null
fi

@ -1,61 +0,0 @@
From 481672f73d05666ab20a883cf8fc746cb1f3050f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 20 Jun 2024 09:29:21 +0200
Subject: [PATCH] galera/mariadb/mysql/redis: remove Unpromoted monitor-action,
as it's covered by the regular monitor-action
---
heartbeat/galera.in | 1 -
heartbeat/mariadb.in | 1 -
heartbeat/mysql | 1 -
heartbeat/redis.in | 1 -
4 files changed, 4 deletions(-)
diff --git a/heartbeat/galera.in b/heartbeat/galera.in
index b518595cb0..b29d68bf73 100755
--- a/heartbeat/galera.in
+++ b/heartbeat/galera.in
@@ -299,7 +299,6 @@ Use it with caution! (and fencing)
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="300s" />
<action name="demote" timeout="120s" />
<action name="validate-all" timeout="5s" />
diff --git a/heartbeat/mariadb.in b/heartbeat/mariadb.in
index e0f1f3c9f1..1dca98ba68 100644
--- a/heartbeat/mariadb.in
+++ b/heartbeat/mariadb.in
@@ -255,7 +255,6 @@ The port on which the Promoted MariaDB instance is listening.
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
diff --git a/heartbeat/mysql b/heartbeat/mysql
index 1df2fc0f28..6b00889ff4 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -322,7 +322,6 @@ whether a node is usable for clients to read from.</shortdesc>
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
diff --git a/heartbeat/redis.in b/heartbeat/redis.in
index 6429477e11..1e541f13d5 100755
--- a/heartbeat/redis.in
+++ b/heartbeat/redis.in
@@ -221,7 +221,6 @@ is in use.
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="60s" interval="45s" />
<action name="monitor" role="Promoted" depth="0" timeout="60s" interval="20s" />
-<action name="monitor" role="Unpromoted" depth="0" timeout="60s" interval="60s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />

@ -0,0 +1,455 @@
From 61cec34a754017537c61e79cd1212f2688c32429 Mon Sep 17 00:00:00 2001
From: harshkiprofile <83770157+harshkiprofile@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:19:10 +0530
Subject: [PATCH 1/7] Introduce a new shell function to reuse IMDS token
---
heartbeat/ocf-shellfuncs.in | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 5c4bb3264..0c4632cf9 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -1111,3 +1111,34 @@ ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
if ocf_is_true "$HA_use_logd"; then
: ${HA_LOGD:=yes}
fi
+
+# File to store the token and timestamp
+TOKEN_FILE="/tmp/.imds_token"
+TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
+TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
+
+# Function to fetch a new token
+fetch_new_token() {
+ TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME")
+ echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
+ echo "$TOKEN"
+}
+
+# Function to retrieve or renew the token
+get_token() {
+ if [[ -f "$TOKEN_FILE" ]]; then
+ read -r STORED_TOKEN STORED_TIMESTAMP < "$TOKEN_FILE"
+ CURRENT_TIME=$(date +%s)
+ ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
+
+ if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
+ # Token is still valid
+ echo "$STORED_TOKEN"
+ return
+ fi
+ fi
+ # Fetch a new token if not valid
+ fetch_new_token
+}
+
+
From 00629fa44cb7a8dd1045fc8cad755e1d0c808476 Mon Sep 17 00:00:00 2001
From: harshkiprofile <83770157+harshkiprofile@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:21:18 +0530
Subject: [PATCH 2/7] Utilize the get_token function to reuse the token
---
heartbeat/aws-vpc-move-ip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 6115e5ba8..fbeb2ee64 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -270,7 +270,7 @@ ec2ip_validate() {
fi
fi
- TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+ TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
EC2_INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
From 36126cdcb90ad617ecfce03d986550907732aa4f Mon Sep 17 00:00:00 2001
From: harshkiprofile <83770157+harshkiprofile@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:22:16 +0530
Subject: [PATCH 3/7] Utilize to get_token function to reuse the token
---
heartbeat/awsvip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index f2b238a0f..ca19ac086 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -266,7 +266,7 @@ if [ -n "${OCF_RESKEY_region}" ]; then
AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
fi
SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
-TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
From dcd0050df5ba94905bc71d38b05cbb93f5687b61 Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Mon, 4 Nov 2024 20:05:33 +0530
Subject: [PATCH 4/7] Move token renewal function to aws.sh for reuse in AWS
agent scripts
---
heartbeat/Makefile.am | 1 +
heartbeat/aws-vpc-move-ip | 1 +
heartbeat/aws-vpc-route53.in | 3 ++-
heartbeat/aws.sh | 46 ++++++++++++++++++++++++++++++++++++
heartbeat/awseip | 3 ++-
heartbeat/awsvip | 1 +
heartbeat/ocf-shellfuncs.in | 33 +-------------------------
7 files changed, 54 insertions(+), 34 deletions(-)
create mode 100644 heartbeat/aws.sh
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 409847970..655740f14 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -218,6 +218,7 @@ ocfcommon_DATA = ocf-shellfuncs \
ocf-rarun \
ocf-distro \
apache-conf.sh \
+ aws.sh \
http-mon.sh \
sapdb-nosha.sh \
sapdb.sh \
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index fbeb2ee64..f4b0492f2 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -33,6 +33,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
# Defaults
OCF_RESKEY_awscli_default="/usr/bin/aws"
diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
index eba2ed95c..f7e756782 100644
--- a/heartbeat/aws-vpc-route53.in
+++ b/heartbeat/aws-vpc-route53.in
@@ -43,6 +43,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
# Defaults
OCF_RESKEY_awscli_default="/usr/bin/aws"
@@ -377,7 +378,7 @@ r53_monitor() {
_get_ip() {
case $OCF_RESKEY_ip in
local|public)
- TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+ TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
IPADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
new file mode 100644
index 000000000..fc557109c
--- /dev/null
+++ b/heartbeat/aws.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+#
+#
+# AWS Helper Scripts
+#
+#
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults
+OCF_RESKEY_curl_retries_default="3"
+OCF_RESKEY_curl_sleep_default="1"
+
+: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
+: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
+
+# Function to enable reusable IMDS token retrieval for efficient repeated access
+# File to store the token and timestamp
+TOKEN_FILE="/tmp/.imds_token"
+TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
+TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
+
+# Function to fetch a new token
+fetch_new_token() {
+ TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME'" "http://169.254.169.254/latest/api/token")
+ echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
+ echo "$TOKEN"
+}
+
+# Function to retrieve or renew the token
+get_token() {
+ if [ -f "$TOKEN_FILE" ]; then
+ read -r STORED_TOKEN STORED_TIMESTAMP < "$TOKEN_FILE"
+ CURRENT_TIME=$(date +%s)
+ ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
+
+ if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
+ # Token is still valid
+ echo "$STORED_TOKEN"
+ return
+ fi
+ fi
+ # Fetch a new token if not valid
+ fetch_new_token
+}
\ No newline at end of file
diff --git a/heartbeat/awseip b/heartbeat/awseip
index ffb6223a1..049c2e566 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -38,6 +38,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
#######################################################################
@@ -306,7 +307,7 @@ fi
ELASTIC_IP="${OCF_RESKEY_elastic_ip}"
ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
-TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index ca19ac086..de67981d8 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -37,6 +37,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
#######################################################################
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 0c4632cf9..922c6ea45 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -1110,35 +1110,4 @@ ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
# pacemaker sets HA_use_logd, some others use HA_LOGD :/
if ocf_is_true "$HA_use_logd"; then
: ${HA_LOGD:=yes}
-fi
-
-# File to store the token and timestamp
-TOKEN_FILE="/tmp/.imds_token"
-TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
-TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
-
-# Function to fetch a new token
-fetch_new_token() {
- TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME")
- echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
- echo "$TOKEN"
-}
-
-# Function to retrieve or renew the token
-get_token() {
- if [[ -f "$TOKEN_FILE" ]]; then
- read -r STORED_TOKEN STORED_TIMESTAMP < "$TOKEN_FILE"
- CURRENT_TIME=$(date +%s)
- ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
-
- if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
- # Token is still valid
- echo "$STORED_TOKEN"
- return
- fi
- fi
- # Fetch a new token if not valid
- fetch_new_token
-}
-
-
+fi
\ No newline at end of file
From 9f7be201923c8eab1b121f2067ed74a69841cf8a Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Tue, 5 Nov 2024 19:12:34 +0530
Subject: [PATCH 5/7] Refactor to use common temp path and update shell syntax
---
heartbeat/Makefile.am | 2 +-
heartbeat/aws.sh | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 655740f14..8352f3a3d 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -218,7 +218,7 @@ ocfcommon_DATA = ocf-shellfuncs \
ocf-rarun \
ocf-distro \
apache-conf.sh \
- aws.sh \
+ aws.sh \
http-mon.sh \
sapdb-nosha.sh \
sapdb.sh \
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index fc557109c..c77f93b91 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -17,7 +17,7 @@ OCF_RESKEY_curl_sleep_default="1"
# Function to enable reusable IMDS token retrieval for efficient repeated access
# File to store the token and timestamp
-TOKEN_FILE="/tmp/.imds_token"
+TOKEN_FILE="${HA_RSCTMP}/.aws_imds_token"
TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
@@ -35,7 +35,7 @@ get_token() {
CURRENT_TIME=$(date +%s)
ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
- if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
+ if [ "$ELAPSED_TIME" -lt "$((TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD))" ]; then
# Token is still valid
echo "$STORED_TOKEN"
return
From 4f61048064d1df3bebdb5c1441cf0020f213c01b Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Tue, 5 Nov 2024 19:30:15 +0530
Subject: [PATCH 6/7] Consolidate curl_retry and curl_sleep variable to a
single location in aws.sh
---
heartbeat/aws-vpc-move-ip | 4 ----
heartbeat/aws-vpc-route53.in | 4 ----
heartbeat/awseip | 4 ----
heartbeat/awsvip | 4 ----
4 files changed, 16 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index f4b0492f2..3aa9ceb02 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -48,8 +48,6 @@ OCF_RESKEY_interface_default="eth0"
OCF_RESKEY_iflabel_default=""
OCF_RESKEY_monapi_default="false"
OCF_RESKEY_lookup_type_default="InstanceId"
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
@@ -63,8 +61,6 @@ OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
: ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}}
: ${OCF_RESKEY_lookup_type=${OCF_RESKEY_lookup_type_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
#######################################################################
diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
index f7e756782..85c8de3c1 100644
--- a/heartbeat/aws-vpc-route53.in
+++ b/heartbeat/aws-vpc-route53.in
@@ -54,8 +54,6 @@ OCF_RESKEY_hostedzoneid_default=""
OCF_RESKEY_fullname_default=""
OCF_RESKEY_ip_default="local"
OCF_RESKEY_ttl_default=10
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
@@ -65,8 +63,6 @@ OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}}
: ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}}
: ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
usage() {
cat <<-EOT
diff --git a/heartbeat/awseip b/heartbeat/awseip
index 049c2e566..4b1c3bc6a 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -50,16 +50,12 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
meta_data() {
cat <<END
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index de67981d8..8c71e7fac 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -49,16 +49,12 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
meta_data() {
cat <<END
From d451c5c595b08685f84ec85da96ae9cb4fc076fe Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Tue, 5 Nov 2024 20:50:24 +0530
Subject: [PATCH 7/7] aws.sh needs to added to be symlinkstargets in
doc/man/Makefile.am
---
doc/man/Makefile.am | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index ef7639bff..447f5cba3 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -42,7 +42,7 @@ radir = $(abs_top_builddir)/heartbeat
# required for out-of-tree build
symlinkstargets = \
ocf-distro ocf.py ocf-rarun ocf-returncodes \
- findif.sh apache-conf.sh http-mon.sh mysql-common.sh \
+ findif.sh apache-conf.sh aws.sh http-mon.sh mysql-common.sh \
nfsserver-redhat.sh openstack-common.sh ora-common.sh
preptree:

@ -0,0 +1,161 @@
From cc5ffa5e599c974c426e93faa821b342e96b916d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Nov 2024 12:46:27 +0100
Subject: [PATCH 1/2] aws.sh: chmod 600 $TOKEN_FILE, add get_instance_id() with
DMI support, and use get_instance_id() in AWS agents
---
heartbeat/aws-vpc-move-ip | 2 +-
heartbeat/aws.sh | 30 +++++++++++++++++++++++++++---
heartbeat/awseip | 2 +-
heartbeat/awsvip | 2 +-
4 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 3aa9ceb02..09ae68b57 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -269,7 +269,7 @@ ec2ip_validate() {
TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
- EC2_INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+ EC2_INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
if [ -z "${EC2_INSTANCE_ID}" ]; then
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index c77f93b91..9cd343c16 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -9,8 +9,8 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
+OCF_RESKEY_curl_retries_default="4"
+OCF_RESKEY_curl_sleep_default="3"
: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
@@ -20,11 +20,13 @@ OCF_RESKEY_curl_sleep_default="1"
TOKEN_FILE="${HA_RSCTMP}/.aws_imds_token"
TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
+DMI_FILE="/sys/devices/virtual/dmi/id/board_asset_tag" # Only supported on nitro-based instances.
# Function to fetch a new token
fetch_new_token() {
TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME'" "http://169.254.169.254/latest/api/token")
echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
+ chmod 600 "$TOKEN_FILE"
echo "$TOKEN"
}
@@ -43,4 +45,26 @@ get_token() {
fi
# Fetch a new token if not valid
fetch_new_token
-}
\ No newline at end of file
+}
+
+get_instance_id() {
+ local INSTANCE_ID
+
+ # Try to get the EC2 instance ID from DMI first before falling back to IMDS.
+ ocf_log debug "EC2: Attempt to get EC2 Instance ID from local file."
+ if [ -r "$DMI_FILE" ] && [ -s "$DMI_FILE" ]; then
+ INSTANCE_ID="$(cat "$DMI_FILE")"
+ case "$INSTANCE_ID" in
+ i-0*) echo "$INSTANCE_ID"; return "$OCF_SUCCESS" ;;
+ esac
+ fi
+
+ INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to get EC2 Instance ID"
+ exit $OCF_ERR_GENERIC
+ fi
+
+ echo "$INSTANCE_ID"
+ return "$OCF_SUCCESS"
+}
diff --git a/heartbeat/awseip b/heartbeat/awseip
index 4b1c3bc6a..7f38376dc 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -305,7 +305,7 @@ ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
-INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
case $__OCF_ACTION in
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index 8c71e7fac..0856ac5e4 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -265,7 +265,7 @@ fi
SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
-INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
MAC_ADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/mac")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
From b8d3ecc6a8ce4baf4b28d02978dd573728ccf5fa Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 18 Nov 2024 11:10:42 +0100
Subject: [PATCH 2/2] aws.sh/ocf-shellfuncs: add ability to fresh token if it's
invalid
---
heartbeat/aws.sh | 1 +
heartbeat/ocf-shellfuncs.in | 11 ++++++++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index 9cd343c16..64f2e13a7 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -18,6 +18,7 @@ OCF_RESKEY_curl_sleep_default="3"
# Function to enable reusable IMDS token retrieval for efficient repeated access
# File to store the token and timestamp
TOKEN_FILE="${HA_RSCTMP}/.aws_imds_token"
+TOKEN_FUNC="fetch_new_token" # Used by curl_retry() if saved token is invalid
TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
DMI_FILE="/sys/devices/virtual/dmi/id/board_asset_tag" # Only supported on nitro-based instances.
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 922c6ea45..8e51fa3c8 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -697,6 +697,15 @@ curl_retry()
ocf_log debug "result: $result"
[ $rc -eq 0 ] && break
+ if [ -n "$TOKEN" ] && [ -n "$TOKEN_FILE" ] && \
+ [ -f "$TOKEN_FILE" ] && [ -n "$TOKEN_FUNC" ] && \
+ echo "$result" | grep -q "The requested URL returned error: 401$"; then
+ local OLD_TOKEN="$TOKEN"
+ ocf_log err "Token invalid. Getting new token."
+ TOKEN=$($TOKEN_FUNC)
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+ args=$(echo "$args" | sed "s/$OLD_TOKEN/$TOKEN/")
+ fi
sleep $sleep
done
@@ -1110,4 +1119,4 @@ ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
# pacemaker sets HA_use_logd, some others use HA_LOGD :/
if ocf_is_true "$HA_use_logd"; then
: ${HA_LOGD:=yes}
-fi
\ No newline at end of file
+fi

@ -0,0 +1,184 @@
From 392d40048a25d7cb73ec5b5e9f7a5862f7a3fd48 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Nov 2024 12:22:27 +0100
Subject: [PATCH 1/2] aws.sh: add get_interface_mac()
---
heartbeat/aws.sh | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index 64f2e13a7..ebb4eb1f4 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -69,3 +69,24 @@ get_instance_id() {
echo "$INSTANCE_ID"
return "$OCF_SUCCESS"
}
+
+get_interface_mac() {
+ local MAC_FILE MAC_ADDR rc
+ MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
+ if [ -f "$MAC_FILE" ]; then
+ cmd="cat ${MAC_FILE}"
+ else
+ cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
+ fi
+ ocf_log debug "executing command: $cmd"
+ MAC_ADDR="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
+
+ echo $MAC_ADDR
+ return $OCF_SUCCESS
+}
From 87337ac4da931d5a53c83d53d4bab17ee123ba9f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Nov 2024 12:26:38 +0100
Subject: [PATCH 2/2] awsvip: let user specify which interface to use, and make
the parameter optional in aws-vpc-move-ip
---
heartbeat/aws-vpc-move-ip | 20 ++++----------------
heartbeat/aws.sh | 4 +++-
heartbeat/awsvip | 24 +++++++++++++++++-------
3 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 09ae68b57..2afc0ba53 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -157,7 +157,7 @@ Role to use to query/update the route table
<content type="string" default="${OCF_RESKEY_routing_table_role_default}" />
</parameter>
-<parameter name="interface" required="1">
+<parameter name="interface" required="0">
<longdesc lang="en">
Name of the network interface, i.e. eth0
</longdesc>
@@ -321,7 +321,7 @@ ec2ip_monitor() {
ocf_log debug "monitor: Enhanced Monitoring disabled - omitting API call"
fi
- cmd="ip addr show to $OCF_RESKEY_ip up"
+ cmd="ip addr show dev $OCF_RESKEY_interface to $OCF_RESKEY_ip up"
ocf_log debug "executing command: $cmd"
RESULT=$($cmd | grep "$OCF_RESKEY_ip")
if [ -z "$RESULT" ]; then
@@ -331,7 +331,7 @@ ec2ip_monitor() {
level="info"
fi
- ocf_log "$level" "IP $OCF_RESKEY_ip not assigned to running interface"
+ ocf_log "$level" "IP $OCF_RESKEY_ip not assigned to interface $OCF_RESKEY_interface"
return $OCF_NOT_RUNNING
fi
@@ -369,19 +369,7 @@ ec2ip_drop() {
}
ec2ip_get_instance_eni() {
- MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
- if [ -f $MAC_FILE ]; then
- cmd="cat ${MAC_FILE}"
- else
- cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
- fi
- ocf_log debug "executing command: $cmd"
- MAC_ADDR="$(eval $cmd)"
- rc=$?
- if [ $rc != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
- fi
+ MAC_ADDR=$(get_interface_mac)
ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
cmd="curl_retry \"$OCF_RESKEY_curl_retries\" \"$OCF_RESKEY_curl_sleep\" \"--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'\" \"http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id\""
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index ebb4eb1f4..216033afe 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -73,7 +73,9 @@ get_instance_id() {
get_interface_mac() {
local MAC_FILE MAC_ADDR rc
MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
- if [ -f "$MAC_FILE" ]; then
+ if [ -z "$OCF_RESKEY_interface" ]; then
+ cmd="curl_retry \"$OCF_RESKEY_curl_retries\" \"$OCF_RESKEY_curl_sleep\" \"--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'\" \"http://169.254.169.254/latest/meta-data/mac\""
+ elif [ -f "$MAC_FILE" ]; then
cmd="cat ${MAC_FILE}"
else
cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index 0856ac5e4..015180d5a 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -49,12 +49,14 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
+OCF_RESKEY_interface_default=""
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
+: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}}
meta_data() {
cat <<END
@@ -125,6 +127,14 @@ a short delay between API calls, to avoid sending API too quick
<content type="integer" default="${OCF_RESKEY_api_delay_default}" />
</parameter>
+<parameter name="interface" required="0">
+<longdesc lang="en">
+Name of the network interface, i.e. eth0
+</longdesc>
+<shortdesc lang="en">network interface name</shortdesc>
+<content type="string" default="${OCF_RESKEY_interface_default}" />
+</parameter>
+
<parameter name="curl_retries" unique="0">
<longdesc lang="en">
curl retries before failing
@@ -207,16 +217,16 @@ awsvip_stop() {
}
awsvip_monitor() {
- $AWSCLI_CMD ec2 describe-instances \
- --instance-id "${INSTANCE_ID}" \
- --query 'Reservations[].Instances[].NetworkInterfaces[].PrivateIpAddresses[].PrivateIpAddress[]' \
+ $AWSCLI_CMD ec2 describe-network-interfaces \
+ --network-interface-ids "${NETWORK_ID}" \
+ --query 'NetworkInterfaces[].PrivateIpAddresses[].PrivateIpAddress[]' \
--output text | \
grep -qE "(^|\s)${SECONDARY_PRIVATE_IP}(\s|$)"
- RET=$?
-
- if [ $RET -ne 0 ]; then
+ if [ $? -ne 0 ]; then
+ [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe && ocf_log error "IP $SECONDARY_PRIVATE_IP not assigned to interface ${NETWORK_ID}"
return $OCF_NOT_RUNNING
fi
+
return $OCF_SUCCESS
}
@@ -267,7 +277,7 @@ TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
-MAC_ADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/mac")
+MAC_ADDRESS=$(get_interface_mac)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
NETWORK_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDRESS}/interface-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC

@ -1,29 +0,0 @@
From 9a7b47f1838e9d6e3c807e9db5312097adb5c499 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 5 Nov 2021 10:30:49 +0100
Subject: [PATCH] gcp-ilb/Squid: fix issues detected by CI
---
heartbeat/Squid.in | 2 +-
heartbeat/gcp-ilb | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/gcp-ilb b/heartbeat/gcp-ilb
index 28484b241..48dc3ac4e 100755
--- a/heartbeat/gcp-ilb
+++ b/heartbeat/gcp-ilb
@@ -53,12 +53,12 @@ pidfile="/var/run/$OCF_RESOURCE_INSTANCE.pid"
#Validate command for logging
-if $OCF_RESKEY_log_enable = "true"; then
+if [ $OCF_RESKEY_log_enable = "true" ]; then
if type $OCF_RESKEY_log_cmd > /dev/null 2>&1; then
logging_cmd="$OCF_RESKEY_log_cmd $OCF_RESKEY_log_params"
ocf_log debug "Logging command is: \'$logging_cmd\' "
else
- $OCF_RESKEY_log_enable = "false"
+ OCF_RESKEY_log_enable="false"
ocf_log err "\'$logging_cmd\' is invalid. External logging disabled."
fi;

@ -1,51 +0,0 @@
From 14576f7ca02fb0abff188238ac019e88ab06e878 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 9 Nov 2021 11:49:36 +0100
Subject: [PATCH] gcp-ilb: only check if log_cmd binary is available if
log_enable is true
---
heartbeat/gcp-ilb | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/heartbeat/gcp-ilb b/heartbeat/gcp-ilb
index 48dc3ac4e..f84f373b7 100755
--- a/heartbeat/gcp-ilb
+++ b/heartbeat/gcp-ilb
@@ -37,7 +37,7 @@ if type "socat" > /dev/null 2>&1; then
OCF_RESKEY_cat_default="socat"
else
OCF_RESKEY_cat_default="nc"
-fi;
+fi
: ${OCF_RESKEY_cat=${OCF_RESKEY_cat_default}}
@@ -53,7 +53,7 @@ pidfile="/var/run/$OCF_RESOURCE_INSTANCE.pid"
#Validate command for logging
-if [ $OCF_RESKEY_log_enable = "true" ]; then
+if ocf_is_true "$OCF_RESKEY_log_enable"; then
if type $OCF_RESKEY_log_cmd > /dev/null 2>&1; then
logging_cmd="$OCF_RESKEY_log_cmd $OCF_RESKEY_log_params"
ocf_log debug "Logging command is: \'$logging_cmd\' "
@@ -61,7 +61,7 @@ if [ $OCF_RESKEY_log_enable = "true" ]; then
OCF_RESKEY_log_enable="false"
ocf_log err "\'$logging_cmd\' is invalid. External logging disabled."
- fi;
+ fi
fi
@@ -285,7 +285,8 @@ ilb_stop() {
ilb_validate() {
check_binary "$OCF_RESKEY_cat"
- check_binary "$OCF_RESKEY_log_cmd"
+
+ ocf_is_true "$OCF_RESKEY_log_enable" && check_binary "$OCF_RESKEY_log_cmd"
if ! ocf_is_decimal "$OCF_RESKEY_port"; then
ocf_exit_reason "$OCF_RESKEY_port is not a valid port"

@ -1,32 +0,0 @@
From 925180da2f41feddc5aac3c249563eb179b34029 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 22 Nov 2021 16:44:48 +0100
Subject: [PATCH] db2: use -l forever instead of -t nodes -l reboot, as they
conflict with eachother
---
heartbeat/db2 | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/db2 b/heartbeat/db2
index 03146a957..fa2a45a5d 100755
--- a/heartbeat/db2
+++ b/heartbeat/db2
@@ -274,7 +274,7 @@ db2_fal_attrib() {
while read id node member
do
[ "$member" = member -a "$node" != "$me" ] || continue
- crm_attribute -t nodes -l reboot --node=$node -n $attr -v "$3"
+ crm_attribute -l forever --node=$node -n $attr -v "$3"
rc=$?
ocf_log info "DB2 instance $instance($db2node/$db: setting attrib for FAL to $FIRST_ACTIVE_LOG @ $node"
[ $rc != 0 ] && break
@@ -282,7 +282,7 @@ db2_fal_attrib() {
;;
get)
- crm_attribute -t nodes -l reboot -n $attr -G --quiet 2>&1
+ crm_attribute -l forever -n $attr -G --quiet 2>&1
rc=$?
if [ $rc != 0 ]
then

@ -1,32 +0,0 @@
From 75eaf06eea8957aa3941823955d1c8fa7933ab1d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 23 Feb 2022 16:32:21 +0100
Subject: [PATCH] db2: only warn when notify isnt set, and use
ocf_local_nodename() to get node name
---
heartbeat/db2 | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/db2 b/heartbeat/db2
index fa2a45a5d..ea24d33fc 100755
--- a/heartbeat/db2
+++ b/heartbeat/db2
@@ -267,7 +267,7 @@ db2_fal_attrib() {
case "$2" in
set)
- me=$(uname -n)
+ me=$(ocf_local_nodename)
# loop over all member nodes and set attribute
crm_node -l |
@@ -284,7 +284,7 @@ db2_fal_attrib() {
get)
crm_attribute -l forever -n $attr -G --quiet 2>&1
rc=$?
- if [ $rc != 0 ]
+ if ! ocf_is_true "$OCF_RESKEY_CRM_meta_notify" && [ $rc != 0 ]
then
ocf_log warn "DB2 instance $instance($db2node/$db: can't retrieve attribute $attr, are you sure notifications are enabled ?"
fi

@ -1,22 +0,0 @@
From c6338011cf9ea69324f44c8c31a4ca2478aab35a Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 7 Dec 2021 08:59:50 +0100
Subject: [PATCH] podman: remove anonymous volumes
---
heartbeat/podman | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index fd901c968..2b73857f1 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -251,7 +251,7 @@ remove_container()
return 0
fi
ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
- ocf_run podman rm $CONTAINER
+ ocf_run podman rm -v $CONTAINER
rc=$?
if [ $rc -ne 0 ]; then
# due to a podman bug (rhbz#1841485), sometimes a stopped

@ -1,43 +0,0 @@
From 7c54e4ecda33c90a1046c0688774f5b847ab10fe Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 7 Dec 2021 10:37:24 +0100
Subject: [PATCH] Route: return OCF_NOT_RUNNING for probe action when interface
or route doesnt exist
---
heartbeat/Route | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/heartbeat/Route b/heartbeat/Route
index 8b390615a..7db41d0ae 100755
--- a/heartbeat/Route
+++ b/heartbeat/Route
@@ -227,15 +227,6 @@ route_stop() {
}
route_status() {
- if [ -n "${OCF_RESKEY_device}" ]; then
- # Must check if device exists or is gone.
- # If device is gone, route is also unconfigured.
- ip link show dev ${OCF_RESKEY_device} >/dev/null 2>&1
- if [ $? -ne 0 ]; then
- # Assume device does not exist, and short-circuit here.
- return $OCF_NOT_RUNNING
- fi
- fi
show_output="$(ip $addr_family route show $(create_route_spec) 2>/dev/null)"
if [ $? -eq 0 ]; then
if [ -n "$show_output" ]; then
@@ -251,7 +242,11 @@ route_status() {
else
# "ip route show" returned an error code. Assume something
# went wrong.
- return $OCF_ERR_GENERIC
+ if ocf_is_probe; then
+ return $OCF_NOT_RUNNING
+ else
+ return $OCF_ERR_GENERIC
+ fi
fi
}

@ -1,41 +0,0 @@
From 6d2ed7615614ede093f097189876d0f08553a43e Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Mon, 14 Feb 2022 22:23:39 -0800
Subject: [PATCH] IPsrcaddr: Add warning about DHCP
If DHCP is enabled for the interface that serves OCF_RESKEY_ipaddress,
then NetworkManager (and possibly dhclient in systems without NM;
unsure) may later re-add a route that the IPsrcaddr resource replaced.
This may cause the resource to fail or cause other unexpected behavior.
So far this has been observed with a default route, albeit with an edge
case of a configuration (OCF_RESKEY_ipaddress on a different subnet)
that may not be totally valid. There are likely to be other situations
as well where DHCP can cause conflicts with IPsrcaddr's manual updates
via iproute. The safest option is to use only static configuration for
the involved interface.
Resolves: RHBZ#1654862
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/IPsrcaddr | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index ec868409f..fd7b6f68d 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -99,6 +99,12 @@ meta_data() {
<longdesc lang="en">
Resource script for IPsrcaddr. It manages the preferred source address
modification.
+
+Note: DHCP should not be enabled for the interface serving the preferred
+source address. Enabling DHCP may result in unexpected behavior, such as
+the automatic addition of duplicate or conflicting routes. This may
+cause the IPsrcaddr resource to fail, or it may produce undesired
+behavior while the resource continues to run.
</longdesc>
<shortdesc lang="en">Manages the preferred source address for outgoing IP packets</shortdesc>

@ -1,49 +0,0 @@
From 5a65f66ff803ad7ed15af958cc1efdde4d53dcb7 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Thu, 17 Feb 2022 03:53:21 -0800
Subject: [PATCH] IPsrcaddr: Better error message when no matching route found
If OCF_RESKEY_destination is not explicitly set and `ip route list`
can't find a route matching the specifications, the NETWORK variable
doesn't get set. This causes a certain failure of the start operation,
because there is no PREFIX argument to `ip route replace` (syntax
error). It may also cause unexpected behavior for stop operations (but
not in all cases). During a monitor, this event can only happen if
something has changed outside the cluster's control, and so is cause
for warning there.
Exit OCF_ERR_ARGS for start, log debug for probe, log warning for all
other ops.
Resolves: RHBZ#1654862
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/IPsrcaddr | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index fd7b6f68d..f0216722d 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -549,6 +549,20 @@ rc=$?
INTERFACE=`echo $findif_out | awk '{print $1}'`
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+
+ if [ -z "$NETWORK" ]; then
+ err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
+ err_str="$err_str match $ipaddress' failed to find a matching route"
+
+ if [ "$__OCF_ACTION" = "start" ]; then
+ ocf_exit_reason "$err_str"
+ exit $OCF_ERR_ARGS
+ elif ! ocf_is_probe; then
+ ocf_log warn "$err_str"
+ else
+ ocf_log debug "$err_str"
+ fi
+ fi
else
NETWORK="$OCF_RESKEY_destination"
fi

@ -1,56 +0,0 @@
From 0a197f1cd227e768837dff778a0c56fc1085d434 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 21 Feb 2022 13:54:04 +0100
Subject: [PATCH] IPsrcaddr: fix indentation in better error message code
---
heartbeat/IPsrcaddr | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index f0216722d..c82adc0e9 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -542,27 +542,27 @@ fi
findif_out=`$FINDIF -C`
rc=$?
[ $rc -ne 0 ] && {
- ocf_exit_reason "[$FINDIF -C] failed"
- exit $rc
+ ocf_exit_reason "[$FINDIF -C] failed"
+ exit $rc
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
- if [ -z "$NETWORK" ]; then
- err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
- err_str="$err_str match $ipaddress' failed to find a matching route"
-
- if [ "$__OCF_ACTION" = "start" ]; then
- ocf_exit_reason "$err_str"
- exit $OCF_ERR_ARGS
- elif ! ocf_is_probe; then
- ocf_log warn "$err_str"
- else
- ocf_log debug "$err_str"
- fi
- fi
+ if [ -z "$NETWORK" ]; then
+ err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"
+ err_str="$err_str match $ipaddress' failed to find a matching route"
+
+ if [ "$__OCF_ACTION" = "start" ]; then
+ ocf_exit_reason "$err_str"
+ exit $OCF_ERR_ARGS
+ elif ! ocf_is_probe; then
+ ocf_log warn "$err_str"
+ else
+ ocf_log debug "$err_str"
+ fi
+ fi
else
NETWORK="$OCF_RESKEY_destination"
fi

@ -1,117 +0,0 @@
From 50a596bfb977b18902dc62b99145bbd1a087690a Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 1 Mar 2022 11:06:07 +0100
Subject: [PATCH] IPsrcaddr: fixes
- use findif.sh to detect secondary interfaces
- get metric and proto to update the correct route/update it correctly
- match route using interface to fail when trying to update secondary
interfaces without specifying destination (would update default route
before)
- also use PRIMARY_IP/OPTS during stop-action for default routes (to get
back to the exact routes we started with)
- dont fail during stop-action if route doesnt exist
- use [[:blank:]] for WS to follow POSIX standard (suggested by nrwahl)
---
heartbeat/IPsrcaddr | 35 +++++++++++++++++++----------------
1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index c82adc0e9..7dbf65ff5 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -52,6 +52,7 @@
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/findif.sh
# Defaults
OCF_RESKEY_ipaddress_default=""
@@ -181,19 +182,21 @@ errorexit() {
#
# where the src clause "src Y.Y.Y.Y" may or may not be present
-WS="[`echo -en ' \t'`]"
+WS="[[:blank:]]"
OCTET="[0-9]\{1,3\}"
IPADDR="\($OCTET\.\)\{3\}$OCTET"
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
-FINDIF=$HA_BIN/findif
+METRICCLAUSE=".*\(metric$WS[^ ]\+\)"
+PROTOCLAUSE=".*\(proto$WS[^ ]\+\)"
+FINDIF=findif
# findif needs that to be set
export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
srca_read() {
# Capture matching route - doublequotes prevent word splitting...
- ROUTE="`$CMDSHOW 2> /dev/null`" || errorexit "command '$CMDSHOW' failed"
+ ROUTE="`$CMDSHOW dev $INTERFACE 2> /dev/null`" || errorexit "command '$CMDSHOW' failed"
# ... so we can make sure there is only 1 matching route
[ 1 -eq `echo "$ROUTE" | wc -l` ] || \
@@ -201,7 +204,7 @@ srca_read() {
# But there might still be no matching route
[ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && [ -z "$ROUTE" ] && \
- ! ocf_is_probe && errorexit "no matching route exists"
+ ! ocf_is_probe && [ "$__OCF_ACTION" != stop ] && errorexit "no matching route exists"
# Sed out the source ip address if it exists
SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\3/p"`
@@ -232,8 +235,8 @@ srca_start() {
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE src $1 || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE src $1' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $PROTO src $1 $METRIC' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
$CMDCHANGE $ROUTE_WO_SRC src $1 || \
@@ -266,14 +269,11 @@ srca_stop() {
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- OPTS=""
- if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] ;then
- PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
- OPTS="proto kernel scope host src $PRIMARY_IP"
- fi
+ PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
+ OPTS="proto kernel scope link src $PRIMARY_IP"
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
$CMDCHANGE $ROUTE_WO_SRC || \
@@ -539,16 +539,19 @@ if [ $rc -ne $OCF_SUCCESS ]; then
esac
fi
-findif_out=`$FINDIF -C`
+findif_out=`$FINDIF`
rc=$?
[ $rc -ne 0 ] && {
- ocf_exit_reason "[$FINDIF -C] failed"
+ ocf_exit_reason "[$FINDIF] failed"
exit $rc
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
+LISTROUTE=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress`
+METRIC=`echo $LISTROUTE | sed -n "s/$METRICCLAUSE/\1/p"`
+[ -z "$PROTO" ] && PROTO=`echo $LISTROUTE | sed -n "s/$PROTOCLAUSE/\1/p"`
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
- NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+ NETWORK=`echo $LISTROUTE | grep -m 1 -o '^[^ ]*'`
if [ -z "$NETWORK" ]; then
err_str="command '$IP2UTIL route list dev $INTERFACE scope link $PROTO"

@ -1,543 +0,0 @@
From 3e469239e8c853725b28a9c6b509152aacc2c5cc Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 13 Jun 2022 11:24:05 +0200
Subject: [PATCH 1/2] all agents: update to promotable terms
---
heartbeat/SAPInstance | 22 +++++++++++-----------
heartbeat/conntrackd.in | 6 +++---
heartbeat/db2 | 12 ++++++------
heartbeat/dnsupdate.in | 2 +-
heartbeat/galera.in | 26 +++++++++++++-------------
heartbeat/iface-bridge | 6 +++---
heartbeat/mariadb.in | 30 +++++++++++++++---------------
heartbeat/mpathpersist.in | 24 ++++++++++++------------
heartbeat/mysql | 4 ++--
heartbeat/mysql-proxy | 2 +-
heartbeat/pgsql | 2 +-
heartbeat/redis.in | 4 ++--
heartbeat/sg_persist.in | 4 ++--
14 files changed, 74 insertions(+), 74 deletions(-)
diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance
index 016f59aff..e3fe788ae 100755
--- a/heartbeat/SAPInstance
+++ b/heartbeat/SAPInstance
@@ -25,8 +25,8 @@
# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery using cleanipc, default is false)
# OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor critical services only)
# OCF_RESKEY_SHUTDOWN_METHOD (optional, defaults to NORMAL, KILL: terminate the SAP instance with OS commands - faster, at your own risk)
-# OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Master/Slave configuration)
-# OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Master/Slave configuration)
+# OCF_RESKEY_ERS_InstanceName (optional, InstanceName of the ERS instance in a Promotable configuration)
+# OCF_RESKEY_ERS_START_PROFILE (optional, START_PROFILE of the ERS instance in a Promotable configuration)
# OCF_RESKEY_PRE_START_USEREXIT (optional, lists a script which can be executed before the resource is started)
# OCF_RESKEY_POST_START_USEREXIT (optional, lists a script which can be executed after the resource is started)
# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped)
@@ -92,11 +92,11 @@ sapinstance_usage() {
$0 manages a SAP Instance as an HA resource.
- The 'start' operation starts the instance or the ERS instance in a Master/Slave configuration
+ The 'start' operation starts the instance or the ERS instance in a Promotable configuration
The 'stop' operation stops the instance
The 'status' operation reports whether the instance is running
The 'monitor' operation reports whether the instance seems to be working
- The 'promote' operation starts the primary instance in a Master/Slave configuration
+ The 'promote' operation starts the primary instance in a Promotable configuration
The 'demote' operation stops the primary instance and starts the ERS instance
The 'reload' operation allows changed parameters (non-unique only) without restarting the service
The 'notify' operation always returns SUCCESS
@@ -201,11 +201,11 @@ You may specify multiple services separated by a | (pipe) sign in this parameter
<content type="string" default="${OCF_RESKEY_SHUTDOWN_METHOD_default}"/>
</parameter>
<parameter name="ERS_InstanceName" unique="1" required="0">
- <longdesc lang="en">Only used in a Master/Slave resource configuration:
+ <longdesc lang="en">Only used in a Promotable resource configuration:
The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile.
-The enqueue replication instance must be installed, before you want to configure a master-slave cluster resource.
+The enqueue replication instance must be installed, before you want to configure a promotable cluster resource.
-The master-slave configuration in the cluster must use this properties:
+The promotable configuration in the cluster must use this properties:
clone_max = 2
clone_node_max = 1
master_node_max = 1
@@ -215,7 +215,7 @@ master_max = 1
<content type="string" default="${OCF_RESKEY_ERS_InstanceName_default}"/>
</parameter>
<parameter name="ERS_START_PROFILE" unique="1" required="0">
- <longdesc lang="en">Only used in a Master/Slave resource configuration:
+ <longdesc lang="en">Only used in a Promotable resource configuration:
The parameter ERS_InstanceName must also be set in this configuration.
The name of the SAP START profile. Specify this parameter, if you have changed the name of the SAP START profile after the default SAP installation. As SAP release 7.10 does not have a START profile anymore, you need to specify the Instance Profile than.
</longdesc>
@@ -243,7 +243,7 @@ The name of the SAP START profile. Specify this parameter, if you have changed t
<content type="string" default="${OCF_RESKEY_POST_STOP_USEREXIT_default}" />
</parameter>
<parameter name="IS_ERS" unique="0" required="0">
- <longdesc lang="en">Only used for ASCS/ERS SAP Netweaver installations without implementing a master/slave resource to
+ <longdesc lang="en">Only used for ASCS/ERS SAP Netweaver installations without implementing a promotable resource to
allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set
to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also
systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario.
@@ -266,8 +266,8 @@ The name of the SAP START profile. Specify this parameter, if you have changed t
<action name="stop" timeout="240s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="60s" interval="120s" />
-<action name="monitor" depth="0" timeout="60s" interval="121s" role="Slave" />
-<action name="monitor" depth="0" timeout="60s" interval="119s" role="Master" />
+<action name="monitor" depth="0" timeout="60s" interval="121s" role="Unpromoted" />
+<action name="monitor" depth="0" timeout="60s" interval="119s" role="Promoted" />
<action name="promote" timeout="320s" />
<action name="demote" timeout="320s" />
<action name="reload" timeout="320s" />
diff --git a/heartbeat/conntrackd.in b/heartbeat/conntrackd.in
index f115250d6..1c2ee955b 100644
--- a/heartbeat/conntrackd.in
+++ b/heartbeat/conntrackd.in
@@ -50,7 +50,7 @@ meta_data() {
<version>1.0</version>
<longdesc lang="en">
-Master/Slave OCF Resource Agent for conntrackd
+Promotable OCF Resource Agent for conntrackd
</longdesc>
<shortdesc lang="en">This resource agent manages conntrackd</shortdesc>
@@ -81,8 +81,8 @@ For example "/packages/conntrackd-0.9.14/etc/conntrackd/conntrackd.conf"</longde
<action name="demote" timeout="30s" />
<action name="notify" timeout="30s" />
<action name="stop" timeout="30s" />
-<action name="monitor" timeout="20s" interval="20s" role="Slave" />
-<action name="monitor" timeout="20s" interval="10s" role="Master" />
+<action name="monitor" timeout="20s" interval="20s" role="Unpromoted" />
+<action name="monitor" timeout="20s" interval="10s" role="Promoted" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
diff --git a/heartbeat/db2 b/heartbeat/db2
index 4a4b2f477..620b89583 100755
--- a/heartbeat/db2
+++ b/heartbeat/db2
@@ -3,7 +3,7 @@
# db2
#
# Resource agent that manages a DB2 LUW database in Standard role
-# or HADR configuration in master/slave configuration.
+# or HADR configuration in promotable configuration.
# Multi partition is supported as well.
#
# Copyright (c) 2011 Holger Teutsch <holger.teutsch@web.de>
@@ -61,7 +61,7 @@ cat <<END
<resource-agent name="db2" version="1.0">
<version>1.0</version>
<longdesc lang="en">
-Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles in master/slave configuration. Multiple partitions are supported.
+Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles in promotable configuration. Multiple partitions are supported.
Standard mode:
@@ -71,8 +71,8 @@ Configure each partition as a separate primitive resource.
HADR mode:
A single database in HADR configuration is made highly available by automating takeover operations.
-Configure a master / slave resource with notifications enabled and an
-additional monitoring operation with role "Master".
+Configure a promotable resource with notifications enabled and an
+additional monitoring operation with role "Promoted".
In case of HADR be very deliberate in specifying intervals/timeouts. The detection of a failure including promote must complete within HADR_PEER_WINDOW.
@@ -84,7 +84,7 @@ In addition to honoring requirements for crash recovery etc. for your specific d
For further information and examples consult http://www.linux-ha.org/wiki/db2_(resource_agent)
</longdesc>
-<shortdesc lang="en">Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles as master/slave configuration. Multiple partitions are supported.</shortdesc>
+<shortdesc lang="en">Resource Agent that manages an IBM DB2 LUW databases in Standard role as primitive or in HADR roles as promotable configuration. Multiple partitions are supported.</shortdesc>
<parameters>
<parameter name="instance" unique="1" required="1">
@@ -125,7 +125,7 @@ The number of the partition (DBPARTITIONNUM) to be managed.
<action name="demote" timeout="120s"/>
<action name="notify" timeout="10s"/>
<action name="monitor" depth="0" timeout="60s" interval="20s"/>
-<action name="monitor" depth="0" timeout="60s" role="Master" interval="22s"/>
+<action name="monitor" depth="0" timeout="60s" role="Promoted" interval="22s"/>
<action name="validate-all" timeout="5s"/>
<action name="meta-data" timeout="5s"/>
</actions>
diff --git a/heartbeat/dnsupdate.in b/heartbeat/dnsupdate.in
index 35b7c99bb..b54822cd8 100755
--- a/heartbeat/dnsupdate.in
+++ b/heartbeat/dnsupdate.in
@@ -119,7 +119,7 @@ the exact syntax.
<parameter name="server" unique="0" required="0">
<longdesc lang="en">
Which DNS server to send these updates for. When no
-server is provided, this defaults to the master server
+server is provided, this defaults to the promoted server
for the correct zone.
</longdesc>
<shortdesc lang="en">DNS server to contact</shortdesc>
diff --git a/heartbeat/galera.in b/heartbeat/galera.in
index c363eb254..546b1a853 100755
--- a/heartbeat/galera.in
+++ b/heartbeat/galera.in
@@ -26,31 +26,31 @@
##
# README.
#
-# This agent only supports being configured as a multistate Master
+# This agent only supports being configured as a multistate Promoted
# resource.
#
-# Slave vs Master role:
+# Unpromoted vs Promoted role:
#
-# During the 'Slave' role, galera instances are in read-only mode and
+# During the 'Unpromoted' role, galera instances are in read-only mode and
# will not attempt to connect to the cluster. This role exists only as
# a means to determine which galera instance is the most up-to-date. The
# most up-to-date node will be used to bootstrap a galera cluster that
# has no current members.
#
-# The galera instances will only begin to be promoted to the Master role
+# The galera instances will only begin to be promoted to the Promoted role
# once all the nodes in the 'wsrep_cluster_address' connection address
# have entered read-only mode. At that point the node containing the
-# database that is most current will be promoted to Master. Once the first
-# Master instance bootstraps the galera cluster, the other nodes will be
-# promoted to Master as well.
+# database that is most current will be promoted to Promoted. Once the first
+# Promoted instance bootstraps the galera cluster, the other nodes will be
+# promoted to Promoted as well.
#
# Example: Create a galera cluster using nodes rhel7-node1 rhel7-node2 rhel7-node3
#
# pcs resource create db galera enable_creation=true \
-# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master
+# wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta promoted-max=3 --promoted
#
# By setting the 'enable_creation' option, the database will be automatically
-# generated at startup. The meta attribute 'master-max=3' means that all 3
+# generated at startup. The meta attribute 'promoted-max=3' means that all 3
# nodes listed in the wsrep_cluster_address list will be allowed to connect
# to the galera cluster and perform replication.
#
@@ -114,8 +114,8 @@ The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
-The 'promote' operation makes this mysql server run as master
-The 'demote' operation makes this mysql server run as slave
+The 'promote' operation makes this mysql server run as promoted
+The 'demote' operation makes this mysql server run as unpromoted
The 'validate-all' operation reports whether the parameters are valid
UEND
@@ -298,8 +298,8 @@ Use it with caution! (and fencing)
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
-<action name="monitor" role="Master" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" role="Slave" depth="0" timeout="30s" interval="30s" />
+<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
+<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="300s" />
<action name="demote" timeout="120s" />
<action name="validate-all" timeout="5s" />
diff --git a/heartbeat/iface-bridge b/heartbeat/iface-bridge
index 75d5371dd..a4e50adb9 100755
--- a/heartbeat/iface-bridge
+++ b/heartbeat/iface-bridge
@@ -211,7 +211,7 @@ bridge_meta_data() {
<longdesc lang="en">
Set the port cost. This is a dimensionless metric.
A list of port/cost can be specified using the following
- format: slave cost slave cost.
+ format: unpromoted cost unpromoted cost.
Example: eth0 100 eth1 1000
</longdesc>
<shortdesc lang="en">
@@ -228,7 +228,7 @@ bridge_meta_data() {
This metric is used in the designated port and root port
selection algorithms.
A list of port/priority can be specified using the following
- format: slave cost slave cost.
+ format: unpromoted cost unpromoted cost.
Example: eth0 10 eth1 60
</longdesc>
<shortdesc lang="en">
@@ -262,7 +262,7 @@ bridge_meta_data() {
Enable or disable a port from the multicast router.
Kernel enables all port by default.
A list of port can be specified using the following
- format: slave 0|1 slave 0|1.
+ format: unpromoted 0|1 unpromoted 0|1.
Example: eth0 1 eth1 0
</longdesc>
<shortdesc lang="en">
diff --git a/heartbeat/mariadb.in b/heartbeat/mariadb.in
index 39ad191bb..5a39ccb66 100644
--- a/heartbeat/mariadb.in
+++ b/heartbeat/mariadb.in
@@ -3,7 +3,7 @@
#
# MariaDB
#
-# Description: Manages a MariaDB Master/Slave database as Linux-HA resource
+# Description: Manages a MariaDB Promotable database as Linux-HA resource
#
# Authors: Alan Robertson: DB2 Script
# Jakub Janczak: rewrite as MySQL
@@ -61,8 +61,8 @@ The 'start' operation starts the database.
The 'stop' operation stops the database.
The 'status' operation reports whether the database is running
The 'monitor' operation reports whether the database seems to be working
-The 'promote' operation makes this mysql server run as master
-The 'demote' operation makes this mysql server run as slave
+The 'promote' operation makes this mysql server run as promoted
+The 'demote' operation makes this mysql server run as unpromoted
The 'validate-all' operation reports whether the parameters are valid
UEND
@@ -78,20 +78,20 @@ meta_data() {
<longdesc lang="en">
Resource script for MariaDB.
-Manages a complete master/slave replication setup with GTID, for simpler
+Manages a complete promotable replication setup with GTID, for simpler
uses look at the mysql resource agent which supports older replication
forms which mysql and mariadb have in common.
The resource must be setup to use notifications. Set 'notify=true' in the metadata
-attributes when defining a MariaDB master/slave instance.
+attributes when defining a MariaDB promotable instance.
-The default behavior is to use uname -n values in the change master to command.
+The default behavior is to use uname -n values in the change promoted to command.
Other IPs can be specified manually by adding a node attribute
\${INSTANCE_ATTR_NAME}_mysql_master_IP giving the IP to use for replication.
For example, if the mariadb primitive you are using is p_mariadb, the
attribute to set will be p_mariadb_mysql_master_IP.
</longdesc>
-<shortdesc lang="en">Manages a MariaDB master/slave instance</shortdesc>
+<shortdesc lang="en">Manages a MariaDB promotable instance</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
@@ -154,7 +154,7 @@ The logfile to be used for mysqld.
<longdesc lang="en">
All node names of nodes that will execute mariadb.
Please separate each node name with a space.
-This is required for the master selection to function.
+This is required for the promoted selection to function.
</longdesc>
<shortdesc lang="en">node list</shortdesc>
<content type="string" default="${OCF_RESKEY_node_list_default}" />
@@ -220,11 +220,11 @@ Additional parameters which are passed to the mysqld on startup.
<parameter name="replication_user" unique="0" required="0">
<longdesc lang="en">
MariaDB replication user. This user is used for starting and stopping
-MariaDB replication, for setting and resetting the master host, and for
+MariaDB replication, for setting and resetting the promoted host, and for
setting and unsetting read-only mode. Because of that, this user must
have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, PROCESS and RELOAD
privileges on all nodes within the cluster. Mandatory if you define a
-master-slave resource.
+promotable resource.
</longdesc>
<shortdesc lang="en">MariaDB replication user</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_user_default}" />
@@ -232,8 +232,8 @@ master-slave resource.
<parameter name="replication_passwd" unique="0" required="0">
<longdesc lang="en">
-MariaDB replication password. Used for replication client and slave.
-Mandatory if you define a master-slave resource.
+MariaDB replication password. Used for replication client and unpromoted.
+Mandatory if you define a promotable resource.
</longdesc>
<shortdesc lang="en">MariaDB replication user password</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
@@ -241,7 +241,7 @@ Mandatory if you define a master-slave resource.
<parameter name="replication_port" unique="0" required="0">
<longdesc lang="en">
-The port on which the Master MariaDB instance is listening.
+The port on which the Promoted MariaDB instance is listening.
</longdesc>
<shortdesc lang="en">MariaDB replication port</shortdesc>
<content type="string" default="${OCF_RESKEY_replication_port_default}" />
@@ -254,8 +254,8 @@ The port on which the Master MariaDB instance is listening.
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
-<action name="monitor" role="Master" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" role="Slave" depth="0" timeout="30s" interval="30s" />
+<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
+<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
diff --git a/heartbeat/mpathpersist.in b/heartbeat/mpathpersist.in
index fcf1b3a4b..e47fef4bd 100644
--- a/heartbeat/mpathpersist.in
+++ b/heartbeat/mpathpersist.in
@@ -80,9 +80,9 @@ meta_data() {
<longdesc lang="en">
This resource agent manages SCSI persistent reservations on multipath devices.
"mpathpersist" from multipath-tools is used, please see its documentation.
-Should be used as multistate (Master/Slave) resource
-Slave registers its node id ("crm_node -i") as reservation key ( --param-sark ) on each device in the params "devs" list.
-Master reserves all devices from params "devs" list with reservation "--prout-type" value from "reservation_type" parameter.
+Should be used as multistate (Promotable) resource
+Unpromoted registers its node id ("crm_node -i") as reservation key ( --param-sark ) on each device in the params "devs" list.
+Promoted reserves all devices from params "devs" list with reservation "--prout-type" value from "reservation_type" parameter.
Please see man sg_persist(8) and mpathpersist(8) for reservation_type details.
</longdesc>
<shortdesc lang="en">Manages SCSI persistent reservations on multipath devices</shortdesc>
@@ -132,7 +132,7 @@ reservation type
master_score_base value
"master_score_base" value is used in "master_score" calculation:
master_score = master_score_base + master_score_dev_factor * working_devs
-if set to bigger value in mpathpersist resource configuration on some node, this node will be "preferred" for master role.
+if set to bigger value in mpathpersist resource configuration on some node, this node will be "preferred" for promoted role.
</longdesc>
<shortdesc lang="en">base master_score value</shortdesc>
<content type="string" default="${OCF_RESKEY_master_score_base_default}" />
@@ -140,9 +140,9 @@ if set to bigger value in mpathpersist resource configuration on some node, this
<parameter name="master_score_dev_factor" unique="0" required="0">
<longdesc lang="en">
-Working device factor in master_score calculation
+Working device factor in promoted calculation
each "working" device provides additional value to "master_score",
-so the node that sees more devices will be preferred for the "Master"-role
+so the node that sees more devices will be preferred for the "Promoted"-role
Setting it to 0 will disable this behavior.
</longdesc>
<shortdesc lang="en">working device factor in master_score calculation</shortdesc>
@@ -151,10 +151,10 @@ Setting it to 0 will disable this behavior.
<parameter name="master_score_delay" unique="0" required="0">
<longdesc lang="en">
-master/slave decreases/increases its master_score after delay of "master_score_delay" seconds
-so if some device gets inaccessible, the slave decreases its master_score first and the resource will no be watched
-and after this device reappears again the master increases its master_score first
-this can work only if the master_score_delay is bigger then monitor interval on both master and slave
+promoted/unpromoted decreases/increases its master_score after delay of "master_score_delay" seconds
+so if some device gets inaccessible, the unpromoted decreases its promoted first and the resource will no be watched
+and after this device reappears again the promoted increases its master_score first
+this can work only if the master_score_delay is bigger then monitor interval on both promoted and unpromoted
Setting it to 0 will disable this behavior.
</longdesc>
<shortdesc lang="en">master_score decrease/increase delay time</shortdesc>
@@ -168,8 +168,8 @@ Setting it to 0 will disable this behavior.
<action name="demote" timeout="30s" />
<action name="notify" timeout="30s" />
<action name="stop" timeout="30s" />
-<action name="monitor" depth="0" timeout="20s" interval="29s" role="Slave" />
-<action name="monitor" depth="0" timeout="20s" interval="60s" role="Master" />
+<action name="monitor" depth="0" timeout="20s" interval="29s" role="Unpromoted" />
+<action name="monitor" depth="0" timeout="20s" interval="60s" role="Promoted" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
diff --git a/heartbeat/mysql b/heartbeat/mysql
index 720de8c1a..aec44fe5e 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -321,8 +321,8 @@ whether a node is usable for clients to read from.</shortdesc>
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="20s" />
-<action name="monitor" role="Master" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" role="Slave" depth="0" timeout="30s" interval="30s" />
+<action name="monitor" role="Promoted" depth="0" timeout="30s" interval="10s" />
+<action name="monitor" role="Unpromoted" depth="0" timeout="30s" interval="30s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
diff --git a/heartbeat/mysql-proxy b/heartbeat/mysql-proxy
index e34396d9a..fdf2fa230 100755
--- a/heartbeat/mysql-proxy
+++ b/heartbeat/mysql-proxy
@@ -162,7 +162,7 @@ Address:port of the remote back-end servers (default: 127.0.0.1:3306).
<parameter name="proxy_read_only_backend_addresses" unique="0" required="0">
<longdesc lang="en">
-Address:port of the remote (read only) slave-server (default: ).
+Address:port of the remote (read only) unpromoted-server (default: ).
</longdesc>
<shortdesc lang="en">MySql Proxy read only back-end servers</shortdesc>
<content type="string" default="${OCF_RESKEY_proxy_read_only_backend_addresses_default}" />
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index e3a39038f..94aceb324 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -458,7 +458,7 @@ wal receiver is not running in the master and the attribute shows status as
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="30s" interval="30s"/>
-<action name="monitor" depth="0" timeout="30s" interval="29s" role="Master" />
+<action name="monitor" depth="0" timeout="30s" interval="29s" role="Promoted" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
diff --git a/heartbeat/redis.in b/heartbeat/redis.in
index 7f886c7ea..6429477e1 100755
--- a/heartbeat/redis.in
+++ b/heartbeat/redis.in
@@ -220,8 +220,8 @@ is in use.
<action name="stop" timeout="120s" />
<action name="status" timeout="60s" />
<action name="monitor" depth="0" timeout="60s" interval="45s" />
-<action name="monitor" role="Master" depth="0" timeout="60s" interval="20s" />
-<action name="monitor" role="Slave" depth="0" timeout="60s" interval="60s" />
+<action name="monitor" role="Promoted" depth="0" timeout="60s" interval="20s" />
+<action name="monitor" role="Unpromoted" depth="0" timeout="60s" interval="60s" />
<action name="promote" timeout="120s" />
<action name="demote" timeout="120s" />
<action name="notify" timeout="90s" />
diff --git a/heartbeat/sg_persist.in b/heartbeat/sg_persist.in
index 678762f40..0497cc469 100644
--- a/heartbeat/sg_persist.in
+++ b/heartbeat/sg_persist.in
@@ -168,8 +168,8 @@ Setting it to 0 will disable this behavior.
<action name="demote" timeout="30s" />
<action name="notify" timeout="30s" />
<action name="stop" timeout="30s" />
-<action name="monitor" depth="0" timeout="20s" interval="29s" role="Slave" />
-<action name="monitor" depth="0" timeout="20s" interval="60s" role="Master" />
+<action name="monitor" depth="0" timeout="20s" interval="29s" role="Unpromoted" />
+<action name="monitor" depth="0" timeout="20s" interval="60s" role="Promoted" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
From 14e5cb71e3749d311745f110f90cc1139f9cedaf Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 16 Jun 2022 15:54:39 +0200
Subject: [PATCH 2/2] metadata: update to promoted roles
---
heartbeat/metadata.rng | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/metadata.rng b/heartbeat/metadata.rng
index 3dd735547..909efc284 100644
--- a/heartbeat/metadata.rng
+++ b/heartbeat/metadata.rng
@@ -85,8 +85,8 @@
<define name="role-values">
<choice>
- <value>Master</value>
- <value>Slave</value>
+ <value>Promoted</value>
+ <value>Unpromoted</value>
</choice>
</define>

@ -1,102 +0,0 @@
From e651576c1b5c1ffbe0fd1b78f209be9a3f9764e7 Mon Sep 17 00:00:00 2001
From: XingWei-Liu <liuxingwei@uniontech.com>
Date: Thu, 10 Mar 2022 10:38:11 +0800
Subject: [PATCH 1/4] change lvm_status return value from ocf_not_running to
ocf_err_generic
---
heartbeat/LVM-activate | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index aed672ea3..0aef76706 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -790,7 +790,7 @@ lvm_status() {
fi
if [ $dm_count -eq 0 ]; then
- return $OCF_NOT_RUNNING
+ return $OCF_ERR_GENERIC
fi
case "$OCF_CHECK_LEVEL" in
From 540ae56436a4f9547bb17aa206fe0e8c7a7fea87 Mon Sep 17 00:00:00 2001
From: XingWei-Liu <liuxingwei@uniontech.com>
Date: Thu, 10 Mar 2022 16:44:25 +0800
Subject: [PATCH 2/4] add if ocf_is_probe in monitor func
---
heartbeat/LVM-activate | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 0aef76706..c86606637 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -790,7 +790,11 @@ lvm_status() {
fi
if [ $dm_count -eq 0 ]; then
- return $OCF_ERR_GENERIC
+ if ocf_is_probe ;then
+ return $OCF_NOT_RUNNING
+ else
+ return $OCF_ERR_GENERIC
+ fi
fi
case "$OCF_CHECK_LEVEL" in
From ae3f35d4f671f3288034a257c6dd8eff9a83447a Mon Sep 17 00:00:00 2001
From: XingWei-Liu <liuxingwei@uniontech.com>
Date: Thu, 10 Mar 2022 16:50:04 +0800
Subject: [PATCH 3/4] add if ocf_is_probe in monitor func
---
heartbeat/LVM-activate | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c86606637..f345f73a9 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -791,9 +791,9 @@ lvm_status() {
if [ $dm_count -eq 0 ]; then
if ocf_is_probe ;then
- return $OCF_NOT_RUNNING
- else
return $OCF_ERR_GENERIC
+ else
+ return $OCF_NOT_RUNNING
fi
fi
From 1072c0490ef936a1a7dfd8411da434dce1569457 Mon Sep 17 00:00:00 2001
From: XingWei-Liu <liuxingwei@uniontech.com>
Date: Thu, 10 Mar 2022 18:10:21 +0800
Subject: [PATCH 4/4] reverse return value in monitor func
---
heartbeat/LVM-activate | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index f345f73a9..c86606637 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -791,9 +791,9 @@ lvm_status() {
if [ $dm_count -eq 0 ]; then
if ocf_is_probe ;then
- return $OCF_ERR_GENERIC
- else
return $OCF_NOT_RUNNING
+ else
+ return $OCF_ERR_GENERIC
fi
fi

@ -1,312 +0,0 @@
From fd1d6426a2d05f521207c305d10b49fedd92c2df Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Mon, 28 Feb 2022 09:27:42 +0100
Subject: [PATCH 1/4] IPaddr2: Allow to disable Duplicate Address Detection for
IPv6
"Starting" an IPv6 address with IPaddr2 involves performing Duplicate
Address Detection which typically takes at least 1000 ms. Allow the user
to disable DAD if they can guarantee that the configured address is not
duplicate and they wish to start the resource faster.
---
heartbeat/IPaddr2 | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 735dd7779..650392b70 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -88,6 +88,7 @@ OCF_RESKEY_arp_sender_default=""
OCF_RESKEY_send_arp_opts_default=""
OCF_RESKEY_flush_routes_default="false"
OCF_RESKEY_run_arping_default=false
+OCF_RESKEY_nodad_default=false
OCF_RESKEY_noprefixroute_default="false"
OCF_RESKEY_preferred_lft_default="forever"
OCF_RESKEY_network_namespace_default=""
@@ -110,6 +111,7 @@ OCF_RESKEY_network_namespace_default=""
: ${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}}
: ${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}}
: ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}}
+: ${OCF_RESKEY_nodad=${OCF_RESKEY_nodad_default}}
: ${OCF_RESKEY_noprefixroute=${OCF_RESKEY_noprefixroute_default}}
: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}}
: ${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}}
@@ -391,6 +393,14 @@ Whether or not to run arping for IPv4 collision detection check.
<content type="string" default="${OCF_RESKEY_run_arping_default}"/>
</parameter>
+<parameter name="nodad">
+<longdesc lang="en">
+For IPv6, do not perform Duplicate Address Detection when adding the address.
+</longdesc>
+<shortdesc lang="en">Use nodad flag</shortdesc>
+<content type="string" default="${OCF_RESKEY_nodad_default}"/>
+</parameter>
+
<parameter name="noprefixroute">
<longdesc lang="en">
Use noprefixroute flag (see 'man ip-address').
@@ -662,6 +672,11 @@ add_interface () {
msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface"
fi
+ if [ "$FAMILY" = "inet6" ] && ocf_is_true "${OCF_RESKEY_nodad}"; then
+ cmd="$cmd nodad"
+ msg="${msg} (with nodad)"
+ fi
+
if ocf_is_true "${OCF_RESKEY_noprefixroute}"; then
cmd="$cmd noprefixroute"
msg="${msg} (with noprefixroute)"
From f4a9e3281d48c5d37f5df593d014706c46ddb3a7 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Mon, 7 Mar 2022 17:21:59 +0100
Subject: [PATCH 2/4] IPaddr2: Allow to send IPv6 Neighbor Advertisements in
background
"Starting" an IPv6 address with IPaddr2 involves sending Neighbor
Advertisement packets to inform neighboring machines about the new
IP+MAC translation. By default, 5x packets with 200 ms sleep after each
are sent which delays the start by 1000 ms. Allow the user to run this
operation in background, similarly as is possible with GARP for IPv4.
---
heartbeat/IPaddr2 | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 650392b70..e243a642d 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -83,7 +83,7 @@ OCF_RESKEY_unique_clone_address_default=false
OCF_RESKEY_arp_interval_default=200
OCF_RESKEY_arp_count_default=5
OCF_RESKEY_arp_count_refresh_default=0
-OCF_RESKEY_arp_bg_default=true
+OCF_RESKEY_arp_bg_default=""
OCF_RESKEY_arp_sender_default=""
OCF_RESKEY_send_arp_opts_default=""
OCF_RESKEY_flush_routes_default="false"
@@ -336,9 +336,10 @@ situations.
<parameter name="arp_bg">
<longdesc lang="en">
-Whether or not to send the ARP packets in the background.
+Whether or not to send the ARP (IPv4) or NA (IPv6) packets in the background.
+The default is true for IPv4 and false for IPv6.
</longdesc>
-<shortdesc lang="en">ARP from background</shortdesc>
+<shortdesc lang="en">ARP/NA from background</shortdesc>
<content type="string" default="${OCF_RESKEY_arp_bg_default}"/>
</parameter>
@@ -507,6 +508,9 @@ ip_init() {
ocf_exit_reason "IPv4 does not support lvs_ipv6_addrlabel"
exit $OCF_ERR_CONFIGURED
fi
+ if [ -z "$OCF_RESKEY_arp_bg" ]; then
+ OCF_RESKEY_arp_bg=true
+ fi
else
FAMILY=inet6
# address sanitization defined in RFC5952
@@ -527,6 +531,9 @@ ip_init() {
exit $OCF_ERR_CONFIGURED
fi
fi
+ if [ -z "$OCF_RESKEY_arp_bg" ]; then
+ OCF_RESKEY_arp_bg=false
+ fi
fi
# support nic:iflabel format in nic parameter
@@ -893,6 +900,20 @@ run_arp_sender() {
fi
}
+log_send_ua() {
+ local cmdline
+ local output
+ local rc
+
+ cmdline="$@"
+ output=$($cmdline 2>&1)
+ rc=$?
+ if [ $rc -ne 0 ] ; then
+ ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements: rc=$rc"
+ fi
+ ocf_log info "$output"
+ return $rc
+}
#
# Run send_ua to note send ICMPv6 Unsolicited Neighbor Advertisements.
@@ -930,7 +951,11 @@ run_send_ua() {
ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC"
ocf_log info "$SENDUA $ARGS"
- $SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
+ if ocf_is_true $OCF_RESKEY_arp_bg; then
+ log_send_ua $SENDUA $ARGS &
+ else
+ log_send_ua $SENDUA $ARGS
+ fi
}
# Do we already serve this IP address on the given $NIC?
From c8afb43012c264f3ee24013a92b2a2f3566db2fd Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Tue, 8 Mar 2022 12:35:56 +0100
Subject: [PATCH 3/4] IPaddr2: Log 'ip addr add' options together
Change the log message in add_interface() from
"Adding ... (with <opt1>) (with <opt2>)"
to
"Adding ... (with <opt1> <opt2>)".
---
heartbeat/IPaddr2 | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index e243a642d..dca1b6f5b 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -651,7 +651,7 @@ delete_interface () {
# Add an interface
#
add_interface () {
- local cmd msg ipaddr netmask broadcast iface label
+ local cmd msg extra_opts ipaddr netmask broadcast iface label
ipaddr="$1"
netmask="$2"
@@ -679,23 +679,24 @@ add_interface () {
msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface"
fi
+ extra_opts=""
if [ "$FAMILY" = "inet6" ] && ocf_is_true "${OCF_RESKEY_nodad}"; then
- cmd="$cmd nodad"
- msg="${msg} (with nodad)"
+ extra_opts="$extra_opts nodad"
fi
if ocf_is_true "${OCF_RESKEY_noprefixroute}"; then
- cmd="$cmd noprefixroute"
- msg="${msg} (with noprefixroute)"
+ extra_opts="$extra_opts noprefixroute"
fi
if [ ! -z "$label" ]; then
- cmd="$cmd label $label"
- msg="${msg} (with label $label)"
+ extra_opts="$extra_opts label $label"
fi
if [ "$FAMILY" = "inet6" ] ;then
- cmd="$cmd preferred_lft $OCF_RESKEY_preferred_lft"
- msg="${msg} (with preferred_lft $OCF_RESKEY_preferred_lft)"
+ extra_opts="$extra_opts preferred_lft $OCF_RESKEY_preferred_lft"
+ fi
+ if [ -n "$extra_opts" ]; then
+ cmd="$cmd$extra_opts"
+ msg="$msg (with$extra_opts)"
fi
ocf_log info "$msg"
From cb4d52ead694718282a40eab24e04b6d85bcc802 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Mon, 7 Mar 2022 17:25:02 +0100
Subject: [PATCH 4/4] IPaddr2: Clarify behavior of 'arp_*' parameters for IPv4
and IPv6
* Mention that 'arp_*' parameters are shared by the IPv4 and IPv6 code.
* Clarify description of these parameters and mark which of them apply
only to IPv4.
---
heartbeat/IPaddr2 | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index dca1b6f5b..97a7431a2 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -157,6 +157,12 @@ and/or clone-max &lt; number of nodes. In case of node failure,
clone instances need to be re-allocated on surviving nodes.
This would not be possible if there is already an instance
on those nodes, and clone-node-max=1 (which is the default).
+
+When the specified IP address gets assigned to a respective interface, the
+resource agent sends unsolicited ARP (Address Resolution Protocol, IPv4) or NA
+(Neighbor Advertisement, IPv6) packets to inform neighboring machines about the
+change. This functionality is controlled for both IPv4 and IPv6 by shared
+'arp_*' parameters.
</longdesc>
<shortdesc lang="en">Manages virtual IPv4 and IPv6 addresses (Linux specific version)</shortdesc>
@@ -306,28 +312,30 @@ a unique address to manage
<parameter name="arp_interval">
<longdesc lang="en">
-Specify the interval between unsolicited ARP packets in milliseconds.
+Specify the interval between unsolicited ARP (IPv4) or NA (IPv6) packets in
+milliseconds.
This parameter is deprecated and used for the backward compatibility only.
It is effective only for the send_arp binary which is built with libnet,
and send_ua for IPv6. It has no effect for other arp_sender.
</longdesc>
-<shortdesc lang="en">ARP packet interval in ms (deprecated)</shortdesc>
+<shortdesc lang="en">ARP/NA packet interval in ms (deprecated)</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_interval_default}"/>
</parameter>
<parameter name="arp_count">
<longdesc lang="en">
-Number of unsolicited ARP packets to send at resource initialization.
+Number of unsolicited ARP (IPv4) or NA (IPv6) packets to send at resource
+initialization.
</longdesc>
-<shortdesc lang="en">ARP packet count sent during initialization</shortdesc>
+<shortdesc lang="en">ARP/NA packet count sent during initialization</shortdesc>
<content type="integer" default="${OCF_RESKEY_arp_count_default}"/>
</parameter>
<parameter name="arp_count_refresh">
<longdesc lang="en">
-Number of unsolicited ARP packets to send during resource monitoring. Doing
-so helps mitigate issues of stuck ARP caches resulting from split-brain
+For IPv4, number of unsolicited ARP packets to send during resource monitoring.
+Doing so helps mitigate issues of stuck ARP caches resulting from split-brain
situations.
</longdesc>
<shortdesc lang="en">ARP packet count sent during monitoring</shortdesc>
@@ -345,7 +353,7 @@ The default is true for IPv4 and false for IPv6.
<parameter name="arp_sender">
<longdesc lang="en">
-The program to send ARP packets with on start. Available options are:
+For IPv4, the program to send ARP packets with on start. Available options are:
- send_arp: default
- ipoibarping: default for infiniband interfaces if ipoibarping is available
- iputils_arping: use arping in iputils package
@@ -357,7 +365,7 @@ The program to send ARP packets with on start. Available options are:
<parameter name="send_arp_opts">
<longdesc lang="en">
-Extra options to pass to the arp_sender program.
+For IPv4, extra options to pass to the arp_sender program.
Available options are vary depending on which arp_sender is used.
A typical use case is specifying '-A' for iputils_arping to use
@@ -388,7 +396,7 @@ IP address goes away.
<parameter name="run_arping">
<longdesc lang="en">
-Whether or not to run arping for IPv4 collision detection check.
+For IPv4, whether or not to run arping for collision detection check.
</longdesc>
<shortdesc lang="en">Run arping for IPv4 collision detection check</shortdesc>
<content type="string" default="${OCF_RESKEY_run_arping_default}"/>

@ -1,401 +0,0 @@
From d59a000da2766476538bb82d1889f5c0f3882f9f Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Wed, 2 Mar 2022 18:43:31 +0100
Subject: [PATCH] corosync-qnetd: Add resource agent
Mostly for better monitor operation using corosync-qnetd-tool. As qnetd
is (almost) stateless only directory which has to be copied (once)
across the nodes is nss db directory (usually
/etc/corosync/qnetd/nssdb).
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/corosync-qnetd | 353 +++++++++++++++++++++++++++++++++++++++
3 files changed, 355 insertions(+)
create mode 100755 heartbeat/corosync-qnetd
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 1093717fe..013aa392d 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -127,6 +127,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_azure-lb.7 \
ocf_heartbeat_clvm.7 \
ocf_heartbeat_conntrackd.7 \
+ ocf_heartbeat_corosync-qnetd.7 \
ocf_heartbeat_crypt.7 \
ocf_heartbeat_db2.7 \
ocf_heartbeat_dhcpd.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 67b400679..38154e2da 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -101,6 +101,7 @@ ocf_SCRIPTS = AoEtarget \
azure-lb \
clvm \
conntrackd \
+ corosync-qnetd \
crypt \
db2 \
dhcpd \
diff --git a/heartbeat/corosync-qnetd b/heartbeat/corosync-qnetd
new file mode 100755
index 000000000..6b9777711
--- /dev/null
+++ b/heartbeat/corosync-qnetd
@@ -0,0 +1,353 @@
+#!/bin/sh
+#
+# Copyright (C) 2022 Red Hat, Inc. All rights reserved.
+#
+# Authors: Jan Friesse <jfriesse@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+# Initialization:
+: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
+. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
+
+# Use runuser if available for SELinux.
+if [ -x "/sbin/runuser" ]; then
+ SU="runuser"
+else
+ SU="su"
+fi
+
+# Attempt to detect a default binary
+OCF_RESKEY_binary_default=$(which corosync-qnetd 2> /dev/null)
+if [ "${OCF_RESKEY_binary_default}" = "" ]; then
+ OCF_RESKEY_binary_default="/usr/bin/corosync-qnetd"
+fi
+
+# Defaults
+OCF_RESKEY_qnetd_opts_default=""
+OCF_RESKEY_qnetd_tool_binary_default="/usr/bin/corosync-qnetd-tool"
+OCF_RESKEY_ip_default=""
+OCF_RESKEY_port_default=""
+OCF_RESKEY_nss_db_dir_default=""
+OCF_RESKEY_pid_default="/var/run/corosync-qnetd/corosync-qnetd-${OCF_RESOURCE_INSTANCE}.pid"
+OCF_RESKEY_ipc_sock_default="/var/run/corosync-qnetd/corosync-qnetd-${OCF_RESOURCE_INSTANCE}.sock"
+OCF_RESKEY_user_default="coroqnetd"
+OCF_RESKEY_group_default="${OCF_RESKEY_user_default}"
+
+: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
+: "${OCF_RESKEY_qnetd_opts=${OCF_RESKEY_qnetd_opts_default}}"
+: "${OCF_RESKEY_qnetd_tool_binary=${OCF_RESKEY_qnetd_tool_binary_default}}"
+: "${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}"
+: "${OCF_RESKEY_port=${OCF_RESKEY_port_default}}"
+: "${OCF_RESKEY_nss_db_dir=${OCF_RESKEY_nss_db_dir_default}}"
+: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
+: "${OCF_RESKEY_ipc_sock=${OCF_RESKEY_ipc_sock_default}}"
+: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
+: "${OCF_RESKEY_group=${OCF_RESKEY_group_default}}"
+
+corosync_qnetd_usage() {
+ cat <<END
+usage: $0 {start|stop|status|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+corosync_qnetd_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="corosync-qnetd" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">OCF Resource script for corosync-qnetd. It manages a corosync-qnetd
+instance as a HA resource. It is required to copy nss db directory (usually /etc/corosync/qnetd/nssdb)
+across all nodes (only once - after database is initialized).</longdesc>
+<shortdesc lang="en">Corosync QNet daemon resource agent</shortdesc>
+
+<parameters>
+
+<parameter name="binary">
+ <longdesc lang="en">Location of the corosync-qnetd binary</longdesc>
+ <shortdesc lang="en">corosync-qnetd binary</shortdesc>
+ <content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="qnetd_opts">
+ <longdesc lang="en">
+ Additional options for corosync-qnetd binary. "-4" for example.
+ </longdesc>
+ <shortdesc lang="en">corosync-qnetd extra options</shortdesc>
+ <content type="string" default="${OCF_RESKEY_qnetd_opts_default}" />
+</parameter>
+
+<parameter name="qnetd_tool_binary">
+ <longdesc lang="en">
+ The absolute path to the corosync-qnetd-tool for monitoring with OCF_CHECK_LEVEL greater zero.
+ </longdesc>
+ <shortdesc lang="en">The absolute path to the corosync-qnetd-tool binary</shortdesc>
+ <content type="string" default="${OCF_RESKEY_qnetd_tool_binary_default}" />
+</parameter>
+
+<parameter name="ip">
+ <longdesc lang="en">
+ IP address to listen on. By default the daemon listens on all addresses (wildcard).
+ </longdesc>
+ <shortdesc lang="en">IP address to listen on</shortdesc>
+ <content type="string" default="${OCF_RESKEY_ip_default}" />
+</parameter>
+
+<parameter name="port">
+ <longdesc lang="en">
+ TCP port to listen on. Default port is 5403.
+ </longdesc>
+ <shortdesc lang="en">TCP port to listen on</shortdesc>
+ <content type="string" default="${OCF_RESKEY_port_default}" />
+</parameter>
+
+<parameter name="nss_db_dir">
+ <longdesc lang="en">
+ Location of the corosync-qnetd nss db directory (empty for default - usually /etc/corosync/qnetd/nssdb)
+ </longdesc>
+ <shortdesc lang="en">corosync-qnetd nss db directory</shortdesc>
+ <content type="string" default="${OCF_RESKEY_nss_db_dir_default}" />
+</parameter>
+
+<parameter name="pid">
+ <longdesc lang="en">
+ Location of the corosync-qnetd pid/lock
+ </longdesc>
+ <shortdesc lang="en">corosync-qnetd pid file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+<parameter name="ipc_sock">
+ <longdesc lang="en">
+ Location of the corosync-qnetd ipc socket
+ </longdesc>
+ <shortdesc lang="en">corosync-qnetd ipc socket file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_ipc_sock_default}" />
+</parameter>
+
+<parameter name="user">
+ <longdesc lang="en">User running corosync-qnetd</longdesc>
+ <shortdesc lang="en">corosync-qnetd user</shortdesc>
+ <content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="group">
+ <longdesc lang="en">Group running corosync-qnetd</longdesc>
+ <shortdesc lang="en">corosync-qnetd group</shortdesc>
+ <content type="string" default="${OCF_RESKEY_group_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20s" />
+<action name="stop" timeout="20s" />
+<action name="status" timeout="20s" />
+<action name="monitor" depth="0" timeout="20s" interval="10s" start-delay="10s" />
+<action name="validate-all" timeout="20s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+}
+
+corosync_qnetd_status() {
+ ocf_pidfile_status "${OCF_RESKEY_pid}" > /dev/null 2>&1
+ case "$?" in
+ 0)
+ rc="$OCF_SUCCESS"
+ ;;
+ 1|2)
+ rc="$OCF_NOT_RUNNING"
+ ;;
+ *)
+ rc="$OCF_ERR_GENERIC"
+ ;;
+ esac
+
+ return "$rc"
+}
+
+corosync_qnetd_start() {
+ corosync_qnetd_validate_all
+ rc="$?"
+
+ if [ "$rc" -ne 0 ]; then
+ return "$rc"
+ fi
+
+ # if resource is already running,no need to continue code after this.
+ if corosync_qnetd_status; then
+ ocf_log info "corosync-qnetd is already running"
+ return "${OCF_SUCCESS}"
+ fi
+
+ pid_dir=$(dirname "${OCF_RESKEY_pid}")
+ sock_dir=$(dirname "${OCF_RESKEY_ipc_sock}")
+
+ for d in "$pid_dir" "$sock_dir";do
+ if [ ! -d "$d" ];then
+ mkdir -p "$d"
+ chmod 0770 "$d"
+ chown "${OCF_RESKEY_user}:${OCF_RESKEY_group}" "$d"
+ fi
+ done
+
+ params="-S \"local_socket_file=${OCF_RESKEY_ipc_sock}\" -S \"lock_file=${OCF_RESKEY_pid}\""
+
+ if [ -n "${OCF_RESKEY_nss_db_dir}" ];then
+ params="$params -S \"nss_db_dir=${OCF_RESKEY_nss_db_dir}\""
+ fi
+
+ if [ -n "${OCF_RESKEY_ip}" ];then
+ params="$params -l \"${OCF_RESKEY_ip}\""
+ fi
+
+ if [ -n "${OCF_RESKEY_port}" ];then
+ params="$params -p \"${OCF_RESKEY_port}\""
+ fi
+
+ params="$params ${OCF_RESKEY_qnetd_opts}"
+
+ ocf_run "$SU" -s "/bin/sh" "${OCF_RESKEY_user}" -c "${OCF_RESKEY_binary} $params"
+
+ while :; do
+ corosync_qnetd_monitor "debug"
+ rc="$?"
+
+ if [ "$rc" -eq "${OCF_SUCCESS}" ]; then
+ break
+ fi
+ sleep 1
+
+ ocf_log debug "corosync-qnetd still hasn't started yet. Waiting..."
+ done
+
+ ocf_log info "corosync-qnetd started"
+ return "${OCF_SUCCESS}"
+}
+
+corosync_qnetd_stop() {
+ corosync_qnetd_status
+
+ if [ "$?" -ne "$OCF_SUCCESS" ]; then
+ # Currently not running. Nothing to do.
+ ocf_log info "corosync-qnetd is already stopped"
+
+ return "$OCF_SUCCESS"
+ fi
+
+ pid=$(cat "${OCF_RESKEY_pid}")
+ kill "$pid"
+
+ # Wait for process to stop
+ while corosync_qnetd_monitor "debug"; do
+ sleep 1
+ done
+
+ ocf_log info "corosync-qnetd stopped"
+ return "$OCF_SUCCESS"
+}
+
+corosync_qnetd_monitor() {
+ loglevel=${1:-err}
+
+ corosync_qnetd_status
+ rc="$?"
+
+ if [ "$rc" -ne "$OCF_SUCCESS" ];then
+ return "$rc"
+ fi
+
+ out=$("${OCF_RESKEY_qnetd_tool_binary}" -s -p "${OCF_RESKEY_ipc_sock}" 2>&1 >/dev/null)
+ rc="$?"
+
+ if [ "$rc" != 0 ];then
+ ocf_log "$loglevel" "$out"
+ fi
+
+ case "$rc" in
+ "0") rc="$OCF_SUCCESS" ;;
+ "3") rc="$OCF_NOT_RUNNING" ;;
+ *) rc="$OCF_ERR_GENERIC" ;;
+ esac
+
+ return "$rc"
+}
+
+corosync_qnetd_validate_all() {
+ check_binary "${OCF_RESKEY_binary}"
+
+ check_binary "${OCF_RESKEY_qnetd_tool_binary}"
+}
+
+
+# **************************** MAIN SCRIPT ************************************
+
+# Make sure meta-data and usage always succeed
+case "$__OCF_ACTION" in
+ meta-data)
+ corosync_qnetd_meta_data
+ exit "$OCF_SUCCESS"
+ ;;
+ usage|help)
+ corosync_qnetd_usage
+ exit "$OCF_SUCCESS"
+ ;;
+esac
+
+# This OCF agent script need to be run as root user.
+if ! ocf_is_root; then
+ echo "$0 agent script need to be run as root user."
+ ocf_log debug "$0 agent script need to be run as root user."
+ exit "$OCF_ERR_GENERIC"
+fi
+
+# Translate each action into the appropriate function call
+case "$__OCF_ACTION" in
+ start)
+ corosync_qnetd_start
+ ;;
+ stop)
+ corosync_qnetd_stop
+ ;;
+ status)
+ corosync_qnetd_status
+ ;;
+ monitor)
+ corosync_qnetd_monitor
+ ;;
+ validate-all)
+ corosync_qnetd_validate_all
+ ;;
+ *)
+ corosync_qnetd_usage
+ exit "$OCF_ERR_UNIMPLEMENTED"
+ ;;
+esac
+
+rc="$?"
+exit "$rc"
+# End of this script

@ -1,44 +0,0 @@
From 26de0ad2f0f975166fe79ef72ab08e2c03519eea Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 28 Mar 2022 13:25:35 +0200
Subject: [PATCH] Filesystem: fix logic for UUID/label devices with space
between parameter and UUID/label
---
heartbeat/Filesystem | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 1a90d6a42..72a1b8623 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -596,11 +596,11 @@ Filesystem_start()
flushbufs "$DEVICE"
# Mount the filesystem.
case "$FSTYPE" in
- none) $MOUNT $options "$DEVICE" "$MOUNTPOINT" &&
+ none) $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" &&
bind_mount
;;
- "") $MOUNT $options "$DEVICE" "$MOUNTPOINT" ;;
- *) $MOUNT -t "$FSTYPE" $options "$DEVICE" "$MOUNTPOINT" ;;
+ "") $MOUNT $options $device_opt "$DEVICE" "$MOUNTPOINT" ;;
+ *) $MOUNT -t "$FSTYPE" $options $device_opt "$DEVICE" "$MOUNTPOINT" ;;
esac
if [ $? -ne 0 ]; then
@@ -902,7 +902,13 @@ set_blockdevice_var() {
fi
case "$DEVICE" in
- -*) # Oh... An option to mount instead... Typically -U or -L
+ --*) # Typically --uuid or --label
+ device_opt=$(echo $DEVICE | sed -E "s/([[:blank:]]|=).*//")
+ DEVICE=$(echo $DEVICE | sed -E "s/$device_opt([[:blank:]]*|=)//")
+ ;;
+ -*) # Oh... An option to mount instead... Typically -U or -L
+ device_opt=$(echo $DEVICE | cut -c1-2)
+ DEVICE=$(echo $DEVICE | sed "s/$device_opt[[:blank:]]*//")
;;
/dev/null) # Special case for BSC
blockdevice=yes

@ -1,38 +0,0 @@
From d9b46474fc19d9c57e2cfb752d60319017da8410 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 6 Apr 2022 14:14:19 +0200
Subject: [PATCH] Filesystem: improve logic for UUID/label and add note that
/dev/disk/by-{uuid,label}/ are preferred on Linux
---
heartbeat/Filesystem | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 72a1b8623..44270ad98 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -163,6 +163,8 @@ directory where the status file is to be placed.
<parameter name="device" required="1">
<longdesc lang="en">
The name of block device for the filesystem, or -U, -L options for mount, or NFS mount specification.
+
+NOTE: On Linux /dev/disk/by-{uuid,label}/ are preferred to -U/-L.
</longdesc>
<shortdesc lang="en">block device</shortdesc>
<content type="string" default="${OCF_RESKEY_device_default}" />
@@ -902,11 +904,11 @@ set_blockdevice_var() {
fi
case "$DEVICE" in
- --*) # Typically --uuid or --label
- device_opt=$(echo $DEVICE | sed -E "s/([[:blank:]]|=).*//")
+ --uuid=*|--uuid\ *|--label=*|--label\ *)
+ device_opt=$(echo $DEVICE | sed "s/\([[:blank:]]\|=\).*//")
DEVICE=$(echo $DEVICE | sed -E "s/$device_opt([[:blank:]]*|=)//")
;;
- -*) # Oh... An option to mount instead... Typically -U or -L
+ -U*|-L*) # short versions of --uuid/--label
device_opt=$(echo $DEVICE | cut -c1-2)
DEVICE=$(echo $DEVICE | sed "s/$device_opt[[:blank:]]*//")
;;

@ -1,61 +0,0 @@
From 340e12c0d457d244d375c2d805e78033c9dbdf78 Mon Sep 17 00:00:00 2001
From: Takashi Kajinami <tkajinam@redhat.com>
Date: Wed, 04 May 2022 23:13:35 +0900
Subject: [PATCH] NovaCompute/Evacuate: Make user/project domain configurable
... so that we can use a user or a project in a non-default keystone
domain.
Change-Id: I6e2175adca08fd97942cb83b8f8094e980b60c9d
---
diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
index 596f520..4565766 100644
--- a/heartbeat/NovaEvacuate
+++ b/heartbeat/NovaEvacuate
@@ -63,13 +63,29 @@
<parameter name="tenant_name" unique="0" required="1">
<longdesc lang="en">
-Tenant name for connecting to keystone in admin context.
+Tenant(Project) name for connecting to keystone in admin context.
Note that with Keystone V3 tenant names are only unique within a domain.
</longdesc>
<shortdesc lang="en">Tenant name</shortdesc>
<content type="string" default="" />
</parameter>
+<parameter name="user_domain" unique="0" required="0">
+<longdesc lang="en">
+Keystone domain the user belongs to
+</longdesc>
+<shortdesc lang="en">Keystone v3 User Domain</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="project_domain" unique="0" required="0">
+<longdesc lang="en">
+Keystone domain the tenant(project) belongs to
+</longdesc>
+<shortdesc lang="en">Keystone v3 Project Domain</shortdesc>
+<content type="string" default="" />
+</parameter>
+
<parameter name="domain" unique="0" required="0">
<longdesc lang="en">
DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
@@ -319,6 +335,14 @@
fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+ if [ -n "${OCF_RESKEY_user_domain}" ]; then
+ fence_options="${fence_options} -u ${OCF_RESKEY_user_domain}"
+ fi
+
+ if [ -n "${OCF_RESKEY_project_domain}" ]; then
+ fence_options="${fence_options} -P ${OCF_RESKEY_project_domain}"
+ fi
+
if [ -n "${OCF_RESKEY_domain}" ]; then
fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
fi

@ -1,55 +0,0 @@
From bb5cfa172ca58cd8adcedcaca92bde54d0645661 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 14 Jul 2022 10:55:19 +0200
Subject: [PATCH] openstack-agents: set domain parameter's default to Default
and fix missing parameter name in ocf_exit_reason
---
heartbeat/openstack-common.sh | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/heartbeat/openstack-common.sh b/heartbeat/openstack-common.sh
index b6eec09c..14d290bd 100644
--- a/heartbeat/openstack-common.sh
+++ b/heartbeat/openstack-common.sh
@@ -1,6 +1,10 @@
+OCF_RESKEY_user_domain_name_default="Default"
+OCF_RESKEY_project_domain_name_default="Default"
OCF_RESKEY_openstackcli_default="/usr/bin/openstack"
OCF_RESKEY_insecure_default="false"
+: ${OCF_RESKEY_user_domain_name=${OCF_RESKEY_user_domain_name_default}}
+: ${OCF_RESKEY_project_domain_name=${OCF_RESKEY_project_domain_name_default}}
: ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}}
: ${OCF_RESKEY_insecure=${OCF_RESKEY_insecure_default}}
@@ -64,7 +68,7 @@ Keystone Project.
Keystone User Domain Name.
</longdesc>
<shortdesc lang="en">Keystone User Domain Name</shortdesc>
-<content type="string" />
+<content type="string" default="${OCF_RESKEY_user_domain_name_default}" />
</parameter>
<parameter name="project_domain_name" required="0">
@@ -72,7 +76,7 @@ Keystone User Domain Name.
Keystone Project Domain Name.
</longdesc>
<shortdesc lang="en">Keystone Project Domain Name</shortdesc>
-<content type="string" />
+<content type="string" default="${OCF_RESKEY_project_domain_name_default}" />
</parameter>
<parameter name="openstackcli">
@@ -133,7 +137,7 @@ get_config() {
exit $OCF_ERR_CONFIGURED
fi
if [ -z "$OCF_RESKEY_project_domain_name" ]; then
- ocf_exit_reason " not set"
+ ocf_exit_reason "project_domain_name not set"
exit $OCF_ERR_CONFIGURED
fi
--
2.36.1

@ -1,282 +0,0 @@
From ebea4c3620261c529cad908c0e52064df84b0c61 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Jul 2022 10:28:11 +0200
Subject: [PATCH] openstack-agents: warn when openstackcli is slow
---
heartbeat/openstack-cinder-volume | 19 +++++++++++--------
heartbeat/openstack-common.sh | 22 ++++++++++++++++++++++
heartbeat/openstack-floating-ip | 17 ++++++++++-------
heartbeat/openstack-info.in | 20 ++++++++++----------
heartbeat/openstack-virtual-ip | 20 ++++++++++----------
5 files changed, 63 insertions(+), 35 deletions(-)
diff --git a/heartbeat/openstack-cinder-volume b/heartbeat/openstack-cinder-volume
index 19bf04faf..116442c41 100755
--- a/heartbeat/openstack-cinder-volume
+++ b/heartbeat/openstack-cinder-volume
@@ -113,11 +113,14 @@ _get_node_id() {
}
osvol_validate() {
+ local result
+
check_binary "$OCF_RESKEY_openstackcli"
get_config
- if ! $OCF_RESKEY_openstackcli volume list|grep -q $OCF_RESKEY_volume_id ; then
+ result=$(run_openstackcli "volume list")
+ if ! echo "$result" | grep -q $OCF_RESKEY_volume_id; then
ocf_exit_reason "volume-id $OCF_RESKEY_volume_id not found"
return $OCF_ERR_CONFIGURED
fi
@@ -156,17 +159,17 @@ osvol_monitor() {
# Is the volue attached?
# We use the API
#
- result=$($OCF_RESKEY_openstackcli volume show \
+ result=$(run_openstackcli "volume show \
--column status \
--column attachments \
--format value \
- $OCF_RESKEY_volume_id)
+ $OCF_RESKEY_volume_id")
- if echo "$result" | grep -q available ; then
+ if echo "$result" | grep -q available; then
ocf_log warn "$OCF_RESKEY_volume_id is not attached to any instance"
return $OCF_NOT_RUNNING
else
- export attached_server_id=$(echo $result|head -n1|
+ export attached_server_id=$(echo "$result"|head -n1|
grep -P -o "'server_id': '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}'"|
grep -P -o "[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}")
ocf_log info "$OCF_RESKEY_volume_id is attached to instance $attached_server_id"
@@ -199,7 +202,7 @@ osvol_stop() {
#
# Detach the volume
#
- if ! $OCF_RESKEY_openstackcli server remove volume $node_id $OCF_RESKEY_volume_id ; then
+ if ! run_openstackcli "server remove volume $node_id $OCF_RESKEY_volume_id"; then
ocf_log error "Couldn't remove volume $OCF_RESKEY_volume_id from instance $node_id"
return $OCF_ERR_GENERIC
fi
@@ -225,7 +228,7 @@ osvol_start() {
# TODO: make it optional in case multi-attachment is allowed by Cinder
#
if [ ! -z $attached_server_id ] ; then
- if ! $OCF_RESKEY_openstackcli server remove volume $attached_server_id $OCF_RESKEY_volume_id ; then
+ if ! run_openstackcli "server remove volume $attached_server_id $OCF_RESKEY_volume_id"; then
ocf_log error "Couldn't remove volume $OCF_RESKEY_volume_id from instance $attached_server_id"
return $OCF_ERR_GENERIC
fi
@@ -238,7 +241,7 @@ osvol_start() {
#
# Attach the volume
#
- $OCF_RESKEY_openstackcli server add volume $node_id $OCF_RESKEY_volume_id
+ run_openstackcli "server add volume $node_id $OCF_RESKEY_volume_id"
if [ $? != $OCF_SUCCESS ]; then
ocf_log error "Couldn't add volume $OCF_RESKEY_volume_id to instance $node_id"
return $OCF_ERR_GENERIC
diff --git a/heartbeat/openstack-common.sh b/heartbeat/openstack-common.sh
index 4763c90db..b6eec09c2 100644
--- a/heartbeat/openstack-common.sh
+++ b/heartbeat/openstack-common.sh
@@ -145,3 +145,25 @@ get_config() {
OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-project-domain-name $OCF_RESKEY_project_domain_name"
fi
}
+
+run_openstackcli() {
+ local cmd="${OCF_RESKEY_openstackcli} $1"
+ local result
+ local rc
+ local start_time=$(date +%s)
+ local end_time
+ local elapsed_time
+
+ result=$($cmd)
+ rc=$?
+ end_time=$(date +%s)
+ elapsed_time=$(expr $end_time - $start_time)
+
+ if [ $elapsed_time -gt 20 ]; then
+ ocf_log warn "$cmd took ${elapsed_time}s to complete"
+ fi
+
+ echo "$result"
+
+ return $rc
+}
diff --git a/heartbeat/openstack-floating-ip b/heartbeat/openstack-floating-ip
index 6e2895654..7317f19a8 100755
--- a/heartbeat/openstack-floating-ip
+++ b/heartbeat/openstack-floating-ip
@@ -101,11 +101,14 @@ END
}
osflip_validate() {
+ local result
+
check_binary "$OCF_RESKEY_openstackcli"
get_config
- if ! $OCF_RESKEY_openstackcli floating ip list|grep -q $OCF_RESKEY_ip_id ; then
+ result=$(run_openstackcli "floating ip list")
+ if ! echo "$result" | grep -q $OCF_RESKEY_ip_id; then
ocf_exit_reason "ip-id $OCF_RESKEY_ip_id not found"
return $OCF_ERR_CONFIGURED
fi
@@ -132,14 +135,14 @@ osflip_monitor() {
| awk '{gsub("[^ ]*:", "");print}')
# Is the IP active and attached?
- result=$($OCF_RESKEY_openstackcli floating ip show \
+ result=$(run_openstackcli "floating ip show \
--column port_id --column floating_ip_address \
--format yaml \
- $OCF_RESKEY_ip_id)
+ $OCF_RESKEY_ip_id")
for port in $node_port_ids ; do
- if echo $result | grep -q $port ; then
- floating_ip=$(echo $result | awk '/floating_ip_address/ {print $2}')
+ if echo "$result" | grep -q $port ; then
+ floating_ip=$(echo "$result" | awk '/floating_ip_address/ {print $2}')
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n openstack_floating_ip -v $floating_ip
return $OCF_SUCCESS
@@ -160,7 +163,7 @@ osflip_stop() {
return $OCF_SUCCESS
fi
- if ! $OCF_RESKEY_openstackcli floating ip unset --port $OCF_RESKEY_ip_id ; then
+ if ! run_openstackcli "floating ip unset --port $OCF_RESKEY_ip_id"; then
return $OCF_ERR_GENERIC
fi
@@ -194,7 +197,7 @@ osflip_start() {
ocf_log info "Moving IP address $OCF_RESKEY_ip_id to port ID $node_port_id"
- $OCF_RESKEY_openstackcli floating ip set --port $node_port_id $OCF_RESKEY_ip_id
+ run_openstackcli "floating ip set --port $node_port_id $OCF_RESKEY_ip_id"
if [ $? != $OCF_SUCCESS ]; then
ocf_log error "$OCF_RESKEY_ip_id Cannot be set to port $node_port_id"
return $OCF_ERR_GENERIC
diff --git a/heartbeat/openstack-info.in b/heartbeat/openstack-info.in
index f3a59fc7a..6502f1df1 100755
--- a/heartbeat/openstack-info.in
+++ b/heartbeat/openstack-info.in
@@ -119,9 +119,7 @@ END
#######################################################################
OSInfoStats() {
- local result
local value
- local node
local node_id
get_config
@@ -141,31 +139,33 @@ OSInfoStats() {
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_id -v "$node_id"
# Nova data: flavor
- value=$($OCF_RESKEY_openstackcli server show \
+ value=$(run_openstackcli "server show \
--format value \
--column flavor \
- $node_id)
+ $node_id")
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_flavor -v "$value"
# Nova data: availability zone
- value=$($OCF_RESKEY_openstackcli server show \
+ value=$(run_openstackcli "server show \
--format value \
--column OS-EXT-AZ:availability_zone \
- $node_id)
+ $node_id")
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_az -v "$value"
# Network data: ports
value=""
- for port_id in $($OCF_RESKEY_openstackcli port list \
+ for port_id in $(run_openstackcli "port list \
--format value \
--column id \
- --server $node_id); do
- subnet_id=$($OCF_RESKEY_openstackcli port show \
+ --server $node_id"); do
+ subnet_result=$(run_openstackcli "port show \
--format json \
--column fixed_ips \
- ${port_id} | grep -P '\"subnet_id\": \".*\",$' |
+ ${port_id}")
+ subnet_id=$(echo "$subnet_result" |
+ grep -P '\"subnet_id\": \".*\",$' |
grep -P -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}')
value="${value}${subnet_id}:${port_id},"
done
diff --git a/heartbeat/openstack-virtual-ip b/heartbeat/openstack-virtual-ip
index c654d980a..361357d55 100755
--- a/heartbeat/openstack-virtual-ip
+++ b/heartbeat/openstack-virtual-ip
@@ -132,11 +132,11 @@ osvip_monitor() {
node_port_id=$(osvip_port_id)
- result=$($OCF_RESKEY_openstackcli port show \
+ result=$(run_openstackcli "port show \
--format value \
--column allowed_address_pairs \
- ${node_port_id})
- if echo $result | grep -q "$OCF_RESKEY_ip"; then
+ ${node_port_id}")
+ if echo "$result" | grep -q "$OCF_RESKEY_ip"; then
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n openstack_virtual_ip -v $OCF_RESKEY_ip
return $OCF_SUCCESS
@@ -158,20 +158,20 @@ osvip_stop() {
return $OCF_SUCCESS
fi
- mac_address=$($OCF_RESKEY_openstackcli port show \
+ mac_address=$(run_openstackcli "port show \
--format value \
--column mac_address \
- $node_port_id)
- echo ${mac_address} | grep -q -P "^([0-9a-f]{2}:){5}[0-9a-f]{2}$"
+ $node_port_id")
+ echo "${mac_address}" | grep -q -P "^([0-9a-f]{2}:){5}[0-9a-f]{2}$"
if [ $? -ne 0 ]; then
ocf_log error "MAC address '${mac_address}' is not valid."
return $OCF_ERR_GENERIC
fi
- if ! $OCF_RESKEY_openstackcli port unset \
+ if ! run_openstackcli "port unset \
--allowed-address \
ip-address=$OCF_RESKEY_ip,mac-address=${mac_address} \
- $node_port_id; then
+ $node_port_id"; then
return $OCF_ERR_GENERIC
fi
@@ -196,9 +196,9 @@ osvip_start() {
ocf_log info "Moving IP address $OCF_RESKEY_ip to port ID $node_port_id"
- $OCF_RESKEY_openstackcli port set \
+ run_openstackcli "port set \
--allowed-address ip-address=$OCF_RESKEY_ip \
- $node_port_id
+ $node_port_id"
if [ $? != $OCF_SUCCESS ]; then
ocf_log error "$OCF_RESKEY_ip Cannot be set to port $node_port_id"
return $OCF_ERR_GENERIC

@ -1,770 +0,0 @@
diff --color -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
--- a/heartbeat/Makefile.am 2022-03-15 16:14:29.355209012 +0100
+++ b/heartbeat/Makefile.am 2022-03-15 16:18:35.917048467 +0100
@@ -217,6 +217,7 @@
lvm-clvm.sh \
lvm-plain.sh \
lvm-tag.sh \
+ openstack-common.sh \
ora-common.sh \
mysql-common.sh \
nfsserver-redhat.sh \
diff --color -uNr a/heartbeat/openstack-cinder-volume b/heartbeat/openstack-cinder-volume
--- a/heartbeat/openstack-cinder-volume 2022-03-15 16:14:29.370209063 +0100
+++ b/heartbeat/openstack-cinder-volume 2022-03-15 16:17:36.231840008 +0100
@@ -34,11 +34,11 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/openstack-common.sh
+
# Defaults
-OCF_RESKEY_openstackcli_default="/usr/bin/openstack"
OCF_RESKEY_volume_local_check_default="true"
-: ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}}
: ${OCF_RESKEY_volume_local_check=${OCF_RESKEY_volume_local_check_default}}
#######################################################################
@@ -68,14 +68,11 @@
<shortdesc lang="en">Attach a cinder volume</shortdesc>
<parameters>
-<parameter name="openstackcli">
-<longdesc lang="en">
-Path to command line tools for openstack.
-</longdesc>
-<shortdesc lang="en">Path to Openstack CLI tool</shortdesc>
-<content type="string" default="${OCF_RESKEY_openstackcli_default}" />
-</parameter>
+END
+common_meta_data
+
+cat <<END
<parameter name="volume_local_check">
<longdesc lang="en">
This option allows the cluster to monitor the cinder volume presence without
@@ -85,28 +82,19 @@
<content type="boolean" default="${OCF_RESKEY_volume_local_check_default}" />
</parameter>
-<parameter name="openrc" required="1">
-<longdesc lang="en">
-Valid Openstack credentials as openrc file from api_access/openrc.
-</longdesc>
-<shortdesc lang="en">openrc file</shortdesc>
-<content type="string" />
-</parameter>
-
<parameter name="volume_id" required="1">
<longdesc lang="en">
-Cinder volume identifier to use to attach the bloc storage.
+Cinder volume identifier to use to attach the block storage.
</longdesc>
<shortdesc lang="en">Volume ID</shortdesc>
<content type="string" />
</parameter>
-
</parameters>
<actions>
<action name="start" timeout="180s" />
<action name="stop" timeout="180s" />
-<action name="monitor" depth="0" timeout="30s" interval="60s" />
+<action name="monitor" depth="0" timeout="180s" interval="60s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
@@ -127,17 +115,7 @@
osvol_validate() {
check_binary "$OCF_RESKEY_openstackcli"
- if [ -z "$OCF_RESKEY_openrc" ]; then
- ocf_exit_reason "openrc parameter not set"
- return $OCF_ERR_CONFIGURED
- fi
-
- if [ ! -f "$OCF_RESKEY_openrc" ] ; then
- ocf_exit_reason "openrc file not found"
- return $OCF_ERR_CONFIGURED
- fi
-
- . $OCF_RESKEY_openrc
+ get_config
if ! $OCF_RESKEY_openstackcli volume list|grep -q $OCF_RESKEY_volume_id ; then
ocf_exit_reason "volume-id $OCF_RESKEY_volume_id not found"
diff --color -uNr a/heartbeat/openstack-common.sh b/heartbeat/openstack-common.sh
--- a/heartbeat/openstack-common.sh 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/openstack-common.sh 2022-03-15 16:17:36.232840011 +0100
@@ -0,0 +1,147 @@
+OCF_RESKEY_openstackcli_default="/usr/bin/openstack"
+OCF_RESKEY_insecure_default="false"
+
+: ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}}
+: ${OCF_RESKEY_insecure=${OCF_RESKEY_insecure_default}}
+
+if ocf_is_true "${OCF_RESKEY_insecure}"; then
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --insecure"
+fi
+
+common_meta_data() {
+ cat <<END
+
+<parameter name="cloud" required="0">
+<longdesc lang="en">
+Openstack cloud (from ~/.config/openstack/clouds.yaml or /etc/openstack/clouds.yaml).
+</longdesc>
+<shortdesc lang="en">Cloud from clouds.yaml</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="openrc" required="0">
+<longdesc lang="en">
+Openstack credentials as openrc file from api_access/openrc.
+</longdesc>
+<shortdesc lang="en">openrc file</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="auth_url" required="0">
+<longdesc lang="en">
+Keystone Auth URL
+</longdesc>
+<shortdesc lang="en">Keystone Auth URL</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="username" required="0">
+<longdesc lang="en">
+Username.
+</longdesc>
+<shortdesc lang="en">Username</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="password" required="0">
+<longdesc lang="en">
+Password.
+</longdesc>
+<shortdesc lang="en">Password</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="project_name" required="0">
+<longdesc lang="en">
+Keystone Project.
+</longdesc>
+<shortdesc lang="en">Keystone Project</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="user_domain_name" required="0">
+<longdesc lang="en">
+Keystone User Domain Name.
+</longdesc>
+<shortdesc lang="en">Keystone User Domain Name</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="project_domain_name" required="0">
+<longdesc lang="en">
+Keystone Project Domain Name.
+</longdesc>
+<shortdesc lang="en">Keystone Project Domain Name</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="openstackcli">
+<longdesc lang="en">
+Path to command line tools for openstack.
+</longdesc>
+<shortdesc lang="en">Path to Openstack CLI tool</shortdesc>
+<content type="string" default="${OCF_RESKEY_openstackcli_default}" />
+</parameter>
+
+<parameter name="insecure">
+<longdesc lang="en">
+Allow insecure connections
+</longdesc>
+<shortdesc lang="en">Allow insecure connections</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_insecure_default}" />
+</parameter>
+END
+}
+
+get_config() {
+ if [ -n "$OCF_RESKEY_cloud" ]; then
+ TILDE=$(echo ~)
+ clouds_yaml="$TILDE/.config/openstack/clouds.yaml"
+ if [ ! -f "$clouds_yaml" ]; then
+ clouds_yaml="/etc/openstack/clouds.yaml"
+ fi
+ if [ ! -f "$clouds_yaml" ]; then
+ ocf_exit_reason "~/.config/openstack/clouds.yaml and /etc/openstack/clouds.yaml does not exist"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-cloud $OCF_RESKEY_cloud"
+ elif [ -n "$OCF_RESKEY_openrc" ]; then
+ if [ ! -f "$OCF_RESKEY_openrc" ]; then
+ ocf_exit_reason "$OCF_RESKEY_openrc does not exist"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ . $OCF_RESKEY_openrc
+ else
+ if [ -z "$OCF_RESKEY_auth_url" ]; then
+ ocf_exit_reason "auth_url not set"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -z "$OCF_RESKEY_username" ]; then
+ ocf_exit_reason "username not set"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -z "$OCF_RESKEY_password" ]; then
+ ocf_exit_reason "password not set"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -z "$OCF_RESKEY_project_name" ]; then
+ ocf_exit_reason "project_name not set"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -z "$OCF_RESKEY_user_domain_name" ]; then
+ ocf_exit_reason "user_domain_name not set"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -z "$OCF_RESKEY_project_domain_name" ]; then
+ ocf_exit_reason " not set"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-auth-url $OCF_RESKEY_auth_url"
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-username $OCF_RESKEY_username"
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-password $OCF_RESKEY_password"
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-project-name $OCF_RESKEY_project_name"
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-user-domain-name $OCF_RESKEY_user_domain_name"
+ OCF_RESKEY_openstackcli="${OCF_RESKEY_openstackcli} --os-project-domain-name $OCF_RESKEY_project_domain_name"
+ fi
+}
diff --color -uNr a/heartbeat/openstack-floating-ip b/heartbeat/openstack-floating-ip
--- a/heartbeat/openstack-floating-ip 2022-03-15 16:14:29.370209063 +0100
+++ b/heartbeat/openstack-floating-ip 2022-03-15 16:17:36.233840014 +0100
@@ -34,10 +34,9 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
-# Defaults
-OCF_RESKEY_openstackcli_default="/usr/bin/openstack"
+. ${OCF_FUNCTIONS_DIR}/openstack-common.sh
-: ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}}
+# Defaults
#######################################################################
@@ -67,22 +66,11 @@
<shortdesc lang="en">Move a floating IP</shortdesc>
<parameters>
-<parameter name="openstackcli">
-<longdesc lang="en">
-Path to command line tools for openstack.
-</longdesc>
-<shortdesc lang="en">Path to Openstack CLI tool</shortdesc>
-<content type="string" default="${OCF_RESKEY_openstackcli_default}" />
-</parameter>
+END
-<parameter name="openrc" required="1">
-<longdesc lang="en">
-Valid Openstack credentials as openrc file from api_access/openrc.
-</longdesc>
-<shortdesc lang="en">openrc file</shortdesc>
-<content type="string" />
-</parameter>
+common_meta_data
+cat <<END
<parameter name="ip_id" required="1">
<longdesc lang="en">
Floating IP Identifier.
@@ -104,7 +92,7 @@
<actions>
<action name="start" timeout="180s" />
<action name="stop" timeout="180s" />
-<action name="monitor" depth="0" timeout="30s" interval="60s" />
+<action name="monitor" depth="0" timeout="180s" interval="60s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
@@ -115,17 +103,7 @@
osflip_validate() {
check_binary "$OCF_RESKEY_openstackcli"
- if [ -z "$OCF_RESKEY_openrc" ]; then
- ocf_exit_reason "openrc parameter not set"
- return $OCF_ERR_CONFIGURED
- fi
-
- if [ ! -f "$OCF_RESKEY_openrc" ] ; then
- ocf_exit_reason "openrc file not found"
- return $OCF_ERR_CONFIGURED
- fi
-
- . $OCF_RESKEY_openrc
+ get_config
if ! $OCF_RESKEY_openstackcli floating ip list|grep -q $OCF_RESKEY_ip_id ; then
ocf_exit_reason "ip-id $OCF_RESKEY_ip_id not found"
diff --color -uNr a/heartbeat/openstack-info b/heartbeat/openstack-info
--- a/heartbeat/openstack-info 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/openstack-info 2022-03-15 16:17:36.234840018 +0100
@@ -0,0 +1,270 @@
+#!/bin/sh
+#
+#
+# OCF resource agent to set attributes from Openstack instance details.
+# It records (in the CIB) various attributes of a node
+#
+# Copyright (c) 2018 Mathieu Grzybek
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+. ${OCF_FUNCTIONS_DIR}/openstack-common.sh
+
+# Defaults
+OCF_RESKEY_pidfile_default="$HA_RSCTMP/OSInfo-${OCF_RESOURCE_HOSTNAME}"
+OCF_RESKEY_delay_default="0"
+OCF_RESKEY_clone_default="0"
+OCF_RESKEY_curlcli_default="/usr/bin/curl"
+OCF_RESKEY_pythoncli_default="/usr/bin/python"
+
+: ${OCF_RESKEY_curlcli=${OCF_RESKEY_curlcli_default}}
+: ${OCF_RESKEY_pythoncli=${OCF_RESKEY_pythoncli_default}}
+: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}}
+: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}}
+: ${OCF_RESKEY_clone=${OCF_RESKEY_clone_default}}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="openstack-info" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+OCF resource agent to set attributes from Openstack instance details.
+It records (in the CIB) various attributes of a node.
+Sample output:
+ openstack_az : nova
+ openstack_flavor : c1.small
+ openstack_id : 60ac4343-5828-49b1-8aac-7c69b1417f31
+ openstack_ports : 7960d889-9750-4160-bf41-c69a41ad72d9:96530d18-57a3-4718-af32-30f2a74c22a2,b0e55a06-bd75-468d-8baa-22cfeb65799f:a55ae917-8016-4b1e-8ffa-04311b9dc7d6
+
+The layout of openstack_ports is a comma-separated list of tuples "subnet_id:port_id".
+</longdesc>
+<shortdesc lang="en">Records various node attributes in the CIB</shortdesc>
+
+<parameters>
+END
+
+common_meta_data
+
+ cat <<END
+<parameter name="pidfile" unique="0">
+<longdesc lang="en">PID file</longdesc>
+<shortdesc lang="en">PID file</shortdesc>
+<content type="string" default="${OCF_RESKEY_pidfile_default}" />
+</parameter>
+
+<parameter name="delay" unique="0">
+<longdesc lang="en">Interval to allow values to stabilize</longdesc>
+<shortdesc lang="en">Dampening Delay</shortdesc>
+<content type="string" default="${OCF_RESKEY_delay_default}" />
+</parameter>
+
+<parameter name="curlcli">
+<longdesc lang="en">
+Path to command line cURL binary.
+</longdesc>
+<shortdesc lang="en">Path to cURL binary</shortdesc>
+<content type="string" default="${OCF_RESKEY_curlcli_default}" />
+</parameter>
+
+<parameter name="pythoncli">
+<longdesc lang="en">
+Path to command line Python interpreter.
+</longdesc>
+<shortdesc lang="en">Path to Python interpreter</shortdesc>
+<content type="string" default="${OCF_RESKEY_pythoncli_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="180s" />
+<action name="stop" timeout="180s" />
+<action name="monitor" timeout="30s" interval="60s"/>
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="20s" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+OSInfoStats() {
+ local result
+ local value
+ local node
+ local node_id
+
+ get_config
+
+ # Nova data: server ID
+ node_id=$($OCF_RESKEY_curlcli \
+ -s http://169.254.169.254/openstack/latest/meta_data.json |
+ $OCF_RESKEY_pythoncli -m json.tool |
+ grep -P '\"uuid\": \".*\",$' |
+ grep -P -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}')
+
+ if [ $? -ne 0 ] ; then
+ ocf_exit_reason "Cannot find server ID"
+ exit $OCF_ERR_GENERIC
+ fi
+
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_id -v "$node_id"
+
+ # Nova data: flavor
+ value=$($OCF_RESKEY_openstackcli server show \
+ --format value \
+ --column flavor \
+ $node_id)
+
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_flavor -v "$value"
+
+ # Nova data: availability zone
+ value=$($OCF_RESKEY_openstackcli server show \
+ --format value \
+ --column OS-EXT-AZ:availability_zone \
+ $node_id)
+
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_az -v "$value"
+
+ # Network data: ports
+ value=""
+ for port_id in $($OCF_RESKEY_openstackcli port list \
+ --format value \
+ --column id \
+ --server $node_id); do
+ subnet_id=$($OCF_RESKEY_openstackcli port show \
+ --format json \
+ --column fixed_ips \
+ ${port_id} | grep -P '\"subnet_id\": \".*\",$' |
+ grep -P -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}')
+ value+="${subnet_id}:${port_id},"
+ done
+ value=$(echo ${value} | sed -e 's/,$//g')
+
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_ports -v "$value"
+
+ if [ ! -z "$OS_REGION_NAME" ] ; then
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_region -v "$OS_REGION_NAME"
+ fi
+
+ if [ ! -z "$OS_TENANT_ID" ] ; then
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_tenant_id -v "$OS_TENANT_ID"
+
+ if [ ! -z "$OS_TENANT_NAME" ] ; then
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_tenant_name -v "$OS_TENANT_NAME"
+ fi
+ else
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_project_id -v "$OS_PROJECT_ID"
+
+ if [ ! -z "$OS_PROJECT_NAME" ] ; then
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_project_name -v "$OS_PROJECT_NAME"
+ fi
+ fi
+
+}
+
+OSInfo_usage() {
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+OSInfo_start() {
+ echo $OCF_RESKEY_clone > $OCF_RESKEY_pidfile
+ OSInfoStats
+ exit $OCF_SUCCESS
+}
+
+OSInfo_stop() {
+ rm -f $OCF_RESKEY_pidfile
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_id
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_flavor
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_az
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_ports
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_region
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_tenant_id
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_tenant_name
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_project_id
+ ${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -D -n openstack_project_name
+ exit $OCF_SUCCESS
+}
+
+OSInfo_monitor() {
+ if [ -f "$OCF_RESKEY_pidfile" ] ; then
+ OSInfoStats
+ exit $OCF_RUNNING
+ fi
+ exit $OCF_NOT_RUNNING
+}
+
+OSInfo_validate() {
+ check_binary "$OCF_RESKEY_curlcli"
+ check_binary "$OCF_RESKEY_openstackcli"
+ check_binary "$OCF_RESKEY_pythoncli"
+
+ return $OCF_SUCCESS
+}
+
+if [ $# -ne 1 ]; then
+ OSInfo_usage
+ exit $OCF_ERR_ARGS
+fi
+
+if [ x != x${OCF_RESKEY_delay} ]; then
+ OCF_RESKEY_delay="-d ${OCF_RESKEY_delay}"
+fi
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS
+ ;;
+start) OSInfo_validate || exit $?
+ OSInfo_start
+ ;;
+stop) OSInfo_stop
+ ;;
+monitor) OSInfo_monitor
+ ;;
+validate-all) OSInfo_validate
+ ;;
+usage|help) OSInfo_usage
+ exit $OCF_SUCCESS
+ ;;
+*) OSInfo_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+
+exit $?
diff --color -uNr a/heartbeat/openstack-info.in b/heartbeat/openstack-info.in
--- a/heartbeat/openstack-info.in 2022-03-15 16:14:29.370209063 +0100
+++ b/heartbeat/openstack-info.in 2022-03-15 16:17:36.234840018 +0100
@@ -32,16 +32,16 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/openstack-common.sh
+
# Defaults
OCF_RESKEY_pidfile_default="$HA_RSCTMP/OSInfo-${OCF_RESOURCE_HOSTNAME}"
OCF_RESKEY_delay_default="0"
OCF_RESKEY_clone_default="0"
OCF_RESKEY_curlcli_default="/usr/bin/curl"
-OCF_RESKEY_openstackcli_default="/usr/bin/openstack"
OCF_RESKEY_pythoncli_default="@PYTHON@"
: ${OCF_RESKEY_curlcli=${OCF_RESKEY_curlcli_default}}
-: ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}}
: ${OCF_RESKEY_pythoncli=${OCF_RESKEY_pythoncli_default}}
: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}}
: ${OCF_RESKEY_delay=${OCF_RESKEY_delay_default}}
@@ -70,25 +70,23 @@
<shortdesc lang="en">Records various node attributes in the CIB</shortdesc>
<parameters>
+END
+
+common_meta_data
+
+ cat <<END
<parameter name="pidfile" unique="0">
<longdesc lang="en">PID file</longdesc>
<shortdesc lang="en">PID file</shortdesc>
<content type="string" default="${OCF_RESKEY_pidfile_default}" />
</parameter>
+
<parameter name="delay" unique="0">
<longdesc lang="en">Interval to allow values to stabilize</longdesc>
<shortdesc lang="en">Dampening Delay</shortdesc>
<content type="string" default="${OCF_RESKEY_delay_default}" />
</parameter>
-<parameter name="openrc" required="1">
-<longdesc lang="en">
-Valid Openstack credentials as openrc file from api_access/openrc.
-</longdesc>
-<shortdesc lang="en">openrc file</shortdesc>
-<content type="string" />
-</parameter>
-
<parameter name="curlcli">
<longdesc lang="en">
Path to command line cURL binary.
@@ -97,14 +95,6 @@
<content type="string" default="${OCF_RESKEY_curlcli_default}" />
</parameter>
-<parameter name="openstackcli">
-<longdesc lang="en">
-Path to command line tools for openstack.
-</longdesc>
-<shortdesc lang="en">Path to Openstack CLI tool</shortdesc>
-<content type="string" default="${OCF_RESKEY_openstackcli_default}" />
-</parameter>
-
<parameter name="pythoncli">
<longdesc lang="en">
Path to command line Python interpreter.
@@ -116,9 +106,9 @@
</parameters>
<actions>
-<action name="start" timeout="20s" />
-<action name="stop" timeout="20s" />
-<action name="monitor" timeout="20s" interval="60s"/>
+<action name="start" timeout="180s" />
+<action name="stop" timeout="180s" />
+<action name="monitor" timeout="180s" interval="60s"/>
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="20s" />
</actions>
@@ -134,7 +124,7 @@
local node
local node_id
- . $OCF_RESKEY_openrc
+ get_config
# Nova data: server ID
node_id=$($OCF_RESKEY_curlcli \
@@ -244,16 +234,6 @@
check_binary "$OCF_RESKEY_openstackcli"
check_binary "$OCF_RESKEY_pythoncli"
- if [ -z "$OCF_RESKEY_openrc" ]; then
- ocf_exit_reason "openrc parameter not set"
- return $OCF_ERR_CONFIGURED
- fi
-
- if [ ! -f "$OCF_RESKEY_openrc" ] ; then
- ocf_exit_reason "openrc file not found"
- return $OCF_ERR_CONFIGURED
- fi
-
return $OCF_SUCCESS
}
diff --color -uNr a/heartbeat/openstack-virtual-ip b/heartbeat/openstack-virtual-ip
--- a/heartbeat/openstack-virtual-ip 2022-03-15 16:14:29.370209063 +0100
+++ b/heartbeat/openstack-virtual-ip 2022-03-15 16:17:36.235840021 +0100
@@ -34,10 +34,9 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
-# Defaults
-OCF_RESKEY_openstackcli_default="/usr/bin/openstack"
+. ${OCF_FUNCTIONS_DIR}/openstack-common.sh
-: ${OCF_RESKEY_openstackcli=${OCF_RESKEY_openstackcli_default}}
+# Defaults
#######################################################################
@@ -68,22 +67,11 @@
<shortdesc lang="en">Move a virtual IP</shortdesc>
<parameters>
-<parameter name="openstackcli">
-<longdesc lang="en">
-Path to command line tools for openstack.
-</longdesc>
-<shortdesc lang="en">Path to Openstack CLI tool</shortdesc>
-<content type="string" default="${OCF_RESKEY_openstackcli_default}" />
-</parameter>
+END
-<parameter name="openrc" required="1">
-<longdesc lang="en">
-Valid Openstack credentials as openrc file from api_access/openrc.
-</longdesc>
-<shortdesc lang="en">openrc file</shortdesc>
-<content type="string" />
-</parameter>
+common_meta_data
+cat <<END
<parameter name="ip" required="1">
<longdesc lang="en">
Virtual IP Address.
@@ -105,7 +93,7 @@
<actions>
<action name="start" timeout="180s" />
<action name="stop" timeout="180s" />
-<action name="monitor" depth="0" timeout="30s" interval="60s" />
+<action name="monitor" depth="0" timeout="180s" interval="60s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
@@ -128,17 +116,7 @@
osvip_validate() {
check_binary "$OCF_RESKEY_openstackcli"
- if [ -z "$OCF_RESKEY_openrc" ]; then
- ocf_exit_reason "openrc parameter not set"
- return $OCF_ERR_CONFIGURED
- fi
-
- if [ ! -f "$OCF_RESKEY_openrc" ] ; then
- ocf_exit_reason "openrc file not found"
- return $OCF_ERR_CONFIGURED
- fi
-
- . $OCF_RESKEY_openrc
+ get_config
${HA_SBIN_DIR}/attrd_updater --query -n openstack_ports -N $(crm_node -n) > /dev/null 2>&1
if [ $? -ne 0 ] ; then

@ -1,72 +0,0 @@
From 64f434014bc198055478a139532c7cc133967c5d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 8 Jul 2022 15:41:34 +0200
Subject: [PATCH] openstack-agents: fixes
- openstack-cinder-volume: dont do volume_local_check during start/stop-action
- openstack-floating-ip/openstack-virtual-ip: dont fail in validate()
during probe-calls
- openstack-floating-ip: fix awk only catching last id for node_port_ids
---
heartbeat/openstack-cinder-volume | 2 +-
heartbeat/openstack-floating-ip | 4 ++--
heartbeat/openstack-virtual-ip | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/heartbeat/openstack-cinder-volume b/heartbeat/openstack-cinder-volume
index cc12e58ae..19bf04faf 100755
--- a/heartbeat/openstack-cinder-volume
+++ b/heartbeat/openstack-cinder-volume
@@ -138,7 +138,7 @@ osvol_monitor() {
node_id=$(_get_node_id)
- if ocf_is_true $OCF_RESKEY_volume_local_check ; then
+ if [ "$__OCF_ACTION" = "monitor" ] && ocf_is_true $OCF_RESKEY_volume_local_check ; then
#
# Is the volue attached?
# We check the local devices
diff --git a/heartbeat/openstack-floating-ip b/heartbeat/openstack-floating-ip
index 8c135cc24..6e2895654 100755
--- a/heartbeat/openstack-floating-ip
+++ b/heartbeat/openstack-floating-ip
@@ -111,7 +111,7 @@ osflip_validate() {
fi
${HA_SBIN_DIR}/attrd_updater --query -n openstack_ports -N $(crm_node -n) > /dev/null 2>&1
- if [ $? -ne 0 ] ; then
+ if [ $? -ne 0 ] && ! ocf_is_probe; then
ocf_log warn "attr_updater failed to get openstack_ports attribute of node $OCF_RESOURCE_INSTANCE"
return $OCF_ERR_GENERIC
fi
@@ -129,7 +129,7 @@ osflip_monitor() {
node_port_ids=$(${HA_SBIN_DIR}/attrd_updater --query -n openstack_ports -N $(crm_node -n) \
| awk -F= '{gsub("\"","");print $NF}' \
| tr ',' ' ' \
- | awk -F: '{print $NF}')
+ | awk '{gsub("[^ ]*:", "");print}')
# Is the IP active and attached?
result=$($OCF_RESKEY_openstackcli floating ip show \
diff --git a/heartbeat/openstack-virtual-ip b/heartbeat/openstack-virtual-ip
index a1084c420..c654d980a 100755
--- a/heartbeat/openstack-virtual-ip
+++ b/heartbeat/openstack-virtual-ip
@@ -119,7 +119,7 @@ osvip_validate() {
get_config
${HA_SBIN_DIR}/attrd_updater --query -n openstack_ports -N $(crm_node -n) > /dev/null 2>&1
- if [ $? -ne 0 ] ; then
+ if [ $? -ne 0 ] && ! ocf_is_probe; then
ocf_log warn "attr_updater failed to get openstack_ports attribute of node $OCF_RESOURCE_INSTANCE"
return $OCF_ERR_GENERIC
fi
@@ -136,7 +136,7 @@ osvip_monitor() {
--format value \
--column allowed_address_pairs \
${node_port_id})
- if echo $result | grep -q $OCF_RESKEY_ip ; then
+ if echo $result | grep -q "$OCF_RESKEY_ip"; then
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -S status -n openstack_virtual_ip -v $OCF_RESKEY_ip
return $OCF_SUCCESS

@ -1,26 +0,0 @@
From 8b1d3257e5176a2f50a843a21888c4b4f51f370b Mon Sep 17 00:00:00 2001
From: Valentin Vidic <vvidic@valentin-vidic.from.hr>
Date: Sun, 3 Apr 2022 20:31:50 +0200
Subject: [PATCH] openstack-info: fix bashism
Also simplify striping of trailing comma.
---
heartbeat/openstack-info.in | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/openstack-info.in b/heartbeat/openstack-info.in
index f6dc1ee4d..f3a59fc7a 100755
--- a/heartbeat/openstack-info.in
+++ b/heartbeat/openstack-info.in
@@ -167,9 +167,9 @@ OSInfoStats() {
--column fixed_ips \
${port_id} | grep -P '\"subnet_id\": \".*\",$' |
grep -P -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}')
- value+="${subnet_id}:${port_id},"
+ value="${value}${subnet_id}:${port_id},"
done
- value=$(echo ${value} | sed -e 's/,$//g')
+ value=${value%,}
${HA_SBIN_DIR}/attrd_updater ${OCF_RESKEY_delay} -n openstack_ports -v "$value"

@ -1,82 +0,0 @@
From 4420ef84f3172c67fc7b8b6ae41ea173de017bf4 Mon Sep 17 00:00:00 2001
From: Petr Pavlu <petr.pavlu@suse.com>
Date: Wed, 25 May 2022 15:12:33 +0200
Subject: [PATCH] aws-vpc-move-ip: Allow to set the interface label
Add a parameter to specify an interface label to distinguish the IP
address managed by aws-vpc-move-ip, similarly as can be done with
IPaddr2. This allows to easily recognize the address from other
addresses assigned to a given interface.
---
heartbeat/aws-vpc-move-ip | 30 +++++++++++++++++++++++++++++-
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 5d5204080..dee040300 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -43,6 +43,7 @@ OCF_RESKEY_address_default=""
OCF_RESKEY_routing_table_default=""
OCF_RESKEY_routing_table_role_default=""
OCF_RESKEY_interface_default="eth0"
+OCF_RESKEY_iflabel_default=""
OCF_RESKEY_monapi_default="false"
OCF_RESKEY_lookup_type_default="InstanceId"
@@ -54,6 +55,7 @@ OCF_RESKEY_lookup_type_default="InstanceId"
: ${OCF_RESKEY_routing_table=${OCF_RESKEY_routing_table_default}}
: ${OCF_RESKEY_routing_table_role=${OCF_RESKEY_routing_table_role_default}}
: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}}
+: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
: ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}}
: ${OCF_RESKEY_lookup_type=${OCF_RESKEY_lookup_type_default}}
@@ -149,6 +151,18 @@ Name of the network interface, i.e. eth0
<content type="string" default="${OCF_RESKEY_interface_default}" />
</parameter>
+<parameter name="iflabel">
+<longdesc lang="en">
+You can specify an additional label for your IP address here.
+This label is appended to your interface name.
+
+The kernel allows alphanumeric labels up to a maximum length of 15
+characters including the interface name and colon (e.g. eth0:foobar1234)
+</longdesc>
+<shortdesc lang="en">Interface label</shortdesc>
+<content type="string" default="${OCF_RESKEY_iflabel_default}"/>
+</parameter>
+
<parameter name="monapi">
<longdesc lang="en">
Enable enhanced monitoring using AWS API calls to check route table entry
@@ -215,6 +229,14 @@ ec2ip_validate() {
return $OCF_ERR_CONFIGURED
fi
+ if [ -n "$OCF_RESKEY_iflabel" ]; then
+ label=${OCF_RESKEY_interface}:${OFC_RESKEY_iflabel}
+ if [ ${#label} -gt 15 ]; then
+ ocf_exit_reason "Interface label [$label] exceeds maximum character limit of 15"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+
TOKEN=$(curl -sX PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
EC2_INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN")
@@ -363,7 +385,13 @@ ec2ip_get_and_configure() {
# Reconfigure the local ip address
ec2ip_drop
- cmd="ip addr add ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface"
+
+ extra_opts=""
+ if [ -n "$OCF_RESKEY_iflabel" ]; then
+ extra_opts="$extra_opts label $OCF_RESKEY_interface:$OCF_RESKEY_iflabel"
+ fi
+
+ cmd="ip addr add ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface $extra_opts"
ocf_log debug "executing command: $cmd"
$cmd
rc=$?

@ -1,25 +0,0 @@
From b3885f7d95fe390371f806c7f3debb3ec8ad012d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 7 Jun 2022 15:20:11 +0200
Subject: [PATCH] lvmlockd: fail when use_lvmlockd has not been set
---
heartbeat/lvmlockd | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd
index 05bb0a2e5..dc7bd2d7e 100755
--- a/heartbeat/lvmlockd
+++ b/heartbeat/lvmlockd
@@ -179,6 +179,11 @@ setup_lvm_config()
out=$(lvmconfig 'global/locking_type' 2> /dev/null)
lock_type=$(echo "$out" | cut -d'=' -f2)
+ if [ -z "$use_lvmlockd" ]; then
+ ocf_exit_reason "\"use_lvmlockd\" not set in /etc/lvm/lvm.conf ..."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
if [ -n "$use_lvmlockd" ] && [ "$use_lvmlockd" != 1 ] ; then
ocf_log info "setting \"use_lvmlockd=1\" in /etc/lvm/lvm.conf ..."
sed -i 's,^[[:blank:]]*use_lvmlockd[[:blank:]]*=.*,\ \ \ \ use_lvmlockd = 1,g' /etc/lvm/lvm.conf

@ -1,195 +0,0 @@
From 640c2b57f0f3e7256d587ddd5960341cb38b1982 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Sun, 13 Dec 2020 14:58:34 -0800
Subject: [PATCH] LVM-activate: Fix return codes
OCF_ERR_ARGS should be used when the configuration isn't valid for the
**local** node, and so the resource should not attempt to start again
locally until the issue is corrected.
OCF_ERR_CONFIGURED should be used when the configuration isn't valid on
**any** node, and so the resource should not attempt to start again
anywhere until the issue is corrected.
One remaining gray area: Should lvmlockd/lvmetad/clvmd improperly
running (or improperly not running) be an OCF_ERR_GENERIC or
OCF_ERR_ARGS? The fact that it's a state issue rather than a config
issue suggests OCF_ERR_GENERIC. The fact that it won't be fixed without
user intervention suggests OCF_ERR_ARGS. The approach here is to use
GENERIC for all of these. One can make the case that "improperly
running" should use ARGS, since a process must be manually stopped to
fix the issue, and that "improperly not running" should use GENERIC,
since there's a small chance the process died and will be recovered in
some way.
More info about return code meanings:
- https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#how-are-ocf-return-codes-interpreted
Resolves: RHBZ#1905820
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/LVM-activate | 47 +++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c86606637..e951a08e9 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -333,8 +333,7 @@ config_verify()
real=$(lvmconfig "$name" | cut -d'=' -f2)
if [ "$real" != "$expect" ]; then
ocf_exit_reason "config item $name: expect=$expect but real=$real"
- exit $OCF_ERR_CONFIGURED
-
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -366,12 +365,12 @@ lvmlockd_check()
fi
ocf_exit_reason "lvmlockd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
@@ -402,17 +401,17 @@ clvmd_check()
# Good: clvmd is running, and lvmlockd is not running
if ! pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep lvmetad >/dev/null 2>&1 ; then
ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep lvmlockd >/dev/null 2>&1 ; then
ocf_exit_reason "lvmlockd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
@@ -424,12 +423,12 @@ systemid_check()
source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
if [ "$source" = "" ] || [ "$source" = "none" ]; then
ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
if [ -z ${SYSTEM_ID} ]; then
ocf_exit_reason "local/system_id is not set!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -441,18 +440,18 @@ tagging_check()
# The volume_list must be initialized to something in order to
# guarantee our tag will be filtered on startup
if ! lvm dumpconfig activation/volume_list; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
# Our tag must _NOT_ be in the volume_list. This agent
# overrides the volume_list during activation using the
# special tag reserved for cluster activation
if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -463,13 +462,13 @@ read_parameters()
if [ -z "$VG" ]
then
ocf_exit_reason "You must identify the volume group name!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
then
ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
# Convert VG_access_mode from string to index
@@ -519,8 +518,10 @@ lvm_validate() {
exit $OCF_NOT_RUNNING
fi
+ # Could be a transient error (e.g., iSCSI connection
+ # issue) so use OCF_ERR_GENERIC
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
# Inconsistency might be due to missing physical volumes, which doesn't
@@ -549,7 +550,7 @@ lvm_validate() {
mode=$?
if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
# Nothing to do if the VG has no logical volume
@@ -561,11 +562,11 @@ lvm_validate() {
# Check if the given $LV is in the $VG
if [ -n "$LV" ]; then
- OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
+ output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
if [ $? -ne 0 ]; then
- ocf_log err "lvs: ${OUT}"
+ ocf_log err "lvs: ${output}"
ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
fi
@@ -580,7 +581,6 @@ lvm_validate() {
3)
systemid_check
;;
-
4)
tagging_check
;;
@@ -808,10 +808,9 @@ lvm_status() {
dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
2>&1
if [ $? -ne 0 ]; then
- return $OCF_NOT_RUNNING
- else
- return $OCF_SUCCESS
+ return $OCF_ERR_GENERIC
fi
+ return $OCF_SUCCESS
;;
*)
ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"

@ -1,39 +0,0 @@
From 46e8d346ca4803245f51a157591c4df1126d3b49 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 12 Jul 2022 12:45:52 +0200
Subject: [PATCH] ocf-tester: use promotable terms
---
tools/ocf-tester.in | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/ocf-tester.in b/tools/ocf-tester.in
index 10822a5a06..f1703ba1b7 100755
--- a/tools/ocf-tester.in
+++ b/tools/ocf-tester.in
@@ -295,10 +295,10 @@ if [ $rc -eq 3 ]; then
elif [ $rc -eq 8 ]; then
test_command demote "Cleanup, demote"
- assert $? 0 "Your agent was a master and could not be demoted" 1
+ assert $? 0 "Your agent was promoted and could not be demoted" 1
test_command stop "Cleanup, stop"
- assert $? 0 "Your agent was a master and could not be stopped" 1
+ assert $? 0 "Your agent was promoted and could not be stopped" 1
elif [ $rc -ne 7 ]; then
test_command stop
@@ -370,10 +370,10 @@ if [ $has_promote -eq 1 -a $has_demote -eq 1 ]; then
assert $? 0 "Demote failed" 1
elif [ $has_promote -eq 0 -a $has_demote -eq 0 ]; then
- info "* Your agent does not support master/slave (optional)"
+ info "* Your agent does not support promotable clones (optional)"
else
- echo "* Your agent partially supports master/slave"
+ echo "* Your agent partially supports promotable clones"
num_errors=`expr $num_errors + 1`
fi

@ -1,166 +0,0 @@
From 687aa646852d5fd5d4e811b2ec562ebffa15e23d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 14 Jul 2022 14:52:07 +0200
Subject: [PATCH] ocf-tester: remove deprecated lrmd/lrmadmin code that hasnt
worked since pre-pacemaker days
---
tools/ocf-tester.8 | 12 ++-----
tools/ocf-tester.in | 81 ---------------------------------------------
2 files changed, 2 insertions(+), 91 deletions(-)
diff --git a/tools/ocf-tester.8 b/tools/ocf-tester.8
index 850ec0be04..3f398282d2 100644
--- a/tools/ocf-tester.8
+++ b/tools/ocf-tester.8
@@ -1,9 +1,9 @@
-.TH OCF-TESTER "8" "January 2012" "Tool for testing if a cluster resource is OCF compliant" "System Administration Utilities"
+.TH OCF-TESTER "8" "July 2022" "Tool for testing if a cluster resource is OCF compliant" "System Administration Utilities"
.SH NAME
ocf-tester \- Part of the Linux-HA project
.SH SYNOPSIS
.B ocf-tester
-[\fI-LhvqdX\fR] \fI-n resource_name \fR[\fI-o name=value\fR]\fI* /full/path/to/resource/agent\fR
+[\fI-hvqdX\fR] \fI-n resource_name \fR[\fI-o name=value\fR]\fI* /full/path/to/resource/agent\fR
.SH DESCRIPTION
Tool for testing if a cluster resource is OCF compliant
.SH OPTIONS
@@ -26,11 +26,6 @@ Name of the resource
\fB\-o\fR name=value
Name and value of any parameters required by the agent
.TP
-\fB\-L\fR
-Use lrmadmin/lrmd for tests
-.PP
-Usage: ocf\-tester [\-Lh] \fB\-n\fR resource_name [\-o name=value]* /full/path/to/resource/agent
-.TP
\fB\-h\fR
This text
.TP
@@ -51,6 +46,3 @@ Name of the resource
.TP
\fB\-o\fR name=value
Name and value of any parameters required by the agent
-.TP
-\fB\-L\fR
-Use lrmadmin/lrmd for tests
diff --git a/tools/ocf-tester.in b/tools/ocf-tester.in
index 10822a5a06..15b14e51ea 100755
--- a/tools/ocf-tester.in
+++ b/tools/ocf-tester.in
@@ -25,8 +25,6 @@
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
-LRMD=@libdir@/heartbeat/lrmd
-LRMADMIN=@sbindir@/lrmadmin
DATADIR=@datadir@
METADATA_LINT="xmllint --noout --valid -"
@@ -61,7 +59,6 @@ usage() {
echo " -X Turn on RA tracing (expect large output)"
echo " -n name Name of the resource"
echo " -o name=value Name and value of any parameters required by the agent"
- echo " -L Use lrmadmin/lrmd for tests"
exit $1
}
@@ -104,7 +101,6 @@ while test "$done" = "0"; do
-o) name=${2%%=*}; value=${2#*=};
lrm_ra_args="$lrm_ra_args $2";
ra_args="$ra_args OCF_RESKEY_$name='$value'"; shift; shift;;
- -L) use_lrmd=1; shift;;
-v) verbose=1; shift;;
-d) export HA_debug=1; shift;;
-X) export OCF_TRACE_RA=1; verbose=1; shift;;
@@ -140,79 +136,6 @@ stopped_rc=7
has_demote=1
has_promote=1
-start_lrmd() {
- lrmd_timeout=0
- lrmd_interval=0
- lrmd_target_rc=EVERYTIME
- lrmd_started=""
- $LRMD -s 2>/dev/null
- rc=$?
- if [ $rc -eq 3 ]; then
- lrmd_started=1
- $LRMD &
- sleep 1
- $LRMD -s 2>/dev/null
- else
- return $rc
- fi
-}
-add_resource() {
- $LRMADMIN -A $OCF_RESOURCE_INSTANCE \
- ocf \
- `basename $agent` \
- $(basename `dirname $agent`) \
- $lrm_ra_args > /dev/null
-}
-del_resource() {
- $LRMADMIN -D $OCF_RESOURCE_INSTANCE
-}
-parse_lrmadmin_output() {
- awk '
-BEGIN{ rc=1; }
-/Waiting for lrmd to callback.../ { n=1; next; }
-n==1 && /----------------operation--------------/ { n++; next; }
-n==2 && /return code:/ { rc=$0; sub("return code: *","",rc); next }
-n==2 && /---------------------------------------/ {
- n++;
- next;
-}
-END{
- if( n!=3 ) exit 1;
- else exit rc;
-}
-'
-}
-exec_resource() {
- op="$1"
- args="$2"
- $LRMADMIN -E $OCF_RESOURCE_INSTANCE \
- $op $lrmd_timeout $lrmd_interval \
- $lrmd_target_rc \
- $args | parse_lrmadmin_output
-}
-
-if [ "$use_lrmd" = 1 ]; then
- echo "Using lrmd/lrmadmin for all tests"
- start_lrmd || {
- echo "could not start lrmd" >&2
- exit 1
- }
- trap '
- [ "$lrmd_started" = 1 ] && $LRMD -k
- ' EXIT
- add_resource || {
- echo "failed to add resource to lrmd" >&2
- exit 1
- }
-fi
-
-lrm_test_command() {
- action="$1"
- msg="$2"
- debug "$msg"
- exec_resource $action "$lrm_ra_args"
-}
-
test_permissions() {
action=meta-data
debug ${1:-"Testing permissions with uid nobody"}
@@ -233,10 +156,6 @@ test_command() {
action=$1; shift
export __OCF_ACTION=$action
msg=${1:-"Testing: $action"}
- if [ "$use_lrmd" = 1 ]; then
- lrm_test_command $action "$msg"
- return $?
- fi
#echo Running: "export $ra_args; $agent $action 2>&1 > /dev/null"
if [ $verbose -eq 0 ]; then
command_output=`$agent $action 2>&1`

@ -1,79 +0,0 @@
From b3eadb8523b599af800a7c772606aa0e90cf142f Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 17:03:02 +0900
Subject: [PATCH 1/2] Make storage_mon -h exit just after printing help
messages.
Previously, when -h or an invalid option was specified, storage_mon
printed the help messages, proceeded processing and then could
throw an error. This was not the behavior that, e.g., users who want
to specify -h option to see the help messages are expecting. To fix
this issue, this commit changes storage_mon so that it exits just
after printing the help messages when -h or an invalid option is
specified.
---
tools/storage_mon.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 7b65bb419..1303371f7 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
fprintf(f, " --verbose emit extra output to stdout\n");
- fprintf(f, " --help print this messages\n");
+ fprintf(f, " --help print this messages, then exit\n");
}
/* Check one device */
@@ -178,9 +178,11 @@ int main(int argc, char *argv[])
break;
case 'h':
usage(argv[0], stdout);
+ exit(0);
break;
default:
usage(argv[0], stderr);
+ exit(-1);
break;
}
From e62795f02d25a772a239e0a4f9eb9d6470c134ee Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 17:56:32 +0900
Subject: [PATCH 2/2] Fix typo in help message.
---
tools/storage_mon.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 1303371f7..3c82d5ee8 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
fprintf(f, " --verbose emit extra output to stdout\n");
- fprintf(f, " --help print this messages, then exit\n");
+ fprintf(f, " --help print this message\n");
}
/* Check one device */
@@ -178,11 +178,11 @@ int main(int argc, char *argv[])
break;
case 'h':
usage(argv[0], stdout);
- exit(0);
+ return 0;
break;
default:
usage(argv[0], stderr);
- exit(-1);
+ return -1;
break;
}

@ -1,36 +0,0 @@
From a68957e8f1e8169438acf5a4321f47ed7d8ceec1 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 20:28:38 +0900
Subject: [PATCH] storage_mon: Fix bug in checking of number of specified
scores.
Previously specifying the maximum allowed number (MAX_DEVICES, currently 25)
of devices and scores as arguments could cause storage_mon to fail unexpectedly
with the error message "too many scores, max is 25". This issue happened
because storage_mon checked whether the number of specified scores
exceeded the upper limit by using the local variable "device_count" indicating
the number of specified devices (not scores). So after the maximum number
of devices arguments were interpreted, the appearance of next score argument
caused the error even when the number of interpreted scores arguments had
not exceeded the maximum.
This patch fixes storage_mon so that it uses the local variable "score_count"
indicating the number of specified scores, to check whether arguments for
scores are specified more than the upper limit.
---
tools/storage_mon.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 3c82d5ee8..c749076c2 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -154,7 +154,7 @@ int main(int argc, char *argv[])
}
break;
case 's':
- if (device_count < MAX_DEVICES) {
+ if (score_count < MAX_DEVICES) {
int score = atoi(optarg);
if (score < 1 || score > 10) {
fprintf(stderr, "Score must be between 1 and 10 inclusive\n");

@ -1,43 +0,0 @@
From c6ea93fcb499c84c3d8e9aad2ced65065a3f6d51 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 22:34:08 +0900
Subject: [PATCH] Fix bug in handling of child process exit.
When storage_mon detects that a child process exits with zero,
it resets the test_forks[] entry for the child process to 0, to avoid
waitpid() for the process again in the loop. But, previously,
storage_mon didn't do that when it detected that a child process
exited with non-zero. Which caused waitpid() to be called again
for the process already gone and to report an error like
"waitpid on XXX failed: No child processes" unexpectedly.
In this case, basically storage_mon should wait until all the child
processes exit and return the final score, instead.
This patch fixes this issue by making storage_mon reset test_works[]
entry even when a child process exits with non-zero.
---
tools/storage_mon.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 3c82d5ee8..83a48ca36 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -232,13 +232,13 @@ int main(int argc, char *argv[])
if (w == test_forks[i]) {
if (WIFEXITED(wstatus)) {
- if (WEXITSTATUS(wstatus) == 0) {
- finished_count++;
- test_forks[i] = 0;
- } else {
+ if (WEXITSTATUS(wstatus) != 0) {
syslog(LOG_ERR, "Error reading from device %s", devices[i]);
final_score += scores[i];
}
+
+ finished_count++;
+ test_forks[i] = 0;
}
}
}

@ -1,417 +0,0 @@
From 0bb52cf9985bda47e13940761b3d8e2eaddf377c Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 10 Aug 2022 17:35:54 +0900
Subject: [PATCH 1/4] storage_mon: Use the O_DIRECT flag in open() to eliminate
cache effects
---
tools/Makefile.am | 1 +
tools/storage_mon.c | 82 +++++++++++++++++++++++++++++++++------------
2 files changed, 61 insertions(+), 22 deletions(-)
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 1309223b4..08323fee3 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -74,6 +74,7 @@ sfex_stat_LDADD = $(GLIBLIB) -lplumb -lplumbgpl
findif_SOURCES = findif.c
storage_mon_SOURCES = storage_mon.c
+storage_mon_CFLAGS = -D_GNU_SOURCE
if BUILD_TICKLE
halib_PROGRAMS += tickle_tcp
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 930ead41c..ba87492fc 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -31,23 +31,27 @@ static void usage(char *name, FILE *f)
fprintf(f, " --help print this message\n");
}
-/* Check one device */
-static void *test_device(const char *device, int verbose, int inject_error_percent)
+static int open_device(const char *device, int verbose)
{
- uint64_t devsize;
int device_fd;
int res;
+ uint64_t devsize;
off_t seek_spot;
- char buffer[512];
- if (verbose) {
- printf("Testing device %s\n", device);
+#if defined(__linux__) || defined(__FreeBSD__)
+ device_fd = open(device, O_RDONLY|O_DIRECT);
+ if (device_fd >= 0) {
+ return device_fd;
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ return -1;
}
+#endif
device_fd = open(device, O_RDONLY);
if (device_fd < 0) {
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- exit(-1);
+ return -1;
}
#ifdef __FreeBSD__
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
@@ -57,11 +61,12 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
if (res != 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
close(device_fd);
- exit(-1);
+ return -1;
}
if (verbose) {
fprintf(stderr, "%s: size=%zu\n", device, devsize);
}
+
/* Don't fret about real randomness */
srand(time(NULL) + getpid());
/* Pick a random place on the device - sector aligned */
@@ -70,35 +75,64 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
if (res < 0) {
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
close(device_fd);
- exit(-1);
+ return -1;
}
-
if (verbose) {
printf("%s: reading from pos %ld\n", device, seek_spot);
}
+ return device_fd;
+}
+
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
+{
+ int device_fd;
+ int sec_size = 0;
+ int res;
+ void *buffer;
+
+ if (verbose) {
+ printf("Testing device %s\n", device);
+ }
+
+ device_fd = open_device(device, verbose);
+ if (device_fd < 0) {
+ exit(-1);
+ }
+
+ ioctl(device_fd, BLKSSZGET, &sec_size);
+ if (sec_size == 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ goto error;
+ }
- res = read(device_fd, buffer, sizeof(buffer));
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
+ goto error;
+ }
+
+ res = read(device_fd, buffer, sec_size);
+ free(buffer);
if (res < 0) {
fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
- close(device_fd);
- exit(-1);
+ goto error;
}
- if (res < (int)sizeof(buffer)) {
- fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
- close(device_fd);
- exit(-1);
+ if (res < sec_size) {
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
+ goto error;
}
/* Fake an error */
- if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
- close(device_fd);
- exit(-1);
+ if (inject_error_percent) {
+ srand(time(NULL) + getpid());
+ if ((rand() % 100) < inject_error_percent) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ goto error;
+ }
}
res = close(device_fd);
if (res != 0) {
fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno));
- close(device_fd);
exit(-1);
}
@@ -106,6 +140,10 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
printf("%s: done\n", device);
}
exit(0);
+
+error:
+ close(device_fd);
+ exit(-1);
}
int main(int argc, char *argv[])
From ce4e632f29ed6b86b82a959eac5844655baed153 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Mon, 15 Aug 2022 19:17:21 +0900
Subject: [PATCH 2/4] storage_mon: fix build-related issues
---
tools/storage_mon.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index ba87492fc..e34d1975a 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -38,7 +38,6 @@ static int open_device(const char *device, int verbose)
uint64_t devsize;
off_t seek_spot;
-#if defined(__linux__) || defined(__FreeBSD__)
device_fd = open(device, O_RDONLY|O_DIRECT);
if (device_fd >= 0) {
return device_fd;
@@ -46,7 +45,6 @@ static int open_device(const char *device, int verbose)
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
return -1;
}
-#endif
device_fd = open(device, O_RDONLY);
if (device_fd < 0) {
@@ -100,7 +98,11 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
exit(-1);
}
+#ifdef __FreeBSD__
+ ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
+#else
ioctl(device_fd, BLKSSZGET, &sec_size);
+#endif
if (sec_size == 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
goto error;
From 7a0aaa0dfdebeab3fae9fe9ddc412c3d1f610273 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 24 Aug 2022 17:36:23 +0900
Subject: [PATCH 3/4] storage_mon: do random lseek even with O_DIRECT, etc
---
tools/storage_mon.c | 118 ++++++++++++++++++++++----------------------
1 file changed, 58 insertions(+), 60 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index e34d1975a..0bdb48649 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -31,38 +31,43 @@ static void usage(char *name, FILE *f)
fprintf(f, " --help print this message\n");
}
-static int open_device(const char *device, int verbose)
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
{
+ uint64_t devsize;
+ int flags = O_RDONLY | O_DIRECT;
int device_fd;
int res;
- uint64_t devsize;
off_t seek_spot;
- device_fd = open(device, O_RDONLY|O_DIRECT);
- if (device_fd >= 0) {
- return device_fd;
- } else if (errno != EINVAL) {
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- return -1;
+ if (verbose) {
+ printf("Testing device %s\n", device);
}
- device_fd = open(device, O_RDONLY);
+ device_fd = open(device, flags);
if (device_fd < 0) {
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- return -1;
+ if (errno != EINVAL) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
+ flags &= ~O_DIRECT;
+ device_fd = open(device, flags);
+ if (device_fd < 0) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
}
#ifdef __FreeBSD__
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
#else
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
#endif
- if (res != 0) {
+ if (res < 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
- close(device_fd);
- return -1;
+ goto error;
}
if (verbose) {
- fprintf(stderr, "%s: size=%zu\n", device, devsize);
+ printf("%s: opened %s O_DIRECT, size=%zu\n", device, (flags & O_DIRECT)?"with":"without", devsize);
}
/* Don't fret about real randomness */
@@ -72,65 +77,58 @@ static int open_device(const char *device, int verbose)
res = lseek(device_fd, seek_spot, SEEK_SET);
if (res < 0) {
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
- close(device_fd);
- return -1;
+ goto error;
}
if (verbose) {
printf("%s: reading from pos %ld\n", device, seek_spot);
}
- return device_fd;
-}
-
-/* Check one device */
-static void *test_device(const char *device, int verbose, int inject_error_percent)
-{
- int device_fd;
- int sec_size = 0;
- int res;
- void *buffer;
-
- if (verbose) {
- printf("Testing device %s\n", device);
- }
- device_fd = open_device(device, verbose);
- if (device_fd < 0) {
- exit(-1);
- }
+ if (flags & O_DIRECT) {
+ int sec_size = 0;
+ void *buffer;
#ifdef __FreeBSD__
- ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
+ res = ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
#else
- ioctl(device_fd, BLKSSZGET, &sec_size);
+ res = ioctl(device_fd, BLKSSZGET, &sec_size);
#endif
- if (sec_size == 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
- goto error;
- }
+ if (res < 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ goto error;
+ }
- if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
- fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
- goto error;
- }
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
+ goto error;
+ }
+ res = read(device_fd, buffer, sec_size);
+ free(buffer);
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ goto error;
+ }
+ if (res < sec_size) {
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
+ goto error;
+ }
+ } else {
+ char buffer[512];
- res = read(device_fd, buffer, sec_size);
- free(buffer);
- if (res < 0) {
- fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
- goto error;
- }
- if (res < sec_size) {
- fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
- goto error;
+ res = read(device_fd, buffer, sizeof(buffer));
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ goto error;
+ }
+ if (res < (int)sizeof(buffer)) {
+ fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
+ goto error;
+ }
}
/* Fake an error */
- if (inject_error_percent) {
- srand(time(NULL) + getpid());
- if ((rand() % 100) < inject_error_percent) {
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
- goto error;
- }
+ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ goto error;
}
res = close(device_fd);
if (res != 0) {
From db97e055a17526cec056c595844a9d8851e3ee19 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Thu, 25 Aug 2022 16:03:46 +0900
Subject: [PATCH 4/4] storage_mon: improve error messages when ioctl() fails
---
tools/storage_mon.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 0bdb48649..f829c5081 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -63,7 +63,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
#endif
if (res < 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ fprintf(stderr, "Failed to get device size for %s: %s\n", device, strerror(errno));
goto error;
}
if (verbose) {
@@ -93,7 +93,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
res = ioctl(device_fd, BLKSSZGET, &sec_size);
#endif
if (res < 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ fprintf(stderr, "Failed to get block device sector size for %s: %s\n", device, strerror(errno));
goto error;
}

@ -1,68 +0,0 @@
From fcceb714085836de9db4493b527e94d85dd72626 Mon Sep 17 00:00:00 2001
From: ut002970 <liuxingwei@uniontech.com>
Date: Wed, 6 Sep 2023 15:27:05 +0800
Subject: [PATCH 1/3] modify error message
---
heartbeat/mysql-common.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index 8104019b03..a93acc4c60 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -254,7 +254,7 @@ mysql_common_start()
while [ $start_wait = 1 ]; do
if ! ps $pid > /dev/null 2>&1; then
wait $pid
- ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation"
+ ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation, log message you can check $OCF_RESKEY_log"
return $OCF_ERR_GENERIC
fi
mysql_common_status info
From 8f9b344cd5b3cb96ea0f94b7ab0306da2234ac00 Mon Sep 17 00:00:00 2001
From: ut002970 <liuxingwei@uniontech.com>
Date: Wed, 6 Sep 2023 15:56:24 +0800
Subject: [PATCH 2/3] modify error message
---
heartbeat/mysql-common.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index a93acc4c60..d5b2286737 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -254,7 +254,7 @@ mysql_common_start()
while [ $start_wait = 1 ]; do
if ! ps $pid > /dev/null 2>&1; then
wait $pid
- ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), please check your installation, log message you can check $OCF_RESKEY_log"
+ ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), Check $OCF_RESKEY_log for details"
return $OCF_ERR_GENERIC
fi
mysql_common_status info
From a292b3c552bf3f2beea5f73e0d171546c0a1273c Mon Sep 17 00:00:00 2001
From: ut002970 <liuxingwei@uniontech.com>
Date: Wed, 6 Sep 2023 16:10:48 +0800
Subject: [PATCH 3/3] modify error message
---
heartbeat/mysql-common.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index d5b2286737..d6b4e3cdf4 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -254,7 +254,7 @@ mysql_common_start()
while [ $start_wait = 1 ]; do
if ! ps $pid > /dev/null 2>&1; then
wait $pid
- ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?), Check $OCF_RESKEY_log for details"
+ ocf_exit_reason "MySQL server failed to start (pid=$pid) (rc=$?). Check $OCF_RESKEY_log for details"
return $OCF_ERR_GENERIC
fi
mysql_common_status info

@ -1,75 +0,0 @@
From 0063164d72bbaca68f12a2f0a7dbae9ccb41fa39 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 26 Jul 2022 09:08:26 +0200
Subject: [PATCH] ethmonitor/ovsmonitor/pgsql: remove ignored attrd_updater
"-q" parameter
attrd_updater in 2.1.3 no longer ignores the -q parameter, which makes
these agents break. It never did anything in attrd_updater, and is
probably left-over from copy/paste crm_attribute code that got changed
to attrd_updater.
---
heartbeat/ethmonitor | 2 +-
heartbeat/ovsmonitor | 2 +-
heartbeat/pgsql | 8 ++++----
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index ba8574131..451738a0b 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -464,7 +464,7 @@ END
set_cib_value() {
local score=`expr $1 \* $OCF_RESKEY_multiplier`
- attrd_updater -n $ATTRNAME -v $score -q
+ attrd_updater -n $ATTRNAME -v $score
local rc=$?
case $rc in
0) ocf_log debug "attrd_updater: Updated $ATTRNAME = $score" ;;
diff --git a/heartbeat/ovsmonitor b/heartbeat/ovsmonitor
index 872ce86eb..6765da4b9 100755
--- a/heartbeat/ovsmonitor
+++ b/heartbeat/ovsmonitor
@@ -355,7 +355,7 @@ END
set_cib_value() {
local score=`expr $1 \* $OCF_RESKEY_multiplier`
- attrd_updater -n $ATTRNAME -v $score -q
+ attrd_updater -n $ATTRNAME -v $score
local rc=$?
case $rc in
0) ocf_log debug "attrd_updater: Updated $ATTRNAME = $score" ;;
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index 94aceb324..e93d66855 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -808,7 +808,7 @@ pgsql_real_stop() {
local stop_escalate
if ocf_is_true ${OCF_RESKEY_check_wal_receiver}; then
- attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D -q
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -D
fi
if ! pgsql_status
@@ -937,16 +937,16 @@ pgsql_wal_receiver_status() {
receiver_parent_pids=`ps -ef | tr -s " " | grep "[w]al\s*receiver" | cut -d " " -f 3`
if echo "$receiver_parent_pids" | grep -q -w "$PID" ; then
- attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal" -q
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal"
return 0
fi
if [ $pgsql_real_monitor_status -eq "$OCF_RUNNING_MASTER" ]; then
- attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)" -q
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "normal (master)"
return 0
fi
- attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR" -q
+ attrd_updater -n "$PGSQL_WAL_RECEIVER_STATUS_ATTR" -v "ERROR"
ocf_log warn "wal receiver process is not running"
return 1
}

@ -1,903 +0,0 @@
From 5dcd5153f0318e4766f7f4d3e61dfdb4b352c39c Mon Sep 17 00:00:00 2001
From: MSSedusch <sedusch@microsoft.com>
Date: Mon, 30 May 2022 15:08:10 +0200
Subject: [PATCH 1/2] add new Azure Events AZ resource agent
---
.gitignore | 1 +
configure.ac | 8 +
doc/man/Makefile.am | 4 +
heartbeat/Makefile.am | 4 +
heartbeat/azure-events-az.in | 782 +++++++++++++++++++++++++++++++++++
5 files changed, 799 insertions(+)
create mode 100644 heartbeat/azure-events-az.in
diff --git a/.gitignore b/.gitignore
index 0c259b5cf..e2b7c039c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,7 @@ heartbeat/Squid
heartbeat/SysInfo
heartbeat/aws-vpc-route53
heartbeat/azure-events
+heartbeat/azure-events-az
heartbeat/clvm
heartbeat/conntrackd
heartbeat/dnsupdate
diff --git a/configure.ac b/configure.ac
index eeecfad0e..5716a2be2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -523,6 +523,13 @@ if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
fi
AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1)
+BUILD_AZURE_EVENTS_AZ=1
+if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
+ BUILD_AZURE_EVENTS_AZ=0
+ AC_MSG_WARN("Not building azure-events-az")
+fi
+AM_CONDITIONAL(BUILD_AZURE_EVENTS_AZ, test $BUILD_AZURE_EVENTS_AZ -eq 1)
+
BUILD_GCP_PD_MOVE=1
if test -z "$PYTHON" || test "x${HAVE_PYMOD_GOOGLEAPICLIENT}" != xyes || test $BUILD_OCF_PY -eq 0; then
BUILD_GCP_PD_MOVE=0
@@ -976,6 +983,7 @@ rgmanager/Makefile \
dnl Files we output that need to be executable
AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events])
+AC_CONFIG_FILES([heartbeat/azure-events-az], [chmod +x heartbeat/azure-events-az])
AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget])
AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID])
AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE])
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index cd8fd16bf..658c700ac 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -219,6 +219,10 @@ if BUILD_AZURE_EVENTS
man_MANS += ocf_heartbeat_azure-events.7
endif
+if BUILD_AZURE_EVENTS_AZ
+man_MANS += ocf_heartbeat_azure-events-az.7
+endif
+
if BUILD_GCP_PD_MOVE
man_MANS += ocf_heartbeat_gcp-pd-move.7
endif
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 20d41e36a..1133dc13e 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -188,6 +188,10 @@ if BUILD_AZURE_EVENTS
ocf_SCRIPTS += azure-events
endif
+if BUILD_AZURE_EVENTS_AZ
+ocf_SCRIPTS += azure-events-az
+endif
+
if BUILD_GCP_PD_MOVE
ocf_SCRIPTS += gcp-pd-move
endif
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
new file mode 100644
index 000000000..616fc8d9e
--- /dev/null
+++ b/heartbeat/azure-events-az.in
@@ -0,0 +1,782 @@
+#!@PYTHON@ -tt
+#
+# Resource agent for monitoring Azure Scheduled Events
+#
+# License: GNU General Public License (GPL)
+# (c) 2018 Tobias Niekamp, Microsoft Corp.
+# and Linux-HA contributors
+
+import os
+import sys
+import time
+import subprocess
+import json
+try:
+ import urllib2
+ from urllib2 import URLError
+except ImportError:
+ import urllib.request as urllib2
+ from urllib.error import URLError
+import socket
+from collections import defaultdict
+
+OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT"))
+sys.path.append(OCF_FUNCTIONS_DIR)
+import ocf
+
+##############################################################################
+
+
+VERSION = "0.10"
+USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())
+
+attr_globalPullState = "azure-events-az_globalPullState"
+attr_lastDocVersion = "azure-events-az_lastDocVersion"
+attr_curNodeState = "azure-events-az_curNodeState"
+attr_pendingEventIDs = "azure-events-az_pendingEventIDs"
+attr_healthstate = "#health-azure"
+
+default_loglevel = ocf.logging.INFO
+default_relevantEventTypes = set(["Reboot", "Redeploy"])
+
+global_pullMaxAttempts = 3
+global_pullDelaySecs = 1
+
+##############################################################################
+
+class attrDict(defaultdict):
+ """
+ A wrapper for accessing dict keys like an attribute
+ """
+ def __init__(self, data):
+ super(attrDict, self).__init__(attrDict)
+ for d in data.keys():
+ self.__setattr__(d, data[d])
+
+ def __getattr__(self, key):
+ try:
+ return self[key]
+ except KeyError:
+ raise AttributeError(key)
+
+ def __setattr__(self, key, value):
+ self[key] = value
+
+##############################################################################
+
+class azHelper:
+ """
+ Helper class for Azure's metadata API (including Scheduled Events)
+ """
+ metadata_host = "http://169.254.169.254/metadata"
+ instance_api = "instance"
+ events_api = "scheduledevents"
+ api_version = "2019-08-01"
+
+ @staticmethod
+ def _sendMetadataRequest(endpoint, postData=None):
+ """
+ Send a request to Azure's Azure Metadata Service API
+ """
+ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
+ data = ""
+ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
+ ocf.logger.debug("_sendMetadataRequest: url = %s" % url)
+
+ if postData and type(postData) != bytes:
+ postData = postData.encode()
+
+ req = urllib2.Request(url, postData)
+ req.add_header("Metadata", "true")
+ req.add_header("User-Agent", USER_AGENT)
+ try:
+ resp = urllib2.urlopen(req)
+ except URLError as e:
+ if hasattr(e, 'reason'):
+ ocf.logger.warning("Failed to reach the server: %s" % e.reason)
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
+ elif hasattr(e, 'code'):
+ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
+ else:
+ data = resp.read()
+ ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
+
+ if data:
+ data = json.loads(data)
+
+ ocf.logger.debug("_sendMetadataRequest: finished")
+ return data
+
+ @staticmethod
+ def getInstanceInfo():
+ """
+ Fetch details about the current VM from Azure's Azure Metadata Service API
+ """
+ ocf.logger.debug("getInstanceInfo: begin")
+
+ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
+ ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)
+
+ if jsondata:
+ ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
+ return attrDict(jsondata["compute"])
+ else:
+ ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info")
+ sys.exit(ocf.OCF_ERR_GENERIC)
+
+ @staticmethod
+ def pullScheduledEvents():
+ """
+ Retrieve all currently scheduled events via Azure Metadata Service API
+ """
+ ocf.logger.debug("pullScheduledEvents: begin")
+
+ jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
+ ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)
+
+ ocf.logger.debug("pullScheduledEvents: finished")
+ return attrDict(jsondata)
+
+ @staticmethod
+ def forceEvents(eventIDs):
+ """
+ Force a set of events to start immediately
+ """
+ ocf.logger.debug("forceEvents: begin")
+
+ events = []
+ for e in eventIDs:
+ events.append({
+ "EventId": e,
+ })
+ postData = {
+ "StartRequests" : events
+ }
+ ocf.logger.info("forceEvents: postData = %s" % postData)
+ resp = azHelper._sendMetadataRequest(azHelper.events_api, postData=json.dumps(postData))
+
+ ocf.logger.debug("forceEvents: finished")
+ return
+
+##############################################################################
+
+class clusterHelper:
+ """
+ Helper functions for Pacemaker control via crm
+ """
+ @staticmethod
+ def _getLocation(node):
+ """
+ Helper function to retrieve local/global attributes
+ """
+ if node:
+ return ["--node", node]
+ else:
+ return ["--type", "crm_config"]
+
+ @staticmethod
+ def _exec(command, *args):
+ """
+ Helper function to execute a UNIX command
+ """
+ args = list(args)
+ ocf.logger.debug("_exec: begin; command = %s, args = %s" % (command, str(args)))
+
+ def flatten(*n):
+ return (str(e) for a in n
+ for e in (flatten(*a) if isinstance(a, (tuple, list)) else (str(a),)))
+ command = list(flatten([command] + args))
+ ocf.logger.debug("_exec: cmd = %s" % " ".join(command))
+ try:
+ ret = subprocess.check_output(command)
+ if type(ret) != str:
+ ret = ret.decode()
+ ocf.logger.debug("_exec: return = %s" % ret)
+ return ret.rstrip()
+ except Exception as err:
+ ocf.logger.exception(err)
+ return None
+
+ @staticmethod
+ def setAttr(key, value, node=None):
+ """
+ Set the value of a specific global/local attribute in the Pacemaker cluster
+ """
+ ocf.logger.debug("setAttr: begin; key = %s, value = %s, node = %s" % (key, value, node))
+
+ if value:
+ ret = clusterHelper._exec("crm_attribute",
+ "--name", key,
+ "--update", value,
+ clusterHelper._getLocation(node))
+ else:
+ ret = clusterHelper._exec("crm_attribute",
+ "--name", key,
+ "--delete",
+ clusterHelper._getLocation(node))
+
+ ocf.logger.debug("setAttr: finished")
+ return len(ret) == 0
+
+ @staticmethod
+ def getAttr(key, node=None):
+ """
+ Retrieve a global/local attribute from the Pacemaker cluster
+ """
+ ocf.logger.debug("getAttr: begin; key = %s, node = %s" % (key, node))
+
+ val = clusterHelper._exec("crm_attribute",
+ "--name", key,
+ "--query", "--quiet",
+ "--default", "",
+ clusterHelper._getLocation(node))
+ ocf.logger.debug("getAttr: finished")
+ if not val:
+ return None
+ return val if not val.isdigit() else int(val)
+
+ @staticmethod
+ def getAllNodes():
+ """
+ Get a list of hostnames for all nodes in the Pacemaker cluster
+ """
+ ocf.logger.debug("getAllNodes: begin")
+
+ nodes = []
+ nodeList = clusterHelper._exec("crm_node", "--list")
+ for n in nodeList.split("\n"):
+ nodes.append(n.split()[1])
+ ocf.logger.debug("getAllNodes: finished; return %s" % str(nodes))
+
+ return nodes
+
+ @staticmethod
+ def getHostNameFromAzName(azName):
+ """
+ Helper function to get the actual host name from an Azure node name
+ """
+ return clusterHelper.getAttr("hostName_%s" % azName)
+
+ @staticmethod
+ def removeHoldFromNodes():
+ """
+ Remove the ON_HOLD state from all nodes in the Pacemaker cluster
+ """
+ ocf.logger.debug("removeHoldFromNodes: begin")
+
+ for n in clusterHelper.getAllNodes():
+ if clusterHelper.getAttr(attr_curNodeState, node=n) == "ON_HOLD":
+ clusterHelper.setAttr(attr_curNodeState, "AVAILABLE", node=n)
+ ocf.logger.info("removeHoldFromNodes: removed ON_HOLD from node %s" % n)
+
+ ocf.logger.debug("removeHoldFromNodes: finished")
+ return False
+
+ @staticmethod
+ def otherNodesAvailable(exceptNode):
+ """
+ Check if there are any nodes (except a given node) in the Pacemaker cluster that have state AVAILABLE
+ """
+ ocf.logger.debug("otherNodesAvailable: begin; exceptNode = %s" % exceptNode)
+
+ for n in clusterHelper.getAllNodes():
+ state = clusterHelper.getAttr(attr_curNodeState, node=n)
+ state = stringToNodeState(state) if state else AVAILABLE
+ if state == AVAILABLE and n != exceptNode.hostName:
+ ocf.logger.info("otherNodesAvailable: at least %s is available" % n)
+ ocf.logger.debug("otherNodesAvailable: finished")
+ return True
+ ocf.logger.info("otherNodesAvailable: no other nodes are available")
+ ocf.logger.debug("otherNodesAvailable: finished")
+
+ return False
+
+ @staticmethod
+ def transitionSummary():
+ """
+ Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby)
+ """
+ # <tniek> Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node?
+ # # crm_simulate -Ls
+ # Transition Summary:
+ # * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
+ # * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
+ # * Move rsc_ip_HN1_HDB03 (Started hsr3-db0 -> hsr3-db1)
+ # * Start rsc_nc_HN1_HDB03 (hsr3-db1)
+ # # Excepted result when there are no pending actions:
+ # Transition Summary:
+ ocf.logger.debug("transitionSummary: begin")
+
+ summary = clusterHelper._exec("crm_simulate", "-Ls")
+ if not summary:
+ ocf.logger.warning("transitionSummary: could not load transition summary")
+ return False
+ if summary.find("Transition Summary:") < 0:
+ ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
+ return False
+ summary = summary.split("Transition Summary:")[1]
+ ret = summary.split("\n").pop(0)
+
+ ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
+ return ret
+
+ @staticmethod
+ def listOperationsOnNode(node):
+ """
+ Get a list of all current operations for a given node (used to check if any resources are pending)
+ """
+ # hsr3-db1:/home/tniek # crm_resource --list-operations -N hsr3-db0
+ # rsc_azure-events-az (ocf::heartbeat:azure-events-az): Started: rsc_azure-events-az_start_0 (node=hsr3-db0, call=91, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=115ms): complete
+ # rsc_azure-events-az (ocf::heartbeat:azure-events-az): Started: rsc_azure-events-az_monitor_10000 (node=hsr3-db0, call=93, rc=0, last-rc-change=Fri Jun 8 22:37:47 2018, exec=197ms): complete
+ # rsc_SAPHana_HN1_HDB03 (ocf::suse:SAPHana): Master: rsc_SAPHana_HN1_HDB03_start_0 (node=hsr3-db0, call=-1, rc=193, last-rc-change=Fri Jun 8 22:37:46 2018, exec=0ms): pending
+ # rsc_SAPHanaTopology_HN1_HDB03 (ocf::suse:SAPHanaTopology): Started: rsc_SAPHanaTopology_HN1_HDB03_start_0 (node=hsr3-db0, call=90, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=3214ms): complete
+ ocf.logger.debug("listOperationsOnNode: begin; node = %s" % node)
+
+ resources = clusterHelper._exec("crm_resource", "--list-operations", "-N", node)
+ if len(resources) == 0:
+ ret = []
+ else:
+ ret = resources.split("\n")
+
+ ocf.logger.debug("listOperationsOnNode: finished; return = %s" % str(ret))
+ return ret
+
+ @staticmethod
+ def noPendingResourcesOnNode(node):
+ """
+ Check that there are no pending resources on a given node
+ """
+ ocf.logger.debug("noPendingResourcesOnNode: begin; node = %s" % node)
+
+ for r in clusterHelper.listOperationsOnNode(node):
+ ocf.logger.debug("noPendingResourcesOnNode: * %s" % r)
+ resource = r.split()[-1]
+ if resource == "pending":
+ ocf.logger.info("noPendingResourcesOnNode: found resource %s that is still pending" % resource)
+ ocf.logger.debug("noPendingResourcesOnNode: finished; return = False")
+ return False
+ ocf.logger.info("noPendingResourcesOnNode: no pending resources on node %s" % node)
+ ocf.logger.debug("noPendingResourcesOnNode: finished; return = True")
+
+ return True
+
+ @staticmethod
+ def allResourcesStoppedOnNode(node):
+ """
+ Check that all resources on a given node are stopped
+ """
+ ocf.logger.debug("allResourcesStoppedOnNode: begin; node = %s" % node)
+
+ if clusterHelper.noPendingResourcesOnNode(node):
+ if len(clusterHelper.transitionSummary()) == 0:
+ ocf.logger.info("allResourcesStoppedOnNode: no pending resources on node %s and empty transition summary" % node)
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = True")
+ return True
+ ocf.logger.info("allResourcesStoppedOnNode: transition summary is not empty")
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
+ return False
+
+ ocf.logger.info("allResourcesStoppedOnNode: still pending resources on node %s" % node)
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
+ return False
+
+##############################################################################
+
+AVAILABLE = 0 # Node is online and ready to handle events
+STOPPING = 1 # Standby has been triggered, but some resources are still running
+IN_EVENT = 2 # All resources are stopped, and event has been initiated via Azure Metadata Service
+ON_HOLD = 3 # Node has a pending event that cannot be started there are no other nodes available
+
+def stringToNodeState(name):
+ if type(name) == int: return name
+ if name == "STOPPING": return STOPPING
+ if name == "IN_EVENT": return IN_EVENT
+ if name == "ON_HOLD": return ON_HOLD
+ return AVAILABLE
+
+def nodeStateToString(state):
+ if state == STOPPING: return "STOPPING"
+ if state == IN_EVENT: return "IN_EVENT"
+ if state == ON_HOLD: return "ON_HOLD"
+ return "AVAILABLE"
+
+##############################################################################
+
+class Node:
+ """
+ Core class implementing logic for a cluster node
+ """
+ def __init__(self, ra):
+ self.raOwner = ra
+ self.azInfo = azHelper.getInstanceInfo()
+ self.azName = self.azInfo.name
+ self.hostName = socket.gethostname()
+ self.setAttr("azName", self.azName)
+ clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
+
+ def getAttr(self, key):
+ """
+ Get a local attribute
+ """
+ return clusterHelper.getAttr(key, node=self.hostName)
+
+ def setAttr(self, key, value):
+ """
+ Set a local attribute
+ """
+ return clusterHelper.setAttr(key, value, node=self.hostName)
+
+ def selfOrOtherNode(self, node):
+ """
+ Helper function to distinguish self/other node
+ """
+ return node if node else self.hostName
+
+ def setState(self, state, node=None):
+ """
+ Set the state for a given node (or self)
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("setState: begin; node = %s, state = %s" % (node, nodeStateToString(state)))
+
+ clusterHelper.setAttr(attr_curNodeState, nodeStateToString(state), node=node)
+
+ ocf.logger.debug("setState: finished")
+
+ def getState(self, node=None):
+ """
+ Get the state for a given node (or self)
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("getState: begin; node = %s" % node)
+
+ state = clusterHelper.getAttr(attr_curNodeState, node=node)
+ ocf.logger.debug("getState: state = %s" % state)
+ ocf.logger.debug("getState: finished")
+ if not state:
+ return AVAILABLE
+ return stringToNodeState(state)
+
+ def setEventIDs(self, eventIDs, node=None):
+ """
+ Set pending EventIDs for a given node (or self)
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("setEventIDs: begin; node = %s, eventIDs = %s" % (node, str(eventIDs)))
+
+ if eventIDs:
+ eventIDStr = ",".join(eventIDs)
+ else:
+ eventIDStr = None
+ clusterHelper.setAttr(attr_pendingEventIDs, eventIDStr, node=node)
+
+ ocf.logger.debug("setEventIDs: finished")
+ return
+
+ def getEventIDs(self, node=None):
+ """
+ Get pending EventIDs for a given node (or self)
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("getEventIDs: begin; node = %s" % node)
+
+ eventIDStr = clusterHelper.getAttr(attr_pendingEventIDs, node=node)
+ if eventIDStr:
+ eventIDs = eventIDStr.split(",")
+ else:
+ eventIDs = None
+
+ ocf.logger.debug("getEventIDs: finished; eventIDs = %s" % str(eventIDs))
+ return eventIDs
+
+ def updateNodeStateAndEvents(self, state, eventIDs, node=None):
+ """
+ Set the state and pending EventIDs for a given node (or self)
+ """
+ ocf.logger.debug("updateNodeStateAndEvents: begin; node = %s, state = %s, eventIDs = %s" % (node, nodeStateToString(state), str(eventIDs)))
+
+ self.setState(state, node=node)
+ self.setEventIDs(eventIDs, node=node)
+
+ ocf.logger.debug("updateNodeStateAndEvents: finished")
+ return state
+
+ def putNodeStandby(self, node=None):
+ """
+ Put self to standby
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("putNodeStandby: begin; node = %s" % node)
+
+ clusterHelper._exec("crm_attribute",
+ "--node", node,
+ "--name", attr_healthstate,
+ "--update", "-1000000",
+ "--lifetime=forever")
+
+ ocf.logger.debug("putNodeStandby: finished")
+
+ def isNodeInStandby(self, node=None):
+ """
+ check if node is in standby
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("isNodeInStandby: begin; node = %s" % node)
+ isInStandy = False
+
+ healthAttributeStr = clusterHelper.getAttr(attr_healthstate, node)
+ if healthAttributeStr is not None:
+ try:
+ healthAttribute = int(healthAttributeStr)
+ isInStandy = healthAttribute < 0
+ except ValueError:
+ # Handle the exception
+ ocf.logger.warn("Health attribute %s on node %s cannot be converted to an integer value" % (healthAttributeStr, node))
+
+ ocf.logger.debug("isNodeInStandby: finished - result %s" % isInStandy)
+ return isInStandy
+
+ def putNodeOnline(self, node=None):
+ """
+ Put self back online
+ """
+ node = self.selfOrOtherNode(node)
+ ocf.logger.debug("putNodeOnline: begin; node = %s" % node)
+
+ clusterHelper._exec("crm_attribute",
+ "--node", node,
+ "--name", "#health-azure",
+ "--update", "0",
+ "--lifetime=forever")
+
+ ocf.logger.debug("putNodeOnline: finished")
+
+ def separateEvents(self, events):
+ """
+ Split own/other nodes' events
+ """
+ ocf.logger.debug("separateEvents: begin; events = %s" % str(events))
+
+ localEvents = []
+ remoteEvents = []
+ for e in events:
+ e = attrDict(e)
+ if e.EventType not in self.raOwner.relevantEventTypes:
+ continue
+ if self.azName in e.Resources:
+ localEvents.append(e)
+ else:
+ remoteEvents.append(e)
+ ocf.logger.debug("separateEvents: finished; localEvents = %s, remoteEvents = %s" % (str(localEvents), str(remoteEvents)))
+ return (localEvents, remoteEvents)
+
+##############################################################################
+
+class raAzEvents:
+ """
+ Main class for resource agent
+ """
+ def __init__(self, relevantEventTypes):
+ self.node = Node(self)
+ self.relevantEventTypes = relevantEventTypes
+
+ def monitor(self):
+ ocf.logger.debug("monitor: begin")
+
+ events = azHelper.pullScheduledEvents()
+
+ # get current document version
+ curDocVersion = events.DocumentIncarnation
+ lastDocVersion = self.node.getAttr(attr_lastDocVersion)
+ ocf.logger.debug("monitor: lastDocVersion = %s; curDocVersion = %s" % (lastDocVersion, curDocVersion))
+
+ # split events local/remote
+ (localEvents, remoteEvents) = self.node.separateEvents(events.Events)
+
+ # ensure local events are only executing once
+ if curDocVersion == lastDocVersion:
+ ocf.logger.info("monitor: already handled curDocVersion, skip")
+ return ocf.OCF_SUCCESS
+
+ localAzEventIDs = set()
+ for e in localEvents:
+ localAzEventIDs.add(e.EventId)
+
+ curState = self.node.getState()
+ clusterEventIDs = self.node.getEventIDs()
+
+ ocf.logger.debug("monitor: curDocVersion has not been handled yet")
+
+ if clusterEventIDs:
+ # there are pending events set, so our state must be STOPPING or IN_EVENT
+ i = 0; touchedEventIDs = False
+ while i < len(clusterEventIDs):
+ # clean up pending events that are already finished according to AZ
+ if clusterEventIDs[i] not in localAzEventIDs:
+ ocf.logger.info("monitor: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
+ clusterEventIDs.pop(i)
+ touchedEventIDs = True
+ else:
+ i += 1
+ if len(clusterEventIDs) > 0:
+ # there are still pending events (either because we're still stopping, or because the event is still in place)
+ # either way, we need to wait
+ if touchedEventIDs:
+ ocf.logger.info("monitor: added new local clusterEvent %s" % str(clusterEventIDs))
+ self.node.setEventIDs(clusterEventIDs)
+ else:
+ ocf.logger.info("monitor: no local clusterEvents were updated")
+ else:
+ # there are no more pending events left after cleanup
+ if clusterHelper.noPendingResourcesOnNode(self.node.hostName):
+ # and no pending resources on the node -> set it back online
+ ocf.logger.info("monitor: all local events finished -> clean up, put node online and AVAILABLE")
+ curState = self.node.updateNodeStateAndEvents(AVAILABLE, None)
+ self.node.putNodeOnline()
+ clusterHelper.removeHoldFromNodes()
+ # If Azure Scheduled Events are not used for 24 hours (e.g. because the cluster was asleep), it will be disabled for a VM.
+ # When the cluster wakes up and starts using it again, the DocumentIncarnation is reset.
+ # We need to remove it during cleanup, otherwise azure-events-az will not process the event after wakeup
+ self.node.setAttr(attr_lastDocVersion, None)
+ else:
+ ocf.logger.info("monitor: all local events finished, but some resources have not completed startup yet -> wait")
+ else:
+ if curState == AVAILABLE:
+ if len(localAzEventIDs) > 0:
+ if clusterHelper.otherNodesAvailable(self.node):
+ ocf.logger.info("monitor: can handle local events %s -> set state STOPPING" % (str(localAzEventIDs)))
+ curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIDs)
+ else:
+ ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(localAzEventIDs))
+ self.node.setState(ON_HOLD)
+ else:
+ ocf.logger.debug("monitor: no local azEvents to handle")
+
+ if curState == STOPPING:
+ eventIDsForNode = {}
+ if clusterHelper.noPendingResourcesOnNode(self.node.hostName):
+ if not self.node.isNodeInStandby():
+ ocf.logger.info("monitor: all local resources are started properly -> put node standby and exit")
+ self.node.putNodeStandby()
+ return ocf.OCF_SUCCESS
+
+ for e in localEvents:
+ ocf.logger.info("monitor: handling remote event %s (%s; nodes = %s)" % (e.EventId, e.EventType, str(e.Resources)))
+ # before we can force an event to start, we need to ensure all nodes involved have stopped their resources
+ if e.EventStatus == "Scheduled":
+ allNodesStopped = True
+ for azName in e.Resources:
+ hostName = clusterHelper.getHostNameFromAzName(azName)
+ state = self.node.getState(node=hostName)
+ if state == STOPPING:
+ # the only way we can continue is when node state is STOPPING, but all resources have been stopped
+ if not clusterHelper.allResourcesStoppedOnNode(hostName):
+ ocf.logger.info("monitor: (at least) node %s has still resources running -> wait" % hostName)
+ allNodesStopped = False
+ break
+ elif state in (AVAILABLE, IN_EVENT, ON_HOLD):
+ ocf.logger.info("monitor: node %s is still %s -> remote event needs to be picked up locally" % (hostName, nodeStateToString(state)))
+ allNodesStopped = False
+ break
+ if allNodesStopped:
+ ocf.logger.info("monitor: nodes %s are stopped -> add remote event %s to force list" % (str(e.Resources), e.EventId))
+ for n in e.Resources:
+ hostName = clusterHelper.getHostNameFromAzName(n)
+ if hostName in eventIDsForNode:
+ eventIDsForNode[hostName].append(e.EventId)
+ else:
+ eventIDsForNode[hostName] = [e.EventId]
+ elif e.EventStatus == "Started":
+ ocf.logger.info("monitor: remote event already started")
+
+ # force the start of all events whose nodes are ready (i.e. have no more resources running)
+ if len(eventIDsForNode.keys()) > 0:
+ eventIDsToForce = set([item for sublist in eventIDsForNode.values() for item in sublist])
+ ocf.logger.info("monitor: set nodes %s to IN_EVENT; force remote events %s" % (str(eventIDsForNode.keys()), str(eventIDsToForce)))
+ for node, eventId in eventIDsForNode.items():
+ self.node.updateNodeStateAndEvents(IN_EVENT, eventId, node=node)
+ azHelper.forceEvents(eventIDsToForce)
+ self.node.setAttr(attr_lastDocVersion, curDocVersion)
+ else:
+ ocf.logger.info("monitor: some local resources are not clean yet -> wait")
+
+ ocf.logger.debug("monitor: finished")
+ return ocf.OCF_SUCCESS
+
+##############################################################################
+
+def setLoglevel(verbose):
+ # set up writing into syslog
+ loglevel = default_loglevel
+ if verbose:
+ opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=1))
+ urllib2.install_opener(opener)
+ loglevel = ocf.logging.DEBUG
+ ocf.log.setLevel(loglevel)
+
+description = (
+ "Microsoft Azure Scheduled Events monitoring agent",
+ """This resource agent implements a monitor for scheduled
+(maintenance) events for a Microsoft Azure VM.
+
+If any relevant events are found, it moves all Pacemaker resources
+away from the affected node to allow for a graceful shutdown.
+
+ Usage:
+ [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events-az ACTION
+
+ action (required): Supported values: monitor, help, meta-data
+ eventTypes (optional): List of event types to be considered
+ relevant by the resource agent (comma-separated).
+ Supported values: Freeze,Reboot,Redeploy
+ Default = Reboot,Redeploy
+/ verbose (optional): If set to true, displays debug info.
+ Default = false
+
+ Deployment:
+ crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az \
+ op monitor interval=10s
+ crm configure clone cln_azure-events-az rsc_azure-events-az
+
+For further information on Microsoft Azure Scheduled Events, please
+refer to the following documentation:
+https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events
+""")
+
+def monitor_action(eventTypes):
+ relevantEventTypes = set(eventTypes.split(",") if eventTypes else [])
+ ra = raAzEvents(relevantEventTypes)
+ return ra.monitor()
+
+def validate_action(eventTypes):
+ if eventTypes:
+ for event in eventTypes.split(","):
+ if event not in ("Freeze", "Reboot", "Redeploy"):
+ ocf.ocf_exit_reason("Event type not one of Freeze, Reboot, Redeploy: " + eventTypes)
+ return ocf.OCF_ERR_CONFIGURED
+ return ocf.OCF_SUCCESS
+
+def main():
+ agent = ocf.Agent("azure-events-az", shortdesc=description[0], longdesc=description[1])
+ agent.add_parameter(
+ "eventTypes",
+ shortdesc="List of resources to be considered",
+ longdesc="A comma-separated list of event types that will be handled by this resource agent. (Possible values: Freeze,Reboot,Redeploy)",
+ content_type="string",
+ default="Reboot,Redeploy")
+ agent.add_parameter(
+ "verbose",
+ shortdesc="Enable verbose agent logging",
+ longdesc="Set to true to enable verbose logging",
+ content_type="boolean",
+ default="false")
+ agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
+ agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
+ agent.add_action("validate-all", timeout=20, handler=validate_action)
+ agent.add_action("monitor", timeout=240, interval=10, handler=monitor_action)
+ setLoglevel(ocf.is_true(ocf.get_parameter("verbose", "false")))
+ agent.run()
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
From a95337d882c7cc69d604b050159ad50b679f18be Mon Sep 17 00:00:00 2001
From: MSSedusch <sedusch@microsoft.com>
Date: Thu, 2 Jun 2022 14:10:33 +0200
Subject: [PATCH 2/2] Remove developer documentation
---
heartbeat/azure-events-az.in | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 616fc8d9e..59d095306 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -723,17 +723,6 @@ description = (
If any relevant events are found, it moves all Pacemaker resources
away from the affected node to allow for a graceful shutdown.
- Usage:
- [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events-az ACTION
-
- action (required): Supported values: monitor, help, meta-data
- eventTypes (optional): List of event types to be considered
- relevant by the resource agent (comma-separated).
- Supported values: Freeze,Reboot,Redeploy
- Default = Reboot,Redeploy
-/ verbose (optional): If set to true, displays debug info.
- Default = false
-
Deployment:
crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az \
op monitor interval=10s

@ -1,298 +0,0 @@
From 764757380af19d3a21d40f3c9624e4135ff074e1 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 2 Nov 2022 10:26:31 +0100
Subject: [PATCH] nfsserver: add nfsv4_only parameter to make it run without
rpc-statd/rpcbind services
---
heartbeat/nfsserver | 200 +++++++++++++++++++++++++-------------------
1 file changed, 114 insertions(+), 86 deletions(-)
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index 9bbd603e5..cb2d43ab1 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -79,6 +79,16 @@ Init script for nfsserver
<content type="string" default="auto detected" />
</parameter>
+<parameter name="nfsv4_only" unique="0" required="0">
+<longdesc lang="en">
+Run in NFSv4 only mode (rpc-statd and rpcbind services masked).
+</longdesc>
+<shortdesc lang="en">
+NFSv4 only mode.
+</shortdesc>
+<content type="boolean" default="false" />
+</parameter>
+
<parameter name="nfs_no_notify" unique="0" required="0">
<longdesc lang="en">
Do not send reboot notifications to NFSv3 clients during server startup.
@@ -332,7 +342,7 @@ v3locking_exec()
if [ $EXEC_MODE -eq 2 ]; then
nfs_exec $cmd nfs-lock.service
elif [ $EXEC_MODE -eq 3 ]; then
- nfs_exec $cmd rpc-statd.service
+ nfs_exec $cmd rpc-statd.service
else
case $cmd in
start) locking_start;;
@@ -348,20 +358,22 @@ nfsserver_systemd_monitor()
local rc
local fn
- ocf_log debug "Status: rpcbind"
- rpcinfo > /dev/null 2>&1
- rc=$?
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "rpcbind is not running"
- return $OCF_NOT_RUNNING
- fi
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ ocf_log debug "Status: rpcbind"
+ rpcinfo > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "rpcbind is not running"
+ return $OCF_NOT_RUNNING
+ fi
- ocf_log debug "Status: nfs-mountd"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "nfs-mountd is not running"
- return $OCF_NOT_RUNNING
+ ocf_log debug "Status: nfs-mountd"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "nfs-mountd is not running"
+ return $OCF_NOT_RUNNING
+ fi
fi
ocf_log debug "Status: nfs-idmapd"
@@ -375,12 +387,14 @@ nfsserver_systemd_monitor()
return $OCF_NOT_RUNNING
fi
- ocf_log debug "Status: rpc-statd"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "rpc-statd is not running"
- return $OCF_NOT_RUNNING
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ ocf_log debug "Status: rpc-statd"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "rpc-statd is not running"
+ return $OCF_NOT_RUNNING
+ fi
fi
nfs_exec is-active nfs-server
@@ -424,7 +438,7 @@ nfsserver_monitor ()
if [ $rc -eq 0 ]; then
# don't report success if nfs servers are up
# without locking daemons.
- v3locking_exec "status"
+ ocf_is_true "$OCF_RESKEY_nfsv4_only" || v3locking_exec "status"
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "NFS server is up, but the locking daemons are down"
@@ -786,48 +800,54 @@ nfsserver_start ()
# systemd
case $EXEC_MODE in
- [23]) nfs_exec start rpcbind
- local i=1
- while : ; do
- ocf_log info "Start: rpcbind i: $i"
- rpcinfo > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- break;
- fi
- sleep 1
- i=$((i + 1))
- done
+ [23]) if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec start rpcbind
+ local i=1
+ while : ; do
+ ocf_log info "Start: rpcbind i: $i"
+ rpcinfo > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break
+ fi
+ sleep 1
+ i=$((i + 1))
+ done
+ fi
;;
esac
- # check to see if we need to start rpc.statd
- v3locking_exec "status"
- if [ $? -ne $OCF_SUCCESS ]; then
- v3locking_exec "start"
- rc=$?
- if [ $rc -ne 0 ]; then
- ocf_exit_reason "Failed to start NFS server locking daemons"
- return $rc
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ # check to see if we need to start rpc.statd
+ v3locking_exec "status"
+ if [ $? -ne $OCF_SUCCESS ]; then
+ v3locking_exec "start"
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "Failed to start NFS server locking daemons"
+ return $rc
+ fi
+ else
+ ocf_log info "rpc.statd already up"
fi
- else
- ocf_log info "rpc.statd already up"
fi
# systemd
case $EXEC_MODE in
- [23]) nfs_exec start nfs-mountd
- local i=1
- while : ; do
- ocf_log info "Start: nfs-mountd i: $i"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -eq "0" ]; then
- break;
- fi
- sleep 1
- i=$((i + 1))
- done
+ [23]) if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec start nfs-mountd
+ local i=1
+ while : ; do
+ ocf_log info "Start: nfs-mountd i: $i"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break
+ fi
+ sleep 1
+ i=$((i + 1))
+ done
+ fi
nfs_exec start nfs-idmapd
local i=1
@@ -839,24 +859,26 @@ nfsserver_start ()
ocf_log debug "$(cat $fn)"
rm -f $fn
if [ "$rc" -eq "0" ]; then
- break;
+ break
fi
sleep 1
i=$((i + 1))
done
- nfs_exec start rpc-statd
- local i=1
- while : ; do
- ocf_log info "Start: rpc-statd i: $i"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- break;
- fi
- sleep 1
- i=$((i + 1))
- done
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec start rpc-statd
+ local i=1
+ while : ; do
+ ocf_log info "Start: rpc-statd i: $i"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break
+ fi
+ sleep 1
+ i=$((i + 1))
+ done
+ fi
esac
@@ -914,13 +936,15 @@ nfsserver_stop ()
sleep 1
done
- nfs_exec stop rpc-statd > /dev/null 2>&1
- ocf_log info "Stop: rpc-statd"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- ocf_exit_reason "Failed to stop rpc-statd"
- return $OCF_ERR_GENERIC
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec stop rpc-statd > /dev/null 2>&1
+ ocf_log info "Stop: rpc-statd"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop rpc-statd"
+ return $OCF_ERR_GENERIC
+ fi
fi
nfs_exec stop nfs-idmapd > /dev/null 2>&1
@@ -935,13 +959,15 @@ nfsserver_stop ()
return $OCF_ERR_GENERIC
fi
- nfs_exec stop nfs-mountd > /dev/null 2>&1
- ocf_log info "Stop: nfs-mountd"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -eq "0" ]; then
- ocf_exit_reason "Failed to stop nfs-mountd"
- return $OCF_ERR_GENERIC
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec stop nfs-mountd > /dev/null 2>&1
+ ocf_log info "Stop: nfs-mountd"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop nfs-mountd"
+ return $OCF_ERR_GENERIC
+ fi
fi
if systemctl --no-legend list-unit-files "nfsdcld*" | grep -q nfsdcld; then
@@ -960,10 +986,12 @@ nfsserver_stop ()
esac
- v3locking_exec "stop"
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Failed to stop NFS locking daemons"
- rc=$OCF_ERR_GENERIC
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ v3locking_exec "stop"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to stop NFS locking daemons"
+ rc=$OCF_ERR_GENERIC
+ fi
fi
# systemd

@ -1,147 +0,0 @@
From 237d55120a7c8d761f839c96651e722b3bb3bc88 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 12 Oct 2022 13:57:30 +0200
Subject: [PATCH 1/4] IPsrcaddr: fix PROTO regex
---
heartbeat/IPsrcaddr | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 7dbf65ff5..24406d296 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -188,7 +188,7 @@ IPADDR="\($OCTET\.\)\{3\}$OCTET"
SRCCLAUSE="src$WS$WS*\($IPADDR\)"
MATCHROUTE="\(.*${WS}\)\($SRCCLAUSE\)\($WS.*\|$\)"
METRICCLAUSE=".*\(metric$WS[^ ]\+\)"
-PROTOCLAUSE=".*\(proto$WS[^ ]\+\)"
+PROTOCLAUSE=".*\(proto$WS[^ ]\+\).*"
FINDIF=findif
# findif needs that to be set
From c70ba457851a401cb201cb87d23bdbc5f4fcd2b3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 12 Oct 2022 14:00:30 +0200
Subject: [PATCH 2/4] IPsrcaddr: detect metric for main table only, and allow
specifying metric if necessary
---
heartbeat/IPsrcaddr | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 24406d296..4745eb8a7 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -59,12 +59,14 @@ OCF_RESKEY_ipaddress_default=""
OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_destination_default="0.0.0.0/0"
OCF_RESKEY_proto_default=""
+OCF_RESKEY_metric_default=""
OCF_RESKEY_table_default=""
: ${OCF_RESKEY_ipaddress=${OCF_RESKEY_ipaddress_default}}
: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}}
: ${OCF_RESKEY_destination=${OCF_RESKEY_destination_default}}
: ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}}
+: ${OCF_RESKEY_metric=${OCF_RESKEY_metric_default}}
: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}}
#######################################################################
@@ -143,6 +145,14 @@ Proto to match when finding network. E.g. "kernel".
<content type="string" default="${OCF_RESKEY_proto_default}" />
</parameter>
+<parameter name="metric">
+<longdesc lang="en">
+Metric. Only needed if incorrect metric value is used.
+</longdesc>
+<shortdesc lang="en">Metric</shortdesc>
+<content type="string" default="${OCF_RESKEY_metric_default}" />
+</parameter>
+
<parameter name="table">
<longdesc lang="en">
Table to modify. E.g. "local".
@@ -548,8 +558,14 @@ rc=$?
INTERFACE=`echo $findif_out | awk '{print $1}'`
LISTROUTE=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress`
-METRIC=`echo $LISTROUTE | sed -n "s/$METRICCLAUSE/\1/p"`
[ -z "$PROTO" ] && PROTO=`echo $LISTROUTE | sed -n "s/$PROTOCLAUSE/\1/p"`
+if [ -n "$OCF_RESKEY_metric" ]; then
+ METRIC="metric $OCF_RESKEY_metric"
+elif [ -z "$TABLE" ] || [ "${TABLE#table }" = "main" ]; then
+ METRIC=`echo $LISTROUTE | sed -n "s/$METRICCLAUSE/\1/p"`
+else
+ METRIC=""
+fi
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
NETWORK=`echo $LISTROUTE | grep -m 1 -o '^[^ ]*'`
From c514f12f7a19440f475938f2a4659e5e9667fa25 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 12 Oct 2022 14:01:26 +0200
Subject: [PATCH 3/4] IPsrcaddr: use scope host when using non-main tables
---
heartbeat/IPsrcaddr | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 4745eb8a7..926246008 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -279,8 +279,14 @@ srca_stop() {
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
+ if [ -z "$TABLE" ] || [ "${TABLE#table }" = "main" ]; then
+ SCOPE="link"
+ else
+ SCOPE="host"
+ fi
+
PRIMARY_IP="$($IP2UTIL -4 -o addr show dev $INTERFACE primary | awk '{split($4,a,"/");print a[1]}')"
- OPTS="proto kernel scope link src $PRIMARY_IP"
+ OPTS="proto kernel scope $SCOPE src $PRIMARY_IP"
$IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC || \
errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC' failed"
From 1f387ac8017b3eee23b41eadafd58ce21a29eb21 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 13 Oct 2022 13:11:28 +0200
Subject: [PATCH 4/4] IPsrcaddr: fix monitor/status for default route not being
equal to src IP before start, and change route src correctly in stop-action
---
heartbeat/IPsrcaddr | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 926246008..1bd41a930 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -229,6 +229,7 @@ srca_read() {
[ -z "$SRCIP" ] && return 1
[ $SRCIP = $1 ] && return 0
+ [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "status" ] && [ "${ROUTE%% *}" = "default" ] && return 1
return 2
}
@@ -292,8 +293,8 @@ srca_stop() {
errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS $METRIC' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
- $CMDCHANGE $ROUTE_WO_SRC || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
+ $CMDCHANGE $ROUTE_WO_SRC src $PRIMARY_IP || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $PRIMARY_IP' failed"
fi
return $?

@ -1,175 +0,0 @@
From cab190c737fdf58268aa5c009f6089b754862b22 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Tue, 1 Feb 2022 16:32:50 -0800
Subject: [PATCH 1/3] Filesystem: Fix OpenBSD check in fstype_supported()
fstype_supported() is supposed to skip the /proc/filesystems check if
the OS is OpenBSD. Instead, it skips the check if the OS is **not**
OpenBSD. That means the function has been a no-op for all other distros.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 010c1dcfc..8b4792152 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -440,7 +440,7 @@ fstype_supported()
local support="$FSTYPE"
local rc
- if [ "X${HOSTOS}" != "XOpenBSD" ];then
+ if [ "X${HOSTOS}" = "XOpenBSD" ];then
# skip checking /proc/filesystems for obsd
return $OCF_SUCCESS
fi
From 5d38b87daa9cfffa89a193df131d6ebd87cd05aa Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Tue, 1 Feb 2022 18:26:32 -0800
Subject: [PATCH 2/3] Filesystem: Improve fstype_supported logs for fuse
Make it more clear when we have to use a different name to check for
support of a particular filesystem. Currently only used for fuse-type
filesystems.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/Filesystem | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 8b4792152..4d84846c1 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -455,6 +455,10 @@ fstype_supported()
fuse.*|glusterfs|rozofs) support="fuse";;
esac
+ if [ "$support" != "$FSTYPE" ]; then
+ ocf_log info "Checking support for $FSTYPE as \"$support\""
+ fi
+
grep -w "$support"'$' /proc/filesystems >/dev/null
if [ $? -eq 0 ]; then
# found the fs type
@@ -465,7 +469,7 @@ fstype_supported()
# check the if the filesystem support exists again.
$MODPROBE $support >/dev/null
if [ $? -ne 0 ]; then
- ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
+ ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems and failed to load kernel module"
return $OCF_ERR_INSTALLED
fi
@@ -478,11 +482,11 @@ fstype_supported()
# yes. found the filesystem after doing the modprobe
return $OCF_SUCCESS
fi
- ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
+ ocf_log debug "Unable to find support for $support in /proc/filesystems after modprobe, trying again"
sleep 1
done
- ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems"
+ ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems"
return $OCF_ERR_INSTALLED
}
@@ -837,6 +841,9 @@ Filesystem_monitor()
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
+# FIXME!! Needs some tuning to match fstype_supported() (e.g., for
+# fuse). Can we just call fstype_supported() with a flag like
+# "no_modprobe" instead?
#
Filesystem_validate_all()
{
From e2174244067b02d798e0f12437f0f499c80f91fe Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Tue, 1 Feb 2022 18:55:47 -0800
Subject: [PATCH 3/3] Filesystem: Add support for Amazon EFS mount helper
mount.efs, the mount helper for Amazon Elastic File System (EFS)
provided by amazon-efs-utils [1], is a wrapper for mount.nfs4. It offers
a number of AWS-specific mount options and some security improvements
like encryption of data in transit.
This commit adds support by treating an fstype=efs like fstype=nfs4 for
the most part.
Resolves: RHBZ#2049319
[1] https://docs.aws.amazon.com/efs/latest/ug/efs-mount-helper.html
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/Filesystem | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 4d84846c1..1a90d6a42 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -341,7 +341,7 @@ determine_blockdevice() {
# Get the current real device name, if possible.
# (specified devname could be -L or -U...)
case "$FSTYPE" in
- nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre)
+ nfs4|nfs|efs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre)
: ;;
*)
match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}"
@@ -423,7 +423,7 @@ is_fsck_needed() {
no) false;;
""|auto)
case "$FSTYPE" in
- ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs)
+ ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|efs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs)
false;;
*)
true;;
@@ -450,9 +450,11 @@ fstype_supported()
return $OCF_SUCCESS
fi
- # support fuse-filesystems (e.g. GlusterFS)
+ # support fuse-filesystems (e.g. GlusterFS) and Amazon Elastic File
+ # System (EFS)
case "$FSTYPE" in
fuse.*|glusterfs|rozofs) support="fuse";;
+ efs) support="nfs4";;
esac
if [ "$support" != "$FSTYPE" ]; then
@@ -701,7 +703,7 @@ Filesystem_stop()
# For networked filesystems, there's merit in trying -f:
case "$FSTYPE" in
- nfs4|nfs|cifs|smbfs) umount_force="-f" ;;
+ nfs4|nfs|efs|cifs|smbfs) umount_force="-f" ;;
esac
# Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
@@ -892,7 +894,7 @@ set_blockdevice_var() {
# these are definitely not block devices
case "$FSTYPE" in
- nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;;
+ nfs4|nfs|efs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;;
esac
if $(is_option "loop"); then
@@ -1013,7 +1015,7 @@ is_option "ro" &&
CLUSTERSAFE=2
case "$FSTYPE" in
-nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre)
+nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre)
CLUSTERSAFE=1 # this is kind of safe too
;;
# add here CLUSTERSAFE=0 for all filesystems which are not

@ -1,84 +0,0 @@
From 4d87bcfe5df8a1e40ee945e095ac9e7cca147ec4 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 29 Jun 2022 10:26:25 +0200
Subject: [PATCH] IPaddr2/IPsrcaddr: add/modify table parameter to be able to
find interface while using policy based routing
---
heartbeat/IPaddr2 | 12 ++++++++++++
heartbeat/IPsrcaddr | 5 ++++-
heartbeat/findif.sh | 2 +-
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 97a7431a2..e8384c586 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -73,6 +73,7 @@ OCF_RESKEY_ip_default=""
OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_broadcast_default=""
OCF_RESKEY_iflabel_default=""
+OCF_RESKEY_table_default=""
OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_lvs_support_default=false
OCF_RESKEY_lvs_ipv6_addrlabel_default=false
@@ -97,6 +98,7 @@ OCF_RESKEY_network_namespace_default=""
: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}}
: ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}}
: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
+: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}}
: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}}
@@ -239,6 +241,16 @@ If a label is specified in nic name, this parameter has no effect.
<content type="string" default="${OCF_RESKEY_iflabel_default}"/>
</parameter>
+<parameter name="table">
+<longdesc lang="en">
+Table to use to lookup which interface to use for the IP.
+
+This can be used for policy based routing. See man ip-rule(8).
+</longdesc>
+<shortdesc lang="en">Table</shortdesc>
+<content type="string" default="${OCF_RESKEY_table_default}" />
+</parameter>
+
<parameter name="lvs_support">
<longdesc lang="en">
Enable support for LVS Direct Routing configurations. In case a IP
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 1bd41a930..cf106cc34 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -155,13 +155,16 @@ Metric. Only needed if incorrect metric value is used.
<parameter name="table">
<longdesc lang="en">
-Table to modify. E.g. "local".
+Table to modify and use for interface lookup. E.g. "local".
The table has to have a route matching the "destination" parameter.
+
+This can be used for policy based routing. See man ip-rule(8).
</longdesc>
<shortdesc lang="en">Table</shortdesc>
<content type="string" default="${OCF_RESKEY_table_default}" />
</parameter>
+
</parameters>
<actions>
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 66bc6d56a..1a40cc9a4 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -32,7 +32,7 @@ prefixcheck() {
getnetworkinfo()
{
local line netinfo
- ip -o -f inet route list match $OCF_RESKEY_ip table local scope host | (while read line;
+ ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table=local}" scope host | (while read line;
do
netinfo=`echo $line | awk '{print $2}'`
case $netinfo in

@ -1,35 +0,0 @@
From da9e8e691f39494e14f8f11173b6ab6433384396 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 20 Jun 2023 14:19:23 +0200
Subject: [PATCH] findif.sh: fix table parameter so it uses main table by
default
---
heartbeat/findif.sh | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 1a40cc9a4b..6c04c98c19 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -32,7 +32,7 @@ prefixcheck() {
getnetworkinfo()
{
local line netinfo
- ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table=local}" scope host | (while read line;
+ ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table:=main}" scope host | (while read line;
do
netinfo=`echo $line | awk '{print $2}'`
case $netinfo in
@@ -215,9 +215,9 @@ findif()
fi
if [ -n "$nic" ] ; then
# NIC supports more than two.
- set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
else
- set -- $(ip -o -f $family route list match $match $scope | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
fi
if [ $# = 0 ] ; then
case $OCF_RESKEY_ip in

@ -1,25 +0,0 @@
From 97a05e0e662ed922c9ecd016b39ab90ee233d5c9 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 24 Nov 2022 10:36:56 +0100
Subject: [PATCH] mysql-common: return error in stop-action if kill fails to
stop the process, so the node can get fenced
---
heartbeat/mysql-common.sh | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index 34e1c6748..8104019b0 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -318,6 +318,10 @@ mysql_common_stop()
if [ $? != $OCF_NOT_RUNNING ]; then
ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
/bin/kill -KILL $pid > /dev/null
+ mysql_common_status info $pid
+ if [ $? != $OCF_NOT_RUNNING ]; then
+ return $OCF_ERR_GENERIC
+ fi
fi
ocf_log info "MySQL stopped";

@ -1,42 +0,0 @@
From 2695888c983df331b0fee407a5c69c493a360313 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 30 Nov 2022 12:07:05 +0100
Subject: [PATCH] lvmlockd: add "use_lvmlockd = 1" if it's commented out or
missing
---
heartbeat/lvmlockd | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd
index dc7bd2d7e..f4b299f28 100755
--- a/heartbeat/lvmlockd
+++ b/heartbeat/lvmlockd
@@ -180,14 +180,23 @@ setup_lvm_config()
lock_type=$(echo "$out" | cut -d'=' -f2)
if [ -z "$use_lvmlockd" ]; then
- ocf_exit_reason "\"use_lvmlockd\" not set in /etc/lvm/lvm.conf ..."
- exit $OCF_ERR_CONFIGURED
- fi
+ ocf_log info "adding \"use_lvmlockd=1\" to /etc/lvm/lvm.conf ..."
+ cat >> /etc/lvm/lvm.conf << EOF
+
+global {
+ use_lvmlockd = 1
+}
+EOF
- if [ -n "$use_lvmlockd" ] && [ "$use_lvmlockd" != 1 ] ; then
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "unable to add \"use_lvmlockd=1\" to /etc/lvm/lvm.conf ..."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ elif [ "$use_lvmlockd" != 1 ] ; then
ocf_log info "setting \"use_lvmlockd=1\" in /etc/lvm/lvm.conf ..."
sed -i 's,^[[:blank:]]*use_lvmlockd[[:blank:]]*=.*,\ \ \ \ use_lvmlockd = 1,g' /etc/lvm/lvm.conf
fi
+
if [ -n "$lock_type" ] ; then
# locking_type was removed from config in v2.03
ocf_version_cmp "$(lvmconfig --version | awk '/LVM ver/ {sub(/\(.*/, "", $3); print $3}')" "2.03"

@ -1,137 +0,0 @@
From bf89ad06d5da5c05533c80a37a37c8dbbcd123aa Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 8 Dec 2022 15:40:07 +0100
Subject: [PATCH] galera/mpathpersist/sg_persist/IPsrcaddr: only check notify
and promotable when OCF_CHECK_LEVEL=10
Pacemaker has started running validate-all action before creating the
resource. It doesnt provide notify/promotable settings while doing so,
so this patch moves these checks to OCF_CHECK_LEVEL 10 and runs the
validate action at OCF_CHECK_LEVEL 10 in the start-action.
---
heartbeat/IPsrcaddr | 13 ++++++++-----
heartbeat/galera.in | 9 ++++++---
heartbeat/mpathpersist.in | 13 +++++++++----
heartbeat/sg_persist.in | 13 +++++++++----
4 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 1bd41a930..66e2ad8cd 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -510,11 +510,13 @@ srca_validate_all() {
fi
# We should serve this IP address of course
- if ip_status "$ipaddress"; then
- :
- else
- ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
- return $OCF_ERR_INSTALLED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if ip_status "$ipaddress"; then
+ :
+ else
+ ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
+ return $OCF_ERR_INSTALLED
+ fi
fi
return $OCF_SUCCESS
}
@@ -540,6 +542,7 @@ esac
ipaddress="$OCF_RESKEY_ipaddress"
+[ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10
srca_validate_all
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
diff --git a/heartbeat/galera.in b/heartbeat/galera.in
index cd2fee7c0..6aed3e4b6 100755
--- a/heartbeat/galera.in
+++ b/heartbeat/galera.in
@@ -1015,9 +1015,11 @@ galera_stop()
galera_validate()
{
- if ! ocf_is_ms; then
- ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
- return $OCF_ERR_CONFIGURED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if ! ocf_is_ms; then
+ ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
+ return $OCF_ERR_CONFIGURED
+ fi
fi
if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
@@ -1035,6 +1037,7 @@ case "$1" in
exit $OCF_SUCCESS;;
esac
+[ "$__OCF_ACTION" = "start" ] && OCF_CHECK_LEVEL=10
galera_validate
rc=$?
LSB_STATUS_STOPPED=3
diff --git a/heartbeat/mpathpersist.in b/heartbeat/mpathpersist.in
index 0e2c2a4a0..8a46b9930 100644
--- a/heartbeat/mpathpersist.in
+++ b/heartbeat/mpathpersist.in
@@ -630,10 +630,11 @@ mpathpersist_action_notify() {
}
mpathpersist_action_validate_all () {
-
- if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
- ocf_log err "Master options misconfigured."
- exit $OCF_ERR_CONFIGURED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
+ ocf_log err "Master options misconfigured."
+ exit $OCF_ERR_CONFIGURED
+ fi
fi
return $OCF_SUCCESS
@@ -659,6 +660,10 @@ case $ACTION in
start|promote|monitor|stop|demote)
ocf_log debug "$RESOURCE: starting action \"$ACTION\""
mpathpersist_init
+ if [ "$__OCF_ACTION" = "start" ]; then
+ OCF_CHECK_LEVEL=10
+ mpathpersist_action_validate_all
+ fi
mpathpersist_action_$ACTION
exit $?
;;
diff --git a/heartbeat/sg_persist.in b/heartbeat/sg_persist.in
index 16048ea6f..620c02f4a 100644
--- a/heartbeat/sg_persist.in
+++ b/heartbeat/sg_persist.in
@@ -643,10 +643,11 @@ sg_persist_action_notify() {
}
sg_persist_action_validate_all () {
-
- if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
- ocf_log err "Master options misconfigured."
- exit $OCF_ERR_CONFIGURED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
+ ocf_log err "Master options misconfigured."
+ exit $OCF_ERR_CONFIGURED
+ fi
fi
return $OCF_SUCCESS
@@ -672,6 +673,10 @@ case $ACTION in
start|promote|monitor|stop|demote)
ocf_log debug "$RESOURCE: starting action \"$ACTION\""
sg_persist_init
+ if [ "$__OCF_ACTION" = "start" ]; then
+ OCF_CHECK_LEVEL=10
+ sg_persist_action_validate_all
+ fi
sg_persist_action_$ACTION
exit $?
;;

@ -1,49 +0,0 @@
From 21666c5c842b8a6028699ee78db75a1d7134fad0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 4 Jan 2023 10:39:16 +0100
Subject: [PATCH 1/2] Filesystem: remove validate-all mountpoint warning as it
is auto-created during start-action if it doesnt exist
---
heartbeat/Filesystem | 4 ----
1 file changed, 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 44270ad98..65088029e 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -851,10 +851,6 @@ Filesystem_monitor()
#
Filesystem_validate_all()
{
- if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
- ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
- fi
-
# Check if the $FSTYPE is workable
# NOTE: Without inserting the $FSTYPE module, this step may be imprecise
# TODO: This is Linux specific crap.
From 8a7f40b6ab93d8d39230d864ab06a57ff48d6f1f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 5 Jan 2023 13:09:48 +0100
Subject: [PATCH 2/2] CTDB: change public_addresses validate-all warning to
info
---
heartbeat/CTDB.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 46f56cfac..b4af66bc1 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -940,7 +940,7 @@ ctdb_validate() {
fi
if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
- ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"
+ ocf_log info "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"
fi
if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then

@ -1,68 +0,0 @@
--- a/heartbeat/pgsqlms 2023-01-04 14:42:36.093258702 +0100
+++ b/heartbeat/pgsqlms 2023-01-04 14:40:52.403994545 +0100
@@ -66,6 +66,7 @@
my $maxlag = $ENV{'OCF_RESKEY_maxlag'} || $maxlag_default;
my $recovery_tpl = $ENV{'OCF_RESKEY_recovery_template'}
|| "$pgdata/recovery.conf.pcmk";
+my $ocf_check_level = $ENV{'OCF_CHECK_LEVEL'} || 0;
# PostgreSQL commands path
@@ -1304,26 +1305,28 @@
return $OCF_ERR_INSTALLED;
}
- # check notify=true
- $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\
- --meta --get-parameter notify 2>/dev/null };
- chomp $ans;
- unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
- ocf_exit_reason(
- 'You must set meta parameter notify=true for your master resource'
- );
- return $OCF_ERR_INSTALLED;
- }
+ if ( $ocf_check_level == 10 ) {
+ # check notify=true
+ $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\
+ --meta --get-parameter notify 2>/dev/null };
+ chomp $ans;
+ unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
+ ocf_exit_reason(
+ 'You must set meta parameter notify=true for your "master" resource'
+ );
+ return $OCF_ERR_INSTALLED;
+ }
- # check master-max=1
- unless (
- defined $ENV{'OCF_RESKEY_CRM_meta_master_max'}
- and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1'
- ) {
- ocf_exit_reason(
- 'You must set meta parameter master-max=1 for your master resource'
- );
- return $OCF_ERR_INSTALLED;
+ # check master-max=1
+ unless (
+ defined $ENV{'OCF_RESKEY_CRM_meta_master_max'}
+ and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1'
+ ) {
+ ocf_exit_reason(
+ 'You must set meta parameter master-max=1 for your "master" resource'
+ );
+ return $OCF_ERR_INSTALLED;
+ }
}
if ( $PGVERNUM >= $PGVER_12 ) {
@@ -2242,6 +2245,9 @@
# Set current node name.
$nodename = ocf_local_nodename();
+if ( $__OCF_ACTION ne 'validate-all' ) {
+ $ocf_check_level = 10;
+}
$exit_code = pgsql_validate_all();
exit $exit_code if $exit_code != $OCF_SUCCESS or $__OCF_ACTION eq 'validate-all';

@ -1,187 +0,0 @@
From 81f9e1a04dfd2274ccb906310b4f191485e342ab Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 11 Jan 2023 13:22:24 +0100
Subject: [PATCH 1/2] exportfs: move testdir() to start-action to avoid failing
during resource creation (validate-all) and make it create the directory if
it doesnt exist
---
heartbeat/exportfs | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index c10777fa9..2307a9e67 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -301,6 +301,16 @@ exportfs_monitor ()
fi
}
+testdir() {
+ if [ ! -d $1 ]; then
+ mkdir -p "$1"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Unable to create directory $1"
+ return 1
+ fi
+ fi
+ return 0
+}
export_one() {
local dir=$1
local opts sep
@@ -331,6 +341,10 @@ export_one() {
}
exportfs_start ()
{
+ if ! forall testdir; then
+ return $OCF_ERR_INSTALLED
+ fi
+
if exportfs_monitor; then
ocf_log debug "already exported"
return $OCF_SUCCESS
@@ -428,14 +442,6 @@ exportfs_stop ()
fi
}
-testdir() {
- if [ ! -d $1 ]; then
- ocf_is_probe ||
- ocf_log err "$1 does not exist or is not a directory"
- return 1
- fi
- return 0
-}
exportfs_validate_all ()
{
if echo "$OCF_RESKEY_fsid" | grep -q -F ','; then
@@ -447,9 +453,6 @@ exportfs_validate_all ()
ocf_exit_reason "use integer fsid when exporting multiple directories"
return $OCF_ERR_CONFIGURED
fi
- if ! forall testdir; then
- return $OCF_ERR_INSTALLED
- fi
}
for dir in $OCF_RESKEY_directory; do
@@ -466,7 +469,7 @@ for dir in $OCF_RESKEY_directory; do
fi
else
case "$__OCF_ACTION" in
- stop|monitor)
+ stop|monitor|validate-all)
canonicalized_dir="$dir"
ocf_log debug "$dir does not exist"
;;
From 8ee41af82cda35149f8e0cfede6a8ddef3e221e1 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 11 Jan 2023 13:25:57 +0100
Subject: [PATCH 2/2] pgsql: dont run promotable and file checks that could be
on shared storage during validate-all action
---
heartbeat/pgsql | 53 +++++++++++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 21 deletions(-)
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index aa8a13a84..532063ac5 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -1835,7 +1835,7 @@ check_config() {
if [ ! -f "$1" ]; then
if ocf_is_probe; then
- ocf_log info "Configuration file is $1 not readable during probe."
+ ocf_log info "Unable to read $1 during probe."
rc=1
else
ocf_exit_reason "Configuration file $1 doesn't exist"
@@ -1846,8 +1846,7 @@ check_config() {
return $rc
}
-# Validate most critical parameters
-pgsql_validate_all() {
+validate_ocf_check_level_10() {
local version
local check_config_rc
local rep_mode_string
@@ -1883,12 +1882,6 @@ pgsql_validate_all() {
fi
fi
- getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
- if [ ! $? -eq 0 ]; then
- ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
- return $OCF_ERR_INSTALLED;
- fi
-
if ocf_is_probe; then
ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
else
@@ -1898,18 +1891,6 @@ pgsql_validate_all() {
fi
fi
- if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ]
- then
- ocf_exit_reason "monitor password can't be empty"
- return $OCF_ERR_CONFIGURED
- fi
-
- if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ]
- then
- ocf_exit_reason "monitor_user has to be set if monitor_password is set"
- return $OCF_ERR_CONFIGURED
- fi
-
if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
@@ -2027,6 +2008,35 @@ pgsql_validate_all() {
return $OCF_SUCCESS
}
+# Validate most critical parameters
+pgsql_validate_all() {
+ local rc
+
+ getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
+ if [ ! $? -eq 0 ]; then
+ ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
+ return $OCF_ERR_INSTALLED;
+ fi
+
+ if [ -n "$OCF_RESKEY_monitor_user" ] && [ -z "$OCF_RESKEY_monitor_password" ]; then
+ ocf_exit_reason "monitor password can't be empty"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ if [ -z "$OCF_RESKEY_monitor_user" ] && [ -n "$OCF_RESKEY_monitor_password" ]; then
+ ocf_exit_reason "monitor_user has to be set if monitor_password is set"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ validate_ocf_check_level_10
+ rc=$?
+ [ $rc -ne "$OCF_SUCCESS" ] && exit $rc
+ fi
+
+ return $OCF_SUCCESS
+}
+
#
# Check if we need to create a log file
@@ -2163,6 +2173,7 @@ case "$1" in
exit $OCF_SUCCESS;;
esac
+[ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10
pgsql_validate_all
rc=$?

@ -1,23 +0,0 @@
--- ClusterLabs-resource-agents-fd0720f7/heartbeat/pgsqlms 2023-01-16 10:54:30.897188238 +0100
+++ pgsqlms 2023-01-10 14:21:19.281286242 +0100
@@ -1351,12 +1351,14 @@
return $OCF_ERR_ARGS;
}
- $guc = qx{ $POSTGRES -C primary_conninfo -D "$pgdata" $start_opts};
- unless ($guc =~ /\bapplication_name='?$nodename'?\b/) {
- ocf_exit_reason(
- q{Parameter "primary_conninfo" MUST contain 'application_name=%s'. }.
- q{It is currently set to '%s'}, $nodename, $guc );
- return $OCF_ERR_ARGS;
+ if ( $ocf_check_level == 10 ) {
+ $guc = qx{ $POSTGRES -C primary_conninfo -D "$pgdata" $start_opts};
+ unless ($guc =~ /\bapplication_name='?$nodename'?\b/) {
+ ocf_exit_reason(
+ q{Parameter "primary_conninfo" MUST contain 'application_name=%s'. }.
+ q{It is currently set to '%s'}, $nodename, $guc );
+ return $OCF_ERR_ARGS;
+ }
}
}
else {

@ -1,24 +0,0 @@
From e7a748d35fe56f2be727ecae1885a2f1366f41bf Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 15 Mar 2023 13:03:07 +0100
Subject: [PATCH] ethmonitor: dont log "Interface does not exist" for
monitor-action
---
heartbeat/ethmonitor | 3 +++
1 file changed, 3 insertions(+)
diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index 451738a0b5..f9c9ef4bdd 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -271,6 +271,9 @@ if_init() {
validate-all)
ocf_exit_reason "Interface $NIC does not exist"
exit $OCF_ERR_CONFIGURED;;
+ monitor)
+ ocf_log debug "Interface $NIC does not exist"
+ ;;
*)
## It might be a bond interface which is temporarily not available, therefore we want to continue here
ocf_log warn "Interface $NIC does not exist"

@ -1,156 +0,0 @@
From 51dd5d5d051aa3b3f0c104f8e80f212cd5780fc3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 14 Mar 2023 09:14:28 +0100
Subject: [PATCH] LVM-activate: failover with missing PVs
There area two changes included:
- Allow the system ID to be changed on a VG when the VG is
missing PVs, as long as a majority of PVs are still present.
This requires a recent version of lvm that supports the
--majoritypvs option for vgchange.
- Use --activationmode degraded when activating LVs so that
raid LVs can be activated when legs are missing, as long as
sufficient devices are available for raid to provide all the
data in the LV.
By David Teigland.
---
heartbeat/LVM-activate | 82 ++++++++++++++++++++++++++++++++----------
1 file changed, 64 insertions(+), 18 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index e951a08e9c..f6f24a3b52 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -50,6 +50,8 @@ OCF_RESKEY_vg_access_mode_default=""
OCF_RESKEY_activation_mode_default="exclusive"
OCF_RESKEY_tag_default="pacemaker"
OCF_RESKEY_partial_activation_default="false"
+OCF_RESKEY_degraded_activation_default="false"
+OCF_RESKEY_majority_pvs_default="false"
: ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}}
: ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}}
@@ -57,6 +59,8 @@ OCF_RESKEY_partial_activation_default="false"
: ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}}
: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}}
: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}}
+: ${OCF_RESKEY_degraded_activation=${OCF_RESKEY_degraded_activation_default}}
+: ${OCF_RESKEY_majority_pvs=${OCF_RESKEY_majority_pvs_default}}
# If LV is given, only activate this named LV; otherwise, activate all
# LVs in the named VG.
@@ -191,6 +195,29 @@ logical volumes.
<content type="string" default="${OCF_RESKEY_partial_activation_default}" />
</parameter>
+<parameter name="degraded_activation" unique="0" required="0">
+<longdesc lang="en">
+Activate RAID LVs using the "degraded" activation mode. This allows RAID
+LVs to be activated with missing PVs if all data can be provided with
+RAID redundancy. The RAID level determines the number of PVs that are
+required for degraded activation to succeed. If fewer PVs are available,
+then degraded activation will fail. Also enable majority_pvs.
+</longdesc>
+<shortdesc lang="en">Activate RAID LVs in degraded mode when missing PVs</shortdesc>
+<content type="string" default="${OCF_RESKEY_degraded_activation_default}" />
+</parameter>
+
+<parameter name="majority_pvs" unique="0" required="0">
+<longdesc lang="en">
+If set, the VG system ID can be reassigned to a new host if a majority
+of PVs in the VG are present. Otherwise, VG failover with system ID
+will fail when the VG is missing PVs. Also enable degraded_activation
+when RAID LVs are used.
+</longdesc>
+<shortdesc lang="en">Allow changing the system ID of a VG with a majority of PVs</shortdesc>
+<content type="string" default="${OCF_RESKEY_majority_pvs_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -524,24 +551,27 @@ lvm_validate() {
exit $OCF_ERR_GENERIC
fi
- # Inconsistency might be due to missing physical volumes, which doesn't
- # automatically mean we should fail. If partial_activation=true then
- # we should let start try to handle it, or if no PVs are listed as
- # "unknown device" then another node may have marked a device missing
- # where we have access to all of them and can start without issue.
- case $(vgs -o attr --noheadings $VG | tr -d ' ') in
- ???p??*)
- if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
- # We are missing devices and cannot activate partially
- ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially"
- exit $OCF_ERR_GENERIC
+ vg_missing_pv_count=$(vgs -o missing_pv_count --noheadings ${VG} 2>/dev/null)
+
+ if [ $vg_missing_pv_count -gt 0 ]; then
+ ocf_log warn "Volume Group ${VG} is missing $vg_missing_pv_count PVs."
+
+ # Setting new system ID will succeed if over half of PVs remain.
+ # Don't try to calculate here if a majority is present,
+ # but leave this up to the vgchange command to determine.
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
+ ocf_log warn "Attempting fail over with missing PVs (majority.)"
+
+ # Setting new system ID will fail, and behavior is undefined for
+ # other access modes.
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ ocf_log warn "Attempting fail over with missing PVs (partial.)"
+
else
- # We are missing devices but are allowed to activate partially.
- # Assume that caused the vgck failure and carry on
- ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
+ ocf_exit_reason "Volume group [$VG] has devices missing. Consider majority_pvs=true"
+ exit $OCF_ERR_GENERIC
fi
- ;;
- esac
+ fi
# Get the access mode from VG metadata and check if it matches the input
# value. Skip to check "tagging" mode because there's no reliable way to
@@ -601,7 +631,18 @@ lvm_validate() {
do_activate() {
do_activate_opt=$1
- if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ if ocf_is_true "$OCF_RESKEY_degraded_activation" ; then
+ # This will allow a RAID LV to be activated if sufficient
+ # devices are available to allow the LV to be usable
+ do_activate_opt="${do_activate_opt} --activationmode degraded"
+
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ # This will allow a mirror LV to be activated if any
+ # devices are missing, but the activated LV may not be
+ # usable, so it is not recommended. Also, other LV
+ # types without data redundancy will be activated
+ # when partial is set.
+ # RAID LVs and degraded_activation should be used instead.
do_activate_opt="${do_activate_opt} --partial"
fi
@@ -661,11 +702,16 @@ clvmd_activate() {
}
systemid_activate() {
+ majority_opt=""
set_autoactivation=0
cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
+ vgchange --help | grep '\--majoritypvs' >/dev/null 2>&1 && majority_opt="--majoritypvs"
+ fi
+
# Put our system ID on the VG
- vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
+ vgchange -y $majority_opt --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
--systemid ${SYSTEM_ID} ${VG}
vgchange --help | grep '\--setautoactivation' >/dev/null 2>&1 && set_autoactivation=1

@ -1,70 +0,0 @@
From 706b48fd93a75a582c538013aea1418b6ed69dd0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 9 Mar 2023 15:57:59 +0100
Subject: [PATCH] mysql: promotable fixes to avoid nodes getting bounced around
by setting -v 1/-v 2, and added OCF_CHECK_LEVEL=10 for promotable resources
to be able to distinguish between promoted and not
---
heartbeat/mysql | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/heartbeat/mysql b/heartbeat/mysql
index 9ab49ab20e..29ed427319 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -757,6 +757,10 @@ mysql_monitor() {
status_loglevel="info"
fi
+ if ocf_is_ms; then
+ OCF_CHECK_LEVEL=10
+ fi
+
mysql_common_status $status_loglevel
rc=$?
@@ -777,7 +781,13 @@ mysql_monitor() {
return $rc
fi
- if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
+ if [ $OCF_CHECK_LEVEL -eq 10 ]; then
+ if [ -z "$OCF_RESKEY_test_table" ]; then
+ ocf_exit_reason "test_table not set"
+ return $OCF_ERR_CONFIGURED
+
+ fi
+
# Check if this instance is configured as a slave, and if so
# check slave status
if is_slave; then
@@ -795,18 +805,16 @@ mysql_monitor() {
ocf_exit_reason "Failed to select from $test_table";
return $OCF_ERR_GENERIC;
fi
- else
- # In case no exnteded tests are enabled and we are in master/slave mode _always_ set the master score to 1 if we reached this point
- ocf_is_ms && $CRM_MASTER -v 1
fi
if ocf_is_ms && ! get_read_only; then
ocf_log debug "MySQL monitor succeeded (master)";
# Always set master score for the master
- $CRM_MASTER -v 2
+ $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
return $OCF_RUNNING_MASTER
else
ocf_log debug "MySQL monitor succeeded";
+ ocf_is_ms && $CRM_MASTER -v 1
return $OCF_SUCCESS
fi
}
@@ -873,7 +881,6 @@ mysql_start() {
# preference set by the administrator. We choose a low
# greater-than-zero preference.
$CRM_MASTER -v 1
-
fi
# Initial monitor action

@ -1,32 +0,0 @@
From 34483f8029ea9ab25220cfee71d53adaf5aacaa0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 14 Jun 2023 14:37:01 +0200
Subject: [PATCH] mysql: fix promotion_score bouncing between ~3600 and 1 on
demoted nodes
---
heartbeat/mysql | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/heartbeat/mysql b/heartbeat/mysql
index 29ed42731..1df2fc0f2 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -517,17 +517,6 @@ check_slave() {
exit $OCF_ERR_INSTALLED
fi
- elif ocf_is_ms; then
- # Even if we're not set to evict lagging slaves, we can
- # still use the seconds behind master value to set our
- # master preference.
- local master_pref
- master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
- if [ $master_pref -lt 0 ]; then
- # Sanitize a below-zero preference to just zero
- master_pref=0
- fi
- $CRM_MASTER -v $master_pref
fi
# is the slave ok to have a VIP on it

@ -1,54 +0,0 @@
From 81bb58b05d2ddabd17fe31af39f0e857e61db3c9 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 28 Mar 2023 16:53:45 +0200
Subject: [PATCH] azure-events*: fix for no "Transition Summary" for Pacemaker
2.1+
---
heartbeat/azure-events-az.in | 8 ++++----
heartbeat/azure-events.in | 6 +++---
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 59d0953061..67c02c6422 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -311,10 +311,10 @@ class clusterHelper:
summary = clusterHelper._exec("crm_simulate", "-Ls")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
- return False
+ return ""
if summary.find("Transition Summary:") < 0:
- ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
- return False
+ ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
+ return ""
summary = summary.split("Transition Summary:")[1]
ret = summary.split("\n").pop(0)
@@ -768,4 +768,4 @@ def main():
agent.run()
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
index 66e129060a..5ad658df93 100644
--- a/heartbeat/azure-events.in
+++ b/heartbeat/azure-events.in
@@ -310,10 +310,10 @@ class clusterHelper:
summary = clusterHelper._exec("crm_simulate", "-Ls")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
- return False
+ return ""
if summary.find("Transition Summary:") < 0:
- ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
- return False
+ ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
+ return ""
summary = summary.split("Transition Summary:")[1]
ret = summary.split("\n").pop(0)

@ -1,77 +0,0 @@
From ff53e5c8d6867e580506d132fba6fcf6aa46b804 Mon Sep 17 00:00:00 2001
From: Peter Varkoly <varkoly@suse.com>
Date: Sat, 29 Apr 2023 08:09:11 +0200
Subject: [PATCH] Use -LS instead of -Ls as parameter to get the Transition
Summary
---
heartbeat/azure-events-az.in | 9 +++++----
heartbeat/azure-events.in | 9 +++++----
2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 67c02c642..46d4d1f3d 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -298,7 +298,7 @@ class clusterHelper:
Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby)
"""
# <tniek> Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node?
- # # crm_simulate -Ls
+ # # crm_simulate -LS
# Transition Summary:
# * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
# * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
@@ -308,15 +308,16 @@ class clusterHelper:
# Transition Summary:
ocf.logger.debug("transitionSummary: begin")
- summary = clusterHelper._exec("crm_simulate", "-Ls")
+ summary = clusterHelper._exec("crm_simulate", "-LS")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
return ""
if summary.find("Transition Summary:") < 0:
ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
return ""
- summary = summary.split("Transition Summary:")[1]
- ret = summary.split("\n").pop(0)
+ j=summary.find('Transition Summary:') + len('Transition Summary:')
+ l=summary.lower().find('executing cluster transition:')
+ ret = list(filter(str.strip, summary[j:l].split("\n")))
ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
return ret
diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
index 5ad658df9..90acaba62 100644
--- a/heartbeat/azure-events.in
+++ b/heartbeat/azure-events.in
@@ -297,7 +297,7 @@ class clusterHelper:
Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby)
"""
# <tniek> Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node?
- # # crm_simulate -Ls
+ # # crm_simulate -LS
# Transition Summary:
# * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
# * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
@@ -307,15 +307,16 @@ class clusterHelper:
# Transition Summary:
ocf.logger.debug("transitionSummary: begin")
- summary = clusterHelper._exec("crm_simulate", "-Ls")
+ summary = clusterHelper._exec("crm_simulate", "-LS")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
return ""
if summary.find("Transition Summary:") < 0:
ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
return ""
- summary = summary.split("Transition Summary:")[1]
- ret = summary.split("\n").pop(0)
+ j=summary.find('Transition Summary:') + len('Transition Summary:')
+ l=summary.lower().find('executing cluster transition:')
+ ret = list(filter(str.strip, summary[j:l].split("\n")))
ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
return ret

@ -1,23 +0,0 @@
From b02b06c437b1d8cb1dcfe8ace47c2efc4a0e476c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 30 Mar 2023 14:44:41 +0200
Subject: [PATCH] Filesystem: fail if AWS efs-utils not installed when
fstype=efs
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 65088029ec..50c68f115b 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -456,7 +456,7 @@ fstype_supported()
# System (EFS)
case "$FSTYPE" in
fuse.*|glusterfs|rozofs) support="fuse";;
- efs) support="nfs4";;
+ efs) check_binary "mount.efs"; support="nfs4";;
esac
if [ "$support" != "$FSTYPE" ]; then

@ -1,29 +0,0 @@
From 78622f1d3e46d58b78efe33643d05bea4d6948a2 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 17 May 2023 12:29:38 +0200
Subject: [PATCH] Filesystem: create systemd drop-in for network filesystems
---
heartbeat/Filesystem | 2 ++
1 file changed, 2 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 50c68f115..65a9dffb5 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1021,6 +1021,7 @@ is_option "ro" &&
case "$FSTYPE" in
nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre)
CLUSTERSAFE=1 # this is kind of safe too
+ systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target"
;;
# add here CLUSTERSAFE=0 for all filesystems which are not
# cluster aware and which, even if when mounted read-only,
@@ -1028,6 +1029,7 @@ nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|c
ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
if ocf_is_true "$OCF_RESKEY_force_clones"; then
CLUSTERSAFE=2
+ systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target"
else
CLUSTERSAFE=0 # these are not allowed
fi

@ -1,125 +0,0 @@
From 48ed6e6d6510f42743e4463970e27f05637e4982 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 4 Jul 2023 14:40:19 +0200
Subject: [PATCH] Filesystem: improve stop-action and allow setting term/kill
signals and signal_delay for large filesystems
---
heartbeat/Filesystem | 80 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 70 insertions(+), 10 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 65a9dffb5..fe608ebfd 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -71,6 +71,9 @@ OCF_RESKEY_run_fsck_default="auto"
OCF_RESKEY_fast_stop_default="no"
OCF_RESKEY_force_clones_default="false"
OCF_RESKEY_force_unmount_default="true"
+OCF_RESKEY_term_signals_default="TERM"
+OCF_RESKEY_kill_signals_default="KILL"
+OCF_RESKEY_signal_delay_default="1"
# RHEL specific defaults
if is_redhat_based; then
@@ -104,6 +107,9 @@ if [ -z "${OCF_RESKEY_fast_stop}" ]; then
fi
: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
: ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}}
+: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}}
+: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}}
+: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}}
# Variables used by multiple methods
HOSTOS=$(uname)
@@ -266,6 +272,30 @@ block if unresponsive nfs mounts are in use on the system.
<content type="boolean" default="${OCF_RESKEY_force_unmount_default}" />
</parameter>
+<parameter name="term_signals">
+<longdesc lang="en">
+Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
+</longdesc>
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_term_signals_default}" />
+</parameter>
+
+<parameter name="kill_signals">
+<longdesc lang="en">
+Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
+</longdesc>
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_kill_signals_default}" />
+</parameter>
+
+<parameter name="signal_delay">
+<longdesc lang="en">
+How many seconds to wait after sending term/kill signals to processes in stop-action.
+</longdesc>
+<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_kill_signal_delay}" />
+</parameter>
+
</parameters>
<actions>
@@ -663,19 +693,49 @@ try_umount() {
}
return $OCF_ERR_GENERIC
}
-fs_stop() {
- local SUB="$1" timeout=$2 sig cnt
- for sig in TERM KILL; do
- cnt=$((timeout/2)) # try half time with TERM
- while [ $cnt -gt 0 ]; do
- try_umount "$SUB" &&
- return $OCF_SUCCESS
- ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig"
+timeout_child() {
+ local pid="$1" timeout="$2" killer ret
+
+ # start job in the background that will KILL the given process after timeout expires
+ sleep $timeout && kill -s KILL $pid &
+ killer=$!
+
+ # block until the child process either exits on its own or gets killed by the above killer pipeline
+ wait $pid
+ ret=$?
+
+ # ret would be 127 + child exit code if the timeout expired
+ [ $ret -lt 128 ] && kill -s KILL $killer
+ return $ret
+}
+fs_stop_loop() {
+ local SUB="$1" signals="$2" sig
+ while true; do
+ for sig in $signals; do
signal_processes "$SUB" $sig
- cnt=$((cnt-1))
- sleep 1
done
+ sleep $OCF_RESKEY_signal_delay
+ try_umount "$SUB" && return $OCF_SUCCESS
done
+}
+fs_stop() {
+ local SUB="$1" timeout=$2 grace_time ret
+ grace_time=$((timeout/2))
+
+ # try gracefully terminating processes for up to half of the configured timeout
+ fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" &
+ timeout_child $! $grace_time
+ ret=$?
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
+
+ # try killing them for the rest of the timeout
+ fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" &
+ timeout_child $! $grace_time
+ ret=$?
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
+
+ # timeout expired
+ ocf_exit_reason "Couldn't unmount $SUB within given timeout"
return $OCF_ERR_GENERIC
}

@ -1,49 +0,0 @@
From 7056635f3f94c1bcaaa5ed5563dc3b0e9f6749e0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 18 Jul 2023 14:12:27 +0200
Subject: [PATCH] Filesystem: dont use boolean type for non-boolean parameters
---
heartbeat/Filesystem | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index ee55a4843..b9aae8d50 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -269,7 +269,7 @@ fuser cli tool. fuser is known to perform operations that can potentially
block if unresponsive nfs mounts are in use on the system.
</longdesc>
<shortdesc lang="en">Kill processes before unmount</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_force_unmount_default}" />
+<content type="string" default="${OCF_RESKEY_force_unmount_default}" />
</parameter>
<parameter name="term_signals">
@@ -277,7 +277,7 @@ block if unresponsive nfs mounts are in use on the system.
Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
</longdesc>
<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_term_signals_default}" />
+<content type="string" default="${OCF_RESKEY_term_signals_default}" />
</parameter>
<parameter name="kill_signals">
@@ -285,7 +285,7 @@ Signals (names or numbers, whitespace separated) to send processes during gracef
Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
</longdesc>
<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_kill_signals_default}" />
+<content type="string" default="${OCF_RESKEY_kill_signals_default}" />
</parameter>
<parameter name="signal_delay">
@@ -293,7 +293,7 @@ Signals (names or numbers, whitespace separated) to send processes during forcef
How many seconds to wait after sending term/kill signals to processes in stop-action.
</longdesc>
<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_kill_signal_delay}" />
+<content type="string" default="${OCF_RESKEY_kill_signal_delay}" />
</parameter>
</parameters>

@ -1,23 +0,0 @@
From f779fad52e5f515ca81218da6098398bdecac286 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 20 Jul 2023 10:18:12 +0200
Subject: [PATCH] Filesystem: fix incorrect variable name for signal_delay
default in metadata
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index b9aae8d50..066562891 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -293,7 +293,7 @@ Signals (names or numbers, whitespace separated) to send processes during forcef
How many seconds to wait after sending term/kill signals to processes in stop-action.
</longdesc>
<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
-<content type="string" default="${OCF_RESKEY_kill_signal_delay}" />
+<content type="string" default="${OCF_RESKEY_signal_delay_default}" />
</parameter>
</parameters>

@ -1,27 +0,0 @@
From a913eb6a9a8732db7c56d2e0be937dbd0db9dc38 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 26 May 2023 12:45:13 +0200
Subject: [PATCH] Delay: increase stop, status and monitor timeouts to 40s to
avoid failing with default values
---
heartbeat/Delay | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Delay b/heartbeat/Delay
index 7ba6623f24..bc6c13559b 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -89,9 +89,9 @@ Defaults to "startdelay" if unspecified.
<actions>
<action name="start" timeout="30s" />
-<action name="stop" timeout="30s" />
-<action name="status" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" depth="0" timeout="30s" interval="10s" />
+<action name="stop" timeout="40s" />
+<action name="status" depth="0" timeout="40s" interval="10s" />
+<action name="monitor" depth="0" timeout="40s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5s" />
</actions>

@ -1,30 +0,0 @@
From fe8a807dae0398b811d1ee63ebd7202280b2b678 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 18 Jul 2023 14:51:00 +0200
Subject: [PATCH] Delay: remove statement about defaulting to "startdelay"
value if not specified
---
heartbeat/Delay | 2 --
1 file changed, 2 deletions(-)
diff --git a/heartbeat/Delay b/heartbeat/Delay
index bc6c13559..5aa8f4608 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -71,7 +71,6 @@ How long in seconds to delay on start operation.
<parameter name="stopdelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on stop operation.
-Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Stop delay</shortdesc>
<content type="integer" default="${OCF_RESKEY_stopdelay_default}" />
@@ -80,7 +79,6 @@ Defaults to "startdelay" if unspecified.
<parameter name="mondelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on monitor operation.
-Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Monitor delay</shortdesc>
<content type="integer" default="${OCF_RESKEY_mondelay_default}" />

@ -1,49 +0,0 @@
diff --color -uNr a/heartbeat/awseip b/heartbeat/awseip
--- a/heartbeat/awseip 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/awseip 2021-02-15 16:47:36.624610378 +0100
@@ -43,7 +43,7 @@
#
# Defaults
#
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
diff --color -uNr a/heartbeat/awsvip b/heartbeat/awsvip
--- a/heartbeat/awsvip 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/awsvip 2021-02-15 16:47:48.960632484 +0100
@@ -42,7 +42,7 @@
#
# Defaults
#
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
diff --color -uNr a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
--- a/heartbeat/aws-vpc-move-ip 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/aws-vpc-move-ip 2021-02-15 16:47:55.484644118 +0100
@@ -35,7 +35,7 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
diff --color -uNr a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
--- a/heartbeat/aws-vpc-route53.in 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/aws-vpc-route53.in 2021-02-15 16:47:59.808651828 +0100
@@ -45,7 +45,7 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""

@ -5,7 +5,7 @@ diff --color -uNr a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in
from ocf import logger
try:
+ sys.path.insert(0, '/usr/lib/fence-agents/support/google')
+ sys.path.insert(0, "/usr/lib/fence-agents/support/google/lib/python#PYTHON3_VERSION#/site-packages")
import googleapiclient.discovery
except ImportError:
pass
@ -16,7 +16,7 @@ diff --color -uNr a/heartbeat/gcp-vpc-move-route.in b/heartbeat/gcp-vpc-move-rou
from ocf import *
try:
+ sys.path.insert(0, '/usr/lib/fence-agents/support/google')
+ sys.path.insert(0, "/usr/lib/fence-agents/support/google/lib/python#PYTHON3_VERSION#/site-packages")
import googleapiclient.discovery
import pyroute2
try:
@ -27,7 +27,7 @@ diff --color -uNr a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.i
from ocf import *
try:
+ sys.path.insert(0, '/usr/lib/fence-agents/support/google')
+ sys.path.insert(0, "/usr/lib/fence-agents/support/google/lib/python#PYTHON3_VERSION#/site-packages")
import googleapiclient.discovery
try:
from google.oauth2.service_account import Credentials as ServiceAccountCredentials

@ -1,787 +0,0 @@
diff --color -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am
--- a/doc/man/Makefile.am 2021-08-25 09:51:53.037906134 +0200
+++ b/doc/man/Makefile.am 2021-08-25 09:48:44.578408475 +0200
@@ -97,6 +97,8 @@
ocf_heartbeat_ManageRAID.7 \
ocf_heartbeat_ManageVE.7 \
ocf_heartbeat_NodeUtilization.7 \
+ ocf_heartbeat_nova-compute-wait.7 \
+ ocf_heartbeat_NovaEvacuate.7 \
ocf_heartbeat_Pure-FTPd.7 \
ocf_heartbeat_Raid1.7 \
ocf_heartbeat_Route.7 \
diff --color -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
--- a/heartbeat/Makefile.am 2021-08-25 09:51:53.038906137 +0200
+++ b/heartbeat/Makefile.am 2021-08-25 09:48:44.588408501 +0200
@@ -29,6 +29,8 @@
ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat
+ospdir = $(OCF_RA_DIR_PREFIX)/openstack
+
dtddir = $(datadir)/$(PACKAGE_NAME)
dtd_DATA = ra-api-1.dtd metadata.rng
@@ -50,6 +52,9 @@
send_ua_SOURCES = send_ua.c IPv6addr_utils.c
send_ua_LDADD = $(LIBNETLIBS)
+osp_SCRIPTS = nova-compute-wait \
+ NovaEvacuate
+
ocf_SCRIPTS = AoEtarget \
AudibleAlarm \
ClusterMon \
diff --color -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait
--- a/heartbeat/nova-compute-wait 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/nova-compute-wait 2021-08-25 09:50:14.626646141 +0200
@@ -0,0 +1,345 @@
+#!/bin/sh
+#
+#
+# nova-compute-wait agent manages compute daemons.
+#
+# Copyright (c) 2015
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+
+###
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+###
+
+: ${__OCF_ACTION=$1}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="nova-compute-wait" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+OpenStack Nova Compute Server.
+</longdesc>
+<shortdesc lang="en">OpenStack Nova Compute Server</shortdesc>
+
+<parameters>
+
+<parameter name="auth_url" unique="0" required="1">
+<longdesc lang="en">
+Deprecated option not in use
+</longdesc>
+<shortdesc lang="en">Deprecated</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="username" unique="0" required="1">
+<longdesc lang="en">
+Deprecated option not in use
+</longdesc>
+<shortdesc lang="en">Deprecated</shortdesc>
+</parameter>
+
+<parameter name="password" unique="0" required="1">
+<longdesc lang="en">
+Deprecated option not in use
+</longdesc>
+<shortdesc lang="en">Deprecated</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="tenant_name" unique="0" required="1">
+<longdesc lang="en">
+Deprecated option not in use
+</longdesc>
+<shortdesc lang="en">Deprecated</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="domain" unique="0" required="0">
+<longdesc lang="en">
+DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
+</longdesc>
+<shortdesc lang="en">DNS domain</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="endpoint_type" unique="0" required="0">
+<longdesc lang="en">
+Deprecated option not in use
+</longdesc>
+<shortdesc lang="en">Deprecated</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="no_shared_storage" unique="0" required="0">
+<longdesc lang="en">
+Deprecated option not in use
+</longdesc>
+<shortdesc lang="en">Deprecated</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+<parameter name="evacuation_delay" unique="0" required="0">
+<longdesc lang="en">
+How long to wait for nova to finish evacuating instances elsewhere
+before starting nova-compute. Only used when the agent detects
+evacuations might be in progress.
+
+You may need to increase the start timeout when increasing this value.
+</longdesc>
+<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
+<content type="integer" default="120" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="600" />
+<action name="stop" timeout="300" />
+<action name="monitor" timeout="20" interval="10" depth="0"/>
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# don't exit on TERM, to test that lrmd makes sure that we do exit
+trap sigterm_handler TERM
+sigterm_handler() {
+ ocf_log info "They use TERM to bring us down. No such luck."
+ return
+}
+
+nova_usage() {
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+nova_start() {
+ build_unfence_overlay
+
+ state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+ if [ "x$state" = x ]; then
+ : never been fenced
+
+ elif [ "x$state" = xno ]; then
+ : has been evacuated, however it could have been 1s ago
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
+ sleep ${OCF_RESKEY_evacuation_delay}
+
+ else
+ while [ "x$state" != "xno" ]; do
+ ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
+ state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+ sleep 5
+ done
+
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
+ sleep ${OCF_RESKEY_evacuation_delay}
+ fi
+
+ touch "$statefile"
+
+ return $OCF_SUCCESS
+}
+
+nova_stop() {
+ rm -f "$statefile"
+ return $OCF_SUCCESS
+}
+
+nova_monitor() {
+ if [ ! -f "$statefile" ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ return $OCF_SUCCESS
+}
+
+nova_notify() {
+ return $OCF_SUCCESS
+}
+
+build_unfence_overlay() {
+ fence_options=""
+
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
+ candidates=$(/usr/sbin/stonith_admin -l ${NOVA_HOST})
+ for candidate in ${candidates}; do
+ pcs stonith show $d | grep -q fence_compute
+ if [ $? = 0 ]; then
+ ocf_log info "Unfencing nova based on: $candidate"
+ fence_auth=$(pcs stonith show $candidate | grep Attributes: | sed -e s/Attributes:// -e s/-/_/g -e 's/[^ ]\+=/OCF_RESKEY_\0/g' -e s/passwd/password/g)
+ eval "export $fence_auth"
+ break
+ fi
+ done
+ fi
+
+ # Copied from NovaEvacuate
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
+ ocf_exit_reason "auth_url not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
+
+ if [ -z "${OCF_RESKEY_username}" ]; then
+ ocf_exit_reason "username not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -l ${OCF_RESKEY_username}"
+
+ if [ -z "${OCF_RESKEY_password}" ]; then
+ ocf_exit_reason "password not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -p ${OCF_RESKEY_password}"
+
+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then
+ ocf_exit_reason "tenant_name not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+
+ if [ -n "${OCF_RESKEY_domain}" ]; then
+ fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
+ fi
+
+ if [ -n "${OCF_RESKEY_region_name}" ]; then
+ fence_options="${fence_options} \
+ --region-name ${OCF_RESKEY_region_name}"
+ fi
+
+ if [ -n "${OCF_RESKEY_insecure}" ]; then
+ if ocf_is_true "${OCF_RESKEY_insecure}"; then
+ fence_options="${fence_options} --insecure"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
+ fence_options="${fence_options} --no-shared-storage"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
+ case ${OCF_RESKEY_endpoint_type} in
+ adminURL|publicURL|internalURL)
+ ;;
+ *)
+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \
+ "not valid. Use adminURL or publicURL or internalURL"
+ exit $OCF_ERR_CONFIGURED
+ ;;
+ esac
+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
+ fi
+
+ mkdir -p /run/systemd/system/openstack-nova-compute.service.d
+ cat<<EOF>/run/systemd/system/openstack-nova-compute.service.d/unfence-20.conf
+[Service]
+ExecStartPost=/sbin/fence_compute ${fence_options} -o on -n ${NOVA_HOST}
+EOF
+}
+
+nova_validate() {
+ rc=$OCF_SUCCESS
+
+ check_binary crudini
+ check_binary nova-compute
+ check_binary fence_compute
+
+ if [ ! -f /etc/nova/nova.conf ]; then
+ ocf_exit_reason "/etc/nova/nova.conf not found"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ # Is the state directory writable?
+ state_dir=$(dirname $statefile)
+ touch "$state_dir/$$"
+ if [ $? != 0 ]; then
+ ocf_exit_reason "Invalid state directory: $state_dir"
+ return $OCF_ERR_ARGS
+ fi
+ rm -f "$state_dir/$$"
+
+ NOVA_HOST=$(crudini --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
+ if [ $? = 1 ]; then
+ short_host=$(uname -n | awk -F. '{print $1}')
+ if [ "x${OCF_RESKEY_domain}" != x ]; then
+ NOVA_HOST=${short_host}.${OCF_RESKEY_domain}
+ else
+ NOVA_HOST=$(uname -n)
+ fi
+ fi
+
+ if [ $rc != $OCF_SUCCESS ]; then
+ exit $rc
+ fi
+ return $rc
+}
+
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
+
+: ${OCF_RESKEY_evacuation_delay=120}
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) nova_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+case $__OCF_ACTION in
+start) nova_validate; nova_start;;
+stop) nova_stop;;
+monitor) nova_validate; nova_monitor;;
+notify) nova_notify;;
+validate-all) exit $OCF_SUCCESS;;
+*) nova_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
+
diff --color -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
--- a/heartbeat/NovaEvacuate 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/NovaEvacuate 2021-08-25 09:50:23.780670326 +0200
@@ -0,0 +1,400 @@
+#!/bin/bash
+#
+# Copyright 2015 Red Hat, Inc.
+#
+# Description: Manages evacuation of nodes running nova-compute
+#
+# Authors: Andrew Beekhof
+#
+# Support: openstack@lists.openstack.org
+# License: Apache Software License (ASL) 2.0
+#
+
+
+#######################################################################
+# Initialization:
+
+###
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+###
+
+: ${__OCF_ACTION=$1}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="NovaEvacuate" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+Facility for tacking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged.
+</longdesc>
+<shortdesc lang="en">Evacuator for OpenStack Nova Compute Server</shortdesc>
+
+<parameters>
+
+<parameter name="auth_url" unique="0" required="1">
+<longdesc lang="en">
+Authorization URL for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Authorization URL</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="username" unique="0" required="1">
+<longdesc lang="en">
+Username for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Username</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="password" unique="0" required="1">
+<longdesc lang="en">
+Password for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Password</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="tenant_name" unique="0" required="1">
+<longdesc lang="en">
+Tenant name for connecting to keystone in admin context.
+Note that with Keystone V3 tenant names are only unique within a domain.
+</longdesc>
+<shortdesc lang="en">Tenant name</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="domain" unique="0" required="0">
+<longdesc lang="en">
+DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
+</longdesc>
+<shortdesc lang="en">DNS domain</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="endpoint_type" unique="0" required="0">
+<longdesc lang="en">
+Nova API location (internal, public or admin URL)
+</longdesc>
+<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="region_name" unique="0" required="0">
+<longdesc lang="en">
+Region name for connecting to nova.
+</longdesc>
+<shortdesc lang="en">Region name</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="insecure" unique="0" required="0">
+<longdesc lang="en">
+Explicitly allow client to perform "insecure" TLS (https) requests.
+The server's certificate will not be verified against any certificate authorities.
+This option should be used with caution.
+</longdesc>
+<shortdesc lang="en">Allow insecure TLS requests</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+<parameter name="no_shared_storage" unique="0" required="0">
+<longdesc lang="en">
+Indicate that nova storage for instances is not shared across compute
+nodes. This must match the reality of how nova storage is configured!
+Otherwise VMs could end up in error state upon evacuation. When
+storage is non-shared, instances on dead hypervisors will be rebuilt
+from their original image or volume, so anything on ephemeral storage
+will be lost.
+</longdesc>
+<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+<parameter name="verbose" unique="0" required="0">
+<longdesc lang="en">
+Enable extra logging from the evacuation process
+</longdesc>
+<shortdesc lang="en">Enable debug logging</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+<parameter name="evacuate_delay" unique="0" required="0">
+<longdesc lang="en">
+Allows delaying the nova evacuate API call, e.g. to give a storage array time to clean
+up eventual locks/leases.
+</longdesc>
+<shortdesc lang="en">Nova evacuate delay</shortdesc>
+<content type="integer" default="0" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="monitor" timeout="600" interval="10" depth="0"/>
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# don't exit on TERM, to test that lrmd makes sure that we do exit
+trap sigterm_handler TERM
+sigterm_handler() {
+ ocf_log info "They use TERM to bring us down. No such luck."
+ return
+}
+
+evacuate_usage() {
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+evacuate_stop() {
+ rm -f "$statefile"
+ return $OCF_SUCCESS
+}
+
+evacuate_start() {
+ touch "$statefile"
+ # Do not invole monitor here so that the start timeout can be low
+ return $?
+}
+
+update_evacuation() {
+ attrd_updater -p -n evacuate -Q -N ${1} -U ${2}
+ arc=$?
+ if [ ${arc} != 0 ]; then
+ ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}"
+ fi
+ return ${arc}
+}
+
+handle_evacuations() {
+ while [ $# -gt 0 ]; do
+ node=$1
+ state=$2
+ shift; shift;
+ need_evacuate=0
+
+ case $state in
+ "")
+ ;;
+ no)
+ ocf_log debug "$node is either fine or already handled"
+ ;;
+ yes) need_evacuate=1
+ ;;
+ *@*)
+ where=$(echo $state | awk -F@ '{print $1}')
+ when=$(echo $state | awk -F@ '{print $2}')
+ now=$(date +%s)
+
+ if [ $(($now - $when)) -gt 60 ]; then
+ ocf_log info "Processing partial evacuation of $node by" \
+ "$where at $when"
+ need_evacuate=1
+ else
+ # Give some time for any in-flight evacuations to either
+ # complete or fail Nova won't react well if there are two
+ # overlapping requests
+ ocf_log info "Deferring processing partial evacuation of" \
+ "$node by $where at $when"
+ fi
+ ;;
+ esac
+
+ if [ $need_evacuate = 1 ]; then
+ fence_agent="fence_compute"
+
+ if have_binary fence_evacuate; then
+ fence_agent="fence_evacuate"
+ fi
+
+ if [ ${OCF_RESKEY_evacuate_delay} != 0 ]; then
+ ocf_log info "Delaying nova evacuate by $OCF_RESKEY_evacuate_delay seconds"
+ sleep ${OCF_RESKEY_evacuate_delay}
+ fi
+
+ ocf_log notice "Initiating evacuation of $node with $fence_agent"
+ $fence_agent ${fence_options} -o status -n ${node}
+ if [ $? = 1 ]; then
+ ocf_log info "Nova does not know about ${node}"
+ # Dont mark as no because perhaps nova is unavailable right now
+ continue
+ fi
+
+ update_evacuation ${node} "$(uname -n)@$(date +%s)"
+ if [ $? != 0 ]; then
+ return $OCF_SUCCESS
+ fi
+
+ $fence_agent ${fence_options} -o off -n $node
+ rc=$?
+
+ if [ $rc = 0 ]; then
+ update_evacuation ${node} no
+ ocf_log notice "Completed evacuation of $node"
+ else
+ ocf_log warn "Evacuation of $node failed: $rc"
+ update_evacuation ${node} yes
+ fi
+ fi
+ done
+
+ return $OCF_SUCCESS
+}
+
+evacuate_monitor() {
+ if [ ! -f "$statefile" ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ handle_evacuations $(
+ attrd_updater -n evacuate -A \
+ 2> >(grep -v "attribute does not exist" 1>&2) |
+ sed 's/ value=""/ value="no"/' |
+ tr '="' ' ' |
+ awk '{print $4" "$6}'
+ )
+ return $OCF_SUCCESS
+}
+
+evacuate_validate() {
+ rc=$OCF_SUCCESS
+ fence_options=""
+
+ if ! have_binary fence_evacuate; then
+ check_binary fence_compute
+ fi
+
+ # Is the state directory writable?
+ state_dir=$(dirname $statefile)
+ touch "$state_dir/$$"
+ if [ $? != 0 ]; then
+ ocf_exit_reason "Invalid state directory: $state_dir"
+ return $OCF_ERR_ARGS
+ fi
+ rm -f "$state_dir/$$"
+
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
+ ocf_exit_reason "auth_url not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
+
+ if [ -z "${OCF_RESKEY_username}" ]; then
+ ocf_exit_reason "username not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -l ${OCF_RESKEY_username}"
+
+ if [ -z "${OCF_RESKEY_password}" ]; then
+ ocf_exit_reason "password not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -p ${OCF_RESKEY_password}"
+
+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then
+ ocf_exit_reason "tenant_name not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+
+ if [ -n "${OCF_RESKEY_domain}" ]; then
+ fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
+ fi
+
+ if [ -n "${OCF_RESKEY_region_name}" ]; then
+ fence_options="${fence_options} \
+ --region-name ${OCF_RESKEY_region_name}"
+ fi
+
+ if [ -n "${OCF_RESKEY_insecure}" ]; then
+ if ocf_is_true "${OCF_RESKEY_insecure}"; then
+ fence_options="${fence_options} --insecure"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
+ fence_options="${fence_options} --no-shared-storage"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_verbose}" ]; then
+ if ocf_is_true "${OCF_RESKEY_verbose}"; then
+ fence_options="${fence_options} --verbose"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
+ case ${OCF_RESKEY_endpoint_type} in
+ adminURL|publicURL|internalURL)
+ ;;
+ *)
+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \
+ "not valid. Use adminURL or publicURL or internalURL"
+ exit $OCF_ERR_CONFIGURED
+ ;;
+ esac
+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
+ fi
+
+ if [ $rc != $OCF_SUCCESS ]; then
+ exit $rc
+ fi
+ return $rc
+}
+
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
+
+case $__OCF_ACTION in
+ start)
+ evacuate_validate
+ evacuate_start
+ ;;
+ stop)
+ evacuate_stop
+ ;;
+ monitor)
+ evacuate_validate
+ evacuate_monitor
+ ;;
+ meta-data)
+ meta_data
+ exit $OCF_SUCCESS
+ ;;
+ usage|help)
+ evacuate_usage
+ exit $OCF_SUCCESS
+ ;;
+ validate-all)
+ exit $OCF_SUCCESS
+ ;;
+ *)
+ evacuate_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc

@ -1,7 +1,7 @@
diff --color -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am
--- a/doc/man/Makefile.am 2021-04-12 12:51:56.831835953 +0200
+++ b/doc/man/Makefile.am 2021-04-13 13:38:14.198361848 +0200
@@ -154,6 +154,7 @@
--- a/doc/man/Makefile.am 2023-10-11 09:03:53.000000000 +0200
+++ b/doc/man/Makefile.am 2024-06-12 09:14:42.898393461 +0200
@@ -184,6 +184,7 @@
ocf_heartbeat_ovsmonitor.7 \
ocf_heartbeat_pgagent.7 \
ocf_heartbeat_pgsql.7 \
@ -10,9 +10,9 @@ diff --color -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am
ocf_heartbeat_podman.7 \
ocf_heartbeat_portblock.7 \
diff --color -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
--- a/heartbeat/Makefile.am 2021-04-12 12:51:56.831835953 +0200
+++ b/heartbeat/Makefile.am 2021-04-13 13:37:45.741292178 +0200
@@ -149,6 +149,7 @@
--- a/heartbeat/Makefile.am 2023-10-11 09:03:53.000000000 +0200
+++ b/heartbeat/Makefile.am 2024-06-12 09:14:42.898393461 +0200
@@ -156,6 +156,7 @@
ovsmonitor \
pgagent \
pgsql \
@ -20,7 +20,7 @@ diff --color -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
pingd \
podman \
portblock \
@@ -209,7 +210,10 @@
@@ -224,7 +225,10 @@
mysql-common.sh \
nfsserver-redhat.sh \
findif.sh \
@ -34,7 +34,7 @@ diff --color -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
hbdir = $(sysconfdir)/ha.d
diff --color -uNr a/heartbeat/OCF_Directories.pm b/heartbeat/OCF_Directories.pm
--- a/heartbeat/OCF_Directories.pm 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/OCF_Directories.pm 2021-04-13 13:37:35.621267404 +0200
+++ b/heartbeat/OCF_Directories.pm 2024-06-12 09:23:45.434638170 +0200
@@ -0,0 +1,139 @@
+#!/usr/bin/perl
+# This program is open source, licensed under the PostgreSQL License.
@ -146,7 +146,7 @@ diff --color -uNr a/heartbeat/OCF_Directories.pm b/heartbeat/OCF_Directories.pm
+ our @EXPORT_OK = ( @EXPORT );
+}
+
+our $INITDIR = ( $ENV{'INITDIR'} || '/etc/init.d' );
+our $INITDIR = ( $ENV{'INITDIR'} || '/etc/rc.d/init.d' );
+our $HA_DIR = ( $ENV{'HA_DIR'} || '/etc/ha.d' );
+our $HA_RCDIR = ( $ENV{'HA_RCDIR'} || '/etc/ha.d/rc.d' );
+our $HA_CONFDIR = ( $ENV{'HA_CONFDIR'} || '/etc/ha.d/conf' );
@ -177,7 +177,7 @@ diff --color -uNr a/heartbeat/OCF_Directories.pm b/heartbeat/OCF_Directories.pm
+
diff --color -uNr a/heartbeat/OCF_Functions.pm b/heartbeat/OCF_Functions.pm
--- a/heartbeat/OCF_Functions.pm 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/OCF_Functions.pm 2021-04-13 13:37:35.621267404 +0200
+++ b/heartbeat/OCF_Functions.pm 2023-01-04 12:25:21.724889658 +0100
@@ -0,0 +1,631 @@
+#!/usr/bin/perl
+# This program is open source, licensed under the PostgreSQL License.
@ -812,7 +812,7 @@ diff --color -uNr a/heartbeat/OCF_Functions.pm b/heartbeat/OCF_Functions.pm
+Licensed under the PostgreSQL License.
diff --color -uNr a/heartbeat/OCF_ReturnCodes.pm b/heartbeat/OCF_ReturnCodes.pm
--- a/heartbeat/OCF_ReturnCodes.pm 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/OCF_ReturnCodes.pm 2021-04-13 13:37:35.621267404 +0200
+++ b/heartbeat/OCF_ReturnCodes.pm 2023-01-04 12:25:21.724889658 +0100
@@ -0,0 +1,97 @@
+#!/usr/bin/perl
+# This program is open source, licensed under the PostgreSQL License.
@ -913,8 +913,8 @@ diff --color -uNr a/heartbeat/OCF_ReturnCodes.pm b/heartbeat/OCF_ReturnCodes.pm
+Licensed under the PostgreSQL License.
diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
--- a/heartbeat/pgsqlms 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/pgsqlms 2021-04-13 13:37:40.934280411 +0200
@@ -0,0 +1,2308 @@
+++ b/heartbeat/pgsqlms 2024-06-12 10:48:57.220019549 +0200
@@ -0,0 +1,2337 @@
+#!/usr/bin/perl
+# This program is open source, licensed under the PostgreSQL License.
+# For license terms, see the LICENSE file.
@ -945,17 +945,15 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+use File::Temp;
+use Data::Dumper;
+
+my $OCF_FUNCTIONS_DIR;
+BEGIN {
+ $OCF_FUNCTIONS_DIR = defined $ENV{'OCF_FUNCTIONS_DIR'} ? "$ENV{'OCF_FUNCTIONS_DIR'}" : "$ENV{'OCF_ROOT'}/lib/heartbeat";
+}
+use lib "$OCF_FUNCTIONS_DIR";
+use FindBin;
+use lib "$FindBin::RealBin/../heartbeat/";
+use lib "$FindBin::RealBin/../../lib/heartbeat/";
+
+use OCF_ReturnCodes;
+use OCF_Directories;
+use OCF_Functions;
+
+our $VERSION = 'v2.3.0';
+our $VERSION = '2.3.0';
+our $PROGRAM = 'pgsqlms';
+
+# OCF environment
@ -1145,11 +1143,14 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ # We check locations of connected standbies by querying the
+ # "pg_stat_replication" view.
+ # The row_number applies on the result set ordered on write_location ASC so
+ # the highest row_number should be given to the closest node from the
+ # master, then the lowest node name (alphanumeric sort) in case of equality.
+ # The result set itself is order by priority DESC to process best known
+ # candidate first.
+ # The row_number applies on the result set ordered on write_location DESC so
+ # the smallest row_number should be given to the closest node from the
+ # primary (1), then the lowest node name (alphanumeric sort) in case of
+ # equality. This row_number - 1 is then used to decrease the priority (score) by
+ # step of 10 units starting from 1000.
+ # E.g. row_number = 1 and maxlag = 0, ( 1000 - (row_number - 1) * 10 ) * 1 = 1000
+ # The result set itself is order by priority DESC to process best
+ # known candidate first.
+ $query = qq{
+ SELECT application_name, priority, location, state, current_lag
+ FROM (
@ -1157,7 +1158,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ (1000 - (
+ row_number() OVER (
+ PARTITION BY state IN ('startup', 'backup')
+ ORDER BY location ASC, application_name ASC
+ ORDER BY location DESC, application_name ASC
+ ) - 1
+ ) * 10
+ ) * CASE WHEN ( $maxlag > 0
@ -1326,8 +1327,8 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ return 0;
+}
+
+# Check if the current transiation is a recover of a master clone on given node.
+sub _is_master_recover {
+# Check if the current transition is a recover of the primary on given node.
+sub _is_primary_recover {
+ my ( $n ) = @_;
+
+ return (
@ -1336,8 +1337,8 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ );
+}
+
+# Check if the current transition is a recover of a slave clone on given node.
+sub _is_slave_recover {
+# Check if the current transition is a recover of a standby clone on given node.
+sub _is_standby_recover {
+ my ( $n ) = @_;
+
+ return (
@ -1346,7 +1347,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ );
+}
+
+# check if th current transition is a switchover to the given node.
+# check if the current transition is a switchover to the given node.
+sub _is_switchover {
+ my ( $n ) = @_;
+ my $old = $OCF_NOTIFY_ENV{'master'}[0]{'uname'};
@ -1626,18 +1627,18 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+}
+
+# Check the write_location of all secondaries, and adapt their master score so
+# that the instance closest to the master will be the selected candidate should
+# a promotion be triggered.
+# that the instance closest to the primary will be the selected candidate
+# should a promotion be triggered.
+# NOTE: This is only a hint to pacemaker! The selected candidate to promotion
+# actually re-check it is the best candidate and force a re-election by failing
+# if a better one exists. This avoid a race condition between the call of the
+# monitor action and the promotion where another slave might have catchup faster
+# with the master.
+# monitor action and the promotion where another standby might have catchup
+# faster with the primary.
+# NOTE: we cannot directly use the write_location, neither a lsn_diff value as
+# promotion score as Pacemaker considers any value greater than 1,000,000 as
+# INFINITY.
+#
+# This sub must be executed from a master monitor action.
+# This sub must be executed from a Master-role monitor action.
+#
+sub _check_locations {
+ my $partition_nodes;
@ -1659,7 +1660,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ # If no lag are reported at this point, it means that there is no
+ # secondary instance connected.
+ ocf_log( 'warning', 'No secondary connected to the master' )
+ ocf_log( 'warning', 'No secondary connected to the primary' )
+ if $row_num == 0;
+
+ # For each standby connected, set their master score based on the following
@ -1739,10 +1740,10 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+# _check_switchover
+# check if the pgsql switchover to the localnode is safe.
+# This is supposed to be called **after** the master has been stopped or demoted.
+# This sub checks if the local standby received the shutdown checkpoint from the
+# old master to make sure it can take over the master role and the old master
+# will be able to catchup as a standby after.
+# This is supposed to be called **after** the primary has been stopped or
+# demoted. It checks if the local standby received the shutdown checkpoint
+# from the old primary to make sure it can promote safely and the old
+# primary will be able to catchup as a standby after.
+#
+# Returns 0 if switchover is safe
+# Returns 1 if swithcover is not safe
@ -1762,20 +1763,20 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ .' Need to check the last record in WAL',
+ $OCF_NOTIFY_ENV{'demote'}[0]{'uname'}, $nodename );
+
+ # check if we received the shutdown checkpoint of the master during its
+ # check if we received the shutdown checkpoint of the primary during its
+ # demote process.
+ # We need the last local checkpoint LSN and the last received LSN from
+ # master to check in the WAL between these adresses if we have a
+ # primary to check in the WAL between these addresses if we have a
+ # "checkpoint shutdown" using pg_xlogdump/pg_waldump.
+ #
+ # Force a checkpoint to make sure the controldata shows the very last TL
+ # and the master's shutdown checkpoint
+ # and the primary's shutdown checkpoint
+ _query( q{ CHECKPOINT }, {} );
+ %cdata = _get_controldata();
+ $tl = $cdata{'tl'};
+ $last_redo = $cdata{'redo'};
+
+ # Get the last received LSN from master
+ # Get the last received LSN from primary
+ $last_lsn = _get_last_received_lsn();
+
+ unless ( defined $last_lsn ) {
@ -1796,12 +1797,12 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ if ( $rc == 0 and
+ $ans =~ m{^rmgr: XLOG.*desc: (?i:checkpoint)(?::|_SHUTDOWN) redo [0-9A-F/]+; tli $tl;.*; shutdown$}m
+ ) {
+ ocf_log( 'info', 'Slave received the shutdown checkpoint' );
+ ocf_log( 'info', 'Standby received the shutdown checkpoint' );
+ return 0;
+ }
+
+ ocf_exit_reason(
+ 'Did not receive the shutdown checkpoint from the old master!' );
+ 'Did not receive the shutdown checkpoint from the old primary!' );
+
+ return 1;
+}
@ -1828,7 +1829,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ elsif ( $is_in_recovery eq 'f' ) {
+ # The instance is a primary.
+ ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a primary");
+ # Check lsn diff with current slaves if any
+ # Check lsn diff with current standbys if any
+ _check_locations() if $__OCF_ACTION eq 'monitor';
+ return $OCF_RUNNING_MASTER;
+ }
@ -1904,9 +1905,10 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ elsif ( $controldata_rc == $OCF_SUCCESS ) {
+ # The controldata has not been updated to "shutdown in recovery".
+ # It should mean we had a crash on a secondary instance.
+ # There is no "FAILED_SLAVE" return code, so we return a generic error.
+ # There is no "FAILED_STANDBY" return code, so we return a generic
+ # error.
+ ocf_exit_reason(
+ 'Instance "%s" controldata indicates a running secondary instance, the instance has probably crashed',
+ 'Instance "%s" controldata indicates a running standby instance, the instance has probably crashed',
+ $OCF_RESOURCE_INSTANCE );
+ return $OCF_ERR_GENERIC;
+ }
@ -1980,9 +1982,9 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+=item B<maxlag>
+
+Maximum lag allowed on a standby before we set a negative master score on it.
+Maximum lag allowed on a standby before forbidding any promotion to it.
+The calculation is based on the difference between the current xlog location on
+the master and the write location on the standby.
+the primary and the write location on the standby.
+
+(optional, integer, default "0" disables this feature)
+
@ -2014,13 +2016,16 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+sub ocf_meta_data {
+ print qq{<?xml version="1.0"?>
+ <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+ <resource-agent name="pgsqlms">
+ <resource-agent name="pgsqlms" version="$VERSION">
+ <version>1.0</version>
+
+ <longdesc lang="en">
+ Resource script for PostgreSQL in replication. It manages PostgreSQL servers using streaming replication as an HA resource.
+ Resource script for PostgreSQL in replication. It manages PostgreSQL
+ servers using streaming replication as an HA resource.
+ </longdesc>
+ <shortdesc lang="en">Manages PostgreSQL servers in replication</shortdesc>
+ <shortdesc lang="en">
+ Manages PostgreSQL servers in replication
+ </shortdesc>
+ <parameters>
+ <parameter name="system_user" unique="0" required="0">
+ <longdesc lang="en">
@ -2032,7 +2037,8 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ <parameter name="bindir" unique="0" required="0">
+ <longdesc lang="en">
+ Path to the directory storing the PostgreSQL binaries. The agent uses psql, pg_isready, pg_controldata and pg_ctl.
+ Path to the directory storing the PostgreSQL binaries. The agent
+ uses psql, pg_isready, pg_controldata and pg_ctl.
+ </longdesc>
+ <shortdesc lang="en">Path to the PostgreSQL binaries</shortdesc>
+ <content type="string" default="$bindir_default" />
@ -2048,17 +2054,23 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ <parameter name="datadir" unique="1" required="0">
+ <longdesc lang="en">
+ Path to the directory set in data_directory from your postgresql.conf file. This parameter
+ has the same default than PostgreSQL itself: the pgdata parameter value. Unless you have a
+ special PostgreSQL setup and you understand this parameter, ignore it.
+ Path to the directory set in data_directory from your
+ postgresql.conf file. This parameter has the same default than
+ PostgreSQL itself: the pgdata parameter value. Unless you have a
+ special PostgreSQL setup and you understand this parameter,
+ ignore it.
+ </longdesc>
+ <shortdesc lang="en">Path to the directory set in data_directory from your postgresql.conf file</shortdesc>
+ <shortdesc lang="en">
+ Path to the directory set in data_directory from your
+ postgresql.conf file
+ </shortdesc>
+ <content type="string" default="PGDATA" />
+ </parameter>
+
+ <parameter name="pghost" unique="0" required="0">
+ <longdesc lang="en">
+ Host IP address or unix socket folder the instance is listening on.
+ Host IP address or unix socket folder the instance is listening
+ on.
+ </longdesc>
+ <shortdesc lang="en">Instance IP or unix socket folder</shortdesc>
+ <content type="string" default="$pghost_default" />
@ -2074,25 +2086,31 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ <parameter name="maxlag" unique="0" required="0">
+ <longdesc lang="en">
+ Maximum lag allowed on a standby before we set a negative master score on it. The calculation
+ is based on the difference between the current LSN on the master and the LSN
+ written on the standby.
+ This parameter must be a valid positive number as described in PostgreSQL documentation.
+ Maximum lag allowed on a standby before forbidding any promotion
+ on it. The calculation is based on the difference between the
+ current LSN on the primary and the LSN written on the standby.
+ This parameter must be a valid positive number as described in
+ PostgreSQL documentation.
+ See: https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC
+ </longdesc>
+ <shortdesc lang="en">Maximum write lag before we mark a standby as inappropriate to promote</shortdesc>
+ <shortdesc lang="en">
+ Maximum write lag before we mark a standby as inappropriate to
+ promote
+ </shortdesc>
+ <content type="integer" default="$maxlag_default" />
+ </parameter>
+
+ <parameter name="recovery_template" unique="1" required="0">
+ <longdesc lang="en">
+ Path to the recovery.conf template. This file is simply copied to \$PGDATA
+ before starting the instance as slave.
+ Path to the recovery.conf template. This file is simply copied
+ to \$PGDATA before starting the instance as standby.
+ ONLY for PostgreSQL 11 and bellow. This parameter is IGNORED for
+ PostgreSQL 12 and higher. The cluster will refuse to start if a template
+ file is found.
+ PostgreSQL 12 and higher. The cluster will refuse to start if a
+ template file is found.
+ </longdesc>
+ <shortdesc lang="en">Path to the recovery.conf template for PostgreSQL 11 and older.</shortdesc>
+ <shortdesc lang="en">
+ Path to the recovery.conf template for PostgreSQL 11 and older.
+ </shortdesc>
+ <content type="string" default="PGDATA/recovery.conf.pcmk" />
+ </parameter>
+
@ -2103,7 +2121,9 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ postgresql.conf file is not in the data directory (PGDATA), eg.:
+ "-c config_file=/etc/postgresql/9.3/main/postgresql.conf".
+ </longdesc>
+ <shortdesc lang="en">Additionnal arguments given to the postgres process on startup.</shortdesc>
+ <shortdesc lang="en">
+ Additionnal arguments given to the postgres process on startup.
+ </shortdesc>
+ <content type="string" default="$start_opts_default" />
+ </parameter>
+
@ -2114,7 +2134,6 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ <action name="reload" timeout="20" />
+ <action name="promote" timeout="30" />
+ <action name="demote" timeout="120" />
+ <action name="monitor" depth="0" timeout="10" interval="15"/>
+ <action name="monitor" depth="0" timeout="10" interval="15" role="Master"/>
+ <action name="monitor" depth="0" timeout="10" interval="16" role="Slave"/>
+ <action name="notify" timeout="60" />
@ -2148,11 +2167,11 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+=item B<promote>
+
+Promotes the resource to the Master role. Suggested minimum timeout: 30.
+Promotes the resource to the primary role. Suggested minimum timeout: 30.
+
+=item B<demote>
+
+Demotes the resource to the Slave role. Suggested minimum timeout: 120.
+Demotes the resource to the standby role. Suggested minimum timeout: 120.
+
+=item B<monitor (Master role)>
+
@ -2215,19 +2234,17 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ ocf_version_cmp( $ENV{"OCF_RESKEY_crm_feature_set"}, '3.0.9' ) == 2
+ ) {
+ ocf_exit_reason(
+ 'PAF %s is compatible with Pacemaker 1.1.13 and greater',
+ 'PAF v%s is compatible with Pacemaker 1.1.13 and greater',
+ $VERSION
+ );
+ return $OCF_ERR_INSTALLED;
+ }
+
+ # check notify=true
+ $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\
+ --meta --get-parameter notify 2>/dev/null };
+ chomp $ans;
+ unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
+ unless ( defined $ENV{'OCF_RESKEY_CRM_meta_notify'}
+ and lc($ENV{'OCF_RESKEY_CRM_meta_notify'}) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
+ ocf_exit_reason(
+ 'You must set meta parameter notify=true for your master resource'
+ 'You must set meta parameter notify=true for your "master" resource'
+ );
+ return $OCF_ERR_INSTALLED;
+ }
@ -2238,7 +2255,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1'
+ ) {
+ ocf_exit_reason(
+ 'You must set meta parameter master-max=1 for your master resource'
+ 'You must set meta parameter master-max=1 for your "master" resource'
+ );
+ return $OCF_ERR_INSTALLED;
+ }
@ -2399,14 +2416,14 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ # Check if a master score exists in the cluster.
+ # During the very first start of the cluster, no master score will
+ # exists on any of the existing slaves, unless an admin designated
+ # one of them using crm_master. If no master exists the cluster will
+ # not promote a master among the slaves.
+ # exists on any of the existing standbys, unless an admin designated
+ # one of them using crm_master. If no master score exists the
+ # cluster can not pick a standby to promote.
+ # To solve this situation, we check if there is at least one master
+ # score existing on one node in the cluster. Do nothing if at least
+ # one master score is found among the clones of the resource. If no
+ # master score exists, set a score of 1 only if the resource was a
+ # shut downed master before the start.
+ # one master score is found among the clones of the resource.
+ # If no master score exists, set a score of 1 only if the resource
+ # was a shut downed primary before the start.
+ if ( $prev_state eq "shut down" and not _master_score_exists() ) {
+ ocf_log( 'info', 'No master score around. Set mine to 1' );
+
@ -2417,7 +2434,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ }
+
+ ocf_exit_reason(
+ 'Instance "%s" is not running as a slave (returned %d)',
+ 'Instance "%s" is not running as a standby (returned %d)',
+ $OCF_RESOURCE_INSTANCE, $rc );
+
+ return $OCF_ERR_GENERIC;
@ -2624,7 +2641,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ return $OCF_SUCCESS;
+ }
+ elsif ( $rc == $OCF_NOT_RUNNING ) {
+ # Instance is stopped. Nothing to do.
+ # Instance is stopped. Need to start as standby.
+ ocf_log( 'debug', 'pgsql_demote: "%s" currently shut down',
+ $OCF_RESOURCE_INSTANCE );
+ }
@ -2638,12 +2655,12 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ return $OCF_ERR_GENERIC;
+ }
+
+ # TODO we need to make sure at least one slave is connected!!
+ # TODO Do we need to make sure at least one standby is connected?
+
+ # WARNING if the resource state is stopped instead of master, the ocf ra dev
+ # rsc advises to return OCF_ERR_GENERIC, misleading the CRM in a loop where
+ # it computes transitions of demote(failing)->stop->start->promote actions
+ # until failcount == migration-threshold.
+ # WARNING if the resource state is stopped instead of primary, the ocf ra
+ # dev rsc advises to return OCF_ERR_GENERIC, misleading the CRM in a loop
+ # where it computes transitions of demote(failing)->stop->start->promote
+ # actions until failcount == migration-threshold.
+ # This is a really ugly trick to keep going with the demode action if the
+ # rsc is already stopped gracefully.
+ # See discussion "CRM trying to demote a stopped resource" on
@ -2711,12 +2728,12 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ $rc = pgsql_monitor();
+
+ if ( $rc == $OCF_SUCCESS ) {
+ # Running as slave. Normal, expected behavior.
+ # Running as standby. Normal, expected behavior.
+ ocf_log( 'debug', 'pgsql_promote: "%s" currently running as a standby',
+ $OCF_RESOURCE_INSTANCE );
+ }
+ elsif ( $rc == $OCF_RUNNING_MASTER ) {
+ # Already a master. Unexpected, but not a problem.
+ # Already a primary. Unexpected, but not a problem.
+ ocf_log( 'info', '"%s" already running as a primary',
+ $OCF_RESOURCE_INSTANCE );
+ return $OCF_SUCCESS;
@ -2756,19 +2773,20 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ # internal error during _check_switchover
+ }
+
+ # Do not check for a better candidate if we try to recover the master
+ # Recover of a master is detected during the pre-promote action. It sets the
+ # private attribute 'recover_master' to '1' if this is a master recover.
+ if ( _get_priv_attr( 'recover_master' ) eq '1' ) {
+ ocf_log( 'info', 'Recovering old master, no election needed');
+ # Do not check for a better candidate if we try to recover the primary.
+ # Recover of a primary is detected during the pre-promote action. It sets
+ # the private attribute 'recover_primary' to '1' if this is a primary
+ # recover.
+ if ( _get_priv_attr( 'recover_primary' ) eq '1' ) {
+ ocf_log( 'info', 'Recovering old primary, no election needed')
+ }
+ else {
+
+ # The promotion is occurring on the best known candidate (highest
+ # master score), as chosen by pacemaker during the last working monitor
+ # on previous master (see pgsql_monitor/_check_locations subs).
+ # on previous primary (see pgsql_monitor/_check_locations subs).
+ # To avoid any race condition between the last monitor action on the
+ # previous master and the **real** most up-to-date standby, we
+ # previous primary and the **real** most up-to-date standby, we
+ # set each standby location during the "pre-promote" action, and stored
+ # them using the "lsn_location" resource attribute.
+ #
@ -2891,8 +2909,8 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ return $OCF_SUCCESS;
+}
+
+# This action is called **before** the actual promotion when a failing master is
+# considered unreclaimable, recoverable or a new master must be promoted
+# This action is called **before** the actual promotion when a failing primary
+# is considered unreclaimable, recoverable or a new primary must be promoted
+# (switchover or first start).
+# As every "notify" action, it is executed almost simultaneously on all
+# available nodes.
@ -2907,11 +2925,11 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ ocf_log( 'info', 'Promoting instance on node "%s"',
+ $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} );
+
+ # No need to do an election between slaves if this is recovery of the master
+ if ( _is_master_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) {
+ ocf_log( 'warning', 'This is a master recovery!' );
+ # No need to do an election if this is a recovery of the primary
+ if ( _is_primary_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) {
+ ocf_log( 'warning', 'This is a primary recovery!' );
+
+ _set_priv_attr( 'recover_master', '1' )
+ _set_priv_attr( 'recover_primary', '1' )
+ if $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename;
+
+ return $OCF_SUCCESS;
@ -2919,7 +2937,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ # Environment cleanup!
+ _delete_priv_attr( 'lsn_location' );
+ _delete_priv_attr( 'recover_master' );
+ _delete_priv_attr( 'recover_primary' );
+ _delete_priv_attr( 'nodes' );
+ _delete_priv_attr( 'cancel_switchover' );
+
@ -2943,19 +2961,19 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ # FIXME: should we allow a switchover to a lagging slave?
+ }
+
+ # We need to trigger an election between existing slaves to promote the best
+ # one based on its current LSN location. Each node set a private attribute
+ # "lsn_location" with its TL and LSN location.
+ # We need to trigger an election between existing standbys to promote the
+ # best one based on its current LSN location. Each node set a private
+ # attribute "lsn_location" with its TL and LSN location.
+ #
+ # During the following promote action, The designated standby for
+ # promotion use these attributes to check if the instance to be promoted
+ # is the best one, so we can avoid a race condition between the last
+ # successful monitor on the previous master and the current promotion.
+ # successful monitor on the previous primary and the current promotion.
+
+ # As we can not break the transition from a notification action, we check
+ # during the promotion if each node TL and LSN are valid.
+
+ # Force a checpoint to make sure the controldata shows the very last TL
+ # Force a checkpoint to make sure the controldata shows the very last TL
+ _query( q{ CHECKPOINT }, {} );
+ %cdata = _get_controldata();
+ $node_lsn = _get_last_received_lsn( 'in decimal' );
@ -2977,12 +2995,12 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ ocf_log( 'warning', 'Could not set the current node LSN' )
+ if $? != 0 ;
+
+ # If this node is the future master, keep track of the slaves that
+ # If this node is the future primary, keep track of the standbys that
+ # received the same notification to compare our LSN with them during
+ # promotion
+ if ( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename ) {
+ # Build the list of active nodes:
+ # master + slave + start - stop
+ # primary + standby + start - stop
+ # FIXME: Deal with rsc started during the same transaction but **after**
+ # the promotion ?
+ $active_nodes{ $_->{'uname'} }++ foreach @{ $OCF_NOTIFY_ENV{'active'} },
@ -2995,26 +3013,27 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ _set_priv_attr( 'nodes', $attr_nodes );
+ }
+
+ # whatever the result, it is ignored by pacemaker.
+ return $OCF_SUCCESS;
+}
+
+# This action is called after a promote action.
+sub pgsql_notify_post_promote {
+
+ # We have a new master (or the previous one recovered).
+ # We have a new primary (or the previous one recovered).
+ # Environment cleanup!
+ _delete_priv_attr( 'lsn_location' );
+ _delete_priv_attr( 'recover_master' );
+ _delete_priv_attr( 'recover_primary' );
+ _delete_priv_attr( 'nodes' );
+ _delete_priv_attr( 'cancel_switchover' );
+
+ # whatever the result, it is ignored by pacemaker.
+ return $OCF_SUCCESS;
+}
+
+# This is called before a demote occurs.
+sub pgsql_notify_pre_demote {
+ my $rc;
+ my %cdata;
+
+ # do nothing if the local node will not be demoted
+ return $OCF_SUCCESS unless scalar
@ -3022,12 +3041,12 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ $rc = pgsql_monitor();
+
+ # do nothing if this is not a master recovery
+ return $OCF_SUCCESS unless _is_master_recover( $nodename )
+ # do nothing if this is not a primary recovery
+ return $OCF_SUCCESS unless _is_primary_recover( $nodename )
+ and $rc == $OCF_FAILED_MASTER;
+
+ # in case of master crash, we need to detect if the CRM tries to recover
+ # the master clone. The usual transition is to do:
+ # in case of primary crash, we need to detect if the CRM tries to recover
+ # the primary. The usual transition is to do:
+ # demote->stop->start->promote
+ #
+ # There are multiple flaws with this transition:
@ -3040,18 +3059,26 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ # If it success, at least it will be demoted correctly with a normal
+ # status. If it fails, it will be catched up in next steps.
+
+ ocf_log( 'info', 'Trying to start failing master "%s"...',
+ ocf_log( 'info', 'Trying to start failing primary "%s"',
+ $OCF_RESOURCE_INSTANCE );
+
+ # Either the instance managed to start or it couldn't.
+ # We rely on the pg_ctk '-w' switch to take care of this. If it couldn't
+ # We rely on the pg_ctl '-w' switch to take care of this. If it couldn't
+ # start, this error will be catched up later during the various checks
+ _pg_ctl_start();
+ if( _pg_ctl_start() == 0 ) {
+ my %cdata = _get_controldata();
+
+ %cdata = _get_controldata();
+ ocf_log( 'info', 'Recovery of %s succeed', $OCF_RESOURCE_INSTANCE );
+ ocf_log( 'info', 'State is "%s" after recovery attempt',
+ $cdata{'state'} );
+ }
+ else {
+ ocf_log( 'err', 'Could not recover failing primary %s',
+ $OCF_RESOURCE_INSTANCE );
+ }
+
+ ocf_log( 'info', 'State is "%s" after recovery attempt', $cdata{'state'} );
+
+ # whatever the result, it is ignored by pacemaker.
+ return $OCF_SUCCESS;
+}
+
@ -3066,12 +3093,12 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ $rc = _controldata_to_ocf();
+
+ # do nothing if this is not a slave recovery
+ return $OCF_SUCCESS unless _is_slave_recover( $nodename )
+ # do nothing if this is not a standby recovery
+ return $OCF_SUCCESS unless _is_standby_recover( $nodename )
+ and $rc == $OCF_RUNNING_SLAVE;
+
+ # in case of slave crash, we need to detect if the CRM tries to recover
+ # the slaveclone. The usual transition is to do: stop->start
+ # in case of standby crash, we need to detect if the CRM tries to recover
+ # it. The usual transition is to do: stop->start
+ #
+ # This transition can not work because the instance is in
+ # OCF_ERR_GENERIC step. So the stop action will fail, leading most
@ -3081,7 +3108,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ # If it success, at least it will be stopped correctly with a normal
+ # status. If it fails, it will be catched up in next steps.
+
+ ocf_log( 'info', 'Trying to start failing slave "%s"...',
+ ocf_log( 'info', 'Trying to start failing standby "%s"...',
+ $OCF_RESOURCE_INSTANCE );
+
+ # Either the instance managed to start or it couldn't.
@ -3093,6 +3120,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+
+ ocf_log( 'info', 'State is "%s" after recovery attempt', $cdata{'state'} );
+
+ # whatever the result, it is ignored by pacemaker.
+ return $OCF_SUCCESS;
+}
+
@ -3116,6 +3144,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+ elsif ( /^pre-stop$/ ) { return pgsql_notify_pre_stop() }
+ }
+
+ # whatever the result, it is ignored by pacemaker.
+ return $OCF_SUCCESS;
+}
+
@ -3225,7 +3254,7 @@ diff --color -uNr a/heartbeat/pgsqlms b/heartbeat/pgsqlms
+=cut
diff --color -uNr a/paf_LICENSE b/paf_LICENSE
--- a/paf_LICENSE 1970-01-01 01:00:00.000000000 +0100
+++ b/paf_LICENSE 2021-04-14 09:16:39.083555835 +0200
+++ b/paf_LICENSE 2023-01-04 12:25:21.721889640 +0100
@@ -0,0 +1,19 @@
+Copyright (c) 2016-2020, Jehan-Guillaume de Rorthais, Mael Rimbault.
+
@ -3248,7 +3277,7 @@ diff --color -uNr a/paf_LICENSE b/paf_LICENSE
+
diff --color -uNr a/paf_README.md b/paf_README.md
--- a/paf_README.md 1970-01-01 01:00:00.000000000 +0100
+++ b/paf_README.md 2021-04-14 09:18:57.450968048 +0200
+++ b/paf_README.md 2023-01-04 12:25:21.721889640 +0100
@@ -0,0 +1,86 @@
+# PostgreSQL Automatic Failover
+

File diff suppressed because it is too large Load Diff

@ -1,36 +0,0 @@
From c3c1f1a3005f6a6d3d03d6bf0f0ac7605838146f Mon Sep 17 00:00:00 2001
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
Date: Tue, 26 Sep 2023 14:02:39 +0900
Subject: [PATCH] Low: storage-mon: Remove unnecessary code and fix typos.
---
tools/storage_mon.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 1231570c8..1aae29e58 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -318,12 +318,7 @@ static int32_t sigchld_handler(int32_t sig, void *data)
daemon_check_first_all_devices = TRUE;
}
}
-#if 0
- if (shutting_down == FALSE) {
- finished_count++;
- test_forks[index] = 0;
- }
-#endif
+
finished_count++;
test_forks[index] = 0;
@@ -521,7 +516,7 @@ static int test_device_main(gpointer data)
}
}
} else {
- /* Rrun the child process timeout watch timer. */
+ /* Run the child process timeout watch timer. */
qb_loop_timer_add(storage_mon_poll_handle, QB_LOOP_MED, timeout * QB_TIME_NS_IN_SEC, NULL, child_timeout_handler, &expire_handle);
}
}

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save