You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
367 lines
14 KiB
367 lines
14 KiB
From 764dacb6195f8940f13b9c322b1bc8189c5619fc Mon Sep 17 00:00:00 2001
|
|
From: Lars Ellenberg <lars.ellenberg@linbit.com>
|
|
Date: Mon, 6 Sep 2021 12:13:42 +0200
|
|
Subject: [PATCH 1/6] Fix NFSv4 lock failover: set NFS Server Scope
|
|
|
|
Problem: https://github.com/ClusterLabs/resource-agents/issues/1644
|
|
RFC8881, 8.4.2.1 State Reclaim:
|
|
|
|
| If the server scope is different, the client should not attempt to
|
|
| reclaim locks. In this situation, no lock reclaim is possible.
|
|
| Any attempt to re-obtain the locks with non-reclaim operations is
|
|
| problematic since there is no guarantee that the existing
|
|
| filehandles will be recognized by the new server, or that if
|
|
| recognized, they denote the same objects. It is best to treat the
|
|
| locks as having been revoked by the reconfiguration event.
|
|
|
|
That's why for lock reclaim to even be attempted, we have to define and set
|
|
the same server scope for NFSD on all cluster nodes in the NFS failover
|
|
cluster. And in Linux, that is done by setting the uts nodename for the
|
|
command that starts the nfsd kernel threads.
|
|
|
|
For "init scripts", just set it directly using unshare --uts.
|
|
For systemd units, add NFS_SERVER_SCOPE to some environment files
|
|
and inject the "unshare --uts" into the ExecStart command lines
|
|
using override drop-in files.
|
|
---
|
|
heartbeat/nfsserver | 120 +++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 119 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
|
|
index 96b19abe36..0888378645 100755
|
|
--- a/heartbeat/nfsserver
|
|
+++ b/heartbeat/nfsserver
|
|
@@ -5,6 +5,18 @@
|
|
# by hxinwei@gmail.com
|
|
# License: GNU General Public License v2 (GPLv2) and later
|
|
|
|
+
|
|
+# I don't know for certain whether all services actually _need_ this,
|
|
+# I know that at least nfs-server needs it.
|
|
+# The rgmanager resource agent in rgmanager/src/resources/nfsserver.sh.in
|
|
+# did the unshare for gssd and idmapd as well, even though it seems unclear why.
|
|
+# Let's start with just the nfs-server, and add others if/when we have clear
|
|
+# indication they need it.
|
|
+#NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-idmapd.service nfs-mountd.service nfs-server.service nfsdcld.service rpc-gssd.service rpc-statd.service rpcbind.service"
|
|
+NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-server.service"
|
|
+SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE=/run/sysconfig/nfs-server-scope
|
|
+SYSTEMD_UNSHARE_UTS_DROPIN=51-resource-agents-unshare-uts.conf
|
|
+
|
|
if [ -n "$OCF_DEBUG_LIBRARY" ]; then
|
|
. $OCF_DEBUG_LIBRARY
|
|
else
|
|
@@ -99,6 +111,31 @@ Specifies the length of sm-notify retry time (minutes).
|
|
<content type="integer" default="" />
|
|
</parameter>
|
|
|
|
+<parameter name="nfs_server_scope" unique="0" required="0">
|
|
+<longdesc lang="en">
|
|
+RFC8881, 8.4.2.1 State Reclaim:
|
|
+
|
|
+If the server scope is different, the client should not attempt to
|
|
+reclaim locks. In this situation, no lock reclaim is possible.
|
|
+Any attempt to re-obtain the locks with non-reclaim operations is
|
|
+problematic since there is no guarantee that the existing
|
|
+filehandles will be recognized by the new server, or that if
|
|
+recognized, they denote the same objects. It is best to treat the
|
|
+locks as having been revoked by the reconfiguration event.
|
|
+
|
|
+For lock reclaim to even be attempted, we have to define and set the same
|
|
+server scope for NFSD on all cluster nodes in the NFS failover cluster.
|
|
+
|
|
+This agent won't "guess" a suitable server scope name for you, you need to
|
|
+explicitly specify this. But without it, NFSv4 lock reclaim after failover
|
|
+won't work properly. Suggested value: the failover "service IP".
|
|
+</longdesc>
|
|
+<shortdesc lang="en">
|
|
+RFC8881 NFS server scope for (lock) state reclaim after failover.
|
|
+</shortdesc>
|
|
+<content type="string"/>
|
|
+</parameter>
|
|
+
|
|
<parameter name="nfs_ip" unique="0" required="0">
|
|
<longdesc lang="en">
|
|
Comma separated list of floating IP addresses used to access the nfs service
|
|
@@ -269,7 +306,11 @@ nfs_exec()
|
|
set_exec_mode
|
|
|
|
case $EXEC_MODE in
|
|
- 1) ${OCF_RESKEY_nfs_init_script} $cmd;;
|
|
+ 1) if [ -z "$OCF_RESKEY_nfs_server_scope" ] ; then
|
|
+ ${OCF_RESKEY_nfs_init_script} $cmd
|
|
+ else
|
|
+ unshare -u /bin/sh -c "hostname ${OCF_RESKEY_nfs_server_scope}; exec ${OCF_RESKEY_nfs_init_script} $cmd"
|
|
+ fi ;;
|
|
2) if ! echo $svc | grep -q "\."; then
|
|
svc="${svc}.service"
|
|
fi
|
|
@@ -623,6 +664,74 @@ notify_locks()
|
|
fi
|
|
}
|
|
|
|
+# Problem: https://github.com/ClusterLabs/resource-agents/issues/1644
|
|
+# RFC8881, 8.4.2.1 State Reclaim:
|
|
+#
|
|
+# | If the server scope is different, the client should not attempt to
|
|
+# | reclaim locks. In this situation, no lock reclaim is possible.
|
|
+# | Any attempt to re-obtain the locks with non-reclaim operations is
|
|
+# | problematic since there is no guarantee that the existing
|
|
+# | filehandles will be recognized by the new server, or that if
|
|
+# | recognized, they denote the same objects. It is best to treat the
|
|
+# | locks as having been revoked by the reconfiguration event.
|
|
+#
|
|
+# That's why for lock reclaim to even be attempted, we have to define and set
|
|
+# the same server scope for NFSD on all cluster nodes in the NFS failover
|
|
+# cluster. And in Linux, that is done by setting the uts nodename for the
|
|
+# command that starts the nfsd kernel threads.
|
|
+#
|
|
+inject_unshare_uts_name_into_systemd_units ()
|
|
+{
|
|
+ local END_TAG="# END OF DROP-IN FOR NFS SERVER SCOPE"
|
|
+ local services
|
|
+ services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE | cut -d ' ' -f1)
|
|
+
|
|
+ local svc dir dropin edited_exec_start do_reload=false
|
|
+ for svc in $services ; do
|
|
+ dir=/run/systemd/system/$svc.d
|
|
+ dropin=$dir/$SYSTEMD_UNSHARE_UTS_DROPIN
|
|
+ grep -sqF "$END_TAG" "$dropin" && continue
|
|
+
|
|
+ test -d "$dir" || mkdir -p "$dir"
|
|
+ test -e "$dropin" && rm -f "$dropin"
|
|
+
|
|
+ edited_exec_start=$(systemctl cat $svc | sed -ne "s#^ExecStart=\\(.*\\)#ExecStart=/usr/bin/unshare --uts /bin/sh -ec 'hostname \${NFS_SERVER_SCOPE}; exec \"\$@\"' -- \\1#p")
|
|
+ cat > "$dropin" <<___
|
|
+[Service]
|
|
+EnvironmentFile=$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE
|
|
+# reset list of exec start, then re-populate with unshared uts namespace
|
|
+ExecStart=
|
|
+$edited_exec_start
|
|
+$END_TAG
|
|
+___
|
|
+ do_reload=true
|
|
+ ocf_log debug "injected unshare --uts into $dropin"
|
|
+ done
|
|
+
|
|
+ mkdir -p "${SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE%/*}"
|
|
+ echo "NFS_SERVER_SCOPE=$OCF_RESKEY_nfs_server_scope" > "$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE"
|
|
+
|
|
+ $do_reload && systemctl daemon-reload
|
|
+}
|
|
+
|
|
+remove_unshare_uts_dropins ()
|
|
+{
|
|
+ local services
|
|
+ services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE)
|
|
+
|
|
+ local svc dir dropin do_reload=false
|
|
+ for svc in $services ; do
|
|
+ dir=/run/systemd/system/$svc.d
|
|
+ dropin=$dir/$SYSTEMD_UNSHARE_UTS_DROPIN
|
|
+ test -e "$dropin" || continue
|
|
+ rm -f "$dropin"
|
|
+ do_reload=true
|
|
+ ocf_log debug "removed unshare --uts from $svc"
|
|
+ done
|
|
+ rm -f "${SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE}"
|
|
+ $do_reload && systemctl daemon-reload
|
|
+}
|
|
+
|
|
nfsserver_start ()
|
|
{
|
|
local rc;
|
|
@@ -636,6 +745,13 @@ nfsserver_start ()
|
|
is_redhat_based && set_env_args
|
|
bind_tree
|
|
prepare_directory
|
|
+ case $EXEC_MODE in [23])
|
|
+ if [ -z "$OCF_RESKEY_nfs_server_scope" ] ; then
|
|
+ remove_unshare_uts_dropins
|
|
+ else
|
|
+ inject_unshare_uts_name_into_systemd_units
|
|
+ fi ;;
|
|
+ esac
|
|
|
|
if ! `mount | grep -q " on $OCF_RESKEY_rpcpipefs_dir "`; then
|
|
mount -t rpc_pipefs sunrpc $OCF_RESKEY_rpcpipefs_dir
|
|
@@ -854,6 +970,8 @@ nfsserver_stop ()
|
|
ocf_log info "NFS server stopped"
|
|
fi
|
|
|
|
+ case $EXEC_MODE in [23]) remove_unshare_uts_dropins;; esac
|
|
+
|
|
return $rc
|
|
}
|
|
|
|
|
|
From 515697b53c1614d05d39491c9af83e8d8b844b17 Mon Sep 17 00:00:00 2001
|
|
From: Lars Ellenberg <lars.ellenberg@linbit.com>
|
|
Date: Fri, 8 Oct 2021 12:01:41 +0200
|
|
Subject: [PATCH 2/6] Fix NFSv4 lock failover: set NFS Server Scope, regardless
|
|
of EXEC_MODE
|
|
|
|
Debian (and other systems) may provide "init scripts",
|
|
which will only redirect back to systemd.
|
|
|
|
If we just unshare --uts the init script invocation,
|
|
the uts namespace is useless in that case.
|
|
|
|
If systemd is running, mangle the nfs-server.service unit,
|
|
independent of the "EXEC_MODE".
|
|
---
|
|
heartbeat/nfsserver | 18 ++++++++++++++----
|
|
1 file changed, 14 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
|
|
index 0888378645..054aabbaf6 100755
|
|
--- a/heartbeat/nfsserver
|
|
+++ b/heartbeat/nfsserver
|
|
@@ -745,13 +745,20 @@ nfsserver_start ()
|
|
is_redhat_based && set_env_args
|
|
bind_tree
|
|
prepare_directory
|
|
- case $EXEC_MODE in [23])
|
|
+
|
|
+ # Debian (and other systems) may provide "init scripts",
|
|
+ # which will only redirect back to systemd.
|
|
+ # If we just unshare --uts the init script invocation,
|
|
+ # the uts namespace is useless in that case.
|
|
+ # If systemd is running, mangle the nfs-server.service unit,
|
|
+ # independent of the "EXEC_MODE" we detected.
|
|
+ if $systemd_is_running ; then
|
|
if [ -z "$OCF_RESKEY_nfs_server_scope" ] ; then
|
|
remove_unshare_uts_dropins
|
|
else
|
|
inject_unshare_uts_name_into_systemd_units
|
|
- fi ;;
|
|
- esac
|
|
+ fi
|
|
+ fi
|
|
|
|
if ! `mount | grep -q " on $OCF_RESKEY_rpcpipefs_dir "`; then
|
|
mount -t rpc_pipefs sunrpc $OCF_RESKEY_rpcpipefs_dir
|
|
@@ -970,7 +977,9 @@ nfsserver_stop ()
|
|
ocf_log info "NFS server stopped"
|
|
fi
|
|
|
|
- case $EXEC_MODE in [23]) remove_unshare_uts_dropins;; esac
|
|
+ if $systemd_is_running; then
|
|
+ remove_unshare_uts_dropins
|
|
+ fi
|
|
|
|
return $rc
|
|
}
|
|
@@ -1008,6 +1017,7 @@ nfsserver_validate ()
|
|
}
|
|
|
|
nfsserver_validate
|
|
+systemd_is_running && systemd_is_running=true || systemd_is_running=false
|
|
|
|
case $__OCF_ACTION in
|
|
start) nfsserver_start
|
|
|
|
From e83c20d88f404f9f9d829c654883d60eb6cc9ff3 Mon Sep 17 00:00:00 2001
|
|
From: Lars Ellenberg <lars.ellenberg@linbit.com>
|
|
Date: Fri, 8 Oct 2021 17:06:18 +0200
|
|
Subject: [PATCH 3/6] Fix NFSv4 lock failover: add missing "|cut -f1" in
|
|
remove_unshare_uts_dropins
|
|
|
|
---
|
|
heartbeat/nfsserver | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
|
|
index 054aabbaf6..d3db89a537 100755
|
|
--- a/heartbeat/nfsserver
|
|
+++ b/heartbeat/nfsserver
|
|
@@ -717,7 +717,7 @@ ___
|
|
remove_unshare_uts_dropins ()
|
|
{
|
|
local services
|
|
- services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE)
|
|
+ services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE | cut -d ' ' -f1)
|
|
|
|
local svc dir dropin do_reload=false
|
|
for svc in $services ; do
|
|
|
|
From b5b0e4a0b60d285af576b2d8ecfbe95e5a177a87 Mon Sep 17 00:00:00 2001
|
|
From: Lars Ellenberg <lars.ellenberg@linbit.com>
|
|
Date: Fri, 8 Oct 2021 17:07:13 +0200
|
|
Subject: [PATCH 4/6] Fix NFSv4 lock failover: get rid of "world-inaccessible"
|
|
warning
|
|
|
|
by temporarily changing the umask before generating the dropins
|
|
---
|
|
heartbeat/nfsserver | 3 +++
|
|
1 file changed, 3 insertions(+)
|
|
|
|
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
|
|
index d3db89a537..447e0302b2 100755
|
|
--- a/heartbeat/nfsserver
|
|
+++ b/heartbeat/nfsserver
|
|
@@ -687,6 +687,8 @@ inject_unshare_uts_name_into_systemd_units ()
|
|
services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE | cut -d ' ' -f1)
|
|
|
|
local svc dir dropin edited_exec_start do_reload=false
|
|
+ local old_umask=$(umask)
|
|
+ umask 0022
|
|
for svc in $services ; do
|
|
dir=/run/systemd/system/$svc.d
|
|
dropin=$dir/$SYSTEMD_UNSHARE_UTS_DROPIN
|
|
@@ -710,6 +712,7 @@ ___
|
|
|
|
mkdir -p "${SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE%/*}"
|
|
echo "NFS_SERVER_SCOPE=$OCF_RESKEY_nfs_server_scope" > "$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE"
|
|
+ umask $old_umask
|
|
|
|
$do_reload && systemctl daemon-reload
|
|
}
|
|
|
|
From 3c6c91ce5a00eeef9cd766389d73a0b42580a1e6 Mon Sep 17 00:00:00 2001
|
|
From: Lars Ellenberg <lars.ellenberg@linbit.com>
|
|
Date: Fri, 8 Oct 2021 17:08:09 +0200
|
|
Subject: [PATCH 5/6] Fix NFSv4 lock failover: deal with "special executable
|
|
prefix" chars in ExecStart
|
|
|
|
---
|
|
heartbeat/nfsserver | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
|
|
index 447e0302b2..5326bd2c6e 100755
|
|
--- a/heartbeat/nfsserver
|
|
+++ b/heartbeat/nfsserver
|
|
@@ -697,7 +697,7 @@ inject_unshare_uts_name_into_systemd_units ()
|
|
test -d "$dir" || mkdir -p "$dir"
|
|
test -e "$dropin" && rm -f "$dropin"
|
|
|
|
- edited_exec_start=$(systemctl cat $svc | sed -ne "s#^ExecStart=\\(.*\\)#ExecStart=/usr/bin/unshare --uts /bin/sh -ec 'hostname \${NFS_SERVER_SCOPE}; exec \"\$@\"' -- \\1#p")
|
|
+ edited_exec_start=$(systemctl cat $svc | sed -ne "s#^ExecStart=\\([-+:!@]*\\)\\(.*\\)#ExecStart=\\1/usr/bin/unshare --uts /bin/sh -c 'hostname \${NFS_SERVER_SCOPE}; exec \"\$@\"' -- \\2#p")
|
|
cat > "$dropin" <<___
|
|
[Service]
|
|
EnvironmentFile=$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE
|
|
|
|
From 512fbaf61e6d24a1236ef50e323ea17a62485c36 Mon Sep 17 00:00:00 2001
|
|
From: Lars Ellenberg <lars.ellenberg@linbit.com>
|
|
Date: Fri, 8 Oct 2021 17:08:59 +0200
|
|
Subject: [PATCH 6/6] Fix NFSv4 lock failover: add rpc-statd-notify to the
|
|
comment list of potentially interesting services
|
|
|
|
---
|
|
heartbeat/nfsserver | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
|
|
index 5326bd2c6e..240dd1a76c 100755
|
|
--- a/heartbeat/nfsserver
|
|
+++ b/heartbeat/nfsserver
|
|
@@ -12,7 +12,7 @@
|
|
# did the unshare for gssd and idmapd as well, even though it seems unclear why.
|
|
# Let's start with just the nfs-server, and add others if/when we have clear
|
|
# indication they need it.
|
|
-#NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-idmapd.service nfs-mountd.service nfs-server.service nfsdcld.service rpc-gssd.service rpc-statd.service rpcbind.service"
|
|
+#NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-idmapd.service nfs-mountd.service nfs-server.service nfsdcld.service rpc-gssd.service rpc-statd.service rpc-statd-notify.service rpcbind.service"
|
|
NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-server.service"
|
|
SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE=/run/sysconfig/nfs-server-scope
|
|
SYSTEMD_UNSHARE_UTS_DROPIN=51-resource-agents-unshare-uts.conf
|