import xfsprogs-5.19.0-4.el9

c9 imports/c9/xfsprogs-5.19.0-4.el9
MSVSphere Packaging Team 1 year ago
parent b739e49f48
commit 1e2d70815f

2
.gitignore vendored

@ -1 +1 @@
SOURCES/xfsprogs-5.14.2.tar.xz
SOURCES/xfsprogs-5.19.0.tar.xz

@ -1 +1 @@
035e552cf4a08d5dbe1330ec1e3e6ceeb21b6bc9 SOURCES/xfsprogs-5.14.2.tar.xz
12afbdd497603b98945ea18f9aa5a78c671a6e4c SOURCES/xfsprogs-5.19.0.tar.xz

@ -1,85 +0,0 @@
--- xfsprogs-5.12.0.orig/man/man8/mkfs.xfs.8
+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8
@@ -203,7 +203,7 @@ December 1901 to January 2038, and quota
.IP
By default,
.B mkfs.xfs
-will not enable this feature.
+in RHEL9 will enable this feature.
If the option
.B \-m crc=0
is used, the large timestamp feature is not supported and is disabled.
@@ -256,7 +256,7 @@ This can be used to reduce mount times w
.IP
By default,
.B mkfs.xfs
-will not enable this option.
+in RHEL9 will enable this option.
This feature is only available for filesystems created with the (default)
.B \-m finobt=1
option set.
--- xfsprogs-5.12.0.orig/mkfs/xfs_mkfs.c
+++ xfsprogs-5.12.0/mkfs/xfs_mkfs.c
@@ -3795,6 +3797,23 @@ cfgfile_parse(
cli->cfgfile);
}
+static unsigned int get_system_kver(void)
+{
+ const char *kver = getenv("KVER");
+ struct utsname utsname;
+ int a, b, c;
+
+ if (!kver) {
+ uname(&utsname);
+ kver = utsname.release;
+ }
+
+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3)
+ return LINUX_VERSION_CODE;
+
+ return KERNEL_VERSION(a,b,c);
+}
+
int
main(
int argc,
@@ -3848,17 +3867,25 @@ main(
.spinodes = true,
.rmapbt = false,
.reflink = true,
- .inobtcnt = false,
+ .inobtcnt = true,
.parent_pointers = false,
.nodalign = false,
.nortalign = false,
- .bigtime = false,
+ .bigtime = true,
},
};
struct list_head buffer_list;
+ unsigned int kver;
int error;
+ /* turn bigtime & inobtcnt back off if running under older kernels */
+ kver = get_system_kver();
+ if (kver < KERNEL_VERSION(5,10,0)) {
+ dft.sb_feat.inobtcnt = false;
+ dft.sb_feat.bigtime = false;
+ }
+
platform_uuid_generate(&cli.uuid);
progname = basename(argv[0]);
setlocale(LC_ALL, "");
--- xfsprogs-5.14.0/mkfs/xfs_mkfs.c.orig
+++ xfsprogs-5.14.0/mkfs/xfs_mkfs.c
@@ -12,6 +12,8 @@
#include "libfrog/convert.h"
#include "proto.h"
#include <ini.h>
+#include <linux/version.h>
+#include <sys/utsname.h>
#define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog)))
#define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog)))

@ -1,5 +1,5 @@
--- xfsprogs-5.12.0/man/man8/mkfs.xfs.8.backup 2021-12-01 20:48:09.241170607 +0100
+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8 2021-12-03 15:34:32.382616819 +0100
--- a/man/man8/mkfs.xfs.8.in 2021-12-01 20:48:09.241170607 +0100
+++ b/man/man8/mkfs.xfs.8.in 2021-12-03 15:34:32.382616819 +0100
@@ -1091,6 +1091,12 @@
.HP
.PD

@ -1,17 +0,0 @@
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)
iQIcBAABAgAGBQJhrpYSAAoJECCuFpLhPd7gIH4P/1KdoaOWy77MqSmd5RWVN+D3
PQTD4YIjPtpLnec/dnX8972jX+WeeT0ydkviOAE3J6gejlHa10S+5SvpXTQLl9Cm
jDbNgLh7bV/mdH9H9RNmNM8xtCMufhO09BLt7O6MyXu1g3T+Np1kaG+hoNtS5oze
fx7r/nh9ZCeCyMRTGrRlso5xNXL92qL9SkgbPWAtTjvvrcsaTkhNrC4o08tyv8BN
oMeZU1IHSiiFXs4RlUtmzZKXVrWiWWJTVOdnb107qNAchghbOivqo9zuoxqtkl2R
Yb3YQfYD7eyGVdXgY6CTGCnE9HEVE3sa/2MjZ1KTWP9ZK/F9gZT2izEF/dtbpV8G
7IdKBpKda7tDKJcUw994HyUiZfYiHVqcTwcPDvK18fKqxd1khSKbhShc3nvjo4P2
yGSGrDaOp8pkPc3QJrSMYQQAYVWtle9Y2Uj+TDMu8/XJi+pJxbxn5B7XtKlP6MWm
5pNjA4mh3zX63D78NCWa/XimSFUpramlDI1LE9enaUidvdRMg6vMn8XYh4g0uGgP
2Yp1F2Z2VGX0NxrulHowMqxGcgtneC5bZcsJ2GeZ4r6i+p8VIS5ZJjNLZ2p993c8
w/32yLouC2j+pcKn3Ljwd2aPQBnGO5IK9CWn93PSG7kWvuN8S8RC0MoCH/a/xXCz
Xbg1NICCCVKczfyFtQM7
=J/Jr
-----END PGP SIGNATURE-----

@ -0,0 +1,52 @@
--- a/mkfs/xfs_mkfs.c.orig 2022-08-12 20:38:21.000000000 +0200
+++ b/mkfs/xfs_mkfs.c 2023-01-25 11:06:01.863076713 +0100
@@ -13,6 +13,8 @@
#include "libfrog/crc32cselftest.h"
#include "proto.h"
#include <ini.h>
+#include <linux/version.h>
+#include <sys/utsname.h>
#define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog)))
#define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog)))
@@ -3998,6 +4000,23 @@
cli->cfgfile);
}
+static unsigned int get_system_kver(void)
+{
+ const char *kver = getenv("KVER");
+ struct utsname utsname;
+ int a, b, c;
+
+ if (!kver) {
+ uname(&utsname);
+ kver = utsname.release;
+ }
+
+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3)
+ return LINUX_VERSION_CODE;
+
+ return KERNEL_VERSION(a,b,c);
+}
+
int
main(
int argc,
@@ -4077,8 +4096,16 @@
};
struct list_head buffer_list;
+ unsigned int kver;
int error;
+ /* turn bigtime & inobtcnt back off if running under older kernels */
+ kver = get_system_kver();
+ if (kver < KERNEL_VERSION(5,10,0)) {
+ dft.sb_feat.inobtcnt = false;
+ dft.sb_feat.bigtime = false;
+ }
+
platform_uuid_generate(&cli.uuid);
progname = basename(argv[0]);
setlocale(LC_ALL, "");

@ -0,0 +1,91 @@
From 17b691400e8ce0755bb1d7a33490fbc014067e5e Mon Sep 17 00:00:00 2001
From: Pavel Reichl <preichl@redhat.com>
Date: Fri, 27 Jan 2023 06:30:20 +0100
Subject: [PATCH] mkfs: tolerate tiny filesystems
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
man/man8/mkfs.xfs.8.in | 4 ++--
mkfs/xfs_mkfs.c | 23 ++++++++++++++---------
2 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
index 211e7b0c..03f0fda8 100644
--- a/man/man8/mkfs.xfs.8.in
+++ b/man/man8/mkfs.xfs.8.in
@@ -405,7 +405,7 @@ is required if
is given. Otherwise, it is only needed if the filesystem should occupy
less space than the size of the special file.
-The data section must be at least 300MB in size.
+The data section should be at least 300MB in size.
.TP
.BI sunit= value
This is used to specify the stripe unit for a RAID device or a
@@ -705,7 +705,7 @@ described above. The overriding minimum value for size is 512 blocks.
With some combinations of filesystem block size, inode size,
and directory block size, the minimum log size is larger than 512 blocks.
-The log must be at least 64MB in size.
+The log should be at least 64MB in size.
The log cannot be more than 2GB in size.
.TP
.BI version= value
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index 9dd0e79c..72c906d6 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -2503,6 +2503,8 @@ validate_supported(
struct xfs_mount *mp,
struct cli_params *cli)
{
+ bool deprecated = false;
+
/* Undocumented option to enable unsupported tiny filesystems. */
if (!cli->is_supported) {
printf(
@@ -2532,9 +2534,8 @@ validate_supported(
* 64MB * (8 / 7) * 4 = 293MB
*/
if (mp->m_sb.sb_dblocks < MEGABYTES(300, mp->m_sb.sb_blocklog)) {
- fprintf(stderr,
- _("Filesystem must be larger than 300MB.\n"));
- usage();
+ printf(_("Filesystem should be larger than 300MB.\n"));
+ deprecated = true;
}
/*
@@ -2543,9 +2544,8 @@ validate_supported(
*/
if (mp->m_sb.sb_logblocks <
XFS_MIN_REALISTIC_LOG_BLOCKS(mp->m_sb.sb_blocklog)) {
- fprintf(stderr,
- _("Log size must be at least 64MB.\n"));
- usage();
+ printf( _("Log size should be at least 64MB.\n"));
+ deprecated = true;
}
/*
@@ -2553,9 +2553,14 @@ validate_supported(
* have redundant superblocks.
*/
if (mp->m_sb.sb_agcount < 2) {
- fprintf(stderr,
- _("Filesystem must have at least 2 superblocks for redundancy!\n"));
- usage();
+ printf(
+ _("Filesystem should have at least 2 superblocks for redundancy!\n"));
+ deprecated = true;
+ }
+
+ if (deprecated) {
+ printf(
+_("Support for filesystems like this one is deprecated and they will not be supported in future releases.\n"));
}
}
--
2.39.1

@ -0,0 +1,113 @@
From b445624f0882badf00da739c52e58a85c18ae002 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 15 Mar 2023 15:56:35 +0100
Subject: [PATCH] xfs: estimate post-merge refcounts correctly
Source kernel commit: b25d1984aa884fc91a73a5a407b9ac976d441e9b
Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount
metadata corruptions after being run. Specifically, xfs_repair noticed
single-block refcount records that could be combined but had not been.
The root cause of this is improper MAXREFCOUNT edge case handling in
xfs_refcount_merge_extents. When we're trying to find candidates for a
refcount btree record merge, we compute the refcount attribute of the
merged record, but we fail to account for the fact that once a record
hits rc_refcount == MAXREFCOUNT, it is pinned that way forever. Hence
the computed refcount is wrong, and we fail to merge the extents.
Fix this by adjusting the merge predicates to compute the adjusted
refcount correctly.
Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_refcount.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
index f6167c5f..29258bdd 100644
--- a/libxfs/xfs_refcount.c
+++ b/libxfs/xfs_refcount.c
@@ -819,6 +819,17 @@ xfs_refc_valid(
return rc->rc_startblock != NULLAGBLOCK;
}
+static inline xfs_nlink_t
+xfs_refc_merge_refcount(
+ const struct xfs_refcount_irec *irec,
+ enum xfs_refc_adjust_op adjust)
+{
+ /* Once a record hits MAXREFCOUNT, it is pinned there forever */
+ if (irec->rc_refcount == MAXREFCOUNT)
+ return MAXREFCOUNT;
+ return irec->rc_refcount + adjust;
+}
+
static inline bool
xfs_refc_want_merge_center(
const struct xfs_refcount_irec *left,
@@ -830,6 +841,7 @@ xfs_refc_want_merge_center(
unsigned long long *ulenp)
{
unsigned long long ulen = left->rc_blockcount;
+ xfs_nlink_t new_refcount;
/*
* To merge with a center record, both shoulder records must be
@@ -845,9 +857,10 @@ xfs_refc_want_merge_center(
return false;
/* The shoulder record refcounts must match the new refcount. */
- if (left->rc_refcount != cleft->rc_refcount + adjust)
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+ if (left->rc_refcount != new_refcount)
return false;
- if (right->rc_refcount != cleft->rc_refcount + adjust)
+ if (right->rc_refcount != new_refcount)
return false;
/*
@@ -870,6 +883,7 @@ xfs_refc_want_merge_left(
enum xfs_refc_adjust_op adjust)
{
unsigned long long ulen = left->rc_blockcount;
+ xfs_nlink_t new_refcount;
/*
* For a left merge, the left shoulder record must be adjacent to the
@@ -880,7 +894,8 @@ xfs_refc_want_merge_left(
return false;
/* Left shoulder record refcount must match the new refcount. */
- if (left->rc_refcount != cleft->rc_refcount + adjust)
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+ if (left->rc_refcount != new_refcount)
return false;
/*
@@ -902,6 +917,7 @@ xfs_refc_want_merge_right(
enum xfs_refc_adjust_op adjust)
{
unsigned long long ulen = right->rc_blockcount;
+ xfs_nlink_t new_refcount;
/*
* For a right merge, the right shoulder record must be adjacent to the
@@ -912,7 +928,8 @@ xfs_refc_want_merge_right(
return false;
/* Right shoulder record refcount must match the new refcount. */
- if (right->rc_refcount != cright->rc_refcount + adjust)
+ new_refcount = xfs_refc_merge_refcount(cright, adjust);
+ if (right->rc_refcount != new_refcount)
return false;
/*
--
2.40.0

@ -0,0 +1,88 @@
From a68dabd45f3591456ecf7e35f6a6077db79f6bc6 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 15 Mar 2023 15:59:35 +0100
Subject: [PATCH] xfs: fix off-by-one error in xfs_btree_space_to_height
Source kernel commit: c0f399ff51495ac8d30367418f4f6292ecd61fbe
Lately I've been stress-testing extreme-sized rmap btrees by using the
(new) xfs_db bmap_inflate command to clone bmbt mappings billions of
times and then using xfs_repair to build new rmap and refcount btrees.
This of course is /much/ faster than actually FICLONEing a file billions
of times.
Unfortunately, xfs_repair fails in xfs_btree_bload_compute_geometry with
EOVERFLOW, which indicates that xfs_mount.m_rmap_maxlevels is not
sufficiently large for the test scenario. For a 1TB filesystem (~67
million AG blocks, 4 AGs) the btheight command reports:
$ xfs_db -c 'btheight -n 4400801200 -w min rmapbt' /dev/sda
rmapbt: worst case per 4096-byte block: 84 records (leaf) / 45 keyptrs (node)
level 0: 4400801200 records, 52390491 blocks
level 1: 52390491 records, 1164234 blocks
level 2: 1164234 records, 25872 blocks
level 3: 25872 records, 575 blocks
level 4: 575 records, 13 blocks
level 5: 13 records, 1 block
6 levels, 53581186 blocks total
The AG is sufficiently large to build this rmap btree. Unfortunately,
m_rmap_maxlevels is 5. Augmenting the loop in the space->height
function to report height, node blocks, and blocks remaining produces
this:
ht 1 node_blocks 45 blockleft 67108863
ht 2 node_blocks 2025 blockleft 67108818
ht 3 node_blocks 91125 blockleft 67106793
ht 4 node_blocks 4100625 blockleft 67015668
final height: 5
The goal of this function is to compute the maximum height btree that
can be stored in the given number of ondisk fsblocks. Starting with the
top level of the tree, each iteration through the loop adds the fanout
factor of the next level down until we run out of blocks. IOWs, maximum
height is achieved by using the smallest fanout factor that can apply
to that level.
However, the loop setup is not correct. Top level btree blocks are
allowed to contain fewer than minrecs items, so the computation is
incorrect because the first time through the loop it should be using a
fanout factor of 2. With this corrected, the above becomes:
ht 1 node_blocks 2 blockleft 67108863
ht 2 node_blocks 90 blockleft 67108861
ht 3 node_blocks 4050 blockleft 67108771
ht 4 node_blocks 182250 blockleft 67104721
ht 5 node_blocks 8201250 blockleft 66922471
final height: 6
Fixes: 9ec691205e7d ("xfs: compute the maximum height of the rmap btree when reflink enabled")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_btree.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
index 65d38637..38a3092d 100644
--- a/libxfs/xfs_btree.c
+++ b/libxfs/xfs_btree.c
@@ -4663,7 +4663,12 @@ xfs_btree_space_to_height(
const unsigned int *limits,
unsigned long long leaf_blocks)
{
- unsigned long long node_blocks = limits[1];
+ /*
+ * The root btree block can have fewer than minrecs pointers in it
+ * because the tree might not be big enough to require that amount of
+ * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
+ */
+ unsigned long long node_blocks = 2;
unsigned long long blocks_left = leaf_blocks - 1;
unsigned int height = 1;
--
2.40.0

@ -0,0 +1,119 @@
From b827e2318ea2bb3eabca13a965c2535a1d7289e5 Mon Sep 17 00:00:00 2001
From: Long Li <leo.lilong@huawei.com>
Date: Fri, 18 Nov 2022 12:23:57 +0100
Subject: [PATCH] xfs: fix sb write verify for lazysbcount
Source kernel commit: 7cecd500d90164419add650e26cc1de03a7a66cb
When lazysbcount is enabled, fsstress and loop mount/unmount test report
the following problems:
XFS (loop0): SB summary counter sanity check failed
XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460,
xfs_sb block 0x0
XFS (loop0): Unmount and run xfs_repair
XFS (loop0): First 128 bytes of corrupted metadata buffer:
00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(..
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z
00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... ..........
00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................
00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................
00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................
00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................
XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply
+0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem.
XFS (loop0): Please unmount the filesystem and rectify the problem(s)
XFS (loop0): log mount/recovery failed: error -117
XFS (loop0): log mount failed
This corruption will shut down the file system and the file system will
no longer be mountable. The following script can reproduce the problem,
but it may take a long time.
#!/bin/bash
device=/dev/sda
testdir=/mnt/test
round=0
function fail()
{
echo "$*"
exit 1
}
mkdir -p $testdir
while [ $round -lt 10000 ]
do
echo "******* round $round ********"
mkfs.xfs -f $device
mount $device $testdir || fail "mount failed!"
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null &
sleep 4
killall -w fsstress
umount $testdir
xfs_repair -e $device > /dev/null
if [ $? -eq 2 ];then
echo "ERR CODE 2: Dirty log exception during repair."
exit 1
fi
round=$(($round+1))
done
When lazysbcount is enabled, there is no additional lock protection for
reading m_ifree and m_icount in xfs_log_sb(). If another CPU modifies
m_ifree, this can make m_ifree greater than m_icount. For example,
consider the following sequence, where ifreedelta is positive:
CPU0 CPU1
xfs_log_sb xfs_trans_unreserve_and_mod_sb
---------- ------------------------------
percpu_counter_sum(&mp->m_icount)
percpu_counter_add_batch(&mp->m_icount,
idelta, XFS_ICOUNT_BATCH)
percpu_counter_add(&mp->m_ifree, ifreedelta);
percpu_counter_sum(&mp->m_ifree)
After this, an incorrect inode count (sb_ifree > sb_icount) will be written to
the log. In the subsequent writing of sb, the incorrect inode count (sb_ifree >
sb_icount) will fail to pass the boundary check in xfs_validate_sb_write(),
which causes the file system to shut down.
When lazysbcount is enabled, we don't need to guarantee that Lazy sb
counters are completely correct, but we do need to guarantee that sb_ifree
<= sb_icount. On the other hand, the constraint that m_ifree <= m_icount
must be satisfied any time that there /cannot/ be other threads allocating
or freeing inode chunks. If the constraint is violated under these
circumstances, sb_i{count,free} (the ondisk superblock inode counters)
may be incorrect and need to be marked sick at unmount; the counts will
be rebuilt on the next mount.
Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_sb.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
index fc33dc4a..d05f0e6e 100644
--- a/libxfs/xfs_sb.c
+++ b/libxfs/xfs_sb.c
@@ -970,7 +970,9 @@ xfs_log_sb(
*/
if (xfs_has_lazysbcount(mp)) {
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+ mp->m_sb.sb_ifree = min_t(uint64_t,
+ percpu_counter_sum(&mp->m_ifree),
+ mp->m_sb.sb_icount);
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
}
--
2.40.0

@ -0,0 +1,73 @@
From f5ef812888a81be534466fa34df747c16bb65b7f Mon Sep 17 00:00:00 2001
From: Guo Xuenan <guoxuenan@huawei.com>
Date: Wed, 15 Mar 2023 15:57:35 +0100
Subject: [PATCH] xfs: get rid of assert from xfs_btree_islastblock
Source kernel commit: 8c25febf23963431686f04874b96321288504127
xfs_btree_check_block contains debugging knobs. With XFS_DEBUG enabled,
turning on the debugging knob can trigger the assert in xfs_btree_islastblock.
The test script is as follows:
while true
do
mount $disk $mountpoint
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null
echo 1 > /sys/fs/xfs/sda/errortag/btree_chk_sblk
sleep 10
umount $mountpoint
done
Kick off fsstress and only *then* turn on the debugging knob. If it
happens that the knob gets turned on after the cntbt lookup succeeds
but before the call to xfs_btree_islastblock, then we *can* end up in
the situation where a previously checked btree block suddenly starts
returning EFSCORRUPTED from xfs_btree_check_block. Kaboom.
Darrick gave a very detailed explanation, as follows:
Looking back at commit 27d9ee577dcce, I think the point of all this was
to make sure that the cursor has actually performed a lookup, and that
the btree block at whatever level we're asking about is ok.
If the caller hasn't ever done a lookup, the bc_levels array will be
empty, so cur->bc_levels[level].bp pointer will be NULL. The call to
xfs_btree_get_block will crash anyway, so the "ASSERT(block);" part is
pointless.
If the caller did a lookup but the lookup failed due to block
corruption, the corresponding cur->bc_levels[level].bp pointer will also
be NULL, and we'll still crash. The "ASSERT(xfs_btree_check_block);"
logic is also unnecessary.
If the cursor level points to an inode root, the block buffer will be
incore, so it had better always be consistent.
If the caller ignores a failed lookup after a successful one and calls
this function, the cursor state is garbage and the assert wouldn't have
tripped anyway. So get rid of the assert.
Fixes: 27d9ee577dcc ("xfs: actually check xfs_btree_check_block return in xfs_btree_islastblock")
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_btree.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
index eef27858..29c4b4cc 100644
--- a/libxfs/xfs_btree.h
+++ b/libxfs/xfs_btree.h
@@ -556,7 +556,6 @@ xfs_btree_islastblock(
struct xfs_buf *bp;
block = xfs_btree_get_block(cur, level, &bp);
- ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
--
2.40.0

@ -0,0 +1,187 @@
From d1dca9f6b365e439878e550ed0c801bbfb6d347b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 15 Mar 2023 15:55:35 +0100
Subject: [PATCH] xfs: hoist refcount record merge predicates
Source kernel commit: 9d720a5a658f5135861773f26e927449bef93d61
Hoist these multiline conditionals into separate static inline helpers
to improve readability and set the stage for corruption fixes that will
be introduced in the next patch.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++++++++------
1 file changed, 113 insertions(+), 16 deletions(-)
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
index 64e66861..f6167c5f 100644
--- a/libxfs/xfs_refcount.c
+++ b/libxfs/xfs_refcount.c
@@ -814,11 +814,119 @@ out_error:
/* Is this extent valid? */
static inline bool
xfs_refc_valid(
- struct xfs_refcount_irec *rc)
+ const struct xfs_refcount_irec *rc)
{
return rc->rc_startblock != NULLAGBLOCK;
}
+static inline bool
+xfs_refc_want_merge_center(
+ const struct xfs_refcount_irec *left,
+ const struct xfs_refcount_irec *cleft,
+ const struct xfs_refcount_irec *cright,
+ const struct xfs_refcount_irec *right,
+ bool cleft_is_cright,
+ enum xfs_refc_adjust_op adjust,
+ unsigned long long *ulenp)
+{
+ unsigned long long ulen = left->rc_blockcount;
+
+ /*
+ * To merge with a center record, both shoulder records must be
+ * adjacent to the record we want to adjust. This is only true if
+ * find_left and find_right made all four records valid.
+ */
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
+ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
+ return false;
+
+ /* There must only be one record for the entire range. */
+ if (!cleft_is_cright)
+ return false;
+
+ /* The shoulder record refcounts must match the new refcount. */
+ if (left->rc_refcount != cleft->rc_refcount + adjust)
+ return false;
+ if (right->rc_refcount != cleft->rc_refcount + adjust)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cleft->rc_blockcount + right->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ *ulenp = ulen;
+ return true;
+}
+
+static inline bool
+xfs_refc_want_merge_left(
+ const struct xfs_refcount_irec *left,
+ const struct xfs_refcount_irec *cleft,
+ enum xfs_refc_adjust_op adjust)
+{
+ unsigned long long ulen = left->rc_blockcount;
+
+ /*
+ * For a left merge, the left shoulder record must be adjacent to the
+ * start of the range. If this is true, find_left made left and cleft
+ * contain valid contents.
+ */
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
+ return false;
+
+ /* Left shoulder record refcount must match the new refcount. */
+ if (left->rc_refcount != cleft->rc_refcount + adjust)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cleft->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ return true;
+}
+
+static inline bool
+xfs_refc_want_merge_right(
+ const struct xfs_refcount_irec *cright,
+ const struct xfs_refcount_irec *right,
+ enum xfs_refc_adjust_op adjust)
+{
+ unsigned long long ulen = right->rc_blockcount;
+
+ /*
+ * For a right merge, the right shoulder record must be adjacent to the
+ * end of the range. If this is true, find_right made cright and right
+ * contain valid contents.
+ */
+ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
+ return false;
+
+ /* Right shoulder record refcount must match the new refcount. */
+ if (right->rc_refcount != cright->rc_refcount + adjust)
+ return false;
+
+ /*
+ * The new record cannot exceed the max length. ulen is a ULL as the
+ * individual record block counts can be up to (u32 - 1) in length
+ * hence we need to catch u32 addition overflows here.
+ */
+ ulen += cright->rc_blockcount;
+ if (ulen >= MAXREFCEXTLEN)
+ return false;
+
+ return true;
+}
+
/*
* Try to merge with any extents on the boundaries of the adjustment range.
*/
@@ -860,23 +968,15 @@ xfs_refcount_merge_extents(
(cleft.rc_blockcount == cright.rc_blockcount);
/* Try to merge left, cleft, and right. cleft must == cright. */
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
- right.rc_blockcount;
- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
- left.rc_refcount == cleft.rc_refcount + adjust &&
- right.rc_refcount == cleft.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
+ adjust, &ulen)) {
*shape_changed = true;
return xfs_refcount_merge_center_extents(cur, &left, &cleft,
&right, ulen, aglen);
}
/* Try to merge left and cleft. */
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
- left.rc_refcount == cleft.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
*shape_changed = true;
error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
agbno, aglen);
@@ -892,10 +992,7 @@ xfs_refcount_merge_extents(
}
/* Try to merge cright and right. */
- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
- right.rc_refcount == cright.rc_refcount + adjust &&
- ulen < MAXREFCEXTLEN) {
+ if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
*shape_changed = true;
return xfs_refcount_merge_right_extent(cur, &right, &cright,
aglen);
--
2.40.0

@ -0,0 +1,69 @@
From 798d43495df2c8a09a73b8e868a71d8f2fd81d5e Mon Sep 17 00:00:00 2001
From: Andrey Strachuk <strochuk@ispras.ru>
Date: Wed, 24 Aug 2022 10:24:01 +0200
Subject: [PATCH] xfs: removed useless condition in function xfs_attr_node_get
Source kernel commit: 0f38063d7a38015a47ca1488406bf21e0effe80e
At line 1561, variable "state" is being compared
with NULL every loop iteration.
-------------------------------------------------------------------
1561 for (i = 0; state != NULL && i < state->path.active; i++) {
1562 xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1563 state->path.blk[i].bp = NULL;
1564 }
-------------------------------------------------------------------
However, it cannot be NULL.
----------------------------------------
1546 state = xfs_da_state_alloc(args);
----------------------------------------
xfs_da_state_alloc calls kmem_cache_zalloc. kmem_cache_zalloc is
called with __GFP_NOFAIL flag and, therefore, it cannot return NULL.
--------------------------------------------------------------------------
struct xfs_da_state *
xfs_da_state_alloc(
struct xfs_da_args *args)
{
struct xfs_da_state *state;
state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL);
state->args = args;
state->mp = args->dp->i_mount;
return state;
}
--------------------------------------------------------------------------
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Signed-off-by: Andrey Strachuk <strochuk@ispras.ru>
Fixes: 4d0cdd2bb8f0 ("xfs: clean up xfs_attr_node_hasname")
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/xfs_attr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c
index 08973934..b451fcdb 100644
--- a/libxfs/xfs_attr.c
+++ b/libxfs/xfs_attr.c
@@ -1556,7 +1556,7 @@ xfs_attr_node_get(
* If not in a transaction, we have to release all the buffers.
*/
out_release:
- for (i = 0; state != NULL && i < state->path.active; i++) {
+ for (i = 0; i < state->path.active; i++) {
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
--
2.40.0

@ -0,0 +1,34 @@
From 7374f58bfeb38467bab6552a47a5cd6bbe3c2e2e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 20 Dec 2022 16:53:34 -0800
Subject: [PATCH] xfs_db: fix dir3 block magic check
Fix this broken check, which (amazingly) went unnoticed until I cranked
up the warning level /and/ built the system for s390x.
Fixes: e96864ff4d4 ("xfs_db: enable blockget for v5 filesystems")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
db/check.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/db/check.c b/db/check.c
index bb27ce58..964756d0 100644
--- a/db/check.c
+++ b/db/check.c
@@ -2578,7 +2578,7 @@ process_data_dir_v2(
error++;
}
if ((be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC ||
- be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC) &&
+ be32_to_cpu(data->magic) == XFS_DIR3_BLOCK_MAGIC) &&
stale != be32_to_cpu(btp->stale)) {
if (!sflag || v)
dbprintf(_("dir %lld block %d bad stale tail count %d\n"),
--
2.40.0

@ -0,0 +1,266 @@
From 945c7341dedab44ae5daed83377e6366c3fb8fee Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 23 Nov 2022 09:09:33 -0800
Subject: [PATCH] xfs_repair: retain superblock buffer to avoid write hook
deadlock
Every now and then I experience the following deadlock in xfs_repair
when I'm running the offline repair fuzz tests:
#0 futex_wait (private=0, expected=2, futex_word=0x55555566df70) at ../sysdeps/nptl/futex-internal.h:146
#1 __GI___lll_lock_wait (futex=futex@entry=0x55555566df70, private=0) at ./nptl/lowlevellock.c:49
#2 lll_mutex_lock_optimized (mutex=0x55555566df70) at ./nptl/pthread_mutex_lock.c:48
#3 ___pthread_mutex_lock (mutex=mutex@entry=0x55555566df70) at ./nptl/pthread_mutex_lock.c:93
#4 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:231
#5 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e01b0, nodep=nodep@entry=0x7fffe55e0168) at cache.c:452
#6 __cache_lookup (key=key@entry=0x7fffe55e01b0, flags=0, bpp=bpp@entry=0x7fffe55e0228) at rdwr.c:405
#7 libxfs_getbuf_flags (btp=0x55555566de00, blkno=0, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0228) at rdwr.c:457
#8 libxfs_buf_read_map (btp=0x55555566de00, map=map@entry=0x7fffe55e0280, nmaps=nmaps@entry=1, flags=flags@entry=0, bpp=bpp@entry=0x7fffe55e0278, ops=0x5555556233e0 <xfs_sb_buf_ops>)
at rdwr.c:704
#9 libxfs_buf_read (ops=<optimized out>, bpp=0x7fffe55e0278, flags=0, numblks=<optimized out>, blkno=0, target=<optimized out>)
at /storage/home/djwong/cdev/work/xfsprogs/build-x86_64/libxfs/libxfs_io.h:195
#10 libxfs_getsb (mp=mp@entry=0x7fffffffd690) at rdwr.c:162
#11 force_needsrepair (mp=0x7fffffffd690) at xfs_repair.c:924
#12 repair_capture_writeback (bp=<optimized out>) at xfs_repair.c:1000
#13 libxfs_bwrite (bp=0x7fffe011e530) at rdwr.c:869
#14 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:240
#15 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e0470, nodep=nodep@entry=0x7fffe55e0428) at cache.c:452
#16 __cache_lookup (key=key@entry=0x7fffe55e0470, flags=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:405
#17 libxfs_getbuf_flags (btp=0x55555566de00, blkno=12736, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0538) at rdwr.c:457
#18 __libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:501
#19 libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:525
#20 pf_queue_io (args=args@entry=0x5555556722c0, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flag=flag@entry=11) at prefetch.c:124
#21 pf_read_bmbt_reclist (args=0x5555556722c0, rp=<optimized out>, numrecs=78) at prefetch.c:220
#22 pf_scan_lbtree (dbno=dbno@entry=1211, level=level@entry=1, isadir=isadir@entry=1, args=args@entry=0x5555556722c0, func=0x55555557f240 <pf_scanfunc_bmap>) at prefetch.c:298
#23 pf_read_btinode (isadir=1, dino=<optimized out>, args=0x5555556722c0) at prefetch.c:385
#24 pf_read_inode_dirs (args=args@entry=0x5555556722c0, bp=bp@entry=0x7fffdc023790) at prefetch.c:459
#25 pf_read_inode_dirs (bp=<optimized out>, args=0x5555556722c0) at prefetch.c:411
#26 pf_batch_read (args=args@entry=0x5555556722c0, which=which@entry=PF_PRIMARY, buf=buf@entry=0x7fffd001d000) at prefetch.c:609
#27 pf_io_worker (param=0x5555556722c0) at prefetch.c:673
#28 start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#29 clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
From this stack trace, we see that xfs_repair's prefetch module is
getting some xfs_buf objects ahead of initiating a read (#19). The
buffer cache has hit its limit, so it calls cache_shake (#14) to free
some unused xfs_bufs. The buffer it finds is a dirty buffer, so it
calls libxfs_bwrite to flush it out to disk, which in turn invokes the
buffer write hook that xfs_repair set up in 3b7667cb to mark the ondisk
filesystem's superblock as NEEDSREPAIR until repair actually completes.
Unfortunately, the NEEDSREPAIR handler itself needs to grab the
superblock buffer, so it makes another call into the buffer cache (#9),
which sees that the cache is full and tries to shake it (#4). Hence we
deadlock on cm_mutex because shaking is not reentrant.
Fix this by retaining a reference to the superblock buffer when possible
so that the writeback hook doesn't have to access the buffer cache to
set NEEDSREPAIR.
Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
libxfs/libxfs_api_defs.h | 2 ++
libxfs/libxfs_io.h | 1 +
libxfs/rdwr.c | 8 +++++
repair/phase2.c | 8 +++++
repair/protos.h | 1 +
repair/xfs_repair.c | 75 +++++++++++++++++++++++++++++++++++-----
6 files changed, 86 insertions(+), 9 deletions(-)
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
index 2716a731..f8efcce7 100644
--- a/libxfs/libxfs_api_defs.h
+++ b/libxfs/libxfs_api_defs.h
@@ -53,9 +53,11 @@
#define xfs_buf_delwri_submit libxfs_buf_delwri_submit
#define xfs_buf_get libxfs_buf_get
#define xfs_buf_get_uncached libxfs_buf_get_uncached
+#define xfs_buf_lock libxfs_buf_lock
#define xfs_buf_read libxfs_buf_read
#define xfs_buf_read_uncached libxfs_buf_read_uncached
#define xfs_buf_relse libxfs_buf_relse
+#define xfs_buf_unlock libxfs_buf_unlock
#define xfs_bunmapi libxfs_bunmapi
#define xfs_bwrite libxfs_bwrite
#define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
index 9c0e2704..fae86427 100644
--- a/libxfs/libxfs_io.h
+++ b/libxfs/libxfs_io.h
@@ -226,6 +226,7 @@ xfs_buf_hold(struct xfs_buf *bp)
}
void xfs_buf_lock(struct xfs_buf *bp);
+void xfs_buf_unlock(struct xfs_buf *bp);
int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags,
struct xfs_buf **bpp);
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 20e0793c..d5aad3ea 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -384,6 +384,14 @@ xfs_buf_lock(
pthread_mutex_lock(&bp->b_lock);
}
+void
+xfs_buf_unlock(
+ struct xfs_buf *bp)
+{
+ if (use_xfs_buf_lock)
+ pthread_mutex_unlock(&bp->b_lock);
+}
+
static int
__cache_lookup(
struct xfs_bufkey *key,
diff --git a/repair/phase2.c b/repair/phase2.c
index 56a39bb4..2ada95ae 100644
--- a/repair/phase2.c
+++ b/repair/phase2.c
@@ -370,6 +370,14 @@ phase2(
} else
do_log(_("Phase 2 - using internal log\n"));
+ /*
+ * Now that we've set up the buffer cache the way we want it, try to
+ * grab our own reference to the primary sb so that the hooks will not
+ * have to call out to the buffer cache.
+ */
+ if (mp->m_buf_writeback_fn)
+ retain_primary_sb(mp);
+
/* Zero log if applicable */
do_log(_(" - zero log...\n"));
diff --git a/repair/protos.h b/repair/protos.h
index 03ebae14..83e471ff 100644
--- a/repair/protos.h
+++ b/repair/protos.h
@@ -16,6 +16,7 @@ int get_sb(xfs_sb_t *sbp,
xfs_off_t off,
int size,
xfs_agnumber_t agno);
+int retain_primary_sb(struct xfs_mount *mp);
void write_primary_sb(xfs_sb_t *sbp,
int size);
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 871b428d..ff29bea9 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -749,6 +749,63 @@ check_fs_vs_host_sectsize(
}
}
+/*
+ * If we set up a writeback function to set NEEDSREPAIR while the filesystem is
+ * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer
+ * cache while trying to get the primary sb buffer if the first non-sb write to
+ * the filesystem is the result of a cache shake. Retain a reference to the
+ * primary sb buffer to avoid all that.
+ */
+static struct xfs_buf *primary_sb_bp; /* buffer for superblock */
+
+int
+retain_primary_sb(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
+ XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
+ &xfs_sb_buf_ops);
+ if (error)
+ return error;
+
+ libxfs_buf_unlock(primary_sb_bp);
+ return 0;
+}
+
+static void
+drop_primary_sb(void)
+{
+ if (!primary_sb_bp)
+ return;
+
+ libxfs_buf_lock(primary_sb_bp);
+ libxfs_buf_relse(primary_sb_bp);
+ primary_sb_bp = NULL;
+}
+
+static int
+get_primary_sb(
+ struct xfs_mount *mp,
+ struct xfs_buf **bpp)
+{
+ int error;
+
+ *bpp = NULL;
+
+ if (!primary_sb_bp) {
+ error = retain_primary_sb(mp);
+ if (error)
+ return error;
+ }
+
+ libxfs_buf_lock(primary_sb_bp);
+ xfs_buf_hold(primary_sb_bp);
+ *bpp = primary_sb_bp;
+ return 0;
+}
+
/* Clear needsrepair after a successful repair run. */
void
clear_needsrepair(
@@ -769,15 +826,14 @@ clear_needsrepair(
do_warn(
_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
error);
- return;
+ goto drop;
}
/* Clear needsrepair from the superblock. */
- bp = libxfs_getsb(mp);
- if (!bp || bp->b_error) {
+ error = get_primary_sb(mp, &bp);
+ if (error) {
do_warn(
- _("Cannot clear needsrepair from primary super, err=%d.\n"),
- bp ? bp->b_error : ENOMEM);
+ _("Cannot clear needsrepair from primary super, err=%d.\n"), error);
} else {
mp->m_sb.sb_features_incompat &=
~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
@@ -786,6 +842,8 @@ clear_needsrepair(
}
if (bp)
libxfs_buf_relse(bp);
+drop:
+ drop_primary_sb();
}
static void
@@ -808,11 +866,10 @@ force_needsrepair(
xfs_sb_version_needsrepair(&mp->m_sb))
return;
- bp = libxfs_getsb(mp);
- if (!bp || bp->b_error) {
+ error = get_primary_sb(mp, &bp);
+ if (error) {
do_log(
- _("couldn't get superblock to set needsrepair, err=%d\n"),
- bp ? bp->b_error : ENOMEM);
+ _("couldn't get superblock to set needsrepair, err=%d\n"), error);
} else {
/*
* It's possible that we need to set NEEDSREPAIR before we've
--
2.40.0

@ -0,0 +1,17 @@
-----BEGIN PGP SIGNATURE-----
iQJIBAABCAAyFiEEK4GFkZ6NJImBhp3tIK4WkuE93uAFAmL2oA0UHHNhbmRlZW5A
c2FuZGVlbi5uZXQACgkQIK4WkuE93uCKsxAAxhIfv5fSmuy7Qh+b8w3a4khG8r5V
BtaNAqbKE+Zz1Rl8z3TGlYKXcI/Ruola5UKUjXzGeHBbXcOGWDpWoaMp5kDnloUP
SovsR+qpBUijPO4bff/7aR9+4tHS2UvEv0yDuG8gd6VyOGxPXDgwFSCEse4bWf61
l5UZN3dPXOMmm+G4ZITWuq1Us+3/uz12WwSuftCbPsghcCIDhnEQSA40yqyU2yJH
WTkLBb9/ROFN2iLjJ2HWeCEdZ62+mT767tZrmaQHlzFbK+rrl3CA7S+RjWHFA0qz
tDBLJLDMBdAExmR+sG+48pZTbgd1s8aXmMjCt80q5OzB5dKTfoYlqljkZxyIM0E2
y2Kq716s4rBHV/Na/lhqYMPAPY3hG9iDOLKAVSJWbI7i/j5t5+Uin697Ha79cupO
3bD7tJBr4JpL+tu9Q9khQhekGgSGqfQG/a9aJA0H8DoPsmOuuUrJBKs10npvVu18
Cx5WHwNeadK3rr0DLgik0X1POUex0fD+xjYXMWCHSJZhoC2wCbyvgg1xwYgEg16n
iyh+yxYSOlNgTZsseP/AmgsfKDO0hH/k4PiXrd4vT4+jDcVzBEQ62j8QJ629/qyC
mChjhcbzrJosAseLDMbm5gM2M79nqavrdvBhJ9JMEIjq/5m8VaQrmZE12w48lh92
WIngBgst2MrB8mE=
=KQvs
-----END PGP SIGNATURE-----

@ -0,0 +1,39 @@
From 780e93c5103d3c19d53c36ab7f4794d14912f3a5 Mon Sep 17 00:00:00 2001
From: Bill O'Donnell <bodonnel@redhat.com>
Date: Fri, 28 Jul 2023 17:20:17 -0500
Subject: [PATCH] mkfs.xfs.8: correction on mkfs.xfs manpage since reflink and
dax are compatible
Merged early in 2023: Commit 480017957d638 xfs: remove restrictions for fsdax
and reflink. There needs to be a corresponding change to the mkfs.xfs manpage
to remove the incompatibility statement.
Signed-off-by: Bill O'Donnell <bodonnel@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
man/man8/mkfs.xfs.8.in | 7 -------
1 file changed, 7 deletions(-)
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
index ce6f1e2d..08bb92f6 100644
--- a/man/man8/mkfs.xfs.8.in
+++ b/man/man8/mkfs.xfs.8.in
@@ -323,13 +323,6 @@ option set. When the option
.B \-m crc=0
is used, the reference count btree feature is not supported and reflink is
disabled.
-.IP
-Note: the filesystem DAX mount option (
-.B \-o dax
-) is incompatible with
-reflink-enabled XFS filesystems. To use filesystem DAX with XFS, specify the
-.B \-m reflink=0
-option to mkfs.xfs to disable the reflink feature.
.RE
.PP
.PD 0
--
2.41.0

@ -0,0 +1,128 @@
From 8e698ee72c4ecbbf18264568eb310875839fd601 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 2 May 2023 09:14:36 +1000
Subject: [PATCH] xfs: set bnobt/cntbt numrecs correctly when formatting new
AGs
Through generic/300, I discovered that mkfs.xfs creates corrupt
filesystems when given these parameters:
# mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported
Filesystems formatted with --unsupported are not supported!!
meta-data=/dev/sda isize=512 agcount=8, agsize=16352 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=1, sparse=1, rmapbt=1
= reflink=1 bigtime=1 inobtcount=1 nrext64=1
data = bsize=4096 blocks=130816, imaxpct=25
= sunit=32 swidth=128 blks
naming =version 2 bsize=4096 ascii-ci=0, ftype=1
log =internal log bsize=4096 blocks=8192, version=2
= sectsz=512 sunit=32 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
= rgcount=0 rgsize=0 blks
Discarding blocks...Done.
# xfs_repair -n /dev/sda
Phase 1 - find and verify superblock...
- reporting progress in intervals of 15 minutes
Phase 2 - using internal log
- zero log...
- 16:30:50: zeroing log - 16320 of 16320 blocks done
- scan filesystem freespace and inode maps...
agf_freeblks 25, counted 0 in ag 4
sb_fdblocks 8823, counted 8798
The root cause of this problem is the numrecs handling in
xfs_freesp_init_recs, which is used to initialize a new AG. Prior to
calling the function, we set up the new bnobt block with numrecs == 1
and rely on _freesp_init_recs to format that new record. If the last
record created has a blockcount of zero, then it sets numrecs = 0.
That last bit isn't correct if the AG contains the log, the start of the
log is not immediately after the initial blocks due to stripe alignment,
and the end of the log is perfectly aligned with the end of the AG. For
this case, we actually formatted a single bnobt record to handle the
free space before the start of the (stripe aligned) log, and incremented
arec to try to format a second record. That second record turned out to
be unnecessary, so what we really want is to leave numrecs at 1.
The numrecs handling itself is overly complicated because a different
function sets numrecs == 1. Change the bnobt creation code to start
with numrecs set to zero and only increment it after successfully
formatting a free space extent into the btree block.
Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
fs/xfs/libxfs/xfs_ag.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 1b078bbbf225..9b373a0c7aaf 100644
--- a//libxfs/xfs_ag.c
+++ b//libxfs/xfs_ag.c
@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
ASSERT(start >= mp->m_ag_prealloc_blocks);
if (start != mp->m_ag_prealloc_blocks) {
/*
- * Modify first record to pad stripe align of log
+ * Modify first record to pad stripe align of log and
+ * bump the record count.
*/
arec->ar_blockcount = cpu_to_be32(start -
mp->m_ag_prealloc_blocks);
+ be16_add_cpu(&block->bb_numrecs, 1);
nrec = arec + 1;
/*
@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
be32_to_cpu(arec->ar_startblock) +
be32_to_cpu(arec->ar_blockcount));
arec = nrec;
- be16_add_cpu(&block->bb_numrecs, 1);
}
/*
* Change record start to after the internal log
@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
}
/*
- * Calculate the record block count and check for the case where
- * the log might have consumed all available space in the AG. If
- * so, reset the record count to 0 to avoid exposure of an invalid
- * record start block.
+ * Calculate the block count of this record; if it is nonzero,
+ * increment the record count.
*/
arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock));
- if (!arec->ar_blockcount)
- block->bb_numrecs = 0;
+ if (arec->ar_blockcount)
+ be16_add_cpu(&block->bb_numrecs, 1);
}
/*
@@ -538,7 +537,7 @@ xfs_bnoroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
@@ -548,7 +547,7 @@ xfs_cntroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
--
2.40.1

@ -0,0 +1,28 @@
From 31980fef484df925b23824244de8ef353e436cef Mon Sep 17 00:00:00 2001
From: Pavel Reichl <preichl@redhat.com>
Date: Thu, 8 Jun 2023 10:57:52 +0200
Subject: [PATCH] mkfs: fix man's default value for sparse option
Fixes: 9cf846b51 ("mkfs: enable sparse inodes by default")
Suggested-by: Lukas Herbolt <lukas@herbolt.com>
Signed-off-by: Pavel Reichl <preichl@redhat.com>
---
man/man8/mkfs.xfs.8.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
index 49e64d47a..48e26ece7 100644
--- a/man/man8/mkfs.xfs.8.in
+++ b/man/man8/mkfs.xfs.8.in
@@ -631,7 +631,7 @@ Enable sparse inode chunk allocation. The
.I value
is either 0 or 1, with 1 signifying that sparse allocation is enabled.
If the value is omitted, 1 is assumed. Sparse inode allocation is
-disabled by default. This feature is only available for filesystems
+enabled by default. This feature is only available for filesystems
formatted with
.B \-m crc=1.
.IP
--
2.40.1

@ -1,7 +1,7 @@
Summary: Utilities for managing the XFS filesystem
Name: xfsprogs
Version: 5.14.2
Release: 1%{?dist}
Version: 5.19.0
Release: 4%{?dist}
License: GPL+ and LGPLv2+
URL: https://xfs.wiki.kernel.org
Source0: http://kernel.org/pub/linux/utils/fs/xfs/xfsprogs/%{name}-%{version}.tar.xz
@ -23,8 +23,20 @@ Obsoletes: xfsprogs-qa-devel <= %{version}
Conflicts: xfsdump < 3.0.1
Suggests: xfsprogs-xfs_scrub
Patch0: xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch
Patch0: xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch
Patch1: xfsprogs-5.12.0-example-conf.patch
Patch2: xfsprogs-5.19.0-mkfs-tolerate-tiny-filesystems.patch
Patch3: xfsprogs-5.19.0-xfs-hoist-refcount-record-merge-predicates.patch
Patch4: xfsprogs-5.19.0-xfs_db-fix-dir3-block-magic-check.patch
Patch5: xfsprogs-5.19.0-xfs-estimate-post-merge-refcounts-correctly.patch
Patch7: xfsprogs-5.19.0-xfs-fix-off-by-one-error-in-xfs_btree_space_to_heigh.patch
Patch8: xfsprogs-5.19.0-xfs-fix-sb-write-verify-for-lazysbcount.patch
Patch9: xfsprogs-5.19.0-xfs-get-rid-of-assert-from-xfs_btree_islastblock.patch
Patch10: xfsprogs-5.19.0-xfs-removed-useless-condition-in-function-xfs_attr_n.patch
Patch11: xfsprogs-5.19.0-xfs_repair-retain-superblock-buffer-to-avoid-write-h.patch
Patch12: xfsprogs-kernel-xfs-set-bnobt-cntbt-numrecs-correctly-when-formattin.patch
Patch13: xfsprogs-rhelonly-mkfs-fix-man-s-default-value-for-sparse-option.patch
Patch14: xfsprogs-6.5.0-mkfs.xfs.8-correction-on-mkfs.xfs-manpage-since-refl.patch
%description
A set of commands to use the XFS filesystem, including mkfs.xfs.
@ -136,6 +148,26 @@ install -m 0644 %{SOURCE3} %{buildroot}%{mkfsdir}
%{_libdir}/*.so
%changelog
* Wed Aug 02 2023 Pavel Reichl <preichl@redhat.com> - 5.19.0-4
- Fix man page, mkfs.xfs(8): Update section on dax+reflink
- compatibility (#2226900)
* Tue Jun 20 2023 Pavel Reichl <preichl@redhat.com> - 5.19.0-3
- Fix man page default for sparse mkfs option (#2216118)
* Fri May 26 2023 Pavel Reichl <preichl@redhat.com> - 5.19.0-2
- Fix xfs corrupted when AG size is a multiple of stripe width
- Related: rhbz#2192982
* Tue Jan 10 2023 Pavel Reichl <preichl@redhat.com> - 5.19.0-1
- New upstream release
- Tolerate tiny (<300MB) filesystems
- Rename xfsprogs-5.12.0-default-bigtime-inobtcnt-on.patch to
xfsprogs-5.19.0-disable-old-kernel-bigtime-inobtcnt-on.patch
and amend it to reflect upstream changes
- Backport all "Fixing" patches relevant to 5.19
Related: rhbz#2142910
* Fri Jan 21 2022 Pavel Reichl <preichl@redhat.com> - 5.14.2-1
- New upstream release
Related: rhbz#2041525

Loading…
Cancel
Save