parent
3d9b32c747
commit
824e0a430e
@ -1 +1 @@
|
|||||||
SOURCES/xfsprogs-5.14.2.tar.xz
|
SOURCES/xfsprogs-5.19.0.tar.xz
|
||||||
|
@ -1 +1 @@
|
|||||||
035e552cf4a08d5dbe1330ec1e3e6ceeb21b6bc9 SOURCES/xfsprogs-5.14.2.tar.xz
|
12afbdd497603b98945ea18f9aa5a78c671a6e4c SOURCES/xfsprogs-5.19.0.tar.xz
|
||||||
|
@ -1,85 +0,0 @@
|
|||||||
--- xfsprogs-5.12.0.orig/man/man8/mkfs.xfs.8
|
|
||||||
+++ xfsprogs-5.12.0/man/man8/mkfs.xfs.8
|
|
||||||
@@ -203,7 +203,7 @@ December 1901 to January 2038, and quota
|
|
||||||
.IP
|
|
||||||
By default,
|
|
||||||
.B mkfs.xfs
|
|
||||||
-will not enable this feature.
|
|
||||||
+in RHEL9 will enable this feature.
|
|
||||||
If the option
|
|
||||||
.B \-m crc=0
|
|
||||||
is used, the large timestamp feature is not supported and is disabled.
|
|
||||||
@@ -256,7 +256,7 @@ This can be used to reduce mount times w
|
|
||||||
.IP
|
|
||||||
By default,
|
|
||||||
.B mkfs.xfs
|
|
||||||
-will not enable this option.
|
|
||||||
+in RHEL9 will enable this option.
|
|
||||||
This feature is only available for filesystems created with the (default)
|
|
||||||
.B \-m finobt=1
|
|
||||||
option set.
|
|
||||||
--- xfsprogs-5.12.0.orig/mkfs/xfs_mkfs.c
|
|
||||||
+++ xfsprogs-5.12.0/mkfs/xfs_mkfs.c
|
|
||||||
@@ -3795,6 +3797,23 @@ cfgfile_parse(
|
|
||||||
cli->cfgfile);
|
|
||||||
}
|
|
||||||
|
|
||||||
+static unsigned int get_system_kver(void)
|
|
||||||
+{
|
|
||||||
+ const char *kver = getenv("KVER");
|
|
||||||
+ struct utsname utsname;
|
|
||||||
+ int a, b, c;
|
|
||||||
+
|
|
||||||
+ if (!kver) {
|
|
||||||
+ uname(&utsname);
|
|
||||||
+ kver = utsname.release;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3)
|
|
||||||
+ return LINUX_VERSION_CODE;
|
|
||||||
+
|
|
||||||
+ return KERNEL_VERSION(a,b,c);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int
|
|
||||||
main(
|
|
||||||
int argc,
|
|
||||||
@@ -3848,17 +3867,25 @@ main(
|
|
||||||
.spinodes = true,
|
|
||||||
.rmapbt = false,
|
|
||||||
.reflink = true,
|
|
||||||
- .inobtcnt = false,
|
|
||||||
+ .inobtcnt = true,
|
|
||||||
.parent_pointers = false,
|
|
||||||
.nodalign = false,
|
|
||||||
.nortalign = false,
|
|
||||||
- .bigtime = false,
|
|
||||||
+ .bigtime = true,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
struct list_head buffer_list;
|
|
||||||
+ unsigned int kver;
|
|
||||||
int error;
|
|
||||||
|
|
||||||
+ /* turn bigtime & inobtcnt back off if running under older kernels */
|
|
||||||
+ kver = get_system_kver();
|
|
||||||
+ if (kver < KERNEL_VERSION(5,10,0)) {
|
|
||||||
+ dft.sb_feat.inobtcnt = false;
|
|
||||||
+ dft.sb_feat.bigtime = false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
platform_uuid_generate(&cli.uuid);
|
|
||||||
progname = basename(argv[0]);
|
|
||||||
setlocale(LC_ALL, "");
|
|
||||||
--- xfsprogs-5.14.0/mkfs/xfs_mkfs.c.orig
|
|
||||||
+++ xfsprogs-5.14.0/mkfs/xfs_mkfs.c
|
|
||||||
@@ -12,6 +12,8 @@
|
|
||||||
#include "libfrog/convert.h"
|
|
||||||
#include "proto.h"
|
|
||||||
#include <ini.h>
|
|
||||||
+#include <linux/version.h>
|
|
||||||
+#include <sys/utsname.h>
|
|
||||||
|
|
||||||
#define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog)))
|
|
||||||
#define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog)))
|
|
@ -1,17 +0,0 @@
|
|||||||
-----BEGIN PGP SIGNATURE-----
|
|
||||||
Version: GnuPG v2.0.22 (GNU/Linux)
|
|
||||||
|
|
||||||
iQIcBAABAgAGBQJhrpYSAAoJECCuFpLhPd7gIH4P/1KdoaOWy77MqSmd5RWVN+D3
|
|
||||||
PQTD4YIjPtpLnec/dnX8972jX+WeeT0ydkviOAE3J6gejlHa10S+5SvpXTQLl9Cm
|
|
||||||
jDbNgLh7bV/mdH9H9RNmNM8xtCMufhO09BLt7O6MyXu1g3T+Np1kaG+hoNtS5oze
|
|
||||||
fx7r/nh9ZCeCyMRTGrRlso5xNXL92qL9SkgbPWAtTjvvrcsaTkhNrC4o08tyv8BN
|
|
||||||
oMeZU1IHSiiFXs4RlUtmzZKXVrWiWWJTVOdnb107qNAchghbOivqo9zuoxqtkl2R
|
|
||||||
Yb3YQfYD7eyGVdXgY6CTGCnE9HEVE3sa/2MjZ1KTWP9ZK/F9gZT2izEF/dtbpV8G
|
|
||||||
7IdKBpKda7tDKJcUw994HyUiZfYiHVqcTwcPDvK18fKqxd1khSKbhShc3nvjo4P2
|
|
||||||
yGSGrDaOp8pkPc3QJrSMYQQAYVWtle9Y2Uj+TDMu8/XJi+pJxbxn5B7XtKlP6MWm
|
|
||||||
5pNjA4mh3zX63D78NCWa/XimSFUpramlDI1LE9enaUidvdRMg6vMn8XYh4g0uGgP
|
|
||||||
2Yp1F2Z2VGX0NxrulHowMqxGcgtneC5bZcsJ2GeZ4r6i+p8VIS5ZJjNLZ2p993c8
|
|
||||||
w/32yLouC2j+pcKn3Ljwd2aPQBnGO5IK9CWn93PSG7kWvuN8S8RC0MoCH/a/xXCz
|
|
||||||
Xbg1NICCCVKczfyFtQM7
|
|
||||||
=J/Jr
|
|
||||||
-----END PGP SIGNATURE-----
|
|
@ -0,0 +1,52 @@
|
|||||||
|
--- a/mkfs/xfs_mkfs.c.orig 2022-08-12 20:38:21.000000000 +0200
|
||||||
|
+++ b/mkfs/xfs_mkfs.c 2023-01-25 11:06:01.863076713 +0100
|
||||||
|
@@ -13,6 +13,8 @@
|
||||||
|
#include "libfrog/crc32cselftest.h"
|
||||||
|
#include "proto.h"
|
||||||
|
#include <ini.h>
|
||||||
|
+#include <linux/version.h>
|
||||||
|
+#include <sys/utsname.h>
|
||||||
|
|
||||||
|
#define TERABYTES(count, blog) ((uint64_t)(count) << (40 - (blog)))
|
||||||
|
#define GIGABYTES(count, blog) ((uint64_t)(count) << (30 - (blog)))
|
||||||
|
@@ -3998,6 +4000,23 @@
|
||||||
|
cli->cfgfile);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static unsigned int get_system_kver(void)
|
||||||
|
+{
|
||||||
|
+ const char *kver = getenv("KVER");
|
||||||
|
+ struct utsname utsname;
|
||||||
|
+ int a, b, c;
|
||||||
|
+
|
||||||
|
+ if (!kver) {
|
||||||
|
+ uname(&utsname);
|
||||||
|
+ kver = utsname.release;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (sscanf(kver, "%d.%d.%d", &a, &b, &c) != 3)
|
||||||
|
+ return LINUX_VERSION_CODE;
|
||||||
|
+
|
||||||
|
+ return KERNEL_VERSION(a,b,c);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int
|
||||||
|
main(
|
||||||
|
int argc,
|
||||||
|
@@ -4077,8 +4096,16 @@
|
||||||
|
};
|
||||||
|
|
||||||
|
struct list_head buffer_list;
|
||||||
|
+ unsigned int kver;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
+ /* turn bigtime & inobtcnt back off if running under older kernels */
|
||||||
|
+ kver = get_system_kver();
|
||||||
|
+ if (kver < KERNEL_VERSION(5,10,0)) {
|
||||||
|
+ dft.sb_feat.inobtcnt = false;
|
||||||
|
+ dft.sb_feat.bigtime = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
platform_uuid_generate(&cli.uuid);
|
||||||
|
progname = basename(argv[0]);
|
||||||
|
setlocale(LC_ALL, "");
|
@ -0,0 +1,91 @@
|
|||||||
|
From 17b691400e8ce0755bb1d7a33490fbc014067e5e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Pavel Reichl <preichl@redhat.com>
|
||||||
|
Date: Fri, 27 Jan 2023 06:30:20 +0100
|
||||||
|
Subject: [PATCH] mkfs: tolerate tiny filesystems
|
||||||
|
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
man/man8/mkfs.xfs.8.in | 4 ++--
|
||||||
|
mkfs/xfs_mkfs.c | 23 ++++++++++++++---------
|
||||||
|
2 files changed, 16 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
|
||||||
|
index 211e7b0c..03f0fda8 100644
|
||||||
|
--- a/man/man8/mkfs.xfs.8.in
|
||||||
|
+++ b/man/man8/mkfs.xfs.8.in
|
||||||
|
@@ -405,7 +405,7 @@ is required if
|
||||||
|
is given. Otherwise, it is only needed if the filesystem should occupy
|
||||||
|
less space than the size of the special file.
|
||||||
|
|
||||||
|
-The data section must be at least 300MB in size.
|
||||||
|
+The data section should be at least 300MB in size.
|
||||||
|
.TP
|
||||||
|
.BI sunit= value
|
||||||
|
This is used to specify the stripe unit for a RAID device or a
|
||||||
|
@@ -705,7 +705,7 @@ described above. The overriding minimum value for size is 512 blocks.
|
||||||
|
With some combinations of filesystem block size, inode size,
|
||||||
|
and directory block size, the minimum log size is larger than 512 blocks.
|
||||||
|
|
||||||
|
-The log must be at least 64MB in size.
|
||||||
|
+The log should be at least 64MB in size.
|
||||||
|
The log cannot be more than 2GB in size.
|
||||||
|
.TP
|
||||||
|
.BI version= value
|
||||||
|
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
|
||||||
|
index 9dd0e79c..72c906d6 100644
|
||||||
|
--- a/mkfs/xfs_mkfs.c
|
||||||
|
+++ b/mkfs/xfs_mkfs.c
|
||||||
|
@@ -2503,6 +2503,8 @@ validate_supported(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
struct cli_params *cli)
|
||||||
|
{
|
||||||
|
+ bool deprecated = false;
|
||||||
|
+
|
||||||
|
/* Undocumented option to enable unsupported tiny filesystems. */
|
||||||
|
if (!cli->is_supported) {
|
||||||
|
printf(
|
||||||
|
@@ -2532,9 +2534,8 @@ validate_supported(
|
||||||
|
* 64MB * (8 / 7) * 4 = 293MB
|
||||||
|
*/
|
||||||
|
if (mp->m_sb.sb_dblocks < MEGABYTES(300, mp->m_sb.sb_blocklog)) {
|
||||||
|
- fprintf(stderr,
|
||||||
|
- _("Filesystem must be larger than 300MB.\n"));
|
||||||
|
- usage();
|
||||||
|
+ printf(_("Filesystem should be larger than 300MB.\n"));
|
||||||
|
+ deprecated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -2543,9 +2544,8 @@ validate_supported(
|
||||||
|
*/
|
||||||
|
if (mp->m_sb.sb_logblocks <
|
||||||
|
XFS_MIN_REALISTIC_LOG_BLOCKS(mp->m_sb.sb_blocklog)) {
|
||||||
|
- fprintf(stderr,
|
||||||
|
- _("Log size must be at least 64MB.\n"));
|
||||||
|
- usage();
|
||||||
|
+ printf( _("Log size should be at least 64MB.\n"));
|
||||||
|
+ deprecated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -2553,9 +2553,14 @@ validate_supported(
|
||||||
|
* have redundant superblocks.
|
||||||
|
*/
|
||||||
|
if (mp->m_sb.sb_agcount < 2) {
|
||||||
|
- fprintf(stderr,
|
||||||
|
- _("Filesystem must have at least 2 superblocks for redundancy!\n"));
|
||||||
|
- usage();
|
||||||
|
+ printf(
|
||||||
|
+ _("Filesystem should have at least 2 superblocks for redundancy!\n"));
|
||||||
|
+ deprecated = true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (deprecated) {
|
||||||
|
+ printf(
|
||||||
|
+_("Support for filesystems like this one is deprecated and they will not be supported in future releases.\n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.1
|
||||||
|
|
@ -0,0 +1,113 @@
|
|||||||
|
From b445624f0882badf00da739c52e58a85c18ae002 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||||
|
Date: Wed, 15 Mar 2023 15:56:35 +0100
|
||||||
|
Subject: [PATCH] xfs: estimate post-merge refcounts correctly
|
||||||
|
|
||||||
|
Source kernel commit: b25d1984aa884fc91a73a5a407b9ac976d441e9b
|
||||||
|
|
||||||
|
Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount
|
||||||
|
metadata corruptions after being run. Specifically, xfs_repair noticed
|
||||||
|
single-block refcount records that could be combined but had not been.
|
||||||
|
|
||||||
|
The root cause of this is improper MAXREFCOUNT edge case handling in
|
||||||
|
xfs_refcount_merge_extents. When we're trying to find candidates for a
|
||||||
|
refcount btree record merge, we compute the refcount attribute of the
|
||||||
|
merged record, but we fail to account for the fact that once a record
|
||||||
|
hits rc_refcount == MAXREFCOUNT, it is pinned that way forever. Hence
|
||||||
|
the computed refcount is wrong, and we fail to merge the extents.
|
||||||
|
|
||||||
|
Fix this by adjusting the merge predicates to compute the adjusted
|
||||||
|
refcount correctly.
|
||||||
|
|
||||||
|
Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree")
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||||
|
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/xfs_refcount.c | 25 +++++++++++++++++++++----
|
||||||
|
1 file changed, 21 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
|
||||||
|
index f6167c5f..29258bdd 100644
|
||||||
|
--- a/libxfs/xfs_refcount.c
|
||||||
|
+++ b/libxfs/xfs_refcount.c
|
||||||
|
@@ -819,6 +819,17 @@ xfs_refc_valid(
|
||||||
|
return rc->rc_startblock != NULLAGBLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static inline xfs_nlink_t
|
||||||
|
+xfs_refc_merge_refcount(
|
||||||
|
+ const struct xfs_refcount_irec *irec,
|
||||||
|
+ enum xfs_refc_adjust_op adjust)
|
||||||
|
+{
|
||||||
|
+ /* Once a record hits MAXREFCOUNT, it is pinned there forever */
|
||||||
|
+ if (irec->rc_refcount == MAXREFCOUNT)
|
||||||
|
+ return MAXREFCOUNT;
|
||||||
|
+ return irec->rc_refcount + adjust;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static inline bool
|
||||||
|
xfs_refc_want_merge_center(
|
||||||
|
const struct xfs_refcount_irec *left,
|
||||||
|
@@ -830,6 +841,7 @@ xfs_refc_want_merge_center(
|
||||||
|
unsigned long long *ulenp)
|
||||||
|
{
|
||||||
|
unsigned long long ulen = left->rc_blockcount;
|
||||||
|
+ xfs_nlink_t new_refcount;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To merge with a center record, both shoulder records must be
|
||||||
|
@@ -845,9 +857,10 @@ xfs_refc_want_merge_center(
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* The shoulder record refcounts must match the new refcount. */
|
||||||
|
- if (left->rc_refcount != cleft->rc_refcount + adjust)
|
||||||
|
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
|
||||||
|
+ if (left->rc_refcount != new_refcount)
|
||||||
|
return false;
|
||||||
|
- if (right->rc_refcount != cleft->rc_refcount + adjust)
|
||||||
|
+ if (right->rc_refcount != new_refcount)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -870,6 +883,7 @@ xfs_refc_want_merge_left(
|
||||||
|
enum xfs_refc_adjust_op adjust)
|
||||||
|
{
|
||||||
|
unsigned long long ulen = left->rc_blockcount;
|
||||||
|
+ xfs_nlink_t new_refcount;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For a left merge, the left shoulder record must be adjacent to the
|
||||||
|
@@ -880,7 +894,8 @@ xfs_refc_want_merge_left(
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Left shoulder record refcount must match the new refcount. */
|
||||||
|
- if (left->rc_refcount != cleft->rc_refcount + adjust)
|
||||||
|
+ new_refcount = xfs_refc_merge_refcount(cleft, adjust);
|
||||||
|
+ if (left->rc_refcount != new_refcount)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -902,6 +917,7 @@ xfs_refc_want_merge_right(
|
||||||
|
enum xfs_refc_adjust_op adjust)
|
||||||
|
{
|
||||||
|
unsigned long long ulen = right->rc_blockcount;
|
||||||
|
+ xfs_nlink_t new_refcount;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For a right merge, the right shoulder record must be adjacent to the
|
||||||
|
@@ -912,7 +928,8 @@ xfs_refc_want_merge_right(
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Right shoulder record refcount must match the new refcount. */
|
||||||
|
- if (right->rc_refcount != cright->rc_refcount + adjust)
|
||||||
|
+ new_refcount = xfs_refc_merge_refcount(cright, adjust);
|
||||||
|
+ if (right->rc_refcount != new_refcount)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,88 @@
|
|||||||
|
From a68dabd45f3591456ecf7e35f6a6077db79f6bc6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||||
|
Date: Wed, 15 Mar 2023 15:59:35 +0100
|
||||||
|
Subject: [PATCH] xfs: fix off-by-one error in xfs_btree_space_to_height
|
||||||
|
|
||||||
|
Source kernel commit: c0f399ff51495ac8d30367418f4f6292ecd61fbe
|
||||||
|
|
||||||
|
Lately I've been stress-testing extreme-sized rmap btrees by using the
|
||||||
|
(new) xfs_db bmap_inflate command to clone bmbt mappings billions of
|
||||||
|
times and then using xfs_repair to build new rmap and refcount btrees.
|
||||||
|
This of course is /much/ faster than actually FICLONEing a file billions
|
||||||
|
of times.
|
||||||
|
|
||||||
|
Unfortunately, xfs_repair fails in xfs_btree_bload_compute_geometry with
|
||||||
|
EOVERFLOW, which indicates that xfs_mount.m_rmap_maxlevels is not
|
||||||
|
sufficiently large for the test scenario. For a 1TB filesystem (~67
|
||||||
|
million AG blocks, 4 AGs) the btheight command reports:
|
||||||
|
|
||||||
|
$ xfs_db -c 'btheight -n 4400801200 -w min rmapbt' /dev/sda
|
||||||
|
rmapbt: worst case per 4096-byte block: 84 records (leaf) / 45 keyptrs (node)
|
||||||
|
level 0: 4400801200 records, 52390491 blocks
|
||||||
|
level 1: 52390491 records, 1164234 blocks
|
||||||
|
level 2: 1164234 records, 25872 blocks
|
||||||
|
level 3: 25872 records, 575 blocks
|
||||||
|
level 4: 575 records, 13 blocks
|
||||||
|
level 5: 13 records, 1 block
|
||||||
|
6 levels, 53581186 blocks total
|
||||||
|
|
||||||
|
The AG is sufficiently large to build this rmap btree. Unfortunately,
|
||||||
|
m_rmap_maxlevels is 5. Augmenting the loop in the space->height
|
||||||
|
function to report height, node blocks, and blocks remaining produces
|
||||||
|
this:
|
||||||
|
|
||||||
|
ht 1 node_blocks 45 blockleft 67108863
|
||||||
|
ht 2 node_blocks 2025 blockleft 67108818
|
||||||
|
ht 3 node_blocks 91125 blockleft 67106793
|
||||||
|
ht 4 node_blocks 4100625 blockleft 67015668
|
||||||
|
final height: 5
|
||||||
|
|
||||||
|
The goal of this function is to compute the maximum height btree that
|
||||||
|
can be stored in the given number of ondisk fsblocks. Starting with the
|
||||||
|
top level of the tree, each iteration through the loop adds the fanout
|
||||||
|
factor of the next level down until we run out of blocks. IOWs, maximum
|
||||||
|
height is achieved by using the smallest fanout factor that can apply
|
||||||
|
to that level.
|
||||||
|
|
||||||
|
However, the loop setup is not correct. Top level btree blocks are
|
||||||
|
allowed to contain fewer than minrecs items, so the computation is
|
||||||
|
incorrect because the first time through the loop it should be using a
|
||||||
|
fanout factor of 2. With this corrected, the above becomes:
|
||||||
|
|
||||||
|
ht 1 node_blocks 2 blockleft 67108863
|
||||||
|
ht 2 node_blocks 90 blockleft 67108861
|
||||||
|
ht 3 node_blocks 4050 blockleft 67108771
|
||||||
|
ht 4 node_blocks 182250 blockleft 67104721
|
||||||
|
ht 5 node_blocks 8201250 blockleft 66922471
|
||||||
|
final height: 6
|
||||||
|
|
||||||
|
Fixes: 9ec691205e7d ("xfs: compute the maximum height of the rmap btree when reflink enabled")
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/xfs_btree.c | 7 ++++++-
|
||||||
|
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c
|
||||||
|
index 65d38637..38a3092d 100644
|
||||||
|
--- a/libxfs/xfs_btree.c
|
||||||
|
+++ b/libxfs/xfs_btree.c
|
||||||
|
@@ -4663,7 +4663,12 @@ xfs_btree_space_to_height(
|
||||||
|
const unsigned int *limits,
|
||||||
|
unsigned long long leaf_blocks)
|
||||||
|
{
|
||||||
|
- unsigned long long node_blocks = limits[1];
|
||||||
|
+ /*
|
||||||
|
+ * The root btree block can have fewer than minrecs pointers in it
|
||||||
|
+ * because the tree might not be big enough to require that amount of
|
||||||
|
+ * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
|
||||||
|
+ */
|
||||||
|
+ unsigned long long node_blocks = 2;
|
||||||
|
unsigned long long blocks_left = leaf_blocks - 1;
|
||||||
|
unsigned int height = 1;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,119 @@
|
|||||||
|
From b827e2318ea2bb3eabca13a965c2535a1d7289e5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Long Li <leo.lilong@huawei.com>
|
||||||
|
Date: Fri, 18 Nov 2022 12:23:57 +0100
|
||||||
|
Subject: [PATCH] xfs: fix sb write verify for lazysbcount
|
||||||
|
|
||||||
|
Source kernel commit: 7cecd500d90164419add650e26cc1de03a7a66cb
|
||||||
|
|
||||||
|
When lazysbcount is enabled, fsstress and loop mount/unmount test report
|
||||||
|
the following problems:
|
||||||
|
|
||||||
|
XFS (loop0): SB summary counter sanity check failed
|
||||||
|
XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460,
|
||||||
|
xfs_sb block 0x0
|
||||||
|
XFS (loop0): Unmount and run xfs_repair
|
||||||
|
XFS (loop0): First 128 bytes of corrupted metadata buffer:
|
||||||
|
00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00 XFSB.........(..
|
||||||
|
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
|
||||||
|
00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a i.|._.D..t....4Z
|
||||||
|
00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80 ..... ..........
|
||||||
|
00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82 ................
|
||||||
|
00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00 ................
|
||||||
|
00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00 ................
|
||||||
|
00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19 ................
|
||||||
|
XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply
|
||||||
|
+0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580). Shutting down filesystem.
|
||||||
|
XFS (loop0): Please unmount the filesystem and rectify the problem(s)
|
||||||
|
XFS (loop0): log mount/recovery failed: error -117
|
||||||
|
XFS (loop0): log mount failed
|
||||||
|
|
||||||
|
This corruption will shut down the file system and the file system will
|
||||||
|
no longer be mountable. The following script can reproduce the problem,
|
||||||
|
but it may take a long time.
|
||||||
|
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
device=/dev/sda
|
||||||
|
testdir=/mnt/test
|
||||||
|
round=0
|
||||||
|
|
||||||
|
function fail()
|
||||||
|
{
|
||||||
|
echo "$*"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
mkdir -p $testdir
|
||||||
|
while [ $round -lt 10000 ]
|
||||||
|
do
|
||||||
|
echo "******* round $round ********"
|
||||||
|
mkfs.xfs -f $device
|
||||||
|
mount $device $testdir || fail "mount failed!"
|
||||||
|
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null &
|
||||||
|
sleep 4
|
||||||
|
killall -w fsstress
|
||||||
|
umount $testdir
|
||||||
|
xfs_repair -e $device > /dev/null
|
||||||
|
if [ $? -eq 2 ];then
|
||||||
|
echo "ERR CODE 2: Dirty log exception during repair."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
round=$(($round+1))
|
||||||
|
done
|
||||||
|
|
||||||
|
When lazysbcount is enabled, there is no additional lock protection for
|
||||||
|
reading m_ifree and m_icount in xfs_log_sb(), if other cpu modifies the
|
||||||
|
m_ifree, this will make the m_ifree greater than m_icount. For example,
|
||||||
|
consider the following sequence and ifreedelta is positive:
|
||||||
|
|
||||||
|
CPU0 CPU1
|
||||||
|
xfs_log_sb xfs_trans_unreserve_and_mod_sb
|
||||||
|
---------- ------------------------------
|
||||||
|
percpu_counter_sum(&mp->m_icount)
|
||||||
|
percpu_counter_add_batch(&mp->m_icount,
|
||||||
|
idelta, XFS_ICOUNT_BATCH)
|
||||||
|
percpu_counter_add(&mp->m_ifree, ifreedelta);
|
||||||
|
percpu_counter_sum(&mp->m_ifree)
|
||||||
|
|
||||||
|
After this, incorrect inode count (sb_ifree > sb_icount) will be written to
|
||||||
|
the log. In the subsequent writing of sb, incorrect inode count (sb_ifree >
|
||||||
|
sb_icount) will fail to pass the boundary check in xfs_validate_sb_write()
|
||||||
|
which causes the file system to shut down.
|
||||||
|
|
||||||
|
When lazysbcount is enabled, we don't need to guarantee that Lazy sb
|
||||||
|
counters are completely correct, but we do need to guarantee that sb_ifree
|
||||||
|
<= sb_icount. On the other hand, the constraint that m_ifree <= m_icount
|
||||||
|
must be satisfied any time that there /cannot/ be other threads allocating
|
||||||
|
or freeing inode chunks. If the constraint is violated under these
|
||||||
|
circumstances, sb_i{count,free} (the ondisk superblock inode counters)
|
||||||
|
may be incorrect and need to be marked sick at unmount; the count will
|
||||||
|
be rebuilt on the next mount.
|
||||||
|
|
||||||
|
Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks")
|
||||||
|
Signed-off-by: Long Li <leo.lilong@huawei.com>
|
||||||
|
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/xfs_sb.c | 4 +++-
|
||||||
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
|
||||||
|
index fc33dc4a..d05f0e6e 100644
|
||||||
|
--- a/libxfs/xfs_sb.c
|
||||||
|
+++ b/libxfs/xfs_sb.c
|
||||||
|
@@ -970,7 +970,9 @@ xfs_log_sb(
|
||||||
|
*/
|
||||||
|
if (xfs_has_lazysbcount(mp)) {
|
||||||
|
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
|
||||||
|
- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
|
||||||
|
+ mp->m_sb.sb_ifree = min_t(uint64_t,
|
||||||
|
+ percpu_counter_sum(&mp->m_ifree),
|
||||||
|
+ mp->m_sb.sb_icount);
|
||||||
|
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,73 @@
|
|||||||
|
From f5ef812888a81be534466fa34df747c16bb65b7f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Guo Xuenan <guoxuenan@huawei.com>
|
||||||
|
Date: Wed, 15 Mar 2023 15:57:35 +0100
|
||||||
|
Subject: [PATCH] xfs: get rid of assert from xfs_btree_islastblock
|
||||||
|
|
||||||
|
Source kernel commit: 8c25febf23963431686f04874b96321288504127
|
||||||
|
|
||||||
|
xfs_btree_check_block contains debugging knobs. With XFS_DEBUG enabled,
|
||||||
|
turning on the debugging knob can trigger the assert of xfs_btree_islastblock,
|
||||||
|
test script as follows:
|
||||||
|
|
||||||
|
while true
|
||||||
|
do
|
||||||
|
mount $disk $mountpoint
|
||||||
|
fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null
|
||||||
|
echo 1 > /sys/fs/xfs/sda/errortag/btree_chk_sblk
|
||||||
|
sleep 10
|
||||||
|
umount $mountpoint
|
||||||
|
done
|
||||||
|
|
||||||
|
Kick off fsstress and only *then* turn on the debugging knob. If it
|
||||||
|
happens that the knob gets turned on after the cntbt lookup succeeds
|
||||||
|
but before the call to xfs_btree_islastblock, then we *can* end up in
|
||||||
|
the situation where a previously checked btree block suddenly starts
|
||||||
|
returning EFSCORRUPTED from xfs_btree_check_block. Kaboom.
|
||||||
|
|
||||||
|
Darrick gave a very detailed explanation as follows:
|
||||||
|
Looking back at commit 27d9ee577dcce, I think the point of all this was
|
||||||
|
to make sure that the cursor has actually performed a lookup, and that
|
||||||
|
the btree block at whatever level we're asking about is ok.
|
||||||
|
|
||||||
|
If the caller hasn't ever done a lookup, the bc_levels array will be
|
||||||
|
empty, so cur->bc_levels[level].bp pointer will be NULL. The call to
|
||||||
|
xfs_btree_get_block will crash anyway, so the "ASSERT(block);" part is
|
||||||
|
pointless.
|
||||||
|
|
||||||
|
If the caller did a lookup but the lookup failed due to block
|
||||||
|
corruption, the corresponding cur->bc_levels[level].bp pointer will also
|
||||||
|
be NULL, and we'll still crash. The "ASSERT(xfs_btree_check_block);"
|
||||||
|
logic is also unnecessary.
|
||||||
|
|
||||||
|
If the cursor level points to an inode root, the block buffer will be
|
||||||
|
incore, so it had better always be consistent.
|
||||||
|
|
||||||
|
If the caller ignores a failed lookup after a successful one and calls
|
||||||
|
this function, the cursor state is garbage and the assert wouldn't have
|
||||||
|
tripped anyway. So get rid of the assert.
|
||||||
|
|
||||||
|
Fixes: 27d9ee577dcc ("xfs: actually check xfs_btree_check_block return in xfs_btree_islastblock")
|
||||||
|
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
|
||||||
|
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/xfs_btree.h | 1 -
|
||||||
|
1 file changed, 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/xfs_btree.h b/libxfs/xfs_btree.h
|
||||||
|
index eef27858..29c4b4cc 100644
|
||||||
|
--- a/libxfs/xfs_btree.h
|
||||||
|
+++ b/libxfs/xfs_btree.h
|
||||||
|
@@ -556,7 +556,6 @@ xfs_btree_islastblock(
|
||||||
|
struct xfs_buf *bp;
|
||||||
|
|
||||||
|
block = xfs_btree_get_block(cur, level, &bp);
|
||||||
|
- ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0);
|
||||||
|
|
||||||
|
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
|
||||||
|
return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,187 @@
|
|||||||
|
From d1dca9f6b365e439878e550ed0c801bbfb6d347b Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||||
|
Date: Wed, 15 Mar 2023 15:55:35 +0100
|
||||||
|
Subject: [PATCH] xfs: hoist refcount record merge predicates
|
||||||
|
|
||||||
|
Source kernel commit: 9d720a5a658f5135861773f26e927449bef93d61
|
||||||
|
|
||||||
|
Hoist these multiline conditionals into separate static inline helpers
|
||||||
|
to improve readability and set the stage for corruption fixes that will
|
||||||
|
be introduced in the next patch.
|
||||||
|
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||||
|
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++++++++------
|
||||||
|
1 file changed, 113 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/xfs_refcount.c b/libxfs/xfs_refcount.c
|
||||||
|
index 64e66861..f6167c5f 100644
|
||||||
|
--- a/libxfs/xfs_refcount.c
|
||||||
|
+++ b/libxfs/xfs_refcount.c
|
||||||
|
@@ -814,11 +814,119 @@ out_error:
|
||||||
|
/* Is this extent valid? */
|
||||||
|
static inline bool
|
||||||
|
xfs_refc_valid(
|
||||||
|
- struct xfs_refcount_irec *rc)
|
||||||
|
+ const struct xfs_refcount_irec *rc)
|
||||||
|
{
|
||||||
|
return rc->rc_startblock != NULLAGBLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static inline bool
|
||||||
|
+xfs_refc_want_merge_center(
|
||||||
|
+ const struct xfs_refcount_irec *left,
|
||||||
|
+ const struct xfs_refcount_irec *cleft,
|
||||||
|
+ const struct xfs_refcount_irec *cright,
|
||||||
|
+ const struct xfs_refcount_irec *right,
|
||||||
|
+ bool cleft_is_cright,
|
||||||
|
+ enum xfs_refc_adjust_op adjust,
|
||||||
|
+ unsigned long long *ulenp)
|
||||||
|
+{
|
||||||
|
+ unsigned long long ulen = left->rc_blockcount;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * To merge with a center record, both shoulder records must be
|
||||||
|
+ * adjacent to the record we want to adjust. This is only true if
|
||||||
|
+ * find_left and find_right made all four records valid.
|
||||||
|
+ */
|
||||||
|
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
|
||||||
|
+ !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /* There must only be one record for the entire range. */
|
||||||
|
+ if (!cleft_is_cright)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /* The shoulder record refcounts must match the new refcount. */
|
||||||
|
+ if (left->rc_refcount != cleft->rc_refcount + adjust)
|
||||||
|
+ return false;
|
||||||
|
+ if (right->rc_refcount != cleft->rc_refcount + adjust)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * The new record cannot exceed the max length. ulen is a ULL as the
|
||||||
|
+ * individual record block counts can be up to (u32 - 1) in length
|
||||||
|
+ * hence we need to catch u32 addition overflows here.
|
||||||
|
+ */
|
||||||
|
+ ulen += cleft->rc_blockcount + right->rc_blockcount;
|
||||||
|
+ if (ulen >= MAXREFCEXTLEN)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ *ulenp = ulen;
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline bool
|
||||||
|
+xfs_refc_want_merge_left(
|
||||||
|
+ const struct xfs_refcount_irec *left,
|
||||||
|
+ const struct xfs_refcount_irec *cleft,
|
||||||
|
+ enum xfs_refc_adjust_op adjust)
|
||||||
|
+{
|
||||||
|
+ unsigned long long ulen = left->rc_blockcount;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * For a left merge, the left shoulder record must be adjacent to the
|
||||||
|
+ * start of the range. If this is true, find_left made left and cleft
|
||||||
|
+ * contain valid contents.
|
||||||
|
+ */
|
||||||
|
+ if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /* Left shoulder record refcount must match the new refcount. */
|
||||||
|
+ if (left->rc_refcount != cleft->rc_refcount + adjust)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * The new record cannot exceed the max length. ulen is a ULL as the
|
||||||
|
+ * individual record block counts can be up to (u32 - 1) in length
|
||||||
|
+ * hence we need to catch u32 addition overflows here.
|
||||||
|
+ */
|
||||||
|
+ ulen += cleft->rc_blockcount;
|
||||||
|
+ if (ulen >= MAXREFCEXTLEN)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline bool
|
||||||
|
+xfs_refc_want_merge_right(
|
||||||
|
+ const struct xfs_refcount_irec *cright,
|
||||||
|
+ const struct xfs_refcount_irec *right,
|
||||||
|
+ enum xfs_refc_adjust_op adjust)
|
||||||
|
+{
|
||||||
|
+ unsigned long long ulen = right->rc_blockcount;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * For a right merge, the right shoulder record must be adjacent to the
|
||||||
|
+ * end of the range. If this is true, find_right made cright and right
|
||||||
|
+ * contain valid contents.
|
||||||
|
+ */
|
||||||
|
+ if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /* Right shoulder record refcount must match the new refcount. */
|
||||||
|
+ if (right->rc_refcount != cright->rc_refcount + adjust)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * The new record cannot exceed the max length. ulen is a ULL as the
|
||||||
|
+ * individual record block counts can be up to (u32 - 1) in length
|
||||||
|
+ * hence we need to catch u32 addition overflows here.
|
||||||
|
+ */
|
||||||
|
+ ulen += cright->rc_blockcount;
|
||||||
|
+ if (ulen >= MAXREFCEXTLEN)
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Try to merge with any extents on the boundaries of the adjustment range.
|
||||||
|
*/
|
||||||
|
@@ -860,23 +968,15 @@ xfs_refcount_merge_extents(
|
||||||
|
(cleft.rc_blockcount == cright.rc_blockcount);
|
||||||
|
|
||||||
|
/* Try to merge left, cleft, and right. cleft must == cright. */
|
||||||
|
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
|
||||||
|
- right.rc_blockcount;
|
||||||
|
- if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
|
||||||
|
- xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
|
||||||
|
- left.rc_refcount == cleft.rc_refcount + adjust &&
|
||||||
|
- right.rc_refcount == cleft.rc_refcount + adjust &&
|
||||||
|
- ulen < MAXREFCEXTLEN) {
|
||||||
|
+ if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
|
||||||
|
+ adjust, &ulen)) {
|
||||||
|
*shape_changed = true;
|
||||||
|
return xfs_refcount_merge_center_extents(cur, &left, &cleft,
|
||||||
|
&right, ulen, aglen);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to merge left and cleft. */
|
||||||
|
- ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
|
||||||
|
- if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
|
||||||
|
- left.rc_refcount == cleft.rc_refcount + adjust &&
|
||||||
|
- ulen < MAXREFCEXTLEN) {
|
||||||
|
+ if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
|
||||||
|
*shape_changed = true;
|
||||||
|
error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
|
||||||
|
agbno, aglen);
|
||||||
|
@@ -892,10 +992,7 @@ xfs_refcount_merge_extents(
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to merge cright and right. */
|
||||||
|
- ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
|
||||||
|
- if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
|
||||||
|
- right.rc_refcount == cright.rc_refcount + adjust &&
|
||||||
|
- ulen < MAXREFCEXTLEN) {
|
||||||
|
+ if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
|
||||||
|
*shape_changed = true;
|
||||||
|
return xfs_refcount_merge_right_extent(cur, &right, &cright,
|
||||||
|
aglen);
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,69 @@
|
|||||||
|
From 798d43495df2c8a09a73b8e868a71d8f2fd81d5e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Andrey Strachuk <strochuk@ispras.ru>
|
||||||
|
Date: Wed, 24 Aug 2022 10:24:01 +0200
|
||||||
|
Subject: [PATCH] xfs: removed useless condition in function xfs_attr_node_get
|
||||||
|
|
||||||
|
Source kernel commit: 0f38063d7a38015a47ca1488406bf21e0effe80e
|
||||||
|
|
||||||
|
At line 1561, variable "state" is being compared
|
||||||
|
with NULL every loop iteration.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
1561 for (i = 0; state != NULL && i < state->path.active; i++) {
|
||||||
|
1562 xfs_trans_brelse(args->trans, state->path.blk[i].bp);
|
||||||
|
1563 state->path.blk[i].bp = NULL;
|
||||||
|
1564 }
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
|
||||||
|
However, it cannot be NULL.
|
||||||
|
|
||||||
|
----------------------------------------
|
||||||
|
1546 state = xfs_da_state_alloc(args);
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
xfs_da_state_alloc calls kmem_cache_zalloc. kmem_cache_zalloc is
|
||||||
|
called with __GFP_NOFAIL flag and, therefore, it cannot return NULL.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------
|
||||||
|
struct xfs_da_state *
|
||||||
|
xfs_da_state_alloc(
|
||||||
|
struct xfs_da_args *args)
|
||||||
|
{
|
||||||
|
struct xfs_da_state *state;
|
||||||
|
|
||||||
|
state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL);
|
||||||
|
state->args = args;
|
||||||
|
state->mp = args->dp->i_mount;
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
--------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Found by Linux Verification Center (linuxtesting.org) with SVACE.
|
||||||
|
|
||||||
|
Signed-off-by: Andrey Strachuk <strochuk@ispras.ru>
|
||||||
|
|
||||||
|
Fixes: 4d0cdd2bb8f0 ("xfs: clean up xfs_attr_node_hasname")
|
||||||
|
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/xfs_attr.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/xfs_attr.c b/libxfs/xfs_attr.c
|
||||||
|
index 08973934..b451fcdb 100644
|
||||||
|
--- a/libxfs/xfs_attr.c
|
||||||
|
+++ b/libxfs/xfs_attr.c
|
||||||
|
@@ -1556,7 +1556,7 @@ xfs_attr_node_get(
|
||||||
|
* If not in a transaction, we have to release all the buffers.
|
||||||
|
*/
|
||||||
|
out_release:
|
||||||
|
- for (i = 0; state != NULL && i < state->path.active; i++) {
|
||||||
|
+ for (i = 0; i < state->path.active; i++) {
|
||||||
|
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
|
||||||
|
state->path.blk[i].bp = NULL;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,34 @@
|
|||||||
|
From 7374f58bfeb38467bab6552a47a5cd6bbe3c2e2e Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||||
|
Date: Tue, 20 Dec 2022 16:53:34 -0800
|
||||||
|
Subject: [PATCH] xfs_db: fix dir3 block magic check
|
||||||
|
|
||||||
|
Fix this broken check, which (amazingly) went unnoticed until I cranked
|
||||||
|
up the warning level /and/ built the system for s390x.
|
||||||
|
|
||||||
|
Fixes: e96864ff4d4 ("xfs_db: enable blockget for v5 filesystems")
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||||
|
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
db/check.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/db/check.c b/db/check.c
|
||||||
|
index bb27ce58..964756d0 100644
|
||||||
|
--- a/db/check.c
|
||||||
|
+++ b/db/check.c
|
||||||
|
@@ -2578,7 +2578,7 @@ process_data_dir_v2(
|
||||||
|
error++;
|
||||||
|
}
|
||||||
|
if ((be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC ||
|
||||||
|
- be32_to_cpu(data->magic) == XFS_DIR2_BLOCK_MAGIC) &&
|
||||||
|
+ be32_to_cpu(data->magic) == XFS_DIR3_BLOCK_MAGIC) &&
|
||||||
|
stale != be32_to_cpu(btp->stale)) {
|
||||||
|
if (!sflag || v)
|
||||||
|
dbprintf(_("dir %lld block %d bad stale tail count %d\n"),
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,266 @@
|
|||||||
|
From 945c7341dedab44ae5daed83377e6366c3fb8fee Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||||
|
Date: Wed, 23 Nov 2022 09:09:33 -0800
|
||||||
|
Subject: [PATCH] xfs_repair: retain superblock buffer to avoid write hook
|
||||||
|
deadlock
|
||||||
|
|
||||||
|
Every now and then I experience the following deadlock in xfs_repair
|
||||||
|
when I'm running the offline repair fuzz tests:
|
||||||
|
|
||||||
|
#0 futex_wait (private=0, expected=2, futex_word=0x55555566df70) at ../sysdeps/nptl/futex-internal.h:146
|
||||||
|
#1 __GI___lll_lock_wait (futex=futex@entry=0x55555566df70, private=0) at ./nptl/lowlevellock.c:49
|
||||||
|
#2 lll_mutex_lock_optimized (mutex=0x55555566df70) at ./nptl/pthread_mutex_lock.c:48
|
||||||
|
#3 ___pthread_mutex_lock (mutex=mutex@entry=0x55555566df70) at ./nptl/pthread_mutex_lock.c:93
|
||||||
|
#4 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:231
|
||||||
|
#5 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e01b0, nodep=nodep@entry=0x7fffe55e0168) at cache.c:452
|
||||||
|
#6 __cache_lookup (key=key@entry=0x7fffe55e01b0, flags=0, bpp=bpp@entry=0x7fffe55e0228) at rdwr.c:405
|
||||||
|
#7 libxfs_getbuf_flags (btp=0x55555566de00, blkno=0, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0228) at rdwr.c:457
|
||||||
|
#8 libxfs_buf_read_map (btp=0x55555566de00, map=map@entry=0x7fffe55e0280, nmaps=nmaps@entry=1, flags=flags@entry=0, bpp=bpp@entry=0x7fffe55e0278, ops=0x5555556233e0 <xfs_sb_buf_ops>)
|
||||||
|
at rdwr.c:704
|
||||||
|
#9 libxfs_buf_read (ops=<optimized out>, bpp=0x7fffe55e0278, flags=0, numblks=<optimized out>, blkno=0, target=<optimized out>)
|
||||||
|
at /storage/home/djwong/cdev/work/xfsprogs/build-x86_64/libxfs/libxfs_io.h:195
|
||||||
|
#10 libxfs_getsb (mp=mp@entry=0x7fffffffd690) at rdwr.c:162
|
||||||
|
#11 force_needsrepair (mp=0x7fffffffd690) at xfs_repair.c:924
|
||||||
|
#12 repair_capture_writeback (bp=<optimized out>) at xfs_repair.c:1000
|
||||||
|
#13 libxfs_bwrite (bp=0x7fffe011e530) at rdwr.c:869
|
||||||
|
#14 cache_shake (cache=cache@entry=0x55555566de60, priority=priority@entry=2, purge=purge@entry=false) at cache.c:240
|
||||||
|
#15 cache_node_get (cache=cache@entry=0x55555566de60, key=key@entry=0x7fffe55e0470, nodep=nodep@entry=0x7fffe55e0428) at cache.c:452
|
||||||
|
#16 __cache_lookup (key=key@entry=0x7fffe55e0470, flags=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:405
|
||||||
|
#17 libxfs_getbuf_flags (btp=0x55555566de00, blkno=12736, len=<optimized out>, flags=<optimized out>, bpp=0x7fffe55e0538) at rdwr.c:457
|
||||||
|
#18 __libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:501
|
||||||
|
#19 libxfs_buf_get_map (btp=<optimized out>, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flags=flags@entry=1, bpp=bpp@entry=0x7fffe55e0538) at rdwr.c:525
|
||||||
|
#20 pf_queue_io (args=args@entry=0x5555556722c0, map=map@entry=0x7fffe55e05b0, nmaps=<optimized out>, flag=flag@entry=11) at prefetch.c:124
|
||||||
|
#21 pf_read_bmbt_reclist (args=0x5555556722c0, rp=<optimized out>, numrecs=78) at prefetch.c:220
|
||||||
|
#22 pf_scan_lbtree (dbno=dbno@entry=1211, level=level@entry=1, isadir=isadir@entry=1, args=args@entry=0x5555556722c0, func=0x55555557f240 <pf_scanfunc_bmap>) at prefetch.c:298
|
||||||
|
#23 pf_read_btinode (isadir=1, dino=<optimized out>, args=0x5555556722c0) at prefetch.c:385
|
||||||
|
#24 pf_read_inode_dirs (args=args@entry=0x5555556722c0, bp=bp@entry=0x7fffdc023790) at prefetch.c:459
|
||||||
|
#25 pf_read_inode_dirs (bp=<optimized out>, args=0x5555556722c0) at prefetch.c:411
|
||||||
|
#26 pf_batch_read (args=args@entry=0x5555556722c0, which=which@entry=PF_PRIMARY, buf=buf@entry=0x7fffd001d000) at prefetch.c:609
|
||||||
|
#27 pf_io_worker (param=0x5555556722c0) at prefetch.c:673
|
||||||
|
#28 start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
|
||||||
|
#29 clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
|
||||||
|
|
||||||
|
>From this stack trace, we see that xfs_repair's prefetch module is
|
||||||
|
getting some xfs_buf objects ahead of initiating a read (#19). The
|
||||||
|
buffer cache has hit its limit, so it calls cache_shake (#14) to free
|
||||||
|
some unused xfs_bufs. The buffer it finds is a dirty buffer, so it
|
||||||
|
calls libxfs_bwrite to flush it out to disk, which in turn invokes the
|
||||||
|
buffer write hook that xfs_repair set up in 3b7667cb to mark the ondisk
|
||||||
|
filesystem's superblock as NEEDSREPAIR until repair actually completes.
|
||||||
|
|
||||||
|
Unfortunately, the NEEDSREPAIR handler itself needs to grab the
|
||||||
|
superblock buffer, so it makes another call into the buffer cache (#9),
|
||||||
|
which sees that the cache is full and tries to shake it (#4). Hence we
|
||||||
|
deadlock on cm_mutex because shaking is not reentrant.
|
||||||
|
|
||||||
|
Fix this by retaining a reference to the superblock buffer when possible
|
||||||
|
so that the writeback hook doesn't have to access the buffer cache to
|
||||||
|
set NEEDSREPAIR.
|
||||||
|
|
||||||
|
Fixes: 3b7667cb ("xfs_repair: set NEEDSREPAIR the first time we write to a filesystem")
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||||
|
Signed-off-by: Carlos Maiolino <cem@kernel.org>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
libxfs/libxfs_api_defs.h | 2 ++
|
||||||
|
libxfs/libxfs_io.h | 1 +
|
||||||
|
libxfs/rdwr.c | 8 +++++
|
||||||
|
repair/phase2.c | 8 +++++
|
||||||
|
repair/protos.h | 1 +
|
||||||
|
repair/xfs_repair.c | 75 +++++++++++++++++++++++++++++++++++-----
|
||||||
|
6 files changed, 86 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h
|
||||||
|
index 2716a731..f8efcce7 100644
|
||||||
|
--- a/libxfs/libxfs_api_defs.h
|
||||||
|
+++ b/libxfs/libxfs_api_defs.h
|
||||||
|
@@ -53,9 +53,11 @@
|
||||||
|
#define xfs_buf_delwri_submit libxfs_buf_delwri_submit
|
||||||
|
#define xfs_buf_get libxfs_buf_get
|
||||||
|
#define xfs_buf_get_uncached libxfs_buf_get_uncached
|
||||||
|
+#define xfs_buf_lock libxfs_buf_lock
|
||||||
|
#define xfs_buf_read libxfs_buf_read
|
||||||
|
#define xfs_buf_read_uncached libxfs_buf_read_uncached
|
||||||
|
#define xfs_buf_relse libxfs_buf_relse
|
||||||
|
+#define xfs_buf_unlock libxfs_buf_unlock
|
||||||
|
#define xfs_bunmapi libxfs_bunmapi
|
||||||
|
#define xfs_bwrite libxfs_bwrite
|
||||||
|
#define xfs_calc_dquots_per_chunk libxfs_calc_dquots_per_chunk
|
||||||
|
diff --git a/libxfs/libxfs_io.h b/libxfs/libxfs_io.h
|
||||||
|
index 9c0e2704..fae86427 100644
|
||||||
|
--- a/libxfs/libxfs_io.h
|
||||||
|
+++ b/libxfs/libxfs_io.h
|
||||||
|
@@ -226,6 +226,7 @@ xfs_buf_hold(struct xfs_buf *bp)
|
||||||
|
}
|
||||||
|
|
||||||
|
void xfs_buf_lock(struct xfs_buf *bp);
|
||||||
|
+void xfs_buf_unlock(struct xfs_buf *bp);
|
||||||
|
|
||||||
|
int libxfs_buf_get_uncached(struct xfs_buftarg *targ, size_t bblen, int flags,
|
||||||
|
struct xfs_buf **bpp);
|
||||||
|
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
|
||||||
|
index 20e0793c..d5aad3ea 100644
|
||||||
|
--- a/libxfs/rdwr.c
|
||||||
|
+++ b/libxfs/rdwr.c
|
||||||
|
@@ -384,6 +384,14 @@ xfs_buf_lock(
|
||||||
|
pthread_mutex_lock(&bp->b_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
+void
|
||||||
|
+xfs_buf_unlock(
|
||||||
|
+ struct xfs_buf *bp)
|
||||||
|
+{
|
||||||
|
+ if (use_xfs_buf_lock)
|
||||||
|
+ pthread_mutex_unlock(&bp->b_lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int
|
||||||
|
__cache_lookup(
|
||||||
|
struct xfs_bufkey *key,
|
||||||
|
diff --git a/repair/phase2.c b/repair/phase2.c
|
||||||
|
index 56a39bb4..2ada95ae 100644
|
||||||
|
--- a/repair/phase2.c
|
||||||
|
+++ b/repair/phase2.c
|
||||||
|
@@ -370,6 +370,14 @@ phase2(
|
||||||
|
} else
|
||||||
|
do_log(_("Phase 2 - using internal log\n"));
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Now that we've set up the buffer cache the way we want it, try to
|
||||||
|
+ * grab our own reference to the primary sb so that the hooks will not
|
||||||
|
+ * have to call out to the buffer cache.
|
||||||
|
+ */
|
||||||
|
+ if (mp->m_buf_writeback_fn)
|
||||||
|
+ retain_primary_sb(mp);
|
||||||
|
+
|
||||||
|
/* Zero log if applicable */
|
||||||
|
do_log(_(" - zero log...\n"));
|
||||||
|
|
||||||
|
diff --git a/repair/protos.h b/repair/protos.h
|
||||||
|
index 03ebae14..83e471ff 100644
|
||||||
|
--- a/repair/protos.h
|
||||||
|
+++ b/repair/protos.h
|
||||||
|
@@ -16,6 +16,7 @@ int get_sb(xfs_sb_t *sbp,
|
||||||
|
xfs_off_t off,
|
||||||
|
int size,
|
||||||
|
xfs_agnumber_t agno);
|
||||||
|
+int retain_primary_sb(struct xfs_mount *mp);
|
||||||
|
void write_primary_sb(xfs_sb_t *sbp,
|
||||||
|
int size);
|
||||||
|
|
||||||
|
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
|
||||||
|
index 871b428d..ff29bea9 100644
|
||||||
|
--- a/repair/xfs_repair.c
|
||||||
|
+++ b/repair/xfs_repair.c
|
||||||
|
@@ -749,6 +749,63 @@ check_fs_vs_host_sectsize(
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * If we set up a writeback function to set NEEDSREPAIR while the filesystem is
|
||||||
|
+ * dirty, there's a chance that calling libxfs_getsb could deadlock the buffer
|
||||||
|
+ * cache while trying to get the primary sb buffer if the first non-sb write to
|
||||||
|
+ * the filesystem is the result of a cache shake. Retain a reference to the
|
||||||
|
+ * primary sb buffer to avoid all that.
|
||||||
|
+ */
|
||||||
|
+static struct xfs_buf *primary_sb_bp; /* buffer for superblock */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+retain_primary_sb(
|
||||||
|
+ struct xfs_mount *mp)
|
||||||
|
+{
|
||||||
|
+ int error;
|
||||||
|
+
|
||||||
|
+ error = -libxfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
|
||||||
|
+ XFS_FSS_TO_BB(mp, 1), 0, &primary_sb_bp,
|
||||||
|
+ &xfs_sb_buf_ops);
|
||||||
|
+ if (error)
|
||||||
|
+ return error;
|
||||||
|
+
|
||||||
|
+ libxfs_buf_unlock(primary_sb_bp);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+drop_primary_sb(void)
|
||||||
|
+{
|
||||||
|
+ if (!primary_sb_bp)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ libxfs_buf_lock(primary_sb_bp);
|
||||||
|
+ libxfs_buf_relse(primary_sb_bp);
|
||||||
|
+ primary_sb_bp = NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int
|
||||||
|
+get_primary_sb(
|
||||||
|
+ struct xfs_mount *mp,
|
||||||
|
+ struct xfs_buf **bpp)
|
||||||
|
+{
|
||||||
|
+ int error;
|
||||||
|
+
|
||||||
|
+ *bpp = NULL;
|
||||||
|
+
|
||||||
|
+ if (!primary_sb_bp) {
|
||||||
|
+ error = retain_primary_sb(mp);
|
||||||
|
+ if (error)
|
||||||
|
+ return error;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ libxfs_buf_lock(primary_sb_bp);
|
||||||
|
+ xfs_buf_hold(primary_sb_bp);
|
||||||
|
+ *bpp = primary_sb_bp;
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Clear needsrepair after a successful repair run. */
|
||||||
|
void
|
||||||
|
clear_needsrepair(
|
||||||
|
@@ -769,15 +826,14 @@ clear_needsrepair(
|
||||||
|
do_warn(
|
||||||
|
_("Cannot clear needsrepair due to flush failure, err=%d.\n"),
|
||||||
|
error);
|
||||||
|
- return;
|
||||||
|
+ goto drop;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clear needsrepair from the superblock. */
|
||||||
|
- bp = libxfs_getsb(mp);
|
||||||
|
- if (!bp || bp->b_error) {
|
||||||
|
+ error = get_primary_sb(mp, &bp);
|
||||||
|
+ if (error) {
|
||||||
|
do_warn(
|
||||||
|
- _("Cannot clear needsrepair from primary super, err=%d.\n"),
|
||||||
|
- bp ? bp->b_error : ENOMEM);
|
||||||
|
+ _("Cannot clear needsrepair from primary super, err=%d.\n"), error);
|
||||||
|
} else {
|
||||||
|
mp->m_sb.sb_features_incompat &=
|
||||||
|
~XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR;
|
||||||
|
@@ -786,6 +842,8 @@ clear_needsrepair(
|
||||||
|
}
|
||||||
|
if (bp)
|
||||||
|
libxfs_buf_relse(bp);
|
||||||
|
+drop:
|
||||||
|
+ drop_primary_sb();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
@@ -808,11 +866,10 @@ force_needsrepair(
|
||||||
|
xfs_sb_version_needsrepair(&mp->m_sb))
|
||||||
|
return;
|
||||||
|
|
||||||
|
- bp = libxfs_getsb(mp);
|
||||||
|
- if (!bp || bp->b_error) {
|
||||||
|
+ error = get_primary_sb(mp, &bp);
|
||||||
|
+ if (error) {
|
||||||
|
do_log(
|
||||||
|
- _("couldn't get superblock to set needsrepair, err=%d\n"),
|
||||||
|
- bp ? bp->b_error : ENOMEM);
|
||||||
|
+ _("couldn't get superblock to set needsrepair, err=%d\n"), error);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* It's possible that we need to set NEEDSREPAIR before we've
|
||||||
|
--
|
||||||
|
2.40.0
|
||||||
|
|
@ -0,0 +1,17 @@
|
|||||||
|
-----BEGIN PGP SIGNATURE-----
|
||||||
|
|
||||||
|
iQJIBAABCAAyFiEEK4GFkZ6NJImBhp3tIK4WkuE93uAFAmL2oA0UHHNhbmRlZW5A
|
||||||
|
c2FuZGVlbi5uZXQACgkQIK4WkuE93uCKsxAAxhIfv5fSmuy7Qh+b8w3a4khG8r5V
|
||||||
|
BtaNAqbKE+Zz1Rl8z3TGlYKXcI/Ruola5UKUjXzGeHBbXcOGWDpWoaMp5kDnloUP
|
||||||
|
SovsR+qpBUijPO4bff/7aR9+4tHS2UvEv0yDuG8gd6VyOGxPXDgwFSCEse4bWf61
|
||||||
|
l5UZN3dPXOMmm+G4ZITWuq1Us+3/uz12WwSuftCbPsghcCIDhnEQSA40yqyU2yJH
|
||||||
|
WTkLBb9/ROFN2iLjJ2HWeCEdZ62+mT767tZrmaQHlzFbK+rrl3CA7S+RjWHFA0qz
|
||||||
|
tDBLJLDMBdAExmR+sG+48pZTbgd1s8aXmMjCt80q5OzB5dKTfoYlqljkZxyIM0E2
|
||||||
|
y2Kq716s4rBHV/Na/lhqYMPAPY3hG9iDOLKAVSJWbI7i/j5t5+Uin697Ha79cupO
|
||||||
|
3bD7tJBr4JpL+tu9Q9khQhekGgSGqfQG/a9aJA0H8DoPsmOuuUrJBKs10npvVu18
|
||||||
|
Cx5WHwNeadK3rr0DLgik0X1POUex0fD+xjYXMWCHSJZhoC2wCbyvgg1xwYgEg16n
|
||||||
|
iyh+yxYSOlNgTZsseP/AmgsfKDO0hH/k4PiXrd4vT4+jDcVzBEQ62j8QJ629/qyC
|
||||||
|
mChjhcbzrJosAseLDMbm5gM2M79nqavrdvBhJ9JMEIjq/5m8VaQrmZE12w48lh92
|
||||||
|
WIngBgst2MrB8mE=
|
||||||
|
=KQvs
|
||||||
|
-----END PGP SIGNATURE-----
|
@ -0,0 +1,39 @@
|
|||||||
|
From 780e93c5103d3c19d53c36ab7f4794d14912f3a5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Bill O'Donnell <bodonnel@redhat.com>
|
||||||
|
Date: Fri, 28 Jul 2023 17:20:17 -0500
|
||||||
|
Subject: [PATCH] mkfs.xfs.8: correction on mkfs.xfs manpage since reflink and
|
||||||
|
dax are compatible
|
||||||
|
|
||||||
|
Merged early in 2023: Commit 480017957d638 xfs: remove restrictions for fsdax
|
||||||
|
and reflink. There needs to be a corresponding change to the mkfs.xfs manpage
|
||||||
|
to remove the incompatibility statement.
|
||||||
|
|
||||||
|
Signed-off-by: Bill O'Donnell <bodonnel@redhat.com>
|
||||||
|
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
man/man8/mkfs.xfs.8.in | 7 -------
|
||||||
|
1 file changed, 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
|
||||||
|
index ce6f1e2d..08bb92f6 100644
|
||||||
|
--- a/man/man8/mkfs.xfs.8.in
|
||||||
|
+++ b/man/man8/mkfs.xfs.8.in
|
||||||
|
@@ -323,13 +323,6 @@ option set. When the option
|
||||||
|
.B \-m crc=0
|
||||||
|
is used, the reference count btree feature is not supported and reflink is
|
||||||
|
disabled.
|
||||||
|
-.IP
|
||||||
|
-Note: the filesystem DAX mount option (
|
||||||
|
-.B \-o dax
|
||||||
|
-) is incompatible with
|
||||||
|
-reflink-enabled XFS filesystems. To use filesystem DAX with XFS, specify the
|
||||||
|
-.B \-m reflink=0
|
||||||
|
-option to mkfs.xfs to disable the reflink feature.
|
||||||
|
.RE
|
||||||
|
.PP
|
||||||
|
.PD 0
|
||||||
|
--
|
||||||
|
2.41.0
|
||||||
|
|
@ -0,0 +1,128 @@
|
|||||||
|
From 8e698ee72c4ecbbf18264568eb310875839fd601 Mon Sep 17 00:00:00 2001
|
||||||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||||||
|
Date: Tue, 2 May 2023 09:14:36 +1000
|
||||||
|
Subject: [PATCH] xfs: set bnobt/cntbt numrecs correctly when formatting new
|
||||||
|
AGs
|
||||||
|
|
||||||
|
Through generic/300, I discovered that mkfs.xfs creates corrupt
|
||||||
|
filesystems when given these parameters:
|
||||||
|
|
||||||
|
# mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported
|
||||||
|
Filesystems formatted with --unsupported are not supported!!
|
||||||
|
meta-data=/dev/sda isize=512 agcount=8, agsize=16352 blks
|
||||||
|
= sectsz=512 attr=2, projid32bit=1
|
||||||
|
= crc=1 finobt=1, sparse=1, rmapbt=1
|
||||||
|
= reflink=1 bigtime=1 inobtcount=1 nrext64=1
|
||||||
|
data = bsize=4096 blocks=130816, imaxpct=25
|
||||||
|
= sunit=32 swidth=128 blks
|
||||||
|
naming =version 2 bsize=4096 ascii-ci=0, ftype=1
|
||||||
|
log =internal log bsize=4096 blocks=8192, version=2
|
||||||
|
= sectsz=512 sunit=32 blks, lazy-count=1
|
||||||
|
realtime =none extsz=4096 blocks=0, rtextents=0
|
||||||
|
= rgcount=0 rgsize=0 blks
|
||||||
|
Discarding blocks...Done.
|
||||||
|
# xfs_repair -n /dev/sda
|
||||||
|
Phase 1 - find and verify superblock...
|
||||||
|
- reporting progress in intervals of 15 minutes
|
||||||
|
Phase 2 - using internal log
|
||||||
|
- zero log...
|
||||||
|
- 16:30:50: zeroing log - 16320 of 16320 blocks done
|
||||||
|
- scan filesystem freespace and inode maps...
|
||||||
|
agf_freeblks 25, counted 0 in ag 4
|
||||||
|
sb_fdblocks 8823, counted 8798
|
||||||
|
|
||||||
|
The root cause of this problem is the numrecs handling in
|
||||||
|
xfs_freesp_init_recs, which is used to initialize a new AG. Prior to
|
||||||
|
calling the function, we set up the new bnobt block with numrecs == 1
|
||||||
|
and rely on _freesp_init_recs to format that new record. If the last
|
||||||
|
record created has a blockcount of zero, then it sets numrecs = 0.
|
||||||
|
|
||||||
|
That last bit isn't correct if the AG contains the log, the start of the
|
||||||
|
log is not immediately after the initial blocks due to stripe alignment,
|
||||||
|
and the end of the log is perfectly aligned with the end of the AG. For
|
||||||
|
this case, we actually formatted a single bnobt record to handle the
|
||||||
|
free space before the start of the (stripe aligned) log, and incremented
|
||||||
|
arec to try to format a second record. That second record turned out to
|
||||||
|
be unnecessary, so what we really want is to leave numrecs at 1.
|
||||||
|
|
||||||
|
The numrecs handling itself is overly complicated because a different
|
||||||
|
function sets numrecs == 1. Change the bnobt creation code to start
|
||||||
|
with numrecs set to zero and only increment it after successfully
|
||||||
|
formatting a free space extent into the btree block.
|
||||||
|
|
||||||
|
Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs")
|
||||||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||||||
|
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||||||
|
Signed-off-by: Dave Chinner <david@fromorbit.com>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
fs/xfs/libxfs/xfs_ag.c | 19 +++++++++----------
|
||||||
|
1 file changed, 9 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
|
||||||
|
index 1b078bbbf225..9b373a0c7aaf 100644
|
||||||
|
--- a//libxfs/xfs_ag.c
|
||||||
|
+++ b//libxfs/xfs_ag.c
|
||||||
|
@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
|
||||||
|
ASSERT(start >= mp->m_ag_prealloc_blocks);
|
||||||
|
if (start != mp->m_ag_prealloc_blocks) {
|
||||||
|
/*
|
||||||
|
- * Modify first record to pad stripe align of log
|
||||||
|
+ * Modify first record to pad stripe align of log and
|
||||||
|
+ * bump the record count.
|
||||||
|
*/
|
||||||
|
arec->ar_blockcount = cpu_to_be32(start -
|
||||||
|
mp->m_ag_prealloc_blocks);
|
||||||
|
+ be16_add_cpu(&block->bb_numrecs, 1);
|
||||||
|
nrec = arec + 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
|
||||||
|
be32_to_cpu(arec->ar_startblock) +
|
||||||
|
be32_to_cpu(arec->ar_blockcount));
|
||||||
|
arec = nrec;
|
||||||
|
- be16_add_cpu(&block->bb_numrecs, 1);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Change record start to after the internal log
|
||||||
|
@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * Calculate the record block count and check for the case where
|
||||||
|
- * the log might have consumed all available space in the AG. If
|
||||||
|
- * so, reset the record count to 0 to avoid exposure of an invalid
|
||||||
|
- * record start block.
|
||||||
|
+ * Calculate the block count of this record; if it is nonzero,
|
||||||
|
+ * increment the record count.
|
||||||
|
*/
|
||||||
|
arec->ar_blockcount = cpu_to_be32(id->agsize -
|
||||||
|
be32_to_cpu(arec->ar_startblock));
|
||||||
|
- if (!arec->ar_blockcount)
|
||||||
|
- block->bb_numrecs = 0;
|
||||||
|
+ if (arec->ar_blockcount)
|
||||||
|
+ be16_add_cpu(&block->bb_numrecs, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -538,7 +537,7 @@ xfs_bnoroot_init(
|
||||||
|
struct xfs_buf *bp,
|
||||||
|
struct aghdr_init_data *id)
|
||||||
|
{
|
||||||
|
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
|
||||||
|
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
|
||||||
|
xfs_freesp_init_recs(mp, bp, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -548,7 +547,7 @@ xfs_cntroot_init(
|
||||||
|
struct xfs_buf *bp,
|
||||||
|
struct aghdr_init_data *id)
|
||||||
|
{
|
||||||
|
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
|
||||||
|
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
|
||||||
|
xfs_freesp_init_recs(mp, bp, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.40.1
|
||||||
|
|
@ -0,0 +1,28 @@
|
|||||||
|
From 31980fef484df925b23824244de8ef353e436cef Mon Sep 17 00:00:00 2001
|
||||||
|
From: Pavel Reichl <preichl@redhat.com>
|
||||||
|
Date: Thu, 8 Jun 2023 10:57:52 +0200
|
||||||
|
Subject: [PATCH] mkfs: fix man's default value for sparse option
|
||||||
|
|
||||||
|
Fixes: 9cf846b51 ("mkfs: enable sparse inodes by default")
|
||||||
|
Suggested-by: Lukas Herbolt <lukas@herbolt.com>
|
||||||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||||||
|
---
|
||||||
|
man/man8/mkfs.xfs.8.in | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/man/man8/mkfs.xfs.8.in b/man/man8/mkfs.xfs.8.in
|
||||||
|
index 49e64d47a..48e26ece7 100644
|
||||||
|
--- a/man/man8/mkfs.xfs.8.in
|
||||||
|
+++ b/man/man8/mkfs.xfs.8.in
|
||||||
|
@@ -631,7 +631,7 @@ Enable sparse inode chunk allocation. The
|
||||||
|
.I value
|
||||||
|
is either 0 or 1, with 1 signifying that sparse allocation is enabled.
|
||||||
|
If the value is omitted, 1 is assumed. Sparse inode allocation is
|
||||||
|
-disabled by default. This feature is only available for filesystems
|
||||||
|
+enabled by default. This feature is only available for filesystems
|
||||||
|
formatted with
|
||||||
|
.B \-m crc=1.
|
||||||
|
.IP
|
||||||
|
--
|
||||||
|
2.40.1
|
||||||
|
|
Loading…
Reference in new issue