You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
129 lines
4.9 KiB
129 lines
4.9 KiB
1 year ago
|
From 8e698ee72c4ecbbf18264568eb310875839fd601 Mon Sep 17 00:00:00 2001
|
||
|
From: "Darrick J. Wong" <djwong@kernel.org>
|
||
|
Date: Tue, 2 May 2023 09:14:36 +1000
|
||
|
Subject: [PATCH] xfs: set bnobt/cntbt numrecs correctly when formatting new
|
||
|
AGs
|
||
|
|
||
|
Through generic/300, I discovered that mkfs.xfs creates corrupt
|
||
|
filesystems when given these parameters:
|
||
|
|
||
|
# mkfs.xfs -d size=512M /dev/sda -f -d su=128k,sw=4 --unsupported
|
||
|
Filesystems formatted with --unsupported are not supported!!
|
||
|
meta-data=/dev/sda isize=512 agcount=8, agsize=16352 blks
|
||
|
= sectsz=512 attr=2, projid32bit=1
|
||
|
= crc=1 finobt=1, sparse=1, rmapbt=1
|
||
|
= reflink=1 bigtime=1 inobtcount=1 nrext64=1
|
||
|
data = bsize=4096 blocks=130816, imaxpct=25
|
||
|
= sunit=32 swidth=128 blks
|
||
|
naming =version 2 bsize=4096 ascii-ci=0, ftype=1
|
||
|
log =internal log bsize=4096 blocks=8192, version=2
|
||
|
= sectsz=512 sunit=32 blks, lazy-count=1
|
||
|
realtime =none extsz=4096 blocks=0, rtextents=0
|
||
|
= rgcount=0 rgsize=0 blks
|
||
|
Discarding blocks...Done.
|
||
|
# xfs_repair -n /dev/sda
|
||
|
Phase 1 - find and verify superblock...
|
||
|
- reporting progress in intervals of 15 minutes
|
||
|
Phase 2 - using internal log
|
||
|
- zero log...
|
||
|
- 16:30:50: zeroing log - 16320 of 16320 blocks done
|
||
|
- scan filesystem freespace and inode maps...
|
||
|
agf_freeblks 25, counted 0 in ag 4
|
||
|
sb_fdblocks 8823, counted 8798
|
||
|
|
||
|
The root cause of this problem is the numrecs handling in
|
||
|
xfs_freesp_init_recs, which is used to initialize a new AG. Prior to
|
||
|
calling the function, we set up the new bnobt block with numrecs == 1
|
||
|
and rely on _freesp_init_recs to format that new record. If the last
|
||
|
record created has a blockcount of zero, then it sets numrecs = 0.
|
||
|
|
||
|
That last bit isn't correct if the AG contains the log, the start of the
|
||
|
log is not immediately after the initial blocks due to stripe alignment,
|
||
|
and the end of the log is perfectly aligned with the end of the AG. For
|
||
|
this case, we actually formatted a single bnobt record to handle the
|
||
|
free space before the start of the (stripe aligned) log, and incremented
|
||
|
arec to try to format a second record. That second record turned out to
|
||
|
be unnecessary, so what we really want is to leave numrecs at 1.
|
||
|
|
||
|
The numrecs handling itself is overly complicated because a different
|
||
|
function sets numrecs == 1. Change the bnobt creation code to start
|
||
|
with numrecs set to zero and only increment it after successfully
|
||
|
formatting a free space extent into the btree block.
|
||
|
|
||
|
Fixes: f327a00745ff ("xfs: account for log space when formatting new AGs")
|
||
|
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
|
||
|
Reviewed-by: Dave Chinner <dchinner@redhat.com>
|
||
|
Signed-off-by: Dave Chinner <david@fromorbit.com>
|
||
|
Signed-off-by: Pavel Reichl <preichl@redhat.com>
|
||
|
---
|
||
|
fs/xfs/libxfs/xfs_ag.c | 19 +++++++++----------
|
||
|
1 file changed, 9 insertions(+), 10 deletions(-)
|
||
|
|
||
|
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
|
||
|
index 1b078bbbf225..9b373a0c7aaf 100644
|
||
|
--- a//libxfs/xfs_ag.c
|
||
|
+++ b//libxfs/xfs_ag.c
|
||
|
@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
|
||
|
ASSERT(start >= mp->m_ag_prealloc_blocks);
|
||
|
if (start != mp->m_ag_prealloc_blocks) {
|
||
|
/*
|
||
|
- * Modify first record to pad stripe align of log
|
||
|
+ * Modify first record to pad stripe align of log and
|
||
|
+ * bump the record count.
|
||
|
*/
|
||
|
arec->ar_blockcount = cpu_to_be32(start -
|
||
|
mp->m_ag_prealloc_blocks);
|
||
|
+ be16_add_cpu(&block->bb_numrecs, 1);
|
||
|
nrec = arec + 1;
|
||
|
|
||
|
/*
|
||
|
@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
|
||
|
be32_to_cpu(arec->ar_startblock) +
|
||
|
be32_to_cpu(arec->ar_blockcount));
|
||
|
arec = nrec;
|
||
|
- be16_add_cpu(&block->bb_numrecs, 1);
|
||
|
}
|
||
|
/*
|
||
|
* Change record start to after the internal log
|
||
|
@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
- * Calculate the record block count and check for the case where
|
||
|
- * the log might have consumed all available space in the AG. If
|
||
|
- * so, reset the record count to 0 to avoid exposure of an invalid
|
||
|
- * record start block.
|
||
|
+ * Calculate the block count of this record; if it is nonzero,
|
||
|
+ * increment the record count.
|
||
|
*/
|
||
|
arec->ar_blockcount = cpu_to_be32(id->agsize -
|
||
|
be32_to_cpu(arec->ar_startblock));
|
||
|
- if (!arec->ar_blockcount)
|
||
|
- block->bb_numrecs = 0;
|
||
|
+ if (arec->ar_blockcount)
|
||
|
+ be16_add_cpu(&block->bb_numrecs, 1);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
@@ -538,7 +537,7 @@ xfs_bnoroot_init(
|
||
|
struct xfs_buf *bp,
|
||
|
struct aghdr_init_data *id)
|
||
|
{
|
||
|
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
|
||
|
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
|
||
|
xfs_freesp_init_recs(mp, bp, id);
|
||
|
}
|
||
|
|
||
|
@@ -548,7 +547,7 @@ xfs_cntroot_init(
|
||
|
struct xfs_buf *bp,
|
||
|
struct aghdr_init_data *id)
|
||
|
{
|
||
|
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
|
||
|
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
|
||
|
xfs_freesp_init_recs(mp, bp, id);
|
||
|
}
|
||
|
|
||
|
--
|
||
|
2.40.1
|
||
|
|